1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Demangle/RustDemangle.h"
15 #include "llvm/Demangle/Demangle.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cstring>
20 #include <limits>
21 
22 using namespace llvm;
23 using namespace rust_demangle;
24 
25 char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
26                          int *Status) {
27   if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
28     if (Status != nullptr)
29       *Status = demangle_invalid_args;
30     return nullptr;
31   }
32 
33   // Return early if mangled name doesn't look like a Rust symbol.
34   StringView Mangled(MangledName);
35   if (!Mangled.startsWith("_R")) {
36     if (Status != nullptr)
37       *Status = demangle_invalid_mangled_name;
38     return nullptr;
39   }
40 
41   Demangler D;
42   if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) {
43     if (Status != nullptr)
44       *Status = demangle_memory_alloc_failure;
45     return nullptr;
46   }
47 
48   if (!D.demangle(Mangled)) {
49     if (Status != nullptr)
50       *Status = demangle_invalid_mangled_name;
51     std::free(D.Output.getBuffer());
52     return nullptr;
53   }
54 
55   D.Output += '\0';
56   char *Demangled = D.Output.getBuffer();
57   size_t DemangledLen = D.Output.getCurrentPosition();
58 
59   if (Buf != nullptr) {
60     if (DemangledLen <= *N) {
61       std::memcpy(Buf, Demangled, DemangledLen);
62       std::free(Demangled);
63       Demangled = Buf;
64     } else {
65       std::free(Buf);
66     }
67   }
68 
69   if (N != nullptr)
70     *N = DemangledLen;
71 
72   if (Status != nullptr)
73     *Status = demangle_success;
74 
75   return Demangled;
76 }
77 
78 Demangler::Demangler(size_t MaxRecursionLevel)
79     : MaxRecursionLevel(MaxRecursionLevel) {}
80 
81 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
82 
83 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
84 
85 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
86 
87 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
88 static inline bool isValid(const char C) {
89   return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
90 }
91 
92 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
93 // otherwise. The demangled symbol is stored in Output field. It is
94 // responsibility of the caller to free the memory behind the output stream.
95 //
96 // <symbol-name> = "_R" <path> [<instantiating-crate>]
97 bool Demangler::demangle(StringView Mangled) {
98   Position = 0;
99   Error = false;
100   RecursionLevel = 0;
101 
102   if (!Mangled.consumeFront("_R")) {
103     Error = true;
104     return false;
105   }
106   Input = Mangled;
107 
108   demanglePath();
109 
110   // FIXME parse optional <instantiating-crate>.
111 
112   if (Position != Input.size())
113     Error = true;
114 
115   return !Error;
116 }
117 
118 // <path> = "C" <identifier>               // crate root
119 //        | "M" <impl-path> <type>         // <T> (inherent impl)
120 //        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
121 //        | "Y" <type> <path>              // <T as Trait> (trait definition)
122 //        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
123 //        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
124 //        | <backref>
125 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
126 // <ns> = "C"      // closure
127 //      | "S"      // shim
128 //      | <A-Z>    // other special namespaces
129 //      | <a-z>    // internal namespaces
130 void Demangler::demanglePath() {
131   if (Error || RecursionLevel >= MaxRecursionLevel) {
132     Error = true;
133     return;
134   }
135   SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
136 
137   switch (consume()) {
138   case 'C': {
139     parseOptionalBase62Number('s');
140     Identifier Ident = parseIdentifier();
141     print(Ident.Name);
142     break;
143   }
144   case 'N': {
145     char NS = consume();
146     if (!isLower(NS) && !isUpper(NS)) {
147       Error = true;
148       break;
149     }
150     demanglePath();
151 
152     uint64_t Disambiguator = parseOptionalBase62Number('s');
153     Identifier Ident = parseIdentifier();
154 
155     if (isUpper(NS)) {
156       // Special namespaces
157       print("::{");
158       if (NS == 'C')
159         print("closure");
160       else if (NS == 'S')
161         print("shim");
162       else
163         print(NS);
164       if (!Ident.empty()) {
165         print(":");
166         print(Ident.Name);
167       }
168       print('#');
169       printDecimalNumber(Disambiguator);
170       print('}');
171     } else {
172       // Implementation internal namespaces.
173       if (!Ident.empty()) {
174         print("::");
175         print(Ident.Name);
176       }
177     }
178     break;
179   }
180   case 'I': {
181     demanglePath();
182     print("::<");
183     for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
184       if (I > 0)
185         print(", ");
186       demangleGenericArg();
187     }
188     print(">");
189     break;
190   }
191   default:
192     // FIXME parse remaining productions.
193     Error = true;
194     break;
195   }
196 }
197 
198 // <generic-arg> = <lifetime>
199 //               | <type>
200 //               | "K" <const>
201 // <lifetime> = "L" <base-62-number>
202 void Demangler::demangleGenericArg() {
203   // FIXME parse remaining productions
204   demangleType();
205 }
206 
207 static const char *const BasicTypes[] = {
208     "i8",    // a
209     "bool",  // b
210     "char",  // c
211     "f64",   // d
212     "str",   // e
213     "f32",   // f
214     nullptr, // g
215     "u8",    // h
216     "isize", // i
217     "usize", // j
218     nullptr, // k
219     "i32",   // l
220     "u32",   // m
221     "i128",  // n
222     "u128",  // o
223     "_",     // p
224     nullptr, // q
225     nullptr, // r
226     "i16",   // s
227     "u16",   // t
228     "()",    // u
229     "...",   // v
230     nullptr, // w
231     "i64",   // x
232     "u64",   // y
233     "!",     // z
234 };
235 
236 // <basic-type> = "a"      // i8
237 //              | "b"      // bool
238 //              | "c"      // char
239 //              | "d"      // f64
240 //              | "e"      // str
241 //              | "f"      // f32
242 //              | "h"      // u8
243 //              | "i"      // isize
244 //              | "j"      // usize
245 //              | "l"      // i32
246 //              | "m"      // u32
247 //              | "n"      // i128
248 //              | "o"      // u128
249 //              | "s"      // i16
250 //              | "t"      // u16
251 //              | "u"      // ()
252 //              | "v"      // ...
253 //              | "x"      // i64
254 //              | "y"      // u64
255 //              | "z"      // !
256 //              | "p"      // placeholder (e.g. for generic params), shown as _
257 static const char *parseBasicType(char C) {
258   if (isLower(C))
259     return BasicTypes[C - 'a'];
260   return nullptr;
261 }
262 
263 // <type> = | <basic-type>
264 //          | <path>                      // named type
265 //          | "A" <type> <const>          // [T; N]
266 //          | "S" <type>                  // [T]
267 //          | "T" {<type>} "E"            // (T1, T2, T3, ...)
268 //          | "R" [<lifetime>] <type>     // &T
269 //          | "Q" [<lifetime>] <type>     // &mut T
270 //          | "P" <type>                  // *const T
271 //          | "O" <type>                  // *mut T
272 //          | "F" <fn-sig>                // fn(...) -> ...
273 //          | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
274 //          | <backref>                   // backref
275 void Demangler::demangleType() {
276   if (const char *BasicType = parseBasicType(consume())) {
277     print(BasicType);
278   } else {
279     // FIXME parse remaining productions.
280     Error = true;
281   }
282 }
283 
284 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
285 Identifier Demangler::parseIdentifier() {
286   bool Punycode = consumeIf('u');
287   uint64_t Bytes = parseDecimalNumber();
288 
289   // Underscore resolves the ambiguity when identifier starts with a decimal
290   // digit or another underscore.
291   consumeIf('_');
292 
293   if (Error || Bytes > Input.size() - Position) {
294     Error = true;
295     return {};
296   }
297   StringView S = Input.substr(Position, Bytes);
298   Position += Bytes;
299 
300   if (!std::all_of(S.begin(), S.end(), isValid)) {
301     Error = true;
302     return {};
303   }
304 
305   return {S, Punycode};
306 }
307 
308 // Parses optional base 62 number. The presence of a number is determined using
309 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise.
310 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
311   if (!consumeIf(Tag))
312     return 0;
313 
314   uint64_t N = parseBase62Number();
315   if (Error || !addAssign(N, 1))
316     return 0;
317 
318   return N;
319 }
320 
321 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
322 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
323 // "1_" encodes 2, etc.
324 //
325 // <base-62-number> = {<0-9a-zA-Z>} "_"
326 uint64_t Demangler::parseBase62Number() {
327   if (consumeIf('_'))
328     return 0;
329 
330   uint64_t Value = 0;
331 
332   while (true) {
333     uint64_t Digit;
334     char C = consume();
335 
336     if (C == '_') {
337       break;
338     } else if (isDigit(C)) {
339       Digit = C - '0';
340     } else if (isLower(C)) {
341       Digit = 10 + (C - 'a');
342     } else if (isUpper(C)) {
343       Digit = 10 + 26 + (C - 'A');
344     } else {
345       Error = true;
346       return 0;
347     }
348 
349     if (!mulAssign(Value, 62))
350       return 0;
351 
352     if (!addAssign(Value, Digit))
353       return 0;
354   }
355 
356   if (!addAssign(Value, 1))
357     return 0;
358 
359   return Value;
360 }
361 
362 // Parses a decimal number that had been encoded without any leading zeros.
363 //
364 // <decimal-number> = "0"
365 //                  | <1-9> {<0-9>}
366 uint64_t Demangler::parseDecimalNumber() {
367   char C = look();
368   if (!isDigit(C)) {
369     Error = true;
370     return 0;
371   }
372 
373   if (C == '0') {
374     consume();
375     return 0;
376   }
377 
378   uint64_t Value = 0;
379 
380   while (isDigit(look())) {
381     if (!mulAssign(Value, 10)) {
382       Error = true;
383       return 0;
384     }
385 
386     uint64_t D = consume() - '0';
387     if (!addAssign(Value, D))
388       return 0;
389   }
390 
391   return Value;
392 }
393