1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Demangle/RustDemangle.h"
15 #include "llvm/Demangle/Demangle.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cstring>
20 #include <limits>
21 
22 using namespace llvm;
23 using namespace rust_demangle;
24 
25 char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
26                          int *Status) {
27   if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
28     if (Status != nullptr)
29       *Status = demangle_invalid_args;
30     return nullptr;
31   }
32 
33   // Return early if mangled name doesn't look like a Rust symbol.
34   StringView Mangled(MangledName);
35   if (!Mangled.startsWith("_R")) {
36     if (Status != nullptr)
37       *Status = demangle_invalid_mangled_name;
38     return nullptr;
39   }
40 
41   Demangler D;
42   if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) {
43     if (Status != nullptr)
44       *Status = demangle_memory_alloc_failure;
45     return nullptr;
46   }
47 
48   if (!D.demangle(Mangled)) {
49     if (Status != nullptr)
50       *Status = demangle_invalid_mangled_name;
51     std::free(D.Output.getBuffer());
52     return nullptr;
53   }
54 
55   D.Output += '\0';
56   char *Demangled = D.Output.getBuffer();
57   size_t DemangledLen = D.Output.getCurrentPosition();
58 
59   if (Buf != nullptr) {
60     if (DemangledLen <= *N) {
61       std::memcpy(Buf, Demangled, DemangledLen);
62       std::free(Demangled);
63       Demangled = Buf;
64     } else {
65       std::free(Buf);
66     }
67   }
68 
69   if (N != nullptr)
70     *N = DemangledLen;
71 
72   if (Status != nullptr)
73     *Status = demangle_success;
74 
75   return Demangled;
76 }
77 
78 Demangler::Demangler(size_t MaxRecursionLevel)
79     : MaxRecursionLevel(MaxRecursionLevel) {}
80 
81 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
82 
83 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
84 
85 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
86 
87 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
88 static inline bool isValid(const char C) {
89   return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
90 }
91 
92 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
93 // otherwise. The demangled symbol is stored in Output field. It is
94 // responsibility of the caller to free the memory behind the output stream.
95 //
96 // <symbol-name> = "_R" <path> [<instantiating-crate>]
97 bool Demangler::demangle(StringView Mangled) {
98   Position = 0;
99   Error = false;
100   RecursionLevel = 0;
101 
102   if (!Mangled.consumeFront("_R")) {
103     Error = true;
104     return false;
105   }
106   Input = Mangled;
107 
108   demanglePath();
109 
110   // FIXME parse optional <instantiating-crate>.
111 
112   if (Position != Input.size())
113     Error = true;
114 
115   return !Error;
116 }
117 
118 // <path> = "C" <identifier>               // crate root
119 //        | "M" <impl-path> <type>         // <T> (inherent impl)
120 //        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
121 //        | "Y" <type> <path>              // <T as Trait> (trait definition)
122 //        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
123 //        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
124 //        | <backref>
125 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
126 // <ns> = "C"      // closure
127 //      | "S"      // shim
128 //      | <A-Z>    // other special namespaces
129 //      | <a-z>    // internal namespaces
130 void Demangler::demanglePath() {
131   if (Error || RecursionLevel >= MaxRecursionLevel) {
132     Error = true;
133     return;
134   }
135   RecursionLevel += 1;
136 
137   switch (consume()) {
138   case 'C': {
139     parseOptionalBase62Number('s');
140     Identifier Ident = parseIdentifier();
141     print(Ident.Name);
142     break;
143   }
144   case 'N': {
145     char NS = consume();
146     if (!isLower(NS) && !isUpper(NS)) {
147       Error = true;
148       break;
149     }
150     demanglePath();
151 
152     parseOptionalBase62Number('s');
153     Identifier Ident = parseIdentifier();
154 
155     if (!Ident.empty()) {
156       // FIXME print special namespaces:
157       // * "C" closures
158       // * "S" shim
159       print("::");
160       print(Ident.Name);
161     }
162     break;
163   }
164   default:
165     // FIXME parse remaining productions.
166     Error = true;
167     break;
168   }
169 
170   RecursionLevel -= 1;
171 }
172 
173 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
174 Identifier Demangler::parseIdentifier() {
175   bool Punycode = consumeIf('u');
176   uint64_t Bytes = parseDecimalNumber();
177 
178   // Underscore resolves the ambiguity when identifier starts with a decimal
179   // digit or another underscore.
180   consumeIf('_');
181 
182   if (Error || Bytes > Input.size() - Position) {
183     Error = true;
184     return {};
185   }
186   StringView S = Input.substr(Position, Bytes);
187   Position += Bytes;
188 
189   if (!std::all_of(S.begin(), S.end(), isValid)) {
190     Error = true;
191     return {};
192   }
193 
194   return {S, Punycode};
195 }
196 
197 // Parses optional base 62 number. The presence of a number is determined using
198 // Tag.
199 void Demangler::parseOptionalBase62Number(char Tag) {
200   // Parsing result is currently unused.
201   if (consumeIf(Tag))
202     parseBase62Number();
203 }
204 
205 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
206 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
207 // "1_" encodes 2, etc.
208 //
209 // <base-62-number> = {<0-9a-zA-Z>} "_"
210 uint64_t Demangler::parseBase62Number() {
211   if (consumeIf('_'))
212     return 0;
213 
214   uint64_t Value = 0;
215 
216   while (true) {
217     uint64_t Digit;
218     char C = consume();
219 
220     if (C == '_') {
221       break;
222     } else if (isDigit(C)) {
223       Digit = C - '0';
224     } else if (isLower(C)) {
225       Digit = 10 + (C - 'a');
226     } else if (isUpper(C)) {
227       Digit = 10 + 26 + (C - 'A');
228     } else {
229       Error = true;
230       return 0;
231     }
232 
233     if (!mulAssign(Value, 62))
234       return 0;
235 
236     if (!addAssign(Value, Digit))
237       return 0;
238   }
239 
240   if (!addAssign(Value, 1))
241     return 0;
242 
243   return Value;
244 }
245 
246 // Parses a decimal number that had been encoded without any leading zeros.
247 //
248 // <decimal-number> = "0"
249 //                  | <1-9> {<0-9>}
250 uint64_t Demangler::parseDecimalNumber() {
251   char C = look();
252   if (!isDigit(C)) {
253     Error = true;
254     return 0;
255   }
256 
257   if (C == '0') {
258     consume();
259     return 0;
260   }
261 
262   uint64_t Value = 0;
263 
264   while (isDigit(look())) {
265     if (!mulAssign(Value, 10)) {
266       Error = true;
267       return 0;
268     }
269 
270     uint64_t D = consume() - '0';
271     if (!addAssign(Value, D))
272       return 0;
273   }
274 
275   return Value;
276 }
277