1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/Demangle/Demangle.h"
15 #include "llvm/Demangle/StringView.h"
16 #include "llvm/Demangle/Utility.h"
17
18 #include <algorithm>
19 #include <cassert>
20 #include <cstdint>
21 #include <cstring>
22 #include <limits>
23
24 using namespace llvm;
25
26 using llvm::itanium_demangle::OutputBuffer;
27 using llvm::itanium_demangle::ScopedOverride;
28 using llvm::itanium_demangle::StringView;
29
30 namespace {
31
32 struct Identifier {
33 StringView Name;
34 bool Punycode;
35
empty__anon798090d70111::Identifier36 bool empty() const { return Name.empty(); }
37 };
38
39 enum class BasicType {
40 Bool,
41 Char,
42 I8,
43 I16,
44 I32,
45 I64,
46 I128,
47 ISize,
48 U8,
49 U16,
50 U32,
51 U64,
52 U128,
53 USize,
54 F32,
55 F64,
56 Str,
57 Placeholder,
58 Unit,
59 Variadic,
60 Never,
61 };
62
63 enum class IsInType {
64 No,
65 Yes,
66 };
67
68 enum class LeaveGenericsOpen {
69 No,
70 Yes,
71 };
72
73 class Demangler {
74 // Maximum recursion level. Used to avoid stack overflow.
75 size_t MaxRecursionLevel;
76 // Current recursion level.
77 size_t RecursionLevel;
78 size_t BoundLifetimes;
79 // Input string that is being demangled with "_R" prefix removed.
80 StringView Input;
81 // Position in the input string.
82 size_t Position;
83 // When true, print methods append the output to the stream.
84 // When false, the output is suppressed.
85 bool Print;
86 // True if an error occurred.
87 bool Error;
88
89 public:
90 // Demangled output.
91 OutputBuffer Output;
92
93 Demangler(size_t MaxRecursionLevel = 500);
94
95 bool demangle(StringView MangledName);
96
97 private:
98 bool demanglePath(IsInType Type,
99 LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
100 void demangleImplPath(IsInType InType);
101 void demangleGenericArg();
102 void demangleType();
103 void demangleFnSig();
104 void demangleDynBounds();
105 void demangleDynTrait();
106 void demangleOptionalBinder();
107 void demangleConst();
108 void demangleConstInt();
109 void demangleConstBool();
110 void demangleConstChar();
111
demangleBackref(Callable Demangler)112 template <typename Callable> void demangleBackref(Callable Demangler) {
113 uint64_t Backref = parseBase62Number();
114 if (Error || Backref >= Position) {
115 Error = true;
116 return;
117 }
118
119 if (!Print)
120 return;
121
122 ScopedOverride<size_t> SavePosition(Position, Position);
123 Position = Backref;
124 Demangler();
125 }
126
127 Identifier parseIdentifier();
128 uint64_t parseOptionalBase62Number(char Tag);
129 uint64_t parseBase62Number();
130 uint64_t parseDecimalNumber();
131 uint64_t parseHexNumber(StringView &HexDigits);
132
133 void print(char C);
134 void print(StringView S);
135 void printDecimalNumber(uint64_t N);
136 void printBasicType(BasicType);
137 void printLifetime(uint64_t Index);
138 void printIdentifier(Identifier Ident);
139
140 char look() const;
141 char consume();
142 bool consumeIf(char Prefix);
143
144 bool addAssign(uint64_t &A, uint64_t B);
145 bool mulAssign(uint64_t &A, uint64_t B);
146 };
147
148 } // namespace
149
rustDemangle(const char * MangledName)150 char *llvm::rustDemangle(const char *MangledName) {
151 if (MangledName == nullptr)
152 return nullptr;
153
154 // Return early if mangled name doesn't look like a Rust symbol.
155 StringView Mangled(MangledName);
156 if (!Mangled.startsWith("_R"))
157 return nullptr;
158
159 Demangler D;
160 if (!initializeOutputBuffer(nullptr, nullptr, D.Output, 1024))
161 return nullptr;
162
163 if (!D.demangle(Mangled)) {
164 std::free(D.Output.getBuffer());
165 return nullptr;
166 }
167
168 D.Output += '\0';
169
170 return D.Output.getBuffer();
171 }
172
Demangler(size_t MaxRecursionLevel)173 Demangler::Demangler(size_t MaxRecursionLevel)
174 : MaxRecursionLevel(MaxRecursionLevel) {}
175
isDigit(const char C)176 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
177
isHexDigit(const char C)178 static inline bool isHexDigit(const char C) {
179 return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
180 }
181
isLower(const char C)182 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
183
isUpper(const char C)184 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
185
186 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
isValid(const char C)187 static inline bool isValid(const char C) {
188 return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
189 }
190
191 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
192 // otherwise. The demangled symbol is stored in Output field. It is
193 // responsibility of the caller to free the memory behind the output stream.
194 //
195 // <symbol-name> = "_R" <path> [<instantiating-crate>]
demangle(StringView Mangled)196 bool Demangler::demangle(StringView Mangled) {
197 Position = 0;
198 Error = false;
199 Print = true;
200 RecursionLevel = 0;
201 BoundLifetimes = 0;
202
203 if (!Mangled.consumeFront("_R")) {
204 Error = true;
205 return false;
206 }
207 size_t Dot = Mangled.find('.');
208 Input = Mangled.substr(0, Dot);
209 StringView Suffix = Mangled.dropFront(Dot);
210
211 demanglePath(IsInType::No);
212
213 if (Position != Input.size()) {
214 ScopedOverride<bool> SavePrint(Print, false);
215 demanglePath(IsInType::No);
216 }
217
218 if (Position != Input.size())
219 Error = true;
220
221 if (!Suffix.empty()) {
222 print(" (");
223 print(Suffix);
224 print(")");
225 }
226
227 return !Error;
228 }
229
230 // Demangles a path. InType indicates whether a path is inside a type. When
231 // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
232 // output. Return value indicates whether generics arguments have been left
233 // open.
234 //
235 // <path> = "C" <identifier> // crate root
236 // | "M" <impl-path> <type> // <T> (inherent impl)
237 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
238 // | "Y" <type> <path> // <T as Trait> (trait definition)
239 // | "N" <ns> <path> <identifier> // ...::ident (nested path)
240 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
241 // | <backref>
242 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
243 // <ns> = "C" // closure
244 // | "S" // shim
245 // | <A-Z> // other special namespaces
246 // | <a-z> // internal namespaces
demanglePath(IsInType InType,LeaveGenericsOpen LeaveOpen)247 bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
248 if (Error || RecursionLevel >= MaxRecursionLevel) {
249 Error = true;
250 return false;
251 }
252 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
253
254 switch (consume()) {
255 case 'C': {
256 parseOptionalBase62Number('s');
257 printIdentifier(parseIdentifier());
258 break;
259 }
260 case 'M': {
261 demangleImplPath(InType);
262 print("<");
263 demangleType();
264 print(">");
265 break;
266 }
267 case 'X': {
268 demangleImplPath(InType);
269 print("<");
270 demangleType();
271 print(" as ");
272 demanglePath(IsInType::Yes);
273 print(">");
274 break;
275 }
276 case 'Y': {
277 print("<");
278 demangleType();
279 print(" as ");
280 demanglePath(IsInType::Yes);
281 print(">");
282 break;
283 }
284 case 'N': {
285 char NS = consume();
286 if (!isLower(NS) && !isUpper(NS)) {
287 Error = true;
288 break;
289 }
290 demanglePath(InType);
291
292 uint64_t Disambiguator = parseOptionalBase62Number('s');
293 Identifier Ident = parseIdentifier();
294
295 if (isUpper(NS)) {
296 // Special namespaces
297 print("::{");
298 if (NS == 'C')
299 print("closure");
300 else if (NS == 'S')
301 print("shim");
302 else
303 print(NS);
304 if (!Ident.empty()) {
305 print(":");
306 printIdentifier(Ident);
307 }
308 print('#');
309 printDecimalNumber(Disambiguator);
310 print('}');
311 } else {
312 // Implementation internal namespaces.
313 if (!Ident.empty()) {
314 print("::");
315 printIdentifier(Ident);
316 }
317 }
318 break;
319 }
320 case 'I': {
321 demanglePath(InType);
322 // Omit "::" when in a type, where it is optional.
323 if (InType == IsInType::No)
324 print("::");
325 print("<");
326 for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
327 if (I > 0)
328 print(", ");
329 demangleGenericArg();
330 }
331 if (LeaveOpen == LeaveGenericsOpen::Yes)
332 return true;
333 else
334 print(">");
335 break;
336 }
337 case 'B': {
338 bool IsOpen = false;
339 demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
340 return IsOpen;
341 }
342 default:
343 Error = true;
344 break;
345 }
346
347 return false;
348 }
349
350 // <impl-path> = [<disambiguator>] <path>
351 // <disambiguator> = "s" <base-62-number>
demangleImplPath(IsInType InType)352 void Demangler::demangleImplPath(IsInType InType) {
353 ScopedOverride<bool> SavePrint(Print, false);
354 parseOptionalBase62Number('s');
355 demanglePath(InType);
356 }
357
358 // <generic-arg> = <lifetime>
359 // | <type>
360 // | "K" <const>
361 // <lifetime> = "L" <base-62-number>
demangleGenericArg()362 void Demangler::demangleGenericArg() {
363 if (consumeIf('L'))
364 printLifetime(parseBase62Number());
365 else if (consumeIf('K'))
366 demangleConst();
367 else
368 demangleType();
369 }
370
371 // <basic-type> = "a" // i8
372 // | "b" // bool
373 // | "c" // char
374 // | "d" // f64
375 // | "e" // str
376 // | "f" // f32
377 // | "h" // u8
378 // | "i" // isize
379 // | "j" // usize
380 // | "l" // i32
381 // | "m" // u32
382 // | "n" // i128
383 // | "o" // u128
384 // | "s" // i16
385 // | "t" // u16
386 // | "u" // ()
387 // | "v" // ...
388 // | "x" // i64
389 // | "y" // u64
390 // | "z" // !
391 // | "p" // placeholder (e.g. for generic params), shown as _
parseBasicType(char C,BasicType & Type)392 static bool parseBasicType(char C, BasicType &Type) {
393 switch (C) {
394 case 'a':
395 Type = BasicType::I8;
396 return true;
397 case 'b':
398 Type = BasicType::Bool;
399 return true;
400 case 'c':
401 Type = BasicType::Char;
402 return true;
403 case 'd':
404 Type = BasicType::F64;
405 return true;
406 case 'e':
407 Type = BasicType::Str;
408 return true;
409 case 'f':
410 Type = BasicType::F32;
411 return true;
412 case 'h':
413 Type = BasicType::U8;
414 return true;
415 case 'i':
416 Type = BasicType::ISize;
417 return true;
418 case 'j':
419 Type = BasicType::USize;
420 return true;
421 case 'l':
422 Type = BasicType::I32;
423 return true;
424 case 'm':
425 Type = BasicType::U32;
426 return true;
427 case 'n':
428 Type = BasicType::I128;
429 return true;
430 case 'o':
431 Type = BasicType::U128;
432 return true;
433 case 'p':
434 Type = BasicType::Placeholder;
435 return true;
436 case 's':
437 Type = BasicType::I16;
438 return true;
439 case 't':
440 Type = BasicType::U16;
441 return true;
442 case 'u':
443 Type = BasicType::Unit;
444 return true;
445 case 'v':
446 Type = BasicType::Variadic;
447 return true;
448 case 'x':
449 Type = BasicType::I64;
450 return true;
451 case 'y':
452 Type = BasicType::U64;
453 return true;
454 case 'z':
455 Type = BasicType::Never;
456 return true;
457 default:
458 return false;
459 }
460 }
461
printBasicType(BasicType Type)462 void Demangler::printBasicType(BasicType Type) {
463 switch (Type) {
464 case BasicType::Bool:
465 print("bool");
466 break;
467 case BasicType::Char:
468 print("char");
469 break;
470 case BasicType::I8:
471 print("i8");
472 break;
473 case BasicType::I16:
474 print("i16");
475 break;
476 case BasicType::I32:
477 print("i32");
478 break;
479 case BasicType::I64:
480 print("i64");
481 break;
482 case BasicType::I128:
483 print("i128");
484 break;
485 case BasicType::ISize:
486 print("isize");
487 break;
488 case BasicType::U8:
489 print("u8");
490 break;
491 case BasicType::U16:
492 print("u16");
493 break;
494 case BasicType::U32:
495 print("u32");
496 break;
497 case BasicType::U64:
498 print("u64");
499 break;
500 case BasicType::U128:
501 print("u128");
502 break;
503 case BasicType::USize:
504 print("usize");
505 break;
506 case BasicType::F32:
507 print("f32");
508 break;
509 case BasicType::F64:
510 print("f64");
511 break;
512 case BasicType::Str:
513 print("str");
514 break;
515 case BasicType::Placeholder:
516 print("_");
517 break;
518 case BasicType::Unit:
519 print("()");
520 break;
521 case BasicType::Variadic:
522 print("...");
523 break;
524 case BasicType::Never:
525 print("!");
526 break;
527 }
528 }
529
530 // <type> = | <basic-type>
531 // | <path> // named type
532 // | "A" <type> <const> // [T; N]
533 // | "S" <type> // [T]
534 // | "T" {<type>} "E" // (T1, T2, T3, ...)
535 // | "R" [<lifetime>] <type> // &T
536 // | "Q" [<lifetime>] <type> // &mut T
537 // | "P" <type> // *const T
538 // | "O" <type> // *mut T
539 // | "F" <fn-sig> // fn(...) -> ...
540 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
541 // | <backref> // backref
demangleType()542 void Demangler::demangleType() {
543 if (Error || RecursionLevel >= MaxRecursionLevel) {
544 Error = true;
545 return;
546 }
547 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
548
549 size_t Start = Position;
550 char C = consume();
551 BasicType Type;
552 if (parseBasicType(C, Type))
553 return printBasicType(Type);
554
555 switch (C) {
556 case 'A':
557 print("[");
558 demangleType();
559 print("; ");
560 demangleConst();
561 print("]");
562 break;
563 case 'S':
564 print("[");
565 demangleType();
566 print("]");
567 break;
568 case 'T': {
569 print("(");
570 size_t I = 0;
571 for (; !Error && !consumeIf('E'); ++I) {
572 if (I > 0)
573 print(", ");
574 demangleType();
575 }
576 if (I == 1)
577 print(",");
578 print(")");
579 break;
580 }
581 case 'R':
582 case 'Q':
583 print('&');
584 if (consumeIf('L')) {
585 if (auto Lifetime = parseBase62Number()) {
586 printLifetime(Lifetime);
587 print(' ');
588 }
589 }
590 if (C == 'Q')
591 print("mut ");
592 demangleType();
593 break;
594 case 'P':
595 print("*const ");
596 demangleType();
597 break;
598 case 'O':
599 print("*mut ");
600 demangleType();
601 break;
602 case 'F':
603 demangleFnSig();
604 break;
605 case 'D':
606 demangleDynBounds();
607 if (consumeIf('L')) {
608 if (auto Lifetime = parseBase62Number()) {
609 print(" + ");
610 printLifetime(Lifetime);
611 }
612 } else {
613 Error = true;
614 }
615 break;
616 case 'B':
617 demangleBackref([&] { demangleType(); });
618 break;
619 default:
620 Position = Start;
621 demanglePath(IsInType::Yes);
622 break;
623 }
624 }
625
626 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
627 // <abi> = "C"
628 // | <undisambiguated-identifier>
demangleFnSig()629 void Demangler::demangleFnSig() {
630 ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
631 demangleOptionalBinder();
632
633 if (consumeIf('U'))
634 print("unsafe ");
635
636 if (consumeIf('K')) {
637 print("extern \"");
638 if (consumeIf('C')) {
639 print("C");
640 } else {
641 Identifier Ident = parseIdentifier();
642 if (Ident.Punycode)
643 Error = true;
644 for (char C : Ident.Name) {
645 // When mangling ABI string, the "-" is replaced with "_".
646 if (C == '_')
647 C = '-';
648 print(C);
649 }
650 }
651 print("\" ");
652 }
653
654 print("fn(");
655 for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
656 if (I > 0)
657 print(", ");
658 demangleType();
659 }
660 print(")");
661
662 if (consumeIf('u')) {
663 // Skip the unit type from the output.
664 } else {
665 print(" -> ");
666 demangleType();
667 }
668 }
669
670 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
demangleDynBounds()671 void Demangler::demangleDynBounds() {
672 ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
673 print("dyn ");
674 demangleOptionalBinder();
675 for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
676 if (I > 0)
677 print(" + ");
678 demangleDynTrait();
679 }
680 }
681
682 // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
683 // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
demangleDynTrait()684 void Demangler::demangleDynTrait() {
685 bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
686 while (!Error && consumeIf('p')) {
687 if (!IsOpen) {
688 IsOpen = true;
689 print('<');
690 } else {
691 print(", ");
692 }
693 print(parseIdentifier().Name);
694 print(" = ");
695 demangleType();
696 }
697 if (IsOpen)
698 print(">");
699 }
700
701 // Demangles optional binder and updates the number of bound lifetimes.
702 //
703 // <binder> = "G" <base-62-number>
demangleOptionalBinder()704 void Demangler::demangleOptionalBinder() {
705 uint64_t Binder = parseOptionalBase62Number('G');
706 if (Error || Binder == 0)
707 return;
708
709 // In valid inputs each bound lifetime is referenced later. Referencing a
710 // lifetime requires at least one byte of input. Reject inputs that are too
711 // short to reference all bound lifetimes. Otherwise demangling of invalid
712 // binders could generate excessive amounts of output.
713 if (Binder >= Input.size() - BoundLifetimes) {
714 Error = true;
715 return;
716 }
717
718 print("for<");
719 for (size_t I = 0; I != Binder; ++I) {
720 BoundLifetimes += 1;
721 if (I > 0)
722 print(", ");
723 printLifetime(1);
724 }
725 print("> ");
726 }
727
728 // <const> = <basic-type> <const-data>
729 // | "p" // placeholder
730 // | <backref>
demangleConst()731 void Demangler::demangleConst() {
732 if (Error || RecursionLevel >= MaxRecursionLevel) {
733 Error = true;
734 return;
735 }
736 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
737
738 char C = consume();
739 BasicType Type;
740 if (parseBasicType(C, Type)) {
741 switch (Type) {
742 case BasicType::I8:
743 case BasicType::I16:
744 case BasicType::I32:
745 case BasicType::I64:
746 case BasicType::I128:
747 case BasicType::ISize:
748 case BasicType::U8:
749 case BasicType::U16:
750 case BasicType::U32:
751 case BasicType::U64:
752 case BasicType::U128:
753 case BasicType::USize:
754 demangleConstInt();
755 break;
756 case BasicType::Bool:
757 demangleConstBool();
758 break;
759 case BasicType::Char:
760 demangleConstChar();
761 break;
762 case BasicType::Placeholder:
763 print('_');
764 break;
765 default:
766 Error = true;
767 break;
768 }
769 } else if (C == 'B') {
770 demangleBackref([&] { demangleConst(); });
771 } else {
772 Error = true;
773 }
774 }
775
776 // <const-data> = ["n"] <hex-number>
demangleConstInt()777 void Demangler::demangleConstInt() {
778 if (consumeIf('n'))
779 print('-');
780
781 StringView HexDigits;
782 uint64_t Value = parseHexNumber(HexDigits);
783 if (HexDigits.size() <= 16) {
784 printDecimalNumber(Value);
785 } else {
786 print("0x");
787 print(HexDigits);
788 }
789 }
790
791 // <const-data> = "0_" // false
792 // | "1_" // true
demangleConstBool()793 void Demangler::demangleConstBool() {
794 StringView HexDigits;
795 parseHexNumber(HexDigits);
796 if (HexDigits == "0")
797 print("false");
798 else if (HexDigits == "1")
799 print("true");
800 else
801 Error = true;
802 }
803
804 /// Returns true if CodePoint represents a printable ASCII character.
isAsciiPrintable(uint64_t CodePoint)805 static bool isAsciiPrintable(uint64_t CodePoint) {
806 return 0x20 <= CodePoint && CodePoint <= 0x7e;
807 }
808
809 // <const-data> = <hex-number>
demangleConstChar()810 void Demangler::demangleConstChar() {
811 StringView HexDigits;
812 uint64_t CodePoint = parseHexNumber(HexDigits);
813 if (Error || HexDigits.size() > 6) {
814 Error = true;
815 return;
816 }
817
818 print("'");
819 switch (CodePoint) {
820 case '\t':
821 print(R"(\t)");
822 break;
823 case '\r':
824 print(R"(\r)");
825 break;
826 case '\n':
827 print(R"(\n)");
828 break;
829 case '\\':
830 print(R"(\\)");
831 break;
832 case '"':
833 print(R"(")");
834 break;
835 case '\'':
836 print(R"(\')");
837 break;
838 default:
839 if (isAsciiPrintable(CodePoint)) {
840 char C = CodePoint;
841 print(C);
842 } else {
843 print(R"(\u{)");
844 print(HexDigits);
845 print('}');
846 }
847 break;
848 }
849 print('\'');
850 }
851
852 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
parseIdentifier()853 Identifier Demangler::parseIdentifier() {
854 bool Punycode = consumeIf('u');
855 uint64_t Bytes = parseDecimalNumber();
856
857 // Underscore resolves the ambiguity when identifier starts with a decimal
858 // digit or another underscore.
859 consumeIf('_');
860
861 if (Error || Bytes > Input.size() - Position) {
862 Error = true;
863 return {};
864 }
865 StringView S = Input.substr(Position, Bytes);
866 Position += Bytes;
867
868 if (!std::all_of(S.begin(), S.end(), isValid)) {
869 Error = true;
870 return {};
871 }
872
873 return {S, Punycode};
874 }
875
876 // Parses optional base 62 number. The presence of a number is determined using
877 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
878 //
879 // This function is indended for parsing disambiguators and binders which when
880 // not present have their value interpreted as 0, and otherwise as decoded
881 // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
882 // 2. When "G" is absent value is 0.
parseOptionalBase62Number(char Tag)883 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
884 if (!consumeIf(Tag))
885 return 0;
886
887 uint64_t N = parseBase62Number();
888 if (Error || !addAssign(N, 1))
889 return 0;
890
891 return N;
892 }
893
894 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
895 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
896 // "1_" encodes 2, etc.
897 //
898 // <base-62-number> = {<0-9a-zA-Z>} "_"
parseBase62Number()899 uint64_t Demangler::parseBase62Number() {
900 if (consumeIf('_'))
901 return 0;
902
903 uint64_t Value = 0;
904
905 while (true) {
906 uint64_t Digit;
907 char C = consume();
908
909 if (C == '_') {
910 break;
911 } else if (isDigit(C)) {
912 Digit = C - '0';
913 } else if (isLower(C)) {
914 Digit = 10 + (C - 'a');
915 } else if (isUpper(C)) {
916 Digit = 10 + 26 + (C - 'A');
917 } else {
918 Error = true;
919 return 0;
920 }
921
922 if (!mulAssign(Value, 62))
923 return 0;
924
925 if (!addAssign(Value, Digit))
926 return 0;
927 }
928
929 if (!addAssign(Value, 1))
930 return 0;
931
932 return Value;
933 }
934
935 // Parses a decimal number that had been encoded without any leading zeros.
936 //
937 // <decimal-number> = "0"
938 // | <1-9> {<0-9>}
parseDecimalNumber()939 uint64_t Demangler::parseDecimalNumber() {
940 char C = look();
941 if (!isDigit(C)) {
942 Error = true;
943 return 0;
944 }
945
946 if (C == '0') {
947 consume();
948 return 0;
949 }
950
951 uint64_t Value = 0;
952
953 while (isDigit(look())) {
954 if (!mulAssign(Value, 10)) {
955 Error = true;
956 return 0;
957 }
958
959 uint64_t D = consume() - '0';
960 if (!addAssign(Value, D))
961 return 0;
962 }
963
964 return Value;
965 }
966
967 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
968 // value and stores hex digits in HexDigits. The return value is unspecified if
969 // HexDigits.size() > 16.
970 //
971 // <hex-number> = "0_"
972 // | <1-9a-f> {<0-9a-f>} "_"
parseHexNumber(StringView & HexDigits)973 uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
974 size_t Start = Position;
975 uint64_t Value = 0;
976
977 if (!isHexDigit(look()))
978 Error = true;
979
980 if (consumeIf('0')) {
981 if (!consumeIf('_'))
982 Error = true;
983 } else {
984 while (!Error && !consumeIf('_')) {
985 char C = consume();
986 Value *= 16;
987 if (isDigit(C))
988 Value += C - '0';
989 else if ('a' <= C && C <= 'f')
990 Value += 10 + (C - 'a');
991 else
992 Error = true;
993 }
994 }
995
996 if (Error) {
997 HexDigits = StringView();
998 return 0;
999 }
1000
1001 size_t End = Position - 1;
1002 assert(Start < End);
1003 HexDigits = Input.substr(Start, End - Start);
1004 return Value;
1005 }
1006
print(char C)1007 void Demangler::print(char C) {
1008 if (Error || !Print)
1009 return;
1010
1011 Output += C;
1012 }
1013
print(StringView S)1014 void Demangler::print(StringView S) {
1015 if (Error || !Print)
1016 return;
1017
1018 Output += S;
1019 }
1020
printDecimalNumber(uint64_t N)1021 void Demangler::printDecimalNumber(uint64_t N) {
1022 if (Error || !Print)
1023 return;
1024
1025 Output << N;
1026 }
1027
1028 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1029 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1030 // bound by one of the enclosing binders.
printLifetime(uint64_t Index)1031 void Demangler::printLifetime(uint64_t Index) {
1032 if (Index == 0) {
1033 print("'_");
1034 return;
1035 }
1036
1037 if (Index - 1 >= BoundLifetimes) {
1038 Error = true;
1039 return;
1040 }
1041
1042 uint64_t Depth = BoundLifetimes - Index;
1043 print('\'');
1044 if (Depth < 26) {
1045 char C = 'a' + Depth;
1046 print(C);
1047 } else {
1048 print('z');
1049 printDecimalNumber(Depth - 26 + 1);
1050 }
1051 }
1052
decodePunycodeDigit(char C,size_t & Value)1053 static inline bool decodePunycodeDigit(char C, size_t &Value) {
1054 if (isLower(C)) {
1055 Value = C - 'a';
1056 return true;
1057 }
1058
1059 if (isDigit(C)) {
1060 Value = 26 + (C - '0');
1061 return true;
1062 }
1063
1064 return false;
1065 }
1066
removeNullBytes(OutputBuffer & Output,size_t StartIdx)1067 static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
1068 char *Buffer = Output.getBuffer();
1069 char *Start = Buffer + StartIdx;
1070 char *End = Buffer + Output.getCurrentPosition();
1071 Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
1072 }
1073
1074 // Encodes code point as UTF-8 and stores results in Output. Returns false if
1075 // CodePoint is not a valid unicode scalar value.
encodeUTF8(size_t CodePoint,char * Output)1076 static inline bool encodeUTF8(size_t CodePoint, char *Output) {
1077 if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
1078 return false;
1079
1080 if (CodePoint <= 0x7F) {
1081 Output[0] = CodePoint;
1082 return true;
1083 }
1084
1085 if (CodePoint <= 0x7FF) {
1086 Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
1087 Output[1] = 0x80 | (CodePoint & 0x3F);
1088 return true;
1089 }
1090
1091 if (CodePoint <= 0xFFFF) {
1092 Output[0] = 0xE0 | (CodePoint >> 12);
1093 Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
1094 Output[2] = 0x80 | (CodePoint & 0x3F);
1095 return true;
1096 }
1097
1098 if (CodePoint <= 0x10FFFF) {
1099 Output[0] = 0xF0 | (CodePoint >> 18);
1100 Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
1101 Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
1102 Output[3] = 0x80 | (CodePoint & 0x3F);
1103 return true;
1104 }
1105
1106 return false;
1107 }
1108
1109 // Decodes string encoded using punycode and appends results to Output.
1110 // Returns true if decoding was successful.
decodePunycode(StringView Input,OutputBuffer & Output)1111 static bool decodePunycode(StringView Input, OutputBuffer &Output) {
1112 size_t OutputSize = Output.getCurrentPosition();
1113 size_t InputIdx = 0;
1114
1115 // Rust uses an underscore as a delimiter.
1116 size_t DelimiterPos = StringView::npos;
1117 for (size_t I = 0; I != Input.size(); ++I)
1118 if (Input[I] == '_')
1119 DelimiterPos = I;
1120
1121 if (DelimiterPos != StringView::npos) {
1122 // Copy basic code points before the last delimiter to the output.
1123 for (; InputIdx != DelimiterPos; ++InputIdx) {
1124 char C = Input[InputIdx];
1125 if (!isValid(C))
1126 return false;
1127 // Code points are padded with zeros while decoding is in progress.
1128 char UTF8[4] = {C};
1129 Output += StringView(UTF8, UTF8 + 4);
1130 }
1131 // Skip over the delimiter.
1132 ++InputIdx;
1133 }
1134
1135 size_t Base = 36;
1136 size_t Skew = 38;
1137 size_t Bias = 72;
1138 size_t N = 0x80;
1139 size_t TMin = 1;
1140 size_t TMax = 26;
1141 size_t Damp = 700;
1142
1143 auto Adapt = [&](size_t Delta, size_t NumPoints) {
1144 Delta /= Damp;
1145 Delta += Delta / NumPoints;
1146 Damp = 2;
1147
1148 size_t K = 0;
1149 while (Delta > (Base - TMin) * TMax / 2) {
1150 Delta /= Base - TMin;
1151 K += Base;
1152 }
1153 return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1154 };
1155
1156 // Main decoding loop.
1157 for (size_t I = 0; InputIdx != Input.size(); I += 1) {
1158 size_t OldI = I;
1159 size_t W = 1;
1160 size_t Max = std::numeric_limits<size_t>::max();
1161 for (size_t K = Base; true; K += Base) {
1162 if (InputIdx == Input.size())
1163 return false;
1164 char C = Input[InputIdx++];
1165 size_t Digit = 0;
1166 if (!decodePunycodeDigit(C, Digit))
1167 return false;
1168
1169 if (Digit > (Max - I) / W)
1170 return false;
1171 I += Digit * W;
1172
1173 size_t T;
1174 if (K <= Bias)
1175 T = TMin;
1176 else if (K >= Bias + TMax)
1177 T = TMax;
1178 else
1179 T = K - Bias;
1180
1181 if (Digit < T)
1182 break;
1183
1184 if (W > Max / (Base - T))
1185 return false;
1186 W *= (Base - T);
1187 }
1188 size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1189 Bias = Adapt(I - OldI, NumPoints);
1190
1191 if (I / NumPoints > Max - N)
1192 return false;
1193 N += I / NumPoints;
1194 I = I % NumPoints;
1195
1196 // Insert N at position I in the output.
1197 char UTF8[4] = {};
1198 if (!encodeUTF8(N, UTF8))
1199 return false;
1200 Output.insert(OutputSize + I * 4, UTF8, 4);
1201 }
1202
1203 removeNullBytes(Output, OutputSize);
1204 return true;
1205 }
1206
printIdentifier(Identifier Ident)1207 void Demangler::printIdentifier(Identifier Ident) {
1208 if (Error || !Print)
1209 return;
1210
1211 if (Ident.Punycode) {
1212 if (!decodePunycode(Ident.Name, Output))
1213 Error = true;
1214 } else {
1215 print(Ident.Name);
1216 }
1217 }
1218
look() const1219 char Demangler::look() const {
1220 if (Error || Position >= Input.size())
1221 return 0;
1222
1223 return Input[Position];
1224 }
1225
consume()1226 char Demangler::consume() {
1227 if (Error || Position >= Input.size()) {
1228 Error = true;
1229 return 0;
1230 }
1231
1232 return Input[Position++];
1233 }
1234
consumeIf(char Prefix)1235 bool Demangler::consumeIf(char Prefix) {
1236 if (Error || Position >= Input.size() || Input[Position] != Prefix)
1237 return false;
1238
1239 Position += 1;
1240 return true;
1241 }
1242
1243 /// Computes A + B. When computation wraps around sets the error and returns
1244 /// false. Otherwise assigns the result to A and returns true.
addAssign(uint64_t & A,uint64_t B)1245 bool Demangler::addAssign(uint64_t &A, uint64_t B) {
1246 if (A > std::numeric_limits<uint64_t>::max() - B) {
1247 Error = true;
1248 return false;
1249 }
1250
1251 A += B;
1252 return true;
1253 }
1254
1255 /// Computes A * B. When computation wraps around sets the error and returns
1256 /// false. Otherwise assigns the result to A and returns true.
mulAssign(uint64_t & A,uint64_t B)1257 bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
1258 if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
1259 Error = true;
1260 return false;
1261 }
1262
1263 A *= B;
1264 return true;
1265 }
1266