1 //===- MicrosoftDemangle.cpp ----------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is dual licensed under the MIT and the University of Illinois Open 6 // Source Licenses. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines a demangler for MSVC-style mangled symbols. 11 // 12 // This file has no dependencies on the rest of LLVM so that it can be 13 // easily reused in other programs such as libcxxabi. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Demangle/Demangle.h" 18 19 #include "Compiler.h" 20 #include "StringView.h" 21 #include "Utility.h" 22 23 #include <cctype> 24 #include <cstdio> 25 #include <tuple> 26 27 // This memory allocator is extremely fast, but it doesn't call dtors 28 // for allocated objects. That means you can't use STL containers 29 // (such as std::vector) with this allocator. But it pays off -- 30 // the demangler is 3x faster with this allocator compared to one with 31 // STL containers. 32 namespace { 33 constexpr size_t AllocUnit = 4096; 34 35 class ArenaAllocator { 36 struct AllocatorNode { 37 uint8_t *Buf = nullptr; 38 size_t Used = 0; 39 size_t Capacity = 0; 40 AllocatorNode *Next = nullptr; 41 }; 42 43 void addNode(size_t Capacity) { 44 AllocatorNode *NewHead = new AllocatorNode; 45 NewHead->Buf = new uint8_t[Capacity]; 46 NewHead->Next = Head; 47 NewHead->Capacity = Capacity; 48 Head = NewHead; 49 NewHead->Used = 0; 50 } 51 52 public: 53 ArenaAllocator() { addNode(AllocUnit); } 54 55 ~ArenaAllocator() { 56 while (Head) { 57 assert(Head->Buf); 58 delete[] Head->Buf; 59 AllocatorNode *Next = Head->Next; 60 delete Head; 61 Head = Next; 62 } 63 } 64 65 char *allocUnalignedBuffer(size_t Length) { 66 uint8_t *Buf = Head->Buf + Head->Used; 67 68 Head->Used += Length; 69 if (Head->Used > Head->Capacity) { 70 // It's possible we need a buffer which is larger than our default unit 71 // size, so we need to be careful to add a node with capacity that is at 72 // least as large as what we need. 73 addNode(std::max(AllocUnit, Length)); 74 Head->Used = Length; 75 Buf = Head->Buf; 76 } 77 78 return reinterpret_cast<char *>(Buf); 79 } 80 81 template <typename T, typename... Args> T *alloc(Args &&... ConstructorArgs) { 82 83 size_t Size = sizeof(T); 84 assert(Head && Head->Buf); 85 86 size_t P = (size_t)Head->Buf + Head->Used; 87 uintptr_t AlignedP = 88 (((size_t)P + alignof(T) - 1) & ~(size_t)(alignof(T) - 1)); 89 uint8_t *PP = (uint8_t *)AlignedP; 90 size_t Adjustment = AlignedP - P; 91 92 Head->Used += Size + Adjustment; 93 if (Head->Used < Head->Capacity) 94 return new (PP) T(std::forward<Args>(ConstructorArgs)...); 95 96 addNode(AllocUnit); 97 Head->Used = Size; 98 return new (Head->Buf) T(std::forward<Args>(ConstructorArgs)...); 99 } 100 101 private: 102 AllocatorNode *Head = nullptr; 103 }; 104 } // namespace 105 106 static bool startsWithDigit(StringView S) { 107 return !S.empty() && std::isdigit(S.front()); 108 } 109 110 // Writes a space if the last token does not end with a punctuation. 111 static void outputSpaceIfNecessary(OutputStream &OS) { 112 if (OS.empty()) 113 return; 114 115 char C = OS.back(); 116 if (isalnum(C) || C == '>') 117 OS << " "; 118 } 119 120 // Storage classes 121 enum Qualifiers : uint8_t { 122 Q_None = 0, 123 Q_Const = 1 << 0, 124 Q_Volatile = 1 << 1, 125 Q_Far = 1 << 2, 126 Q_Huge = 1 << 3, 127 Q_Unaligned = 1 << 4, 128 Q_Restrict = 1 << 5, 129 Q_Pointer64 = 1 << 6 130 }; 131 132 enum class StorageClass : uint8_t { 133 None, 134 PrivateStatic, 135 ProtectedStatic, 136 PublicStatic, 137 Global, 138 FunctionLocalStatic 139 }; 140 141 enum class QualifierMangleMode { Drop, Mangle, Result }; 142 143 enum class PointerAffinity { Pointer, Reference, RValueReference }; 144 145 // Calling conventions 146 enum class CallingConv : uint8_t { 147 None, 148 Cdecl, 149 Pascal, 150 Thiscall, 151 Stdcall, 152 Fastcall, 153 Clrcall, 154 Eabi, 155 Vectorcall, 156 Regcall, 157 }; 158 159 enum class ReferenceKind : uint8_t { None, LValueRef, RValueRef }; 160 161 // Types 162 enum class PrimTy : uint8_t { 163 Unknown, 164 None, 165 Function, 166 Ptr, 167 MemberPtr, 168 Array, 169 170 Struct, 171 Union, 172 Class, 173 Enum, 174 175 Void, 176 Bool, 177 Char, 178 Schar, 179 Uchar, 180 Char16, 181 Char32, 182 Short, 183 Ushort, 184 Int, 185 Uint, 186 Long, 187 Ulong, 188 Int64, 189 Uint64, 190 Wchar, 191 Float, 192 Double, 193 Ldouble, 194 Nullptr 195 }; 196 197 // Function classes 198 enum FuncClass : uint8_t { 199 Public = 1 << 0, 200 Protected = 1 << 1, 201 Private = 1 << 2, 202 Global = 1 << 3, 203 Static = 1 << 4, 204 Virtual = 1 << 5, 205 Far = 1 << 6, 206 }; 207 208 enum NameBackrefBehavior : uint8_t { 209 NBB_None = 0, // don't save any names as backrefs. 210 NBB_Template = 1 << 0, // save template instanations. 211 NBB_Simple = 1 << 1, // save simple names. 212 }; 213 214 enum class SymbolCategory { Function, Variable }; 215 216 namespace { 217 218 struct NameResolver { 219 virtual ~NameResolver() = default; 220 virtual StringView resolve(StringView S) = 0; 221 }; 222 223 struct Type; 224 struct Name; 225 226 struct FunctionParams { 227 bool IsVariadic = false; 228 229 Type *Current = nullptr; 230 231 FunctionParams *Next = nullptr; 232 }; 233 234 struct TemplateParams { 235 bool IsTemplateTemplate = false; 236 bool IsAliasTemplate = false; 237 238 // Type can be null if this is a template template parameter. In that case 239 // only Name will be valid. 240 Type *ParamType = nullptr; 241 242 // Name can be valid if this is a template template parameter (see above) or 243 // this is a function declaration (e.g. foo<&SomeFunc>). In the latter case 244 // Name contains the name of the function and Type contains the signature. 245 Name *ParamName = nullptr; 246 247 TemplateParams *Next = nullptr; 248 }; 249 250 // The type class. Mangled symbols are first parsed and converted to 251 // this type and then converted to string. 252 struct Type { 253 virtual ~Type() {} 254 255 virtual Type *clone(ArenaAllocator &Arena) const; 256 257 // Write the "first half" of a given type. This is a static functions to 258 // give the code a chance to do processing that is common to a subset of 259 // subclasses 260 static void outputPre(OutputStream &OS, Type &Ty, NameResolver &Resolver); 261 262 // Write the "second half" of a given type. This is a static functions to 263 // give the code a chance to do processing that is common to a subset of 264 // subclasses 265 static void outputPost(OutputStream &OS, Type &Ty, NameResolver &Resolver); 266 267 virtual void outputPre(OutputStream &OS, NameResolver &Resolver); 268 virtual void outputPost(OutputStream &OS, NameResolver &Resolver); 269 270 // Primitive type such as Int. 271 PrimTy Prim = PrimTy::Unknown; 272 273 Qualifiers Quals = Q_None; 274 StorageClass Storage = StorageClass::None; // storage class 275 }; 276 277 // Represents an identifier which may be a template. 278 struct Name { 279 // Name read from an MangledName string. 280 StringView Str; 281 282 bool IsTemplateInstantiation = false; 283 bool IsOperator = false; 284 bool IsBackReference = false; 285 286 // Template parameters. Only valid if Flags contains NF_TemplateInstantiation. 287 TemplateParams *TParams = nullptr; 288 289 // Nested BackReferences (e.g. "A::B::C") are represented as a linked list. 290 Name *Next = nullptr; 291 }; 292 293 struct PointerType : public Type { 294 Type *clone(ArenaAllocator &Arena) const override; 295 void outputPre(OutputStream &OS, NameResolver &Resolver) override; 296 void outputPost(OutputStream &OS, NameResolver &Resolver) override; 297 298 PointerAffinity Affinity; 299 300 // Represents a type X in "a pointer to X", "a reference to X", 301 // "an array of X", or "a function returning X". 302 Type *Pointee = nullptr; 303 }; 304 305 struct MemberPointerType : public Type { 306 Type *clone(ArenaAllocator &Arena) const override; 307 void outputPre(OutputStream &OS, NameResolver &Resolver) override; 308 void outputPost(OutputStream &OS, NameResolver &Resolver) override; 309 310 Name *MemberName = nullptr; 311 312 // Represents a type X in "a pointer to X", "a reference to X", 313 // "an array of X", or "a function returning X". 314 Type *Pointee = nullptr; 315 }; 316 317 struct FunctionType : public Type { 318 Type *clone(ArenaAllocator &Arena) const override; 319 void outputPre(OutputStream &OS, NameResolver &Resolver) override; 320 void outputPost(OutputStream &OS, NameResolver &Resolver) override; 321 322 // True if this FunctionType instance is the Pointee of a PointerType or 323 // MemberPointerType. 324 bool IsFunctionPointer = false; 325 326 Type *ReturnType = nullptr; 327 // If this is a reference, the type of reference. 328 ReferenceKind RefKind; 329 330 CallingConv CallConvention; 331 FuncClass FunctionClass; 332 333 FunctionParams Params; 334 }; 335 336 struct UdtType : public Type { 337 Type *clone(ArenaAllocator &Arena) const override; 338 void outputPre(OutputStream &OS, NameResolver &Resolver) override; 339 340 Name *UdtName = nullptr; 341 }; 342 343 struct ArrayType : public Type { 344 Type *clone(ArenaAllocator &Arena) const override; 345 void outputPre(OutputStream &OS, NameResolver &Resolver) override; 346 void outputPost(OutputStream &OS, NameResolver &Resolver) override; 347 348 // Either NextDimension or ElementType will be valid. 349 ArrayType *NextDimension = nullptr; 350 uint32_t ArrayDimension = 0; 351 352 Type *ElementType = nullptr; 353 }; 354 355 } // namespace 356 357 static bool isMemberPointer(StringView MangledName) { 358 switch (MangledName.popFront()) { 359 case '$': 360 // This is probably an rvalue reference (e.g. $$Q), and you cannot have an 361 // rvalue reference to a member. 362 return false; 363 case 'A': 364 // 'A' indicates a reference, and you cannot have a reference to a member 365 // function or member. 366 return false; 367 case 'P': 368 case 'Q': 369 case 'R': 370 case 'S': 371 // These 4 values indicate some kind of pointer, but we still don't know 372 // what. 373 break; 374 default: 375 assert(false && "Ty is not a pointer type!"); 376 } 377 378 // If it starts with a number, then 6 indicates a non-member function 379 // pointer, and 8 indicates a member function pointer. 380 if (startsWithDigit(MangledName)) { 381 assert(MangledName[0] == '6' || MangledName[0] == '8'); 382 return (MangledName[0] == '8'); 383 } 384 385 // Remove ext qualifiers since those can appear on either type and are 386 // therefore not indicative. 387 MangledName.consumeFront('E'); // 64-bit 388 MangledName.consumeFront('I'); // restrict 389 MangledName.consumeFront('F'); // unaligned 390 391 assert(!MangledName.empty()); 392 393 // The next value should be either ABCD (non-member) or QRST (member). 394 switch (MangledName.front()) { 395 case 'A': 396 case 'B': 397 case 'C': 398 case 'D': 399 return false; 400 case 'Q': 401 case 'R': 402 case 'S': 403 case 'T': 404 return true; 405 default: 406 assert(false); 407 } 408 return false; 409 } 410 411 static void outputCallingConvention(OutputStream &OS, CallingConv CC) { 412 outputSpaceIfNecessary(OS); 413 414 switch (CC) { 415 case CallingConv::Cdecl: 416 OS << "__cdecl"; 417 break; 418 case CallingConv::Fastcall: 419 OS << "__fastcall"; 420 break; 421 case CallingConv::Pascal: 422 OS << "__pascal"; 423 break; 424 case CallingConv::Regcall: 425 OS << "__regcall"; 426 break; 427 case CallingConv::Stdcall: 428 OS << "__stdcall"; 429 break; 430 case CallingConv::Thiscall: 431 OS << "__thiscall"; 432 break; 433 case CallingConv::Eabi: 434 OS << "__eabi"; 435 break; 436 case CallingConv::Vectorcall: 437 OS << "__vectorcall"; 438 break; 439 case CallingConv::Clrcall: 440 OS << "__clrcall"; 441 break; 442 default: 443 break; 444 } 445 } 446 447 static bool startsWithLocalScopePattern(StringView S) { 448 if (!S.consumeFront('?')) 449 return false; 450 if (S.size() < 2) 451 return false; 452 453 size_t End = S.find('?'); 454 if (End == StringView::npos) 455 return false; 456 StringView Candidate = S.substr(0, End); 457 if (Candidate.empty()) 458 return false; 459 460 // \?[0-9]\? 461 // ?@? is the discriminator 0. 462 if (Candidate.size() == 1) 463 return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9'); 464 465 // If it's not 0-9, then it's an encoded number terminated with an @ 466 if (Candidate.back() != '@') 467 return false; 468 Candidate = Candidate.dropBack(); 469 470 // An encoded number starts with B-P and all subsequent digits are in A-P. 471 // Note that the reason the first digit cannot be A is two fold. First, it 472 // would create an ambiguity with ?A which delimits the beginning of an 473 // anonymous namespace. Second, A represents 0, and you don't start a multi 474 // digit number with a leading 0. Presumably the anonymous namespace 475 // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J. 476 if (Candidate[0] < 'B' || Candidate[0] > 'P') 477 return false; 478 Candidate = Candidate.dropFront(); 479 while (!Candidate.empty()) { 480 if (Candidate[0] < 'A' || Candidate[0] > 'P') 481 return false; 482 Candidate = Candidate.dropFront(); 483 } 484 485 return true; 486 } 487 488 // Write a function or template parameter list. 489 static void outputParameterList(OutputStream &OS, const FunctionParams &Params, 490 NameResolver &Resolver) { 491 if (!Params.Current) { 492 OS << "void"; 493 return; 494 } 495 496 const FunctionParams *Head = &Params; 497 while (Head) { 498 Type::outputPre(OS, *Head->Current, Resolver); 499 Type::outputPost(OS, *Head->Current, Resolver); 500 501 Head = Head->Next; 502 503 if (Head) 504 OS << ", "; 505 } 506 } 507 508 static void outputName(OutputStream &OS, const Name *TheName, 509 NameResolver &Resolver); 510 511 static void outputParameterList(OutputStream &OS, const TemplateParams &Params, 512 NameResolver &Resolver) { 513 if (!Params.ParamType && !Params.ParamName) { 514 OS << "<>"; 515 return; 516 } 517 518 OS << "<"; 519 const TemplateParams *Head = &Params; 520 while (Head) { 521 // Type can be null if this is a template template parameter, 522 // and Name can be null if this is a simple type. 523 524 if (Head->ParamType && Head->ParamName) { 525 // Function pointer. 526 OS << "&"; 527 Type::outputPre(OS, *Head->ParamType, Resolver); 528 outputName(OS, Head->ParamName, Resolver); 529 Type::outputPost(OS, *Head->ParamType, Resolver); 530 } else if (Head->ParamType) { 531 // simple type. 532 Type::outputPre(OS, *Head->ParamType, Resolver); 533 Type::outputPost(OS, *Head->ParamType, Resolver); 534 } else { 535 // Template alias. 536 outputName(OS, Head->ParamName, Resolver); 537 } 538 539 Head = Head->Next; 540 541 if (Head) 542 OS << ", "; 543 } 544 OS << ">"; 545 } 546 547 static void outputNameComponent(OutputStream &OS, const Name &N, 548 NameResolver &Resolver) { 549 StringView S = N.Str; 550 551 if (N.IsBackReference) 552 S = Resolver.resolve(N.Str); 553 OS << S; 554 555 if (N.IsTemplateInstantiation) 556 outputParameterList(OS, *N.TParams, Resolver); 557 } 558 559 static void outputName(OutputStream &OS, const Name *TheName, 560 NameResolver &Resolver) { 561 if (!TheName) 562 return; 563 564 outputSpaceIfNecessary(OS); 565 566 const Name *Previous = nullptr; 567 // Print out namespaces or outer class BackReferences. 568 for (; TheName->Next; TheName = TheName->Next) { 569 Previous = TheName; 570 outputNameComponent(OS, *TheName, Resolver); 571 OS << "::"; 572 } 573 574 // Print out a regular name. 575 if (!TheName->IsOperator) { 576 outputNameComponent(OS, *TheName, Resolver); 577 return; 578 } 579 580 // Print out ctor or dtor. 581 if (TheName->Str == "dtor") 582 OS << "~"; 583 584 if (TheName->Str == "ctor" || TheName->Str == "dtor") { 585 outputNameComponent(OS, *Previous, Resolver); 586 return; 587 } 588 589 // Print out an overloaded operator. 590 OS << "operator"; 591 outputNameComponent(OS, *TheName, Resolver); 592 } 593 594 namespace { 595 596 Type *Type::clone(ArenaAllocator &Arena) const { 597 return Arena.alloc<Type>(*this); 598 } 599 600 // Write the "first half" of a given type. 601 void Type::outputPre(OutputStream &OS, Type &Ty, NameResolver &Resolver) { 602 // Function types require custom handling of const and static so we 603 // handle them separately. All other types use the same decoration 604 // for these modifiers, so handle them here in common code. 605 if (Ty.Prim == PrimTy::Function) { 606 Ty.outputPre(OS, Resolver); 607 return; 608 } 609 610 switch (Ty.Storage) { 611 case StorageClass::PrivateStatic: 612 case StorageClass::PublicStatic: 613 case StorageClass::ProtectedStatic: 614 OS << "static "; 615 default: 616 break; 617 } 618 Ty.outputPre(OS, Resolver); 619 620 if (Ty.Quals & Q_Const) { 621 outputSpaceIfNecessary(OS); 622 OS << "const"; 623 } 624 625 if (Ty.Quals & Q_Volatile) { 626 outputSpaceIfNecessary(OS); 627 OS << "volatile"; 628 } 629 630 if (Ty.Quals & Q_Restrict) { 631 outputSpaceIfNecessary(OS); 632 OS << "__restrict"; 633 } 634 } 635 636 // Write the "second half" of a given type. 637 void Type::outputPost(OutputStream &OS, Type &Ty, NameResolver &Resolver) { 638 Ty.outputPost(OS, Resolver); 639 } 640 641 void Type::outputPre(OutputStream &OS, NameResolver &Resolver) { 642 switch (Prim) { 643 case PrimTy::Void: 644 OS << "void"; 645 break; 646 case PrimTy::Bool: 647 OS << "bool"; 648 break; 649 case PrimTy::Char: 650 OS << "char"; 651 break; 652 case PrimTy::Schar: 653 OS << "signed char"; 654 break; 655 case PrimTy::Uchar: 656 OS << "unsigned char"; 657 break; 658 case PrimTy::Char16: 659 OS << "char16_t"; 660 break; 661 case PrimTy::Char32: 662 OS << "char32_t"; 663 break; 664 case PrimTy::Short: 665 OS << "short"; 666 break; 667 case PrimTy::Ushort: 668 OS << "unsigned short"; 669 break; 670 case PrimTy::Int: 671 OS << "int"; 672 break; 673 case PrimTy::Uint: 674 OS << "unsigned int"; 675 break; 676 case PrimTy::Long: 677 OS << "long"; 678 break; 679 case PrimTy::Ulong: 680 OS << "unsigned long"; 681 break; 682 case PrimTy::Int64: 683 OS << "__int64"; 684 break; 685 case PrimTy::Uint64: 686 OS << "unsigned __int64"; 687 break; 688 case PrimTy::Wchar: 689 OS << "wchar_t"; 690 break; 691 case PrimTy::Float: 692 OS << "float"; 693 break; 694 case PrimTy::Double: 695 OS << "double"; 696 break; 697 case PrimTy::Ldouble: 698 OS << "long double"; 699 break; 700 case PrimTy::Nullptr: 701 OS << "std::nullptr_t"; 702 break; 703 default: 704 assert(false && "Invalid primitive type!"); 705 } 706 } 707 void Type::outputPost(OutputStream &OS, NameResolver &Resolver) {} 708 709 Type *PointerType::clone(ArenaAllocator &Arena) const { 710 return Arena.alloc<PointerType>(*this); 711 } 712 713 static void outputPointerIndicator(OutputStream &OS, PointerAffinity Affinity, 714 const Name *MemberName, const Type *Pointee, 715 NameResolver &Resolver) { 716 // "[]" and "()" (for function parameters) take precedence over "*", 717 // so "int *x(int)" means "x is a function returning int *". We need 718 // parentheses to supercede the default precedence. (e.g. we want to 719 // emit something like "int (*x)(int)".) 720 if (Pointee->Prim == PrimTy::Function || Pointee->Prim == PrimTy::Array) { 721 OS << "("; 722 if (Pointee->Prim == PrimTy::Function) { 723 const FunctionType *FTy = static_cast<const FunctionType *>(Pointee); 724 assert(FTy->IsFunctionPointer); 725 outputCallingConvention(OS, FTy->CallConvention); 726 OS << " "; 727 } 728 } 729 730 if (MemberName) { 731 outputName(OS, MemberName, Resolver); 732 OS << "::"; 733 } 734 735 if (Affinity == PointerAffinity::Pointer) 736 OS << "*"; 737 else if (Affinity == PointerAffinity::Reference) 738 OS << "&"; 739 else 740 OS << "&&"; 741 } 742 743 void PointerType::outputPre(OutputStream &OS, NameResolver &Resolver) { 744 Type::outputPre(OS, *Pointee, Resolver); 745 746 outputSpaceIfNecessary(OS); 747 748 if (Quals & Q_Unaligned) 749 OS << "__unaligned "; 750 751 outputPointerIndicator(OS, Affinity, nullptr, Pointee, Resolver); 752 753 // FIXME: We should output this, but it requires updating lots of tests. 754 // if (Ty.Quals & Q_Pointer64) 755 // OS << " __ptr64"; 756 } 757 758 void PointerType::outputPost(OutputStream &OS, NameResolver &Resolver) { 759 if (Pointee->Prim == PrimTy::Function || Pointee->Prim == PrimTy::Array) 760 OS << ")"; 761 762 Type::outputPost(OS, *Pointee, Resolver); 763 } 764 765 Type *MemberPointerType::clone(ArenaAllocator &Arena) const { 766 return Arena.alloc<MemberPointerType>(*this); 767 } 768 769 void MemberPointerType::outputPre(OutputStream &OS, NameResolver &Resolver) { 770 Type::outputPre(OS, *Pointee, Resolver); 771 772 outputSpaceIfNecessary(OS); 773 774 outputPointerIndicator(OS, PointerAffinity::Pointer, MemberName, Pointee, 775 Resolver); 776 777 // FIXME: We should output this, but it requires updating lots of tests. 778 // if (Ty.Quals & Q_Pointer64) 779 // OS << " __ptr64"; 780 if (Quals & Q_Restrict) 781 OS << " __restrict"; 782 } 783 784 void MemberPointerType::outputPost(OutputStream &OS, NameResolver &Resolver) { 785 if (Pointee->Prim == PrimTy::Function || Pointee->Prim == PrimTy::Array) 786 OS << ")"; 787 788 Type::outputPost(OS, *Pointee, Resolver); 789 } 790 791 Type *FunctionType::clone(ArenaAllocator &Arena) const { 792 return Arena.alloc<FunctionType>(*this); 793 } 794 795 void FunctionType::outputPre(OutputStream &OS, NameResolver &Resolver) { 796 if (!(FunctionClass & Global)) { 797 if (FunctionClass & Static) 798 OS << "static "; 799 } 800 801 if (ReturnType) { 802 Type::outputPre(OS, *ReturnType, Resolver); 803 OS << " "; 804 } 805 806 // Function pointers print the calling convention as void (__cdecl *)(params) 807 // rather than void __cdecl (*)(params). So we need to let the PointerType 808 // class handle this. 809 if (!IsFunctionPointer) 810 outputCallingConvention(OS, CallConvention); 811 } 812 813 void FunctionType::outputPost(OutputStream &OS, NameResolver &Resolver) { 814 OS << "("; 815 outputParameterList(OS, Params, Resolver); 816 OS << ")"; 817 if (Quals & Q_Const) 818 OS << " const"; 819 if (Quals & Q_Volatile) 820 OS << " volatile"; 821 if (Quals & Q_Restrict) 822 OS << " __restrict"; 823 if (Quals & Q_Unaligned) 824 OS << " __unaligned"; 825 826 if (RefKind == ReferenceKind::LValueRef) 827 OS << " &"; 828 else if (RefKind == ReferenceKind::RValueRef) 829 OS << " &&"; 830 831 if (ReturnType) 832 Type::outputPost(OS, *ReturnType, Resolver); 833 return; 834 } 835 836 Type *UdtType::clone(ArenaAllocator &Arena) const { 837 return Arena.alloc<UdtType>(*this); 838 } 839 840 void UdtType::outputPre(OutputStream &OS, NameResolver &Resolver) { 841 switch (Prim) { 842 case PrimTy::Class: 843 OS << "class "; 844 break; 845 case PrimTy::Struct: 846 OS << "struct "; 847 break; 848 case PrimTy::Union: 849 OS << "union "; 850 break; 851 case PrimTy::Enum: 852 OS << "enum "; 853 break; 854 default: 855 assert(false && "Not a udt type!"); 856 } 857 858 outputName(OS, UdtName, Resolver); 859 } 860 861 Type *ArrayType::clone(ArenaAllocator &Arena) const { 862 return Arena.alloc<ArrayType>(*this); 863 } 864 865 void ArrayType::outputPre(OutputStream &OS, NameResolver &Resolver) { 866 Type::outputPre(OS, *ElementType, Resolver); 867 } 868 869 void ArrayType::outputPost(OutputStream &OS, NameResolver &Resolver) { 870 if (ArrayDimension > 0) 871 OS << "[" << ArrayDimension << "]"; 872 if (NextDimension) 873 Type::outputPost(OS, *NextDimension, Resolver); 874 else if (ElementType) 875 Type::outputPost(OS, *ElementType, Resolver); 876 } 877 878 struct Symbol { 879 SymbolCategory Category; 880 881 Name *SymbolName = nullptr; 882 Type *SymbolType = nullptr; 883 }; 884 885 } // namespace 886 887 namespace { 888 889 // Demangler class takes the main role in demangling symbols. 890 // It has a set of functions to parse mangled symbols into Type instances. 891 // It also has a set of functions to cnovert Type instances to strings. 892 class Demangler : public NameResolver { 893 public: 894 Demangler() = default; 895 virtual ~Demangler() = default; 896 897 // You are supposed to call parse() first and then check if error is true. If 898 // it is false, call output() to write the formatted name to the given stream. 899 Symbol *parse(StringView &MangledName); 900 void output(const Symbol *S, OutputStream &OS); 901 902 StringView resolve(StringView N) override; 903 904 // True if an error occurred. 905 bool Error = false; 906 907 void dumpBackReferences(); 908 909 private: 910 Type *demangleVariableEncoding(StringView &MangledName); 911 Type *demangleFunctionEncoding(StringView &MangledName); 912 913 Qualifiers demanglePointerExtQualifiers(StringView &MangledName); 914 915 // Parser functions. This is a recursive-descent parser. 916 Type *demangleType(StringView &MangledName, QualifierMangleMode QMM); 917 Type *demangleBasicType(StringView &MangledName); 918 UdtType *demangleClassType(StringView &MangledName); 919 PointerType *demanglePointerType(StringView &MangledName); 920 MemberPointerType *demangleMemberPointerType(StringView &MangledName); 921 FunctionType *demangleFunctionType(StringView &MangledName, bool HasThisQuals, 922 bool IsFunctionPointer); 923 924 ArrayType *demangleArrayType(StringView &MangledName); 925 926 TemplateParams *demangleTemplateParameterList(StringView &MangledName); 927 FunctionParams demangleFunctionParameterList(StringView &MangledName); 928 929 int demangleNumber(StringView &MangledName); 930 931 void memorizeString(StringView s); 932 933 /// Allocate a copy of \p Borrowed into memory that we own. 934 StringView copyString(StringView Borrowed); 935 936 Name *demangleFullyQualifiedTypeName(StringView &MangledName); 937 Name *demangleFullyQualifiedSymbolName(StringView &MangledName); 938 939 Name *demangleUnqualifiedTypeName(StringView &MangledName, bool Memorize); 940 Name *demangleUnqualifiedSymbolName(StringView &MangledName, 941 NameBackrefBehavior NBB); 942 943 Name *demangleNameScopeChain(StringView &MangledName, Name *UnqualifiedName); 944 Name *demangleNameScopePiece(StringView &MangledName); 945 946 Name *demangleBackRefName(StringView &MangledName); 947 Name *demangleTemplateInstantiationName(StringView &MangledName, 948 NameBackrefBehavior NBB); 949 Name *demangleOperatorName(StringView &MangledName); 950 Name *demangleSimpleName(StringView &MangledName, bool Memorize); 951 Name *demangleAnonymousNamespaceName(StringView &MangledName); 952 Name *demangleLocallyScopedNamePiece(StringView &MangledName); 953 954 StringView demangleSimpleString(StringView &MangledName, bool Memorize); 955 956 FuncClass demangleFunctionClass(StringView &MangledName); 957 CallingConv demangleCallingConvention(StringView &MangledName); 958 StorageClass demangleVariableStorageClass(StringView &MangledName); 959 ReferenceKind demangleReferenceKind(StringView &MangledName); 960 void demangleThrowSpecification(StringView &MangledName); 961 962 std::pair<Qualifiers, bool> demangleQualifiers(StringView &MangledName); 963 964 // Memory allocator. 965 ArenaAllocator Arena; 966 967 // A single type uses one global back-ref table for all function params. 968 // This means back-refs can even go "into" other types. Examples: 969 // 970 // // Second int* is a back-ref to first. 971 // void foo(int *, int*); 972 // 973 // // Second int* is not a back-ref to first (first is not a function param). 974 // int* foo(int*); 975 // 976 // // Second int* is a back-ref to first (ALL function types share the same 977 // // back-ref map. 978 // using F = void(*)(int*); 979 // F G(int *); 980 Type *FunctionParamBackRefs[10]; 981 size_t FunctionParamBackRefCount = 0; 982 983 // The first 10 BackReferences in a mangled name can be back-referenced by 984 // special name @[0-9]. This is a storage for the first 10 BackReferences. 985 StringView BackReferences[10]; 986 size_t BackRefCount = 0; 987 }; 988 } // namespace 989 990 StringView Demangler::copyString(StringView Borrowed) { 991 char *Stable = Arena.allocUnalignedBuffer(Borrowed.size() + 1); 992 std::strcpy(Stable, Borrowed.begin()); 993 994 return {Stable, Borrowed.size()}; 995 } 996 997 // Parser entry point. 998 Symbol *Demangler::parse(StringView &MangledName) { 999 Symbol *S = Arena.alloc<Symbol>(); 1000 1001 // MSVC-style mangled symbols must start with '?'. 1002 if (!MangledName.consumeFront("?")) { 1003 S->SymbolName = Arena.alloc<Name>(); 1004 S->SymbolName->Str = MangledName; 1005 S->SymbolType = Arena.alloc<Type>(); 1006 S->SymbolType->Prim = PrimTy::Unknown; 1007 return S; 1008 } 1009 1010 // What follows is a main symbol name. This may include 1011 // namespaces or class BackReferences. 1012 S->SymbolName = demangleFullyQualifiedSymbolName(MangledName); 1013 if (Error) 1014 return nullptr; 1015 // Read a variable. 1016 if (startsWithDigit(MangledName)) { 1017 S->Category = SymbolCategory::Variable; 1018 S->SymbolType = demangleVariableEncoding(MangledName); 1019 } else { 1020 S->Category = SymbolCategory::Function; 1021 S->SymbolType = demangleFunctionEncoding(MangledName); 1022 } 1023 1024 if (Error) 1025 return nullptr; 1026 1027 return S; 1028 } 1029 1030 // <type-encoding> ::= <storage-class> <variable-type> 1031 // <storage-class> ::= 0 # private static member 1032 // ::= 1 # protected static member 1033 // ::= 2 # public static member 1034 // ::= 3 # global 1035 // ::= 4 # static local 1036 1037 Type *Demangler::demangleVariableEncoding(StringView &MangledName) { 1038 StorageClass SC = demangleVariableStorageClass(MangledName); 1039 1040 Type *Ty = demangleType(MangledName, QualifierMangleMode::Drop); 1041 1042 Ty->Storage = SC; 1043 1044 // <variable-type> ::= <type> <cvr-qualifiers> 1045 // ::= <type> <pointee-cvr-qualifiers> # pointers, references 1046 switch (Ty->Prim) { 1047 case PrimTy::Ptr: 1048 case PrimTy::MemberPtr: { 1049 Qualifiers ExtraChildQuals = Q_None; 1050 Ty->Quals = 1051 Qualifiers(Ty->Quals | demanglePointerExtQualifiers(MangledName)); 1052 1053 bool IsMember = false; 1054 std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName); 1055 1056 if (Ty->Prim == PrimTy::MemberPtr) { 1057 assert(IsMember); 1058 Name *BackRefName = demangleFullyQualifiedTypeName(MangledName); 1059 (void)BackRefName; 1060 MemberPointerType *MPTy = static_cast<MemberPointerType *>(Ty); 1061 MPTy->Pointee->Quals = Qualifiers(MPTy->Pointee->Quals | ExtraChildQuals); 1062 } else { 1063 PointerType *PTy = static_cast<PointerType *>(Ty); 1064 PTy->Pointee->Quals = Qualifiers(PTy->Pointee->Quals | ExtraChildQuals); 1065 } 1066 1067 break; 1068 } 1069 default: 1070 Ty->Quals = demangleQualifiers(MangledName).first; 1071 break; 1072 } 1073 1074 return Ty; 1075 } 1076 1077 // Sometimes numbers are encoded in mangled symbols. For example, 1078 // "int (*x)[20]" is a valid C type (x is a pointer to an array of 1079 // length 20), so we need some way to embed numbers as part of symbols. 1080 // This function parses it. 1081 // 1082 // <number> ::= [?] <non-negative integer> 1083 // 1084 // <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10 1085 // ::= <hex digit>+ @ # when Numbrer == 0 or >= 10 1086 // 1087 // <hex-digit> ::= [A-P] # A = 0, B = 1, ... 1088 int Demangler::demangleNumber(StringView &MangledName) { 1089 bool neg = MangledName.consumeFront("?"); 1090 1091 if (startsWithDigit(MangledName)) { 1092 int32_t Ret = MangledName[0] - '0' + 1; 1093 MangledName = MangledName.dropFront(1); 1094 return neg ? -Ret : Ret; 1095 } 1096 1097 int Ret = 0; 1098 for (size_t i = 0; i < MangledName.size(); ++i) { 1099 char C = MangledName[i]; 1100 if (C == '@') { 1101 MangledName = MangledName.dropFront(i + 1); 1102 return neg ? -Ret : Ret; 1103 } 1104 if ('A' <= C && C <= 'P') { 1105 Ret = (Ret << 4) + (C - 'A'); 1106 continue; 1107 } 1108 break; 1109 } 1110 1111 Error = true; 1112 return 0; 1113 } 1114 1115 // First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9. 1116 // Memorize it. 1117 void Demangler::memorizeString(StringView S) { 1118 if (BackRefCount >= sizeof(BackReferences) / sizeof(*BackReferences)) 1119 return; 1120 for (size_t i = 0; i < BackRefCount; ++i) 1121 if (S == BackReferences[i]) 1122 return; 1123 BackReferences[BackRefCount++] = S; 1124 } 1125 1126 Name *Demangler::demangleBackRefName(StringView &MangledName) { 1127 assert(startsWithDigit(MangledName)); 1128 Name *Node = Arena.alloc<Name>(); 1129 Node->IsBackReference = true; 1130 Node->Str = {MangledName.begin(), 1}; 1131 MangledName = MangledName.dropFront(); 1132 return Node; 1133 } 1134 1135 Name *Demangler::demangleTemplateInstantiationName(StringView &MangledName, 1136 NameBackrefBehavior NBB) { 1137 assert(MangledName.startsWith("?$")); 1138 MangledName.consumeFront("?$"); 1139 1140 Name *Node = demangleUnqualifiedSymbolName(MangledName, NBB_None); 1141 if (Error) 1142 return nullptr; 1143 1144 Node->TParams = demangleTemplateParameterList(MangledName); 1145 if (Error) 1146 return nullptr; 1147 1148 Node->IsTemplateInstantiation = true; 1149 1150 if (NBB & NBB_Template) { 1151 // Render this class template name into a string buffer so that we can 1152 // memorize it for the purpose of back-referencing. 1153 OutputStream OS = OutputStream::create(nullptr, nullptr, 1024); 1154 outputName(OS, Node, *this); 1155 OS << '\0'; 1156 char *Name = OS.getBuffer(); 1157 1158 StringView Owned = copyString(Name); 1159 memorizeString(Owned); 1160 std::free(Name); 1161 } 1162 1163 return Node; 1164 } 1165 1166 Name *Demangler::demangleOperatorName(StringView &MangledName) { 1167 assert(MangledName.startsWith('?')); 1168 MangledName.consumeFront('?'); 1169 1170 auto NameString = [this, &MangledName]() -> StringView { 1171 switch (MangledName.popFront()) { 1172 case '0': 1173 return "ctor"; 1174 case '1': 1175 return "dtor"; 1176 case '2': 1177 return " new"; 1178 case '3': 1179 return " delete"; 1180 case '4': 1181 return "="; 1182 case '5': 1183 return ">>"; 1184 case '6': 1185 return "<<"; 1186 case '7': 1187 return "!"; 1188 case '8': 1189 return "=="; 1190 case '9': 1191 return "!="; 1192 case 'A': 1193 return "[]"; 1194 case 'C': 1195 return "->"; 1196 case 'D': 1197 return "*"; 1198 case 'E': 1199 return "++"; 1200 case 'F': 1201 return "--"; 1202 case 'G': 1203 return "-"; 1204 case 'H': 1205 return "+"; 1206 case 'I': 1207 return "&"; 1208 case 'J': 1209 return "->*"; 1210 case 'K': 1211 return "/"; 1212 case 'L': 1213 return "%"; 1214 case 'M': 1215 return "<"; 1216 case 'N': 1217 return "<="; 1218 case 'O': 1219 return ">"; 1220 case 'P': 1221 return ">="; 1222 case 'Q': 1223 return ","; 1224 case 'R': 1225 return "()"; 1226 case 'S': 1227 return "~"; 1228 case 'T': 1229 return "^"; 1230 case 'U': 1231 return "|"; 1232 case 'V': 1233 return "&&"; 1234 case 'W': 1235 return "||"; 1236 case 'X': 1237 return "*="; 1238 case 'Y': 1239 return "+="; 1240 case 'Z': 1241 return "-="; 1242 case '_': { 1243 if (MangledName.empty()) 1244 break; 1245 1246 switch (MangledName.popFront()) { 1247 case '0': 1248 return "/="; 1249 case '1': 1250 return "%="; 1251 case '2': 1252 return ">>="; 1253 case '3': 1254 return "<<="; 1255 case '4': 1256 return "&="; 1257 case '5': 1258 return "|="; 1259 case '6': 1260 return "^="; 1261 case 'U': 1262 return " new[]"; 1263 case 'V': 1264 return " delete[]"; 1265 case '_': 1266 if (MangledName.consumeFront("L")) 1267 return " co_await"; 1268 if (MangledName.consumeFront("K")) { 1269 size_t EndPos = MangledName.find('@'); 1270 if (EndPos == StringView::npos) 1271 break; 1272 StringView OpName = demangleSimpleString(MangledName, false); 1273 size_t FullSize = OpName.size() + 3; // <space>""OpName 1274 char *Buffer = Arena.allocUnalignedBuffer(FullSize); 1275 Buffer[0] = ' '; 1276 Buffer[1] = '"'; 1277 Buffer[2] = '"'; 1278 std::memcpy(Buffer + 3, OpName.begin(), OpName.size()); 1279 return {Buffer, FullSize}; 1280 } 1281 } 1282 } 1283 } 1284 Error = true; 1285 return ""; 1286 }; 1287 1288 Name *Node = Arena.alloc<Name>(); 1289 Node->Str = NameString(); 1290 Node->IsOperator = true; 1291 return Node; 1292 } 1293 1294 Name *Demangler::demangleSimpleName(StringView &MangledName, bool Memorize) { 1295 StringView S = demangleSimpleString(MangledName, Memorize); 1296 if (Error) 1297 return nullptr; 1298 1299 Name *Node = Arena.alloc<Name>(); 1300 Node->Str = S; 1301 return Node; 1302 } 1303 1304 StringView Demangler::demangleSimpleString(StringView &MangledName, 1305 bool Memorize) { 1306 StringView S; 1307 for (size_t i = 0; i < MangledName.size(); ++i) { 1308 if (MangledName[i] != '@') 1309 continue; 1310 S = MangledName.substr(0, i); 1311 MangledName = MangledName.dropFront(i + 1); 1312 1313 if (Memorize) 1314 memorizeString(S); 1315 return S; 1316 } 1317 1318 Error = true; 1319 return {}; 1320 } 1321 1322 Name *Demangler::demangleAnonymousNamespaceName(StringView &MangledName) { 1323 assert(MangledName.startsWith("?A")); 1324 MangledName.consumeFront("?A"); 1325 1326 Name *Node = Arena.alloc<Name>(); 1327 Node->Str = "`anonymous namespace'"; 1328 if (MangledName.consumeFront('@')) 1329 return Node; 1330 1331 Error = true; 1332 return nullptr; 1333 } 1334 1335 Name *Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) { 1336 assert(startsWithLocalScopePattern(MangledName)); 1337 1338 Name *Node = Arena.alloc<Name>(); 1339 MangledName.consumeFront('?'); 1340 int ScopeIdentifier = demangleNumber(MangledName); 1341 1342 // One ? to terminate the number 1343 MangledName.consumeFront('?'); 1344 1345 assert(!Error); 1346 Symbol *Scope = parse(MangledName); 1347 if (Error) 1348 return nullptr; 1349 1350 // Render the parent symbol's name into a buffer. 1351 OutputStream OS = OutputStream::create(nullptr, nullptr, 1024); 1352 OS << '`'; 1353 output(Scope, OS); 1354 OS << '\''; 1355 OS << "::`" << ScopeIdentifier << "'"; 1356 OS << '\0'; 1357 char *Result = OS.getBuffer(); 1358 Node->Str = copyString(Result); 1359 std::free(Result); 1360 return Node; 1361 } 1362 1363 // Parses a type name in the form of A@B@C@@ which represents C::B::A. 1364 Name *Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) { 1365 Name *TypeName = demangleUnqualifiedTypeName(MangledName, true); 1366 if (Error) 1367 return nullptr; 1368 assert(TypeName); 1369 1370 Name *QualName = demangleNameScopeChain(MangledName, TypeName); 1371 if (Error) 1372 return nullptr; 1373 assert(QualName); 1374 return QualName; 1375 } 1376 1377 // Parses a symbol name in the form of A@B@C@@ which represents C::B::A. 1378 // Symbol names have slightly different rules regarding what can appear 1379 // so we separate out the implementations for flexibility. 1380 Name *Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) { 1381 // This is the final component of a symbol name (i.e. the leftmost component 1382 // of a mangled name. Since the only possible template instantiation that 1383 // can appear in this context is a function template, and since those are 1384 // not saved for the purposes of name backreferences, only backref simple 1385 // names. 1386 Name *SymbolName = demangleUnqualifiedSymbolName(MangledName, NBB_Simple); 1387 if (Error) 1388 return nullptr; 1389 assert(SymbolName); 1390 1391 Name *QualName = demangleNameScopeChain(MangledName, SymbolName); 1392 if (Error) 1393 return nullptr; 1394 assert(QualName); 1395 return QualName; 1396 } 1397 1398 Name *Demangler::demangleUnqualifiedTypeName(StringView &MangledName, 1399 bool Memorize) { 1400 // An inner-most name can be a back-reference, because a fully-qualified name 1401 // (e.g. Scope + Inner) can contain other fully qualified names inside of 1402 // them (for example template parameters), and these nested parameters can 1403 // refer to previously mangled types. 1404 if (startsWithDigit(MangledName)) 1405 return demangleBackRefName(MangledName); 1406 1407 if (MangledName.startsWith("?$")) 1408 return demangleTemplateInstantiationName(MangledName, NBB_Template); 1409 1410 return demangleSimpleName(MangledName, Memorize); 1411 } 1412 1413 Name *Demangler::demangleUnqualifiedSymbolName(StringView &MangledName, 1414 NameBackrefBehavior NBB) { 1415 if (startsWithDigit(MangledName)) 1416 return demangleBackRefName(MangledName); 1417 if (MangledName.startsWith("?$")) 1418 return demangleTemplateInstantiationName(MangledName, NBB); 1419 if (MangledName.startsWith('?')) 1420 return demangleOperatorName(MangledName); 1421 return demangleSimpleName(MangledName, (NBB & NBB_Simple) != 0); 1422 } 1423 1424 Name *Demangler::demangleNameScopePiece(StringView &MangledName) { 1425 if (startsWithDigit(MangledName)) 1426 return demangleBackRefName(MangledName); 1427 1428 if (MangledName.startsWith("?$")) 1429 return demangleTemplateInstantiationName(MangledName, NBB_Template); 1430 1431 if (MangledName.startsWith("?A")) 1432 return demangleAnonymousNamespaceName(MangledName); 1433 1434 if (startsWithLocalScopePattern(MangledName)) 1435 return demangleLocallyScopedNamePiece(MangledName); 1436 1437 return demangleSimpleName(MangledName, true); 1438 } 1439 1440 Name *Demangler::demangleNameScopeChain(StringView &MangledName, 1441 Name *UnqualifiedName) { 1442 Name *Head = UnqualifiedName; 1443 1444 while (!MangledName.consumeFront("@")) { 1445 if (MangledName.empty()) { 1446 Error = true; 1447 return nullptr; 1448 } 1449 1450 assert(!Error); 1451 Name *Elem = demangleNameScopePiece(MangledName); 1452 if (Error) 1453 return nullptr; 1454 1455 Elem->Next = Head; 1456 Head = Elem; 1457 } 1458 return Head; 1459 } 1460 1461 FuncClass Demangler::demangleFunctionClass(StringView &MangledName) { 1462 SwapAndRestore<StringView> RestoreOnError(MangledName, MangledName); 1463 RestoreOnError.shouldRestore(false); 1464 1465 switch (MangledName.popFront()) { 1466 case 'A': 1467 return Private; 1468 case 'B': 1469 return FuncClass(Private | Far); 1470 case 'C': 1471 return FuncClass(Private | Static); 1472 case 'D': 1473 return FuncClass(Private | Static); 1474 case 'E': 1475 return FuncClass(Private | Virtual); 1476 case 'F': 1477 return FuncClass(Private | Virtual); 1478 case 'I': 1479 return Protected; 1480 case 'J': 1481 return FuncClass(Protected | Far); 1482 case 'K': 1483 return FuncClass(Protected | Static); 1484 case 'L': 1485 return FuncClass(Protected | Static | Far); 1486 case 'M': 1487 return FuncClass(Protected | Virtual); 1488 case 'N': 1489 return FuncClass(Protected | Virtual | Far); 1490 case 'Q': 1491 return Public; 1492 case 'R': 1493 return FuncClass(Public | Far); 1494 case 'S': 1495 return FuncClass(Public | Static); 1496 case 'T': 1497 return FuncClass(Public | Static | Far); 1498 case 'U': 1499 return FuncClass(Public | Virtual); 1500 case 'V': 1501 return FuncClass(Public | Virtual | Far); 1502 case 'Y': 1503 return Global; 1504 case 'Z': 1505 return FuncClass(Global | Far); 1506 } 1507 1508 Error = true; 1509 RestoreOnError.shouldRestore(true); 1510 return Public; 1511 } 1512 1513 CallingConv Demangler::demangleCallingConvention(StringView &MangledName) { 1514 switch (MangledName.popFront()) { 1515 case 'A': 1516 case 'B': 1517 return CallingConv::Cdecl; 1518 case 'C': 1519 case 'D': 1520 return CallingConv::Pascal; 1521 case 'E': 1522 case 'F': 1523 return CallingConv::Thiscall; 1524 case 'G': 1525 case 'H': 1526 return CallingConv::Stdcall; 1527 case 'I': 1528 case 'J': 1529 return CallingConv::Fastcall; 1530 case 'M': 1531 case 'N': 1532 return CallingConv::Clrcall; 1533 case 'O': 1534 case 'P': 1535 return CallingConv::Eabi; 1536 case 'Q': 1537 return CallingConv::Vectorcall; 1538 } 1539 1540 return CallingConv::None; 1541 } 1542 1543 StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) { 1544 assert(std::isdigit(MangledName.front())); 1545 1546 switch (MangledName.popFront()) { 1547 case '0': 1548 return StorageClass::PrivateStatic; 1549 case '1': 1550 return StorageClass::ProtectedStatic; 1551 case '2': 1552 return StorageClass::PublicStatic; 1553 case '3': 1554 return StorageClass::Global; 1555 case '4': 1556 return StorageClass::FunctionLocalStatic; 1557 } 1558 Error = true; 1559 return StorageClass::None; 1560 } 1561 1562 std::pair<Qualifiers, bool> 1563 Demangler::demangleQualifiers(StringView &MangledName) { 1564 1565 switch (MangledName.popFront()) { 1566 // Member qualifiers 1567 case 'Q': 1568 return std::make_pair(Q_None, true); 1569 case 'R': 1570 return std::make_pair(Q_Const, true); 1571 case 'S': 1572 return std::make_pair(Q_Volatile, true); 1573 case 'T': 1574 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), true); 1575 // Non-Member qualifiers 1576 case 'A': 1577 return std::make_pair(Q_None, false); 1578 case 'B': 1579 return std::make_pair(Q_Const, false); 1580 case 'C': 1581 return std::make_pair(Q_Volatile, false); 1582 case 'D': 1583 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), false); 1584 } 1585 Error = true; 1586 return std::make_pair(Q_None, false); 1587 } 1588 1589 static bool isTagType(StringView S) { 1590 switch (S.front()) { 1591 case 'T': // union 1592 case 'U': // struct 1593 case 'V': // class 1594 case 'W': // enum 1595 return true; 1596 } 1597 return false; 1598 } 1599 1600 static bool isPointerType(StringView S) { 1601 if (S.startsWith("$$Q")) // foo && 1602 return true; 1603 1604 switch (S.front()) { 1605 case 'A': // foo & 1606 case 'P': // foo * 1607 case 'Q': // foo *const 1608 case 'R': // foo *volatile 1609 case 'S': // foo *const volatile 1610 return true; 1611 } 1612 return false; 1613 } 1614 1615 static bool isArrayType(StringView S) { return S[0] == 'Y'; } 1616 1617 static bool isFunctionType(StringView S) { 1618 return S.startsWith("$$A8@@") || S.startsWith("$$A6"); 1619 } 1620 1621 // <variable-type> ::= <type> <cvr-qualifiers> 1622 // ::= <type> <pointee-cvr-qualifiers> # pointers, references 1623 Type *Demangler::demangleType(StringView &MangledName, 1624 QualifierMangleMode QMM) { 1625 Qualifiers Quals = Q_None; 1626 bool IsMember = false; 1627 bool IsMemberKnown = false; 1628 if (QMM == QualifierMangleMode::Mangle) { 1629 std::tie(Quals, IsMember) = demangleQualifiers(MangledName); 1630 IsMemberKnown = true; 1631 } else if (QMM == QualifierMangleMode::Result) { 1632 if (MangledName.consumeFront('?')) { 1633 std::tie(Quals, IsMember) = demangleQualifiers(MangledName); 1634 IsMemberKnown = true; 1635 } 1636 } 1637 1638 Type *Ty = nullptr; 1639 if (isTagType(MangledName)) 1640 Ty = demangleClassType(MangledName); 1641 else if (isPointerType(MangledName)) { 1642 if (!IsMemberKnown) 1643 IsMember = isMemberPointer(MangledName); 1644 1645 if (IsMember) 1646 Ty = demangleMemberPointerType(MangledName); 1647 else 1648 Ty = demanglePointerType(MangledName); 1649 } else if (isArrayType(MangledName)) 1650 Ty = demangleArrayType(MangledName); 1651 else if (isFunctionType(MangledName)) { 1652 if (MangledName.consumeFront("$$A8@@")) 1653 Ty = demangleFunctionType(MangledName, true, false); 1654 else { 1655 assert(MangledName.startsWith("$$A6")); 1656 MangledName.consumeFront("$$A6"); 1657 Ty = demangleFunctionType(MangledName, false, false); 1658 } 1659 } else { 1660 Ty = demangleBasicType(MangledName); 1661 assert(Ty && !Error); 1662 if (!Ty || Error) 1663 return Ty; 1664 } 1665 1666 Ty->Quals = Qualifiers(Ty->Quals | Quals); 1667 return Ty; 1668 } 1669 1670 ReferenceKind Demangler::demangleReferenceKind(StringView &MangledName) { 1671 if (MangledName.consumeFront('G')) 1672 return ReferenceKind::LValueRef; 1673 else if (MangledName.consumeFront('H')) 1674 return ReferenceKind::RValueRef; 1675 return ReferenceKind::None; 1676 } 1677 1678 void Demangler::demangleThrowSpecification(StringView &MangledName) { 1679 if (MangledName.consumeFront('Z')) 1680 return; 1681 1682 Error = true; 1683 } 1684 1685 FunctionType *Demangler::demangleFunctionType(StringView &MangledName, 1686 bool HasThisQuals, 1687 bool IsFunctionPointer) { 1688 FunctionType *FTy = Arena.alloc<FunctionType>(); 1689 FTy->Prim = PrimTy::Function; 1690 FTy->IsFunctionPointer = IsFunctionPointer; 1691 1692 if (HasThisQuals) { 1693 FTy->Quals = demanglePointerExtQualifiers(MangledName); 1694 FTy->RefKind = demangleReferenceKind(MangledName); 1695 FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first); 1696 } 1697 1698 // Fields that appear on both member and non-member functions. 1699 FTy->CallConvention = demangleCallingConvention(MangledName); 1700 1701 // <return-type> ::= <type> 1702 // ::= @ # structors (they have no declared return type) 1703 bool IsStructor = MangledName.consumeFront('@'); 1704 if (!IsStructor) 1705 FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result); 1706 1707 FTy->Params = demangleFunctionParameterList(MangledName); 1708 1709 demangleThrowSpecification(MangledName); 1710 1711 return FTy; 1712 } 1713 1714 Type *Demangler::demangleFunctionEncoding(StringView &MangledName) { 1715 FuncClass FC = demangleFunctionClass(MangledName); 1716 1717 bool HasThisQuals = !(FC & (Global | Static)); 1718 FunctionType *FTy = demangleFunctionType(MangledName, HasThisQuals, false); 1719 FTy->FunctionClass = FC; 1720 1721 return FTy; 1722 } 1723 1724 // Reads a primitive type. 1725 Type *Demangler::demangleBasicType(StringView &MangledName) { 1726 Type *Ty = Arena.alloc<Type>(); 1727 1728 if (MangledName.consumeFront("$$T")) { 1729 Ty->Prim = PrimTy::Nullptr; 1730 return Ty; 1731 } 1732 1733 switch (MangledName.popFront()) { 1734 case 'X': 1735 Ty->Prim = PrimTy::Void; 1736 break; 1737 case 'D': 1738 Ty->Prim = PrimTy::Char; 1739 break; 1740 case 'C': 1741 Ty->Prim = PrimTy::Schar; 1742 break; 1743 case 'E': 1744 Ty->Prim = PrimTy::Uchar; 1745 break; 1746 case 'F': 1747 Ty->Prim = PrimTy::Short; 1748 break; 1749 case 'G': 1750 Ty->Prim = PrimTy::Ushort; 1751 break; 1752 case 'H': 1753 Ty->Prim = PrimTy::Int; 1754 break; 1755 case 'I': 1756 Ty->Prim = PrimTy::Uint; 1757 break; 1758 case 'J': 1759 Ty->Prim = PrimTy::Long; 1760 break; 1761 case 'K': 1762 Ty->Prim = PrimTy::Ulong; 1763 break; 1764 case 'M': 1765 Ty->Prim = PrimTy::Float; 1766 break; 1767 case 'N': 1768 Ty->Prim = PrimTy::Double; 1769 break; 1770 case 'O': 1771 Ty->Prim = PrimTy::Ldouble; 1772 break; 1773 case '_': { 1774 if (MangledName.empty()) { 1775 Error = true; 1776 return nullptr; 1777 } 1778 switch (MangledName.popFront()) { 1779 case 'N': 1780 Ty->Prim = PrimTy::Bool; 1781 break; 1782 case 'J': 1783 Ty->Prim = PrimTy::Int64; 1784 break; 1785 case 'K': 1786 Ty->Prim = PrimTy::Uint64; 1787 break; 1788 case 'W': 1789 Ty->Prim = PrimTy::Wchar; 1790 break; 1791 case 'S': 1792 Ty->Prim = PrimTy::Char16; 1793 break; 1794 case 'U': 1795 Ty->Prim = PrimTy::Char32; 1796 break; 1797 default: 1798 Error = true; 1799 return nullptr; 1800 } 1801 break; 1802 } 1803 default: 1804 Error = true; 1805 return nullptr; 1806 } 1807 return Ty; 1808 } 1809 1810 UdtType *Demangler::demangleClassType(StringView &MangledName) { 1811 UdtType *UTy = Arena.alloc<UdtType>(); 1812 1813 switch (MangledName.popFront()) { 1814 case 'T': 1815 UTy->Prim = PrimTy::Union; 1816 break; 1817 case 'U': 1818 UTy->Prim = PrimTy::Struct; 1819 break; 1820 case 'V': 1821 UTy->Prim = PrimTy::Class; 1822 break; 1823 case 'W': 1824 if (MangledName.popFront() != '4') { 1825 Error = true; 1826 return nullptr; 1827 } 1828 UTy->Prim = PrimTy::Enum; 1829 break; 1830 default: 1831 assert(false); 1832 } 1833 1834 UTy->UdtName = demangleFullyQualifiedTypeName(MangledName); 1835 return UTy; 1836 } 1837 1838 static std::pair<Qualifiers, PointerAffinity> 1839 demanglePointerCVQualifiers(StringView &MangledName) { 1840 if (MangledName.consumeFront("$$Q")) 1841 return std::make_pair(Q_None, PointerAffinity::RValueReference); 1842 1843 switch (MangledName.popFront()) { 1844 case 'A': 1845 return std::make_pair(Q_None, PointerAffinity::Reference); 1846 case 'P': 1847 return std::make_pair(Q_None, PointerAffinity::Pointer); 1848 case 'Q': 1849 return std::make_pair(Q_Const, PointerAffinity::Pointer); 1850 case 'R': 1851 return std::make_pair(Q_Volatile, PointerAffinity::Pointer); 1852 case 'S': 1853 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), 1854 PointerAffinity::Pointer); 1855 default: 1856 assert(false && "Ty is not a pointer type!"); 1857 } 1858 return std::make_pair(Q_None, PointerAffinity::Pointer); 1859 } 1860 1861 // <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type> 1862 // # the E is required for 64-bit non-static pointers 1863 PointerType *Demangler::demanglePointerType(StringView &MangledName) { 1864 PointerType *Pointer = Arena.alloc<PointerType>(); 1865 1866 std::tie(Pointer->Quals, Pointer->Affinity) = 1867 demanglePointerCVQualifiers(MangledName); 1868 1869 Pointer->Prim = PrimTy::Ptr; 1870 if (MangledName.consumeFront("6")) { 1871 Pointer->Pointee = demangleFunctionType(MangledName, false, true); 1872 return Pointer; 1873 } 1874 1875 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); 1876 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); 1877 1878 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle); 1879 return Pointer; 1880 } 1881 1882 MemberPointerType * 1883 Demangler::demangleMemberPointerType(StringView &MangledName) { 1884 MemberPointerType *Pointer = Arena.alloc<MemberPointerType>(); 1885 Pointer->Prim = PrimTy::MemberPtr; 1886 1887 PointerAffinity Affinity; 1888 std::tie(Pointer->Quals, Affinity) = demanglePointerCVQualifiers(MangledName); 1889 assert(Affinity == PointerAffinity::Pointer); 1890 1891 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); 1892 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); 1893 1894 if (MangledName.consumeFront("8")) { 1895 Pointer->MemberName = demangleFullyQualifiedSymbolName(MangledName); 1896 Pointer->Pointee = demangleFunctionType(MangledName, true, true); 1897 } else { 1898 Qualifiers PointeeQuals = Q_None; 1899 bool IsMember = false; 1900 std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName); 1901 assert(IsMember); 1902 Pointer->MemberName = demangleFullyQualifiedSymbolName(MangledName); 1903 1904 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop); 1905 Pointer->Pointee->Quals = PointeeQuals; 1906 } 1907 1908 return Pointer; 1909 } 1910 1911 Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) { 1912 Qualifiers Quals = Q_None; 1913 if (MangledName.consumeFront('E')) 1914 Quals = Qualifiers(Quals | Q_Pointer64); 1915 if (MangledName.consumeFront('I')) 1916 Quals = Qualifiers(Quals | Q_Restrict); 1917 if (MangledName.consumeFront('F')) 1918 Quals = Qualifiers(Quals | Q_Unaligned); 1919 1920 return Quals; 1921 } 1922 1923 ArrayType *Demangler::demangleArrayType(StringView &MangledName) { 1924 assert(MangledName.front() == 'Y'); 1925 MangledName.popFront(); 1926 1927 int Dimension = demangleNumber(MangledName); 1928 if (Dimension <= 0) { 1929 Error = true; 1930 return nullptr; 1931 } 1932 1933 ArrayType *ATy = Arena.alloc<ArrayType>(); 1934 ArrayType *Dim = ATy; 1935 for (int I = 0; I < Dimension; ++I) { 1936 Dim->Prim = PrimTy::Array; 1937 Dim->ArrayDimension = demangleNumber(MangledName); 1938 Dim->NextDimension = Arena.alloc<ArrayType>(); 1939 Dim = Dim->NextDimension; 1940 } 1941 1942 if (MangledName.consumeFront("$$C")) { 1943 if (MangledName.consumeFront("B")) 1944 ATy->Quals = Q_Const; 1945 else if (MangledName.consumeFront("C") || MangledName.consumeFront("D")) 1946 ATy->Quals = Qualifiers(Q_Const | Q_Volatile); 1947 else if (!MangledName.consumeFront("A")) 1948 Error = true; 1949 } 1950 1951 ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop); 1952 Dim->ElementType = ATy->ElementType; 1953 return ATy; 1954 } 1955 1956 // Reads a function or a template parameters. 1957 FunctionParams 1958 Demangler::demangleFunctionParameterList(StringView &MangledName) { 1959 // Empty parameter list. 1960 if (MangledName.consumeFront('X')) 1961 return {}; 1962 1963 FunctionParams *Head; 1964 FunctionParams **Current = &Head; 1965 while (!Error && !MangledName.startsWith('@') && 1966 !MangledName.startsWith('Z')) { 1967 1968 if (startsWithDigit(MangledName)) { 1969 size_t N = MangledName[0] - '0'; 1970 if (N >= FunctionParamBackRefCount) { 1971 Error = true; 1972 return {}; 1973 } 1974 MangledName = MangledName.dropFront(); 1975 1976 *Current = Arena.alloc<FunctionParams>(); 1977 (*Current)->Current = FunctionParamBackRefs[N]->clone(Arena); 1978 Current = &(*Current)->Next; 1979 continue; 1980 } 1981 1982 size_t OldSize = MangledName.size(); 1983 1984 *Current = Arena.alloc<FunctionParams>(); 1985 (*Current)->Current = demangleType(MangledName, QualifierMangleMode::Drop); 1986 1987 size_t CharsConsumed = OldSize - MangledName.size(); 1988 assert(CharsConsumed != 0); 1989 1990 // Single-letter types are ignored for backreferences because memorizing 1991 // them doesn't save anything. 1992 if (FunctionParamBackRefCount <= 9 && CharsConsumed > 1) 1993 FunctionParamBackRefs[FunctionParamBackRefCount++] = (*Current)->Current; 1994 1995 Current = &(*Current)->Next; 1996 } 1997 1998 if (Error) 1999 return {}; 2000 2001 // A non-empty parameter list is terminated by either 'Z' (variadic) parameter 2002 // list or '@' (non variadic). Careful not to consume "@Z", as in that case 2003 // the following Z could be a throw specifier. 2004 if (MangledName.consumeFront('@')) 2005 return *Head; 2006 2007 if (MangledName.consumeFront('Z')) { 2008 Head->IsVariadic = true; 2009 return *Head; 2010 } 2011 2012 Error = true; 2013 return {}; 2014 } 2015 2016 TemplateParams * 2017 Demangler::demangleTemplateParameterList(StringView &MangledName) { 2018 TemplateParams *Head; 2019 TemplateParams **Current = &Head; 2020 while (!Error && !MangledName.startsWith('@')) { 2021 // Template parameter lists don't participate in back-referencing. 2022 *Current = Arena.alloc<TemplateParams>(); 2023 2024 // Empty parameter pack. 2025 if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") || 2026 MangledName.consumeFront("$$$V")) { 2027 break; 2028 } 2029 2030 if (MangledName.consumeFront("$$Y")) { 2031 (*Current)->IsTemplateTemplate = true; 2032 (*Current)->IsAliasTemplate = true; 2033 (*Current)->ParamName = demangleFullyQualifiedTypeName(MangledName); 2034 } else if (MangledName.consumeFront("$1?")) { 2035 (*Current)->ParamName = demangleFullyQualifiedSymbolName(MangledName); 2036 (*Current)->ParamType = demangleFunctionEncoding(MangledName); 2037 } else { 2038 (*Current)->ParamType = 2039 demangleType(MangledName, QualifierMangleMode::Drop); 2040 } 2041 if (Error) 2042 return nullptr; 2043 2044 Current = &(*Current)->Next; 2045 } 2046 2047 if (Error) 2048 return nullptr; 2049 2050 // Template parameter lists cannot be variadic, so it can only be terminated 2051 // by @. 2052 if (MangledName.consumeFront('@')) 2053 return Head; 2054 Error = true; 2055 return nullptr; 2056 } 2057 2058 StringView Demangler::resolve(StringView N) { 2059 assert(N.size() == 1 && isdigit(N[0])); 2060 size_t Digit = N[0] - '0'; 2061 if (Digit >= BackRefCount) 2062 return N; 2063 return BackReferences[Digit]; 2064 } 2065 2066 void Demangler::output(const Symbol *S, OutputStream &OS) { 2067 // Converts an AST to a string. 2068 // 2069 // Converting an AST representing a C++ type to a string is tricky due 2070 // to the bad grammar of the C++ declaration inherited from C. You have 2071 // to construct a string from inside to outside. For example, if a type 2072 // X is a pointer to a function returning int, the order you create a 2073 // string becomes something like this: 2074 // 2075 // (1) X is a pointer: *X 2076 // (2) (1) is a function returning int: int (*X)() 2077 // 2078 // So you cannot construct a result just by appending strings to a result. 2079 // 2080 // To deal with this, we split the function into two. outputPre() writes 2081 // the "first half" of type declaration, and outputPost() writes the 2082 // "second half". For example, outputPre() writes a return type for a 2083 // function and outputPost() writes an parameter list. 2084 Type::outputPre(OS, *S->SymbolType, *this); 2085 outputName(OS, S->SymbolName, *this); 2086 Type::outputPost(OS, *S->SymbolType, *this); 2087 } 2088 2089 void Demangler::dumpBackReferences() { 2090 std::printf("%d function parameter backreferences\n", 2091 (int)FunctionParamBackRefCount); 2092 2093 // Create an output stream so we can render each type. 2094 OutputStream OS = OutputStream::create(nullptr, 0, 1024); 2095 for (size_t I = 0; I < FunctionParamBackRefCount; ++I) { 2096 OS.setCurrentPosition(0); 2097 2098 Type *T = FunctionParamBackRefs[I]; 2099 Type::outputPre(OS, *T, *this); 2100 Type::outputPost(OS, *T, *this); 2101 2102 std::printf(" [%d] - %.*s\n", (int)I, (int)OS.getCurrentPosition(), 2103 OS.getBuffer()); 2104 } 2105 std::free(OS.getBuffer()); 2106 2107 if (FunctionParamBackRefCount > 0) 2108 std::printf("\n"); 2109 std::printf("%d name backreferences\n", (int)BackRefCount); 2110 for (size_t I = 0; I < BackRefCount; ++I) { 2111 std::printf(" [%d] - %.*s\n", (int)I, (int)BackReferences[I].size(), 2112 BackReferences[I].begin()); 2113 } 2114 if (BackRefCount > 0) 2115 std::printf("\n"); 2116 } 2117 2118 char *llvm::microsoftDemangle(const char *MangledName, char *Buf, size_t *N, 2119 int *Status, MSDemangleFlags Flags) { 2120 Demangler D; 2121 StringView Name{MangledName}; 2122 Symbol *S = D.parse(Name); 2123 2124 if (Flags & MSDF_DumpBackrefs) 2125 D.dumpBackReferences(); 2126 OutputStream OS = OutputStream::create(Buf, N, 1024); 2127 if (D.Error) { 2128 OS << MangledName; 2129 *Status = llvm::demangle_invalid_mangled_name; 2130 } else { 2131 D.output(S, OS); 2132 *Status = llvm::demangle_success; 2133 } 2134 2135 OS << '\0'; 2136 return OS.getBuffer(); 2137 } 2138