1 //===- MicrosoftDemangle.cpp ----------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is dual licensed under the MIT and the University of Illinois Open 6 // Source Licenses. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines a demangler for MSVC-style mangled symbols. 11 // 12 // This file has no dependencies on the rest of LLVM so that it can be 13 // easily reused in other programs such as libcxxabi. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "MicrosoftDemangleNodes.h" 18 #include "llvm/Demangle/Demangle.h" 19 20 #include "llvm/Demangle/Compiler.h" 21 #include "llvm/Demangle/StringView.h" 22 #include "llvm/Demangle/Utility.h" 23 24 #include <array> 25 #include <cctype> 26 #include <cstdio> 27 #include <tuple> 28 29 using namespace llvm; 30 using namespace ms_demangle; 31 32 static bool startsWithDigit(StringView S) { 33 return !S.empty() && std::isdigit(S.front()); 34 } 35 36 enum class QualifierMangleMode { Drop, Mangle, Result }; 37 38 struct NodeList { 39 Node *N = nullptr; 40 NodeList *Next = nullptr; 41 }; 42 43 enum class FunctionIdentifierCodeGroup { Basic, Under, DoubleUnder }; 44 45 enum NameBackrefBehavior : uint8_t { 46 NBB_None = 0, // don't save any names as backrefs. 47 NBB_Template = 1 << 0, // save template instanations. 48 NBB_Simple = 1 << 1, // save simple names. 49 }; 50 51 static bool isMemberPointer(StringView MangledName) { 52 switch (MangledName.popFront()) { 53 case '$': 54 // This is probably an rvalue reference (e.g. $$Q), and you cannot have an 55 // rvalue reference to a member. 56 return false; 57 case 'A': 58 // 'A' indicates a reference, and you cannot have a reference to a member 59 // function or member. 60 return false; 61 case 'P': 62 case 'Q': 63 case 'R': 64 case 'S': 65 // These 4 values indicate some kind of pointer, but we still don't know 66 // what. 67 break; 68 default: 69 assert(false && "Ty is not a pointer type!"); 70 } 71 72 // If it starts with a number, then 6 indicates a non-member function 73 // pointer, and 8 indicates a member function pointer. 74 if (startsWithDigit(MangledName)) { 75 assert(MangledName[0] == '6' || MangledName[0] == '8'); 76 return (MangledName[0] == '8'); 77 } 78 79 // Remove ext qualifiers since those can appear on either type and are 80 // therefore not indicative. 81 MangledName.consumeFront('E'); // 64-bit 82 MangledName.consumeFront('I'); // restrict 83 MangledName.consumeFront('F'); // unaligned 84 85 assert(!MangledName.empty()); 86 87 // The next value should be either ABCD (non-member) or QRST (member). 88 switch (MangledName.front()) { 89 case 'A': 90 case 'B': 91 case 'C': 92 case 'D': 93 return false; 94 case 'Q': 95 case 'R': 96 case 'S': 97 case 'T': 98 return true; 99 default: 100 assert(false); 101 } 102 return false; 103 } 104 105 static SpecialIntrinsicKind 106 consumeSpecialIntrinsicKind(StringView &MangledName) { 107 if (MangledName.consumeFront("?_7")) 108 return SpecialIntrinsicKind::Vftable; 109 if (MangledName.consumeFront("?_8")) 110 return SpecialIntrinsicKind::Vbtable; 111 if (MangledName.consumeFront("?_9")) 112 return SpecialIntrinsicKind::VcallThunk; 113 if (MangledName.consumeFront("?_A")) 114 return SpecialIntrinsicKind::Typeof; 115 if (MangledName.consumeFront("?_B")) 116 return SpecialIntrinsicKind::LocalStaticGuard; 117 if (MangledName.consumeFront("?_C")) 118 return SpecialIntrinsicKind::StringLiteralSymbol; 119 if (MangledName.consumeFront("?_P")) 120 return SpecialIntrinsicKind::UdtReturning; 121 if (MangledName.consumeFront("?_R0")) 122 return SpecialIntrinsicKind::RttiTypeDescriptor; 123 if (MangledName.consumeFront("?_R1")) 124 return SpecialIntrinsicKind::RttiBaseClassDescriptor; 125 if (MangledName.consumeFront("?_R2")) 126 return SpecialIntrinsicKind::RttiBaseClassArray; 127 if (MangledName.consumeFront("?_R3")) 128 return SpecialIntrinsicKind::RttiClassHierarchyDescriptor; 129 if (MangledName.consumeFront("?_R4")) 130 return SpecialIntrinsicKind::RttiCompleteObjLocator; 131 if (MangledName.consumeFront("?_S")) 132 return SpecialIntrinsicKind::LocalVftable; 133 if (MangledName.consumeFront("?__E")) 134 return SpecialIntrinsicKind::DynamicInitializer; 135 if (MangledName.consumeFront("?__F")) 136 return SpecialIntrinsicKind::DynamicAtexitDestructor; 137 if (MangledName.consumeFront("?__J")) 138 return SpecialIntrinsicKind::LocalStaticThreadGuard; 139 return SpecialIntrinsicKind::None; 140 } 141 142 static bool startsWithLocalScopePattern(StringView S) { 143 if (!S.consumeFront('?')) 144 return false; 145 if (S.size() < 2) 146 return false; 147 148 size_t End = S.find('?'); 149 if (End == StringView::npos) 150 return false; 151 StringView Candidate = S.substr(0, End); 152 if (Candidate.empty()) 153 return false; 154 155 // \?[0-9]\? 156 // ?@? is the discriminator 0. 157 if (Candidate.size() == 1) 158 return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9'); 159 160 // If it's not 0-9, then it's an encoded number terminated with an @ 161 if (Candidate.back() != '@') 162 return false; 163 Candidate = Candidate.dropBack(); 164 165 // An encoded number starts with B-P and all subsequent digits are in A-P. 166 // Note that the reason the first digit cannot be A is two fold. First, it 167 // would create an ambiguity with ?A which delimits the beginning of an 168 // anonymous namespace. Second, A represents 0, and you don't start a multi 169 // digit number with a leading 0. Presumably the anonymous namespace 170 // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J. 171 if (Candidate[0] < 'B' || Candidate[0] > 'P') 172 return false; 173 Candidate = Candidate.dropFront(); 174 while (!Candidate.empty()) { 175 if (Candidate[0] < 'A' || Candidate[0] > 'P') 176 return false; 177 Candidate = Candidate.dropFront(); 178 } 179 180 return true; 181 } 182 183 static bool isTagType(StringView S) { 184 switch (S.front()) { 185 case 'T': // union 186 case 'U': // struct 187 case 'V': // class 188 case 'W': // enum 189 return true; 190 } 191 return false; 192 } 193 194 static bool isCustomType(StringView S) { return S[0] == '?'; } 195 196 static bool isPointerType(StringView S) { 197 if (S.startsWith("$$Q")) // foo && 198 return true; 199 200 switch (S.front()) { 201 case 'A': // foo & 202 case 'P': // foo * 203 case 'Q': // foo *const 204 case 'R': // foo *volatile 205 case 'S': // foo *const volatile 206 return true; 207 } 208 return false; 209 } 210 211 static bool isArrayType(StringView S) { return S[0] == 'Y'; } 212 213 static bool isFunctionType(StringView S) { 214 return S.startsWith("$$A8@@") || S.startsWith("$$A6"); 215 } 216 217 static FunctionRefQualifier 218 demangleFunctionRefQualifier(StringView &MangledName) { 219 if (MangledName.consumeFront('G')) 220 return FunctionRefQualifier::Reference; 221 else if (MangledName.consumeFront('H')) 222 return FunctionRefQualifier::RValueReference; 223 return FunctionRefQualifier::None; 224 } 225 226 static std::pair<Qualifiers, PointerAffinity> 227 demanglePointerCVQualifiers(StringView &MangledName) { 228 if (MangledName.consumeFront("$$Q")) 229 return std::make_pair(Q_None, PointerAffinity::RValueReference); 230 231 switch (MangledName.popFront()) { 232 case 'A': 233 return std::make_pair(Q_None, PointerAffinity::Reference); 234 case 'P': 235 return std::make_pair(Q_None, PointerAffinity::Pointer); 236 case 'Q': 237 return std::make_pair(Q_Const, PointerAffinity::Pointer); 238 case 'R': 239 return std::make_pair(Q_Volatile, PointerAffinity::Pointer); 240 case 'S': 241 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), 242 PointerAffinity::Pointer); 243 default: 244 assert(false && "Ty is not a pointer type!"); 245 } 246 return std::make_pair(Q_None, PointerAffinity::Pointer); 247 } 248 249 namespace { 250 251 struct BackrefContext { 252 static constexpr size_t Max = 10; 253 254 TypeNode *FunctionParams[Max]; 255 size_t FunctionParamCount = 0; 256 257 // The first 10 BackReferences in a mangled name can be back-referenced by 258 // special name @[0-9]. This is a storage for the first 10 BackReferences. 259 NamedIdentifierNode *Names[Max]; 260 size_t NamesCount = 0; 261 }; 262 263 // Demangler class takes the main role in demangling symbols. 264 // It has a set of functions to parse mangled symbols into Type instances. 265 // It also has a set of functions to cnovert Type instances to strings. 266 class Demangler { 267 public: 268 Demangler() = default; 269 virtual ~Demangler() = default; 270 271 // You are supposed to call parse() first and then check if error is true. If 272 // it is false, call output() to write the formatted name to the given stream. 273 SymbolNode *parse(StringView &MangledName); 274 275 // True if an error occurred. 276 bool Error = false; 277 278 void dumpBackReferences(); 279 280 private: 281 SymbolNode *demangleEncodedSymbol(StringView &MangledName, 282 QualifiedNameNode *QN); 283 284 VariableSymbolNode *demangleVariableEncoding(StringView &MangledName, 285 StorageClass SC); 286 FunctionSymbolNode *demangleFunctionEncoding(StringView &MangledName); 287 288 Qualifiers demanglePointerExtQualifiers(StringView &MangledName); 289 290 // Parser functions. This is a recursive-descent parser. 291 TypeNode *demangleType(StringView &MangledName, QualifierMangleMode QMM); 292 PrimitiveTypeNode *demanglePrimitiveType(StringView &MangledName); 293 CustomTypeNode *demangleCustomType(StringView &MangledName); 294 TagTypeNode *demangleClassType(StringView &MangledName); 295 PointerTypeNode *demanglePointerType(StringView &MangledName); 296 PointerTypeNode *demangleMemberPointerType(StringView &MangledName); 297 FunctionSignatureNode *demangleFunctionType(StringView &MangledName, 298 bool HasThisQuals); 299 300 ArrayTypeNode *demangleArrayType(StringView &MangledName); 301 302 NodeArrayNode *demangleTemplateParameterList(StringView &MangledName); 303 NodeArrayNode *demangleFunctionParameterList(StringView &MangledName); 304 305 std::pair<uint64_t, bool> demangleNumber(StringView &MangledName); 306 uint64_t demangleUnsigned(StringView &MangledName); 307 int64_t demangleSigned(StringView &MangledName); 308 309 void memorizeString(StringView s); 310 void memorizeIdentifier(IdentifierNode *Identifier); 311 312 /// Allocate a copy of \p Borrowed into memory that we own. 313 StringView copyString(StringView Borrowed); 314 315 QualifiedNameNode *demangleFullyQualifiedTypeName(StringView &MangledName); 316 QualifiedNameNode *demangleFullyQualifiedSymbolName(StringView &MangledName); 317 318 IdentifierNode *demangleUnqualifiedTypeName(StringView &MangledName, 319 bool Memorize); 320 IdentifierNode *demangleUnqualifiedSymbolName(StringView &MangledName, 321 NameBackrefBehavior NBB); 322 323 QualifiedNameNode *demangleNameScopeChain(StringView &MangledName, 324 IdentifierNode *UnqualifiedName); 325 IdentifierNode *demangleNameScopePiece(StringView &MangledName); 326 327 NamedIdentifierNode *demangleBackRefName(StringView &MangledName); 328 IdentifierNode *demangleTemplateInstantiationName(StringView &MangledName, 329 NameBackrefBehavior NBB); 330 IdentifierNode *demangleFunctionIdentifierCode(StringView &MangledName); 331 IdentifierNode * 332 demangleFunctionIdentifierCode(StringView &MangledName, 333 FunctionIdentifierCodeGroup Group); 334 StructorIdentifierNode *demangleStructorIdentifier(StringView &MangledName, 335 bool IsDestructor); 336 ConversionOperatorIdentifierNode * 337 demangleConversionOperatorIdentifier(StringView &MangledName); 338 LiteralOperatorIdentifierNode * 339 demangleLiteralOperatorIdentifier(StringView &MangledName); 340 341 SymbolNode *demangleSpecialIntrinsic(StringView &MangledName); 342 SpecialTableSymbolNode * 343 demangleSpecialTableSymbolNode(StringView &MangledName, 344 SpecialIntrinsicKind SIK); 345 LocalStaticGuardVariableNode * 346 demangleLocalStaticGuard(StringView &MangledName); 347 VariableSymbolNode *demangleUntypedVariable(ArenaAllocator &Arena, 348 StringView &MangledName, 349 StringView VariableName); 350 VariableSymbolNode * 351 demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena, 352 StringView &MangledName); 353 FunctionSymbolNode *demangleInitFiniStub(StringView &MangledName, 354 bool IsDestructor); 355 356 NamedIdentifierNode *demangleSimpleName(StringView &MangledName, 357 bool Memorize); 358 NamedIdentifierNode *demangleAnonymousNamespaceName(StringView &MangledName); 359 NamedIdentifierNode *demangleLocallyScopedNamePiece(StringView &MangledName); 360 EncodedStringLiteralNode *demangleStringLiteral(StringView &MangledName); 361 FunctionSymbolNode *demangleVcallThunkNode(StringView &MangledName); 362 363 StringView demangleSimpleString(StringView &MangledName, bool Memorize); 364 365 FuncClass demangleFunctionClass(StringView &MangledName); 366 CallingConv demangleCallingConvention(StringView &MangledName); 367 StorageClass demangleVariableStorageClass(StringView &MangledName); 368 void demangleThrowSpecification(StringView &MangledName); 369 wchar_t demangleWcharLiteral(StringView &MangledName); 370 uint8_t demangleCharLiteral(StringView &MangledName); 371 372 std::pair<Qualifiers, bool> demangleQualifiers(StringView &MangledName); 373 374 // Memory allocator. 375 ArenaAllocator Arena; 376 377 // A single type uses one global back-ref table for all function params. 378 // This means back-refs can even go "into" other types. Examples: 379 // 380 // // Second int* is a back-ref to first. 381 // void foo(int *, int*); 382 // 383 // // Second int* is not a back-ref to first (first is not a function param). 384 // int* foo(int*); 385 // 386 // // Second int* is a back-ref to first (ALL function types share the same 387 // // back-ref map. 388 // using F = void(*)(int*); 389 // F G(int *); 390 BackrefContext Backrefs; 391 }; 392 } // namespace 393 394 StringView Demangler::copyString(StringView Borrowed) { 395 char *Stable = Arena.allocUnalignedBuffer(Borrowed.size() + 1); 396 std::strcpy(Stable, Borrowed.begin()); 397 398 return {Stable, Borrowed.size()}; 399 } 400 401 SpecialTableSymbolNode * 402 Demangler::demangleSpecialTableSymbolNode(StringView &MangledName, 403 SpecialIntrinsicKind K) { 404 NamedIdentifierNode *NI = Arena.alloc<NamedIdentifierNode>(); 405 switch (K) { 406 case SpecialIntrinsicKind::Vftable: 407 NI->Name = "`vftable'"; 408 break; 409 case SpecialIntrinsicKind::Vbtable: 410 NI->Name = "`vbtable'"; 411 break; 412 case SpecialIntrinsicKind::LocalVftable: 413 NI->Name = "`local vftable'"; 414 break; 415 case SpecialIntrinsicKind::RttiCompleteObjLocator: 416 NI->Name = "`RTTI Complete Object Locator'"; 417 break; 418 default: 419 LLVM_BUILTIN_UNREACHABLE; 420 } 421 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI); 422 SpecialTableSymbolNode *STSN = Arena.alloc<SpecialTableSymbolNode>(); 423 STSN->Name = QN; 424 bool IsMember = false; 425 char Front = MangledName.popFront(); 426 if (Front != '6' && Front != '7') { 427 Error = true; 428 return nullptr; 429 } 430 431 std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName); 432 if (!MangledName.consumeFront('@')) 433 STSN->TargetName = demangleFullyQualifiedTypeName(MangledName); 434 return STSN; 435 } 436 437 LocalStaticGuardVariableNode * 438 Demangler::demangleLocalStaticGuard(StringView &MangledName) { 439 LocalStaticGuardIdentifierNode *LSGI = 440 Arena.alloc<LocalStaticGuardIdentifierNode>(); 441 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI); 442 LocalStaticGuardVariableNode *LSGVN = 443 Arena.alloc<LocalStaticGuardVariableNode>(); 444 LSGVN->Name = QN; 445 446 if (MangledName.consumeFront("4IA")) 447 LSGVN->IsVisible = false; 448 else if (MangledName.consumeFront("5")) 449 LSGVN->IsVisible = true; 450 else { 451 Error = true; 452 return nullptr; 453 } 454 455 if (!MangledName.empty()) 456 LSGI->ScopeIndex = demangleUnsigned(MangledName); 457 return LSGVN; 458 } 459 460 static NamedIdentifierNode *synthesizeNamedIdentifier(ArenaAllocator &Arena, 461 StringView Name) { 462 NamedIdentifierNode *Id = Arena.alloc<NamedIdentifierNode>(); 463 Id->Name = Name; 464 return Id; 465 } 466 467 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena, 468 IdentifierNode *Identifier) { 469 QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>(); 470 QN->Components = Arena.alloc<NodeArrayNode>(); 471 QN->Components->Count = 1; 472 QN->Components->Nodes = Arena.allocArray<Node *>(1); 473 QN->Components->Nodes[0] = Identifier; 474 return QN; 475 } 476 477 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena, 478 StringView Name) { 479 NamedIdentifierNode *Id = synthesizeNamedIdentifier(Arena, Name); 480 return synthesizeQualifiedName(Arena, Id); 481 } 482 483 static VariableSymbolNode *synthesizeVariable(ArenaAllocator &Arena, 484 TypeNode *Type, 485 StringView VariableName) { 486 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 487 VSN->Type = Type; 488 VSN->Name = synthesizeQualifiedName(Arena, VariableName); 489 return VSN; 490 } 491 492 VariableSymbolNode *Demangler::demangleUntypedVariable( 493 ArenaAllocator &Arena, StringView &MangledName, StringView VariableName) { 494 NamedIdentifierNode *NI = synthesizeNamedIdentifier(Arena, VariableName); 495 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI); 496 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 497 VSN->Name = QN; 498 if (MangledName.consumeFront("8")) 499 return VSN; 500 501 Error = true; 502 return nullptr; 503 } 504 505 VariableSymbolNode * 506 Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena, 507 StringView &MangledName) { 508 RttiBaseClassDescriptorNode *RBCDN = 509 Arena.alloc<RttiBaseClassDescriptorNode>(); 510 RBCDN->NVOffset = demangleUnsigned(MangledName); 511 RBCDN->VBPtrOffset = demangleSigned(MangledName); 512 RBCDN->VBTableOffset = demangleUnsigned(MangledName); 513 RBCDN->Flags = demangleUnsigned(MangledName); 514 if (Error) 515 return nullptr; 516 517 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 518 VSN->Name = demangleNameScopeChain(MangledName, RBCDN); 519 MangledName.consumeFront('8'); 520 return VSN; 521 } 522 523 FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName, 524 bool IsDestructor) { 525 DynamicStructorIdentifierNode *DSIN = 526 Arena.alloc<DynamicStructorIdentifierNode>(); 527 DSIN->IsDestructor = IsDestructor; 528 529 bool IsKnownStaticDataMember = false; 530 if (MangledName.consumeFront('?')) 531 IsKnownStaticDataMember = true; 532 533 QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName); 534 535 SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN); 536 FunctionSymbolNode *FSN = nullptr; 537 Symbol->Name = QN; 538 539 if (Symbol->kind() == NodeKind::VariableSymbol) { 540 DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol); 541 542 // Older versions of clang mangled this type of symbol incorrectly. They 543 // would omit the leading ? and they would only emit a single @ at the end. 544 // The correct mangling is a leading ? and 2 trailing @ signs. Handle 545 // both cases. 546 int AtCount = IsKnownStaticDataMember ? 2 : 1; 547 for (int I = 0; I < AtCount; ++I) { 548 if (MangledName.consumeFront('@')) 549 continue; 550 Error = true; 551 return nullptr; 552 } 553 554 FSN = demangleFunctionEncoding(MangledName); 555 FSN->Name = synthesizeQualifiedName(Arena, DSIN); 556 } else { 557 if (IsKnownStaticDataMember) { 558 // This was supposed to be a static data member, but we got a function. 559 Error = true; 560 return nullptr; 561 } 562 563 FSN = static_cast<FunctionSymbolNode *>(Symbol); 564 DSIN->Name = Symbol->Name; 565 FSN->Name = synthesizeQualifiedName(Arena, DSIN); 566 } 567 568 return FSN; 569 } 570 571 SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) { 572 SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName); 573 if (SIK == SpecialIntrinsicKind::None) 574 return nullptr; 575 576 switch (SIK) { 577 case SpecialIntrinsicKind::StringLiteralSymbol: 578 return demangleStringLiteral(MangledName); 579 case SpecialIntrinsicKind::Vftable: 580 case SpecialIntrinsicKind::Vbtable: 581 case SpecialIntrinsicKind::LocalVftable: 582 case SpecialIntrinsicKind::RttiCompleteObjLocator: 583 return demangleSpecialTableSymbolNode(MangledName, SIK); 584 case SpecialIntrinsicKind::VcallThunk: 585 return demangleVcallThunkNode(MangledName); 586 case SpecialIntrinsicKind::LocalStaticGuard: 587 return demangleLocalStaticGuard(MangledName); 588 case SpecialIntrinsicKind::RttiTypeDescriptor: { 589 TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result); 590 if (Error) 591 break; 592 if (!MangledName.consumeFront("@8")) 593 break; 594 if (!MangledName.empty()) 595 break; 596 return synthesizeVariable(Arena, T, "`RTTI Type Descriptor'"); 597 } 598 case SpecialIntrinsicKind::RttiBaseClassArray: 599 return demangleUntypedVariable(Arena, MangledName, 600 "`RTTI Base Class Array'"); 601 case SpecialIntrinsicKind::RttiClassHierarchyDescriptor: 602 return demangleUntypedVariable(Arena, MangledName, 603 "`RTTI Class Hierarchy Descriptor'"); 604 case SpecialIntrinsicKind::RttiBaseClassDescriptor: 605 return demangleRttiBaseClassDescriptorNode(Arena, MangledName); 606 case SpecialIntrinsicKind::DynamicInitializer: 607 return demangleInitFiniStub(MangledName, false); 608 case SpecialIntrinsicKind::DynamicAtexitDestructor: 609 return demangleInitFiniStub(MangledName, true); 610 default: 611 break; 612 } 613 Error = true; 614 return nullptr; 615 } 616 617 IdentifierNode * 618 Demangler::demangleFunctionIdentifierCode(StringView &MangledName) { 619 assert(MangledName.startsWith('?')); 620 MangledName = MangledName.dropFront(); 621 622 if (MangledName.consumeFront("__")) 623 return demangleFunctionIdentifierCode( 624 MangledName, FunctionIdentifierCodeGroup::DoubleUnder); 625 else if (MangledName.consumeFront("_")) 626 return demangleFunctionIdentifierCode(MangledName, 627 FunctionIdentifierCodeGroup::Under); 628 return demangleFunctionIdentifierCode(MangledName, 629 FunctionIdentifierCodeGroup::Basic); 630 } 631 632 StructorIdentifierNode * 633 Demangler::demangleStructorIdentifier(StringView &MangledName, 634 bool IsDestructor) { 635 StructorIdentifierNode *N = Arena.alloc<StructorIdentifierNode>(); 636 N->IsDestructor = IsDestructor; 637 return N; 638 } 639 640 ConversionOperatorIdentifierNode * 641 Demangler::demangleConversionOperatorIdentifier(StringView &MangledName) { 642 ConversionOperatorIdentifierNode *N = 643 Arena.alloc<ConversionOperatorIdentifierNode>(); 644 return N; 645 } 646 647 LiteralOperatorIdentifierNode * 648 Demangler::demangleLiteralOperatorIdentifier(StringView &MangledName) { 649 LiteralOperatorIdentifierNode *N = 650 Arena.alloc<LiteralOperatorIdentifierNode>(); 651 N->Name = demangleSimpleString(MangledName, false); 652 return N; 653 } 654 655 static IntrinsicFunctionKind 656 translateIntrinsicFunctionCode(char CH, FunctionIdentifierCodeGroup Group) { 657 // Not all ? identifiers are intrinsics *functions*. This function only maps 658 // operator codes for the special functions, all others are handled elsewhere, 659 // hence the IFK::None entries in the table. 660 using IFK = IntrinsicFunctionKind; 661 static IFK Basic[36] = { 662 IFK::None, // ?0 # Foo::Foo() 663 IFK::None, // ?1 # Foo::~Foo() 664 IFK::New, // ?2 # operator new 665 IFK::Delete, // ?3 # operator delete 666 IFK::Assign, // ?4 # operator= 667 IFK::RightShift, // ?5 # operator>> 668 IFK::LeftShift, // ?6 # operator<< 669 IFK::LogicalNot, // ?7 # operator! 670 IFK::Equals, // ?8 # operator== 671 IFK::NotEquals, // ?9 # operator!= 672 IFK::ArraySubscript, // ?A # operator[] 673 IFK::None, // ?B # Foo::operator <type>() 674 IFK::Pointer, // ?C # operator-> 675 IFK::Dereference, // ?D # operator* 676 IFK::Increment, // ?E # operator++ 677 IFK::Decrement, // ?F # operator-- 678 IFK::Minus, // ?G # operator- 679 IFK::Plus, // ?H # operator+ 680 IFK::BitwiseAnd, // ?I # operator& 681 IFK::MemberPointer, // ?J # operator->* 682 IFK::Divide, // ?K # operator/ 683 IFK::Modulus, // ?L # operator% 684 IFK::LessThan, // ?M operator< 685 IFK::LessThanEqual, // ?N operator<= 686 IFK::GreaterThan, // ?O operator> 687 IFK::GreaterThanEqual, // ?P operator>= 688 IFK::Comma, // ?Q operator, 689 IFK::Parens, // ?R operator() 690 IFK::BitwiseNot, // ?S operator~ 691 IFK::BitwiseXor, // ?T operator^ 692 IFK::BitwiseOr, // ?U operator| 693 IFK::LogicalAnd, // ?V operator&& 694 IFK::LogicalOr, // ?W operator|| 695 IFK::TimesEqual, // ?X operator*= 696 IFK::PlusEqual, // ?Y operator+= 697 IFK::MinusEqual, // ?Z operator-= 698 }; 699 static IFK Under[36] = { 700 IFK::DivEqual, // ?_0 operator/= 701 IFK::ModEqual, // ?_1 operator%= 702 IFK::RshEqual, // ?_2 operator>>= 703 IFK::LshEqual, // ?_3 operator<<= 704 IFK::BitwiseAndEqual, // ?_4 operator&= 705 IFK::BitwiseOrEqual, // ?_5 operator|= 706 IFK::BitwiseXorEqual, // ?_6 operator^= 707 IFK::None, // ?_7 # vftable 708 IFK::None, // ?_8 # vbtable 709 IFK::None, // ?_9 # vcall 710 IFK::None, // ?_A # typeof 711 IFK::None, // ?_B # local static guard 712 IFK::None, // ?_C # string literal 713 IFK::VbaseDtor, // ?_D # vbase destructor 714 IFK::VecDelDtor, // ?_E # vector deleting destructor 715 IFK::DefaultCtorClosure, // ?_F # default constructor closure 716 IFK::ScalarDelDtor, // ?_G # scalar deleting destructor 717 IFK::VecCtorIter, // ?_H # vector constructor iterator 718 IFK::VecDtorIter, // ?_I # vector destructor iterator 719 IFK::VecVbaseCtorIter, // ?_J # vector vbase constructor iterator 720 IFK::VdispMap, // ?_K # virtual displacement map 721 IFK::EHVecCtorIter, // ?_L # eh vector constructor iterator 722 IFK::EHVecDtorIter, // ?_M # eh vector destructor iterator 723 IFK::EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator 724 IFK::CopyCtorClosure, // ?_O # copy constructor closure 725 IFK::None, // ?_P<name> # udt returning <name> 726 IFK::None, // ?_Q # <unknown> 727 IFK::None, // ?_R0 - ?_R4 # RTTI Codes 728 IFK::None, // ?_S # local vftable 729 IFK::LocalVftableCtorClosure, // ?_T # local vftable constructor closure 730 IFK::ArrayNew, // ?_U operator new[] 731 IFK::ArrayDelete, // ?_V operator delete[] 732 IFK::None, // ?_W <unused> 733 IFK::None, // ?_X <unused> 734 IFK::None, // ?_Y <unused> 735 IFK::None, // ?_Z <unused> 736 }; 737 static IFK DoubleUnder[36] = { 738 IFK::None, // ?__0 <unused> 739 IFK::None, // ?__1 <unused> 740 IFK::None, // ?__2 <unused> 741 IFK::None, // ?__3 <unused> 742 IFK::None, // ?__4 <unused> 743 IFK::None, // ?__5 <unused> 744 IFK::None, // ?__6 <unused> 745 IFK::None, // ?__7 <unused> 746 IFK::None, // ?__8 <unused> 747 IFK::None, // ?__9 <unused> 748 IFK::ManVectorCtorIter, // ?__A managed vector ctor iterator 749 IFK::ManVectorDtorIter, // ?__B managed vector dtor iterator 750 IFK::EHVectorCopyCtorIter, // ?__C EH vector copy ctor iterator 751 IFK::EHVectorVbaseCopyCtorIter, // ?__D EH vector vbase copy ctor iter 752 IFK::None, // ?__E dynamic initializer for `T' 753 IFK::None, // ?__F dynamic atexit destructor for `T' 754 IFK::VectorCopyCtorIter, // ?__G vector copy constructor iter 755 IFK::VectorVbaseCopyCtorIter, // ?__H vector vbase copy ctor iter 756 IFK::ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy ctor 757 // iter 758 IFK::None, // ?__J local static thread guard 759 IFK::None, // ?__K operator ""_name 760 IFK::CoAwait, // ?__L co_await 761 IFK::None, // ?__M <unused> 762 IFK::None, // ?__N <unused> 763 IFK::None, // ?__O <unused> 764 IFK::None, // ?__P <unused> 765 IFK::None, // ?__Q <unused> 766 IFK::None, // ?__R <unused> 767 IFK::None, // ?__S <unused> 768 IFK::None, // ?__T <unused> 769 IFK::None, // ?__U <unused> 770 IFK::None, // ?__V <unused> 771 IFK::None, // ?__W <unused> 772 IFK::None, // ?__X <unused> 773 IFK::None, // ?__Y <unused> 774 IFK::None, // ?__Z <unused> 775 }; 776 777 int Index = (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10); 778 switch (Group) { 779 case FunctionIdentifierCodeGroup::Basic: 780 return Basic[Index]; 781 case FunctionIdentifierCodeGroup::Under: 782 return Under[Index]; 783 case FunctionIdentifierCodeGroup::DoubleUnder: 784 return DoubleUnder[Index]; 785 } 786 LLVM_BUILTIN_UNREACHABLE; 787 } 788 789 IdentifierNode * 790 Demangler::demangleFunctionIdentifierCode(StringView &MangledName, 791 FunctionIdentifierCodeGroup Group) { 792 switch (Group) { 793 case FunctionIdentifierCodeGroup::Basic: 794 switch (char CH = MangledName.popFront()) { 795 case '0': 796 case '1': 797 return demangleStructorIdentifier(MangledName, CH == '1'); 798 case 'B': 799 return demangleConversionOperatorIdentifier(MangledName); 800 default: 801 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 802 translateIntrinsicFunctionCode(CH, Group)); 803 } 804 break; 805 case FunctionIdentifierCodeGroup::Under: 806 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 807 translateIntrinsicFunctionCode(MangledName.popFront(), Group)); 808 case FunctionIdentifierCodeGroup::DoubleUnder: 809 switch (char CH = MangledName.popFront()) { 810 case 'K': 811 return demangleLiteralOperatorIdentifier(MangledName); 812 default: 813 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 814 translateIntrinsicFunctionCode(CH, Group)); 815 } 816 } 817 // No Mangling Yet: Spaceship, // operator<=> 818 819 return nullptr; 820 } 821 822 SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName, 823 QualifiedNameNode *Name) { 824 // Read a variable. 825 switch (MangledName.front()) { 826 case '0': 827 case '1': 828 case '2': 829 case '3': 830 case '4': { 831 StorageClass SC = demangleVariableStorageClass(MangledName); 832 return demangleVariableEncoding(MangledName, SC); 833 } 834 case '8': 835 return nullptr; 836 } 837 FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName); 838 839 IdentifierNode *UQN = Name->getUnqualifiedIdentifier(); 840 if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) { 841 ConversionOperatorIdentifierNode *COIN = 842 static_cast<ConversionOperatorIdentifierNode *>(UQN); 843 COIN->TargetType = FSN->Signature->ReturnType; 844 } 845 return FSN; 846 } 847 848 // Parser entry point. 849 SymbolNode *Demangler::parse(StringView &MangledName) { 850 // We can't demangle MD5 names, just output them as-is. 851 // Also, MSVC-style mangled symbols must start with '?'. 852 if (MangledName.startsWith("??@")) { 853 // This is an MD5 mangled name. We can't demangle it, just return the 854 // mangled name. 855 SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol); 856 S->Name = synthesizeQualifiedName(Arena, MangledName); 857 return S; 858 } 859 860 if (!MangledName.startsWith('?')) { 861 Error = true; 862 return nullptr; 863 } 864 865 MangledName.consumeFront('?'); 866 867 // ?$ is a template instantiation, but all other names that start with ? are 868 // operators / special names. 869 if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName)) 870 return SI; 871 872 // What follows is a main symbol name. This may include namespaces or class 873 // back references. 874 QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName); 875 if (Error) 876 return nullptr; 877 878 SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN); 879 if (Symbol) { 880 Symbol->Name = QN; 881 } 882 883 if (Error) 884 return nullptr; 885 886 return Symbol; 887 } 888 889 // <type-encoding> ::= <storage-class> <variable-type> 890 // <storage-class> ::= 0 # private static member 891 // ::= 1 # protected static member 892 // ::= 2 # public static member 893 // ::= 3 # global 894 // ::= 4 # static local 895 896 VariableSymbolNode *Demangler::demangleVariableEncoding(StringView &MangledName, 897 StorageClass SC) { 898 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 899 900 VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop); 901 VSN->SC = SC; 902 903 // <variable-type> ::= <type> <cvr-qualifiers> 904 // ::= <type> <pointee-cvr-qualifiers> # pointers, references 905 switch (VSN->Type->kind()) { 906 case NodeKind::PointerType: { 907 PointerTypeNode *PTN = static_cast<PointerTypeNode *>(VSN->Type); 908 909 Qualifiers ExtraChildQuals = Q_None; 910 PTN->Quals = Qualifiers(VSN->Type->Quals | 911 demanglePointerExtQualifiers(MangledName)); 912 913 bool IsMember = false; 914 std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName); 915 916 if (PTN->ClassParent) { 917 QualifiedNameNode *BackRefName = 918 demangleFullyQualifiedTypeName(MangledName); 919 (void)BackRefName; 920 } 921 PTN->Pointee->Quals = Qualifiers(PTN->Pointee->Quals | ExtraChildQuals); 922 923 break; 924 } 925 default: 926 VSN->Type->Quals = demangleQualifiers(MangledName).first; 927 break; 928 } 929 930 return VSN; 931 } 932 933 // Sometimes numbers are encoded in mangled symbols. For example, 934 // "int (*x)[20]" is a valid C type (x is a pointer to an array of 935 // length 20), so we need some way to embed numbers as part of symbols. 936 // This function parses it. 937 // 938 // <number> ::= [?] <non-negative integer> 939 // 940 // <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10 941 // ::= <hex digit>+ @ # when Numbrer == 0 or >= 10 942 // 943 // <hex-digit> ::= [A-P] # A = 0, B = 1, ... 944 std::pair<uint64_t, bool> Demangler::demangleNumber(StringView &MangledName) { 945 bool IsNegative = MangledName.consumeFront('?'); 946 947 if (startsWithDigit(MangledName)) { 948 uint64_t Ret = MangledName[0] - '0' + 1; 949 MangledName = MangledName.dropFront(1); 950 return {Ret, IsNegative}; 951 } 952 953 uint64_t Ret = 0; 954 for (size_t i = 0; i < MangledName.size(); ++i) { 955 char C = MangledName[i]; 956 if (C == '@') { 957 MangledName = MangledName.dropFront(i + 1); 958 return {Ret, IsNegative}; 959 } 960 if ('A' <= C && C <= 'P') { 961 Ret = (Ret << 4) + (C - 'A'); 962 continue; 963 } 964 break; 965 } 966 967 Error = true; 968 return {0ULL, false}; 969 } 970 971 uint64_t Demangler::demangleUnsigned(StringView &MangledName) { 972 bool IsNegative = false; 973 uint64_t Number = 0; 974 std::tie(Number, IsNegative) = demangleNumber(MangledName); 975 if (IsNegative) 976 Error = true; 977 return Number; 978 } 979 980 int64_t Demangler::demangleSigned(StringView &MangledName) { 981 bool IsNegative = false; 982 uint64_t Number = 0; 983 std::tie(Number, IsNegative) = demangleNumber(MangledName); 984 if (Number > INT64_MAX) 985 Error = true; 986 int64_t I = static_cast<int64_t>(Number); 987 return IsNegative ? -I : I; 988 } 989 990 // First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9. 991 // Memorize it. 992 void Demangler::memorizeString(StringView S) { 993 if (Backrefs.NamesCount >= BackrefContext::Max) 994 return; 995 for (size_t i = 0; i < Backrefs.NamesCount; ++i) 996 if (S == Backrefs.Names[i]->Name) 997 return; 998 NamedIdentifierNode *N = Arena.alloc<NamedIdentifierNode>(); 999 N->Name = S; 1000 Backrefs.Names[Backrefs.NamesCount++] = N; 1001 } 1002 1003 NamedIdentifierNode *Demangler::demangleBackRefName(StringView &MangledName) { 1004 assert(startsWithDigit(MangledName)); 1005 1006 size_t I = MangledName[0] - '0'; 1007 if (I >= Backrefs.NamesCount) { 1008 Error = true; 1009 return nullptr; 1010 } 1011 1012 MangledName = MangledName.dropFront(); 1013 return Backrefs.Names[I]; 1014 } 1015 1016 void Demangler::memorizeIdentifier(IdentifierNode *Identifier) { 1017 // Render this class template name into a string buffer so that we can 1018 // memorize it for the purpose of back-referencing. 1019 OutputStream OS; 1020 if (initializeOutputStream(nullptr, nullptr, OS, 1024)) 1021 // FIXME: Propagate out-of-memory as an error? 1022 std::terminate(); 1023 Identifier->output(OS, OF_Default); 1024 OS << '\0'; 1025 char *Name = OS.getBuffer(); 1026 1027 StringView Owned = copyString(Name); 1028 memorizeString(Owned); 1029 std::free(Name); 1030 } 1031 1032 IdentifierNode * 1033 Demangler::demangleTemplateInstantiationName(StringView &MangledName, 1034 NameBackrefBehavior NBB) { 1035 assert(MangledName.startsWith("?$")); 1036 MangledName.consumeFront("?$"); 1037 1038 BackrefContext OuterContext; 1039 std::swap(OuterContext, Backrefs); 1040 1041 IdentifierNode *Identifier = 1042 demangleUnqualifiedSymbolName(MangledName, NBB_Simple); 1043 if (!Error) 1044 Identifier->TemplateParams = demangleTemplateParameterList(MangledName); 1045 1046 std::swap(OuterContext, Backrefs); 1047 if (Error) 1048 return nullptr; 1049 1050 if (NBB & NBB_Template) 1051 memorizeIdentifier(Identifier); 1052 1053 return Identifier; 1054 } 1055 1056 NamedIdentifierNode *Demangler::demangleSimpleName(StringView &MangledName, 1057 bool Memorize) { 1058 StringView S = demangleSimpleString(MangledName, Memorize); 1059 if (Error) 1060 return nullptr; 1061 1062 NamedIdentifierNode *Name = Arena.alloc<NamedIdentifierNode>(); 1063 Name->Name = S; 1064 return Name; 1065 } 1066 1067 static bool isRebasedHexDigit(char C) { return (C >= 'A' && C <= 'P'); } 1068 1069 static uint8_t rebasedHexDigitToNumber(char C) { 1070 assert(isRebasedHexDigit(C)); 1071 return (C <= 'J') ? (C - 'A') : (10 + C - 'K'); 1072 } 1073 1074 uint8_t Demangler::demangleCharLiteral(StringView &MangledName) { 1075 if (!MangledName.startsWith('?')) 1076 return MangledName.popFront(); 1077 1078 MangledName = MangledName.dropFront(); 1079 if (MangledName.empty()) 1080 goto CharLiteralError; 1081 1082 if (MangledName.consumeFront('$')) { 1083 // Two hex digits 1084 if (MangledName.size() < 2) 1085 goto CharLiteralError; 1086 StringView Nibbles = MangledName.substr(0, 2); 1087 if (!isRebasedHexDigit(Nibbles[0]) || !isRebasedHexDigit(Nibbles[1])) 1088 goto CharLiteralError; 1089 // Don't append the null terminator. 1090 uint8_t C1 = rebasedHexDigitToNumber(Nibbles[0]); 1091 uint8_t C2 = rebasedHexDigitToNumber(Nibbles[1]); 1092 MangledName = MangledName.dropFront(2); 1093 return (C1 << 4) | C2; 1094 } 1095 1096 if (startsWithDigit(MangledName)) { 1097 const char *Lookup = ",/\\:. \n\t'-"; 1098 char C = Lookup[MangledName[0] - '0']; 1099 MangledName = MangledName.dropFront(); 1100 return C; 1101 } 1102 1103 if (MangledName[0] >= 'a' && MangledName[0] <= 'z') { 1104 char Lookup[26] = {'\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7', 1105 '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE', 1106 '\xEF', '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5', 1107 '\xF6', '\xF7', '\xF8', '\xF9', '\xFA'}; 1108 char C = Lookup[MangledName[0] - 'a']; 1109 MangledName = MangledName.dropFront(); 1110 return C; 1111 } 1112 1113 if (MangledName[0] >= 'A' && MangledName[0] <= 'Z') { 1114 char Lookup[26] = {'\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7', 1115 '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE', 1116 '\xCF', '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5', 1117 '\xD6', '\xD7', '\xD8', '\xD9', '\xDA'}; 1118 char C = Lookup[MangledName[0] - 'A']; 1119 MangledName = MangledName.dropFront(); 1120 return C; 1121 } 1122 1123 CharLiteralError: 1124 Error = true; 1125 return '\0'; 1126 } 1127 1128 wchar_t Demangler::demangleWcharLiteral(StringView &MangledName) { 1129 uint8_t C1, C2; 1130 1131 C1 = demangleCharLiteral(MangledName); 1132 if (Error) 1133 goto WCharLiteralError; 1134 C2 = demangleCharLiteral(MangledName); 1135 if (Error) 1136 goto WCharLiteralError; 1137 1138 return ((wchar_t)C1 << 8) | (wchar_t)C2; 1139 1140 WCharLiteralError: 1141 Error = true; 1142 return L'\0'; 1143 } 1144 1145 static void writeHexDigit(char *Buffer, uint8_t Digit) { 1146 assert(Digit <= 15); 1147 *Buffer = (Digit < 10) ? ('0' + Digit) : ('A' + Digit - 10); 1148 } 1149 1150 static void outputHex(OutputStream &OS, unsigned C) { 1151 if (C == 0) { 1152 OS << "\\x00"; 1153 return; 1154 } 1155 // It's easier to do the math if we can work from right to left, but we need 1156 // to print the numbers from left to right. So render this into a temporary 1157 // buffer first, then output the temporary buffer. Each byte is of the form 1158 // \xAB, which means that each byte needs 4 characters. Since there are at 1159 // most 4 bytes, we need a 4*4+1 = 17 character temporary buffer. 1160 char TempBuffer[17]; 1161 1162 ::memset(TempBuffer, 0, sizeof(TempBuffer)); 1163 constexpr int MaxPos = 15; 1164 1165 int Pos = MaxPos - 1; 1166 while (C != 0) { 1167 for (int I = 0; I < 2; ++I) { 1168 writeHexDigit(&TempBuffer[Pos--], C % 16); 1169 C /= 16; 1170 } 1171 TempBuffer[Pos--] = 'x'; 1172 TempBuffer[Pos--] = '\\'; 1173 assert(Pos >= 0); 1174 } 1175 OS << StringView(&TempBuffer[Pos + 1]); 1176 } 1177 1178 static void outputEscapedChar(OutputStream &OS, unsigned C) { 1179 switch (C) { 1180 case '\'': // single quote 1181 OS << "\\\'"; 1182 return; 1183 case '\"': // double quote 1184 OS << "\\\""; 1185 return; 1186 case '\\': // backslash 1187 OS << "\\\\"; 1188 return; 1189 case '\a': // bell 1190 OS << "\\a"; 1191 return; 1192 case '\b': // backspace 1193 OS << "\\b"; 1194 return; 1195 case '\f': // form feed 1196 OS << "\\f"; 1197 return; 1198 case '\n': // new line 1199 OS << "\\n"; 1200 return; 1201 case '\r': // carriage return 1202 OS << "\\r"; 1203 return; 1204 case '\t': // tab 1205 OS << "\\t"; 1206 return; 1207 case '\v': // vertical tab 1208 OS << "\\v"; 1209 return; 1210 default: 1211 break; 1212 } 1213 1214 if (C > 0x1F && C < 0x7F) { 1215 // Standard ascii char. 1216 OS << (char)C; 1217 return; 1218 } 1219 1220 outputHex(OS, C); 1221 } 1222 1223 static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) { 1224 const uint8_t *End = StringBytes + Length - 1; 1225 unsigned Count = 0; 1226 while (Length > 0 && *End == 0) { 1227 --Length; 1228 --End; 1229 ++Count; 1230 } 1231 return Count; 1232 } 1233 1234 static unsigned countEmbeddedNulls(const uint8_t *StringBytes, 1235 unsigned Length) { 1236 unsigned Result = 0; 1237 for (unsigned I = 0; I < Length; ++I) { 1238 if (*StringBytes++ == 0) 1239 ++Result; 1240 } 1241 return Result; 1242 } 1243 1244 static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars, 1245 unsigned NumBytes) { 1246 assert(NumBytes > 0); 1247 1248 // If the number of bytes is odd, this is guaranteed to be a char string. 1249 if (NumBytes % 2 == 1) 1250 return 1; 1251 1252 // All strings can encode at most 32 bytes of data. If it's less than that, 1253 // then we encoded the entire string. In this case we check for a 1-byte, 1254 // 2-byte, or 4-byte null terminator. 1255 if (NumBytes < 32) { 1256 unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars); 1257 if (TrailingNulls >= 4) 1258 return 4; 1259 if (TrailingNulls >= 2) 1260 return 2; 1261 return 1; 1262 } 1263 1264 // The whole string was not able to be encoded. Try to look at embedded null 1265 // terminators to guess. The heuristic is that we count all embedded null 1266 // terminators. If more than 2/3 are null, it's a char32. If more than 1/3 1267 // are null, it's a char16. Otherwise it's a char8. This obviously isn't 1268 // perfect and is biased towards languages that have ascii alphabets, but this 1269 // was always going to be best effort since the encoding is lossy. 1270 unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars); 1271 if (Nulls >= 2 * NumChars / 3) 1272 return 4; 1273 if (Nulls >= NumChars / 3) 1274 return 2; 1275 return 1; 1276 } 1277 1278 static unsigned decodeMultiByteChar(const uint8_t *StringBytes, 1279 unsigned CharIndex, unsigned CharBytes) { 1280 assert(CharBytes == 1 || CharBytes == 2 || CharBytes == 4); 1281 unsigned Offset = CharIndex * CharBytes; 1282 unsigned Result = 0; 1283 StringBytes = StringBytes + Offset; 1284 for (unsigned I = 0; I < CharBytes; ++I) { 1285 unsigned C = static_cast<unsigned>(StringBytes[I]); 1286 Result |= C << (8 * I); 1287 } 1288 return Result; 1289 } 1290 1291 FunctionSymbolNode *Demangler::demangleVcallThunkNode(StringView &MangledName) { 1292 FunctionSymbolNode *FSN = Arena.alloc<FunctionSymbolNode>(); 1293 VcallThunkIdentifierNode *VTIN = Arena.alloc<VcallThunkIdentifierNode>(); 1294 FSN->Signature = Arena.alloc<ThunkSignatureNode>(); 1295 FSN->Signature->FunctionClass = FC_NoParameterList; 1296 1297 FSN->Name = demangleNameScopeChain(MangledName, VTIN); 1298 if (!Error) 1299 Error = !MangledName.consumeFront("$B"); 1300 if (!Error) 1301 VTIN->OffsetInVTable = demangleUnsigned(MangledName); 1302 if (!Error) 1303 Error = !MangledName.consumeFront('A'); 1304 if (!Error) 1305 FSN->Signature->CallConvention = demangleCallingConvention(MangledName); 1306 return (Error) ? nullptr : FSN; 1307 } 1308 1309 EncodedStringLiteralNode * 1310 Demangler::demangleStringLiteral(StringView &MangledName) { 1311 // This function uses goto, so declare all variables up front. 1312 OutputStream OS; 1313 StringView CRC; 1314 uint64_t StringByteSize; 1315 bool IsWcharT = false; 1316 bool IsNegative = false; 1317 size_t CrcEndPos = 0; 1318 char *ResultBuffer = nullptr; 1319 1320 EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>(); 1321 1322 // Prefix indicating the beginning of a string literal 1323 if (!MangledName.consumeFront("@_")) 1324 goto StringLiteralError; 1325 if (MangledName.empty()) 1326 goto StringLiteralError; 1327 1328 // Char Type (regular or wchar_t) 1329 switch (MangledName.popFront()) { 1330 case '1': 1331 IsWcharT = true; 1332 LLVM_FALLTHROUGH; 1333 case '0': 1334 break; 1335 default: 1336 goto StringLiteralError; 1337 } 1338 1339 // Encoded Length 1340 std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName); 1341 if (Error || IsNegative) 1342 goto StringLiteralError; 1343 1344 // CRC 32 (always 8 characters plus a terminator) 1345 CrcEndPos = MangledName.find('@'); 1346 if (CrcEndPos == StringView::npos) 1347 goto StringLiteralError; 1348 CRC = MangledName.substr(0, CrcEndPos); 1349 MangledName = MangledName.dropFront(CrcEndPos + 1); 1350 if (MangledName.empty()) 1351 goto StringLiteralError; 1352 1353 if (initializeOutputStream(nullptr, nullptr, OS, 1024)) 1354 // FIXME: Propagate out-of-memory as an error? 1355 std::terminate(); 1356 if (IsWcharT) { 1357 Result->Char = CharKind::Wchar; 1358 if (StringByteSize > 64) 1359 Result->IsTruncated = true; 1360 1361 while (!MangledName.consumeFront('@')) { 1362 assert(StringByteSize >= 2); 1363 wchar_t W = demangleWcharLiteral(MangledName); 1364 if (StringByteSize != 2 || Result->IsTruncated) 1365 outputEscapedChar(OS, W); 1366 StringByteSize -= 2; 1367 if (Error) 1368 goto StringLiteralError; 1369 } 1370 } else { 1371 // The max byte length is actually 32, but some compilers mangled strings 1372 // incorrectly, so we have to assume it can go higher. 1373 constexpr unsigned MaxStringByteLength = 32 * 4; 1374 uint8_t StringBytes[MaxStringByteLength]; 1375 1376 unsigned BytesDecoded = 0; 1377 while (!MangledName.consumeFront('@')) { 1378 assert(StringByteSize >= 1); 1379 StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName); 1380 } 1381 1382 if (StringByteSize > BytesDecoded) 1383 Result->IsTruncated = true; 1384 1385 unsigned CharBytes = 1386 guessCharByteSize(StringBytes, BytesDecoded, StringByteSize); 1387 assert(StringByteSize % CharBytes == 0); 1388 switch (CharBytes) { 1389 case 1: 1390 Result->Char = CharKind::Char; 1391 break; 1392 case 2: 1393 Result->Char = CharKind::Char16; 1394 break; 1395 case 4: 1396 Result->Char = CharKind::Char32; 1397 break; 1398 default: 1399 LLVM_BUILTIN_UNREACHABLE; 1400 } 1401 const unsigned NumChars = BytesDecoded / CharBytes; 1402 for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) { 1403 unsigned NextChar = 1404 decodeMultiByteChar(StringBytes, CharIndex, CharBytes); 1405 if (CharIndex + 1 < NumChars || Result->IsTruncated) 1406 outputEscapedChar(OS, NextChar); 1407 } 1408 } 1409 1410 OS << '\0'; 1411 ResultBuffer = OS.getBuffer(); 1412 Result->DecodedString = copyString(ResultBuffer); 1413 std::free(ResultBuffer); 1414 return Result; 1415 1416 StringLiteralError: 1417 Error = true; 1418 return nullptr; 1419 } 1420 1421 StringView Demangler::demangleSimpleString(StringView &MangledName, 1422 bool Memorize) { 1423 StringView S; 1424 for (size_t i = 0; i < MangledName.size(); ++i) { 1425 if (MangledName[i] != '@') 1426 continue; 1427 S = MangledName.substr(0, i); 1428 MangledName = MangledName.dropFront(i + 1); 1429 1430 if (Memorize) 1431 memorizeString(S); 1432 return S; 1433 } 1434 1435 Error = true; 1436 return {}; 1437 } 1438 1439 NamedIdentifierNode * 1440 Demangler::demangleAnonymousNamespaceName(StringView &MangledName) { 1441 assert(MangledName.startsWith("?A")); 1442 MangledName.consumeFront("?A"); 1443 1444 NamedIdentifierNode *Node = Arena.alloc<NamedIdentifierNode>(); 1445 Node->Name = "`anonymous namespace'"; 1446 size_t EndPos = MangledName.find('@'); 1447 if (EndPos == StringView::npos) { 1448 Error = true; 1449 return nullptr; 1450 } 1451 StringView NamespaceKey = MangledName.substr(0, EndPos); 1452 memorizeString(NamespaceKey); 1453 MangledName = MangledName.substr(EndPos + 1); 1454 return Node; 1455 } 1456 1457 NamedIdentifierNode * 1458 Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) { 1459 assert(startsWithLocalScopePattern(MangledName)); 1460 1461 NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>(); 1462 MangledName.consumeFront('?'); 1463 auto Number = demangleNumber(MangledName); 1464 assert(!Number.second); 1465 1466 // One ? to terminate the number 1467 MangledName.consumeFront('?'); 1468 1469 assert(!Error); 1470 Node *Scope = parse(MangledName); 1471 if (Error) 1472 return nullptr; 1473 1474 // Render the parent symbol's name into a buffer. 1475 OutputStream OS; 1476 if (initializeOutputStream(nullptr, nullptr, OS, 1024)) 1477 // FIXME: Propagate out-of-memory as an error? 1478 std::terminate(); 1479 OS << '`'; 1480 Scope->output(OS, OF_Default); 1481 OS << '\''; 1482 OS << "::`" << Number.first << "'"; 1483 OS << '\0'; 1484 char *Result = OS.getBuffer(); 1485 Identifier->Name = copyString(Result); 1486 std::free(Result); 1487 return Identifier; 1488 } 1489 1490 // Parses a type name in the form of A@B@C@@ which represents C::B::A. 1491 QualifiedNameNode * 1492 Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) { 1493 IdentifierNode *Identifier = demangleUnqualifiedTypeName(MangledName, true); 1494 if (Error) 1495 return nullptr; 1496 assert(Identifier); 1497 1498 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier); 1499 if (Error) 1500 return nullptr; 1501 assert(QN); 1502 return QN; 1503 } 1504 1505 // Parses a symbol name in the form of A@B@C@@ which represents C::B::A. 1506 // Symbol names have slightly different rules regarding what can appear 1507 // so we separate out the implementations for flexibility. 1508 QualifiedNameNode * 1509 Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) { 1510 // This is the final component of a symbol name (i.e. the leftmost component 1511 // of a mangled name. Since the only possible template instantiation that 1512 // can appear in this context is a function template, and since those are 1513 // not saved for the purposes of name backreferences, only backref simple 1514 // names. 1515 IdentifierNode *Identifier = 1516 demangleUnqualifiedSymbolName(MangledName, NBB_Simple); 1517 if (Error) 1518 return nullptr; 1519 1520 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier); 1521 if (Error) 1522 return nullptr; 1523 1524 if (Identifier->kind() == NodeKind::StructorIdentifier) { 1525 StructorIdentifierNode *SIN = 1526 static_cast<StructorIdentifierNode *>(Identifier); 1527 assert(QN->Components->Count >= 2); 1528 Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2]; 1529 SIN->Class = static_cast<IdentifierNode *>(ClassNode); 1530 } 1531 assert(QN); 1532 return QN; 1533 } 1534 1535 IdentifierNode *Demangler::demangleUnqualifiedTypeName(StringView &MangledName, 1536 bool Memorize) { 1537 // An inner-most name can be a back-reference, because a fully-qualified name 1538 // (e.g. Scope + Inner) can contain other fully qualified names inside of 1539 // them (for example template parameters), and these nested parameters can 1540 // refer to previously mangled types. 1541 if (startsWithDigit(MangledName)) 1542 return demangleBackRefName(MangledName); 1543 1544 if (MangledName.startsWith("?$")) 1545 return demangleTemplateInstantiationName(MangledName, NBB_Template); 1546 1547 return demangleSimpleName(MangledName, Memorize); 1548 } 1549 1550 IdentifierNode * 1551 Demangler::demangleUnqualifiedSymbolName(StringView &MangledName, 1552 NameBackrefBehavior NBB) { 1553 if (startsWithDigit(MangledName)) 1554 return demangleBackRefName(MangledName); 1555 if (MangledName.startsWith("?$")) 1556 return demangleTemplateInstantiationName(MangledName, NBB); 1557 if (MangledName.startsWith('?')) 1558 return demangleFunctionIdentifierCode(MangledName); 1559 return demangleSimpleName(MangledName, (NBB & NBB_Simple) != 0); 1560 } 1561 1562 IdentifierNode *Demangler::demangleNameScopePiece(StringView &MangledName) { 1563 if (startsWithDigit(MangledName)) 1564 return demangleBackRefName(MangledName); 1565 1566 if (MangledName.startsWith("?$")) 1567 return demangleTemplateInstantiationName(MangledName, NBB_Template); 1568 1569 if (MangledName.startsWith("?A")) 1570 return demangleAnonymousNamespaceName(MangledName); 1571 1572 if (startsWithLocalScopePattern(MangledName)) 1573 return demangleLocallyScopedNamePiece(MangledName); 1574 1575 return demangleSimpleName(MangledName, true); 1576 } 1577 1578 static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head, 1579 size_t Count) { 1580 NodeArrayNode *N = Arena.alloc<NodeArrayNode>(); 1581 N->Count = Count; 1582 N->Nodes = Arena.allocArray<Node *>(Count); 1583 for (size_t I = 0; I < Count; ++I) { 1584 N->Nodes[I] = Head->N; 1585 Head = Head->Next; 1586 } 1587 return N; 1588 } 1589 1590 QualifiedNameNode * 1591 Demangler::demangleNameScopeChain(StringView &MangledName, 1592 IdentifierNode *UnqualifiedName) { 1593 NodeList *Head = Arena.alloc<NodeList>(); 1594 1595 Head->N = UnqualifiedName; 1596 1597 size_t Count = 1; 1598 while (!MangledName.consumeFront("@")) { 1599 ++Count; 1600 NodeList *NewHead = Arena.alloc<NodeList>(); 1601 NewHead->Next = Head; 1602 Head = NewHead; 1603 1604 if (MangledName.empty()) { 1605 Error = true; 1606 return nullptr; 1607 } 1608 1609 assert(!Error); 1610 IdentifierNode *Elem = demangleNameScopePiece(MangledName); 1611 if (Error) 1612 return nullptr; 1613 1614 Head->N = Elem; 1615 } 1616 1617 QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>(); 1618 QN->Components = nodeListToNodeArray(Arena, Head, Count); 1619 return QN; 1620 } 1621 1622 FuncClass Demangler::demangleFunctionClass(StringView &MangledName) { 1623 switch (MangledName.popFront()) { 1624 case '9': 1625 return FuncClass(FC_ExternC | FC_NoParameterList); 1626 case 'A': 1627 return FC_Private; 1628 case 'B': 1629 return FuncClass(FC_Private | FC_Far); 1630 case 'C': 1631 return FuncClass(FC_Private | FC_Static); 1632 case 'D': 1633 return FuncClass(FC_Private | FC_Static); 1634 case 'E': 1635 return FuncClass(FC_Private | FC_Virtual); 1636 case 'F': 1637 return FuncClass(FC_Private | FC_Virtual); 1638 case 'G': 1639 return FuncClass(FC_Private | FC_StaticThisAdjust); 1640 case 'H': 1641 return FuncClass(FC_Private | FC_StaticThisAdjust | FC_Far); 1642 case 'I': 1643 return FuncClass(FC_Protected); 1644 case 'J': 1645 return FuncClass(FC_Protected | FC_Far); 1646 case 'K': 1647 return FuncClass(FC_Protected | FC_Static); 1648 case 'L': 1649 return FuncClass(FC_Protected | FC_Static | FC_Far); 1650 case 'M': 1651 return FuncClass(FC_Protected | FC_Virtual); 1652 case 'N': 1653 return FuncClass(FC_Protected | FC_Virtual | FC_Far); 1654 case 'O': 1655 return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust); 1656 case 'P': 1657 return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust | FC_Far); 1658 case 'Q': 1659 return FuncClass(FC_Public); 1660 case 'R': 1661 return FuncClass(FC_Public | FC_Far); 1662 case 'S': 1663 return FuncClass(FC_Public | FC_Static); 1664 case 'T': 1665 return FuncClass(FC_Public | FC_Static | FC_Far); 1666 case 'U': 1667 return FuncClass(FC_Public | FC_Virtual); 1668 case 'V': 1669 return FuncClass(FC_Public | FC_Virtual | FC_Far); 1670 case 'W': 1671 return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust); 1672 case 'X': 1673 return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust | FC_Far); 1674 case 'Y': 1675 return FuncClass(FC_Global); 1676 case 'Z': 1677 return FuncClass(FC_Global | FC_Far); 1678 case '$': { 1679 FuncClass VFlag = FC_VirtualThisAdjust; 1680 if (MangledName.consumeFront('R')) 1681 VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx); 1682 1683 switch (MangledName.popFront()) { 1684 case '0': 1685 return FuncClass(FC_Private | FC_Virtual | VFlag); 1686 case '1': 1687 return FuncClass(FC_Private | FC_Virtual | VFlag | FC_Far); 1688 case '2': 1689 return FuncClass(FC_Protected | FC_Virtual | VFlag); 1690 case '3': 1691 return FuncClass(FC_Protected | FC_Virtual | VFlag | FC_Far); 1692 case '4': 1693 return FuncClass(FC_Public | FC_Virtual | VFlag); 1694 case '5': 1695 return FuncClass(FC_Public | FC_Virtual | VFlag | FC_Far); 1696 } 1697 } 1698 } 1699 1700 Error = true; 1701 return FC_Public; 1702 } 1703 1704 CallingConv Demangler::demangleCallingConvention(StringView &MangledName) { 1705 switch (MangledName.popFront()) { 1706 case 'A': 1707 case 'B': 1708 return CallingConv::Cdecl; 1709 case 'C': 1710 case 'D': 1711 return CallingConv::Pascal; 1712 case 'E': 1713 case 'F': 1714 return CallingConv::Thiscall; 1715 case 'G': 1716 case 'H': 1717 return CallingConv::Stdcall; 1718 case 'I': 1719 case 'J': 1720 return CallingConv::Fastcall; 1721 case 'M': 1722 case 'N': 1723 return CallingConv::Clrcall; 1724 case 'O': 1725 case 'P': 1726 return CallingConv::Eabi; 1727 case 'Q': 1728 return CallingConv::Vectorcall; 1729 } 1730 1731 return CallingConv::None; 1732 } 1733 1734 StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) { 1735 assert(std::isdigit(MangledName.front())); 1736 1737 switch (MangledName.popFront()) { 1738 case '0': 1739 return StorageClass::PrivateStatic; 1740 case '1': 1741 return StorageClass::ProtectedStatic; 1742 case '2': 1743 return StorageClass::PublicStatic; 1744 case '3': 1745 return StorageClass::Global; 1746 case '4': 1747 return StorageClass::FunctionLocalStatic; 1748 } 1749 Error = true; 1750 return StorageClass::None; 1751 } 1752 1753 std::pair<Qualifiers, bool> 1754 Demangler::demangleQualifiers(StringView &MangledName) { 1755 1756 switch (MangledName.popFront()) { 1757 // Member qualifiers 1758 case 'Q': 1759 return std::make_pair(Q_None, true); 1760 case 'R': 1761 return std::make_pair(Q_Const, true); 1762 case 'S': 1763 return std::make_pair(Q_Volatile, true); 1764 case 'T': 1765 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), true); 1766 // Non-Member qualifiers 1767 case 'A': 1768 return std::make_pair(Q_None, false); 1769 case 'B': 1770 return std::make_pair(Q_Const, false); 1771 case 'C': 1772 return std::make_pair(Q_Volatile, false); 1773 case 'D': 1774 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), false); 1775 } 1776 Error = true; 1777 return std::make_pair(Q_None, false); 1778 } 1779 1780 // <variable-type> ::= <type> <cvr-qualifiers> 1781 // ::= <type> <pointee-cvr-qualifiers> # pointers, references 1782 TypeNode *Demangler::demangleType(StringView &MangledName, 1783 QualifierMangleMode QMM) { 1784 Qualifiers Quals = Q_None; 1785 bool IsMember = false; 1786 if (QMM == QualifierMangleMode::Mangle) { 1787 std::tie(Quals, IsMember) = demangleQualifiers(MangledName); 1788 } else if (QMM == QualifierMangleMode::Result) { 1789 if (MangledName.consumeFront('?')) 1790 std::tie(Quals, IsMember) = demangleQualifiers(MangledName); 1791 } 1792 1793 TypeNode *Ty = nullptr; 1794 if (isTagType(MangledName)) 1795 Ty = demangleClassType(MangledName); 1796 else if (isPointerType(MangledName)) { 1797 if (isMemberPointer(MangledName)) 1798 Ty = demangleMemberPointerType(MangledName); 1799 else 1800 Ty = demanglePointerType(MangledName); 1801 } else if (isArrayType(MangledName)) 1802 Ty = demangleArrayType(MangledName); 1803 else if (isFunctionType(MangledName)) { 1804 if (MangledName.consumeFront("$$A8@@")) 1805 Ty = demangleFunctionType(MangledName, true); 1806 else { 1807 assert(MangledName.startsWith("$$A6")); 1808 MangledName.consumeFront("$$A6"); 1809 Ty = demangleFunctionType(MangledName, false); 1810 } 1811 } else if (isCustomType(MangledName)) { 1812 Ty = demangleCustomType(MangledName); 1813 } else { 1814 Ty = demanglePrimitiveType(MangledName); 1815 if (!Ty || Error) 1816 return Ty; 1817 } 1818 1819 Ty->Quals = Qualifiers(Ty->Quals | Quals); 1820 return Ty; 1821 } 1822 1823 void Demangler::demangleThrowSpecification(StringView &MangledName) { 1824 if (MangledName.consumeFront('Z')) 1825 return; 1826 1827 Error = true; 1828 } 1829 1830 FunctionSignatureNode *Demangler::demangleFunctionType(StringView &MangledName, 1831 bool HasThisQuals) { 1832 FunctionSignatureNode *FTy = Arena.alloc<FunctionSignatureNode>(); 1833 1834 if (HasThisQuals) { 1835 FTy->Quals = demanglePointerExtQualifiers(MangledName); 1836 FTy->RefQualifier = demangleFunctionRefQualifier(MangledName); 1837 FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first); 1838 } 1839 1840 // Fields that appear on both member and non-member functions. 1841 FTy->CallConvention = demangleCallingConvention(MangledName); 1842 1843 // <return-type> ::= <type> 1844 // ::= @ # structors (they have no declared return type) 1845 bool IsStructor = MangledName.consumeFront('@'); 1846 if (!IsStructor) 1847 FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result); 1848 1849 FTy->Params = demangleFunctionParameterList(MangledName); 1850 1851 demangleThrowSpecification(MangledName); 1852 1853 return FTy; 1854 } 1855 1856 FunctionSymbolNode * 1857 Demangler::demangleFunctionEncoding(StringView &MangledName) { 1858 FuncClass ExtraFlags = FC_None; 1859 if (MangledName.consumeFront("$$J0")) 1860 ExtraFlags = FC_ExternC; 1861 1862 FuncClass FC = demangleFunctionClass(MangledName); 1863 FC = FuncClass(ExtraFlags | FC); 1864 1865 FunctionSignatureNode *FSN = nullptr; 1866 ThunkSignatureNode *TTN = nullptr; 1867 if (FC & FC_StaticThisAdjust) { 1868 TTN = Arena.alloc<ThunkSignatureNode>(); 1869 TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName); 1870 } else if (FC & FC_VirtualThisAdjust) { 1871 TTN = Arena.alloc<ThunkSignatureNode>(); 1872 if (FC & FC_VirtualThisAdjustEx) { 1873 TTN->ThisAdjust.VBPtrOffset = demangleSigned(MangledName); 1874 TTN->ThisAdjust.VBOffsetOffset = demangleSigned(MangledName); 1875 } 1876 TTN->ThisAdjust.VtordispOffset = demangleSigned(MangledName); 1877 TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName); 1878 } 1879 1880 if (FC & FC_NoParameterList) { 1881 // This is an extern "C" function whose full signature hasn't been mangled. 1882 // This happens when we need to mangle a local symbol inside of an extern 1883 // "C" function. 1884 FSN = Arena.alloc<FunctionSignatureNode>(); 1885 } else { 1886 bool HasThisQuals = !(FC & (FC_Global | FC_Static)); 1887 FSN = demangleFunctionType(MangledName, HasThisQuals); 1888 } 1889 if (TTN) { 1890 *static_cast<FunctionSignatureNode *>(TTN) = *FSN; 1891 FSN = TTN; 1892 } 1893 FSN->FunctionClass = FC; 1894 1895 FunctionSymbolNode *Symbol = Arena.alloc<FunctionSymbolNode>(); 1896 Symbol->Signature = FSN; 1897 return Symbol; 1898 } 1899 1900 CustomTypeNode *Demangler::demangleCustomType(StringView &MangledName) { 1901 assert(MangledName.startsWith('?')); 1902 MangledName.popFront(); 1903 1904 CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>(); 1905 CTN->Identifier = demangleUnqualifiedTypeName(MangledName, true); 1906 if (!MangledName.consumeFront('@')) 1907 Error = true; 1908 if (Error) 1909 return nullptr; 1910 return CTN; 1911 } 1912 1913 // Reads a primitive type. 1914 PrimitiveTypeNode *Demangler::demanglePrimitiveType(StringView &MangledName) { 1915 if (MangledName.consumeFront("$$T")) 1916 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Nullptr); 1917 1918 switch (MangledName.popFront()) { 1919 case 'X': 1920 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Void); 1921 case 'D': 1922 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char); 1923 case 'C': 1924 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Schar); 1925 case 'E': 1926 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uchar); 1927 case 'F': 1928 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Short); 1929 case 'G': 1930 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ushort); 1931 case 'H': 1932 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int); 1933 case 'I': 1934 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint); 1935 case 'J': 1936 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Long); 1937 case 'K': 1938 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ulong); 1939 case 'M': 1940 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Float); 1941 case 'N': 1942 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Double); 1943 case 'O': 1944 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ldouble); 1945 case '_': { 1946 if (MangledName.empty()) { 1947 Error = true; 1948 return nullptr; 1949 } 1950 switch (MangledName.popFront()) { 1951 case 'N': 1952 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Bool); 1953 case 'J': 1954 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int64); 1955 case 'K': 1956 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64); 1957 case 'W': 1958 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar); 1959 case 'S': 1960 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16); 1961 case 'U': 1962 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char32); 1963 } 1964 break; 1965 } 1966 } 1967 Error = true; 1968 return nullptr; 1969 } 1970 1971 TagTypeNode *Demangler::demangleClassType(StringView &MangledName) { 1972 TagTypeNode *TT = nullptr; 1973 1974 switch (MangledName.popFront()) { 1975 case 'T': 1976 TT = Arena.alloc<TagTypeNode>(TagKind::Union); 1977 break; 1978 case 'U': 1979 TT = Arena.alloc<TagTypeNode>(TagKind::Struct); 1980 break; 1981 case 'V': 1982 TT = Arena.alloc<TagTypeNode>(TagKind::Class); 1983 break; 1984 case 'W': 1985 if (MangledName.popFront() != '4') { 1986 Error = true; 1987 return nullptr; 1988 } 1989 TT = Arena.alloc<TagTypeNode>(TagKind::Enum); 1990 break; 1991 default: 1992 assert(false); 1993 } 1994 1995 TT->QualifiedName = demangleFullyQualifiedTypeName(MangledName); 1996 return TT; 1997 } 1998 1999 // <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type> 2000 // # the E is required for 64-bit non-static pointers 2001 PointerTypeNode *Demangler::demanglePointerType(StringView &MangledName) { 2002 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>(); 2003 2004 std::tie(Pointer->Quals, Pointer->Affinity) = 2005 demanglePointerCVQualifiers(MangledName); 2006 2007 if (MangledName.consumeFront("6")) { 2008 Pointer->Pointee = demangleFunctionType(MangledName, false); 2009 return Pointer; 2010 } 2011 2012 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); 2013 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); 2014 2015 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle); 2016 return Pointer; 2017 } 2018 2019 PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) { 2020 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>(); 2021 2022 std::tie(Pointer->Quals, Pointer->Affinity) = 2023 demanglePointerCVQualifiers(MangledName); 2024 assert(Pointer->Affinity == PointerAffinity::Pointer); 2025 2026 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); 2027 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); 2028 2029 if (MangledName.consumeFront("8")) { 2030 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName); 2031 Pointer->Pointee = demangleFunctionType(MangledName, true); 2032 } else { 2033 Qualifiers PointeeQuals = Q_None; 2034 bool IsMember = false; 2035 std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName); 2036 assert(IsMember); 2037 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName); 2038 2039 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop); 2040 Pointer->Pointee->Quals = PointeeQuals; 2041 } 2042 2043 return Pointer; 2044 } 2045 2046 Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) { 2047 Qualifiers Quals = Q_None; 2048 if (MangledName.consumeFront('E')) 2049 Quals = Qualifiers(Quals | Q_Pointer64); 2050 if (MangledName.consumeFront('I')) 2051 Quals = Qualifiers(Quals | Q_Restrict); 2052 if (MangledName.consumeFront('F')) 2053 Quals = Qualifiers(Quals | Q_Unaligned); 2054 2055 return Quals; 2056 } 2057 2058 ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) { 2059 assert(MangledName.front() == 'Y'); 2060 MangledName.popFront(); 2061 2062 uint64_t Rank = 0; 2063 bool IsNegative = false; 2064 std::tie(Rank, IsNegative) = demangleNumber(MangledName); 2065 if (IsNegative || Rank == 0) { 2066 Error = true; 2067 return nullptr; 2068 } 2069 2070 ArrayTypeNode *ATy = Arena.alloc<ArrayTypeNode>(); 2071 NodeList *Head = Arena.alloc<NodeList>(); 2072 NodeList *Tail = Head; 2073 2074 for (uint64_t I = 0; I < Rank; ++I) { 2075 uint64_t D = 0; 2076 std::tie(D, IsNegative) = demangleNumber(MangledName); 2077 if (IsNegative) { 2078 Error = true; 2079 return nullptr; 2080 } 2081 Tail->N = Arena.alloc<IntegerLiteralNode>(D, IsNegative); 2082 if (I + 1 < Rank) { 2083 Tail->Next = Arena.alloc<NodeList>(); 2084 Tail = Tail->Next; 2085 } 2086 } 2087 ATy->Dimensions = nodeListToNodeArray(Arena, Head, Rank); 2088 2089 if (MangledName.consumeFront("$$C")) { 2090 bool IsMember = false; 2091 std::tie(ATy->Quals, IsMember) = demangleQualifiers(MangledName); 2092 if (IsMember) { 2093 Error = true; 2094 return nullptr; 2095 } 2096 } 2097 2098 ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop); 2099 return ATy; 2100 } 2101 2102 // Reads a function or a template parameters. 2103 NodeArrayNode * 2104 Demangler::demangleFunctionParameterList(StringView &MangledName) { 2105 // Empty parameter list. 2106 if (MangledName.consumeFront('X')) 2107 return {}; 2108 2109 NodeList *Head = Arena.alloc<NodeList>(); 2110 NodeList **Current = &Head; 2111 size_t Count = 0; 2112 while (!Error && !MangledName.startsWith('@') && 2113 !MangledName.startsWith('Z')) { 2114 ++Count; 2115 2116 if (startsWithDigit(MangledName)) { 2117 size_t N = MangledName[0] - '0'; 2118 if (N >= Backrefs.FunctionParamCount) { 2119 Error = true; 2120 return {}; 2121 } 2122 MangledName = MangledName.dropFront(); 2123 2124 *Current = Arena.alloc<NodeList>(); 2125 (*Current)->N = Backrefs.FunctionParams[N]; 2126 Current = &(*Current)->Next; 2127 continue; 2128 } 2129 2130 size_t OldSize = MangledName.size(); 2131 2132 *Current = Arena.alloc<NodeList>(); 2133 TypeNode *TN = demangleType(MangledName, QualifierMangleMode::Drop); 2134 2135 (*Current)->N = TN; 2136 2137 size_t CharsConsumed = OldSize - MangledName.size(); 2138 assert(CharsConsumed != 0); 2139 2140 // Single-letter types are ignored for backreferences because memorizing 2141 // them doesn't save anything. 2142 if (Backrefs.FunctionParamCount <= 9 && CharsConsumed > 1) 2143 Backrefs.FunctionParams[Backrefs.FunctionParamCount++] = TN; 2144 2145 Current = &(*Current)->Next; 2146 } 2147 2148 if (Error) 2149 return {}; 2150 2151 NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count); 2152 // A non-empty parameter list is terminated by either 'Z' (variadic) parameter 2153 // list or '@' (non variadic). Careful not to consume "@Z", as in that case 2154 // the following Z could be a throw specifier. 2155 if (MangledName.consumeFront('@')) 2156 return NA; 2157 2158 if (MangledName.consumeFront('Z')) { 2159 // This is a variadic parameter list. We probably need a variadic node to 2160 // append to the end. 2161 return NA; 2162 } 2163 2164 Error = true; 2165 return {}; 2166 } 2167 2168 NodeArrayNode * 2169 Demangler::demangleTemplateParameterList(StringView &MangledName) { 2170 NodeList *Head; 2171 NodeList **Current = &Head; 2172 size_t Count = 0; 2173 2174 while (!Error && !MangledName.startsWith('@')) { 2175 if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") || 2176 MangledName.consumeFront("$$$V") || MangledName.consumeFront("$$Z")) { 2177 // parameter pack separator 2178 continue; 2179 } 2180 2181 ++Count; 2182 2183 // Template parameter lists don't participate in back-referencing. 2184 *Current = Arena.alloc<NodeList>(); 2185 2186 NodeList &TP = **Current; 2187 2188 TemplateParameterReferenceNode *TPRN = nullptr; 2189 if (MangledName.consumeFront("$$Y")) { 2190 // Template alias 2191 TP.N = demangleFullyQualifiedTypeName(MangledName); 2192 } else if (MangledName.consumeFront("$$B")) { 2193 // Array 2194 TP.N = demangleType(MangledName, QualifierMangleMode::Drop); 2195 } else if (MangledName.consumeFront("$$C")) { 2196 // Type has qualifiers. 2197 TP.N = demangleType(MangledName, QualifierMangleMode::Mangle); 2198 } else if (MangledName.startsWith("$1") || MangledName.startsWith("$H") || 2199 MangledName.startsWith("$I") || MangledName.startsWith("$J")) { 2200 // Pointer to member 2201 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2202 TPRN->IsMemberPointer = true; 2203 2204 MangledName = MangledName.dropFront(); 2205 // 1 - single inheritance <name> 2206 // H - multiple inheritance <name> <number> 2207 // I - virtual inheritance <name> <number> <number> <number> 2208 // J - unspecified inheritance <name> <number> <number> <number> 2209 char InheritanceSpecifier = MangledName.popFront(); 2210 SymbolNode *S = nullptr; 2211 if (MangledName.startsWith('?')) { 2212 S = parse(MangledName); 2213 memorizeIdentifier(S->Name->getUnqualifiedIdentifier()); 2214 } 2215 2216 switch (InheritanceSpecifier) { 2217 case 'J': 2218 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2219 demangleSigned(MangledName); 2220 LLVM_FALLTHROUGH; 2221 case 'I': 2222 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2223 demangleSigned(MangledName); 2224 LLVM_FALLTHROUGH; 2225 case 'H': 2226 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2227 demangleSigned(MangledName); 2228 LLVM_FALLTHROUGH; 2229 case '1': 2230 break; 2231 default: 2232 Error = true; 2233 break; 2234 } 2235 TPRN->Affinity = PointerAffinity::Pointer; 2236 TPRN->Symbol = S; 2237 } else if (MangledName.startsWith("$E?")) { 2238 MangledName.consumeFront("$E"); 2239 // Reference to symbol 2240 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2241 TPRN->Symbol = parse(MangledName); 2242 TPRN->Affinity = PointerAffinity::Reference; 2243 } else if (MangledName.startsWith("$F") || MangledName.startsWith("$G")) { 2244 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2245 2246 // Data member pointer. 2247 MangledName = MangledName.dropFront(); 2248 char InheritanceSpecifier = MangledName.popFront(); 2249 2250 switch (InheritanceSpecifier) { 2251 case 'G': 2252 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2253 demangleSigned(MangledName); 2254 LLVM_FALLTHROUGH; 2255 case 'F': 2256 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2257 demangleSigned(MangledName); 2258 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2259 demangleSigned(MangledName); 2260 LLVM_FALLTHROUGH; 2261 case '0': 2262 break; 2263 default: 2264 Error = true; 2265 break; 2266 } 2267 TPRN->IsMemberPointer = true; 2268 2269 } else if (MangledName.consumeFront("$0")) { 2270 // Integral non-type template parameter 2271 bool IsNegative = false; 2272 uint64_t Value = 0; 2273 std::tie(Value, IsNegative) = demangleNumber(MangledName); 2274 2275 TP.N = Arena.alloc<IntegerLiteralNode>(Value, IsNegative); 2276 } else { 2277 TP.N = demangleType(MangledName, QualifierMangleMode::Drop); 2278 } 2279 if (Error) 2280 return nullptr; 2281 2282 Current = &TP.Next; 2283 } 2284 2285 if (Error) 2286 return nullptr; 2287 2288 // Template parameter lists cannot be variadic, so it can only be terminated 2289 // by @. 2290 if (MangledName.consumeFront('@')) 2291 return nodeListToNodeArray(Arena, Head, Count); 2292 Error = true; 2293 return nullptr; 2294 } 2295 2296 void Demangler::dumpBackReferences() { 2297 std::printf("%d function parameter backreferences\n", 2298 (int)Backrefs.FunctionParamCount); 2299 2300 // Create an output stream so we can render each type. 2301 OutputStream OS; 2302 if (initializeOutputStream(nullptr, nullptr, OS, 1024)) 2303 std::terminate(); 2304 for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) { 2305 OS.setCurrentPosition(0); 2306 2307 TypeNode *T = Backrefs.FunctionParams[I]; 2308 T->output(OS, OF_Default); 2309 2310 std::printf(" [%d] - %.*s\n", (int)I, (int)OS.getCurrentPosition(), 2311 OS.getBuffer()); 2312 } 2313 std::free(OS.getBuffer()); 2314 2315 if (Backrefs.FunctionParamCount > 0) 2316 std::printf("\n"); 2317 std::printf("%d name backreferences\n", (int)Backrefs.NamesCount); 2318 for (size_t I = 0; I < Backrefs.NamesCount; ++I) { 2319 std::printf(" [%d] - %.*s\n", (int)I, (int)Backrefs.Names[I]->Name.size(), 2320 Backrefs.Names[I]->Name.begin()); 2321 } 2322 if (Backrefs.NamesCount > 0) 2323 std::printf("\n"); 2324 } 2325 2326 char *llvm::microsoftDemangle(const char *MangledName, char *Buf, size_t *N, 2327 int *Status, MSDemangleFlags Flags) { 2328 int InternalStatus = demangle_success; 2329 Demangler D; 2330 OutputStream S; 2331 2332 StringView Name{MangledName}; 2333 SymbolNode *AST = D.parse(Name); 2334 2335 if (Flags & MSDF_DumpBackrefs) 2336 D.dumpBackReferences(); 2337 2338 if (D.Error) 2339 InternalStatus = demangle_invalid_mangled_name; 2340 else if (initializeOutputStream(Buf, N, S, 1024)) 2341 InternalStatus = demangle_memory_alloc_failure; 2342 else { 2343 AST->output(S, OF_Default); 2344 S += '\0'; 2345 if (N != nullptr) 2346 *N = S.getCurrentPosition(); 2347 Buf = S.getBuffer(); 2348 } 2349 2350 if (Status) 2351 *Status = InternalStatus; 2352 return InternalStatus == demangle_success ? Buf : nullptr; 2353 } 2354