1 //===- MicrosoftDemangle.cpp ----------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is dual licensed under the MIT and the University of Illinois Open 6 // Source Licenses. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines a demangler for MSVC-style mangled symbols. 11 // 12 // This file has no dependencies on the rest of LLVM so that it can be 13 // easily reused in other programs such as libcxxabi. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "MicrosoftDemangleNodes.h" 18 #include "llvm/Demangle/Demangle.h" 19 20 #include "llvm/Demangle/Compiler.h" 21 #include "llvm/Demangle/StringView.h" 22 #include "llvm/Demangle/Utility.h" 23 24 #include <array> 25 #include <cctype> 26 #include <cstdio> 27 #include <tuple> 28 29 using namespace llvm; 30 using namespace ms_demangle; 31 32 static bool startsWithDigit(StringView S) { 33 return !S.empty() && std::isdigit(S.front()); 34 } 35 36 enum class QualifierMangleMode { Drop, Mangle, Result }; 37 38 struct NodeList { 39 Node *N = nullptr; 40 NodeList *Next = nullptr; 41 }; 42 43 enum class FunctionIdentifierCodeGroup { Basic, Under, DoubleUnder }; 44 45 enum NameBackrefBehavior : uint8_t { 46 NBB_None = 0, // don't save any names as backrefs. 47 NBB_Template = 1 << 0, // save template instanations. 48 NBB_Simple = 1 << 1, // save simple names. 49 }; 50 51 static bool isMemberPointer(StringView MangledName) { 52 switch (MangledName.popFront()) { 53 case '$': 54 // This is probably an rvalue reference (e.g. $$Q), and you cannot have an 55 // rvalue reference to a member. 56 return false; 57 case 'A': 58 // 'A' indicates a reference, and you cannot have a reference to a member 59 // function or member. 60 return false; 61 case 'P': 62 case 'Q': 63 case 'R': 64 case 'S': 65 // These 4 values indicate some kind of pointer, but we still don't know 66 // what. 67 break; 68 default: 69 assert(false && "Ty is not a pointer type!"); 70 } 71 72 // If it starts with a number, then 6 indicates a non-member function 73 // pointer, and 8 indicates a member function pointer. 74 if (startsWithDigit(MangledName)) { 75 assert(MangledName[0] == '6' || MangledName[0] == '8'); 76 return (MangledName[0] == '8'); 77 } 78 79 // Remove ext qualifiers since those can appear on either type and are 80 // therefore not indicative. 81 MangledName.consumeFront('E'); // 64-bit 82 MangledName.consumeFront('I'); // restrict 83 MangledName.consumeFront('F'); // unaligned 84 85 assert(!MangledName.empty()); 86 87 // The next value should be either ABCD (non-member) or QRST (member). 88 switch (MangledName.front()) { 89 case 'A': 90 case 'B': 91 case 'C': 92 case 'D': 93 return false; 94 case 'Q': 95 case 'R': 96 case 'S': 97 case 'T': 98 return true; 99 default: 100 assert(false); 101 } 102 return false; 103 } 104 105 static SpecialIntrinsicKind 106 consumeSpecialIntrinsicKind(StringView &MangledName) { 107 if (MangledName.consumeFront("?_7")) 108 return SpecialIntrinsicKind::Vftable; 109 if (MangledName.consumeFront("?_8")) 110 return SpecialIntrinsicKind::Vbtable; 111 if (MangledName.consumeFront("?_9")) 112 return SpecialIntrinsicKind::VcallThunk; 113 if (MangledName.consumeFront("?_A")) 114 return SpecialIntrinsicKind::Typeof; 115 if (MangledName.consumeFront("?_B")) 116 return SpecialIntrinsicKind::LocalStaticGuard; 117 if (MangledName.consumeFront("?_C")) 118 return SpecialIntrinsicKind::StringLiteralSymbol; 119 if (MangledName.consumeFront("?_P")) 120 return SpecialIntrinsicKind::UdtReturning; 121 if (MangledName.consumeFront("?_R0")) 122 return SpecialIntrinsicKind::RttiTypeDescriptor; 123 if (MangledName.consumeFront("?_R1")) 124 return SpecialIntrinsicKind::RttiBaseClassDescriptor; 125 if (MangledName.consumeFront("?_R2")) 126 return SpecialIntrinsicKind::RttiBaseClassArray; 127 if (MangledName.consumeFront("?_R3")) 128 return SpecialIntrinsicKind::RttiClassHierarchyDescriptor; 129 if (MangledName.consumeFront("?_R4")) 130 return SpecialIntrinsicKind::RttiCompleteObjLocator; 131 if (MangledName.consumeFront("?_S")) 132 return SpecialIntrinsicKind::LocalVftable; 133 if (MangledName.consumeFront("?__E")) 134 return SpecialIntrinsicKind::DynamicInitializer; 135 if (MangledName.consumeFront("?__F")) 136 return SpecialIntrinsicKind::DynamicAtexitDestructor; 137 if (MangledName.consumeFront("?__J")) 138 return SpecialIntrinsicKind::LocalStaticThreadGuard; 139 return SpecialIntrinsicKind::None; 140 } 141 142 static bool startsWithLocalScopePattern(StringView S) { 143 if (!S.consumeFront('?')) 144 return false; 145 if (S.size() < 2) 146 return false; 147 148 size_t End = S.find('?'); 149 if (End == StringView::npos) 150 return false; 151 StringView Candidate = S.substr(0, End); 152 if (Candidate.empty()) 153 return false; 154 155 // \?[0-9]\? 156 // ?@? is the discriminator 0. 157 if (Candidate.size() == 1) 158 return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9'); 159 160 // If it's not 0-9, then it's an encoded number terminated with an @ 161 if (Candidate.back() != '@') 162 return false; 163 Candidate = Candidate.dropBack(); 164 165 // An encoded number starts with B-P and all subsequent digits are in A-P. 166 // Note that the reason the first digit cannot be A is two fold. First, it 167 // would create an ambiguity with ?A which delimits the beginning of an 168 // anonymous namespace. Second, A represents 0, and you don't start a multi 169 // digit number with a leading 0. Presumably the anonymous namespace 170 // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J. 171 if (Candidate[0] < 'B' || Candidate[0] > 'P') 172 return false; 173 Candidate = Candidate.dropFront(); 174 while (!Candidate.empty()) { 175 if (Candidate[0] < 'A' || Candidate[0] > 'P') 176 return false; 177 Candidate = Candidate.dropFront(); 178 } 179 180 return true; 181 } 182 183 static bool isTagType(StringView S) { 184 switch (S.front()) { 185 case 'T': // union 186 case 'U': // struct 187 case 'V': // class 188 case 'W': // enum 189 return true; 190 } 191 return false; 192 } 193 194 static bool isCustomType(StringView S) { return S[0] == '?'; } 195 196 static bool isPointerType(StringView S) { 197 if (S.startsWith("$$Q")) // foo && 198 return true; 199 200 switch (S.front()) { 201 case 'A': // foo & 202 case 'P': // foo * 203 case 'Q': // foo *const 204 case 'R': // foo *volatile 205 case 'S': // foo *const volatile 206 return true; 207 } 208 return false; 209 } 210 211 static bool isArrayType(StringView S) { return S[0] == 'Y'; } 212 213 static bool isFunctionType(StringView S) { 214 return S.startsWith("$$A8@@") || S.startsWith("$$A6"); 215 } 216 217 static FunctionRefQualifier 218 demangleFunctionRefQualifier(StringView &MangledName) { 219 if (MangledName.consumeFront('G')) 220 return FunctionRefQualifier::Reference; 221 else if (MangledName.consumeFront('H')) 222 return FunctionRefQualifier::RValueReference; 223 return FunctionRefQualifier::None; 224 } 225 226 static std::pair<Qualifiers, PointerAffinity> 227 demanglePointerCVQualifiers(StringView &MangledName) { 228 if (MangledName.consumeFront("$$Q")) 229 return std::make_pair(Q_None, PointerAffinity::RValueReference); 230 231 switch (MangledName.popFront()) { 232 case 'A': 233 return std::make_pair(Q_None, PointerAffinity::Reference); 234 case 'P': 235 return std::make_pair(Q_None, PointerAffinity::Pointer); 236 case 'Q': 237 return std::make_pair(Q_Const, PointerAffinity::Pointer); 238 case 'R': 239 return std::make_pair(Q_Volatile, PointerAffinity::Pointer); 240 case 'S': 241 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), 242 PointerAffinity::Pointer); 243 default: 244 assert(false && "Ty is not a pointer type!"); 245 } 246 return std::make_pair(Q_None, PointerAffinity::Pointer); 247 } 248 249 namespace { 250 251 struct BackrefContext { 252 static constexpr size_t Max = 10; 253 254 TypeNode *FunctionParams[Max]; 255 size_t FunctionParamCount = 0; 256 257 // The first 10 BackReferences in a mangled name can be back-referenced by 258 // special name @[0-9]. This is a storage for the first 10 BackReferences. 259 NamedIdentifierNode *Names[Max]; 260 size_t NamesCount = 0; 261 }; 262 263 // Demangler class takes the main role in demangling symbols. 264 // It has a set of functions to parse mangled symbols into Type instances. 265 // It also has a set of functions to cnovert Type instances to strings. 266 class Demangler { 267 public: 268 Demangler() = default; 269 virtual ~Demangler() = default; 270 271 // You are supposed to call parse() first and then check if error is true. If 272 // it is false, call output() to write the formatted name to the given stream. 273 SymbolNode *parse(StringView &MangledName); 274 275 // True if an error occurred. 276 bool Error = false; 277 278 void dumpBackReferences(); 279 280 private: 281 SymbolNode *demangleEncodedSymbol(StringView &MangledName, 282 QualifiedNameNode *QN); 283 284 VariableSymbolNode *demangleVariableEncoding(StringView &MangledName, 285 StorageClass SC); 286 FunctionSymbolNode *demangleFunctionEncoding(StringView &MangledName); 287 288 Qualifiers demanglePointerExtQualifiers(StringView &MangledName); 289 290 // Parser functions. This is a recursive-descent parser. 291 TypeNode *demangleType(StringView &MangledName, QualifierMangleMode QMM); 292 PrimitiveTypeNode *demanglePrimitiveType(StringView &MangledName); 293 CustomTypeNode *demangleCustomType(StringView &MangledName); 294 TagTypeNode *demangleClassType(StringView &MangledName); 295 PointerTypeNode *demanglePointerType(StringView &MangledName); 296 PointerTypeNode *demangleMemberPointerType(StringView &MangledName); 297 FunctionSignatureNode *demangleFunctionType(StringView &MangledName, 298 bool HasThisQuals); 299 300 ArrayTypeNode *demangleArrayType(StringView &MangledName); 301 302 NodeArrayNode *demangleTemplateParameterList(StringView &MangledName); 303 NodeArrayNode *demangleFunctionParameterList(StringView &MangledName); 304 305 std::pair<uint64_t, bool> demangleNumber(StringView &MangledName); 306 uint64_t demangleUnsigned(StringView &MangledName); 307 int64_t demangleSigned(StringView &MangledName); 308 309 void memorizeString(StringView s); 310 void memorizeIdentifier(IdentifierNode *Identifier); 311 312 /// Allocate a copy of \p Borrowed into memory that we own. 313 StringView copyString(StringView Borrowed); 314 315 QualifiedNameNode *demangleFullyQualifiedTypeName(StringView &MangledName); 316 QualifiedNameNode *demangleFullyQualifiedSymbolName(StringView &MangledName); 317 318 IdentifierNode *demangleUnqualifiedTypeName(StringView &MangledName, 319 bool Memorize); 320 IdentifierNode *demangleUnqualifiedSymbolName(StringView &MangledName, 321 NameBackrefBehavior NBB); 322 323 QualifiedNameNode *demangleNameScopeChain(StringView &MangledName, 324 IdentifierNode *UnqualifiedName); 325 IdentifierNode *demangleNameScopePiece(StringView &MangledName); 326 327 NamedIdentifierNode *demangleBackRefName(StringView &MangledName); 328 IdentifierNode *demangleTemplateInstantiationName(StringView &MangledName, 329 NameBackrefBehavior NBB); 330 IdentifierNode *demangleFunctionIdentifierCode(StringView &MangledName); 331 IdentifierNode * 332 demangleFunctionIdentifierCode(StringView &MangledName, 333 FunctionIdentifierCodeGroup Group); 334 StructorIdentifierNode *demangleStructorIdentifier(StringView &MangledName, 335 bool IsDestructor); 336 ConversionOperatorIdentifierNode * 337 demangleConversionOperatorIdentifier(StringView &MangledName); 338 LiteralOperatorIdentifierNode * 339 demangleLiteralOperatorIdentifier(StringView &MangledName); 340 341 SymbolNode *demangleSpecialIntrinsic(StringView &MangledName); 342 SpecialTableSymbolNode * 343 demangleSpecialTableSymbolNode(StringView &MangledName, 344 SpecialIntrinsicKind SIK); 345 LocalStaticGuardVariableNode * 346 demangleLocalStaticGuard(StringView &MangledName); 347 VariableSymbolNode *demangleUntypedVariable(ArenaAllocator &Arena, 348 StringView &MangledName, 349 StringView VariableName); 350 VariableSymbolNode * 351 demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena, 352 StringView &MangledName); 353 FunctionSymbolNode *demangleInitFiniStub(StringView &MangledName, 354 bool IsDestructor); 355 356 NamedIdentifierNode *demangleSimpleName(StringView &MangledName, 357 bool Memorize); 358 NamedIdentifierNode *demangleAnonymousNamespaceName(StringView &MangledName); 359 NamedIdentifierNode *demangleLocallyScopedNamePiece(StringView &MangledName); 360 EncodedStringLiteralNode *demangleStringLiteral(StringView &MangledName); 361 FunctionSymbolNode *demangleVcallThunkNode(StringView &MangledName); 362 363 StringView demangleSimpleString(StringView &MangledName, bool Memorize); 364 365 FuncClass demangleFunctionClass(StringView &MangledName); 366 CallingConv demangleCallingConvention(StringView &MangledName); 367 StorageClass demangleVariableStorageClass(StringView &MangledName); 368 void demangleThrowSpecification(StringView &MangledName); 369 wchar_t demangleWcharLiteral(StringView &MangledName); 370 uint8_t demangleCharLiteral(StringView &MangledName); 371 372 std::pair<Qualifiers, bool> demangleQualifiers(StringView &MangledName); 373 374 // Memory allocator. 375 ArenaAllocator Arena; 376 377 // A single type uses one global back-ref table for all function params. 378 // This means back-refs can even go "into" other types. Examples: 379 // 380 // // Second int* is a back-ref to first. 381 // void foo(int *, int*); 382 // 383 // // Second int* is not a back-ref to first (first is not a function param). 384 // int* foo(int*); 385 // 386 // // Second int* is a back-ref to first (ALL function types share the same 387 // // back-ref map. 388 // using F = void(*)(int*); 389 // F G(int *); 390 BackrefContext Backrefs; 391 }; 392 } // namespace 393 394 StringView Demangler::copyString(StringView Borrowed) { 395 char *Stable = Arena.allocUnalignedBuffer(Borrowed.size() + 1); 396 std::strcpy(Stable, Borrowed.begin()); 397 398 return {Stable, Borrowed.size()}; 399 } 400 401 SpecialTableSymbolNode * 402 Demangler::demangleSpecialTableSymbolNode(StringView &MangledName, 403 SpecialIntrinsicKind K) { 404 NamedIdentifierNode *NI = Arena.alloc<NamedIdentifierNode>(); 405 switch (K) { 406 case SpecialIntrinsicKind::Vftable: 407 NI->Name = "`vftable'"; 408 break; 409 case SpecialIntrinsicKind::Vbtable: 410 NI->Name = "`vbtable'"; 411 break; 412 case SpecialIntrinsicKind::LocalVftable: 413 NI->Name = "`local vftable'"; 414 break; 415 case SpecialIntrinsicKind::RttiCompleteObjLocator: 416 NI->Name = "`RTTI Complete Object Locator'"; 417 break; 418 default: 419 LLVM_BUILTIN_UNREACHABLE; 420 } 421 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI); 422 SpecialTableSymbolNode *STSN = Arena.alloc<SpecialTableSymbolNode>(); 423 STSN->Name = QN; 424 bool IsMember = false; 425 char Front = MangledName.popFront(); 426 if (Front != '6' && Front != '7') { 427 Error = true; 428 return nullptr; 429 } 430 431 std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName); 432 if (!MangledName.consumeFront('@')) 433 STSN->TargetName = demangleFullyQualifiedTypeName(MangledName); 434 return STSN; 435 } 436 437 LocalStaticGuardVariableNode * 438 Demangler::demangleLocalStaticGuard(StringView &MangledName) { 439 LocalStaticGuardIdentifierNode *LSGI = 440 Arena.alloc<LocalStaticGuardIdentifierNode>(); 441 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI); 442 LocalStaticGuardVariableNode *LSGVN = 443 Arena.alloc<LocalStaticGuardVariableNode>(); 444 LSGVN->Name = QN; 445 446 if (MangledName.consumeFront("4IA")) 447 LSGVN->IsVisible = false; 448 else if (MangledName.consumeFront("5")) 449 LSGVN->IsVisible = true; 450 else { 451 Error = true; 452 return nullptr; 453 } 454 455 if (!MangledName.empty()) 456 LSGI->ScopeIndex = demangleUnsigned(MangledName); 457 return LSGVN; 458 } 459 460 static NamedIdentifierNode *synthesizeNamedIdentifier(ArenaAllocator &Arena, 461 StringView Name) { 462 NamedIdentifierNode *Id = Arena.alloc<NamedIdentifierNode>(); 463 Id->Name = Name; 464 return Id; 465 } 466 467 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena, 468 IdentifierNode *Identifier) { 469 QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>(); 470 QN->Components = Arena.alloc<NodeArrayNode>(); 471 QN->Components->Count = 1; 472 QN->Components->Nodes = Arena.allocArray<Node *>(1); 473 QN->Components->Nodes[0] = Identifier; 474 return QN; 475 } 476 477 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena, 478 StringView Name) { 479 NamedIdentifierNode *Id = synthesizeNamedIdentifier(Arena, Name); 480 return synthesizeQualifiedName(Arena, Id); 481 } 482 483 static VariableSymbolNode *synthesizeVariable(ArenaAllocator &Arena, 484 TypeNode *Type, 485 StringView VariableName) { 486 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 487 VSN->Type = Type; 488 VSN->Name = synthesizeQualifiedName(Arena, VariableName); 489 return VSN; 490 } 491 492 VariableSymbolNode *Demangler::demangleUntypedVariable( 493 ArenaAllocator &Arena, StringView &MangledName, StringView VariableName) { 494 NamedIdentifierNode *NI = synthesizeNamedIdentifier(Arena, VariableName); 495 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI); 496 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 497 VSN->Name = QN; 498 if (MangledName.consumeFront("8")) 499 return VSN; 500 501 Error = true; 502 return nullptr; 503 } 504 505 VariableSymbolNode * 506 Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena, 507 StringView &MangledName) { 508 RttiBaseClassDescriptorNode *RBCDN = 509 Arena.alloc<RttiBaseClassDescriptorNode>(); 510 RBCDN->NVOffset = demangleUnsigned(MangledName); 511 RBCDN->VBPtrOffset = demangleSigned(MangledName); 512 RBCDN->VBTableOffset = demangleUnsigned(MangledName); 513 RBCDN->Flags = demangleUnsigned(MangledName); 514 if (Error) 515 return nullptr; 516 517 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 518 VSN->Name = demangleNameScopeChain(MangledName, RBCDN); 519 MangledName.consumeFront('8'); 520 return VSN; 521 } 522 523 FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName, 524 bool IsDestructor) { 525 DynamicStructorIdentifierNode *DSIN = 526 Arena.alloc<DynamicStructorIdentifierNode>(); 527 DSIN->IsDestructor = IsDestructor; 528 529 // What follows is a main symbol name. This may include namespaces or class 530 // back references. 531 QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName); 532 533 SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN); 534 FunctionSymbolNode *FSN = nullptr; 535 Symbol->Name = QN; 536 537 if (Symbol->kind() == NodeKind::VariableSymbol) { 538 DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol); 539 if (!MangledName.consumeFront('@')) { 540 Error = true; 541 return nullptr; 542 } 543 544 FSN = demangleFunctionEncoding(MangledName); 545 FSN->Name = synthesizeQualifiedName(Arena, DSIN); 546 } else { 547 FSN = static_cast<FunctionSymbolNode *>(Symbol); 548 DSIN->Name = Symbol->Name; 549 FSN->Name = synthesizeQualifiedName(Arena, DSIN); 550 } 551 552 return FSN; 553 } 554 555 SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) { 556 SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName); 557 if (SIK == SpecialIntrinsicKind::None) 558 return nullptr; 559 560 switch (SIK) { 561 case SpecialIntrinsicKind::StringLiteralSymbol: 562 return demangleStringLiteral(MangledName); 563 case SpecialIntrinsicKind::Vftable: 564 case SpecialIntrinsicKind::Vbtable: 565 case SpecialIntrinsicKind::LocalVftable: 566 case SpecialIntrinsicKind::RttiCompleteObjLocator: 567 return demangleSpecialTableSymbolNode(MangledName, SIK); 568 case SpecialIntrinsicKind::VcallThunk: 569 return demangleVcallThunkNode(MangledName); 570 case SpecialIntrinsicKind::LocalStaticGuard: 571 return demangleLocalStaticGuard(MangledName); 572 case SpecialIntrinsicKind::RttiTypeDescriptor: { 573 TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result); 574 if (Error) 575 break; 576 if (!MangledName.consumeFront("@8")) 577 break; 578 if (!MangledName.empty()) 579 break; 580 return synthesizeVariable(Arena, T, "`RTTI Type Descriptor'"); 581 } 582 case SpecialIntrinsicKind::RttiBaseClassArray: 583 return demangleUntypedVariable(Arena, MangledName, 584 "`RTTI Base Class Array'"); 585 case SpecialIntrinsicKind::RttiClassHierarchyDescriptor: 586 return demangleUntypedVariable(Arena, MangledName, 587 "`RTTI Class Hierarchy Descriptor'"); 588 case SpecialIntrinsicKind::RttiBaseClassDescriptor: 589 return demangleRttiBaseClassDescriptorNode(Arena, MangledName); 590 case SpecialIntrinsicKind::DynamicInitializer: 591 return demangleInitFiniStub(MangledName, false); 592 case SpecialIntrinsicKind::DynamicAtexitDestructor: 593 return demangleInitFiniStub(MangledName, true); 594 default: 595 break; 596 } 597 Error = true; 598 return nullptr; 599 } 600 601 IdentifierNode * 602 Demangler::demangleFunctionIdentifierCode(StringView &MangledName) { 603 assert(MangledName.startsWith('?')); 604 MangledName = MangledName.dropFront(); 605 606 if (MangledName.consumeFront("__")) 607 return demangleFunctionIdentifierCode( 608 MangledName, FunctionIdentifierCodeGroup::DoubleUnder); 609 else if (MangledName.consumeFront("_")) 610 return demangleFunctionIdentifierCode(MangledName, 611 FunctionIdentifierCodeGroup::Under); 612 return demangleFunctionIdentifierCode(MangledName, 613 FunctionIdentifierCodeGroup::Basic); 614 } 615 616 StructorIdentifierNode * 617 Demangler::demangleStructorIdentifier(StringView &MangledName, 618 bool IsDestructor) { 619 StructorIdentifierNode *N = Arena.alloc<StructorIdentifierNode>(); 620 N->IsDestructor = IsDestructor; 621 return N; 622 } 623 624 ConversionOperatorIdentifierNode * 625 Demangler::demangleConversionOperatorIdentifier(StringView &MangledName) { 626 ConversionOperatorIdentifierNode *N = 627 Arena.alloc<ConversionOperatorIdentifierNode>(); 628 return N; 629 } 630 631 LiteralOperatorIdentifierNode * 632 Demangler::demangleLiteralOperatorIdentifier(StringView &MangledName) { 633 LiteralOperatorIdentifierNode *N = 634 Arena.alloc<LiteralOperatorIdentifierNode>(); 635 N->Name = demangleSimpleString(MangledName, false); 636 return N; 637 } 638 639 IntrinsicFunctionKind 640 translateIntrinsicFunctionCode(char CH, FunctionIdentifierCodeGroup Group) { 641 // Not all ? identifiers are intrinsics *functions*. This function only maps 642 // operator codes for the special functions, all others are handled elsewhere, 643 // hence the IFK::None entries in the table. 644 using IFK = IntrinsicFunctionKind; 645 static IFK Basic[36] = { 646 IFK::None, // ?0 # Foo::Foo() 647 IFK::None, // ?1 # Foo::~Foo() 648 IFK::New, // ?2 # operator new 649 IFK::Delete, // ?3 # operator delete 650 IFK::Assign, // ?4 # operator= 651 IFK::RightShift, // ?5 # operator>> 652 IFK::LeftShift, // ?6 # operator<< 653 IFK::LogicalNot, // ?7 # operator! 654 IFK::Equals, // ?8 # operator== 655 IFK::NotEquals, // ?9 # operator!= 656 IFK::ArraySubscript, // ?A # operator[] 657 IFK::None, // ?B # Foo::operator <type>() 658 IFK::Pointer, // ?C # operator-> 659 IFK::Dereference, // ?D # operator* 660 IFK::Increment, // ?E # operator++ 661 IFK::Decrement, // ?F # operator-- 662 IFK::Minus, // ?G # operator- 663 IFK::Plus, // ?H # operator+ 664 IFK::BitwiseAnd, // ?I # operator& 665 IFK::MemberPointer, // ?J # operator->* 666 IFK::Divide, // ?K # operator/ 667 IFK::Modulus, // ?L # operator% 668 IFK::LessThan, // ?M operator< 669 IFK::LessThanEqual, // ?N operator<= 670 IFK::GreaterThan, // ?O operator> 671 IFK::GreaterThanEqual, // ?P operator>= 672 IFK::Comma, // ?Q operator, 673 IFK::Parens, // ?R operator() 674 IFK::BitwiseNot, // ?S operator~ 675 IFK::BitwiseXor, // ?T operator^ 676 IFK::BitwiseOr, // ?U operator| 677 IFK::LogicalAnd, // ?V operator&& 678 IFK::LogicalOr, // ?W operator|| 679 IFK::TimesEqual, // ?X operator*= 680 IFK::PlusEqual, // ?Y operator+= 681 IFK::MinusEqual, // ?Z operator-= 682 }; 683 static IFK Under[36] = { 684 IFK::DivEqual, // ?_0 operator/= 685 IFK::ModEqual, // ?_1 operator%= 686 IFK::RshEqual, // ?_2 operator>>= 687 IFK::LshEqual, // ?_3 operator<<= 688 IFK::BitwiseAndEqual, // ?_4 operator&= 689 IFK::BitwiseOrEqual, // ?_5 operator|= 690 IFK::BitwiseXorEqual, // ?_6 operator^= 691 IFK::None, // ?_7 # vftable 692 IFK::None, // ?_8 # vbtable 693 IFK::None, // ?_9 # vcall 694 IFK::None, // ?_A # typeof 695 IFK::None, // ?_B # local static guard 696 IFK::None, // ?_C # string literal 697 IFK::VbaseDtor, // ?_D # vbase destructor 698 IFK::VecDelDtor, // ?_E # vector deleting destructor 699 IFK::DefaultCtorClosure, // ?_F # default constructor closure 700 IFK::ScalarDelDtor, // ?_G # scalar deleting destructor 701 IFK::VecCtorIter, // ?_H # vector constructor iterator 702 IFK::VecDtorIter, // ?_I # vector destructor iterator 703 IFK::VecVbaseCtorIter, // ?_J # vector vbase constructor iterator 704 IFK::VdispMap, // ?_K # virtual displacement map 705 IFK::EHVecCtorIter, // ?_L # eh vector constructor iterator 706 IFK::EHVecDtorIter, // ?_M # eh vector destructor iterator 707 IFK::EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator 708 IFK::CopyCtorClosure, // ?_O # copy constructor closure 709 IFK::None, // ?_P<name> # udt returning <name> 710 IFK::None, // ?_Q # <unknown> 711 IFK::None, // ?_R0 - ?_R4 # RTTI Codes 712 IFK::None, // ?_S # local vftable 713 IFK::LocalVftableCtorClosure, // ?_T # local vftable constructor closure 714 IFK::ArrayNew, // ?_U operator new[] 715 IFK::ArrayDelete, // ?_V operator delete[] 716 IFK::None, // ?_W <unused> 717 IFK::None, // ?_X <unused> 718 IFK::None, // ?_Y <unused> 719 IFK::None, // ?_Z <unused> 720 }; 721 static IFK DoubleUnder[36] = { 722 IFK::None, // ?__0 <unused> 723 IFK::None, // ?__1 <unused> 724 IFK::None, // ?__2 <unused> 725 IFK::None, // ?__3 <unused> 726 IFK::None, // ?__4 <unused> 727 IFK::None, // ?__5 <unused> 728 IFK::None, // ?__6 <unused> 729 IFK::None, // ?__7 <unused> 730 IFK::None, // ?__8 <unused> 731 IFK::None, // ?__9 <unused> 732 IFK::ManVectorCtorIter, // ?__A managed vector ctor iterator 733 IFK::ManVectorDtorIter, // ?__B managed vector dtor iterator 734 IFK::EHVectorCopyCtorIter, // ?__C EH vector copy ctor iterator 735 IFK::EHVectorVbaseCopyCtorIter, // ?__D EH vector vbase copy ctor iter 736 IFK::None, // ?__E dynamic initializer for `T' 737 IFK::None, // ?__F dynamic atexit destructor for `T' 738 IFK::VectorCopyCtorIter, // ?__G vector copy constructor iter 739 IFK::VectorVbaseCopyCtorIter, // ?__H vector vbase copy ctor iter 740 IFK::ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy ctor 741 // iter 742 IFK::None, // ?__J local static thread guard 743 IFK::None, // ?__K operator ""_name 744 IFK::CoAwait, // ?__L co_await 745 IFK::None, // ?__M <unused> 746 IFK::None, // ?__N <unused> 747 IFK::None, // ?__O <unused> 748 IFK::None, // ?__P <unused> 749 IFK::None, // ?__Q <unused> 750 IFK::None, // ?__R <unused> 751 IFK::None, // ?__S <unused> 752 IFK::None, // ?__T <unused> 753 IFK::None, // ?__U <unused> 754 IFK::None, // ?__V <unused> 755 IFK::None, // ?__W <unused> 756 IFK::None, // ?__X <unused> 757 IFK::None, // ?__Y <unused> 758 IFK::None, // ?__Z <unused> 759 }; 760 761 int Index = (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10); 762 switch (Group) { 763 case FunctionIdentifierCodeGroup::Basic: 764 return Basic[Index]; 765 case FunctionIdentifierCodeGroup::Under: 766 return Under[Index]; 767 case FunctionIdentifierCodeGroup::DoubleUnder: 768 return DoubleUnder[Index]; 769 } 770 LLVM_BUILTIN_UNREACHABLE; 771 } 772 773 IdentifierNode * 774 Demangler::demangleFunctionIdentifierCode(StringView &MangledName, 775 FunctionIdentifierCodeGroup Group) { 776 switch (Group) { 777 case FunctionIdentifierCodeGroup::Basic: 778 switch (char CH = MangledName.popFront()) { 779 case '0': 780 case '1': 781 return demangleStructorIdentifier(MangledName, CH == '1'); 782 case 'B': 783 return demangleConversionOperatorIdentifier(MangledName); 784 default: 785 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 786 translateIntrinsicFunctionCode(CH, Group)); 787 } 788 break; 789 case FunctionIdentifierCodeGroup::Under: 790 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 791 translateIntrinsicFunctionCode(MangledName.popFront(), Group)); 792 case FunctionIdentifierCodeGroup::DoubleUnder: 793 switch (char CH = MangledName.popFront()) { 794 case 'K': 795 return demangleLiteralOperatorIdentifier(MangledName); 796 default: 797 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 798 translateIntrinsicFunctionCode(CH, Group)); 799 } 800 } 801 // No Mangling Yet: Spaceship, // operator<=> 802 803 return nullptr; 804 } 805 806 SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName, 807 QualifiedNameNode *Name) { 808 // Read a variable. 809 switch (MangledName.front()) { 810 case '0': 811 case '1': 812 case '2': 813 case '3': 814 case '4': { 815 StorageClass SC = demangleVariableStorageClass(MangledName); 816 return demangleVariableEncoding(MangledName, SC); 817 } 818 case '8': 819 return nullptr; 820 } 821 FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName); 822 823 IdentifierNode *UQN = Name->getUnqualifiedIdentifier(); 824 if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) { 825 ConversionOperatorIdentifierNode *COIN = 826 static_cast<ConversionOperatorIdentifierNode *>(UQN); 827 COIN->TargetType = FSN->Signature->ReturnType; 828 } 829 return FSN; 830 } 831 832 // Parser entry point. 833 SymbolNode *Demangler::parse(StringView &MangledName) { 834 // We can't demangle MD5 names, just output them as-is. 835 // Also, MSVC-style mangled symbols must start with '?'. 836 if (MangledName.startsWith("??@")) { 837 // This is an MD5 mangled name. We can't demangle it, just return the 838 // mangled name. 839 SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol); 840 S->Name = synthesizeQualifiedName(Arena, MangledName); 841 return S; 842 } 843 844 if (!MangledName.startsWith('?')) { 845 Error = true; 846 return nullptr; 847 } 848 849 MangledName.consumeFront('?'); 850 851 // ?$ is a template instantiation, but all other names that start with ? are 852 // operators / special names. 853 if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName)) 854 return SI; 855 856 // What follows is a main symbol name. This may include namespaces or class 857 // back references. 858 QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName); 859 if (Error) 860 return nullptr; 861 862 SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN); 863 if (Symbol) { 864 Symbol->Name = QN; 865 } 866 867 if (Error) 868 return nullptr; 869 870 return Symbol; 871 } 872 873 // <type-encoding> ::= <storage-class> <variable-type> 874 // <storage-class> ::= 0 # private static member 875 // ::= 1 # protected static member 876 // ::= 2 # public static member 877 // ::= 3 # global 878 // ::= 4 # static local 879 880 VariableSymbolNode *Demangler::demangleVariableEncoding(StringView &MangledName, 881 StorageClass SC) { 882 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 883 884 VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop); 885 VSN->SC = SC; 886 887 // <variable-type> ::= <type> <cvr-qualifiers> 888 // ::= <type> <pointee-cvr-qualifiers> # pointers, references 889 switch (VSN->Type->kind()) { 890 case NodeKind::PointerType: { 891 PointerTypeNode *PTN = static_cast<PointerTypeNode *>(VSN->Type); 892 893 Qualifiers ExtraChildQuals = Q_None; 894 PTN->Quals = Qualifiers(VSN->Type->Quals | 895 demanglePointerExtQualifiers(MangledName)); 896 897 bool IsMember = false; 898 std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName); 899 900 if (PTN->ClassParent) { 901 QualifiedNameNode *BackRefName = 902 demangleFullyQualifiedTypeName(MangledName); 903 (void)BackRefName; 904 } 905 PTN->Pointee->Quals = Qualifiers(PTN->Pointee->Quals | ExtraChildQuals); 906 907 break; 908 } 909 default: 910 VSN->Type->Quals = demangleQualifiers(MangledName).first; 911 break; 912 } 913 914 return VSN; 915 } 916 917 // Sometimes numbers are encoded in mangled symbols. For example, 918 // "int (*x)[20]" is a valid C type (x is a pointer to an array of 919 // length 20), so we need some way to embed numbers as part of symbols. 920 // This function parses it. 921 // 922 // <number> ::= [?] <non-negative integer> 923 // 924 // <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10 925 // ::= <hex digit>+ @ # when Numbrer == 0 or >= 10 926 // 927 // <hex-digit> ::= [A-P] # A = 0, B = 1, ... 928 std::pair<uint64_t, bool> Demangler::demangleNumber(StringView &MangledName) { 929 bool IsNegative = MangledName.consumeFront('?'); 930 931 if (startsWithDigit(MangledName)) { 932 uint64_t Ret = MangledName[0] - '0' + 1; 933 MangledName = MangledName.dropFront(1); 934 return {Ret, IsNegative}; 935 } 936 937 uint64_t Ret = 0; 938 for (size_t i = 0; i < MangledName.size(); ++i) { 939 char C = MangledName[i]; 940 if (C == '@') { 941 MangledName = MangledName.dropFront(i + 1); 942 return {Ret, IsNegative}; 943 } 944 if ('A' <= C && C <= 'P') { 945 Ret = (Ret << 4) + (C - 'A'); 946 continue; 947 } 948 break; 949 } 950 951 Error = true; 952 return {0ULL, false}; 953 } 954 955 uint64_t Demangler::demangleUnsigned(StringView &MangledName) { 956 bool IsNegative = false; 957 uint64_t Number = 0; 958 std::tie(Number, IsNegative) = demangleNumber(MangledName); 959 if (IsNegative) 960 Error = true; 961 return Number; 962 } 963 964 int64_t Demangler::demangleSigned(StringView &MangledName) { 965 bool IsNegative = false; 966 uint64_t Number = 0; 967 std::tie(Number, IsNegative) = demangleNumber(MangledName); 968 if (Number > INT64_MAX) 969 Error = true; 970 int64_t I = static_cast<int64_t>(Number); 971 return IsNegative ? -I : I; 972 } 973 974 // First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9. 975 // Memorize it. 976 void Demangler::memorizeString(StringView S) { 977 if (Backrefs.NamesCount >= BackrefContext::Max) 978 return; 979 for (size_t i = 0; i < Backrefs.NamesCount; ++i) 980 if (S == Backrefs.Names[i]->Name) 981 return; 982 NamedIdentifierNode *N = Arena.alloc<NamedIdentifierNode>(); 983 N->Name = S; 984 Backrefs.Names[Backrefs.NamesCount++] = N; 985 } 986 987 NamedIdentifierNode *Demangler::demangleBackRefName(StringView &MangledName) { 988 assert(startsWithDigit(MangledName)); 989 990 size_t I = MangledName[0] - '0'; 991 if (I >= Backrefs.NamesCount) { 992 Error = true; 993 return nullptr; 994 } 995 996 MangledName = MangledName.dropFront(); 997 return Backrefs.Names[I]; 998 } 999 1000 void Demangler::memorizeIdentifier(IdentifierNode *Identifier) { 1001 // Render this class template name into a string buffer so that we can 1002 // memorize it for the purpose of back-referencing. 1003 OutputStream OS = OutputStream::create(nullptr, nullptr, 1024); 1004 Identifier->output(OS, OF_Default); 1005 OS << '\0'; 1006 char *Name = OS.getBuffer(); 1007 1008 StringView Owned = copyString(Name); 1009 memorizeString(Owned); 1010 std::free(Name); 1011 } 1012 1013 IdentifierNode * 1014 Demangler::demangleTemplateInstantiationName(StringView &MangledName, 1015 NameBackrefBehavior NBB) { 1016 assert(MangledName.startsWith("?$")); 1017 MangledName.consumeFront("?$"); 1018 1019 BackrefContext OuterContext; 1020 std::swap(OuterContext, Backrefs); 1021 1022 IdentifierNode *Identifier = 1023 demangleUnqualifiedSymbolName(MangledName, NBB_Simple); 1024 if (!Error) 1025 Identifier->TemplateParams = demangleTemplateParameterList(MangledName); 1026 1027 std::swap(OuterContext, Backrefs); 1028 if (Error) 1029 return nullptr; 1030 1031 if (NBB & NBB_Template) 1032 memorizeIdentifier(Identifier); 1033 1034 return Identifier; 1035 } 1036 1037 NamedIdentifierNode *Demangler::demangleSimpleName(StringView &MangledName, 1038 bool Memorize) { 1039 StringView S = demangleSimpleString(MangledName, Memorize); 1040 if (Error) 1041 return nullptr; 1042 1043 NamedIdentifierNode *Name = Arena.alloc<NamedIdentifierNode>(); 1044 Name->Name = S; 1045 return Name; 1046 } 1047 1048 static bool isRebasedHexDigit(char C) { return (C >= 'A' && C <= 'P'); } 1049 1050 static uint8_t rebasedHexDigitToNumber(char C) { 1051 assert(isRebasedHexDigit(C)); 1052 return (C <= 'J') ? (C - 'A') : (10 + C - 'K'); 1053 } 1054 1055 uint8_t Demangler::demangleCharLiteral(StringView &MangledName) { 1056 if (!MangledName.startsWith('?')) 1057 return MangledName.popFront(); 1058 1059 MangledName = MangledName.dropFront(); 1060 if (MangledName.empty()) 1061 goto CharLiteralError; 1062 1063 if (MangledName.consumeFront('$')) { 1064 // Two hex digits 1065 if (MangledName.size() < 2) 1066 goto CharLiteralError; 1067 StringView Nibbles = MangledName.substr(0, 2); 1068 if (!isRebasedHexDigit(Nibbles[0]) || !isRebasedHexDigit(Nibbles[1])) 1069 goto CharLiteralError; 1070 // Don't append the null terminator. 1071 uint8_t C1 = rebasedHexDigitToNumber(Nibbles[0]); 1072 uint8_t C2 = rebasedHexDigitToNumber(Nibbles[1]); 1073 MangledName = MangledName.dropFront(2); 1074 return (C1 << 4) | C2; 1075 } 1076 1077 if (startsWithDigit(MangledName)) { 1078 const char *Lookup = ",/\\:. \n\t'-"; 1079 char C = Lookup[MangledName[0] - '0']; 1080 MangledName = MangledName.dropFront(); 1081 return C; 1082 } 1083 1084 if (MangledName[0] >= 'a' && MangledName[0] <= 'z') { 1085 char Lookup[26] = {'\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7', 1086 '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE', 1087 '\xEF', '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5', 1088 '\xF6', '\xF7', '\xF8', '\xF9', '\xFA'}; 1089 char C = Lookup[MangledName[0] - 'a']; 1090 MangledName = MangledName.dropFront(); 1091 return C; 1092 } 1093 1094 if (MangledName[0] >= 'A' && MangledName[0] <= 'Z') { 1095 char Lookup[26] = {'\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7', 1096 '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE', 1097 '\xCF', '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5', 1098 '\xD6', '\xD7', '\xD8', '\xD9', '\xDA'}; 1099 char C = Lookup[MangledName[0] - 'A']; 1100 MangledName = MangledName.dropFront(); 1101 return C; 1102 } 1103 1104 CharLiteralError: 1105 Error = true; 1106 return '\0'; 1107 } 1108 1109 wchar_t Demangler::demangleWcharLiteral(StringView &MangledName) { 1110 uint8_t C1, C2; 1111 1112 C1 = demangleCharLiteral(MangledName); 1113 if (Error) 1114 goto WCharLiteralError; 1115 C2 = demangleCharLiteral(MangledName); 1116 if (Error) 1117 goto WCharLiteralError; 1118 1119 return ((wchar_t)C1 << 8) | (wchar_t)C2; 1120 1121 WCharLiteralError: 1122 Error = true; 1123 return L'\0'; 1124 } 1125 1126 static void writeHexDigit(char *Buffer, uint8_t Digit) { 1127 assert(Digit <= 15); 1128 *Buffer = (Digit < 10) ? ('0' + Digit) : ('A' + Digit - 10); 1129 } 1130 1131 static void outputHex(OutputStream &OS, unsigned C) { 1132 if (C == 0) { 1133 OS << "\\x00"; 1134 return; 1135 } 1136 // It's easier to do the math if we can work from right to left, but we need 1137 // to print the numbers from left to right. So render this into a temporary 1138 // buffer first, then output the temporary buffer. Each byte is of the form 1139 // \xAB, which means that each byte needs 4 characters. Since there are at 1140 // most 4 bytes, we need a 4*4+1 = 17 character temporary buffer. 1141 char TempBuffer[17]; 1142 1143 ::memset(TempBuffer, 0, sizeof(TempBuffer)); 1144 constexpr int MaxPos = 15; 1145 1146 int Pos = MaxPos - 1; 1147 while (C != 0) { 1148 for (int I = 0; I < 2; ++I) { 1149 writeHexDigit(&TempBuffer[Pos--], C % 16); 1150 C /= 16; 1151 } 1152 TempBuffer[Pos--] = 'x'; 1153 TempBuffer[Pos--] = '\\'; 1154 assert(Pos >= 0); 1155 } 1156 OS << StringView(&TempBuffer[Pos + 1]); 1157 } 1158 1159 static void outputEscapedChar(OutputStream &OS, unsigned C) { 1160 switch (C) { 1161 case '\'': // single quote 1162 OS << "\\\'"; 1163 return; 1164 case '\"': // double quote 1165 OS << "\\\""; 1166 return; 1167 case '\\': // backslash 1168 OS << "\\\\"; 1169 return; 1170 case '\a': // bell 1171 OS << "\\a"; 1172 return; 1173 case '\b': // backspace 1174 OS << "\\b"; 1175 return; 1176 case '\f': // form feed 1177 OS << "\\f"; 1178 return; 1179 case '\n': // new line 1180 OS << "\\n"; 1181 return; 1182 case '\r': // carriage return 1183 OS << "\\r"; 1184 return; 1185 case '\t': // tab 1186 OS << "\\t"; 1187 return; 1188 case '\v': // vertical tab 1189 OS << "\\v"; 1190 return; 1191 default: 1192 break; 1193 } 1194 1195 if (C > 0x1F && C < 0x7F) { 1196 // Standard ascii char. 1197 OS << (char)C; 1198 return; 1199 } 1200 1201 outputHex(OS, C); 1202 } 1203 1204 unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) { 1205 const uint8_t *End = StringBytes + Length - 1; 1206 unsigned Count = 0; 1207 while (Length > 0 && *End == 0) { 1208 --Length; 1209 --End; 1210 ++Count; 1211 } 1212 return Count; 1213 } 1214 1215 unsigned countEmbeddedNulls(const uint8_t *StringBytes, unsigned Length) { 1216 unsigned Result = 0; 1217 for (unsigned I = 0; I < Length; ++I) { 1218 if (*StringBytes++ == 0) 1219 ++Result; 1220 } 1221 return Result; 1222 } 1223 1224 unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars, 1225 unsigned NumBytes) { 1226 assert(NumBytes > 0); 1227 1228 // If the number of bytes is odd, this is guaranteed to be a char string. 1229 if (NumBytes % 2 == 1) 1230 return 1; 1231 1232 // All strings can encode at most 32 bytes of data. If it's less than that, 1233 // then we encoded the entire string. In this case we check for a 1-byte, 1234 // 2-byte, or 4-byte null terminator. 1235 if (NumBytes < 32) { 1236 unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars); 1237 if (TrailingNulls >= 4) 1238 return 4; 1239 if (TrailingNulls >= 2) 1240 return 2; 1241 return 1; 1242 } 1243 1244 // The whole string was not able to be encoded. Try to look at embedded null 1245 // terminators to guess. The heuristic is that we count all embedded null 1246 // terminators. If more than 2/3 are null, it's a char32. If more than 1/3 1247 // are null, it's a char16. Otherwise it's a char8. This obviously isn't 1248 // perfect and is biased towards languages that have ascii alphabets, but this 1249 // was always going to be best effort since the encoding is lossy. 1250 unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars); 1251 if (Nulls >= 2 * NumChars / 3) 1252 return 4; 1253 if (Nulls >= NumChars / 3) 1254 return 2; 1255 return 1; 1256 } 1257 1258 static unsigned decodeMultiByteChar(const uint8_t *StringBytes, 1259 unsigned CharIndex, unsigned CharBytes) { 1260 assert(CharBytes == 1 || CharBytes == 2 || CharBytes == 4); 1261 unsigned Offset = CharIndex * CharBytes; 1262 unsigned Result = 0; 1263 StringBytes = StringBytes + Offset; 1264 for (unsigned I = 0; I < CharBytes; ++I) { 1265 unsigned C = static_cast<unsigned>(StringBytes[I]); 1266 Result |= C << (8 * I); 1267 } 1268 return Result; 1269 } 1270 1271 FunctionSymbolNode *Demangler::demangleVcallThunkNode(StringView &MangledName) { 1272 FunctionSymbolNode *FSN = Arena.alloc<FunctionSymbolNode>(); 1273 VcallThunkIdentifierNode *VTIN = Arena.alloc<VcallThunkIdentifierNode>(); 1274 FSN->Signature = Arena.alloc<ThunkSignatureNode>(); 1275 FSN->Signature->FunctionClass = FC_NoParameterList; 1276 1277 FSN->Name = demangleNameScopeChain(MangledName, VTIN); 1278 if (!Error) 1279 Error = !MangledName.consumeFront("$B"); 1280 if (!Error) 1281 VTIN->OffsetInVTable = demangleUnsigned(MangledName); 1282 if (!Error) 1283 Error = !MangledName.consumeFront('A'); 1284 if (!Error) 1285 FSN->Signature->CallConvention = demangleCallingConvention(MangledName); 1286 return (Error) ? nullptr : FSN; 1287 } 1288 1289 EncodedStringLiteralNode * 1290 Demangler::demangleStringLiteral(StringView &MangledName) { 1291 // This function uses goto, so declare all variables up front. 1292 OutputStream OS; 1293 StringView CRC; 1294 uint64_t StringByteSize; 1295 bool IsWcharT = false; 1296 bool IsNegative = false; 1297 size_t CrcEndPos = 0; 1298 char *ResultBuffer = nullptr; 1299 1300 EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>(); 1301 1302 // Prefix indicating the beginning of a string literal 1303 if (!MangledName.consumeFront("@_")) 1304 goto StringLiteralError; 1305 if (MangledName.empty()) 1306 goto StringLiteralError; 1307 1308 // Char Type (regular or wchar_t) 1309 switch (MangledName.popFront()) { 1310 case '1': 1311 IsWcharT = true; 1312 LLVM_FALLTHROUGH; 1313 case '0': 1314 break; 1315 default: 1316 goto StringLiteralError; 1317 } 1318 1319 // Encoded Length 1320 std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName); 1321 if (Error || IsNegative) 1322 goto StringLiteralError; 1323 1324 // CRC 32 (always 8 characters plus a terminator) 1325 CrcEndPos = MangledName.find('@'); 1326 if (CrcEndPos == StringView::npos) 1327 goto StringLiteralError; 1328 CRC = MangledName.substr(0, CrcEndPos); 1329 MangledName = MangledName.dropFront(CrcEndPos + 1); 1330 if (MangledName.empty()) 1331 goto StringLiteralError; 1332 1333 OS = OutputStream::create(nullptr, nullptr, 1024); 1334 if (IsWcharT) { 1335 Result->Char = CharKind::Wchar; 1336 if (StringByteSize > 64) 1337 Result->IsTruncated = true; 1338 1339 while (!MangledName.consumeFront('@')) { 1340 assert(StringByteSize >= 2); 1341 wchar_t W = demangleWcharLiteral(MangledName); 1342 if (StringByteSize != 2 || Result->IsTruncated) 1343 outputEscapedChar(OS, W); 1344 StringByteSize -= 2; 1345 if (Error) 1346 goto StringLiteralError; 1347 } 1348 } else { 1349 // The max byte length is actually 32, but some compilers mangled strings 1350 // incorrectly, so we have to assume it can go higher. 1351 constexpr unsigned MaxStringByteLength = 32 * 4; 1352 uint8_t StringBytes[MaxStringByteLength]; 1353 1354 unsigned BytesDecoded = 0; 1355 while (!MangledName.consumeFront('@')) { 1356 assert(StringByteSize >= 1); 1357 StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName); 1358 } 1359 1360 if (StringByteSize > BytesDecoded) 1361 Result->IsTruncated = true; 1362 1363 unsigned CharBytes = 1364 guessCharByteSize(StringBytes, BytesDecoded, StringByteSize); 1365 assert(StringByteSize % CharBytes == 0); 1366 switch (CharBytes) { 1367 case 1: 1368 Result->Char = CharKind::Char; 1369 break; 1370 case 2: 1371 Result->Char = CharKind::Char16; 1372 break; 1373 case 4: 1374 Result->Char = CharKind::Char32; 1375 break; 1376 default: 1377 LLVM_BUILTIN_UNREACHABLE; 1378 } 1379 const unsigned NumChars = BytesDecoded / CharBytes; 1380 for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) { 1381 unsigned NextChar = 1382 decodeMultiByteChar(StringBytes, CharIndex, CharBytes); 1383 if (CharIndex + 1 < NumChars || Result->IsTruncated) 1384 outputEscapedChar(OS, NextChar); 1385 } 1386 } 1387 1388 OS << '\0'; 1389 ResultBuffer = OS.getBuffer(); 1390 Result->DecodedString = copyString(ResultBuffer); 1391 std::free(ResultBuffer); 1392 return Result; 1393 1394 StringLiteralError: 1395 Error = true; 1396 return nullptr; 1397 } 1398 1399 StringView Demangler::demangleSimpleString(StringView &MangledName, 1400 bool Memorize) { 1401 StringView S; 1402 for (size_t i = 0; i < MangledName.size(); ++i) { 1403 if (MangledName[i] != '@') 1404 continue; 1405 S = MangledName.substr(0, i); 1406 MangledName = MangledName.dropFront(i + 1); 1407 1408 if (Memorize) 1409 memorizeString(S); 1410 return S; 1411 } 1412 1413 Error = true; 1414 return {}; 1415 } 1416 1417 NamedIdentifierNode * 1418 Demangler::demangleAnonymousNamespaceName(StringView &MangledName) { 1419 assert(MangledName.startsWith("?A")); 1420 MangledName.consumeFront("?A"); 1421 1422 NamedIdentifierNode *Node = Arena.alloc<NamedIdentifierNode>(); 1423 Node->Name = "`anonymous namespace'"; 1424 size_t EndPos = MangledName.find('@'); 1425 if (EndPos == StringView::npos) { 1426 Error = true; 1427 return nullptr; 1428 } 1429 StringView NamespaceKey = MangledName.substr(0, EndPos); 1430 memorizeString(NamespaceKey); 1431 MangledName = MangledName.substr(EndPos + 1); 1432 return Node; 1433 } 1434 1435 NamedIdentifierNode * 1436 Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) { 1437 assert(startsWithLocalScopePattern(MangledName)); 1438 1439 NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>(); 1440 MangledName.consumeFront('?'); 1441 auto Number = demangleNumber(MangledName); 1442 assert(!Number.second); 1443 1444 // One ? to terminate the number 1445 MangledName.consumeFront('?'); 1446 1447 assert(!Error); 1448 Node *Scope = parse(MangledName); 1449 if (Error) 1450 return nullptr; 1451 1452 // Render the parent symbol's name into a buffer. 1453 OutputStream OS = OutputStream::create(nullptr, nullptr, 1024); 1454 OS << '`'; 1455 Scope->output(OS, OF_Default); 1456 OS << '\''; 1457 OS << "::`" << Number.first << "'"; 1458 OS << '\0'; 1459 char *Result = OS.getBuffer(); 1460 Identifier->Name = copyString(Result); 1461 std::free(Result); 1462 return Identifier; 1463 } 1464 1465 // Parses a type name in the form of A@B@C@@ which represents C::B::A. 1466 QualifiedNameNode * 1467 Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) { 1468 IdentifierNode *Identifier = demangleUnqualifiedTypeName(MangledName, true); 1469 if (Error) 1470 return nullptr; 1471 assert(Identifier); 1472 1473 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier); 1474 if (Error) 1475 return nullptr; 1476 assert(QN); 1477 return QN; 1478 } 1479 1480 // Parses a symbol name in the form of A@B@C@@ which represents C::B::A. 1481 // Symbol names have slightly different rules regarding what can appear 1482 // so we separate out the implementations for flexibility. 1483 QualifiedNameNode * 1484 Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) { 1485 // This is the final component of a symbol name (i.e. the leftmost component 1486 // of a mangled name. Since the only possible template instantiation that 1487 // can appear in this context is a function template, and since those are 1488 // not saved for the purposes of name backreferences, only backref simple 1489 // names. 1490 IdentifierNode *Identifier = 1491 demangleUnqualifiedSymbolName(MangledName, NBB_Simple); 1492 if (Error) 1493 return nullptr; 1494 1495 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier); 1496 if (Error) 1497 return nullptr; 1498 1499 if (Identifier->kind() == NodeKind::StructorIdentifier) { 1500 StructorIdentifierNode *SIN = 1501 static_cast<StructorIdentifierNode *>(Identifier); 1502 assert(QN->Components->Count >= 2); 1503 Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2]; 1504 SIN->Class = static_cast<IdentifierNode *>(ClassNode); 1505 } 1506 assert(QN); 1507 return QN; 1508 } 1509 1510 IdentifierNode *Demangler::demangleUnqualifiedTypeName(StringView &MangledName, 1511 bool Memorize) { 1512 // An inner-most name can be a back-reference, because a fully-qualified name 1513 // (e.g. Scope + Inner) can contain other fully qualified names inside of 1514 // them (for example template parameters), and these nested parameters can 1515 // refer to previously mangled types. 1516 if (startsWithDigit(MangledName)) 1517 return demangleBackRefName(MangledName); 1518 1519 if (MangledName.startsWith("?$")) 1520 return demangleTemplateInstantiationName(MangledName, NBB_Template); 1521 1522 return demangleSimpleName(MangledName, Memorize); 1523 } 1524 1525 IdentifierNode * 1526 Demangler::demangleUnqualifiedSymbolName(StringView &MangledName, 1527 NameBackrefBehavior NBB) { 1528 if (startsWithDigit(MangledName)) 1529 return demangleBackRefName(MangledName); 1530 if (MangledName.startsWith("?$")) 1531 return demangleTemplateInstantiationName(MangledName, NBB); 1532 if (MangledName.startsWith('?')) 1533 return demangleFunctionIdentifierCode(MangledName); 1534 return demangleSimpleName(MangledName, (NBB & NBB_Simple) != 0); 1535 } 1536 1537 IdentifierNode *Demangler::demangleNameScopePiece(StringView &MangledName) { 1538 if (startsWithDigit(MangledName)) 1539 return demangleBackRefName(MangledName); 1540 1541 if (MangledName.startsWith("?$")) 1542 return demangleTemplateInstantiationName(MangledName, NBB_Template); 1543 1544 if (MangledName.startsWith("?A")) 1545 return demangleAnonymousNamespaceName(MangledName); 1546 1547 if (startsWithLocalScopePattern(MangledName)) 1548 return demangleLocallyScopedNamePiece(MangledName); 1549 1550 return demangleSimpleName(MangledName, true); 1551 } 1552 1553 static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head, 1554 size_t Count) { 1555 NodeArrayNode *N = Arena.alloc<NodeArrayNode>(); 1556 N->Count = Count; 1557 N->Nodes = Arena.allocArray<Node *>(Count); 1558 for (size_t I = 0; I < Count; ++I) { 1559 N->Nodes[I] = Head->N; 1560 Head = Head->Next; 1561 } 1562 return N; 1563 } 1564 1565 QualifiedNameNode * 1566 Demangler::demangleNameScopeChain(StringView &MangledName, 1567 IdentifierNode *UnqualifiedName) { 1568 NodeList *Head = Arena.alloc<NodeList>(); 1569 1570 Head->N = UnqualifiedName; 1571 1572 size_t Count = 1; 1573 while (!MangledName.consumeFront("@")) { 1574 ++Count; 1575 NodeList *NewHead = Arena.alloc<NodeList>(); 1576 NewHead->Next = Head; 1577 Head = NewHead; 1578 1579 if (MangledName.empty()) { 1580 Error = true; 1581 return nullptr; 1582 } 1583 1584 assert(!Error); 1585 IdentifierNode *Elem = demangleNameScopePiece(MangledName); 1586 if (Error) 1587 return nullptr; 1588 1589 Head->N = Elem; 1590 } 1591 1592 QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>(); 1593 QN->Components = nodeListToNodeArray(Arena, Head, Count); 1594 return QN; 1595 } 1596 1597 FuncClass Demangler::demangleFunctionClass(StringView &MangledName) { 1598 switch (MangledName.popFront()) { 1599 case '9': 1600 return FuncClass(FC_ExternC | FC_NoParameterList); 1601 case 'A': 1602 return FC_Private; 1603 case 'B': 1604 return FuncClass(FC_Private | FC_Far); 1605 case 'C': 1606 return FuncClass(FC_Private | FC_Static); 1607 case 'D': 1608 return FuncClass(FC_Private | FC_Static); 1609 case 'E': 1610 return FuncClass(FC_Private | FC_Virtual); 1611 case 'F': 1612 return FuncClass(FC_Private | FC_Virtual); 1613 case 'G': 1614 return FuncClass(FC_Private | FC_StaticThisAdjust); 1615 case 'H': 1616 return FuncClass(FC_Private | FC_StaticThisAdjust | FC_Far); 1617 case 'I': 1618 return FuncClass(FC_Protected); 1619 case 'J': 1620 return FuncClass(FC_Protected | FC_Far); 1621 case 'K': 1622 return FuncClass(FC_Protected | FC_Static); 1623 case 'L': 1624 return FuncClass(FC_Protected | FC_Static | FC_Far); 1625 case 'M': 1626 return FuncClass(FC_Protected | FC_Virtual); 1627 case 'N': 1628 return FuncClass(FC_Protected | FC_Virtual | FC_Far); 1629 case 'O': 1630 return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust); 1631 case 'P': 1632 return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust | FC_Far); 1633 case 'Q': 1634 return FuncClass(FC_Public); 1635 case 'R': 1636 return FuncClass(FC_Public | FC_Far); 1637 case 'S': 1638 return FuncClass(FC_Public | FC_Static); 1639 case 'T': 1640 return FuncClass(FC_Public | FC_Static | FC_Far); 1641 case 'U': 1642 return FuncClass(FC_Public | FC_Virtual); 1643 case 'V': 1644 return FuncClass(FC_Public | FC_Virtual | FC_Far); 1645 case 'W': 1646 return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust); 1647 case 'X': 1648 return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust | FC_Far); 1649 case 'Y': 1650 return FuncClass(FC_Global); 1651 case 'Z': 1652 return FuncClass(FC_Global | FC_Far); 1653 case '$': { 1654 FuncClass VFlag = FC_VirtualThisAdjust; 1655 if (MangledName.consumeFront('R')) 1656 VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx); 1657 1658 switch (MangledName.popFront()) { 1659 case '0': 1660 return FuncClass(FC_Private | FC_Virtual | VFlag); 1661 case '1': 1662 return FuncClass(FC_Private | FC_Virtual | VFlag | FC_Far); 1663 case '2': 1664 return FuncClass(FC_Protected | FC_Virtual | VFlag); 1665 case '3': 1666 return FuncClass(FC_Protected | FC_Virtual | VFlag | FC_Far); 1667 case '4': 1668 return FuncClass(FC_Public | FC_Virtual | VFlag); 1669 case '5': 1670 return FuncClass(FC_Public | FC_Virtual | VFlag | FC_Far); 1671 } 1672 } 1673 } 1674 1675 Error = true; 1676 return FC_Public; 1677 } 1678 1679 CallingConv Demangler::demangleCallingConvention(StringView &MangledName) { 1680 switch (MangledName.popFront()) { 1681 case 'A': 1682 case 'B': 1683 return CallingConv::Cdecl; 1684 case 'C': 1685 case 'D': 1686 return CallingConv::Pascal; 1687 case 'E': 1688 case 'F': 1689 return CallingConv::Thiscall; 1690 case 'G': 1691 case 'H': 1692 return CallingConv::Stdcall; 1693 case 'I': 1694 case 'J': 1695 return CallingConv::Fastcall; 1696 case 'M': 1697 case 'N': 1698 return CallingConv::Clrcall; 1699 case 'O': 1700 case 'P': 1701 return CallingConv::Eabi; 1702 case 'Q': 1703 return CallingConv::Vectorcall; 1704 } 1705 1706 return CallingConv::None; 1707 } 1708 1709 StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) { 1710 assert(std::isdigit(MangledName.front())); 1711 1712 switch (MangledName.popFront()) { 1713 case '0': 1714 return StorageClass::PrivateStatic; 1715 case '1': 1716 return StorageClass::ProtectedStatic; 1717 case '2': 1718 return StorageClass::PublicStatic; 1719 case '3': 1720 return StorageClass::Global; 1721 case '4': 1722 return StorageClass::FunctionLocalStatic; 1723 } 1724 Error = true; 1725 return StorageClass::None; 1726 } 1727 1728 std::pair<Qualifiers, bool> 1729 Demangler::demangleQualifiers(StringView &MangledName) { 1730 1731 switch (MangledName.popFront()) { 1732 // Member qualifiers 1733 case 'Q': 1734 return std::make_pair(Q_None, true); 1735 case 'R': 1736 return std::make_pair(Q_Const, true); 1737 case 'S': 1738 return std::make_pair(Q_Volatile, true); 1739 case 'T': 1740 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), true); 1741 // Non-Member qualifiers 1742 case 'A': 1743 return std::make_pair(Q_None, false); 1744 case 'B': 1745 return std::make_pair(Q_Const, false); 1746 case 'C': 1747 return std::make_pair(Q_Volatile, false); 1748 case 'D': 1749 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), false); 1750 } 1751 Error = true; 1752 return std::make_pair(Q_None, false); 1753 } 1754 1755 // <variable-type> ::= <type> <cvr-qualifiers> 1756 // ::= <type> <pointee-cvr-qualifiers> # pointers, references 1757 TypeNode *Demangler::demangleType(StringView &MangledName, 1758 QualifierMangleMode QMM) { 1759 Qualifiers Quals = Q_None; 1760 bool IsMember = false; 1761 if (QMM == QualifierMangleMode::Mangle) { 1762 std::tie(Quals, IsMember) = demangleQualifiers(MangledName); 1763 } else if (QMM == QualifierMangleMode::Result) { 1764 if (MangledName.consumeFront('?')) 1765 std::tie(Quals, IsMember) = demangleQualifiers(MangledName); 1766 } 1767 1768 TypeNode *Ty = nullptr; 1769 if (isTagType(MangledName)) 1770 Ty = demangleClassType(MangledName); 1771 else if (isPointerType(MangledName)) { 1772 if (isMemberPointer(MangledName)) 1773 Ty = demangleMemberPointerType(MangledName); 1774 else 1775 Ty = demanglePointerType(MangledName); 1776 } else if (isArrayType(MangledName)) 1777 Ty = demangleArrayType(MangledName); 1778 else if (isFunctionType(MangledName)) { 1779 if (MangledName.consumeFront("$$A8@@")) 1780 Ty = demangleFunctionType(MangledName, true); 1781 else { 1782 assert(MangledName.startsWith("$$A6")); 1783 MangledName.consumeFront("$$A6"); 1784 Ty = demangleFunctionType(MangledName, false); 1785 } 1786 } else if (isCustomType(MangledName)) { 1787 Ty = demangleCustomType(MangledName); 1788 } else { 1789 Ty = demanglePrimitiveType(MangledName); 1790 if (!Ty || Error) 1791 return Ty; 1792 } 1793 1794 Ty->Quals = Qualifiers(Ty->Quals | Quals); 1795 return Ty; 1796 } 1797 1798 void Demangler::demangleThrowSpecification(StringView &MangledName) { 1799 if (MangledName.consumeFront('Z')) 1800 return; 1801 1802 Error = true; 1803 } 1804 1805 FunctionSignatureNode *Demangler::demangleFunctionType(StringView &MangledName, 1806 bool HasThisQuals) { 1807 FunctionSignatureNode *FTy = Arena.alloc<FunctionSignatureNode>(); 1808 1809 if (HasThisQuals) { 1810 FTy->Quals = demanglePointerExtQualifiers(MangledName); 1811 FTy->RefQualifier = demangleFunctionRefQualifier(MangledName); 1812 FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first); 1813 } 1814 1815 // Fields that appear on both member and non-member functions. 1816 FTy->CallConvention = demangleCallingConvention(MangledName); 1817 1818 // <return-type> ::= <type> 1819 // ::= @ # structors (they have no declared return type) 1820 bool IsStructor = MangledName.consumeFront('@'); 1821 if (!IsStructor) 1822 FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result); 1823 1824 FTy->Params = demangleFunctionParameterList(MangledName); 1825 1826 demangleThrowSpecification(MangledName); 1827 1828 return FTy; 1829 } 1830 1831 FunctionSymbolNode * 1832 Demangler::demangleFunctionEncoding(StringView &MangledName) { 1833 FuncClass ExtraFlags = FC_None; 1834 if (MangledName.consumeFront("$$J0")) 1835 ExtraFlags = FC_ExternC; 1836 1837 FuncClass FC = demangleFunctionClass(MangledName); 1838 FC = FuncClass(ExtraFlags | FC); 1839 1840 FunctionSignatureNode *FSN = nullptr; 1841 ThunkSignatureNode *TTN = nullptr; 1842 if (FC & FC_StaticThisAdjust) { 1843 TTN = Arena.alloc<ThunkSignatureNode>(); 1844 TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName); 1845 } else if (FC & FC_VirtualThisAdjust) { 1846 TTN = Arena.alloc<ThunkSignatureNode>(); 1847 if (FC & FC_VirtualThisAdjustEx) { 1848 TTN->ThisAdjust.VBPtrOffset = demangleSigned(MangledName); 1849 TTN->ThisAdjust.VBOffsetOffset = demangleSigned(MangledName); 1850 } 1851 TTN->ThisAdjust.VtordispOffset = demangleSigned(MangledName); 1852 TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName); 1853 } 1854 1855 if (FC & FC_NoParameterList) { 1856 // This is an extern "C" function whose full signature hasn't been mangled. 1857 // This happens when we need to mangle a local symbol inside of an extern 1858 // "C" function. 1859 FSN = Arena.alloc<FunctionSignatureNode>(); 1860 } else { 1861 bool HasThisQuals = !(FC & (FC_Global | FC_Static)); 1862 FSN = demangleFunctionType(MangledName, HasThisQuals); 1863 } 1864 if (TTN) { 1865 *static_cast<FunctionSignatureNode *>(TTN) = *FSN; 1866 FSN = TTN; 1867 } 1868 FSN->FunctionClass = FC; 1869 1870 FunctionSymbolNode *Symbol = Arena.alloc<FunctionSymbolNode>(); 1871 Symbol->Signature = FSN; 1872 return Symbol; 1873 } 1874 1875 CustomTypeNode *Demangler::demangleCustomType(StringView &MangledName) { 1876 assert(MangledName.startsWith('?')); 1877 MangledName.popFront(); 1878 1879 CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>(); 1880 CTN->Identifier = demangleUnqualifiedTypeName(MangledName, true); 1881 if (!MangledName.consumeFront('@')) 1882 Error = true; 1883 if (Error) 1884 return nullptr; 1885 return CTN; 1886 } 1887 1888 // Reads a primitive type. 1889 PrimitiveTypeNode *Demangler::demanglePrimitiveType(StringView &MangledName) { 1890 if (MangledName.consumeFront("$$T")) 1891 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Nullptr); 1892 1893 switch (MangledName.popFront()) { 1894 case 'X': 1895 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Void); 1896 case 'D': 1897 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char); 1898 case 'C': 1899 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Schar); 1900 case 'E': 1901 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uchar); 1902 case 'F': 1903 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Short); 1904 case 'G': 1905 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ushort); 1906 case 'H': 1907 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int); 1908 case 'I': 1909 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint); 1910 case 'J': 1911 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Long); 1912 case 'K': 1913 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ulong); 1914 case 'M': 1915 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Float); 1916 case 'N': 1917 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Double); 1918 case 'O': 1919 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ldouble); 1920 case '_': { 1921 if (MangledName.empty()) { 1922 Error = true; 1923 return nullptr; 1924 } 1925 switch (MangledName.popFront()) { 1926 case 'N': 1927 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Bool); 1928 case 'J': 1929 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int64); 1930 case 'K': 1931 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64); 1932 case 'W': 1933 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar); 1934 case 'S': 1935 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16); 1936 case 'U': 1937 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char32); 1938 } 1939 break; 1940 } 1941 } 1942 Error = true; 1943 return nullptr; 1944 } 1945 1946 TagTypeNode *Demangler::demangleClassType(StringView &MangledName) { 1947 TagTypeNode *TT = nullptr; 1948 1949 switch (MangledName.popFront()) { 1950 case 'T': 1951 TT = Arena.alloc<TagTypeNode>(TagKind::Union); 1952 break; 1953 case 'U': 1954 TT = Arena.alloc<TagTypeNode>(TagKind::Struct); 1955 break; 1956 case 'V': 1957 TT = Arena.alloc<TagTypeNode>(TagKind::Class); 1958 break; 1959 case 'W': 1960 if (MangledName.popFront() != '4') { 1961 Error = true; 1962 return nullptr; 1963 } 1964 TT = Arena.alloc<TagTypeNode>(TagKind::Enum); 1965 break; 1966 default: 1967 assert(false); 1968 } 1969 1970 TT->QualifiedName = demangleFullyQualifiedTypeName(MangledName); 1971 return TT; 1972 } 1973 1974 // <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type> 1975 // # the E is required for 64-bit non-static pointers 1976 PointerTypeNode *Demangler::demanglePointerType(StringView &MangledName) { 1977 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>(); 1978 1979 std::tie(Pointer->Quals, Pointer->Affinity) = 1980 demanglePointerCVQualifiers(MangledName); 1981 1982 if (MangledName.consumeFront("6")) { 1983 Pointer->Pointee = demangleFunctionType(MangledName, false); 1984 return Pointer; 1985 } 1986 1987 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); 1988 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); 1989 1990 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle); 1991 return Pointer; 1992 } 1993 1994 PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) { 1995 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>(); 1996 1997 std::tie(Pointer->Quals, Pointer->Affinity) = 1998 demanglePointerCVQualifiers(MangledName); 1999 assert(Pointer->Affinity == PointerAffinity::Pointer); 2000 2001 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); 2002 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); 2003 2004 if (MangledName.consumeFront("8")) { 2005 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName); 2006 Pointer->Pointee = demangleFunctionType(MangledName, true); 2007 } else { 2008 Qualifiers PointeeQuals = Q_None; 2009 bool IsMember = false; 2010 std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName); 2011 assert(IsMember); 2012 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName); 2013 2014 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop); 2015 Pointer->Pointee->Quals = PointeeQuals; 2016 } 2017 2018 return Pointer; 2019 } 2020 2021 Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) { 2022 Qualifiers Quals = Q_None; 2023 if (MangledName.consumeFront('E')) 2024 Quals = Qualifiers(Quals | Q_Pointer64); 2025 if (MangledName.consumeFront('I')) 2026 Quals = Qualifiers(Quals | Q_Restrict); 2027 if (MangledName.consumeFront('F')) 2028 Quals = Qualifiers(Quals | Q_Unaligned); 2029 2030 return Quals; 2031 } 2032 2033 ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) { 2034 assert(MangledName.front() == 'Y'); 2035 MangledName.popFront(); 2036 2037 uint64_t Rank = 0; 2038 bool IsNegative = false; 2039 std::tie(Rank, IsNegative) = demangleNumber(MangledName); 2040 if (IsNegative || Rank == 0) { 2041 Error = true; 2042 return nullptr; 2043 } 2044 2045 ArrayTypeNode *ATy = Arena.alloc<ArrayTypeNode>(); 2046 NodeList *Head = Arena.alloc<NodeList>(); 2047 NodeList *Tail = Head; 2048 2049 for (uint64_t I = 0; I < Rank; ++I) { 2050 uint64_t D = 0; 2051 std::tie(D, IsNegative) = demangleNumber(MangledName); 2052 if (IsNegative) { 2053 Error = true; 2054 return nullptr; 2055 } 2056 Tail->N = Arena.alloc<IntegerLiteralNode>(D, IsNegative); 2057 if (I + 1 < Rank) { 2058 Tail->Next = Arena.alloc<NodeList>(); 2059 Tail = Tail->Next; 2060 } 2061 } 2062 ATy->Dimensions = nodeListToNodeArray(Arena, Head, Rank); 2063 2064 if (MangledName.consumeFront("$$C")) { 2065 bool IsMember = false; 2066 std::tie(ATy->Quals, IsMember) = demangleQualifiers(MangledName); 2067 if (IsMember) { 2068 Error = true; 2069 return nullptr; 2070 } 2071 } 2072 2073 ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop); 2074 return ATy; 2075 } 2076 2077 // Reads a function or a template parameters. 2078 NodeArrayNode * 2079 Demangler::demangleFunctionParameterList(StringView &MangledName) { 2080 // Empty parameter list. 2081 if (MangledName.consumeFront('X')) 2082 return {}; 2083 2084 NodeList *Head = Arena.alloc<NodeList>(); 2085 NodeList **Current = &Head; 2086 size_t Count = 0; 2087 while (!Error && !MangledName.startsWith('@') && 2088 !MangledName.startsWith('Z')) { 2089 ++Count; 2090 2091 if (startsWithDigit(MangledName)) { 2092 size_t N = MangledName[0] - '0'; 2093 if (N >= Backrefs.FunctionParamCount) { 2094 Error = true; 2095 return {}; 2096 } 2097 MangledName = MangledName.dropFront(); 2098 2099 *Current = Arena.alloc<NodeList>(); 2100 (*Current)->N = Backrefs.FunctionParams[N]; 2101 Current = &(*Current)->Next; 2102 continue; 2103 } 2104 2105 size_t OldSize = MangledName.size(); 2106 2107 *Current = Arena.alloc<NodeList>(); 2108 TypeNode *TN = demangleType(MangledName, QualifierMangleMode::Drop); 2109 2110 (*Current)->N = TN; 2111 2112 size_t CharsConsumed = OldSize - MangledName.size(); 2113 assert(CharsConsumed != 0); 2114 2115 // Single-letter types are ignored for backreferences because memorizing 2116 // them doesn't save anything. 2117 if (Backrefs.FunctionParamCount <= 9 && CharsConsumed > 1) 2118 Backrefs.FunctionParams[Backrefs.FunctionParamCount++] = TN; 2119 2120 Current = &(*Current)->Next; 2121 } 2122 2123 if (Error) 2124 return {}; 2125 2126 NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count); 2127 // A non-empty parameter list is terminated by either 'Z' (variadic) parameter 2128 // list or '@' (non variadic). Careful not to consume "@Z", as in that case 2129 // the following Z could be a throw specifier. 2130 if (MangledName.consumeFront('@')) 2131 return NA; 2132 2133 if (MangledName.consumeFront('Z')) { 2134 // This is a variadic parameter list. We probably need a variadic node to 2135 // append to the end. 2136 return NA; 2137 } 2138 2139 Error = true; 2140 return {}; 2141 } 2142 2143 NodeArrayNode * 2144 Demangler::demangleTemplateParameterList(StringView &MangledName) { 2145 NodeList *Head; 2146 NodeList **Current = &Head; 2147 size_t Count = 0; 2148 2149 while (!Error && !MangledName.startsWith('@')) { 2150 if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") || 2151 MangledName.consumeFront("$$$V")) { 2152 // Empty parameter pack. 2153 continue; 2154 } 2155 2156 ++Count; 2157 2158 // Template parameter lists don't participate in back-referencing. 2159 *Current = Arena.alloc<NodeList>(); 2160 2161 NodeList &TP = **Current; 2162 2163 TemplateParameterReferenceNode *TPRN = nullptr; 2164 if (MangledName.consumeFront("$$Y")) { 2165 // Template alias 2166 TP.N = demangleFullyQualifiedTypeName(MangledName); 2167 } else if (MangledName.consumeFront("$$B")) { 2168 // Array 2169 TP.N = demangleType(MangledName, QualifierMangleMode::Drop); 2170 } else if (MangledName.consumeFront("$$C")) { 2171 // Type has qualifiers. 2172 TP.N = demangleType(MangledName, QualifierMangleMode::Mangle); 2173 } else if (MangledName.startsWith("$1") || MangledName.startsWith("$H") || 2174 MangledName.startsWith("$I") || MangledName.startsWith("$J")) { 2175 // Pointer to member 2176 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2177 TPRN->IsMemberPointer = true; 2178 2179 MangledName = MangledName.dropFront(); 2180 // 1 - single inheritance <name> 2181 // H - multiple inheritance <name> <number> 2182 // I - virtual inheritance <name> <number> <number> <number> 2183 // J - unspecified inheritance <name> <number> <number> <number> 2184 char InheritanceSpecifier = MangledName.popFront(); 2185 SymbolNode *S = nullptr; 2186 if (MangledName.startsWith('?')) { 2187 S = parse(MangledName); 2188 memorizeIdentifier(S->Name->getUnqualifiedIdentifier()); 2189 } 2190 2191 switch (InheritanceSpecifier) { 2192 case 'J': 2193 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2194 demangleSigned(MangledName); 2195 LLVM_FALLTHROUGH; 2196 case 'I': 2197 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2198 demangleSigned(MangledName); 2199 LLVM_FALLTHROUGH; 2200 case 'H': 2201 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2202 demangleSigned(MangledName); 2203 LLVM_FALLTHROUGH; 2204 case '1': 2205 break; 2206 default: 2207 Error = true; 2208 break; 2209 } 2210 TPRN->Affinity = PointerAffinity::Pointer; 2211 TPRN->Symbol = S; 2212 } else if (MangledName.startsWith("$E?")) { 2213 MangledName.consumeFront("$E"); 2214 // Reference to symbol 2215 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2216 TPRN->Symbol = parse(MangledName); 2217 TPRN->Affinity = PointerAffinity::Reference; 2218 } else if (MangledName.startsWith("$F") || MangledName.startsWith("$G")) { 2219 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2220 2221 // Data member pointer. 2222 MangledName = MangledName.dropFront(); 2223 char InheritanceSpecifier = MangledName.popFront(); 2224 2225 switch (InheritanceSpecifier) { 2226 case 'G': 2227 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2228 demangleSigned(MangledName); 2229 LLVM_FALLTHROUGH; 2230 case 'F': 2231 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2232 demangleSigned(MangledName); 2233 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2234 demangleSigned(MangledName); 2235 LLVM_FALLTHROUGH; 2236 case '0': 2237 break; 2238 default: 2239 Error = true; 2240 break; 2241 } 2242 TPRN->IsMemberPointer = true; 2243 2244 } else if (MangledName.consumeFront("$0")) { 2245 // Integral non-type template parameter 2246 bool IsNegative = false; 2247 uint64_t Value = 0; 2248 std::tie(Value, IsNegative) = demangleNumber(MangledName); 2249 2250 TP.N = Arena.alloc<IntegerLiteralNode>(Value, IsNegative); 2251 } else { 2252 TP.N = demangleType(MangledName, QualifierMangleMode::Drop); 2253 } 2254 if (Error) 2255 return nullptr; 2256 2257 Current = &TP.Next; 2258 } 2259 2260 if (Error) 2261 return nullptr; 2262 2263 // Template parameter lists cannot be variadic, so it can only be terminated 2264 // by @. 2265 if (MangledName.consumeFront('@')) 2266 return nodeListToNodeArray(Arena, Head, Count); 2267 Error = true; 2268 return nullptr; 2269 } 2270 2271 void Demangler::dumpBackReferences() { 2272 std::printf("%d function parameter backreferences\n", 2273 (int)Backrefs.FunctionParamCount); 2274 2275 // Create an output stream so we can render each type. 2276 OutputStream OS = OutputStream::create(nullptr, 0, 1024); 2277 for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) { 2278 OS.setCurrentPosition(0); 2279 2280 TypeNode *T = Backrefs.FunctionParams[I]; 2281 T->output(OS, OF_Default); 2282 2283 std::printf(" [%d] - %.*s\n", (int)I, (int)OS.getCurrentPosition(), 2284 OS.getBuffer()); 2285 } 2286 std::free(OS.getBuffer()); 2287 2288 if (Backrefs.FunctionParamCount > 0) 2289 std::printf("\n"); 2290 std::printf("%d name backreferences\n", (int)Backrefs.NamesCount); 2291 for (size_t I = 0; I < Backrefs.NamesCount; ++I) { 2292 std::printf(" [%d] - %.*s\n", (int)I, (int)Backrefs.Names[I]->Name.size(), 2293 Backrefs.Names[I]->Name.begin()); 2294 } 2295 if (Backrefs.NamesCount > 0) 2296 std::printf("\n"); 2297 } 2298 2299 char *llvm::microsoftDemangle(const char *MangledName, char *Buf, size_t *N, 2300 int *Status, MSDemangleFlags Flags) { 2301 Demangler D; 2302 StringView Name{MangledName}; 2303 SymbolNode *S = D.parse(Name); 2304 2305 if (Flags & MSDF_DumpBackrefs) 2306 D.dumpBackReferences(); 2307 OutputStream OS = OutputStream::create(Buf, N, 1024); 2308 if (D.Error) { 2309 OS << MangledName; 2310 *Status = llvm::demangle_invalid_mangled_name; 2311 } else { 2312 S->output(OS, OF_Default); 2313 *Status = llvm::demangle_success; 2314 } 2315 2316 OS << '\0'; 2317 return OS.getBuffer(); 2318 } 2319