1 //===- MicrosoftDemangle.cpp ----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a demangler for MSVC-style mangled symbols. 10 // 11 // This file has no dependencies on the rest of LLVM so that it can be 12 // easily reused in other programs such as libcxxabi. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/Demangle/MicrosoftDemangle.h" 17 #include "llvm/Demangle/Demangle.h" 18 #include "llvm/Demangle/MicrosoftDemangleNodes.h" 19 20 #include "llvm/Demangle/DemangleConfig.h" 21 #include "llvm/Demangle/StringView.h" 22 #include "llvm/Demangle/Utility.h" 23 24 #include <array> 25 #include <cctype> 26 #include <cstdio> 27 #include <tuple> 28 29 using namespace llvm; 30 using namespace ms_demangle; 31 32 static bool startsWithDigit(StringView S) { 33 return !S.empty() && std::isdigit(S.front()); 34 } 35 36 37 struct NodeList { 38 Node *N = nullptr; 39 NodeList *Next = nullptr; 40 }; 41 42 static bool isMemberPointer(StringView MangledName, bool &Error) { 43 Error = false; 44 switch (MangledName.popFront()) { 45 case '$': 46 // This is probably an rvalue reference (e.g. $$Q), and you cannot have an 47 // rvalue reference to a member. 48 return false; 49 case 'A': 50 // 'A' indicates a reference, and you cannot have a reference to a member 51 // function or member. 52 return false; 53 case 'P': 54 case 'Q': 55 case 'R': 56 case 'S': 57 // These 4 values indicate some kind of pointer, but we still don't know 58 // what. 59 break; 60 default: 61 Error = true; 62 return false; 63 } 64 65 // If it starts with a number, then 6 indicates a non-member function 66 // pointer, and 8 indicates a member function pointer. 67 if (startsWithDigit(MangledName)) { 68 if (MangledName[0] != '6' && MangledName[0] != '8') { 69 Error = true; 70 return false; 71 } 72 return (MangledName[0] == '8'); 73 } 74 75 // Remove ext qualifiers since those can appear on either type and are 76 // therefore not indicative. 77 MangledName.consumeFront('E'); // 64-bit 78 MangledName.consumeFront('I'); // restrict 79 MangledName.consumeFront('F'); // unaligned 80 81 if (MangledName.empty()) { 82 Error = true; 83 return false; 84 } 85 86 // The next value should be either ABCD (non-member) or QRST (member). 87 switch (MangledName.front()) { 88 case 'A': 89 case 'B': 90 case 'C': 91 case 'D': 92 return false; 93 case 'Q': 94 case 'R': 95 case 'S': 96 case 'T': 97 return true; 98 default: 99 Error = true; 100 return false; 101 } 102 } 103 104 static SpecialIntrinsicKind 105 consumeSpecialIntrinsicKind(StringView &MangledName) { 106 if (MangledName.consumeFront("?_7")) 107 return SpecialIntrinsicKind::Vftable; 108 if (MangledName.consumeFront("?_8")) 109 return SpecialIntrinsicKind::Vbtable; 110 if (MangledName.consumeFront("?_9")) 111 return SpecialIntrinsicKind::VcallThunk; 112 if (MangledName.consumeFront("?_A")) 113 return SpecialIntrinsicKind::Typeof; 114 if (MangledName.consumeFront("?_B")) 115 return SpecialIntrinsicKind::LocalStaticGuard; 116 if (MangledName.consumeFront("?_C")) 117 return SpecialIntrinsicKind::StringLiteralSymbol; 118 if (MangledName.consumeFront("?_P")) 119 return SpecialIntrinsicKind::UdtReturning; 120 if (MangledName.consumeFront("?_R0")) 121 return SpecialIntrinsicKind::RttiTypeDescriptor; 122 if (MangledName.consumeFront("?_R1")) 123 return SpecialIntrinsicKind::RttiBaseClassDescriptor; 124 if (MangledName.consumeFront("?_R2")) 125 return SpecialIntrinsicKind::RttiBaseClassArray; 126 if (MangledName.consumeFront("?_R3")) 127 return SpecialIntrinsicKind::RttiClassHierarchyDescriptor; 128 if (MangledName.consumeFront("?_R4")) 129 return SpecialIntrinsicKind::RttiCompleteObjLocator; 130 if (MangledName.consumeFront("?_S")) 131 return SpecialIntrinsicKind::LocalVftable; 132 if (MangledName.consumeFront("?__E")) 133 return SpecialIntrinsicKind::DynamicInitializer; 134 if (MangledName.consumeFront("?__F")) 135 return SpecialIntrinsicKind::DynamicAtexitDestructor; 136 if (MangledName.consumeFront("?__J")) 137 return SpecialIntrinsicKind::LocalStaticThreadGuard; 138 return SpecialIntrinsicKind::None; 139 } 140 141 static bool startsWithLocalScopePattern(StringView S) { 142 if (!S.consumeFront('?')) 143 return false; 144 if (S.size() < 2) 145 return false; 146 147 size_t End = S.find('?'); 148 if (End == StringView::npos) 149 return false; 150 StringView Candidate = S.substr(0, End); 151 if (Candidate.empty()) 152 return false; 153 154 // \?[0-9]\? 155 // ?@? is the discriminator 0. 156 if (Candidate.size() == 1) 157 return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9'); 158 159 // If it's not 0-9, then it's an encoded number terminated with an @ 160 if (Candidate.back() != '@') 161 return false; 162 Candidate = Candidate.dropBack(); 163 164 // An encoded number starts with B-P and all subsequent digits are in A-P. 165 // Note that the reason the first digit cannot be A is two fold. First, it 166 // would create an ambiguity with ?A which delimits the beginning of an 167 // anonymous namespace. Second, A represents 0, and you don't start a multi 168 // digit number with a leading 0. Presumably the anonymous namespace 169 // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J. 170 if (Candidate[0] < 'B' || Candidate[0] > 'P') 171 return false; 172 Candidate = Candidate.dropFront(); 173 while (!Candidate.empty()) { 174 if (Candidate[0] < 'A' || Candidate[0] > 'P') 175 return false; 176 Candidate = Candidate.dropFront(); 177 } 178 179 return true; 180 } 181 182 static bool isTagType(StringView S) { 183 switch (S.front()) { 184 case 'T': // union 185 case 'U': // struct 186 case 'V': // class 187 case 'W': // enum 188 return true; 189 } 190 return false; 191 } 192 193 static bool isCustomType(StringView S) { return S[0] == '?'; } 194 195 static bool isPointerType(StringView S) { 196 if (S.startsWith("$$Q")) // foo && 197 return true; 198 199 switch (S.front()) { 200 case 'A': // foo & 201 case 'P': // foo * 202 case 'Q': // foo *const 203 case 'R': // foo *volatile 204 case 'S': // foo *const volatile 205 return true; 206 } 207 return false; 208 } 209 210 static bool isArrayType(StringView S) { return S[0] == 'Y'; } 211 212 static bool isFunctionType(StringView S) { 213 return S.startsWith("$$A8@@") || S.startsWith("$$A6"); 214 } 215 216 static FunctionRefQualifier 217 demangleFunctionRefQualifier(StringView &MangledName) { 218 if (MangledName.consumeFront('G')) 219 return FunctionRefQualifier::Reference; 220 else if (MangledName.consumeFront('H')) 221 return FunctionRefQualifier::RValueReference; 222 return FunctionRefQualifier::None; 223 } 224 225 static std::pair<Qualifiers, PointerAffinity> 226 demanglePointerCVQualifiers(StringView &MangledName) { 227 if (MangledName.consumeFront("$$Q")) 228 return std::make_pair(Q_None, PointerAffinity::RValueReference); 229 230 switch (MangledName.popFront()) { 231 case 'A': 232 return std::make_pair(Q_None, PointerAffinity::Reference); 233 case 'P': 234 return std::make_pair(Q_None, PointerAffinity::Pointer); 235 case 'Q': 236 return std::make_pair(Q_Const, PointerAffinity::Pointer); 237 case 'R': 238 return std::make_pair(Q_Volatile, PointerAffinity::Pointer); 239 case 'S': 240 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), 241 PointerAffinity::Pointer); 242 default: 243 assert(false && "Ty is not a pointer type!"); 244 } 245 return std::make_pair(Q_None, PointerAffinity::Pointer); 246 } 247 248 StringView Demangler::copyString(StringView Borrowed) { 249 char *Stable = Arena.allocUnalignedBuffer(Borrowed.size() + 1); 250 std::strcpy(Stable, Borrowed.begin()); 251 252 return {Stable, Borrowed.size()}; 253 } 254 255 SpecialTableSymbolNode * 256 Demangler::demangleSpecialTableSymbolNode(StringView &MangledName, 257 SpecialIntrinsicKind K) { 258 NamedIdentifierNode *NI = Arena.alloc<NamedIdentifierNode>(); 259 switch (K) { 260 case SpecialIntrinsicKind::Vftable: 261 NI->Name = "`vftable'"; 262 break; 263 case SpecialIntrinsicKind::Vbtable: 264 NI->Name = "`vbtable'"; 265 break; 266 case SpecialIntrinsicKind::LocalVftable: 267 NI->Name = "`local vftable'"; 268 break; 269 case SpecialIntrinsicKind::RttiCompleteObjLocator: 270 NI->Name = "`RTTI Complete Object Locator'"; 271 break; 272 default: 273 DEMANGLE_UNREACHABLE; 274 } 275 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI); 276 SpecialTableSymbolNode *STSN = Arena.alloc<SpecialTableSymbolNode>(); 277 STSN->Name = QN; 278 bool IsMember = false; 279 if (MangledName.empty()) { 280 Error = true; 281 return nullptr; 282 } 283 char Front = MangledName.popFront(); 284 if (Front != '6' && Front != '7') { 285 Error = true; 286 return nullptr; 287 } 288 289 std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName); 290 if (!MangledName.consumeFront('@')) 291 STSN->TargetName = demangleFullyQualifiedTypeName(MangledName); 292 return STSN; 293 } 294 295 LocalStaticGuardVariableNode * 296 Demangler::demangleLocalStaticGuard(StringView &MangledName, bool IsThread) { 297 LocalStaticGuardIdentifierNode *LSGI = 298 Arena.alloc<LocalStaticGuardIdentifierNode>(); 299 LSGI->IsThread = IsThread; 300 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI); 301 LocalStaticGuardVariableNode *LSGVN = 302 Arena.alloc<LocalStaticGuardVariableNode>(); 303 LSGVN->Name = QN; 304 305 if (MangledName.consumeFront("4IA")) 306 LSGVN->IsVisible = false; 307 else if (MangledName.consumeFront("5")) 308 LSGVN->IsVisible = true; 309 else { 310 Error = true; 311 return nullptr; 312 } 313 314 if (!MangledName.empty()) 315 LSGI->ScopeIndex = demangleUnsigned(MangledName); 316 return LSGVN; 317 } 318 319 static NamedIdentifierNode *synthesizeNamedIdentifier(ArenaAllocator &Arena, 320 StringView Name) { 321 NamedIdentifierNode *Id = Arena.alloc<NamedIdentifierNode>(); 322 Id->Name = Name; 323 return Id; 324 } 325 326 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena, 327 IdentifierNode *Identifier) { 328 QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>(); 329 QN->Components = Arena.alloc<NodeArrayNode>(); 330 QN->Components->Count = 1; 331 QN->Components->Nodes = Arena.allocArray<Node *>(1); 332 QN->Components->Nodes[0] = Identifier; 333 return QN; 334 } 335 336 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena, 337 StringView Name) { 338 NamedIdentifierNode *Id = synthesizeNamedIdentifier(Arena, Name); 339 return synthesizeQualifiedName(Arena, Id); 340 } 341 342 static VariableSymbolNode *synthesizeVariable(ArenaAllocator &Arena, 343 TypeNode *Type, 344 StringView VariableName) { 345 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 346 VSN->Type = Type; 347 VSN->Name = synthesizeQualifiedName(Arena, VariableName); 348 return VSN; 349 } 350 351 VariableSymbolNode *Demangler::demangleUntypedVariable( 352 ArenaAllocator &Arena, StringView &MangledName, StringView VariableName) { 353 NamedIdentifierNode *NI = synthesizeNamedIdentifier(Arena, VariableName); 354 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI); 355 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 356 VSN->Name = QN; 357 if (MangledName.consumeFront("8")) 358 return VSN; 359 360 Error = true; 361 return nullptr; 362 } 363 364 VariableSymbolNode * 365 Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena, 366 StringView &MangledName) { 367 RttiBaseClassDescriptorNode *RBCDN = 368 Arena.alloc<RttiBaseClassDescriptorNode>(); 369 RBCDN->NVOffset = demangleUnsigned(MangledName); 370 RBCDN->VBPtrOffset = demangleSigned(MangledName); 371 RBCDN->VBTableOffset = demangleUnsigned(MangledName); 372 RBCDN->Flags = demangleUnsigned(MangledName); 373 if (Error) 374 return nullptr; 375 376 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 377 VSN->Name = demangleNameScopeChain(MangledName, RBCDN); 378 MangledName.consumeFront('8'); 379 return VSN; 380 } 381 382 FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName, 383 bool IsDestructor) { 384 DynamicStructorIdentifierNode *DSIN = 385 Arena.alloc<DynamicStructorIdentifierNode>(); 386 DSIN->IsDestructor = IsDestructor; 387 388 bool IsKnownStaticDataMember = false; 389 if (MangledName.consumeFront('?')) 390 IsKnownStaticDataMember = true; 391 392 SymbolNode *Symbol = demangleDeclarator(MangledName); 393 if (Error) 394 return nullptr; 395 396 FunctionSymbolNode *FSN = nullptr; 397 398 if (Symbol->kind() == NodeKind::VariableSymbol) { 399 DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol); 400 401 // Older versions of clang mangled this type of symbol incorrectly. They 402 // would omit the leading ? and they would only emit a single @ at the end. 403 // The correct mangling is a leading ? and 2 trailing @ signs. Handle 404 // both cases. 405 int AtCount = IsKnownStaticDataMember ? 2 : 1; 406 for (int I = 0; I < AtCount; ++I) { 407 if (MangledName.consumeFront('@')) 408 continue; 409 Error = true; 410 return nullptr; 411 } 412 413 FSN = demangleFunctionEncoding(MangledName); 414 if (FSN) 415 FSN->Name = synthesizeQualifiedName(Arena, DSIN); 416 } else { 417 if (IsKnownStaticDataMember) { 418 // This was supposed to be a static data member, but we got a function. 419 Error = true; 420 return nullptr; 421 } 422 423 FSN = static_cast<FunctionSymbolNode *>(Symbol); 424 DSIN->Name = Symbol->Name; 425 FSN->Name = synthesizeQualifiedName(Arena, DSIN); 426 } 427 428 return FSN; 429 } 430 431 SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) { 432 SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName); 433 if (SIK == SpecialIntrinsicKind::None) 434 return nullptr; 435 436 switch (SIK) { 437 case SpecialIntrinsicKind::StringLiteralSymbol: 438 return demangleStringLiteral(MangledName); 439 case SpecialIntrinsicKind::Vftable: 440 case SpecialIntrinsicKind::Vbtable: 441 case SpecialIntrinsicKind::LocalVftable: 442 case SpecialIntrinsicKind::RttiCompleteObjLocator: 443 return demangleSpecialTableSymbolNode(MangledName, SIK); 444 case SpecialIntrinsicKind::VcallThunk: 445 return demangleVcallThunkNode(MangledName); 446 case SpecialIntrinsicKind::LocalStaticGuard: 447 return demangleLocalStaticGuard(MangledName, /*IsThread=*/false); 448 case SpecialIntrinsicKind::LocalStaticThreadGuard: 449 return demangleLocalStaticGuard(MangledName, /*IsThread=*/true); 450 case SpecialIntrinsicKind::RttiTypeDescriptor: { 451 TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result); 452 if (Error) 453 break; 454 if (!MangledName.consumeFront("@8")) 455 break; 456 if (!MangledName.empty()) 457 break; 458 return synthesizeVariable(Arena, T, "`RTTI Type Descriptor'"); 459 } 460 case SpecialIntrinsicKind::RttiBaseClassArray: 461 return demangleUntypedVariable(Arena, MangledName, 462 "`RTTI Base Class Array'"); 463 case SpecialIntrinsicKind::RttiClassHierarchyDescriptor: 464 return demangleUntypedVariable(Arena, MangledName, 465 "`RTTI Class Hierarchy Descriptor'"); 466 case SpecialIntrinsicKind::RttiBaseClassDescriptor: 467 return demangleRttiBaseClassDescriptorNode(Arena, MangledName); 468 case SpecialIntrinsicKind::DynamicInitializer: 469 return demangleInitFiniStub(MangledName, false); 470 case SpecialIntrinsicKind::DynamicAtexitDestructor: 471 return demangleInitFiniStub(MangledName, true); 472 default: 473 break; 474 } 475 Error = true; 476 return nullptr; 477 } 478 479 IdentifierNode * 480 Demangler::demangleFunctionIdentifierCode(StringView &MangledName) { 481 assert(MangledName.startsWith('?')); 482 MangledName = MangledName.dropFront(); 483 if (MangledName.empty()) { 484 Error = true; 485 return nullptr; 486 } 487 488 if (MangledName.consumeFront("__")) 489 return demangleFunctionIdentifierCode( 490 MangledName, FunctionIdentifierCodeGroup::DoubleUnder); 491 if (MangledName.consumeFront("_")) 492 return demangleFunctionIdentifierCode(MangledName, 493 FunctionIdentifierCodeGroup::Under); 494 return demangleFunctionIdentifierCode(MangledName, 495 FunctionIdentifierCodeGroup::Basic); 496 } 497 498 StructorIdentifierNode * 499 Demangler::demangleStructorIdentifier(StringView &MangledName, 500 bool IsDestructor) { 501 StructorIdentifierNode *N = Arena.alloc<StructorIdentifierNode>(); 502 N->IsDestructor = IsDestructor; 503 return N; 504 } 505 506 ConversionOperatorIdentifierNode * 507 Demangler::demangleConversionOperatorIdentifier(StringView &MangledName) { 508 ConversionOperatorIdentifierNode *N = 509 Arena.alloc<ConversionOperatorIdentifierNode>(); 510 return N; 511 } 512 513 LiteralOperatorIdentifierNode * 514 Demangler::demangleLiteralOperatorIdentifier(StringView &MangledName) { 515 LiteralOperatorIdentifierNode *N = 516 Arena.alloc<LiteralOperatorIdentifierNode>(); 517 N->Name = demangleSimpleString(MangledName, /*Memorize=*/false); 518 return N; 519 } 520 521 IntrinsicFunctionKind 522 Demangler::translateIntrinsicFunctionCode(char CH, 523 FunctionIdentifierCodeGroup Group) { 524 using IFK = IntrinsicFunctionKind; 525 if (!(CH >= '0' && CH <= '9') && !(CH >= 'A' && CH <= 'Z')) { 526 Error = true; 527 return IFK::None; 528 } 529 530 // Not all ? identifiers are intrinsics *functions*. This function only maps 531 // operator codes for the special functions, all others are handled elsewhere, 532 // hence the IFK::None entries in the table. 533 static IFK Basic[36] = { 534 IFK::None, // ?0 # Foo::Foo() 535 IFK::None, // ?1 # Foo::~Foo() 536 IFK::New, // ?2 # operator new 537 IFK::Delete, // ?3 # operator delete 538 IFK::Assign, // ?4 # operator= 539 IFK::RightShift, // ?5 # operator>> 540 IFK::LeftShift, // ?6 # operator<< 541 IFK::LogicalNot, // ?7 # operator! 542 IFK::Equals, // ?8 # operator== 543 IFK::NotEquals, // ?9 # operator!= 544 IFK::ArraySubscript, // ?A # operator[] 545 IFK::None, // ?B # Foo::operator <type>() 546 IFK::Pointer, // ?C # operator-> 547 IFK::Dereference, // ?D # operator* 548 IFK::Increment, // ?E # operator++ 549 IFK::Decrement, // ?F # operator-- 550 IFK::Minus, // ?G # operator- 551 IFK::Plus, // ?H # operator+ 552 IFK::BitwiseAnd, // ?I # operator& 553 IFK::MemberPointer, // ?J # operator->* 554 IFK::Divide, // ?K # operator/ 555 IFK::Modulus, // ?L # operator% 556 IFK::LessThan, // ?M operator< 557 IFK::LessThanEqual, // ?N operator<= 558 IFK::GreaterThan, // ?O operator> 559 IFK::GreaterThanEqual, // ?P operator>= 560 IFK::Comma, // ?Q operator, 561 IFK::Parens, // ?R operator() 562 IFK::BitwiseNot, // ?S operator~ 563 IFK::BitwiseXor, // ?T operator^ 564 IFK::BitwiseOr, // ?U operator| 565 IFK::LogicalAnd, // ?V operator&& 566 IFK::LogicalOr, // ?W operator|| 567 IFK::TimesEqual, // ?X operator*= 568 IFK::PlusEqual, // ?Y operator+= 569 IFK::MinusEqual, // ?Z operator-= 570 }; 571 static IFK Under[36] = { 572 IFK::DivEqual, // ?_0 operator/= 573 IFK::ModEqual, // ?_1 operator%= 574 IFK::RshEqual, // ?_2 operator>>= 575 IFK::LshEqual, // ?_3 operator<<= 576 IFK::BitwiseAndEqual, // ?_4 operator&= 577 IFK::BitwiseOrEqual, // ?_5 operator|= 578 IFK::BitwiseXorEqual, // ?_6 operator^= 579 IFK::None, // ?_7 # vftable 580 IFK::None, // ?_8 # vbtable 581 IFK::None, // ?_9 # vcall 582 IFK::None, // ?_A # typeof 583 IFK::None, // ?_B # local static guard 584 IFK::None, // ?_C # string literal 585 IFK::VbaseDtor, // ?_D # vbase destructor 586 IFK::VecDelDtor, // ?_E # vector deleting destructor 587 IFK::DefaultCtorClosure, // ?_F # default constructor closure 588 IFK::ScalarDelDtor, // ?_G # scalar deleting destructor 589 IFK::VecCtorIter, // ?_H # vector constructor iterator 590 IFK::VecDtorIter, // ?_I # vector destructor iterator 591 IFK::VecVbaseCtorIter, // ?_J # vector vbase constructor iterator 592 IFK::VdispMap, // ?_K # virtual displacement map 593 IFK::EHVecCtorIter, // ?_L # eh vector constructor iterator 594 IFK::EHVecDtorIter, // ?_M # eh vector destructor iterator 595 IFK::EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator 596 IFK::CopyCtorClosure, // ?_O # copy constructor closure 597 IFK::None, // ?_P<name> # udt returning <name> 598 IFK::None, // ?_Q # <unknown> 599 IFK::None, // ?_R0 - ?_R4 # RTTI Codes 600 IFK::None, // ?_S # local vftable 601 IFK::LocalVftableCtorClosure, // ?_T # local vftable constructor closure 602 IFK::ArrayNew, // ?_U operator new[] 603 IFK::ArrayDelete, // ?_V operator delete[] 604 IFK::None, // ?_W <unused> 605 IFK::None, // ?_X <unused> 606 IFK::None, // ?_Y <unused> 607 IFK::None, // ?_Z <unused> 608 }; 609 static IFK DoubleUnder[36] = { 610 IFK::None, // ?__0 <unused> 611 IFK::None, // ?__1 <unused> 612 IFK::None, // ?__2 <unused> 613 IFK::None, // ?__3 <unused> 614 IFK::None, // ?__4 <unused> 615 IFK::None, // ?__5 <unused> 616 IFK::None, // ?__6 <unused> 617 IFK::None, // ?__7 <unused> 618 IFK::None, // ?__8 <unused> 619 IFK::None, // ?__9 <unused> 620 IFK::ManVectorCtorIter, // ?__A managed vector ctor iterator 621 IFK::ManVectorDtorIter, // ?__B managed vector dtor iterator 622 IFK::EHVectorCopyCtorIter, // ?__C EH vector copy ctor iterator 623 IFK::EHVectorVbaseCopyCtorIter, // ?__D EH vector vbase copy ctor iter 624 IFK::None, // ?__E dynamic initializer for `T' 625 IFK::None, // ?__F dynamic atexit destructor for `T' 626 IFK::VectorCopyCtorIter, // ?__G vector copy constructor iter 627 IFK::VectorVbaseCopyCtorIter, // ?__H vector vbase copy ctor iter 628 IFK::ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy ctor 629 // iter 630 IFK::None, // ?__J local static thread guard 631 IFK::None, // ?__K operator ""_name 632 IFK::CoAwait, // ?__L operator co_await 633 IFK::Spaceship, // ?__M operator<=> 634 IFK::None, // ?__N <unused> 635 IFK::None, // ?__O <unused> 636 IFK::None, // ?__P <unused> 637 IFK::None, // ?__Q <unused> 638 IFK::None, // ?__R <unused> 639 IFK::None, // ?__S <unused> 640 IFK::None, // ?__T <unused> 641 IFK::None, // ?__U <unused> 642 IFK::None, // ?__V <unused> 643 IFK::None, // ?__W <unused> 644 IFK::None, // ?__X <unused> 645 IFK::None, // ?__Y <unused> 646 IFK::None, // ?__Z <unused> 647 }; 648 649 int Index = (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10); 650 switch (Group) { 651 case FunctionIdentifierCodeGroup::Basic: 652 return Basic[Index]; 653 case FunctionIdentifierCodeGroup::Under: 654 return Under[Index]; 655 case FunctionIdentifierCodeGroup::DoubleUnder: 656 return DoubleUnder[Index]; 657 } 658 DEMANGLE_UNREACHABLE; 659 } 660 661 IdentifierNode * 662 Demangler::demangleFunctionIdentifierCode(StringView &MangledName, 663 FunctionIdentifierCodeGroup Group) { 664 if (MangledName.empty()) { 665 Error = true; 666 return nullptr; 667 } 668 switch (Group) { 669 case FunctionIdentifierCodeGroup::Basic: 670 switch (char CH = MangledName.popFront()) { 671 case '0': 672 case '1': 673 return demangleStructorIdentifier(MangledName, CH == '1'); 674 case 'B': 675 return demangleConversionOperatorIdentifier(MangledName); 676 default: 677 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 678 translateIntrinsicFunctionCode(CH, Group)); 679 } 680 case FunctionIdentifierCodeGroup::Under: 681 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 682 translateIntrinsicFunctionCode(MangledName.popFront(), Group)); 683 case FunctionIdentifierCodeGroup::DoubleUnder: 684 switch (char CH = MangledName.popFront()) { 685 case 'K': 686 return demangleLiteralOperatorIdentifier(MangledName); 687 default: 688 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 689 translateIntrinsicFunctionCode(CH, Group)); 690 } 691 } 692 693 DEMANGLE_UNREACHABLE; 694 } 695 696 SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName, 697 QualifiedNameNode *Name) { 698 if (MangledName.empty()) { 699 Error = true; 700 return nullptr; 701 } 702 703 // Read a variable. 704 switch (MangledName.front()) { 705 case '0': 706 case '1': 707 case '2': 708 case '3': 709 case '4': { 710 StorageClass SC = demangleVariableStorageClass(MangledName); 711 return demangleVariableEncoding(MangledName, SC); 712 } 713 } 714 FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName); 715 716 IdentifierNode *UQN = Name->getUnqualifiedIdentifier(); 717 if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) { 718 ConversionOperatorIdentifierNode *COIN = 719 static_cast<ConversionOperatorIdentifierNode *>(UQN); 720 if (FSN) 721 COIN->TargetType = FSN->Signature->ReturnType; 722 } 723 return FSN; 724 } 725 726 SymbolNode *Demangler::demangleDeclarator(StringView &MangledName) { 727 // What follows is a main symbol name. This may include namespaces or class 728 // back references. 729 QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName); 730 if (Error) 731 return nullptr; 732 733 SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN); 734 if (Error) 735 return nullptr; 736 Symbol->Name = QN; 737 738 IdentifierNode *UQN = QN->getUnqualifiedIdentifier(); 739 if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) { 740 ConversionOperatorIdentifierNode *COIN = 741 static_cast<ConversionOperatorIdentifierNode *>(UQN); 742 if (!COIN->TargetType) { 743 Error = true; 744 return nullptr; 745 } 746 } 747 return Symbol; 748 } 749 750 SymbolNode *Demangler::demangleMD5Name(StringView &MangledName) { 751 assert(MangledName.startsWith("??@")); 752 // This is an MD5 mangled name. We can't demangle it, just return the 753 // mangled name. 754 // An MD5 mangled name is ??@ followed by 32 characters and a terminating @. 755 size_t MD5Last = MangledName.find('@', strlen("??@")); 756 if (MD5Last == StringView::npos) { 757 Error = true; 758 return nullptr; 759 } 760 const char *Start = MangledName.begin(); 761 MangledName = MangledName.dropFront(MD5Last + 1); 762 763 // There are two additional special cases for MD5 names: 764 // 1. For complete object locators where the object name is long enough 765 // for the object to have an MD5 name, the complete object locator is 766 // called ??@...@??_R4@ (with a trailing "??_R4@" instead of the usual 767 // leading "??_R4". This is handled here. 768 // 2. For catchable types, in versions of MSVC before 2015 (<1900) or after 769 // 2017.2 (>= 1914), the catchable type mangling is _CT??@...@??@...@8 770 // instead of_CT??@...@8 with just one MD5 name. Since we don't yet 771 // demangle catchable types anywhere, this isn't handled for MD5 names 772 // either. 773 MangledName.consumeFront("??_R4@"); 774 775 StringView MD5(Start, MangledName.begin()); 776 SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol); 777 S->Name = synthesizeQualifiedName(Arena, MD5); 778 779 return S; 780 } 781 782 // Parser entry point. 783 SymbolNode *Demangler::parse(StringView &MangledName) { 784 if (MangledName.startsWith("??@")) 785 return demangleMD5Name(MangledName); 786 787 // MSVC-style mangled symbols must start with '?'. 788 if (!MangledName.startsWith('?')) { 789 Error = true; 790 return nullptr; 791 } 792 793 MangledName.consumeFront('?'); 794 795 // ?$ is a template instantiation, but all other names that start with ? are 796 // operators / special names. 797 if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName)) 798 return SI; 799 800 return demangleDeclarator(MangledName); 801 } 802 803 TagTypeNode *Demangler::parseTagUniqueName(StringView &MangledName) { 804 if (!MangledName.consumeFront(".?A")) 805 return nullptr; 806 MangledName.consumeFront(".?A"); 807 if (MangledName.empty()) 808 return nullptr; 809 810 return demangleClassType(MangledName); 811 } 812 813 // <type-encoding> ::= <storage-class> <variable-type> 814 // <storage-class> ::= 0 # private static member 815 // ::= 1 # protected static member 816 // ::= 2 # public static member 817 // ::= 3 # global 818 // ::= 4 # static local 819 820 VariableSymbolNode *Demangler::demangleVariableEncoding(StringView &MangledName, 821 StorageClass SC) { 822 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 823 824 VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop); 825 VSN->SC = SC; 826 827 if (Error) 828 return nullptr; 829 830 // <variable-type> ::= <type> <cvr-qualifiers> 831 // ::= <type> <pointee-cvr-qualifiers> # pointers, references 832 switch (VSN->Type->kind()) { 833 case NodeKind::PointerType: { 834 PointerTypeNode *PTN = static_cast<PointerTypeNode *>(VSN->Type); 835 836 Qualifiers ExtraChildQuals = Q_None; 837 PTN->Quals = Qualifiers(VSN->Type->Quals | 838 demanglePointerExtQualifiers(MangledName)); 839 840 bool IsMember = false; 841 std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName); 842 843 if (PTN->ClassParent) { 844 QualifiedNameNode *BackRefName = 845 demangleFullyQualifiedTypeName(MangledName); 846 (void)BackRefName; 847 } 848 PTN->Pointee->Quals = Qualifiers(PTN->Pointee->Quals | ExtraChildQuals); 849 850 break; 851 } 852 default: 853 VSN->Type->Quals = demangleQualifiers(MangledName).first; 854 break; 855 } 856 857 return VSN; 858 } 859 860 // Sometimes numbers are encoded in mangled symbols. For example, 861 // "int (*x)[20]" is a valid C type (x is a pointer to an array of 862 // length 20), so we need some way to embed numbers as part of symbols. 863 // This function parses it. 864 // 865 // <number> ::= [?] <non-negative integer> 866 // 867 // <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10 868 // ::= <hex digit>+ @ # when Number == 0 or >= 10 869 // 870 // <hex-digit> ::= [A-P] # A = 0, B = 1, ... 871 std::pair<uint64_t, bool> Demangler::demangleNumber(StringView &MangledName) { 872 bool IsNegative = MangledName.consumeFront('?'); 873 874 if (startsWithDigit(MangledName)) { 875 uint64_t Ret = MangledName[0] - '0' + 1; 876 MangledName = MangledName.dropFront(1); 877 return {Ret, IsNegative}; 878 } 879 880 uint64_t Ret = 0; 881 for (size_t i = 0; i < MangledName.size(); ++i) { 882 char C = MangledName[i]; 883 if (C == '@') { 884 MangledName = MangledName.dropFront(i + 1); 885 return {Ret, IsNegative}; 886 } 887 if ('A' <= C && C <= 'P') { 888 Ret = (Ret << 4) + (C - 'A'); 889 continue; 890 } 891 break; 892 } 893 894 Error = true; 895 return {0ULL, false}; 896 } 897 898 uint64_t Demangler::demangleUnsigned(StringView &MangledName) { 899 bool IsNegative = false; 900 uint64_t Number = 0; 901 std::tie(Number, IsNegative) = demangleNumber(MangledName); 902 if (IsNegative) 903 Error = true; 904 return Number; 905 } 906 907 int64_t Demangler::demangleSigned(StringView &MangledName) { 908 bool IsNegative = false; 909 uint64_t Number = 0; 910 std::tie(Number, IsNegative) = demangleNumber(MangledName); 911 if (Number > INT64_MAX) 912 Error = true; 913 int64_t I = static_cast<int64_t>(Number); 914 return IsNegative ? -I : I; 915 } 916 917 // First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9. 918 // Memorize it. 919 void Demangler::memorizeString(StringView S) { 920 if (Backrefs.NamesCount >= BackrefContext::Max) 921 return; 922 for (size_t i = 0; i < Backrefs.NamesCount; ++i) 923 if (S == Backrefs.Names[i]->Name) 924 return; 925 NamedIdentifierNode *N = Arena.alloc<NamedIdentifierNode>(); 926 N->Name = S; 927 Backrefs.Names[Backrefs.NamesCount++] = N; 928 } 929 930 NamedIdentifierNode *Demangler::demangleBackRefName(StringView &MangledName) { 931 assert(startsWithDigit(MangledName)); 932 933 size_t I = MangledName[0] - '0'; 934 if (I >= Backrefs.NamesCount) { 935 Error = true; 936 return nullptr; 937 } 938 939 MangledName = MangledName.dropFront(); 940 return Backrefs.Names[I]; 941 } 942 943 void Demangler::memorizeIdentifier(IdentifierNode *Identifier) { 944 // Render this class template name into a string buffer so that we can 945 // memorize it for the purpose of back-referencing. 946 OutputStream OS; 947 if (!initializeOutputStream(nullptr, nullptr, OS, 1024)) 948 // FIXME: Propagate out-of-memory as an error? 949 std::terminate(); 950 Identifier->output(OS, OF_Default); 951 OS << '\0'; 952 char *Name = OS.getBuffer(); 953 954 StringView Owned = copyString(Name); 955 memorizeString(Owned); 956 std::free(Name); 957 } 958 959 IdentifierNode * 960 Demangler::demangleTemplateInstantiationName(StringView &MangledName, 961 NameBackrefBehavior NBB) { 962 assert(MangledName.startsWith("?$")); 963 MangledName.consumeFront("?$"); 964 965 BackrefContext OuterContext; 966 std::swap(OuterContext, Backrefs); 967 968 IdentifierNode *Identifier = 969 demangleUnqualifiedSymbolName(MangledName, NBB_Simple); 970 if (!Error) 971 Identifier->TemplateParams = demangleTemplateParameterList(MangledName); 972 973 std::swap(OuterContext, Backrefs); 974 if (Error) 975 return nullptr; 976 977 if (NBB & NBB_Template) { 978 // NBB_Template is only set for types and non-leaf names ("a::" in "a::b"). 979 // Structors and conversion operators only makes sense in a leaf name, so 980 // reject them in NBB_Template contexts. 981 if (Identifier->kind() == NodeKind::ConversionOperatorIdentifier || 982 Identifier->kind() == NodeKind::StructorIdentifier) { 983 Error = true; 984 return nullptr; 985 } 986 987 memorizeIdentifier(Identifier); 988 } 989 990 return Identifier; 991 } 992 993 NamedIdentifierNode *Demangler::demangleSimpleName(StringView &MangledName, 994 bool Memorize) { 995 StringView S = demangleSimpleString(MangledName, Memorize); 996 if (Error) 997 return nullptr; 998 999 NamedIdentifierNode *Name = Arena.alloc<NamedIdentifierNode>(); 1000 Name->Name = S; 1001 return Name; 1002 } 1003 1004 static bool isRebasedHexDigit(char C) { return (C >= 'A' && C <= 'P'); } 1005 1006 static uint8_t rebasedHexDigitToNumber(char C) { 1007 assert(isRebasedHexDigit(C)); 1008 return (C <= 'J') ? (C - 'A') : (10 + C - 'K'); 1009 } 1010 1011 uint8_t Demangler::demangleCharLiteral(StringView &MangledName) { 1012 assert(!MangledName.empty()); 1013 if (!MangledName.startsWith('?')) 1014 return MangledName.popFront(); 1015 1016 MangledName = MangledName.dropFront(); 1017 if (MangledName.empty()) 1018 goto CharLiteralError; 1019 1020 if (MangledName.consumeFront('$')) { 1021 // Two hex digits 1022 if (MangledName.size() < 2) 1023 goto CharLiteralError; 1024 StringView Nibbles = MangledName.substr(0, 2); 1025 if (!isRebasedHexDigit(Nibbles[0]) || !isRebasedHexDigit(Nibbles[1])) 1026 goto CharLiteralError; 1027 // Don't append the null terminator. 1028 uint8_t C1 = rebasedHexDigitToNumber(Nibbles[0]); 1029 uint8_t C2 = rebasedHexDigitToNumber(Nibbles[1]); 1030 MangledName = MangledName.dropFront(2); 1031 return (C1 << 4) | C2; 1032 } 1033 1034 if (startsWithDigit(MangledName)) { 1035 const char *Lookup = ",/\\:. \n\t'-"; 1036 char C = Lookup[MangledName[0] - '0']; 1037 MangledName = MangledName.dropFront(); 1038 return C; 1039 } 1040 1041 if (MangledName[0] >= 'a' && MangledName[0] <= 'z') { 1042 char Lookup[26] = {'\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7', 1043 '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE', 1044 '\xEF', '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5', 1045 '\xF6', '\xF7', '\xF8', '\xF9', '\xFA'}; 1046 char C = Lookup[MangledName[0] - 'a']; 1047 MangledName = MangledName.dropFront(); 1048 return C; 1049 } 1050 1051 if (MangledName[0] >= 'A' && MangledName[0] <= 'Z') { 1052 char Lookup[26] = {'\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7', 1053 '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE', 1054 '\xCF', '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5', 1055 '\xD6', '\xD7', '\xD8', '\xD9', '\xDA'}; 1056 char C = Lookup[MangledName[0] - 'A']; 1057 MangledName = MangledName.dropFront(); 1058 return C; 1059 } 1060 1061 CharLiteralError: 1062 Error = true; 1063 return '\0'; 1064 } 1065 1066 wchar_t Demangler::demangleWcharLiteral(StringView &MangledName) { 1067 uint8_t C1, C2; 1068 1069 C1 = demangleCharLiteral(MangledName); 1070 if (Error || MangledName.empty()) 1071 goto WCharLiteralError; 1072 C2 = demangleCharLiteral(MangledName); 1073 if (Error) 1074 goto WCharLiteralError; 1075 1076 return ((wchar_t)C1 << 8) | (wchar_t)C2; 1077 1078 WCharLiteralError: 1079 Error = true; 1080 return L'\0'; 1081 } 1082 1083 static void writeHexDigit(char *Buffer, uint8_t Digit) { 1084 assert(Digit <= 15); 1085 *Buffer = (Digit < 10) ? ('0' + Digit) : ('A' + Digit - 10); 1086 } 1087 1088 static void outputHex(OutputStream &OS, unsigned C) { 1089 if (C == 0) { 1090 OS << "\\x00"; 1091 return; 1092 } 1093 // It's easier to do the math if we can work from right to left, but we need 1094 // to print the numbers from left to right. So render this into a temporary 1095 // buffer first, then output the temporary buffer. Each byte is of the form 1096 // \xAB, which means that each byte needs 4 characters. Since there are at 1097 // most 4 bytes, we need a 4*4+1 = 17 character temporary buffer. 1098 char TempBuffer[17]; 1099 1100 ::memset(TempBuffer, 0, sizeof(TempBuffer)); 1101 constexpr int MaxPos = sizeof(TempBuffer) - 1; 1102 1103 int Pos = MaxPos - 1; // TempBuffer[MaxPos] is the terminating \0. 1104 while (C != 0) { 1105 for (int I = 0; I < 2; ++I) { 1106 writeHexDigit(&TempBuffer[Pos--], C % 16); 1107 C /= 16; 1108 } 1109 } 1110 TempBuffer[Pos--] = 'x'; 1111 assert(Pos >= 0); 1112 TempBuffer[Pos--] = '\\'; 1113 OS << StringView(&TempBuffer[Pos + 1]); 1114 } 1115 1116 static void outputEscapedChar(OutputStream &OS, unsigned C) { 1117 switch (C) { 1118 case '\0': // nul 1119 OS << "\\0"; 1120 return; 1121 case '\'': // single quote 1122 OS << "\\\'"; 1123 return; 1124 case '\"': // double quote 1125 OS << "\\\""; 1126 return; 1127 case '\\': // backslash 1128 OS << "\\\\"; 1129 return; 1130 case '\a': // bell 1131 OS << "\\a"; 1132 return; 1133 case '\b': // backspace 1134 OS << "\\b"; 1135 return; 1136 case '\f': // form feed 1137 OS << "\\f"; 1138 return; 1139 case '\n': // new line 1140 OS << "\\n"; 1141 return; 1142 case '\r': // carriage return 1143 OS << "\\r"; 1144 return; 1145 case '\t': // tab 1146 OS << "\\t"; 1147 return; 1148 case '\v': // vertical tab 1149 OS << "\\v"; 1150 return; 1151 default: 1152 break; 1153 } 1154 1155 if (C > 0x1F && C < 0x7F) { 1156 // Standard ascii char. 1157 OS << (char)C; 1158 return; 1159 } 1160 1161 outputHex(OS, C); 1162 } 1163 1164 static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) { 1165 const uint8_t *End = StringBytes + Length - 1; 1166 unsigned Count = 0; 1167 while (Length > 0 && *End == 0) { 1168 --Length; 1169 --End; 1170 ++Count; 1171 } 1172 return Count; 1173 } 1174 1175 static unsigned countEmbeddedNulls(const uint8_t *StringBytes, 1176 unsigned Length) { 1177 unsigned Result = 0; 1178 for (unsigned I = 0; I < Length; ++I) { 1179 if (*StringBytes++ == 0) 1180 ++Result; 1181 } 1182 return Result; 1183 } 1184 1185 // A mangled (non-wide) string literal stores the total length of the string it 1186 // refers to (passed in NumBytes), and it contains up to 32 bytes of actual text 1187 // (passed in StringBytes, NumChars). 1188 static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars, 1189 uint64_t NumBytes) { 1190 assert(NumBytes > 0); 1191 1192 // If the number of bytes is odd, this is guaranteed to be a char string. 1193 if (NumBytes % 2 == 1) 1194 return 1; 1195 1196 // All strings can encode at most 32 bytes of data. If it's less than that, 1197 // then we encoded the entire string. In this case we check for a 1-byte, 1198 // 2-byte, or 4-byte null terminator. 1199 if (NumBytes < 32) { 1200 unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars); 1201 if (TrailingNulls >= 4 && NumBytes % 4 == 0) 1202 return 4; 1203 if (TrailingNulls >= 2) 1204 return 2; 1205 return 1; 1206 } 1207 1208 // The whole string was not able to be encoded. Try to look at embedded null 1209 // terminators to guess. The heuristic is that we count all embedded null 1210 // terminators. If more than 2/3 are null, it's a char32. If more than 1/3 1211 // are null, it's a char16. Otherwise it's a char8. This obviously isn't 1212 // perfect and is biased towards languages that have ascii alphabets, but this 1213 // was always going to be best effort since the encoding is lossy. 1214 unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars); 1215 if (Nulls >= 2 * NumChars / 3 && NumBytes % 4 == 0) 1216 return 4; 1217 if (Nulls >= NumChars / 3) 1218 return 2; 1219 return 1; 1220 } 1221 1222 static unsigned decodeMultiByteChar(const uint8_t *StringBytes, 1223 unsigned CharIndex, unsigned CharBytes) { 1224 assert(CharBytes == 1 || CharBytes == 2 || CharBytes == 4); 1225 unsigned Offset = CharIndex * CharBytes; 1226 unsigned Result = 0; 1227 StringBytes = StringBytes + Offset; 1228 for (unsigned I = 0; I < CharBytes; ++I) { 1229 unsigned C = static_cast<unsigned>(StringBytes[I]); 1230 Result |= C << (8 * I); 1231 } 1232 return Result; 1233 } 1234 1235 FunctionSymbolNode *Demangler::demangleVcallThunkNode(StringView &MangledName) { 1236 FunctionSymbolNode *FSN = Arena.alloc<FunctionSymbolNode>(); 1237 VcallThunkIdentifierNode *VTIN = Arena.alloc<VcallThunkIdentifierNode>(); 1238 FSN->Signature = Arena.alloc<ThunkSignatureNode>(); 1239 FSN->Signature->FunctionClass = FC_NoParameterList; 1240 1241 FSN->Name = demangleNameScopeChain(MangledName, VTIN); 1242 if (!Error) 1243 Error = !MangledName.consumeFront("$B"); 1244 if (!Error) 1245 VTIN->OffsetInVTable = demangleUnsigned(MangledName); 1246 if (!Error) 1247 Error = !MangledName.consumeFront('A'); 1248 if (!Error) 1249 FSN->Signature->CallConvention = demangleCallingConvention(MangledName); 1250 return (Error) ? nullptr : FSN; 1251 } 1252 1253 EncodedStringLiteralNode * 1254 Demangler::demangleStringLiteral(StringView &MangledName) { 1255 // This function uses goto, so declare all variables up front. 1256 OutputStream OS; 1257 StringView CRC; 1258 uint64_t StringByteSize; 1259 bool IsWcharT = false; 1260 bool IsNegative = false; 1261 size_t CrcEndPos = 0; 1262 char *ResultBuffer = nullptr; 1263 1264 EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>(); 1265 1266 // Must happen before the first `goto StringLiteralError`. 1267 if (!initializeOutputStream(nullptr, nullptr, OS, 1024)) 1268 // FIXME: Propagate out-of-memory as an error? 1269 std::terminate(); 1270 1271 // Prefix indicating the beginning of a string literal 1272 if (!MangledName.consumeFront("@_")) 1273 goto StringLiteralError; 1274 if (MangledName.empty()) 1275 goto StringLiteralError; 1276 1277 // Char Type (regular or wchar_t) 1278 switch (MangledName.popFront()) { 1279 case '1': 1280 IsWcharT = true; 1281 DEMANGLE_FALLTHROUGH; 1282 case '0': 1283 break; 1284 default: 1285 goto StringLiteralError; 1286 } 1287 1288 // Encoded Length 1289 std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName); 1290 if (Error || IsNegative || StringByteSize < (IsWcharT ? 2 : 1)) 1291 goto StringLiteralError; 1292 1293 // CRC 32 (always 8 characters plus a terminator) 1294 CrcEndPos = MangledName.find('@'); 1295 if (CrcEndPos == StringView::npos) 1296 goto StringLiteralError; 1297 CRC = MangledName.substr(0, CrcEndPos); 1298 MangledName = MangledName.dropFront(CrcEndPos + 1); 1299 if (MangledName.empty()) 1300 goto StringLiteralError; 1301 1302 if (IsWcharT) { 1303 Result->Char = CharKind::Wchar; 1304 if (StringByteSize > 64) 1305 Result->IsTruncated = true; 1306 1307 while (!MangledName.consumeFront('@')) { 1308 if (MangledName.size() < 2) 1309 goto StringLiteralError; 1310 wchar_t W = demangleWcharLiteral(MangledName); 1311 if (StringByteSize != 2 || Result->IsTruncated) 1312 outputEscapedChar(OS, W); 1313 StringByteSize -= 2; 1314 if (Error) 1315 goto StringLiteralError; 1316 } 1317 } else { 1318 // The max byte length is actually 32, but some compilers mangled strings 1319 // incorrectly, so we have to assume it can go higher. 1320 constexpr unsigned MaxStringByteLength = 32 * 4; 1321 uint8_t StringBytes[MaxStringByteLength]; 1322 1323 unsigned BytesDecoded = 0; 1324 while (!MangledName.consumeFront('@')) { 1325 if (MangledName.size() < 1 || BytesDecoded >= MaxStringByteLength) 1326 goto StringLiteralError; 1327 StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName); 1328 } 1329 1330 if (StringByteSize > BytesDecoded) 1331 Result->IsTruncated = true; 1332 1333 unsigned CharBytes = 1334 guessCharByteSize(StringBytes, BytesDecoded, StringByteSize); 1335 assert(StringByteSize % CharBytes == 0); 1336 switch (CharBytes) { 1337 case 1: 1338 Result->Char = CharKind::Char; 1339 break; 1340 case 2: 1341 Result->Char = CharKind::Char16; 1342 break; 1343 case 4: 1344 Result->Char = CharKind::Char32; 1345 break; 1346 default: 1347 DEMANGLE_UNREACHABLE; 1348 } 1349 const unsigned NumChars = BytesDecoded / CharBytes; 1350 for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) { 1351 unsigned NextChar = 1352 decodeMultiByteChar(StringBytes, CharIndex, CharBytes); 1353 if (CharIndex + 1 < NumChars || Result->IsTruncated) 1354 outputEscapedChar(OS, NextChar); 1355 } 1356 } 1357 1358 OS << '\0'; 1359 ResultBuffer = OS.getBuffer(); 1360 Result->DecodedString = copyString(ResultBuffer); 1361 std::free(ResultBuffer); 1362 return Result; 1363 1364 StringLiteralError: 1365 Error = true; 1366 std::free(OS.getBuffer()); 1367 return nullptr; 1368 } 1369 1370 // Returns MangledName's prefix before the first '@', or an error if 1371 // MangledName contains no '@' or the prefix has length 0. 1372 StringView Demangler::demangleSimpleString(StringView &MangledName, 1373 bool Memorize) { 1374 StringView S; 1375 for (size_t i = 0; i < MangledName.size(); ++i) { 1376 if (MangledName[i] != '@') 1377 continue; 1378 if (i == 0) 1379 break; 1380 S = MangledName.substr(0, i); 1381 MangledName = MangledName.dropFront(i + 1); 1382 1383 if (Memorize) 1384 memorizeString(S); 1385 return S; 1386 } 1387 1388 Error = true; 1389 return {}; 1390 } 1391 1392 NamedIdentifierNode * 1393 Demangler::demangleAnonymousNamespaceName(StringView &MangledName) { 1394 assert(MangledName.startsWith("?A")); 1395 MangledName.consumeFront("?A"); 1396 1397 NamedIdentifierNode *Node = Arena.alloc<NamedIdentifierNode>(); 1398 Node->Name = "`anonymous namespace'"; 1399 size_t EndPos = MangledName.find('@'); 1400 if (EndPos == StringView::npos) { 1401 Error = true; 1402 return nullptr; 1403 } 1404 StringView NamespaceKey = MangledName.substr(0, EndPos); 1405 memorizeString(NamespaceKey); 1406 MangledName = MangledName.substr(EndPos + 1); 1407 return Node; 1408 } 1409 1410 NamedIdentifierNode * 1411 Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) { 1412 assert(startsWithLocalScopePattern(MangledName)); 1413 1414 NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>(); 1415 MangledName.consumeFront('?'); 1416 uint64_t Number = 0; 1417 bool IsNegative = false; 1418 std::tie(Number, IsNegative) = demangleNumber(MangledName); 1419 assert(!IsNegative); 1420 1421 // One ? to terminate the number 1422 MangledName.consumeFront('?'); 1423 1424 assert(!Error); 1425 Node *Scope = parse(MangledName); 1426 if (Error) 1427 return nullptr; 1428 1429 // Render the parent symbol's name into a buffer. 1430 OutputStream OS; 1431 if (!initializeOutputStream(nullptr, nullptr, OS, 1024)) 1432 // FIXME: Propagate out-of-memory as an error? 1433 std::terminate(); 1434 OS << '`'; 1435 Scope->output(OS, OF_Default); 1436 OS << '\''; 1437 OS << "::`" << Number << "'"; 1438 OS << '\0'; 1439 char *Result = OS.getBuffer(); 1440 Identifier->Name = copyString(Result); 1441 std::free(Result); 1442 return Identifier; 1443 } 1444 1445 // Parses a type name in the form of A@B@C@@ which represents C::B::A. 1446 QualifiedNameNode * 1447 Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) { 1448 IdentifierNode *Identifier = 1449 demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true); 1450 if (Error) 1451 return nullptr; 1452 assert(Identifier); 1453 1454 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier); 1455 if (Error) 1456 return nullptr; 1457 assert(QN); 1458 return QN; 1459 } 1460 1461 // Parses a symbol name in the form of A@B@C@@ which represents C::B::A. 1462 // Symbol names have slightly different rules regarding what can appear 1463 // so we separate out the implementations for flexibility. 1464 QualifiedNameNode * 1465 Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) { 1466 // This is the final component of a symbol name (i.e. the leftmost component 1467 // of a mangled name. Since the only possible template instantiation that 1468 // can appear in this context is a function template, and since those are 1469 // not saved for the purposes of name backreferences, only backref simple 1470 // names. 1471 IdentifierNode *Identifier = 1472 demangleUnqualifiedSymbolName(MangledName, NBB_Simple); 1473 if (Error) 1474 return nullptr; 1475 1476 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier); 1477 if (Error) 1478 return nullptr; 1479 1480 if (Identifier->kind() == NodeKind::StructorIdentifier) { 1481 if (QN->Components->Count < 2) { 1482 Error = true; 1483 return nullptr; 1484 } 1485 StructorIdentifierNode *SIN = 1486 static_cast<StructorIdentifierNode *>(Identifier); 1487 Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2]; 1488 SIN->Class = static_cast<IdentifierNode *>(ClassNode); 1489 } 1490 assert(QN); 1491 return QN; 1492 } 1493 1494 IdentifierNode *Demangler::demangleUnqualifiedTypeName(StringView &MangledName, 1495 bool Memorize) { 1496 // An inner-most name can be a back-reference, because a fully-qualified name 1497 // (e.g. Scope + Inner) can contain other fully qualified names inside of 1498 // them (for example template parameters), and these nested parameters can 1499 // refer to previously mangled types. 1500 if (startsWithDigit(MangledName)) 1501 return demangleBackRefName(MangledName); 1502 1503 if (MangledName.startsWith("?$")) 1504 return demangleTemplateInstantiationName(MangledName, NBB_Template); 1505 1506 return demangleSimpleName(MangledName, Memorize); 1507 } 1508 1509 IdentifierNode * 1510 Demangler::demangleUnqualifiedSymbolName(StringView &MangledName, 1511 NameBackrefBehavior NBB) { 1512 if (startsWithDigit(MangledName)) 1513 return demangleBackRefName(MangledName); 1514 if (MangledName.startsWith("?$")) 1515 return demangleTemplateInstantiationName(MangledName, NBB); 1516 if (MangledName.startsWith('?')) 1517 return demangleFunctionIdentifierCode(MangledName); 1518 return demangleSimpleName(MangledName, /*Memorize=*/(NBB & NBB_Simple) != 0); 1519 } 1520 1521 IdentifierNode *Demangler::demangleNameScopePiece(StringView &MangledName) { 1522 if (startsWithDigit(MangledName)) 1523 return demangleBackRefName(MangledName); 1524 1525 if (MangledName.startsWith("?$")) 1526 return demangleTemplateInstantiationName(MangledName, NBB_Template); 1527 1528 if (MangledName.startsWith("?A")) 1529 return demangleAnonymousNamespaceName(MangledName); 1530 1531 if (startsWithLocalScopePattern(MangledName)) 1532 return demangleLocallyScopedNamePiece(MangledName); 1533 1534 return demangleSimpleName(MangledName, /*Memorize=*/true); 1535 } 1536 1537 static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head, 1538 size_t Count) { 1539 NodeArrayNode *N = Arena.alloc<NodeArrayNode>(); 1540 N->Count = Count; 1541 N->Nodes = Arena.allocArray<Node *>(Count); 1542 for (size_t I = 0; I < Count; ++I) { 1543 N->Nodes[I] = Head->N; 1544 Head = Head->Next; 1545 } 1546 return N; 1547 } 1548 1549 QualifiedNameNode * 1550 Demangler::demangleNameScopeChain(StringView &MangledName, 1551 IdentifierNode *UnqualifiedName) { 1552 NodeList *Head = Arena.alloc<NodeList>(); 1553 1554 Head->N = UnqualifiedName; 1555 1556 size_t Count = 1; 1557 while (!MangledName.consumeFront("@")) { 1558 ++Count; 1559 NodeList *NewHead = Arena.alloc<NodeList>(); 1560 NewHead->Next = Head; 1561 Head = NewHead; 1562 1563 if (MangledName.empty()) { 1564 Error = true; 1565 return nullptr; 1566 } 1567 1568 assert(!Error); 1569 IdentifierNode *Elem = demangleNameScopePiece(MangledName); 1570 if (Error) 1571 return nullptr; 1572 1573 Head->N = Elem; 1574 } 1575 1576 QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>(); 1577 QN->Components = nodeListToNodeArray(Arena, Head, Count); 1578 return QN; 1579 } 1580 1581 FuncClass Demangler::demangleFunctionClass(StringView &MangledName) { 1582 switch (MangledName.popFront()) { 1583 case '9': 1584 return FuncClass(FC_ExternC | FC_NoParameterList); 1585 case 'A': 1586 return FC_Private; 1587 case 'B': 1588 return FuncClass(FC_Private | FC_Far); 1589 case 'C': 1590 return FuncClass(FC_Private | FC_Static); 1591 case 'D': 1592 return FuncClass(FC_Private | FC_Static); 1593 case 'E': 1594 return FuncClass(FC_Private | FC_Virtual); 1595 case 'F': 1596 return FuncClass(FC_Private | FC_Virtual); 1597 case 'G': 1598 return FuncClass(FC_Private | FC_StaticThisAdjust); 1599 case 'H': 1600 return FuncClass(FC_Private | FC_StaticThisAdjust | FC_Far); 1601 case 'I': 1602 return FuncClass(FC_Protected); 1603 case 'J': 1604 return FuncClass(FC_Protected | FC_Far); 1605 case 'K': 1606 return FuncClass(FC_Protected | FC_Static); 1607 case 'L': 1608 return FuncClass(FC_Protected | FC_Static | FC_Far); 1609 case 'M': 1610 return FuncClass(FC_Protected | FC_Virtual); 1611 case 'N': 1612 return FuncClass(FC_Protected | FC_Virtual | FC_Far); 1613 case 'O': 1614 return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust); 1615 case 'P': 1616 return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust | FC_Far); 1617 case 'Q': 1618 return FuncClass(FC_Public); 1619 case 'R': 1620 return FuncClass(FC_Public | FC_Far); 1621 case 'S': 1622 return FuncClass(FC_Public | FC_Static); 1623 case 'T': 1624 return FuncClass(FC_Public | FC_Static | FC_Far); 1625 case 'U': 1626 return FuncClass(FC_Public | FC_Virtual); 1627 case 'V': 1628 return FuncClass(FC_Public | FC_Virtual | FC_Far); 1629 case 'W': 1630 return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust); 1631 case 'X': 1632 return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust | FC_Far); 1633 case 'Y': 1634 return FuncClass(FC_Global); 1635 case 'Z': 1636 return FuncClass(FC_Global | FC_Far); 1637 case '$': { 1638 FuncClass VFlag = FC_VirtualThisAdjust; 1639 if (MangledName.consumeFront('R')) 1640 VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx); 1641 if (MangledName.empty()) 1642 break; 1643 switch (MangledName.popFront()) { 1644 case '0': 1645 return FuncClass(FC_Private | FC_Virtual | VFlag); 1646 case '1': 1647 return FuncClass(FC_Private | FC_Virtual | VFlag | FC_Far); 1648 case '2': 1649 return FuncClass(FC_Protected | FC_Virtual | VFlag); 1650 case '3': 1651 return FuncClass(FC_Protected | FC_Virtual | VFlag | FC_Far); 1652 case '4': 1653 return FuncClass(FC_Public | FC_Virtual | VFlag); 1654 case '5': 1655 return FuncClass(FC_Public | FC_Virtual | VFlag | FC_Far); 1656 } 1657 } 1658 } 1659 1660 Error = true; 1661 return FC_Public; 1662 } 1663 1664 CallingConv Demangler::demangleCallingConvention(StringView &MangledName) { 1665 if (MangledName.empty()) { 1666 Error = true; 1667 return CallingConv::None; 1668 } 1669 1670 switch (MangledName.popFront()) { 1671 case 'A': 1672 case 'B': 1673 return CallingConv::Cdecl; 1674 case 'C': 1675 case 'D': 1676 return CallingConv::Pascal; 1677 case 'E': 1678 case 'F': 1679 return CallingConv::Thiscall; 1680 case 'G': 1681 case 'H': 1682 return CallingConv::Stdcall; 1683 case 'I': 1684 case 'J': 1685 return CallingConv::Fastcall; 1686 case 'M': 1687 case 'N': 1688 return CallingConv::Clrcall; 1689 case 'O': 1690 case 'P': 1691 return CallingConv::Eabi; 1692 case 'Q': 1693 return CallingConv::Vectorcall; 1694 } 1695 1696 return CallingConv::None; 1697 } 1698 1699 StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) { 1700 assert(std::isdigit(MangledName.front())); 1701 1702 switch (MangledName.popFront()) { 1703 case '0': 1704 return StorageClass::PrivateStatic; 1705 case '1': 1706 return StorageClass::ProtectedStatic; 1707 case '2': 1708 return StorageClass::PublicStatic; 1709 case '3': 1710 return StorageClass::Global; 1711 case '4': 1712 return StorageClass::FunctionLocalStatic; 1713 } 1714 Error = true; 1715 return StorageClass::None; 1716 } 1717 1718 std::pair<Qualifiers, bool> 1719 Demangler::demangleQualifiers(StringView &MangledName) { 1720 if (MangledName.empty()) { 1721 Error = true; 1722 return std::make_pair(Q_None, false); 1723 } 1724 1725 switch (MangledName.popFront()) { 1726 // Member qualifiers 1727 case 'Q': 1728 return std::make_pair(Q_None, true); 1729 case 'R': 1730 return std::make_pair(Q_Const, true); 1731 case 'S': 1732 return std::make_pair(Q_Volatile, true); 1733 case 'T': 1734 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), true); 1735 // Non-Member qualifiers 1736 case 'A': 1737 return std::make_pair(Q_None, false); 1738 case 'B': 1739 return std::make_pair(Q_Const, false); 1740 case 'C': 1741 return std::make_pair(Q_Volatile, false); 1742 case 'D': 1743 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), false); 1744 } 1745 Error = true; 1746 return std::make_pair(Q_None, false); 1747 } 1748 1749 // <variable-type> ::= <type> <cvr-qualifiers> 1750 // ::= <type> <pointee-cvr-qualifiers> # pointers, references 1751 TypeNode *Demangler::demangleType(StringView &MangledName, 1752 QualifierMangleMode QMM) { 1753 Qualifiers Quals = Q_None; 1754 bool IsMember = false; 1755 if (QMM == QualifierMangleMode::Mangle) { 1756 std::tie(Quals, IsMember) = demangleQualifiers(MangledName); 1757 } else if (QMM == QualifierMangleMode::Result) { 1758 if (MangledName.consumeFront('?')) 1759 std::tie(Quals, IsMember) = demangleQualifiers(MangledName); 1760 } 1761 1762 if (MangledName.empty()) { 1763 Error = true; 1764 return nullptr; 1765 } 1766 1767 TypeNode *Ty = nullptr; 1768 if (isTagType(MangledName)) 1769 Ty = demangleClassType(MangledName); 1770 else if (isPointerType(MangledName)) { 1771 if (isMemberPointer(MangledName, Error)) 1772 Ty = demangleMemberPointerType(MangledName); 1773 else if (!Error) 1774 Ty = demanglePointerType(MangledName); 1775 else 1776 return nullptr; 1777 } else if (isArrayType(MangledName)) 1778 Ty = demangleArrayType(MangledName); 1779 else if (isFunctionType(MangledName)) { 1780 if (MangledName.consumeFront("$$A8@@")) 1781 Ty = demangleFunctionType(MangledName, true); 1782 else { 1783 assert(MangledName.startsWith("$$A6")); 1784 MangledName.consumeFront("$$A6"); 1785 Ty = demangleFunctionType(MangledName, false); 1786 } 1787 } else if (isCustomType(MangledName)) { 1788 Ty = demangleCustomType(MangledName); 1789 } else { 1790 Ty = demanglePrimitiveType(MangledName); 1791 } 1792 1793 if (!Ty || Error) 1794 return Ty; 1795 Ty->Quals = Qualifiers(Ty->Quals | Quals); 1796 return Ty; 1797 } 1798 1799 bool Demangler::demangleThrowSpecification(StringView &MangledName) { 1800 if (MangledName.consumeFront("_E")) 1801 return true; 1802 if (MangledName.consumeFront('Z')) 1803 return false; 1804 1805 Error = true; 1806 return false; 1807 } 1808 1809 FunctionSignatureNode *Demangler::demangleFunctionType(StringView &MangledName, 1810 bool HasThisQuals) { 1811 FunctionSignatureNode *FTy = Arena.alloc<FunctionSignatureNode>(); 1812 1813 if (HasThisQuals) { 1814 FTy->Quals = demanglePointerExtQualifiers(MangledName); 1815 FTy->RefQualifier = demangleFunctionRefQualifier(MangledName); 1816 FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first); 1817 } 1818 1819 // Fields that appear on both member and non-member functions. 1820 FTy->CallConvention = demangleCallingConvention(MangledName); 1821 1822 // <return-type> ::= <type> 1823 // ::= @ # structors (they have no declared return type) 1824 bool IsStructor = MangledName.consumeFront('@'); 1825 if (!IsStructor) 1826 FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result); 1827 1828 FTy->Params = demangleFunctionParameterList(MangledName); 1829 1830 FTy->IsNoexcept = demangleThrowSpecification(MangledName); 1831 1832 return FTy; 1833 } 1834 1835 FunctionSymbolNode * 1836 Demangler::demangleFunctionEncoding(StringView &MangledName) { 1837 FuncClass ExtraFlags = FC_None; 1838 if (MangledName.consumeFront("$$J0")) 1839 ExtraFlags = FC_ExternC; 1840 1841 if (MangledName.empty()) { 1842 Error = true; 1843 return nullptr; 1844 } 1845 1846 FuncClass FC = demangleFunctionClass(MangledName); 1847 FC = FuncClass(ExtraFlags | FC); 1848 1849 FunctionSignatureNode *FSN = nullptr; 1850 ThunkSignatureNode *TTN = nullptr; 1851 if (FC & FC_StaticThisAdjust) { 1852 TTN = Arena.alloc<ThunkSignatureNode>(); 1853 TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName); 1854 } else if (FC & FC_VirtualThisAdjust) { 1855 TTN = Arena.alloc<ThunkSignatureNode>(); 1856 if (FC & FC_VirtualThisAdjustEx) { 1857 TTN->ThisAdjust.VBPtrOffset = demangleSigned(MangledName); 1858 TTN->ThisAdjust.VBOffsetOffset = demangleSigned(MangledName); 1859 } 1860 TTN->ThisAdjust.VtordispOffset = demangleSigned(MangledName); 1861 TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName); 1862 } 1863 1864 if (FC & FC_NoParameterList) { 1865 // This is an extern "C" function whose full signature hasn't been mangled. 1866 // This happens when we need to mangle a local symbol inside of an extern 1867 // "C" function. 1868 FSN = Arena.alloc<FunctionSignatureNode>(); 1869 } else { 1870 bool HasThisQuals = !(FC & (FC_Global | FC_Static)); 1871 FSN = demangleFunctionType(MangledName, HasThisQuals); 1872 } 1873 1874 if (Error) 1875 return nullptr; 1876 1877 if (TTN) { 1878 *static_cast<FunctionSignatureNode *>(TTN) = *FSN; 1879 FSN = TTN; 1880 } 1881 FSN->FunctionClass = FC; 1882 1883 FunctionSymbolNode *Symbol = Arena.alloc<FunctionSymbolNode>(); 1884 Symbol->Signature = FSN; 1885 return Symbol; 1886 } 1887 1888 CustomTypeNode *Demangler::demangleCustomType(StringView &MangledName) { 1889 assert(MangledName.startsWith('?')); 1890 MangledName.popFront(); 1891 1892 CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>(); 1893 CTN->Identifier = demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true); 1894 if (!MangledName.consumeFront('@')) 1895 Error = true; 1896 if (Error) 1897 return nullptr; 1898 return CTN; 1899 } 1900 1901 // Reads a primitive type. 1902 PrimitiveTypeNode *Demangler::demanglePrimitiveType(StringView &MangledName) { 1903 if (MangledName.consumeFront("$$T")) 1904 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Nullptr); 1905 1906 switch (MangledName.popFront()) { 1907 case 'X': 1908 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Void); 1909 case 'D': 1910 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char); 1911 case 'C': 1912 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Schar); 1913 case 'E': 1914 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uchar); 1915 case 'F': 1916 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Short); 1917 case 'G': 1918 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ushort); 1919 case 'H': 1920 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int); 1921 case 'I': 1922 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint); 1923 case 'J': 1924 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Long); 1925 case 'K': 1926 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ulong); 1927 case 'M': 1928 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Float); 1929 case 'N': 1930 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Double); 1931 case 'O': 1932 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ldouble); 1933 case '_': { 1934 if (MangledName.empty()) { 1935 Error = true; 1936 return nullptr; 1937 } 1938 switch (MangledName.popFront()) { 1939 case 'N': 1940 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Bool); 1941 case 'J': 1942 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int64); 1943 case 'K': 1944 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64); 1945 case 'W': 1946 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar); 1947 case 'Q': 1948 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char8); 1949 case 'S': 1950 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16); 1951 case 'U': 1952 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char32); 1953 } 1954 break; 1955 } 1956 } 1957 Error = true; 1958 return nullptr; 1959 } 1960 1961 TagTypeNode *Demangler::demangleClassType(StringView &MangledName) { 1962 TagTypeNode *TT = nullptr; 1963 1964 switch (MangledName.popFront()) { 1965 case 'T': 1966 TT = Arena.alloc<TagTypeNode>(TagKind::Union); 1967 break; 1968 case 'U': 1969 TT = Arena.alloc<TagTypeNode>(TagKind::Struct); 1970 break; 1971 case 'V': 1972 TT = Arena.alloc<TagTypeNode>(TagKind::Class); 1973 break; 1974 case 'W': 1975 if (!MangledName.consumeFront('4')) { 1976 Error = true; 1977 return nullptr; 1978 } 1979 TT = Arena.alloc<TagTypeNode>(TagKind::Enum); 1980 break; 1981 default: 1982 assert(false); 1983 } 1984 1985 TT->QualifiedName = demangleFullyQualifiedTypeName(MangledName); 1986 return TT; 1987 } 1988 1989 // <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type> 1990 // # the E is required for 64-bit non-static pointers 1991 PointerTypeNode *Demangler::demanglePointerType(StringView &MangledName) { 1992 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>(); 1993 1994 std::tie(Pointer->Quals, Pointer->Affinity) = 1995 demanglePointerCVQualifiers(MangledName); 1996 1997 if (MangledName.consumeFront("6")) { 1998 Pointer->Pointee = demangleFunctionType(MangledName, false); 1999 return Pointer; 2000 } 2001 2002 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); 2003 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); 2004 2005 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle); 2006 return Pointer; 2007 } 2008 2009 PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) { 2010 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>(); 2011 2012 std::tie(Pointer->Quals, Pointer->Affinity) = 2013 demanglePointerCVQualifiers(MangledName); 2014 assert(Pointer->Affinity == PointerAffinity::Pointer); 2015 2016 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); 2017 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); 2018 2019 // isMemberPointer() only returns true if there is at least one character 2020 // after the qualifiers. 2021 if (MangledName.consumeFront("8")) { 2022 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName); 2023 Pointer->Pointee = demangleFunctionType(MangledName, true); 2024 } else { 2025 Qualifiers PointeeQuals = Q_None; 2026 bool IsMember = false; 2027 std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName); 2028 assert(IsMember || Error); 2029 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName); 2030 2031 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop); 2032 if (Pointer->Pointee) 2033 Pointer->Pointee->Quals = PointeeQuals; 2034 } 2035 2036 return Pointer; 2037 } 2038 2039 Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) { 2040 Qualifiers Quals = Q_None; 2041 if (MangledName.consumeFront('E')) 2042 Quals = Qualifiers(Quals | Q_Pointer64); 2043 if (MangledName.consumeFront('I')) 2044 Quals = Qualifiers(Quals | Q_Restrict); 2045 if (MangledName.consumeFront('F')) 2046 Quals = Qualifiers(Quals | Q_Unaligned); 2047 2048 return Quals; 2049 } 2050 2051 ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) { 2052 assert(MangledName.front() == 'Y'); 2053 MangledName.popFront(); 2054 2055 uint64_t Rank = 0; 2056 bool IsNegative = false; 2057 std::tie(Rank, IsNegative) = demangleNumber(MangledName); 2058 if (IsNegative || Rank == 0) { 2059 Error = true; 2060 return nullptr; 2061 } 2062 2063 ArrayTypeNode *ATy = Arena.alloc<ArrayTypeNode>(); 2064 NodeList *Head = Arena.alloc<NodeList>(); 2065 NodeList *Tail = Head; 2066 2067 for (uint64_t I = 0; I < Rank; ++I) { 2068 uint64_t D = 0; 2069 std::tie(D, IsNegative) = demangleNumber(MangledName); 2070 if (Error || IsNegative) { 2071 Error = true; 2072 return nullptr; 2073 } 2074 Tail->N = Arena.alloc<IntegerLiteralNode>(D, IsNegative); 2075 if (I + 1 < Rank) { 2076 Tail->Next = Arena.alloc<NodeList>(); 2077 Tail = Tail->Next; 2078 } 2079 } 2080 ATy->Dimensions = nodeListToNodeArray(Arena, Head, Rank); 2081 2082 if (MangledName.consumeFront("$$C")) { 2083 bool IsMember = false; 2084 std::tie(ATy->Quals, IsMember) = demangleQualifiers(MangledName); 2085 if (IsMember) { 2086 Error = true; 2087 return nullptr; 2088 } 2089 } 2090 2091 ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop); 2092 return ATy; 2093 } 2094 2095 // Reads a function or a template parameters. 2096 NodeArrayNode * 2097 Demangler::demangleFunctionParameterList(StringView &MangledName) { 2098 // Empty parameter list. 2099 if (MangledName.consumeFront('X')) 2100 return nullptr; 2101 2102 NodeList *Head = Arena.alloc<NodeList>(); 2103 NodeList **Current = &Head; 2104 size_t Count = 0; 2105 while (!Error && !MangledName.startsWith('@') && 2106 !MangledName.startsWith('Z')) { 2107 ++Count; 2108 2109 if (startsWithDigit(MangledName)) { 2110 size_t N = MangledName[0] - '0'; 2111 if (N >= Backrefs.FunctionParamCount) { 2112 Error = true; 2113 return nullptr; 2114 } 2115 MangledName = MangledName.dropFront(); 2116 2117 *Current = Arena.alloc<NodeList>(); 2118 (*Current)->N = Backrefs.FunctionParams[N]; 2119 Current = &(*Current)->Next; 2120 continue; 2121 } 2122 2123 size_t OldSize = MangledName.size(); 2124 2125 *Current = Arena.alloc<NodeList>(); 2126 TypeNode *TN = demangleType(MangledName, QualifierMangleMode::Drop); 2127 if (!TN || Error) 2128 return nullptr; 2129 2130 (*Current)->N = TN; 2131 2132 size_t CharsConsumed = OldSize - MangledName.size(); 2133 assert(CharsConsumed != 0); 2134 2135 // Single-letter types are ignored for backreferences because memorizing 2136 // them doesn't save anything. 2137 if (Backrefs.FunctionParamCount <= 9 && CharsConsumed > 1) 2138 Backrefs.FunctionParams[Backrefs.FunctionParamCount++] = TN; 2139 2140 Current = &(*Current)->Next; 2141 } 2142 2143 if (Error) 2144 return nullptr; 2145 2146 NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count); 2147 // A non-empty parameter list is terminated by either 'Z' (variadic) parameter 2148 // list or '@' (non variadic). Careful not to consume "@Z", as in that case 2149 // the following Z could be a throw specifier. 2150 if (MangledName.consumeFront('@')) 2151 return NA; 2152 2153 if (MangledName.consumeFront('Z')) { 2154 // This is a variadic parameter list. We probably need a variadic node to 2155 // append to the end. 2156 return NA; 2157 } 2158 2159 Error = true; 2160 return nullptr; 2161 } 2162 2163 NodeArrayNode * 2164 Demangler::demangleTemplateParameterList(StringView &MangledName) { 2165 NodeList *Head; 2166 NodeList **Current = &Head; 2167 size_t Count = 0; 2168 2169 while (!Error && !MangledName.startsWith('@')) { 2170 if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") || 2171 MangledName.consumeFront("$$$V") || MangledName.consumeFront("$$Z")) { 2172 // parameter pack separator 2173 continue; 2174 } 2175 2176 ++Count; 2177 2178 // Template parameter lists don't participate in back-referencing. 2179 *Current = Arena.alloc<NodeList>(); 2180 2181 NodeList &TP = **Current; 2182 2183 TemplateParameterReferenceNode *TPRN = nullptr; 2184 if (MangledName.consumeFront("$$Y")) { 2185 // Template alias 2186 TP.N = demangleFullyQualifiedTypeName(MangledName); 2187 } else if (MangledName.consumeFront("$$B")) { 2188 // Array 2189 TP.N = demangleType(MangledName, QualifierMangleMode::Drop); 2190 } else if (MangledName.consumeFront("$$C")) { 2191 // Type has qualifiers. 2192 TP.N = demangleType(MangledName, QualifierMangleMode::Mangle); 2193 } else if (MangledName.startsWith("$1") || MangledName.startsWith("$H") || 2194 MangledName.startsWith("$I") || MangledName.startsWith("$J")) { 2195 // Pointer to member 2196 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2197 TPRN->IsMemberPointer = true; 2198 2199 MangledName = MangledName.dropFront(); 2200 // 1 - single inheritance <name> 2201 // H - multiple inheritance <name> <number> 2202 // I - virtual inheritance <name> <number> <number> <number> 2203 // J - unspecified inheritance <name> <number> <number> <number> 2204 char InheritanceSpecifier = MangledName.popFront(); 2205 SymbolNode *S = nullptr; 2206 if (MangledName.startsWith('?')) { 2207 S = parse(MangledName); 2208 if (Error || !S->Name) { 2209 Error = true; 2210 return nullptr; 2211 } 2212 memorizeIdentifier(S->Name->getUnqualifiedIdentifier()); 2213 } 2214 2215 switch (InheritanceSpecifier) { 2216 case 'J': 2217 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2218 demangleSigned(MangledName); 2219 DEMANGLE_FALLTHROUGH; 2220 case 'I': 2221 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2222 demangleSigned(MangledName); 2223 DEMANGLE_FALLTHROUGH; 2224 case 'H': 2225 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2226 demangleSigned(MangledName); 2227 DEMANGLE_FALLTHROUGH; 2228 case '1': 2229 break; 2230 default: 2231 Error = true; 2232 break; 2233 } 2234 TPRN->Affinity = PointerAffinity::Pointer; 2235 TPRN->Symbol = S; 2236 } else if (MangledName.startsWith("$E?")) { 2237 MangledName.consumeFront("$E"); 2238 // Reference to symbol 2239 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2240 TPRN->Symbol = parse(MangledName); 2241 TPRN->Affinity = PointerAffinity::Reference; 2242 } else if (MangledName.startsWith("$F") || MangledName.startsWith("$G")) { 2243 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2244 2245 // Data member pointer. 2246 MangledName = MangledName.dropFront(); 2247 char InheritanceSpecifier = MangledName.popFront(); 2248 2249 switch (InheritanceSpecifier) { 2250 case 'G': 2251 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2252 demangleSigned(MangledName); 2253 DEMANGLE_FALLTHROUGH; 2254 case 'F': 2255 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2256 demangleSigned(MangledName); 2257 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2258 demangleSigned(MangledName); 2259 DEMANGLE_FALLTHROUGH; 2260 case '0': 2261 break; 2262 default: 2263 Error = true; 2264 break; 2265 } 2266 TPRN->IsMemberPointer = true; 2267 2268 } else if (MangledName.consumeFront("$0")) { 2269 // Integral non-type template parameter 2270 bool IsNegative = false; 2271 uint64_t Value = 0; 2272 std::tie(Value, IsNegative) = demangleNumber(MangledName); 2273 2274 TP.N = Arena.alloc<IntegerLiteralNode>(Value, IsNegative); 2275 } else { 2276 TP.N = demangleType(MangledName, QualifierMangleMode::Drop); 2277 } 2278 if (Error) 2279 return nullptr; 2280 2281 Current = &TP.Next; 2282 } 2283 2284 if (Error) 2285 return nullptr; 2286 2287 // Template parameter lists cannot be variadic, so it can only be terminated 2288 // by @. 2289 if (MangledName.consumeFront('@')) 2290 return nodeListToNodeArray(Arena, Head, Count); 2291 Error = true; 2292 return nullptr; 2293 } 2294 2295 void Demangler::dumpBackReferences() { 2296 std::printf("%d function parameter backreferences\n", 2297 (int)Backrefs.FunctionParamCount); 2298 2299 // Create an output stream so we can render each type. 2300 OutputStream OS; 2301 if (!initializeOutputStream(nullptr, nullptr, OS, 1024)) 2302 std::terminate(); 2303 for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) { 2304 OS.setCurrentPosition(0); 2305 2306 TypeNode *T = Backrefs.FunctionParams[I]; 2307 T->output(OS, OF_Default); 2308 2309 std::printf(" [%d] - %.*s\n", (int)I, (int)OS.getCurrentPosition(), 2310 OS.getBuffer()); 2311 } 2312 std::free(OS.getBuffer()); 2313 2314 if (Backrefs.FunctionParamCount > 0) 2315 std::printf("\n"); 2316 std::printf("%d name backreferences\n", (int)Backrefs.NamesCount); 2317 for (size_t I = 0; I < Backrefs.NamesCount; ++I) { 2318 std::printf(" [%d] - %.*s\n", (int)I, (int)Backrefs.Names[I]->Name.size(), 2319 Backrefs.Names[I]->Name.begin()); 2320 } 2321 if (Backrefs.NamesCount > 0) 2322 std::printf("\n"); 2323 } 2324 2325 char *llvm::microsoftDemangle(const char *MangledName, char *Buf, size_t *N, 2326 int *Status, MSDemangleFlags Flags) { 2327 int InternalStatus = demangle_success; 2328 Demangler D; 2329 OutputStream S; 2330 2331 StringView Name{MangledName}; 2332 SymbolNode *AST = D.parse(Name); 2333 2334 if (Flags & MSDF_DumpBackrefs) 2335 D.dumpBackReferences(); 2336 2337 if (D.Error) 2338 InternalStatus = demangle_invalid_mangled_name; 2339 else if (!initializeOutputStream(Buf, N, S, 1024)) 2340 InternalStatus = demangle_memory_alloc_failure; 2341 else { 2342 AST->output(S, OF_Default); 2343 S += '\0'; 2344 if (N != nullptr) 2345 *N = S.getCurrentPosition(); 2346 Buf = S.getBuffer(); 2347 } 2348 2349 if (Status) 2350 *Status = InternalStatus; 2351 return InternalStatus == demangle_success ? Buf : nullptr; 2352 } 2353