1 //===- MicrosoftDemangle.cpp ----------------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is dual licensed under the MIT and the University of Illinois Open
6 // Source Licenses. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a demangler for MSVC-style mangled symbols.
11 //
12 // This file has no dependencies on the rest of LLVM so that it can be
13 // easily reused in other programs such as libcxxabi.
14 //
15 //===----------------------------------------------------------------------===//
16 
#include "MicrosoftDemangleNodes.h"
#include "llvm/Demangle/Demangle.h"

#include "llvm/Demangle/Compiler.h"
#include "llvm/Demangle/StringView.h"
#include "llvm/Demangle/Utility.h"

#include <array>
#include <cctype>
#include <cstdio>
#include <cstring>
#include <tuple>
28 
29 using namespace llvm;
30 using namespace ms_demangle;
31 
32 static bool startsWithDigit(StringView S) {
33   return !S.empty() && std::isdigit(S.front());
34 }
35 
// Mode selector that is passed to Demangler::demangleType() together with the
// mangled name.
// NOTE(review): the precise effect of each enumerator is decided where
// demangleType() consumes it, which is not visible from here -- confirm there.
enum class QualifierMangleMode { Drop, Mangle, Result };
37 
// One entry of a singly linked list of Node pointers.  N is the node carried
// by this entry and Next points at the following entry.  Both default to
// nullptr, so a default-constructed NodeList carries nothing and links nowhere.
struct NodeList {
  Node *N = nullptr;
  NodeList *Next = nullptr;
};
42 
// Selects which family of '?'-prefixed function identifier codes a code
// character belongs to: codes that directly follow '?' (Basic), codes that
// follow "?_" (Under), and codes that follow "?__" (DoubleUnder).
enum class FunctionIdentifierCodeGroup { Basic, Under, DoubleUnder };
44 
// Flags describing which kinds of names should be saved as back-references.
// The non-zero enumerators occupy distinct bits (1 << 0, 1 << 1).
enum NameBackrefBehavior : uint8_t {
  NBB_None = 0,          // don't save any names as backrefs.
  NBB_Template = 1 << 0, // save template instantiations.
  NBB_Simple = 1 << 1,   // save simple names.
};
50 
51 static bool isMemberPointer(StringView MangledName) {
52   switch (MangledName.popFront()) {
53   case '$':
54     // This is probably an rvalue reference (e.g. $$Q), and you cannot have an
55     // rvalue reference to a member.
56     return false;
57   case 'A':
58     // 'A' indicates a reference, and you cannot have a reference to a member
59     // function or member.
60     return false;
61   case 'P':
62   case 'Q':
63   case 'R':
64   case 'S':
65     // These 4 values indicate some kind of pointer, but we still don't know
66     // what.
67     break;
68   default:
69     assert(false && "Ty is not a pointer type!");
70   }
71 
72   // If it starts with a number, then 6 indicates a non-member function
73   // pointer, and 8 indicates a member function pointer.
74   if (startsWithDigit(MangledName)) {
75     assert(MangledName[0] == '6' || MangledName[0] == '8');
76     return (MangledName[0] == '8');
77   }
78 
79   // Remove ext qualifiers since those can appear on either type and are
80   // therefore not indicative.
81   MangledName.consumeFront('E'); // 64-bit
82   MangledName.consumeFront('I'); // restrict
83   MangledName.consumeFront('F'); // unaligned
84 
85   assert(!MangledName.empty());
86 
87   // The next value should be either ABCD (non-member) or QRST (member).
88   switch (MangledName.front()) {
89   case 'A':
90   case 'B':
91   case 'C':
92   case 'D':
93     return false;
94   case 'Q':
95   case 'R':
96   case 'S':
97   case 'T':
98     return true;
99   default:
100     assert(false);
101   }
102   return false;
103 }
104 
105 static SpecialIntrinsicKind
106 consumeSpecialIntrinsicKind(StringView &MangledName) {
107   if (MangledName.consumeFront("?_7"))
108     return SpecialIntrinsicKind::Vftable;
109   if (MangledName.consumeFront("?_8"))
110     return SpecialIntrinsicKind::Vbtable;
111   if (MangledName.consumeFront("?_9"))
112     return SpecialIntrinsicKind::VcallThunk;
113   if (MangledName.consumeFront("?_A"))
114     return SpecialIntrinsicKind::Typeof;
115   if (MangledName.consumeFront("?_B"))
116     return SpecialIntrinsicKind::LocalStaticGuard;
117   if (MangledName.consumeFront("?_C"))
118     return SpecialIntrinsicKind::StringLiteralSymbol;
119   if (MangledName.consumeFront("?_P"))
120     return SpecialIntrinsicKind::UdtReturning;
121   if (MangledName.consumeFront("?_R0"))
122     return SpecialIntrinsicKind::RttiTypeDescriptor;
123   if (MangledName.consumeFront("?_R1"))
124     return SpecialIntrinsicKind::RttiBaseClassDescriptor;
125   if (MangledName.consumeFront("?_R2"))
126     return SpecialIntrinsicKind::RttiBaseClassArray;
127   if (MangledName.consumeFront("?_R3"))
128     return SpecialIntrinsicKind::RttiClassHierarchyDescriptor;
129   if (MangledName.consumeFront("?_R4"))
130     return SpecialIntrinsicKind::RttiCompleteObjLocator;
131   if (MangledName.consumeFront("?_S"))
132     return SpecialIntrinsicKind::LocalVftable;
133   if (MangledName.consumeFront("?__E"))
134     return SpecialIntrinsicKind::DynamicInitializer;
135   if (MangledName.consumeFront("?__F"))
136     return SpecialIntrinsicKind::DynamicAtexitDestructor;
137   if (MangledName.consumeFront("?__J"))
138     return SpecialIntrinsicKind::LocalStaticThreadGuard;
139   return SpecialIntrinsicKind::None;
140 }
141 
142 static bool startsWithLocalScopePattern(StringView S) {
143   if (!S.consumeFront('?'))
144     return false;
145   if (S.size() < 2)
146     return false;
147 
148   size_t End = S.find('?');
149   if (End == StringView::npos)
150     return false;
151   StringView Candidate = S.substr(0, End);
152   if (Candidate.empty())
153     return false;
154 
155   // \?[0-9]\?
156   // ?@? is the discriminator 0.
157   if (Candidate.size() == 1)
158     return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9');
159 
160   // If it's not 0-9, then it's an encoded number terminated with an @
161   if (Candidate.back() != '@')
162     return false;
163   Candidate = Candidate.dropBack();
164 
165   // An encoded number starts with B-P and all subsequent digits are in A-P.
166   // Note that the reason the first digit cannot be A is two fold.  First, it
167   // would create an ambiguity with ?A which delimits the beginning of an
168   // anonymous namespace.  Second, A represents 0, and you don't start a multi
169   // digit number with a leading 0.  Presumably the anonymous namespace
170   // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J.
171   if (Candidate[0] < 'B' || Candidate[0] > 'P')
172     return false;
173   Candidate = Candidate.dropFront();
174   while (!Candidate.empty()) {
175     if (Candidate[0] < 'A' || Candidate[0] > 'P')
176       return false;
177     Candidate = Candidate.dropFront();
178   }
179 
180   return true;
181 }
182 
183 static bool isTagType(StringView S) {
184   switch (S.front()) {
185   case 'T': // union
186   case 'U': // struct
187   case 'V': // class
188   case 'W': // enum
189     return true;
190   }
191   return false;
192 }
193 
194 static bool isCustomType(StringView S) { return S[0] == '?'; }
195 
196 static bool isPointerType(StringView S) {
197   if (S.startsWith("$$Q")) // foo &&
198     return true;
199 
200   switch (S.front()) {
201   case 'A': // foo &
202   case 'P': // foo *
203   case 'Q': // foo *const
204   case 'R': // foo *volatile
205   case 'S': // foo *const volatile
206     return true;
207   }
208   return false;
209 }
210 
211 static bool isArrayType(StringView S) { return S[0] == 'Y'; }
212 
213 static bool isFunctionType(StringView S) {
214   return S.startsWith("$$A8@@") || S.startsWith("$$A6");
215 }
216 
217 static FunctionRefQualifier
218 demangleFunctionRefQualifier(StringView &MangledName) {
219   if (MangledName.consumeFront('G'))
220     return FunctionRefQualifier::Reference;
221   else if (MangledName.consumeFront('H'))
222     return FunctionRefQualifier::RValueReference;
223   return FunctionRefQualifier::None;
224 }
225 
226 static std::pair<Qualifiers, PointerAffinity>
227 demanglePointerCVQualifiers(StringView &MangledName) {
228   if (MangledName.consumeFront("$$Q"))
229     return std::make_pair(Q_None, PointerAffinity::RValueReference);
230 
231   switch (MangledName.popFront()) {
232   case 'A':
233     return std::make_pair(Q_None, PointerAffinity::Reference);
234   case 'P':
235     return std::make_pair(Q_None, PointerAffinity::Pointer);
236   case 'Q':
237     return std::make_pair(Q_Const, PointerAffinity::Pointer);
238   case 'R':
239     return std::make_pair(Q_Volatile, PointerAffinity::Pointer);
240   case 'S':
241     return std::make_pair(Qualifiers(Q_Const | Q_Volatile),
242                           PointerAffinity::Pointer);
243   default:
244     assert(false && "Ty is not a pointer type!");
245   }
246   return std::make_pair(Q_None, PointerAffinity::Pointer);
247 }
248 
249 namespace {
250 
// Storage for the two back-reference tables used while demangling: one for
// function parameter types and one for names.  Each table has a fixed
// capacity of Max entries plus a counter that starts at zero.
struct BackrefContext {
  // Capacity of each table.
  static constexpr size_t Max = 10;

  // Saved function parameter types; FunctionParamCount starts at zero.
  TypeNode *FunctionParams[Max];
  size_t FunctionParamCount = 0;

  // The first 10 BackReferences in a mangled name can be back-referenced by
  // special name @[0-9]. This is a storage for the first 10 BackReferences.
  NamedIdentifierNode *Names[Max];
  size_t NamesCount = 0;
};
262 
// Demangler class takes the main role in demangling symbols.
// It has a set of functions to parse mangled symbols into Type instances.
// It also has a set of functions to convert Type instances to strings.
//
// The parsing functions take the mangled name as a StringView reference.
// Failure is reported through the public Error flag.
class Demangler {
public:
  Demangler() = default;
  virtual ~Demangler() = default;

  // You are supposed to call parse() first and then check if error is true.  If
  // it is false, call output() to write the formatted name to the given stream.
  // NOTE(review): no output() member is declared in this class; the comment
  // above presumably refers to functionality provided elsewhere -- confirm.
  SymbolNode *parse(StringView &MangledName);

  // True if an error occurred.
  bool Error = false;

  void dumpBackReferences();

private:
  SymbolNode *demangleEncodedSymbol(StringView &MangledName,
                                    QualifiedNameNode *QN);

  VariableSymbolNode *demangleVariableEncoding(StringView &MangledName,
                                               StorageClass SC);
  FunctionSymbolNode *demangleFunctionEncoding(StringView &MangledName);

  Qualifiers demanglePointerExtQualifiers(StringView &MangledName);

  // Parser functions. This is a recursive-descent parser.
  TypeNode *demangleType(StringView &MangledName, QualifierMangleMode QMM);
  PrimitiveTypeNode *demanglePrimitiveType(StringView &MangledName);
  CustomTypeNode *demangleCustomType(StringView &MangledName);
  TagTypeNode *demangleClassType(StringView &MangledName);
  PointerTypeNode *demanglePointerType(StringView &MangledName);
  PointerTypeNode *demangleMemberPointerType(StringView &MangledName);
  FunctionSignatureNode *demangleFunctionType(StringView &MangledName,
                                              bool HasThisQuals);

  ArrayTypeNode *demangleArrayType(StringView &MangledName);

  NodeArrayNode *demangleTemplateParameterList(StringView &MangledName);
  NodeArrayNode *demangleFunctionParameterList(StringView &MangledName);

  // Number parsing helpers.
  // NOTE(review): the bool in demangleNumber()'s result presumably flags a
  // negative value -- confirm against its definition.
  std::pair<uint64_t, bool> demangleNumber(StringView &MangledName);
  uint64_t demangleUnsigned(StringView &MangledName);
  int64_t demangleSigned(StringView &MangledName);

  void memorizeString(StringView s);
  void memorizeIdentifier(IdentifierNode *Identifier);

  /// Allocate a copy of \p Borrowed into memory that we own.
  StringView copyString(StringView Borrowed);

  QualifiedNameNode *demangleFullyQualifiedTypeName(StringView &MangledName);
  QualifiedNameNode *demangleFullyQualifiedSymbolName(StringView &MangledName);

  IdentifierNode *demangleUnqualifiedTypeName(StringView &MangledName,
                                              bool Memorize);
  IdentifierNode *demangleUnqualifiedSymbolName(StringView &MangledName,
                                                NameBackrefBehavior NBB);

  QualifiedNameNode *demangleNameScopeChain(StringView &MangledName,
                                            IdentifierNode *UnqualifiedName);
  IdentifierNode *demangleNameScopePiece(StringView &MangledName);

  NamedIdentifierNode *demangleBackRefName(StringView &MangledName);
  IdentifierNode *demangleTemplateInstantiationName(StringView &MangledName,
                                                    NameBackrefBehavior NBB);
  // The one-argument overload strips the leading '?' and any "_" / "__"
  // prefix, then forwards to the two-argument overload with the matching
  // group.
  IdentifierNode *demangleFunctionIdentifierCode(StringView &MangledName);
  IdentifierNode *
  demangleFunctionIdentifierCode(StringView &MangledName,
                                 FunctionIdentifierCodeGroup Group);
  StructorIdentifierNode *demangleStructorIdentifier(StringView &MangledName,
                                                     bool IsDestructor);
  ConversionOperatorIdentifierNode *
  demangleConversionOperatorIdentifier(StringView &MangledName);
  LiteralOperatorIdentifierNode *
  demangleLiteralOperatorIdentifier(StringView &MangledName);

  // Special intrinsics: vftables, vbtables, RTTI data, guards, string
  // literals and dynamic initializer / atexit destructor stubs.
  SymbolNode *demangleSpecialIntrinsic(StringView &MangledName);
  SpecialTableSymbolNode *
  demangleSpecialTableSymbolNode(StringView &MangledName,
                                 SpecialIntrinsicKind SIK);
  LocalStaticGuardVariableNode *
  demangleLocalStaticGuard(StringView &MangledName);
  VariableSymbolNode *demangleUntypedVariable(ArenaAllocator &Arena,
                                              StringView &MangledName,
                                              StringView VariableName);
  VariableSymbolNode *
  demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
                                      StringView &MangledName);
  FunctionSymbolNode *demangleInitFiniStub(StringView &MangledName,
                                           bool IsDestructor);

  NamedIdentifierNode *demangleSimpleName(StringView &MangledName,
                                          bool Memorize);
  NamedIdentifierNode *demangleAnonymousNamespaceName(StringView &MangledName);
  NamedIdentifierNode *demangleLocallyScopedNamePiece(StringView &MangledName);
  EncodedStringLiteralNode *demangleStringLiteral(StringView &MangledName);
  FunctionSymbolNode *demangleVcallThunkNode(StringView &MangledName);

  StringView demangleSimpleString(StringView &MangledName, bool Memorize);

  FuncClass demangleFunctionClass(StringView &MangledName);
  CallingConv demangleCallingConvention(StringView &MangledName);
  StorageClass demangleVariableStorageClass(StringView &MangledName);
  void demangleThrowSpecification(StringView &MangledName);
  wchar_t demangleWcharLiteral(StringView &MangledName);
  uint8_t demangleCharLiteral(StringView &MangledName);

  std::pair<Qualifiers, bool> demangleQualifiers(StringView &MangledName);

  // Memory allocator.
  ArenaAllocator Arena;

  // A single type uses one global back-ref table for all function params.
  // This means back-refs can even go "into" other types.  Examples:
  //
  //  // Second int* is a back-ref to first.
  //  void foo(int *, int*);
  //
  //  // Second int* is not a back-ref to first (first is not a function param).
  //  int* foo(int*);
  //
  //  // Second int* is a back-ref to first (ALL function types share the same
  //  // back-ref map.
  //  using F = void(*)(int*);
  //  F G(int *);
  BackrefContext Backrefs;
};
392 } // namespace
393 
394 StringView Demangler::copyString(StringView Borrowed) {
395   char *Stable = Arena.allocUnalignedBuffer(Borrowed.size() + 1);
396   std::strcpy(Stable, Borrowed.begin());
397 
398   return {Stable, Borrowed.size()};
399 }
400 
401 SpecialTableSymbolNode *
402 Demangler::demangleSpecialTableSymbolNode(StringView &MangledName,
403                                           SpecialIntrinsicKind K) {
404   NamedIdentifierNode *NI = Arena.alloc<NamedIdentifierNode>();
405   switch (K) {
406   case SpecialIntrinsicKind::Vftable:
407     NI->Name = "`vftable'";
408     break;
409   case SpecialIntrinsicKind::Vbtable:
410     NI->Name = "`vbtable'";
411     break;
412   case SpecialIntrinsicKind::LocalVftable:
413     NI->Name = "`local vftable'";
414     break;
415   case SpecialIntrinsicKind::RttiCompleteObjLocator:
416     NI->Name = "`RTTI Complete Object Locator'";
417     break;
418   default:
419     LLVM_BUILTIN_UNREACHABLE;
420   }
421   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
422   SpecialTableSymbolNode *STSN = Arena.alloc<SpecialTableSymbolNode>();
423   STSN->Name = QN;
424   bool IsMember = false;
425   char Front = MangledName.popFront();
426   if (Front != '6' && Front != '7') {
427     Error = true;
428     return nullptr;
429   }
430 
431   std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName);
432   if (!MangledName.consumeFront('@'))
433     STSN->TargetName = demangleFullyQualifiedTypeName(MangledName);
434   return STSN;
435 }
436 
437 LocalStaticGuardVariableNode *
438 Demangler::demangleLocalStaticGuard(StringView &MangledName) {
439   LocalStaticGuardIdentifierNode *LSGI =
440       Arena.alloc<LocalStaticGuardIdentifierNode>();
441   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI);
442   LocalStaticGuardVariableNode *LSGVN =
443       Arena.alloc<LocalStaticGuardVariableNode>();
444   LSGVN->Name = QN;
445 
446   if (MangledName.consumeFront("4IA"))
447     LSGVN->IsVisible = false;
448   else if (MangledName.consumeFront("5"))
449     LSGVN->IsVisible = true;
450   else {
451     Error = true;
452     return nullptr;
453   }
454 
455   if (!MangledName.empty())
456     LSGI->ScopeIndex = demangleUnsigned(MangledName);
457   return LSGVN;
458 }
459 
460 static NamedIdentifierNode *synthesizeNamedIdentifier(ArenaAllocator &Arena,
461                                                       StringView Name) {
462   NamedIdentifierNode *Id = Arena.alloc<NamedIdentifierNode>();
463   Id->Name = Name;
464   return Id;
465 }
466 
467 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena,
468                                                   IdentifierNode *Identifier) {
469   QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>();
470   QN->Components = Arena.alloc<NodeArrayNode>();
471   QN->Components->Count = 1;
472   QN->Components->Nodes = Arena.allocArray<Node *>(1);
473   QN->Components->Nodes[0] = Identifier;
474   return QN;
475 }
476 
477 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena,
478                                                   StringView Name) {
479   NamedIdentifierNode *Id = synthesizeNamedIdentifier(Arena, Name);
480   return synthesizeQualifiedName(Arena, Id);
481 }
482 
483 static VariableSymbolNode *synthesizeVariable(ArenaAllocator &Arena,
484                                               TypeNode *Type,
485                                               StringView VariableName) {
486   VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
487   VSN->Type = Type;
488   VSN->Name = synthesizeQualifiedName(Arena, VariableName);
489   return VSN;
490 }
491 
492 VariableSymbolNode *Demangler::demangleUntypedVariable(
493     ArenaAllocator &Arena, StringView &MangledName, StringView VariableName) {
494   NamedIdentifierNode *NI = synthesizeNamedIdentifier(Arena, VariableName);
495   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
496   VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
497   VSN->Name = QN;
498   if (MangledName.consumeFront("8"))
499     return VSN;
500 
501   Error = true;
502   return nullptr;
503 }
504 
505 VariableSymbolNode *
506 Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
507                                                StringView &MangledName) {
508   RttiBaseClassDescriptorNode *RBCDN =
509       Arena.alloc<RttiBaseClassDescriptorNode>();
510   RBCDN->NVOffset = demangleUnsigned(MangledName);
511   RBCDN->VBPtrOffset = demangleSigned(MangledName);
512   RBCDN->VBTableOffset = demangleUnsigned(MangledName);
513   RBCDN->Flags = demangleUnsigned(MangledName);
514   if (Error)
515     return nullptr;
516 
517   VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
518   VSN->Name = demangleNameScopeChain(MangledName, RBCDN);
519   MangledName.consumeFront('8');
520   return VSN;
521 }
522 
523 FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName,
524                                                     bool IsDestructor) {
525   DynamicStructorIdentifierNode *DSIN =
526       Arena.alloc<DynamicStructorIdentifierNode>();
527   DSIN->IsDestructor = IsDestructor;
528 
529   bool IsKnownStaticDataMember = false;
530   if (MangledName.consumeFront('?'))
531     IsKnownStaticDataMember = true;
532 
533   QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
534 
535   SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
536   FunctionSymbolNode *FSN = nullptr;
537   Symbol->Name = QN;
538 
539   if (Symbol->kind() == NodeKind::VariableSymbol) {
540     DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol);
541 
542     // Older versions of clang mangled this type of symbol incorrectly.  They
543     // would omit the leading ? and they would only emit a single @ at the end.
544     // The correct mangling is a leading ? and 2 trailing @ signs.  Handle
545     // both cases.
546     int AtCount = IsKnownStaticDataMember ? 2 : 1;
547     for (int I = 0; I < AtCount; ++I) {
548       if (MangledName.consumeFront('@'))
549         continue;
550       Error = true;
551       return nullptr;
552     }
553 
554     FSN = demangleFunctionEncoding(MangledName);
555     FSN->Name = synthesizeQualifiedName(Arena, DSIN);
556   } else {
557     if (IsKnownStaticDataMember) {
558       // This was supposed to be a static data member, but we got a function.
559       Error = true;
560       return nullptr;
561     }
562 
563     FSN = static_cast<FunctionSymbolNode *>(Symbol);
564     DSIN->Name = Symbol->Name;
565     FSN->Name = synthesizeQualifiedName(Arena, DSIN);
566   }
567 
568   return FSN;
569 }
570 
571 SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
572   SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName);
573   if (SIK == SpecialIntrinsicKind::None)
574     return nullptr;
575 
576   switch (SIK) {
577   case SpecialIntrinsicKind::StringLiteralSymbol:
578     return demangleStringLiteral(MangledName);
579   case SpecialIntrinsicKind::Vftable:
580   case SpecialIntrinsicKind::Vbtable:
581   case SpecialIntrinsicKind::LocalVftable:
582   case SpecialIntrinsicKind::RttiCompleteObjLocator:
583     return demangleSpecialTableSymbolNode(MangledName, SIK);
584   case SpecialIntrinsicKind::VcallThunk:
585     return demangleVcallThunkNode(MangledName);
586   case SpecialIntrinsicKind::LocalStaticGuard:
587     return demangleLocalStaticGuard(MangledName);
588   case SpecialIntrinsicKind::RttiTypeDescriptor: {
589     TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
590     if (Error)
591       break;
592     if (!MangledName.consumeFront("@8"))
593       break;
594     if (!MangledName.empty())
595       break;
596     return synthesizeVariable(Arena, T, "`RTTI Type Descriptor'");
597   }
598   case SpecialIntrinsicKind::RttiBaseClassArray:
599     return demangleUntypedVariable(Arena, MangledName,
600                                    "`RTTI Base Class Array'");
601   case SpecialIntrinsicKind::RttiClassHierarchyDescriptor:
602     return demangleUntypedVariable(Arena, MangledName,
603                                    "`RTTI Class Hierarchy Descriptor'");
604   case SpecialIntrinsicKind::RttiBaseClassDescriptor:
605     return demangleRttiBaseClassDescriptorNode(Arena, MangledName);
606   case SpecialIntrinsicKind::DynamicInitializer:
607     return demangleInitFiniStub(MangledName, false);
608   case SpecialIntrinsicKind::DynamicAtexitDestructor:
609     return demangleInitFiniStub(MangledName, true);
610   default:
611     break;
612   }
613   Error = true;
614   return nullptr;
615 }
616 
617 IdentifierNode *
618 Demangler::demangleFunctionIdentifierCode(StringView &MangledName) {
619   assert(MangledName.startsWith('?'));
620   MangledName = MangledName.dropFront();
621 
622   if (MangledName.consumeFront("__"))
623     return demangleFunctionIdentifierCode(
624         MangledName, FunctionIdentifierCodeGroup::DoubleUnder);
625   else if (MangledName.consumeFront("_"))
626     return demangleFunctionIdentifierCode(MangledName,
627                                           FunctionIdentifierCodeGroup::Under);
628   return demangleFunctionIdentifierCode(MangledName,
629                                         FunctionIdentifierCodeGroup::Basic);
630 }
631 
632 StructorIdentifierNode *
633 Demangler::demangleStructorIdentifier(StringView &MangledName,
634                                       bool IsDestructor) {
635   StructorIdentifierNode *N = Arena.alloc<StructorIdentifierNode>();
636   N->IsDestructor = IsDestructor;
637   return N;
638 }
639 
640 ConversionOperatorIdentifierNode *
641 Demangler::demangleConversionOperatorIdentifier(StringView &MangledName) {
642   ConversionOperatorIdentifierNode *N =
643       Arena.alloc<ConversionOperatorIdentifierNode>();
644   return N;
645 }
646 
647 LiteralOperatorIdentifierNode *
648 Demangler::demangleLiteralOperatorIdentifier(StringView &MangledName) {
649   LiteralOperatorIdentifierNode *N =
650       Arena.alloc<LiteralOperatorIdentifierNode>();
651   N->Name = demangleSimpleString(MangledName, false);
652   return N;
653 }
654 
655 IntrinsicFunctionKind
656 translateIntrinsicFunctionCode(char CH, FunctionIdentifierCodeGroup Group) {
657   // Not all ? identifiers are intrinsics *functions*.  This function only maps
658   // operator codes for the special functions, all others are handled elsewhere,
659   // hence the IFK::None entries in the table.
660   using IFK = IntrinsicFunctionKind;
661   static IFK Basic[36] = {
662       IFK::None,             // ?0 # Foo::Foo()
663       IFK::None,             // ?1 # Foo::~Foo()
664       IFK::New,              // ?2 # operator new
665       IFK::Delete,           // ?3 # operator delete
666       IFK::Assign,           // ?4 # operator=
667       IFK::RightShift,       // ?5 # operator>>
668       IFK::LeftShift,        // ?6 # operator<<
669       IFK::LogicalNot,       // ?7 # operator!
670       IFK::Equals,           // ?8 # operator==
671       IFK::NotEquals,        // ?9 # operator!=
672       IFK::ArraySubscript,   // ?A # operator[]
673       IFK::None,             // ?B # Foo::operator <type>()
674       IFK::Pointer,          // ?C # operator->
675       IFK::Dereference,      // ?D # operator*
676       IFK::Increment,        // ?E # operator++
677       IFK::Decrement,        // ?F # operator--
678       IFK::Minus,            // ?G # operator-
679       IFK::Plus,             // ?H # operator+
680       IFK::BitwiseAnd,       // ?I # operator&
681       IFK::MemberPointer,    // ?J # operator->*
682       IFK::Divide,           // ?K # operator/
683       IFK::Modulus,          // ?L # operator%
684       IFK::LessThan,         // ?M operator<
685       IFK::LessThanEqual,    // ?N operator<=
686       IFK::GreaterThan,      // ?O operator>
687       IFK::GreaterThanEqual, // ?P operator>=
688       IFK::Comma,            // ?Q operator,
689       IFK::Parens,           // ?R operator()
690       IFK::BitwiseNot,       // ?S operator~
691       IFK::BitwiseXor,       // ?T operator^
692       IFK::BitwiseOr,        // ?U operator|
693       IFK::LogicalAnd,       // ?V operator&&
694       IFK::LogicalOr,        // ?W operator||
695       IFK::TimesEqual,       // ?X operator*=
696       IFK::PlusEqual,        // ?Y operator+=
697       IFK::MinusEqual,       // ?Z operator-=
698   };
699   static IFK Under[36] = {
700       IFK::DivEqual,           // ?_0 operator/=
701       IFK::ModEqual,           // ?_1 operator%=
702       IFK::RshEqual,           // ?_2 operator>>=
703       IFK::LshEqual,           // ?_3 operator<<=
704       IFK::BitwiseAndEqual,    // ?_4 operator&=
705       IFK::BitwiseOrEqual,     // ?_5 operator|=
706       IFK::BitwiseXorEqual,    // ?_6 operator^=
707       IFK::None,               // ?_7 # vftable
708       IFK::None,               // ?_8 # vbtable
709       IFK::None,               // ?_9 # vcall
710       IFK::None,               // ?_A # typeof
711       IFK::None,               // ?_B # local static guard
712       IFK::None,               // ?_C # string literal
713       IFK::VbaseDtor,          // ?_D # vbase destructor
714       IFK::VecDelDtor,         // ?_E # vector deleting destructor
715       IFK::DefaultCtorClosure, // ?_F # default constructor closure
716       IFK::ScalarDelDtor,      // ?_G # scalar deleting destructor
717       IFK::VecCtorIter,        // ?_H # vector constructor iterator
718       IFK::VecDtorIter,        // ?_I # vector destructor iterator
719       IFK::VecVbaseCtorIter,   // ?_J # vector vbase constructor iterator
720       IFK::VdispMap,           // ?_K # virtual displacement map
721       IFK::EHVecCtorIter,      // ?_L # eh vector constructor iterator
722       IFK::EHVecDtorIter,      // ?_M # eh vector destructor iterator
723       IFK::EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator
724       IFK::CopyCtorClosure,    // ?_O # copy constructor closure
725       IFK::None,               // ?_P<name> # udt returning <name>
726       IFK::None,               // ?_Q # <unknown>
727       IFK::None,               // ?_R0 - ?_R4 # RTTI Codes
728       IFK::None,               // ?_S # local vftable
729       IFK::LocalVftableCtorClosure, // ?_T # local vftable constructor closure
730       IFK::ArrayNew,                // ?_U operator new[]
731       IFK::ArrayDelete,             // ?_V operator delete[]
732       IFK::None,                    // ?_W <unused>
733       IFK::None,                    // ?_X <unused>
734       IFK::None,                    // ?_Y <unused>
735       IFK::None,                    // ?_Z <unused>
736   };
737   static IFK DoubleUnder[36] = {
738       IFK::None,                       // ?__0 <unused>
739       IFK::None,                       // ?__1 <unused>
740       IFK::None,                       // ?__2 <unused>
741       IFK::None,                       // ?__3 <unused>
742       IFK::None,                       // ?__4 <unused>
743       IFK::None,                       // ?__5 <unused>
744       IFK::None,                       // ?__6 <unused>
745       IFK::None,                       // ?__7 <unused>
746       IFK::None,                       // ?__8 <unused>
747       IFK::None,                       // ?__9 <unused>
748       IFK::ManVectorCtorIter,          // ?__A managed vector ctor iterator
749       IFK::ManVectorDtorIter,          // ?__B managed vector dtor iterator
750       IFK::EHVectorCopyCtorIter,       // ?__C EH vector copy ctor iterator
751       IFK::EHVectorVbaseCopyCtorIter,  // ?__D EH vector vbase copy ctor iter
752       IFK::None,                       // ?__E dynamic initializer for `T'
753       IFK::None,                       // ?__F dynamic atexit destructor for `T'
754       IFK::VectorCopyCtorIter,         // ?__G vector copy constructor iter
755       IFK::VectorVbaseCopyCtorIter,    // ?__H vector vbase copy ctor iter
756       IFK::ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy ctor
757                                        // iter
758       IFK::None,                       // ?__J local static thread guard
759       IFK::None,                       // ?__K operator ""_name
760       IFK::CoAwait,                    // ?__L co_await
761       IFK::None,                       // ?__M <unused>
762       IFK::None,                       // ?__N <unused>
763       IFK::None,                       // ?__O <unused>
764       IFK::None,                       // ?__P <unused>
765       IFK::None,                       // ?__Q <unused>
766       IFK::None,                       // ?__R <unused>
767       IFK::None,                       // ?__S <unused>
768       IFK::None,                       // ?__T <unused>
769       IFK::None,                       // ?__U <unused>
770       IFK::None,                       // ?__V <unused>
771       IFK::None,                       // ?__W <unused>
772       IFK::None,                       // ?__X <unused>
773       IFK::None,                       // ?__Y <unused>
774       IFK::None,                       // ?__Z <unused>
775   };
776 
777   int Index = (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10);
778   switch (Group) {
779   case FunctionIdentifierCodeGroup::Basic:
780     return Basic[Index];
781   case FunctionIdentifierCodeGroup::Under:
782     return Under[Index];
783   case FunctionIdentifierCodeGroup::DoubleUnder:
784     return DoubleUnder[Index];
785   }
786   LLVM_BUILTIN_UNREACHABLE;
787 }
788 
789 IdentifierNode *
790 Demangler::demangleFunctionIdentifierCode(StringView &MangledName,
791                                           FunctionIdentifierCodeGroup Group) {
792   switch (Group) {
793   case FunctionIdentifierCodeGroup::Basic:
794     switch (char CH = MangledName.popFront()) {
795     case '0':
796     case '1':
797       return demangleStructorIdentifier(MangledName, CH == '1');
798     case 'B':
799       return demangleConversionOperatorIdentifier(MangledName);
800     default:
801       return Arena.alloc<IntrinsicFunctionIdentifierNode>(
802           translateIntrinsicFunctionCode(CH, Group));
803     }
804     break;
805   case FunctionIdentifierCodeGroup::Under:
806     return Arena.alloc<IntrinsicFunctionIdentifierNode>(
807         translateIntrinsicFunctionCode(MangledName.popFront(), Group));
808   case FunctionIdentifierCodeGroup::DoubleUnder:
809     switch (char CH = MangledName.popFront()) {
810     case 'K':
811       return demangleLiteralOperatorIdentifier(MangledName);
812     default:
813       return Arena.alloc<IntrinsicFunctionIdentifierNode>(
814           translateIntrinsicFunctionCode(CH, Group));
815     }
816   }
817   // No Mangling Yet:      Spaceship,                    // operator<=>
818 
819   return nullptr;
820 }
821 
822 SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName,
823                                              QualifiedNameNode *Name) {
824   // Read a variable.
825   switch (MangledName.front()) {
826   case '0':
827   case '1':
828   case '2':
829   case '3':
830   case '4': {
831     StorageClass SC = demangleVariableStorageClass(MangledName);
832     return demangleVariableEncoding(MangledName, SC);
833   }
834   case '8':
835     return nullptr;
836   }
837   FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName);
838 
839   IdentifierNode *UQN = Name->getUnqualifiedIdentifier();
840   if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) {
841     ConversionOperatorIdentifierNode *COIN =
842         static_cast<ConversionOperatorIdentifierNode *>(UQN);
843     COIN->TargetType = FSN->Signature->ReturnType;
844   }
845   return FSN;
846 }
847 
848 // Parser entry point.
849 SymbolNode *Demangler::parse(StringView &MangledName) {
850   // We can't demangle MD5 names, just output them as-is.
851   // Also, MSVC-style mangled symbols must start with '?'.
852   if (MangledName.startsWith("??@")) {
853     // This is an MD5 mangled name.  We can't demangle it, just return the
854     // mangled name.
855     SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol);
856     S->Name = synthesizeQualifiedName(Arena, MangledName);
857     return S;
858   }
859 
860   if (!MangledName.startsWith('?')) {
861     Error = true;
862     return nullptr;
863   }
864 
865   MangledName.consumeFront('?');
866 
867   // ?$ is a template instantiation, but all other names that start with ? are
868   // operators / special names.
869   if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName))
870     return SI;
871 
872   // What follows is a main symbol name. This may include namespaces or class
873   // back references.
874   QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
875   if (Error)
876     return nullptr;
877 
878   SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
879   if (Symbol) {
880     Symbol->Name = QN;
881   }
882 
883   if (Error)
884     return nullptr;
885 
886   return Symbol;
887 }
888 
889 // <type-encoding> ::= <storage-class> <variable-type>
890 // <storage-class> ::= 0  # private static member
891 //                 ::= 1  # protected static member
892 //                 ::= 2  # public static member
893 //                 ::= 3  # global
894 //                 ::= 4  # static local
895 
896 VariableSymbolNode *Demangler::demangleVariableEncoding(StringView &MangledName,
897                                                         StorageClass SC) {
898   VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
899 
900   VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop);
901   VSN->SC = SC;
902 
903   // <variable-type> ::= <type> <cvr-qualifiers>
904   //                 ::= <type> <pointee-cvr-qualifiers> # pointers, references
905   switch (VSN->Type->kind()) {
906   case NodeKind::PointerType: {
907     PointerTypeNode *PTN = static_cast<PointerTypeNode *>(VSN->Type);
908 
909     Qualifiers ExtraChildQuals = Q_None;
910     PTN->Quals = Qualifiers(VSN->Type->Quals |
911                             demanglePointerExtQualifiers(MangledName));
912 
913     bool IsMember = false;
914     std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName);
915 
916     if (PTN->ClassParent) {
917       QualifiedNameNode *BackRefName =
918           demangleFullyQualifiedTypeName(MangledName);
919       (void)BackRefName;
920     }
921     PTN->Pointee->Quals = Qualifiers(PTN->Pointee->Quals | ExtraChildQuals);
922 
923     break;
924   }
925   default:
926     VSN->Type->Quals = demangleQualifiers(MangledName).first;
927     break;
928   }
929 
930   return VSN;
931 }
932 
933 // Sometimes numbers are encoded in mangled symbols. For example,
934 // "int (*x)[20]" is a valid C type (x is a pointer to an array of
935 // length 20), so we need some way to embed numbers as part of symbols.
936 // This function parses it.
937 //
938 // <number>               ::= [?] <non-negative integer>
939 //
940 // <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10
941 //                        ::= <hex digit>+ @  # when Numbrer == 0 or >= 10
942 //
943 // <hex-digit>            ::= [A-P]           # A = 0, B = 1, ...
944 std::pair<uint64_t, bool> Demangler::demangleNumber(StringView &MangledName) {
945   bool IsNegative = MangledName.consumeFront('?');
946 
947   if (startsWithDigit(MangledName)) {
948     uint64_t Ret = MangledName[0] - '0' + 1;
949     MangledName = MangledName.dropFront(1);
950     return {Ret, IsNegative};
951   }
952 
953   uint64_t Ret = 0;
954   for (size_t i = 0; i < MangledName.size(); ++i) {
955     char C = MangledName[i];
956     if (C == '@') {
957       MangledName = MangledName.dropFront(i + 1);
958       return {Ret, IsNegative};
959     }
960     if ('A' <= C && C <= 'P') {
961       Ret = (Ret << 4) + (C - 'A');
962       continue;
963     }
964     break;
965   }
966 
967   Error = true;
968   return {0ULL, false};
969 }
970 
971 uint64_t Demangler::demangleUnsigned(StringView &MangledName) {
972   bool IsNegative = false;
973   uint64_t Number = 0;
974   std::tie(Number, IsNegative) = demangleNumber(MangledName);
975   if (IsNegative)
976     Error = true;
977   return Number;
978 }
979 
980 int64_t Demangler::demangleSigned(StringView &MangledName) {
981   bool IsNegative = false;
982   uint64_t Number = 0;
983   std::tie(Number, IsNegative) = demangleNumber(MangledName);
984   if (Number > INT64_MAX)
985     Error = true;
986   int64_t I = static_cast<int64_t>(Number);
987   return IsNegative ? -I : I;
988 }
989 
990 // First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9.
991 // Memorize it.
992 void Demangler::memorizeString(StringView S) {
993   if (Backrefs.NamesCount >= BackrefContext::Max)
994     return;
995   for (size_t i = 0; i < Backrefs.NamesCount; ++i)
996     if (S == Backrefs.Names[i]->Name)
997       return;
998   NamedIdentifierNode *N = Arena.alloc<NamedIdentifierNode>();
999   N->Name = S;
1000   Backrefs.Names[Backrefs.NamesCount++] = N;
1001 }
1002 
1003 NamedIdentifierNode *Demangler::demangleBackRefName(StringView &MangledName) {
1004   assert(startsWithDigit(MangledName));
1005 
1006   size_t I = MangledName[0] - '0';
1007   if (I >= Backrefs.NamesCount) {
1008     Error = true;
1009     return nullptr;
1010   }
1011 
1012   MangledName = MangledName.dropFront();
1013   return Backrefs.Names[I];
1014 }
1015 
1016 void Demangler::memorizeIdentifier(IdentifierNode *Identifier) {
1017   // Render this class template name into a string buffer so that we can
1018   // memorize it for the purpose of back-referencing.
1019   OutputStream OS;
1020   if (initializeOutputStream(nullptr, nullptr, OS, 1024))
1021     // FIXME: Propagate out-of-memory as an error?
1022     std::terminate();
1023   Identifier->output(OS, OF_Default);
1024   OS << '\0';
1025   char *Name = OS.getBuffer();
1026 
1027   StringView Owned = copyString(Name);
1028   memorizeString(Owned);
1029   std::free(Name);
1030 }
1031 
1032 IdentifierNode *
1033 Demangler::demangleTemplateInstantiationName(StringView &MangledName,
1034                                              NameBackrefBehavior NBB) {
1035   assert(MangledName.startsWith("?$"));
1036   MangledName.consumeFront("?$");
1037 
1038   BackrefContext OuterContext;
1039   std::swap(OuterContext, Backrefs);
1040 
1041   IdentifierNode *Identifier =
1042       demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
1043   if (!Error)
1044     Identifier->TemplateParams = demangleTemplateParameterList(MangledName);
1045 
1046   std::swap(OuterContext, Backrefs);
1047   if (Error)
1048     return nullptr;
1049 
1050   if (NBB & NBB_Template)
1051     memorizeIdentifier(Identifier);
1052 
1053   return Identifier;
1054 }
1055 
1056 NamedIdentifierNode *Demangler::demangleSimpleName(StringView &MangledName,
1057                                                    bool Memorize) {
1058   StringView S = demangleSimpleString(MangledName, Memorize);
1059   if (Error)
1060     return nullptr;
1061 
1062   NamedIdentifierNode *Name = Arena.alloc<NamedIdentifierNode>();
1063   Name->Name = S;
1064   return Name;
1065 }
1066 
// Returns true if C is one of the sixteen "rebased" hex digits 'A'..'P'.
static bool isRebasedHexDigit(char C) {
  if (C < 'A')
    return false;
  return C <= 'P';
}
1068 
1069 static uint8_t rebasedHexDigitToNumber(char C) {
1070   assert(isRebasedHexDigit(C));
1071   return (C <= 'J') ? (C - 'A') : (10 + C - 'K');
1072 }
1073 
1074 uint8_t Demangler::demangleCharLiteral(StringView &MangledName) {
1075   if (!MangledName.startsWith('?'))
1076     return MangledName.popFront();
1077 
1078   MangledName = MangledName.dropFront();
1079   if (MangledName.empty())
1080     goto CharLiteralError;
1081 
1082   if (MangledName.consumeFront('$')) {
1083     // Two hex digits
1084     if (MangledName.size() < 2)
1085       goto CharLiteralError;
1086     StringView Nibbles = MangledName.substr(0, 2);
1087     if (!isRebasedHexDigit(Nibbles[0]) || !isRebasedHexDigit(Nibbles[1]))
1088       goto CharLiteralError;
1089     // Don't append the null terminator.
1090     uint8_t C1 = rebasedHexDigitToNumber(Nibbles[0]);
1091     uint8_t C2 = rebasedHexDigitToNumber(Nibbles[1]);
1092     MangledName = MangledName.dropFront(2);
1093     return (C1 << 4) | C2;
1094   }
1095 
1096   if (startsWithDigit(MangledName)) {
1097     const char *Lookup = ",/\\:. \n\t'-";
1098     char C = Lookup[MangledName[0] - '0'];
1099     MangledName = MangledName.dropFront();
1100     return C;
1101   }
1102 
1103   if (MangledName[0] >= 'a' && MangledName[0] <= 'z') {
1104     char Lookup[26] = {'\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7',
1105                        '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE',
1106                        '\xEF', '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5',
1107                        '\xF6', '\xF7', '\xF8', '\xF9', '\xFA'};
1108     char C = Lookup[MangledName[0] - 'a'];
1109     MangledName = MangledName.dropFront();
1110     return C;
1111   }
1112 
1113   if (MangledName[0] >= 'A' && MangledName[0] <= 'Z') {
1114     char Lookup[26] = {'\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7',
1115                        '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE',
1116                        '\xCF', '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5',
1117                        '\xD6', '\xD7', '\xD8', '\xD9', '\xDA'};
1118     char C = Lookup[MangledName[0] - 'A'];
1119     MangledName = MangledName.dropFront();
1120     return C;
1121   }
1122 
1123 CharLiteralError:
1124   Error = true;
1125   return '\0';
1126 }
1127 
1128 wchar_t Demangler::demangleWcharLiteral(StringView &MangledName) {
1129   uint8_t C1, C2;
1130 
1131   C1 = demangleCharLiteral(MangledName);
1132   if (Error)
1133     goto WCharLiteralError;
1134   C2 = demangleCharLiteral(MangledName);
1135   if (Error)
1136     goto WCharLiteralError;
1137 
1138   return ((wchar_t)C1 << 8) | (wchar_t)C2;
1139 
1140 WCharLiteralError:
1141   Error = true;
1142   return L'\0';
1143 }
1144 
// Stores the upper-case hexadecimal character for Digit (0..15) in *Buffer.
static void writeHexDigit(char *Buffer, uint8_t Digit) {
  assert(Digit <= 15);
  if (Digit < 10)
    *Buffer = '0' + Digit;
  else
    *Buffer = 'A' + Digit - 10;
}
1149 
1150 static void outputHex(OutputStream &OS, unsigned C) {
1151   if (C == 0) {
1152     OS << "\\x00";
1153     return;
1154   }
1155   // It's easier to do the math if we can work from right to left, but we need
1156   // to print the numbers from left to right.  So render this into a temporary
1157   // buffer first, then output the temporary buffer.  Each byte is of the form
1158   // \xAB, which means that each byte needs 4 characters.  Since there are at
1159   // most 4 bytes, we need a 4*4+1 = 17 character temporary buffer.
1160   char TempBuffer[17];
1161 
1162   ::memset(TempBuffer, 0, sizeof(TempBuffer));
1163   constexpr int MaxPos = 15;
1164 
1165   int Pos = MaxPos - 1;
1166   while (C != 0) {
1167     for (int I = 0; I < 2; ++I) {
1168       writeHexDigit(&TempBuffer[Pos--], C % 16);
1169       C /= 16;
1170     }
1171     TempBuffer[Pos--] = 'x';
1172     TempBuffer[Pos--] = '\\';
1173     assert(Pos >= 0);
1174   }
1175   OS << StringView(&TempBuffer[Pos + 1]);
1176 }
1177 
1178 static void outputEscapedChar(OutputStream &OS, unsigned C) {
1179   switch (C) {
1180   case '\'': // single quote
1181     OS << "\\\'";
1182     return;
1183   case '\"': // double quote
1184     OS << "\\\"";
1185     return;
1186   case '\\': // backslash
1187     OS << "\\\\";
1188     return;
1189   case '\a': // bell
1190     OS << "\\a";
1191     return;
1192   case '\b': // backspace
1193     OS << "\\b";
1194     return;
1195   case '\f': // form feed
1196     OS << "\\f";
1197     return;
1198   case '\n': // new line
1199     OS << "\\n";
1200     return;
1201   case '\r': // carriage return
1202     OS << "\\r";
1203     return;
1204   case '\t': // tab
1205     OS << "\\t";
1206     return;
1207   case '\v': // vertical tab
1208     OS << "\\v";
1209     return;
1210   default:
1211     break;
1212   }
1213 
1214   if (C > 0x1F && C < 0x7F) {
1215     // Standard ascii char.
1216     OS << (char)C;
1217     return;
1218   }
1219 
1220   outputHex(OS, C);
1221 }
1222 
// Counts the zero bytes at the end of StringBytes[0 .. Length).  Returns 0
// when Length is zero or negative.
unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) {
  unsigned Count = 0;
  // Index from the front instead of forming an end pointer: for Length == 0
  // the expression StringBytes + Length - 1 would point before the buffer.
  while (Length > 0 && StringBytes[Length - 1] == 0) {
    --Length;
    ++Count;
  }
  return Count;
}
1233 
// Returns the number of zero bytes anywhere in StringBytes[0 .. Length).
unsigned countEmbeddedNulls(const uint8_t *StringBytes, unsigned Length) {
  unsigned Nulls = 0;
  const uint8_t *const End = StringBytes + Length;
  for (const uint8_t *Cursor = StringBytes; Cursor != End; ++Cursor)
    Nulls += (*Cursor == 0) ? 1u : 0u;
  return Nulls;
}
1242 
1243 unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars,
1244                            unsigned NumBytes) {
1245   assert(NumBytes > 0);
1246 
1247   // If the number of bytes is odd, this is guaranteed to be a char string.
1248   if (NumBytes % 2 == 1)
1249     return 1;
1250 
1251   // All strings can encode at most 32 bytes of data.  If it's less than that,
1252   // then we encoded the entire string.  In this case we check for a 1-byte,
1253   // 2-byte, or 4-byte null terminator.
1254   if (NumBytes < 32) {
1255     unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars);
1256     if (TrailingNulls >= 4)
1257       return 4;
1258     if (TrailingNulls >= 2)
1259       return 2;
1260     return 1;
1261   }
1262 
1263   // The whole string was not able to be encoded.  Try to look at embedded null
1264   // terminators to guess.  The heuristic is that we count all embedded null
1265   // terminators.  If more than 2/3 are null, it's a char32.  If more than 1/3
1266   // are null, it's a char16.  Otherwise it's a char8.  This obviously isn't
1267   // perfect and is biased towards languages that have ascii alphabets, but this
1268   // was always going to be best effort since the encoding is lossy.
1269   unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars);
1270   if (Nulls >= 2 * NumChars / 3)
1271     return 4;
1272   if (Nulls >= NumChars / 3)
1273     return 2;
1274   return 1;
1275 }
1276 
// Reads the CharIndex-th character of width CharBytes (1, 2 or 4) from
// StringBytes, assembling it with the first byte as the least significant.
static unsigned decodeMultiByteChar(const uint8_t *StringBytes,
                                    unsigned CharIndex, unsigned CharBytes) {
  assert(CharBytes == 1 || CharBytes == 2 || CharBytes == 4);
  const uint8_t *CharStart = StringBytes + CharIndex * CharBytes;
  unsigned Value = 0;
  unsigned Shift = 0;
  for (unsigned I = 0; I != CharBytes; ++I, Shift += 8)
    Value |= static_cast<unsigned>(CharStart[I]) << Shift;
  return Value;
}
1289 
1290 FunctionSymbolNode *Demangler::demangleVcallThunkNode(StringView &MangledName) {
1291   FunctionSymbolNode *FSN = Arena.alloc<FunctionSymbolNode>();
1292   VcallThunkIdentifierNode *VTIN = Arena.alloc<VcallThunkIdentifierNode>();
1293   FSN->Signature = Arena.alloc<ThunkSignatureNode>();
1294   FSN->Signature->FunctionClass = FC_NoParameterList;
1295 
1296   FSN->Name = demangleNameScopeChain(MangledName, VTIN);
1297   if (!Error)
1298     Error = !MangledName.consumeFront("$B");
1299   if (!Error)
1300     VTIN->OffsetInVTable = demangleUnsigned(MangledName);
1301   if (!Error)
1302     Error = !MangledName.consumeFront('A');
1303   if (!Error)
1304     FSN->Signature->CallConvention = demangleCallingConvention(MangledName);
1305   return (Error) ? nullptr : FSN;
1306 }
1307 
1308 EncodedStringLiteralNode *
1309 Demangler::demangleStringLiteral(StringView &MangledName) {
1310   // This function uses goto, so declare all variables up front.
1311   OutputStream OS;
1312   StringView CRC;
1313   uint64_t StringByteSize;
1314   bool IsWcharT = false;
1315   bool IsNegative = false;
1316   size_t CrcEndPos = 0;
1317   char *ResultBuffer = nullptr;
1318 
1319   EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>();
1320 
1321   // Prefix indicating the beginning of a string literal
1322   if (!MangledName.consumeFront("@_"))
1323     goto StringLiteralError;
1324   if (MangledName.empty())
1325     goto StringLiteralError;
1326 
1327   // Char Type (regular or wchar_t)
1328   switch (MangledName.popFront()) {
1329   case '1':
1330     IsWcharT = true;
1331     LLVM_FALLTHROUGH;
1332   case '0':
1333     break;
1334   default:
1335     goto StringLiteralError;
1336   }
1337 
1338   // Encoded Length
1339   std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName);
1340   if (Error || IsNegative)
1341     goto StringLiteralError;
1342 
1343   // CRC 32 (always 8 characters plus a terminator)
1344   CrcEndPos = MangledName.find('@');
1345   if (CrcEndPos == StringView::npos)
1346     goto StringLiteralError;
1347   CRC = MangledName.substr(0, CrcEndPos);
1348   MangledName = MangledName.dropFront(CrcEndPos + 1);
1349   if (MangledName.empty())
1350     goto StringLiteralError;
1351 
1352   if (initializeOutputStream(nullptr, nullptr, OS, 1024))
1353     // FIXME: Propagate out-of-memory as an error?
1354     std::terminate();
1355   if (IsWcharT) {
1356     Result->Char = CharKind::Wchar;
1357     if (StringByteSize > 64)
1358       Result->IsTruncated = true;
1359 
1360     while (!MangledName.consumeFront('@')) {
1361       assert(StringByteSize >= 2);
1362       wchar_t W = demangleWcharLiteral(MangledName);
1363       if (StringByteSize != 2 || Result->IsTruncated)
1364         outputEscapedChar(OS, W);
1365       StringByteSize -= 2;
1366       if (Error)
1367         goto StringLiteralError;
1368     }
1369   } else {
1370     // The max byte length is actually 32, but some compilers mangled strings
1371     // incorrectly, so we have to assume it can go higher.
1372     constexpr unsigned MaxStringByteLength = 32 * 4;
1373     uint8_t StringBytes[MaxStringByteLength];
1374 
1375     unsigned BytesDecoded = 0;
1376     while (!MangledName.consumeFront('@')) {
1377       assert(StringByteSize >= 1);
1378       StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName);
1379     }
1380 
1381     if (StringByteSize > BytesDecoded)
1382       Result->IsTruncated = true;
1383 
1384     unsigned CharBytes =
1385         guessCharByteSize(StringBytes, BytesDecoded, StringByteSize);
1386     assert(StringByteSize % CharBytes == 0);
1387     switch (CharBytes) {
1388     case 1:
1389       Result->Char = CharKind::Char;
1390       break;
1391     case 2:
1392       Result->Char = CharKind::Char16;
1393       break;
1394     case 4:
1395       Result->Char = CharKind::Char32;
1396       break;
1397     default:
1398       LLVM_BUILTIN_UNREACHABLE;
1399     }
1400     const unsigned NumChars = BytesDecoded / CharBytes;
1401     for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) {
1402       unsigned NextChar =
1403           decodeMultiByteChar(StringBytes, CharIndex, CharBytes);
1404       if (CharIndex + 1 < NumChars || Result->IsTruncated)
1405         outputEscapedChar(OS, NextChar);
1406     }
1407   }
1408 
1409   OS << '\0';
1410   ResultBuffer = OS.getBuffer();
1411   Result->DecodedString = copyString(ResultBuffer);
1412   std::free(ResultBuffer);
1413   return Result;
1414 
1415 StringLiteralError:
1416   Error = true;
1417   return nullptr;
1418 }
1419 
1420 StringView Demangler::demangleSimpleString(StringView &MangledName,
1421                                            bool Memorize) {
1422   StringView S;
1423   for (size_t i = 0; i < MangledName.size(); ++i) {
1424     if (MangledName[i] != '@')
1425       continue;
1426     S = MangledName.substr(0, i);
1427     MangledName = MangledName.dropFront(i + 1);
1428 
1429     if (Memorize)
1430       memorizeString(S);
1431     return S;
1432   }
1433 
1434   Error = true;
1435   return {};
1436 }
1437 
1438 NamedIdentifierNode *
1439 Demangler::demangleAnonymousNamespaceName(StringView &MangledName) {
1440   assert(MangledName.startsWith("?A"));
1441   MangledName.consumeFront("?A");
1442 
1443   NamedIdentifierNode *Node = Arena.alloc<NamedIdentifierNode>();
1444   Node->Name = "`anonymous namespace'";
1445   size_t EndPos = MangledName.find('@');
1446   if (EndPos == StringView::npos) {
1447     Error = true;
1448     return nullptr;
1449   }
1450   StringView NamespaceKey = MangledName.substr(0, EndPos);
1451   memorizeString(NamespaceKey);
1452   MangledName = MangledName.substr(EndPos + 1);
1453   return Node;
1454 }
1455 
1456 NamedIdentifierNode *
1457 Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
1458   assert(startsWithLocalScopePattern(MangledName));
1459 
1460   NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>();
1461   MangledName.consumeFront('?');
1462   auto Number = demangleNumber(MangledName);
1463   assert(!Number.second);
1464 
1465   // One ? to terminate the number
1466   MangledName.consumeFront('?');
1467 
1468   assert(!Error);
1469   Node *Scope = parse(MangledName);
1470   if (Error)
1471     return nullptr;
1472 
1473   // Render the parent symbol's name into a buffer.
1474   OutputStream OS;
1475   if (initializeOutputStream(nullptr, nullptr, OS, 1024))
1476     // FIXME: Propagate out-of-memory as an error?
1477     std::terminate();
1478   OS << '`';
1479   Scope->output(OS, OF_Default);
1480   OS << '\'';
1481   OS << "::`" << Number.first << "'";
1482   OS << '\0';
1483   char *Result = OS.getBuffer();
1484   Identifier->Name = copyString(Result);
1485   std::free(Result);
1486   return Identifier;
1487 }
1488 
1489 // Parses a type name in the form of A@B@C@@ which represents C::B::A.
1490 QualifiedNameNode *
1491 Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) {
1492   IdentifierNode *Identifier = demangleUnqualifiedTypeName(MangledName, true);
1493   if (Error)
1494     return nullptr;
1495   assert(Identifier);
1496 
1497   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
1498   if (Error)
1499     return nullptr;
1500   assert(QN);
1501   return QN;
1502 }
1503 
1504 // Parses a symbol name in the form of A@B@C@@ which represents C::B::A.
1505 // Symbol names have slightly different rules regarding what can appear
1506 // so we separate out the implementations for flexibility.
1507 QualifiedNameNode *
1508 Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) {
1509   // This is the final component of a symbol name (i.e. the leftmost component
1510   // of a mangled name.  Since the only possible template instantiation that
1511   // can appear in this context is a function template, and since those are
1512   // not saved for the purposes of name backreferences, only backref simple
1513   // names.
1514   IdentifierNode *Identifier =
1515       demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
1516   if (Error)
1517     return nullptr;
1518 
1519   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
1520   if (Error)
1521     return nullptr;
1522 
1523   if (Identifier->kind() == NodeKind::StructorIdentifier) {
1524     StructorIdentifierNode *SIN =
1525         static_cast<StructorIdentifierNode *>(Identifier);
1526     assert(QN->Components->Count >= 2);
1527     Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2];
1528     SIN->Class = static_cast<IdentifierNode *>(ClassNode);
1529   }
1530   assert(QN);
1531   return QN;
1532 }
1533 
1534 IdentifierNode *Demangler::demangleUnqualifiedTypeName(StringView &MangledName,
1535                                                        bool Memorize) {
1536   // An inner-most name can be a back-reference, because a fully-qualified name
1537   // (e.g. Scope + Inner) can contain other fully qualified names inside of
1538   // them (for example template parameters), and these nested parameters can
1539   // refer to previously mangled types.
1540   if (startsWithDigit(MangledName))
1541     return demangleBackRefName(MangledName);
1542 
1543   if (MangledName.startsWith("?$"))
1544     return demangleTemplateInstantiationName(MangledName, NBB_Template);
1545 
1546   return demangleSimpleName(MangledName, Memorize);
1547 }
1548 
1549 IdentifierNode *
1550 Demangler::demangleUnqualifiedSymbolName(StringView &MangledName,
1551                                          NameBackrefBehavior NBB) {
1552   if (startsWithDigit(MangledName))
1553     return demangleBackRefName(MangledName);
1554   if (MangledName.startsWith("?$"))
1555     return demangleTemplateInstantiationName(MangledName, NBB);
1556   if (MangledName.startsWith('?'))
1557     return demangleFunctionIdentifierCode(MangledName);
1558   return demangleSimpleName(MangledName, (NBB & NBB_Simple) != 0);
1559 }
1560 
1561 IdentifierNode *Demangler::demangleNameScopePiece(StringView &MangledName) {
1562   if (startsWithDigit(MangledName))
1563     return demangleBackRefName(MangledName);
1564 
1565   if (MangledName.startsWith("?$"))
1566     return demangleTemplateInstantiationName(MangledName, NBB_Template);
1567 
1568   if (MangledName.startsWith("?A"))
1569     return demangleAnonymousNamespaceName(MangledName);
1570 
1571   if (startsWithLocalScopePattern(MangledName))
1572     return demangleLocallyScopedNamePiece(MangledName);
1573 
1574   return demangleSimpleName(MangledName, true);
1575 }
1576 
1577 static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head,
1578                                           size_t Count) {
1579   NodeArrayNode *N = Arena.alloc<NodeArrayNode>();
1580   N->Count = Count;
1581   N->Nodes = Arena.allocArray<Node *>(Count);
1582   for (size_t I = 0; I < Count; ++I) {
1583     N->Nodes[I] = Head->N;
1584     Head = Head->Next;
1585   }
1586   return N;
1587 }
1588 
1589 QualifiedNameNode *
1590 Demangler::demangleNameScopeChain(StringView &MangledName,
1591                                   IdentifierNode *UnqualifiedName) {
1592   NodeList *Head = Arena.alloc<NodeList>();
1593 
1594   Head->N = UnqualifiedName;
1595 
1596   size_t Count = 1;
1597   while (!MangledName.consumeFront("@")) {
1598     ++Count;
1599     NodeList *NewHead = Arena.alloc<NodeList>();
1600     NewHead->Next = Head;
1601     Head = NewHead;
1602 
1603     if (MangledName.empty()) {
1604       Error = true;
1605       return nullptr;
1606     }
1607 
1608     assert(!Error);
1609     IdentifierNode *Elem = demangleNameScopePiece(MangledName);
1610     if (Error)
1611       return nullptr;
1612 
1613     Head->N = Elem;
1614   }
1615 
1616   QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>();
1617   QN->Components = nodeListToNodeArray(Arena, Head, Count);
1618   return QN;
1619 }
1620 
1621 FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
1622   switch (MangledName.popFront()) {
1623   case '9':
1624     return FuncClass(FC_ExternC | FC_NoParameterList);
1625   case 'A':
1626     return FC_Private;
1627   case 'B':
1628     return FuncClass(FC_Private | FC_Far);
1629   case 'C':
1630     return FuncClass(FC_Private | FC_Static);
1631   case 'D':
1632     return FuncClass(FC_Private | FC_Static);
1633   case 'E':
1634     return FuncClass(FC_Private | FC_Virtual);
1635   case 'F':
1636     return FuncClass(FC_Private | FC_Virtual);
1637   case 'G':
1638     return FuncClass(FC_Private | FC_StaticThisAdjust);
1639   case 'H':
1640     return FuncClass(FC_Private | FC_StaticThisAdjust | FC_Far);
1641   case 'I':
1642     return FuncClass(FC_Protected);
1643   case 'J':
1644     return FuncClass(FC_Protected | FC_Far);
1645   case 'K':
1646     return FuncClass(FC_Protected | FC_Static);
1647   case 'L':
1648     return FuncClass(FC_Protected | FC_Static | FC_Far);
1649   case 'M':
1650     return FuncClass(FC_Protected | FC_Virtual);
1651   case 'N':
1652     return FuncClass(FC_Protected | FC_Virtual | FC_Far);
1653   case 'O':
1654     return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust);
1655   case 'P':
1656     return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust | FC_Far);
1657   case 'Q':
1658     return FuncClass(FC_Public);
1659   case 'R':
1660     return FuncClass(FC_Public | FC_Far);
1661   case 'S':
1662     return FuncClass(FC_Public | FC_Static);
1663   case 'T':
1664     return FuncClass(FC_Public | FC_Static | FC_Far);
1665   case 'U':
1666     return FuncClass(FC_Public | FC_Virtual);
1667   case 'V':
1668     return FuncClass(FC_Public | FC_Virtual | FC_Far);
1669   case 'W':
1670     return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust);
1671   case 'X':
1672     return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust | FC_Far);
1673   case 'Y':
1674     return FuncClass(FC_Global);
1675   case 'Z':
1676     return FuncClass(FC_Global | FC_Far);
1677   case '$': {
1678     FuncClass VFlag = FC_VirtualThisAdjust;
1679     if (MangledName.consumeFront('R'))
1680       VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx);
1681 
1682     switch (MangledName.popFront()) {
1683     case '0':
1684       return FuncClass(FC_Private | FC_Virtual | VFlag);
1685     case '1':
1686       return FuncClass(FC_Private | FC_Virtual | VFlag | FC_Far);
1687     case '2':
1688       return FuncClass(FC_Protected | FC_Virtual | VFlag);
1689     case '3':
1690       return FuncClass(FC_Protected | FC_Virtual | VFlag | FC_Far);
1691     case '4':
1692       return FuncClass(FC_Public | FC_Virtual | VFlag);
1693     case '5':
1694       return FuncClass(FC_Public | FC_Virtual | VFlag | FC_Far);
1695     }
1696   }
1697   }
1698 
1699   Error = true;
1700   return FC_Public;
1701 }
1702 
1703 CallingConv Demangler::demangleCallingConvention(StringView &MangledName) {
1704   switch (MangledName.popFront()) {
1705   case 'A':
1706   case 'B':
1707     return CallingConv::Cdecl;
1708   case 'C':
1709   case 'D':
1710     return CallingConv::Pascal;
1711   case 'E':
1712   case 'F':
1713     return CallingConv::Thiscall;
1714   case 'G':
1715   case 'H':
1716     return CallingConv::Stdcall;
1717   case 'I':
1718   case 'J':
1719     return CallingConv::Fastcall;
1720   case 'M':
1721   case 'N':
1722     return CallingConv::Clrcall;
1723   case 'O':
1724   case 'P':
1725     return CallingConv::Eabi;
1726   case 'Q':
1727     return CallingConv::Vectorcall;
1728   }
1729 
1730   return CallingConv::None;
1731 }
1732 
1733 StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) {
1734   assert(std::isdigit(MangledName.front()));
1735 
1736   switch (MangledName.popFront()) {
1737   case '0':
1738     return StorageClass::PrivateStatic;
1739   case '1':
1740     return StorageClass::ProtectedStatic;
1741   case '2':
1742     return StorageClass::PublicStatic;
1743   case '3':
1744     return StorageClass::Global;
1745   case '4':
1746     return StorageClass::FunctionLocalStatic;
1747   }
1748   Error = true;
1749   return StorageClass::None;
1750 }
1751 
1752 std::pair<Qualifiers, bool>
1753 Demangler::demangleQualifiers(StringView &MangledName) {
1754 
1755   switch (MangledName.popFront()) {
1756   // Member qualifiers
1757   case 'Q':
1758     return std::make_pair(Q_None, true);
1759   case 'R':
1760     return std::make_pair(Q_Const, true);
1761   case 'S':
1762     return std::make_pair(Q_Volatile, true);
1763   case 'T':
1764     return std::make_pair(Qualifiers(Q_Const | Q_Volatile), true);
1765   // Non-Member qualifiers
1766   case 'A':
1767     return std::make_pair(Q_None, false);
1768   case 'B':
1769     return std::make_pair(Q_Const, false);
1770   case 'C':
1771     return std::make_pair(Q_Volatile, false);
1772   case 'D':
1773     return std::make_pair(Qualifiers(Q_Const | Q_Volatile), false);
1774   }
1775   Error = true;
1776   return std::make_pair(Q_None, false);
1777 }
1778 
1779 // <variable-type> ::= <type> <cvr-qualifiers>
1780 //                 ::= <type> <pointee-cvr-qualifiers> # pointers, references
1781 TypeNode *Demangler::demangleType(StringView &MangledName,
1782                                   QualifierMangleMode QMM) {
1783   Qualifiers Quals = Q_None;
1784   bool IsMember = false;
1785   if (QMM == QualifierMangleMode::Mangle) {
1786     std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
1787   } else if (QMM == QualifierMangleMode::Result) {
1788     if (MangledName.consumeFront('?'))
1789       std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
1790   }
1791 
1792   TypeNode *Ty = nullptr;
1793   if (isTagType(MangledName))
1794     Ty = demangleClassType(MangledName);
1795   else if (isPointerType(MangledName)) {
1796     if (isMemberPointer(MangledName))
1797       Ty = demangleMemberPointerType(MangledName);
1798     else
1799       Ty = demanglePointerType(MangledName);
1800   } else if (isArrayType(MangledName))
1801     Ty = demangleArrayType(MangledName);
1802   else if (isFunctionType(MangledName)) {
1803     if (MangledName.consumeFront("$$A8@@"))
1804       Ty = demangleFunctionType(MangledName, true);
1805     else {
1806       assert(MangledName.startsWith("$$A6"));
1807       MangledName.consumeFront("$$A6");
1808       Ty = demangleFunctionType(MangledName, false);
1809     }
1810   } else if (isCustomType(MangledName)) {
1811     Ty = demangleCustomType(MangledName);
1812   } else {
1813     Ty = demanglePrimitiveType(MangledName);
1814     if (!Ty || Error)
1815       return Ty;
1816   }
1817 
1818   Ty->Quals = Qualifiers(Ty->Quals | Quals);
1819   return Ty;
1820 }
1821 
1822 void Demangler::demangleThrowSpecification(StringView &MangledName) {
1823   if (MangledName.consumeFront('Z'))
1824     return;
1825 
1826   Error = true;
1827 }
1828 
1829 FunctionSignatureNode *Demangler::demangleFunctionType(StringView &MangledName,
1830                                                        bool HasThisQuals) {
1831   FunctionSignatureNode *FTy = Arena.alloc<FunctionSignatureNode>();
1832 
1833   if (HasThisQuals) {
1834     FTy->Quals = demanglePointerExtQualifiers(MangledName);
1835     FTy->RefQualifier = demangleFunctionRefQualifier(MangledName);
1836     FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first);
1837   }
1838 
1839   // Fields that appear on both member and non-member functions.
1840   FTy->CallConvention = demangleCallingConvention(MangledName);
1841 
1842   // <return-type> ::= <type>
1843   //               ::= @ # structors (they have no declared return type)
1844   bool IsStructor = MangledName.consumeFront('@');
1845   if (!IsStructor)
1846     FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result);
1847 
1848   FTy->Params = demangleFunctionParameterList(MangledName);
1849 
1850   demangleThrowSpecification(MangledName);
1851 
1852   return FTy;
1853 }
1854 
1855 FunctionSymbolNode *
1856 Demangler::demangleFunctionEncoding(StringView &MangledName) {
1857   FuncClass ExtraFlags = FC_None;
1858   if (MangledName.consumeFront("$$J0"))
1859     ExtraFlags = FC_ExternC;
1860 
1861   FuncClass FC = demangleFunctionClass(MangledName);
1862   FC = FuncClass(ExtraFlags | FC);
1863 
1864   FunctionSignatureNode *FSN = nullptr;
1865   ThunkSignatureNode *TTN = nullptr;
1866   if (FC & FC_StaticThisAdjust) {
1867     TTN = Arena.alloc<ThunkSignatureNode>();
1868     TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
1869   } else if (FC & FC_VirtualThisAdjust) {
1870     TTN = Arena.alloc<ThunkSignatureNode>();
1871     if (FC & FC_VirtualThisAdjustEx) {
1872       TTN->ThisAdjust.VBPtrOffset = demangleSigned(MangledName);
1873       TTN->ThisAdjust.VBOffsetOffset = demangleSigned(MangledName);
1874     }
1875     TTN->ThisAdjust.VtordispOffset = demangleSigned(MangledName);
1876     TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
1877   }
1878 
1879   if (FC & FC_NoParameterList) {
1880     // This is an extern "C" function whose full signature hasn't been mangled.
1881     // This happens when we need to mangle a local symbol inside of an extern
1882     // "C" function.
1883     FSN = Arena.alloc<FunctionSignatureNode>();
1884   } else {
1885     bool HasThisQuals = !(FC & (FC_Global | FC_Static));
1886     FSN = demangleFunctionType(MangledName, HasThisQuals);
1887   }
1888   if (TTN) {
1889     *static_cast<FunctionSignatureNode *>(TTN) = *FSN;
1890     FSN = TTN;
1891   }
1892   FSN->FunctionClass = FC;
1893 
1894   FunctionSymbolNode *Symbol = Arena.alloc<FunctionSymbolNode>();
1895   Symbol->Signature = FSN;
1896   return Symbol;
1897 }
1898 
1899 CustomTypeNode *Demangler::demangleCustomType(StringView &MangledName) {
1900   assert(MangledName.startsWith('?'));
1901   MangledName.popFront();
1902 
1903   CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>();
1904   CTN->Identifier = demangleUnqualifiedTypeName(MangledName, true);
1905   if (!MangledName.consumeFront('@'))
1906     Error = true;
1907   if (Error)
1908     return nullptr;
1909   return CTN;
1910 }
1911 
1912 // Reads a primitive type.
1913 PrimitiveTypeNode *Demangler::demanglePrimitiveType(StringView &MangledName) {
1914   if (MangledName.consumeFront("$$T"))
1915     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Nullptr);
1916 
1917   switch (MangledName.popFront()) {
1918   case 'X':
1919     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Void);
1920   case 'D':
1921     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char);
1922   case 'C':
1923     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Schar);
1924   case 'E':
1925     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uchar);
1926   case 'F':
1927     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Short);
1928   case 'G':
1929     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ushort);
1930   case 'H':
1931     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int);
1932   case 'I':
1933     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint);
1934   case 'J':
1935     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Long);
1936   case 'K':
1937     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ulong);
1938   case 'M':
1939     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Float);
1940   case 'N':
1941     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Double);
1942   case 'O':
1943     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ldouble);
1944   case '_': {
1945     if (MangledName.empty()) {
1946       Error = true;
1947       return nullptr;
1948     }
1949     switch (MangledName.popFront()) {
1950     case 'N':
1951       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Bool);
1952     case 'J':
1953       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int64);
1954     case 'K':
1955       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64);
1956     case 'W':
1957       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar);
1958     case 'S':
1959       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16);
1960     case 'U':
1961       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char32);
1962     }
1963     break;
1964   }
1965   }
1966   Error = true;
1967   return nullptr;
1968 }
1969 
1970 TagTypeNode *Demangler::demangleClassType(StringView &MangledName) {
1971   TagTypeNode *TT = nullptr;
1972 
1973   switch (MangledName.popFront()) {
1974   case 'T':
1975     TT = Arena.alloc<TagTypeNode>(TagKind::Union);
1976     break;
1977   case 'U':
1978     TT = Arena.alloc<TagTypeNode>(TagKind::Struct);
1979     break;
1980   case 'V':
1981     TT = Arena.alloc<TagTypeNode>(TagKind::Class);
1982     break;
1983   case 'W':
1984     if (MangledName.popFront() != '4') {
1985       Error = true;
1986       return nullptr;
1987     }
1988     TT = Arena.alloc<TagTypeNode>(TagKind::Enum);
1989     break;
1990   default:
1991     assert(false);
1992   }
1993 
1994   TT->QualifiedName = demangleFullyQualifiedTypeName(MangledName);
1995   return TT;
1996 }
1997 
1998 // <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type>
1999 //                       # the E is required for 64-bit non-static pointers
2000 PointerTypeNode *Demangler::demanglePointerType(StringView &MangledName) {
2001   PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
2002 
2003   std::tie(Pointer->Quals, Pointer->Affinity) =
2004       demanglePointerCVQualifiers(MangledName);
2005 
2006   if (MangledName.consumeFront("6")) {
2007     Pointer->Pointee = demangleFunctionType(MangledName, false);
2008     return Pointer;
2009   }
2010 
2011   Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
2012   Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
2013 
2014   Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle);
2015   return Pointer;
2016 }
2017 
2018 PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) {
2019   PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
2020 
2021   std::tie(Pointer->Quals, Pointer->Affinity) =
2022       demanglePointerCVQualifiers(MangledName);
2023   assert(Pointer->Affinity == PointerAffinity::Pointer);
2024 
2025   Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
2026   Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
2027 
2028   if (MangledName.consumeFront("8")) {
2029     Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
2030     Pointer->Pointee = demangleFunctionType(MangledName, true);
2031   } else {
2032     Qualifiers PointeeQuals = Q_None;
2033     bool IsMember = false;
2034     std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName);
2035     assert(IsMember);
2036     Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
2037 
2038     Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop);
2039     Pointer->Pointee->Quals = PointeeQuals;
2040   }
2041 
2042   return Pointer;
2043 }
2044 
2045 Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) {
2046   Qualifiers Quals = Q_None;
2047   if (MangledName.consumeFront('E'))
2048     Quals = Qualifiers(Quals | Q_Pointer64);
2049   if (MangledName.consumeFront('I'))
2050     Quals = Qualifiers(Quals | Q_Restrict);
2051   if (MangledName.consumeFront('F'))
2052     Quals = Qualifiers(Quals | Q_Unaligned);
2053 
2054   return Quals;
2055 }
2056 
2057 ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) {
2058   assert(MangledName.front() == 'Y');
2059   MangledName.popFront();
2060 
2061   uint64_t Rank = 0;
2062   bool IsNegative = false;
2063   std::tie(Rank, IsNegative) = demangleNumber(MangledName);
2064   if (IsNegative || Rank == 0) {
2065     Error = true;
2066     return nullptr;
2067   }
2068 
2069   ArrayTypeNode *ATy = Arena.alloc<ArrayTypeNode>();
2070   NodeList *Head = Arena.alloc<NodeList>();
2071   NodeList *Tail = Head;
2072 
2073   for (uint64_t I = 0; I < Rank; ++I) {
2074     uint64_t D = 0;
2075     std::tie(D, IsNegative) = demangleNumber(MangledName);
2076     if (IsNegative) {
2077       Error = true;
2078       return nullptr;
2079     }
2080     Tail->N = Arena.alloc<IntegerLiteralNode>(D, IsNegative);
2081     if (I + 1 < Rank) {
2082       Tail->Next = Arena.alloc<NodeList>();
2083       Tail = Tail->Next;
2084     }
2085   }
2086   ATy->Dimensions = nodeListToNodeArray(Arena, Head, Rank);
2087 
2088   if (MangledName.consumeFront("$$C")) {
2089     bool IsMember = false;
2090     std::tie(ATy->Quals, IsMember) = demangleQualifiers(MangledName);
2091     if (IsMember) {
2092       Error = true;
2093       return nullptr;
2094     }
2095   }
2096 
2097   ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop);
2098   return ATy;
2099 }
2100 
2101 // Reads a function or a template parameters.
2102 NodeArrayNode *
2103 Demangler::demangleFunctionParameterList(StringView &MangledName) {
2104   // Empty parameter list.
2105   if (MangledName.consumeFront('X'))
2106     return {};
2107 
2108   NodeList *Head = Arena.alloc<NodeList>();
2109   NodeList **Current = &Head;
2110   size_t Count = 0;
2111   while (!Error && !MangledName.startsWith('@') &&
2112          !MangledName.startsWith('Z')) {
2113     ++Count;
2114 
2115     if (startsWithDigit(MangledName)) {
2116       size_t N = MangledName[0] - '0';
2117       if (N >= Backrefs.FunctionParamCount) {
2118         Error = true;
2119         return {};
2120       }
2121       MangledName = MangledName.dropFront();
2122 
2123       *Current = Arena.alloc<NodeList>();
2124       (*Current)->N = Backrefs.FunctionParams[N];
2125       Current = &(*Current)->Next;
2126       continue;
2127     }
2128 
2129     size_t OldSize = MangledName.size();
2130 
2131     *Current = Arena.alloc<NodeList>();
2132     TypeNode *TN = demangleType(MangledName, QualifierMangleMode::Drop);
2133 
2134     (*Current)->N = TN;
2135 
2136     size_t CharsConsumed = OldSize - MangledName.size();
2137     assert(CharsConsumed != 0);
2138 
2139     // Single-letter types are ignored for backreferences because memorizing
2140     // them doesn't save anything.
2141     if (Backrefs.FunctionParamCount <= 9 && CharsConsumed > 1)
2142       Backrefs.FunctionParams[Backrefs.FunctionParamCount++] = TN;
2143 
2144     Current = &(*Current)->Next;
2145   }
2146 
2147   if (Error)
2148     return {};
2149 
2150   NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count);
2151   // A non-empty parameter list is terminated by either 'Z' (variadic) parameter
2152   // list or '@' (non variadic).  Careful not to consume "@Z", as in that case
2153   // the following Z could be a throw specifier.
2154   if (MangledName.consumeFront('@'))
2155     return NA;
2156 
2157   if (MangledName.consumeFront('Z')) {
2158     // This is a variadic parameter list.  We probably need a variadic node to
2159     // append to the end.
2160     return NA;
2161   }
2162 
2163   Error = true;
2164   return {};
2165 }
2166 
2167 NodeArrayNode *
2168 Demangler::demangleTemplateParameterList(StringView &MangledName) {
2169   NodeList *Head;
2170   NodeList **Current = &Head;
2171   size_t Count = 0;
2172 
2173   while (!Error && !MangledName.startsWith('@')) {
2174     if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") ||
2175         MangledName.consumeFront("$$$V") || MangledName.consumeFront("$$Z")) {
2176       // parameter pack separator
2177       continue;
2178     }
2179 
2180     ++Count;
2181 
2182     // Template parameter lists don't participate in back-referencing.
2183     *Current = Arena.alloc<NodeList>();
2184 
2185     NodeList &TP = **Current;
2186 
2187     TemplateParameterReferenceNode *TPRN = nullptr;
2188     if (MangledName.consumeFront("$$Y")) {
2189       // Template alias
2190       TP.N = demangleFullyQualifiedTypeName(MangledName);
2191     } else if (MangledName.consumeFront("$$B")) {
2192       // Array
2193       TP.N = demangleType(MangledName, QualifierMangleMode::Drop);
2194     } else if (MangledName.consumeFront("$$C")) {
2195       // Type has qualifiers.
2196       TP.N = demangleType(MangledName, QualifierMangleMode::Mangle);
2197     } else if (MangledName.startsWith("$1") || MangledName.startsWith("$H") ||
2198                MangledName.startsWith("$I") || MangledName.startsWith("$J")) {
2199       // Pointer to member
2200       TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
2201       TPRN->IsMemberPointer = true;
2202 
2203       MangledName = MangledName.dropFront();
2204       // 1 - single inheritance       <name>
2205       // H - multiple inheritance     <name> <number>
2206       // I - virtual inheritance      <name> <number> <number> <number>
2207       // J - unspecified inheritance  <name> <number> <number> <number>
2208       char InheritanceSpecifier = MangledName.popFront();
2209       SymbolNode *S = nullptr;
2210       if (MangledName.startsWith('?')) {
2211         S = parse(MangledName);
2212         memorizeIdentifier(S->Name->getUnqualifiedIdentifier());
2213       }
2214 
2215       switch (InheritanceSpecifier) {
2216       case 'J':
2217         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2218             demangleSigned(MangledName);
2219         LLVM_FALLTHROUGH;
2220       case 'I':
2221         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2222             demangleSigned(MangledName);
2223         LLVM_FALLTHROUGH;
2224       case 'H':
2225         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2226             demangleSigned(MangledName);
2227         LLVM_FALLTHROUGH;
2228       case '1':
2229         break;
2230       default:
2231         Error = true;
2232         break;
2233       }
2234       TPRN->Affinity = PointerAffinity::Pointer;
2235       TPRN->Symbol = S;
2236     } else if (MangledName.startsWith("$E?")) {
2237       MangledName.consumeFront("$E");
2238       // Reference to symbol
2239       TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
2240       TPRN->Symbol = parse(MangledName);
2241       TPRN->Affinity = PointerAffinity::Reference;
2242     } else if (MangledName.startsWith("$F") || MangledName.startsWith("$G")) {
2243       TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
2244 
2245       // Data member pointer.
2246       MangledName = MangledName.dropFront();
2247       char InheritanceSpecifier = MangledName.popFront();
2248 
2249       switch (InheritanceSpecifier) {
2250       case 'G':
2251         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2252             demangleSigned(MangledName);
2253         LLVM_FALLTHROUGH;
2254       case 'F':
2255         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2256             demangleSigned(MangledName);
2257         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2258             demangleSigned(MangledName);
2259         LLVM_FALLTHROUGH;
2260       case '0':
2261         break;
2262       default:
2263         Error = true;
2264         break;
2265       }
2266       TPRN->IsMemberPointer = true;
2267 
2268     } else if (MangledName.consumeFront("$0")) {
2269       // Integral non-type template parameter
2270       bool IsNegative = false;
2271       uint64_t Value = 0;
2272       std::tie(Value, IsNegative) = demangleNumber(MangledName);
2273 
2274       TP.N = Arena.alloc<IntegerLiteralNode>(Value, IsNegative);
2275     } else {
2276       TP.N = demangleType(MangledName, QualifierMangleMode::Drop);
2277     }
2278     if (Error)
2279       return nullptr;
2280 
2281     Current = &TP.Next;
2282   }
2283 
2284   if (Error)
2285     return nullptr;
2286 
2287   // Template parameter lists cannot be variadic, so it can only be terminated
2288   // by @.
2289   if (MangledName.consumeFront('@'))
2290     return nodeListToNodeArray(Arena, Head, Count);
2291   Error = true;
2292   return nullptr;
2293 }
2294 
2295 void Demangler::dumpBackReferences() {
2296   std::printf("%d function parameter backreferences\n",
2297               (int)Backrefs.FunctionParamCount);
2298 
2299   // Create an output stream so we can render each type.
2300   OutputStream OS;
2301   if (initializeOutputStream(nullptr, nullptr, OS, 1024))
2302     std::terminate();
2303   for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) {
2304     OS.setCurrentPosition(0);
2305 
2306     TypeNode *T = Backrefs.FunctionParams[I];
2307     T->output(OS, OF_Default);
2308 
2309     std::printf("  [%d] - %.*s\n", (int)I, (int)OS.getCurrentPosition(),
2310                 OS.getBuffer());
2311   }
2312   std::free(OS.getBuffer());
2313 
2314   if (Backrefs.FunctionParamCount > 0)
2315     std::printf("\n");
2316   std::printf("%d name backreferences\n", (int)Backrefs.NamesCount);
2317   for (size_t I = 0; I < Backrefs.NamesCount; ++I) {
2318     std::printf("  [%d] - %.*s\n", (int)I, (int)Backrefs.Names[I]->Name.size(),
2319                 Backrefs.Names[I]->Name.begin());
2320   }
2321   if (Backrefs.NamesCount > 0)
2322     std::printf("\n");
2323 }
2324 
2325 char *llvm::microsoftDemangle(const char *MangledName, char *Buf, size_t *N,
2326                               int *Status, MSDemangleFlags Flags) {
2327   int InternalStatus = demangle_success;
2328   Demangler D;
2329   OutputStream S;
2330 
2331   StringView Name{MangledName};
2332   SymbolNode *AST = D.parse(Name);
2333 
2334   if (Flags & MSDF_DumpBackrefs)
2335     D.dumpBackReferences();
2336 
2337   if (D.Error)
2338     InternalStatus = demangle_invalid_mangled_name;
2339   else if (initializeOutputStream(Buf, N, S, 1024))
2340     InternalStatus = demangle_memory_alloc_failure;
2341   else {
2342     AST->output(S, OF_Default);
2343     S += '\0';
2344     if (N != nullptr)
2345       *N = S.getCurrentPosition();
2346     Buf = S.getBuffer();
2347   }
2348 
2349   if (Status)
2350     *Status = InternalStatus;
2351   return InternalStatus == demangle_success ? Buf : nullptr;
2352 }
2353