1 //===-- Mangled.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Core/Mangled.h"
10 
11 #include "lldb/Core/DataFileCache.h"
12 #include "lldb/Core/RichManglingContext.h"
13 #include "lldb/Target/Language.h"
14 #include "lldb/Utility/ConstString.h"
15 #include "lldb/Utility/DataEncoder.h"
16 #include "lldb/Utility/LLDBLog.h"
17 #include "lldb/Utility/Log.h"
18 #include "lldb/Utility/RegularExpression.h"
19 #include "lldb/Utility/Stream.h"
20 #include "lldb/lldb-enumerations.h"
21 
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/Demangle/Demangle.h"
24 #include "llvm/Support/Compiler.h"
25 
26 #include <mutex>
27 #include <string>
28 #include <utility>
29 
30 #include <cstdlib>
31 #include <cstring>
32 using namespace lldb_private;
33 
34 static inline bool cstring_is_mangled(llvm::StringRef s) {
35   return Mangled::GetManglingScheme(s) != Mangled::eManglingSchemeNone;
36 }
37 
38 #pragma mark Mangled
39 
40 Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) {
41   if (name.empty())
42     return Mangled::eManglingSchemeNone;
43 
44   if (name.startswith("?"))
45     return Mangled::eManglingSchemeMSVC;
46 
47   if (name.startswith("_R"))
48     return Mangled::eManglingSchemeRustV0;
49 
50   if (name.startswith("_D"))
51     return Mangled::eManglingSchemeD;
52 
53   if (name.startswith("_Z"))
54     return Mangled::eManglingSchemeItanium;
55 
56   // ___Z is a clang extension of block invocations
57   if (name.startswith("___Z"))
58     return Mangled::eManglingSchemeItanium;
59 
60   return Mangled::eManglingSchemeNone;
61 }
62 
63 Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() {
64   if (s)
65     SetValue(s);
66 }
67 
68 Mangled::Mangled(llvm::StringRef name) {
69   if (!name.empty())
70     SetValue(ConstString(name));
71 }
72 
73 // Convert to bool operator. This allows code to check any Mangled objects
74 // to see if they contain anything valid using code such as:
75 //
76 //  Mangled mangled(...);
77 //  if (mangled)
78 //  { ...
79 Mangled::operator bool() const { return m_mangled || m_demangled; }
80 
81 // Clear the mangled and demangled values.
82 void Mangled::Clear() {
83   m_mangled.Clear();
84   m_demangled.Clear();
85 }
86 
87 // Compare the string values.
88 int Mangled::Compare(const Mangled &a, const Mangled &b) {
89   return ConstString::Compare(a.GetName(ePreferMangled),
90                               b.GetName(ePreferMangled));
91 }
92 
93 // Set the string value in this objects. If "mangled" is true, then the mangled
94 // named is set with the new value in "s", else the demangled name is set.
95 void Mangled::SetValue(ConstString s, bool mangled) {
96   if (s) {
97     if (mangled) {
98       m_demangled.Clear();
99       m_mangled = s;
100     } else {
101       m_demangled = s;
102       m_mangled.Clear();
103     }
104   } else {
105     m_demangled.Clear();
106     m_mangled.Clear();
107   }
108 }
109 
110 void Mangled::SetValue(ConstString name) {
111   if (name) {
112     if (cstring_is_mangled(name.GetStringRef())) {
113       m_demangled.Clear();
114       m_mangled = name;
115     } else {
116       m_demangled = name;
117       m_mangled.Clear();
118     }
119   } else {
120     m_demangled.Clear();
121     m_mangled.Clear();
122   }
123 }
124 
125 // Local helpers for different demangling implementations.
126 static char *GetMSVCDemangledStr(const char *M) {
127   char *demangled_cstr = llvm::microsoftDemangle(
128       M, nullptr, nullptr, nullptr, nullptr,
129       llvm::MSDemangleFlags(
130           llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention |
131           llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType));
132 
133   if (Log *log = GetLog(LLDBLog::Demangle)) {
134     if (demangled_cstr && demangled_cstr[0])
135       LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M, demangled_cstr);
136     else
137       LLDB_LOGF(log, "demangled msvc: %s -> error", M);
138   }
139 
140   return demangled_cstr;
141 }
142 
143 static char *GetItaniumDemangledStr(const char *M) {
144   char *demangled_cstr = nullptr;
145 
146   llvm::ItaniumPartialDemangler ipd;
147   bool err = ipd.partialDemangle(M);
148   if (!err) {
149     // Default buffer and size (will realloc in case it's too small).
150     size_t demangled_size = 80;
151     demangled_cstr = static_cast<char *>(std::malloc(demangled_size));
152     demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size);
153 
154     assert(demangled_cstr &&
155            "finishDemangle must always succeed if partialDemangle did");
156     assert(demangled_cstr[demangled_size - 1] == '\0' &&
157            "Expected demangled_size to return length including trailing null");
158   }
159 
160   if (Log *log = GetLog(LLDBLog::Demangle)) {
161     if (demangled_cstr)
162       LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr);
163     else
164       LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M);
165   }
166 
167   return demangled_cstr;
168 }
169 
170 static char *GetRustV0DemangledStr(const char *M) {
171   char *demangled_cstr = llvm::rustDemangle(M, nullptr, nullptr, nullptr);
172 
173   if (Log *log = GetLog(LLDBLog::Demangle)) {
174     if (demangled_cstr && demangled_cstr[0])
175       LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr);
176     else
177       LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle", M);
178   }
179 
180   return demangled_cstr;
181 }
182 
183 static char *GetDLangDemangledStr(const char *M) {
184   char *demangled_cstr = llvm::dlangDemangle(M);
185 
186   if (Log *log = GetLog(LLDBLog::Demangle)) {
187     if (demangled_cstr && demangled_cstr[0])
188       LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr);
189     else
190       LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle", M);
191   }
192 
193   return demangled_cstr;
194 }
195 
196 // Explicit demangling for scheduled requests during batch processing. This
197 // makes use of ItaniumPartialDemangler's rich demangle info
198 bool Mangled::DemangleWithRichManglingInfo(
199     RichManglingContext &context, SkipMangledNameFn *skip_mangled_name) {
200   // Others are not meant to arrive here. ObjC names or C's main() for example
201   // have their names stored in m_demangled, while m_mangled is empty.
202   assert(m_mangled);
203 
204   // Check whether or not we are interested in this name at all.
205   ManglingScheme scheme = GetManglingScheme(m_mangled.GetStringRef());
206   if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme))
207     return false;
208 
209   switch (scheme) {
210   case eManglingSchemeNone:
211     // The current mangled_name_filter would allow llvm_unreachable here.
212     return false;
213 
214   case eManglingSchemeItanium:
215     // We want the rich mangling info here, so we don't care whether or not
216     // there is a demangled string in the pool already.
217     if (context.FromItaniumName(m_mangled)) {
218       // If we got an info, we have a name. Copy to string pool and connect the
219       // counterparts to accelerate later access in GetDemangledName().
220       m_demangled.SetStringWithMangledCounterpart(context.ParseFullName(),
221                                                   m_mangled);
222       return true;
223     } else {
224       m_demangled.SetCString("");
225       return false;
226     }
227 
228   case eManglingSchemeMSVC: {
229     // We have no rich mangling for MSVC-mangled names yet, so first try to
230     // demangle it if necessary.
231     if (!m_demangled && !m_mangled.GetMangledCounterpart(m_demangled)) {
232       if (char *d = GetMSVCDemangledStr(m_mangled.GetCString())) {
233         // If we got an info, we have a name. Copy to string pool and connect
234         // the counterparts to accelerate later access in GetDemangledName().
235         m_demangled.SetStringWithMangledCounterpart(llvm::StringRef(d),
236                                                     m_mangled);
237         ::free(d);
238       } else {
239         m_demangled.SetCString("");
240       }
241     }
242 
243     if (m_demangled.IsEmpty()) {
244       // Cannot demangle it, so don't try parsing.
245       return false;
246     } else {
247       // Demangled successfully, we can try and parse it with
248       // CPlusPlusLanguage::MethodName.
249       return context.FromCxxMethodName(m_demangled);
250     }
251   }
252 
253   case eManglingSchemeRustV0:
254   case eManglingSchemeD:
255     // Rich demangling scheme is not supported
256     return false;
257   }
258   llvm_unreachable("Fully covered switch above!");
259 }
260 
261 // Generate the demangled name on demand using this accessor. Code in this
262 // class will need to use this accessor if it wishes to decode the demangled
263 // name. The result is cached and will be kept until a new string value is
264 // supplied to this object, or until the end of the object's lifetime.
265 ConstString Mangled::GetDemangledName() const {
266   // Check to make sure we have a valid mangled name and that we haven't
267   // already decoded our mangled name.
268   if (m_mangled && m_demangled.IsNull()) {
269     // Don't bother running anything that isn't mangled
270     const char *mangled_name = m_mangled.GetCString();
271     ManglingScheme mangling_scheme =
272         GetManglingScheme(m_mangled.GetStringRef());
273     if (mangling_scheme != eManglingSchemeNone &&
274         !m_mangled.GetMangledCounterpart(m_demangled)) {
275       // We didn't already mangle this name, demangle it and if all goes well
276       // add it to our map.
277       char *demangled_name = nullptr;
278       switch (mangling_scheme) {
279       case eManglingSchemeMSVC:
280         demangled_name = GetMSVCDemangledStr(mangled_name);
281         break;
282       case eManglingSchemeItanium: {
283         demangled_name = GetItaniumDemangledStr(mangled_name);
284         break;
285       }
286       case eManglingSchemeRustV0:
287         demangled_name = GetRustV0DemangledStr(mangled_name);
288         break;
289       case eManglingSchemeD:
290         demangled_name = GetDLangDemangledStr(mangled_name);
291         break;
292       case eManglingSchemeNone:
293         llvm_unreachable("eManglingSchemeNone was handled already");
294       }
295       if (demangled_name) {
296         m_demangled.SetStringWithMangledCounterpart(
297             llvm::StringRef(demangled_name), m_mangled);
298         free(demangled_name);
299       }
300     }
301     if (m_demangled.IsNull()) {
302       // Set the demangled string to the empty string to indicate we tried to
303       // parse it once and failed.
304       m_demangled.SetCString("");
305     }
306   }
307 
308   return m_demangled;
309 }
310 
311 ConstString Mangled::GetDisplayDemangledName() const {
312   return GetDemangledName();
313 }
314 
315 bool Mangled::NameMatches(const RegularExpression &regex) const {
316   if (m_mangled && regex.Execute(m_mangled.GetStringRef()))
317     return true;
318 
319   ConstString demangled = GetDemangledName();
320   return demangled && regex.Execute(demangled.GetStringRef());
321 }
322 
323 // Get the demangled name if there is one, else return the mangled name.
324 ConstString Mangled::GetName(Mangled::NamePreference preference) const {
325   if (preference == ePreferMangled && m_mangled)
326     return m_mangled;
327 
328   // Call the accessor to make sure we get a demangled name in case it hasn't
329   // been demangled yet...
330   ConstString demangled = GetDemangledName();
331 
332   if (preference == ePreferDemangledWithoutArguments) {
333     if (Language *lang = Language::FindPlugin(GuessLanguage())) {
334       return lang->GetDemangledFunctionNameWithoutArguments(*this);
335     }
336   }
337   if (preference == ePreferDemangled) {
338     if (demangled)
339       return demangled;
340     return m_mangled;
341   }
342   return demangled;
343 }
344 
345 // Dump a Mangled object to stream "s". We don't force our demangled name to be
346 // computed currently (we don't use the accessor).
347 void Mangled::Dump(Stream *s) const {
348   if (m_mangled) {
349     *s << ", mangled = " << m_mangled;
350   }
351   if (m_demangled) {
352     const char *demangled = m_demangled.AsCString();
353     s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>");
354   }
355 }
356 
357 // Dumps a debug version of this string with extra object and state information
358 // to stream "s".
359 void Mangled::DumpDebug(Stream *s) const {
360   s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2),
361             static_cast<const void *>(this));
362   m_mangled.DumpDebug(s);
363   s->Printf(", demangled = ");
364   m_demangled.DumpDebug(s);
365 }
366 
367 // Return the size in byte that this object takes in memory. The size includes
368 // the size of the objects it owns, and not the strings that it references
369 // because they are shared strings.
370 size_t Mangled::MemorySize() const {
371   return m_mangled.MemorySize() + m_demangled.MemorySize();
372 }
373 
374 // We "guess" the language because we can't determine a symbol's language from
375 // it's name.  For example, a Pascal symbol can be mangled using the C++
376 // Itanium scheme, and defined in a compilation unit within the same module as
377 // other C++ units.  In addition, different targets could have different ways
378 // of mangling names from a given language, likewise the compilation units
379 // within those targets.
380 lldb::LanguageType Mangled::GuessLanguage() const {
381   lldb::LanguageType result = lldb::eLanguageTypeUnknown;
382   // Ask each language plugin to check if the mangled name belongs to it.
383   Language::ForEach([this, &result](Language *l) {
384     if (l->SymbolNameFitsToLanguage(*this)) {
385       result = l->GetLanguageType();
386       return false;
387     }
388     return true;
389   });
390   return result;
391 }
392 
393 // Dump OBJ to the supplied stream S.
394 Stream &operator<<(Stream &s, const Mangled &obj) {
395   if (obj.GetMangledName())
396     s << "mangled = '" << obj.GetMangledName() << "'";
397 
398   ConstString demangled = obj.GetDemangledName();
399   if (demangled)
400     s << ", demangled = '" << demangled << '\'';
401   else
402     s << ", demangled = <error>";
403   return s;
404 }
405 
// When encoding Mangled objects we can get away with encoding as little
// information as is required. The enumeration below helps us to efficiently
// encode Mangled objects. Mangled::Encode() writes the chosen value as a
// single byte and Mangled::Decode() reads it back, so the numeric values are
// part of the serialized format.
enum MangledEncoding {
  /// If the Mangled object has neither a mangled name nor a demangled name we
  /// can encode the object with one zero byte using the Empty enumeration.
  Empty = 0u,
  /// If the Mangled object has only a demangled name and no mangled name, we
  /// can encode only the demangled name.
  DemangledOnly = 1u,
  /// If the mangled name can be used to calculate the demangled name (it is
  /// the mangled/demangled counterpart), then we only need to encode the
  /// mangled name as the demangled name can be recomputed.
  MangledOnly = 2u,
  /// If we have a Mangled object with two different names that are not related
  /// then we need to save both strings. This can happen if we have a name that
  /// isn't a true mangled name, but we want to be able to look up a symbol by
  /// name and type in the symbol table. We do this for Objective C symbols like
  /// "OBJC_CLASS_$_NSValue" where the mangled name will be set to
  /// "OBJC_CLASS_$_NSValue" and the demangled name will be manually set to
  /// "NSValue". If we tried to demangle the name "OBJC_CLASS_$_NSValue" it
  /// would fail, but in these cases we want these unrelated names to be
  /// preserved.
  MangledAndDemangled = 3u
};
431 
432 bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
433                      const StringTableReader &strtab) {
434   m_mangled.Clear();
435   m_demangled.Clear();
436   MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr);
437   switch (encoding) {
438     case Empty:
439       return true;
440 
441     case DemangledOnly:
442       m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
443       return true;
444 
445     case MangledOnly:
446       m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
447       return true;
448 
449     case MangledAndDemangled:
450       m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
451       m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
452       return true;
453   }
454   return false;
455 }
456 /// The encoding format for the Mangled object is as follows:
457 ///
458 /// uint8_t encoding;
459 /// char str1[]; (only if DemangledOnly, MangledOnly)
460 /// char str2[]; (only if MangledAndDemangled)
461 ///
462 /// The strings are stored as NULL terminated UTF8 strings and str1 and str2
463 /// are only saved if we need them based on the encoding.
464 ///
465 /// Some mangled names have a mangled name that can be demangled by the built
466 /// in demanglers. These kinds of mangled objects know when the mangled and
467 /// demangled names are the counterparts for each other. This is done because
468 /// demangling is very expensive and avoiding demangling the same name twice
469 /// saves us a lot of compute time. For these kinds of names we only need to
470 /// save the mangled name and have the encoding set to "MangledOnly".
471 ///
472 /// If a mangled obejct has only a demangled name, then we save only that string
473 /// and have the encoding set to "DemangledOnly".
474 ///
475 /// Some mangled objects have both mangled and demangled names, but the
476 /// demangled name can not be computed from the mangled name. This is often used
477 /// for runtime named, like Objective C runtime V2 and V3 names. Both these
478 /// names must be saved and the encoding is set to "MangledAndDemangled".
479 ///
480 /// For a Mangled object with no names, we only need to set the encoding to
481 /// "Empty" and not store any string values.
482 void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const {
483   MangledEncoding encoding = Empty;
484   if (m_mangled) {
485     encoding = MangledOnly;
486     if (m_demangled) {
487       // We have both mangled and demangled names. If the demangled name is the
488       // counterpart of the mangled name, then we only need to save the mangled
489       // named. If they are different, we need to save both.
490       ConstString s;
491       if (!(m_mangled.GetMangledCounterpart(s) && s == m_demangled))
492         encoding = MangledAndDemangled;
493     }
494   } else if (m_demangled) {
495     encoding = DemangledOnly;
496   }
497   file.AppendU8(encoding);
498   switch (encoding) {
499     case Empty:
500       break;
501     case DemangledOnly:
502       file.AppendU32(strtab.Add(m_demangled));
503       break;
504     case MangledOnly:
505       file.AppendU32(strtab.Add(m_mangled));
506       break;
507     case MangledAndDemangled:
508       file.AppendU32(strtab.Add(m_mangled));
509       file.AppendU32(strtab.Add(m_demangled));
510       break;
511   }
512 }
513