1 //===-- Mangled.cpp -------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "lldb/Core/Mangled.h" 10 11 #include "lldb/Core/DataFileCache.h" 12 #include "lldb/Core/RichManglingContext.h" 13 #include "lldb/Target/Language.h" 14 #include "lldb/Utility/ConstString.h" 15 #include "lldb/Utility/DataEncoder.h" 16 #include "lldb/Utility/LLDBLog.h" 17 #include "lldb/Utility/Log.h" 18 #include "lldb/Utility/RegularExpression.h" 19 #include "lldb/Utility/Stream.h" 20 #include "lldb/lldb-enumerations.h" 21 22 #include "llvm/ADT/StringRef.h" 23 #include "llvm/Demangle/Demangle.h" 24 #include "llvm/Support/Compiler.h" 25 26 #include <mutex> 27 #include <string> 28 #include <utility> 29 30 #include <cstdlib> 31 #include <cstring> 32 using namespace lldb_private; 33 34 static inline bool cstring_is_mangled(llvm::StringRef s) { 35 return Mangled::GetManglingScheme(s) != Mangled::eManglingSchemeNone; 36 } 37 38 #pragma mark Mangled 39 40 Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) { 41 if (name.empty()) 42 return Mangled::eManglingSchemeNone; 43 44 if (name.startswith("?")) 45 return Mangled::eManglingSchemeMSVC; 46 47 if (name.startswith("_R")) 48 return Mangled::eManglingSchemeRustV0; 49 50 if (name.startswith("_D")) 51 return Mangled::eManglingSchemeD; 52 53 if (name.startswith("_Z")) 54 return Mangled::eManglingSchemeItanium; 55 56 // ___Z is a clang extension of block invocations 57 if (name.startswith("___Z")) 58 return Mangled::eManglingSchemeItanium; 59 60 return Mangled::eManglingSchemeNone; 61 } 62 63 Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() { 64 if (s) 65 SetValue(s); 66 } 67 68 Mangled::Mangled(llvm::StringRef name) { 69 if (!name.empty()) 70 SetValue(ConstString(name)); 71 } 72 73 // Convert to bool operator. This allows code to check any Mangled objects 74 // to see if they contain anything valid using code such as: 75 // 76 // Mangled mangled(...); 77 // if (mangled) 78 // { ... 79 Mangled::operator bool() const { return m_mangled || m_demangled; } 80 81 // Clear the mangled and demangled values. 82 void Mangled::Clear() { 83 m_mangled.Clear(); 84 m_demangled.Clear(); 85 } 86 87 // Compare the string values. 88 int Mangled::Compare(const Mangled &a, const Mangled &b) { 89 return ConstString::Compare(a.GetName(ePreferMangled), 90 b.GetName(ePreferMangled)); 91 } 92 93 // Set the string value in this objects. If "mangled" is true, then the mangled 94 // named is set with the new value in "s", else the demangled name is set. 95 void Mangled::SetValue(ConstString s, bool mangled) { 96 if (s) { 97 if (mangled) { 98 m_demangled.Clear(); 99 m_mangled = s; 100 } else { 101 m_demangled = s; 102 m_mangled.Clear(); 103 } 104 } else { 105 m_demangled.Clear(); 106 m_mangled.Clear(); 107 } 108 } 109 110 void Mangled::SetValue(ConstString name) { 111 if (name) { 112 if (cstring_is_mangled(name.GetStringRef())) { 113 m_demangled.Clear(); 114 m_mangled = name; 115 } else { 116 m_demangled = name; 117 m_mangled.Clear(); 118 } 119 } else { 120 m_demangled.Clear(); 121 m_mangled.Clear(); 122 } 123 } 124 125 // Local helpers for different demangling implementations. 126 static char *GetMSVCDemangledStr(const char *M) { 127 char *demangled_cstr = llvm::microsoftDemangle( 128 M, nullptr, nullptr, nullptr, nullptr, 129 llvm::MSDemangleFlags( 130 llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention | 131 llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType)); 132 133 if (Log *log = GetLog(LLDBLog::Demangle)) { 134 if (demangled_cstr && demangled_cstr[0]) 135 LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M, demangled_cstr); 136 else 137 LLDB_LOGF(log, "demangled msvc: %s -> error", M); 138 } 139 140 return demangled_cstr; 141 } 142 143 static char *GetItaniumDemangledStr(const char *M) { 144 char *demangled_cstr = nullptr; 145 146 llvm::ItaniumPartialDemangler ipd; 147 bool err = ipd.partialDemangle(M); 148 if (!err) { 149 // Default buffer and size (will realloc in case it's too small). 150 size_t demangled_size = 80; 151 demangled_cstr = static_cast<char *>(std::malloc(demangled_size)); 152 demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size); 153 154 assert(demangled_cstr && 155 "finishDemangle must always succeed if partialDemangle did"); 156 assert(demangled_cstr[demangled_size - 1] == '\0' && 157 "Expected demangled_size to return length including trailing null"); 158 } 159 160 if (Log *log = GetLog(LLDBLog::Demangle)) { 161 if (demangled_cstr) 162 LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr); 163 else 164 LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M); 165 } 166 167 return demangled_cstr; 168 } 169 170 static char *GetRustV0DemangledStr(const char *M) { 171 char *demangled_cstr = llvm::rustDemangle(M, nullptr, nullptr, nullptr); 172 173 if (Log *log = GetLog(LLDBLog::Demangle)) { 174 if (demangled_cstr && demangled_cstr[0]) 175 LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr); 176 else 177 LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle", M); 178 } 179 180 return demangled_cstr; 181 } 182 183 static char *GetDLangDemangledStr(const char *M) { 184 char *demangled_cstr = llvm::dlangDemangle(M); 185 186 if (Log *log = GetLog(LLDBLog::Demangle)) { 187 if (demangled_cstr && demangled_cstr[0]) 188 LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr); 189 else 190 LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle", M); 191 } 192 193 return demangled_cstr; 194 } 195 196 // Explicit demangling for scheduled requests during batch processing. This 197 // makes use of ItaniumPartialDemangler's rich demangle info 198 bool Mangled::DemangleWithRichManglingInfo( 199 RichManglingContext &context, SkipMangledNameFn *skip_mangled_name) { 200 // Others are not meant to arrive here. ObjC names or C's main() for example 201 // have their names stored in m_demangled, while m_mangled is empty. 202 assert(m_mangled); 203 204 // Check whether or not we are interested in this name at all. 205 ManglingScheme scheme = GetManglingScheme(m_mangled.GetStringRef()); 206 if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme)) 207 return false; 208 209 switch (scheme) { 210 case eManglingSchemeNone: 211 // The current mangled_name_filter would allow llvm_unreachable here. 212 return false; 213 214 case eManglingSchemeItanium: 215 // We want the rich mangling info here, so we don't care whether or not 216 // there is a demangled string in the pool already. 217 if (context.FromItaniumName(m_mangled)) { 218 // If we got an info, we have a name. Copy to string pool and connect the 219 // counterparts to accelerate later access in GetDemangledName(). 220 m_demangled.SetStringWithMangledCounterpart(context.ParseFullName(), 221 m_mangled); 222 return true; 223 } else { 224 m_demangled.SetCString(""); 225 return false; 226 } 227 228 case eManglingSchemeMSVC: { 229 // We have no rich mangling for MSVC-mangled names yet, so first try to 230 // demangle it if necessary. 231 if (!m_demangled && !m_mangled.GetMangledCounterpart(m_demangled)) { 232 if (char *d = GetMSVCDemangledStr(m_mangled.GetCString())) { 233 // If we got an info, we have a name. Copy to string pool and connect 234 // the counterparts to accelerate later access in GetDemangledName(). 235 m_demangled.SetStringWithMangledCounterpart(llvm::StringRef(d), 236 m_mangled); 237 ::free(d); 238 } else { 239 m_demangled.SetCString(""); 240 } 241 } 242 243 if (m_demangled.IsEmpty()) { 244 // Cannot demangle it, so don't try parsing. 245 return false; 246 } else { 247 // Demangled successfully, we can try and parse it with 248 // CPlusPlusLanguage::MethodName. 249 return context.FromCxxMethodName(m_demangled); 250 } 251 } 252 253 case eManglingSchemeRustV0: 254 case eManglingSchemeD: 255 // Rich demangling scheme is not supported 256 return false; 257 } 258 llvm_unreachable("Fully covered switch above!"); 259 } 260 261 // Generate the demangled name on demand using this accessor. Code in this 262 // class will need to use this accessor if it wishes to decode the demangled 263 // name. The result is cached and will be kept until a new string value is 264 // supplied to this object, or until the end of the object's lifetime. 265 ConstString Mangled::GetDemangledName() const { 266 // Check to make sure we have a valid mangled name and that we haven't 267 // already decoded our mangled name. 268 if (m_mangled && m_demangled.IsNull()) { 269 // Don't bother running anything that isn't mangled 270 const char *mangled_name = m_mangled.GetCString(); 271 ManglingScheme mangling_scheme = 272 GetManglingScheme(m_mangled.GetStringRef()); 273 if (mangling_scheme != eManglingSchemeNone && 274 !m_mangled.GetMangledCounterpart(m_demangled)) { 275 // We didn't already mangle this name, demangle it and if all goes well 276 // add it to our map. 277 char *demangled_name = nullptr; 278 switch (mangling_scheme) { 279 case eManglingSchemeMSVC: 280 demangled_name = GetMSVCDemangledStr(mangled_name); 281 break; 282 case eManglingSchemeItanium: { 283 demangled_name = GetItaniumDemangledStr(mangled_name); 284 break; 285 } 286 case eManglingSchemeRustV0: 287 demangled_name = GetRustV0DemangledStr(mangled_name); 288 break; 289 case eManglingSchemeD: 290 demangled_name = GetDLangDemangledStr(mangled_name); 291 break; 292 case eManglingSchemeNone: 293 llvm_unreachable("eManglingSchemeNone was handled already"); 294 } 295 if (demangled_name) { 296 m_demangled.SetStringWithMangledCounterpart( 297 llvm::StringRef(demangled_name), m_mangled); 298 free(demangled_name); 299 } 300 } 301 if (m_demangled.IsNull()) { 302 // Set the demangled string to the empty string to indicate we tried to 303 // parse it once and failed. 304 m_demangled.SetCString(""); 305 } 306 } 307 308 return m_demangled; 309 } 310 311 ConstString Mangled::GetDisplayDemangledName() const { 312 return GetDemangledName(); 313 } 314 315 bool Mangled::NameMatches(const RegularExpression ®ex) const { 316 if (m_mangled && regex.Execute(m_mangled.GetStringRef())) 317 return true; 318 319 ConstString demangled = GetDemangledName(); 320 return demangled && regex.Execute(demangled.GetStringRef()); 321 } 322 323 // Get the demangled name if there is one, else return the mangled name. 324 ConstString Mangled::GetName(Mangled::NamePreference preference) const { 325 if (preference == ePreferMangled && m_mangled) 326 return m_mangled; 327 328 // Call the accessor to make sure we get a demangled name in case it hasn't 329 // been demangled yet... 330 ConstString demangled = GetDemangledName(); 331 332 if (preference == ePreferDemangledWithoutArguments) { 333 if (Language *lang = Language::FindPlugin(GuessLanguage())) { 334 return lang->GetDemangledFunctionNameWithoutArguments(*this); 335 } 336 } 337 if (preference == ePreferDemangled) { 338 if (demangled) 339 return demangled; 340 return m_mangled; 341 } 342 return demangled; 343 } 344 345 // Dump a Mangled object to stream "s". We don't force our demangled name to be 346 // computed currently (we don't use the accessor). 347 void Mangled::Dump(Stream *s) const { 348 if (m_mangled) { 349 *s << ", mangled = " << m_mangled; 350 } 351 if (m_demangled) { 352 const char *demangled = m_demangled.AsCString(); 353 s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>"); 354 } 355 } 356 357 // Dumps a debug version of this string with extra object and state information 358 // to stream "s". 359 void Mangled::DumpDebug(Stream *s) const { 360 s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2), 361 static_cast<const void *>(this)); 362 m_mangled.DumpDebug(s); 363 s->Printf(", demangled = "); 364 m_demangled.DumpDebug(s); 365 } 366 367 // Return the size in byte that this object takes in memory. The size includes 368 // the size of the objects it owns, and not the strings that it references 369 // because they are shared strings. 370 size_t Mangled::MemorySize() const { 371 return m_mangled.MemorySize() + m_demangled.MemorySize(); 372 } 373 374 // We "guess" the language because we can't determine a symbol's language from 375 // it's name. For example, a Pascal symbol can be mangled using the C++ 376 // Itanium scheme, and defined in a compilation unit within the same module as 377 // other C++ units. In addition, different targets could have different ways 378 // of mangling names from a given language, likewise the compilation units 379 // within those targets. 380 lldb::LanguageType Mangled::GuessLanguage() const { 381 lldb::LanguageType result = lldb::eLanguageTypeUnknown; 382 // Ask each language plugin to check if the mangled name belongs to it. 383 Language::ForEach([this, &result](Language *l) { 384 if (l->SymbolNameFitsToLanguage(*this)) { 385 result = l->GetLanguageType(); 386 return false; 387 } 388 return true; 389 }); 390 return result; 391 } 392 393 // Dump OBJ to the supplied stream S. 394 Stream &operator<<(Stream &s, const Mangled &obj) { 395 if (obj.GetMangledName()) 396 s << "mangled = '" << obj.GetMangledName() << "'"; 397 398 ConstString demangled = obj.GetDemangledName(); 399 if (demangled) 400 s << ", demangled = '" << demangled << '\''; 401 else 402 s << ", demangled = <error>"; 403 return s; 404 } 405 406 // When encoding Mangled objects we can get away with encoding as little 407 // information as is required. The enumeration below helps us to efficiently 408 // encode Mangled objects. 409 enum MangledEncoding { 410 /// If the Mangled object has neither a mangled name or demangled name we can 411 /// encode the object with one zero byte using the Empty enumeration. 412 Empty = 0u, 413 /// If the Mangled object has only a demangled name and no mangled named, we 414 /// can encode only the demangled name. 415 DemangledOnly = 1u, 416 /// If the mangle name can calculate the demangled name (it is the 417 /// mangled/demangled counterpart), then we only need to encode the mangled 418 /// name as the demangled name can be recomputed. 419 MangledOnly = 2u, 420 /// If we have a Mangled object with two different names that are not related 421 /// then we need to save both strings. This can happen if we have a name that 422 /// isn't a true mangled name, but we want to be able to lookup a symbol by 423 /// name and type in the symbol table. We do this for Objective C symbols like 424 /// "OBJC_CLASS_$_NSValue" where the mangled named will be set to 425 /// "OBJC_CLASS_$_NSValue" and the demangled name will be manually set to 426 /// "NSValue". If we tried to demangled the name "OBJC_CLASS_$_NSValue" it 427 /// would fail, but in these cases we want these unrelated names to be 428 /// preserved. 429 MangledAndDemangled = 3u 430 }; 431 432 bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, 433 const StringTableReader &strtab) { 434 m_mangled.Clear(); 435 m_demangled.Clear(); 436 MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr); 437 switch (encoding) { 438 case Empty: 439 return true; 440 441 case DemangledOnly: 442 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 443 return true; 444 445 case MangledOnly: 446 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 447 return true; 448 449 case MangledAndDemangled: 450 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 451 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 452 return true; 453 } 454 return false; 455 } 456 /// The encoding format for the Mangled object is as follows: 457 /// 458 /// uint8_t encoding; 459 /// char str1[]; (only if DemangledOnly, MangledOnly) 460 /// char str2[]; (only if MangledAndDemangled) 461 /// 462 /// The strings are stored as NULL terminated UTF8 strings and str1 and str2 463 /// are only saved if we need them based on the encoding. 464 /// 465 /// Some mangled names have a mangled name that can be demangled by the built 466 /// in demanglers. These kinds of mangled objects know when the mangled and 467 /// demangled names are the counterparts for each other. This is done because 468 /// demangling is very expensive and avoiding demangling the same name twice 469 /// saves us a lot of compute time. For these kinds of names we only need to 470 /// save the mangled name and have the encoding set to "MangledOnly". 471 /// 472 /// If a mangled obejct has only a demangled name, then we save only that string 473 /// and have the encoding set to "DemangledOnly". 474 /// 475 /// Some mangled objects have both mangled and demangled names, but the 476 /// demangled name can not be computed from the mangled name. This is often used 477 /// for runtime named, like Objective C runtime V2 and V3 names. Both these 478 /// names must be saved and the encoding is set to "MangledAndDemangled". 479 /// 480 /// For a Mangled object with no names, we only need to set the encoding to 481 /// "Empty" and not store any string values. 482 void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const { 483 MangledEncoding encoding = Empty; 484 if (m_mangled) { 485 encoding = MangledOnly; 486 if (m_demangled) { 487 // We have both mangled and demangled names. If the demangled name is the 488 // counterpart of the mangled name, then we only need to save the mangled 489 // named. If they are different, we need to save both. 490 ConstString s; 491 if (!(m_mangled.GetMangledCounterpart(s) && s == m_demangled)) 492 encoding = MangledAndDemangled; 493 } 494 } else if (m_demangled) { 495 encoding = DemangledOnly; 496 } 497 file.AppendU8(encoding); 498 switch (encoding) { 499 case Empty: 500 break; 501 case DemangledOnly: 502 file.AppendU32(strtab.Add(m_demangled)); 503 break; 504 case MangledOnly: 505 file.AppendU32(strtab.Add(m_mangled)); 506 break; 507 case MangledAndDemangled: 508 file.AppendU32(strtab.Add(m_mangled)); 509 file.AppendU32(strtab.Add(m_demangled)); 510 break; 511 } 512 } 513