1 //===-- Mangled.cpp ---------------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include <cxxabi.h>
11 
12 #include "llvm/ADT/DenseMap.h"
13 
14 #include "lldb/Core/ConstString.h"
15 #include "lldb/Core/Mangled.h"
16 #include "lldb/Core/RegularExpression.h"
17 #include "lldb/Core/Stream.h"
18 #include "lldb/Core/Timer.h"
19 #include <ctype.h>
20 #include <string.h>
21 
22 using namespace lldb_private;
23 
24 #pragma mark Mangled
25 //----------------------------------------------------------------------
26 // Default constructor
27 //----------------------------------------------------------------------
28 Mangled::Mangled () :
29     m_mangled(),
30     m_demangled()
31 {
32 }
33 
34 //----------------------------------------------------------------------
35 // Constructor with an optional string and a boolean indicating if it is
36 // the mangled version.
37 //----------------------------------------------------------------------
38 Mangled::Mangled (const char *s, bool mangled) :
39     m_mangled(),
40     m_demangled()
41 {
42     if (s && s[0])
43     {
44         SetValue(s, mangled);
45     }
46 }
47 
48 //----------------------------------------------------------------------
49 // Destructor
50 //----------------------------------------------------------------------
51 Mangled::~Mangled ()
52 {
53 }
54 
55 //----------------------------------------------------------------------
56 // Convert to pointer operator. This allows code to check any Mangled
57 // objects to see if they contain anything valid using code such as:
58 //
59 //  Mangled mangled(...);
60 //  if (mangled)
61 //  { ...
62 //----------------------------------------------------------------------
63 Mangled::operator void* () const
64 {
65     return (m_mangled) ? const_cast<Mangled*>(this) : NULL;
66 }
67 
68 //----------------------------------------------------------------------
69 // Logical NOT operator. This allows code to check any Mangled
70 // objects to see if they are invalid using code such as:
71 //
72 //  Mangled mangled(...);
73 //  if (!file_spec)
74 //  { ...
75 //----------------------------------------------------------------------
76 bool
77 Mangled::operator! () const
78 {
79     return !m_mangled;
80 }
81 
82 //----------------------------------------------------------------------
83 // Clear the mangled and demangled values.
84 //----------------------------------------------------------------------
85 void
86 Mangled::Clear ()
87 {
88     m_mangled.Clear();
89     m_demangled.Clear();
90 }
91 
92 
93 //----------------------------------------------------------------------
94 // Compare the the string values.
95 //----------------------------------------------------------------------
96 int
97 Mangled::Compare (const Mangled& a, const Mangled& b)
98 {
99     return ConstString::Compare(a.GetName(ePreferMangled), a.GetName(ePreferMangled));
100 }
101 
102 
103 
104 //----------------------------------------------------------------------
105 // Set the string value in this objects. If "mangled" is true, then
106 // the mangled named is set with the new value in "s", else the
107 // demangled name is set.
108 //----------------------------------------------------------------------
109 void
110 Mangled::SetValue (const char *s, bool mangled)
111 {
112     if (s)
113     {
114         if (mangled)
115         {
116             m_demangled.Clear();
117             m_mangled.SetCString (s);
118         }
119         else
120         {
121             m_demangled.SetCString(s);
122             m_mangled.Clear();
123         }
124     }
125     else
126     {
127         m_demangled.Clear();
128         m_mangled.Clear();
129     }
130 }
131 
132 //----------------------------------------------------------------------
133 // Generate the demangled name on demand using this accessor. Code in
134 // this class will need to use this accessor if it wishes to decode
135 // the demangled name. The result is cached and will be kept until a
136 // new string value is supplied to this object, or until the end of the
137 // object's lifetime.
138 //----------------------------------------------------------------------
139 const ConstString&
140 Mangled::GetDemangledName () const
141 {
142     // Check to make sure we have a valid mangled name and that we
143     // haven't already decoded our mangled name.
144     if (m_mangled && !m_demangled)
145     {
146         // We need to generate and cache the demangled name.
147         Timer scoped_timer (__PRETTY_FUNCTION__,
148                             "Mangled::GetDemangledName (m_mangled = %s)",
149                             m_mangled.GetCString());
150 
151         // We already know mangled is valid from the above check,
152         // lets just make sure it isn't empty...
153         const char * mangled = m_mangled.AsCString();
154         // Don't bother running anything that doesn't start with _Z through the demangler
155         if (mangled[0] != '\0' && mangled[0] == '_' && mangled[1] == 'Z')
156         {
157             // Since demangling can be a costly, and since all names that go
158             // into a ConstString (like our m_mangled and m_demangled members)
159             // end up being unique "const char *" values, we can use a DenseMap
160             // to speed up our lookup. We do this because often our symbol table
161             // and our debug information both have the mangled names which they
162             // would each need to demangle. Also, with GCC we end up with the one
163             // definition rule where a lot of STL code produces symbols that are
164             // in multiple compile units and the mangled names end up being in
165             // the same binary multiple times. The performance win isn't huge,
166             // but we showed a 20% improvement on darwin.
167             typedef llvm::DenseMap<const char *, const char *> MangledToDemangledMap;
168             static MangledToDemangledMap g_mangled_to_demangled;
169 
170             // Check our mangled string pointer to demangled string pointer map first
171             MangledToDemangledMap::const_iterator pos = g_mangled_to_demangled.find (mangled);
172             if (pos != g_mangled_to_demangled.end())
173             {
174                 // We have already demangled this string, we can just use our saved result!
175                 m_demangled.SetCString(pos->second);
176             }
177             else
178             {
179                 // We didn't already mangle this name, demangle it and if all goes well
180                 // add it to our map.
181                 char *demangled_name = abi::__cxa_demangle (mangled, NULL, NULL, NULL);
182 
183                 if (demangled_name)
184                 {
185                     m_demangled.SetCString (demangled_name);
186                     // Now that the name has been uniqued, add the uniqued C string
187                     // pointer from m_mangled as the key to the uniqued C string
188                     // pointer in m_demangled.
189                     g_mangled_to_demangled.insert (std::make_pair (mangled, m_demangled.GetCString()));
190                     free (demangled_name);
191                 }
192             }
193         }
194         if (!m_demangled)
195         {
196             // Set the demangled string to the empty string to indicate we
197             // tried to parse it once and failed.
198             m_demangled.SetCString("");
199         }
200     }
201 
202     return m_demangled;
203 }
204 
205 
206 bool
207 Mangled::NameMatches (const RegularExpression& regex) const
208 {
209     if (m_mangled && regex.Execute (m_mangled.AsCString()))
210         return true;
211 
212     if (GetDemangledName() && regex.Execute (m_demangled.AsCString()))
213         return true;
214     return false;
215 }
216 
217 
218 //----------------------------------------------------------------------
219 // Mangled name get accessor
220 //----------------------------------------------------------------------
221 ConstString&
222 Mangled::GetMangledName ()
223 {
224     return m_mangled;
225 }
226 
227 //----------------------------------------------------------------------
228 // Mangled name const get accessor
229 //----------------------------------------------------------------------
230 const ConstString&
231 Mangled::GetMangledName () const
232 {
233     return m_mangled;
234 }
235 
236 //----------------------------------------------------------------------
237 // Get the demangled name if there is one, else return the mangled name.
238 //----------------------------------------------------------------------
239 const ConstString&
240 Mangled::GetName (Mangled::NamePreference preference) const
241 {
242     if (preference == ePreferDemangled)
243     {
244         // Call the accessor to make sure we get a demangled name in case
245         // it hasn't been demangled yet...
246         if (GetDemangledName())
247             return m_demangled;
248         return m_mangled;
249     }
250     else
251     {
252         if (m_mangled)
253             return m_mangled;
254         return GetDemangledName();
255     }
256 }
257 
258 //----------------------------------------------------------------------
259 // Generate the tokens from the demangled name.
260 //
261 // Returns the number of tokens that were parsed.
262 //----------------------------------------------------------------------
263 size_t
264 Mangled::GetTokens (Mangled::TokenList &tokens) const
265 {
266     tokens.Clear();
267     const ConstString& demangled = GetDemangledName();
268     if (demangled && !demangled.IsEmpty())
269         tokens.Parse(demangled.AsCString());
270 
271     return tokens.Size();
272 }
273 
274 //----------------------------------------------------------------------
275 // Dump a Mangled object to stream "s". We don't force our
276 // demangled name to be computed currently (we don't use the accessor).
277 //----------------------------------------------------------------------
278 void
279 Mangled::Dump (Stream *s) const
280 {
281     if (m_mangled)
282     {
283         *s << ", mangled = " << m_mangled;
284     }
285     if (m_demangled)
286     {
287         const char * demangled = m_demangled.AsCString();
288         s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>");
289     }
290 }
291 
292 //----------------------------------------------------------------------
293 // Dumps a debug version of this string with extra object and state
294 // information to stream "s".
295 //----------------------------------------------------------------------
296 void
297 Mangled::DumpDebug (Stream *s) const
298 {
299     s->Printf("%*p: Mangled mangled = ", (int)sizeof(void*) * 2, this);
300     m_mangled.DumpDebug(s);
301     s->Printf(", demangled = ");
302     m_demangled.DumpDebug(s);
303 }
304 
305 //----------------------------------------------------------------------
306 // Return the size in byte that this object takes in memory. The size
307 // includes the size of the objects it owns, and not the strings that
308 // it references because they are shared strings.
309 //----------------------------------------------------------------------
310 size_t
311 Mangled::MemorySize () const
312 {
313     return m_mangled.MemorySize() + m_demangled.MemorySize();
314 }
315 
316 //----------------------------------------------------------------------
317 // Dump OBJ to the supplied stream S.
318 //----------------------------------------------------------------------
319 Stream&
320 operator << (Stream& s, const Mangled& obj)
321 {
322     if (obj.GetMangledName())
323         s << "mangled = '" << obj.GetMangledName() << "'";
324 
325     const ConstString& demangled = obj.GetDemangledName();
326     if (demangled)
327         s << ", demangled = '" << demangled << '\'';
328     else
329         s << ", demangled = <error>";
330     return s;
331 }
332 
333 
334 
335 
336 #pragma mark Mangled::Token
337 
338 //--------------------------------------------------------------
339 // Default constructor
340 //--------------------------------------------------------------
341 Mangled::Token::Token () :
342     type(eInvalid),
343     value()
344 {
345 }
346 
347 //--------------------------------------------------------------
348 // Equal to operator
349 //--------------------------------------------------------------
350 bool
351 Mangled::Token::operator== (const Token& rhs) const
352 {
353     return type == rhs.type && value == rhs.value;
354 }
355 
356 //--------------------------------------------------------------
357 // Dump the token to a stream "s"
358 //--------------------------------------------------------------
359 void
360 Mangled::Token::Dump (Stream *s) const
361 {
362     switch (type)
363     {
364     case eInvalid:      s->PutCString("invalid    "); break;
365     case eNameSpace:    s->PutCString("namespace  "); break;
366     case eMethodName:   s->PutCString("method     "); break;
367     case eType:         s->PutCString("type       "); break;
368     case eTemplate:     s->PutCString("template   "); break;
369     case eTemplateBeg:  s->PutCString("template < "); break;
370     case eTemplateEnd:  s->PutCString("template > "); break;
371     case eParamsBeg:    s->PutCString("params   ( "); break;
372     case eParamsEnd:    s->PutCString("params   ) "); break;
373     case eQualifier:    s->PutCString("qualifier  "); break;
374     case eError:        s->PutCString("ERROR      "); break;
375     default:
376         s->Printf("type = %i", type);
377         break;
378     }
379     value.DumpDebug(s);
380 }
381 
382 //--------------------------------------------------------------
383 // Returns true if this token is a wildcard
384 //--------------------------------------------------------------
385 bool
386 Mangled::Token::IsWildcard () const
387 {
388     static ConstString g_wildcard_str("*");
389     return value == g_wildcard_str;
390 }
391 
392 
393 //----------------------------------------------------------------------
394 // Dump "obj" to the supplied stream "s"
395 //----------------------------------------------------------------------
396 Stream&
397 lldb_private::operator << (Stream& s, const Mangled::Token& obj)
398 {
399     obj.Dump(&s);
400     return s;
401 }
402 
403 
404 #pragma mark Mangled::TokenList
405 //----------------------------------------------------------------------
406 // Mangled::TokenList
407 //----------------------------------------------------------------------
408 
409 //--------------------------------------------------------------
410 // Default constructor. If demangled is non-NULL and not-empty
411 // the token list will parse up the demangled string it is
412 // given, else the object will initialize an empty token list.
413 //--------------------------------------------------------------
414 Mangled::TokenList::TokenList (const char *demangled) :
415     m_tokens()
416 {
417     if (demangled && demangled[0])
418     {
419         Parse(demangled);
420     }
421 }
422 
423 //----------------------------------------------------------------------
424 // Destructor
425 //----------------------------------------------------------------------
426 Mangled::TokenList::~TokenList ()
427 {
428 }
429 
430 //----------------------------------------------------------------------
431 // Parses "demangled" into tokens. This allows complex
432 // comparisons to be done. Comparisons can include wildcards at
433 // the namespace, method name, template, and template and
434 // parameter type levels.
435 //
436 // Example queries include:
437 // "std::basic_string<*>"   // Find all std::basic_string variants
438 // "std::basic_string<*>::erase(*)" // Find all std::basic_string::erase variants with any number of parameters
439 // "*::clear()"             // Find all functions with a method name of
440 //                          // "clear" that are in any namespace that
441 //                          // have no parameters
442 // "::printf"               // Find the printf function in the global namespace
443 // "printf"                 // Ditto
444 // "foo::*(int)"            // Find all functions in the class or namespace "foo" that take a single integer argument
445 //
446 // Returns the number of tokens that were decoded, or zero when
447 // we fail.
448 //----------------------------------------------------------------------
449 size_t
450 Mangled::TokenList::Parse (const char *s)
451 {
452     m_tokens.clear();
453 
454     Token token;
455     token.type = eNameSpace;
456 
457     TokenType max_type = eInvalid;
458     const char *p = s;
459     size_t span = 0;
460     size_t sep_size = 0;
461 
462     while (*p != '\0')
463     {
464         p = p + span + sep_size;
465         while (isspace(*p))
466             ++p;
467 
468         if (*p == '\0')
469             break;
470 
471         span = strcspn(p, ":<>(),");
472         sep_size = 1;
473         token.type = eInvalid;
474         switch (p[span])
475         {
476         case '\0':
477             break;
478 
479         case ':':
480             if (p[span+1] == ':')
481             {
482                 sep_size = 2;
483                 if (span > 0)
484                 {
485                     token.type = eNameSpace;
486                     token.value.SetCStringWithLength (p, span);
487                     m_tokens.push_back(token);
488                 }
489                 else
490                     continue;
491             }
492             break;
493 
494         case '(':
495             if (span > 0)
496             {
497                 token.type = eMethodName;
498                 token.value.SetCStringWithLength (p, span);
499                 m_tokens.push_back(token);
500             }
501 
502             token.type = eParamsBeg;
503             token.value.Clear();
504             m_tokens.push_back(token);
505             break;
506 
507         case ',':
508             if (span > 0)
509             {
510                 token.type = eType;
511                 token.value.SetCStringWithLength (p, span);
512                 m_tokens.push_back(token);
513             }
514             else
515             {
516                 continue;
517             }
518             break;
519 
520         case ')':
521             if (span > 0)
522             {
523                 token.type = eType;
524                 token.value.SetCStringWithLength (p, span);
525                 m_tokens.push_back(token);
526             }
527 
528             token.type = eParamsEnd;
529             token.value.Clear();
530             m_tokens.push_back(token);
531             break;
532 
533         case '<':
534             if (span > 0)
535             {
536                 token.type = eTemplate;
537                 token.value.SetCStringWithLength (p, span);
538                 m_tokens.push_back(token);
539             }
540 
541             token.type = eTemplateBeg;
542             token.value.Clear();
543             m_tokens.push_back(token);
544             break;
545 
546         case '>':
547             if (span > 0)
548             {
549                 token.type = eType;
550                 token.value.SetCStringWithLength (p, span);
551                 m_tokens.push_back(token);
552             }
553 
554             token.type = eTemplateEnd;
555             token.value.Clear();
556             m_tokens.push_back(token);
557             break;
558         }
559 
560         if (max_type < token.type)
561             max_type = token.type;
562 
563         if (token.type == eInvalid)
564         {
565             if (max_type >= eParamsEnd)
566             {
567                 token.type = eQualifier;
568                 token.value.SetCString(p);
569                 m_tokens.push_back(token);
570             }
571             else if (max_type >= eParamsBeg)
572             {
573                 token.type = eType;
574                 token.value.SetCString(p);
575                 m_tokens.push_back(token);
576             }
577             else
578             {
579                 token.type = eMethodName;
580                 token.value.SetCString(p);
581                 m_tokens.push_back(token);
582             }
583             break;
584         }
585     }
586     return m_tokens.size();
587 }
588 
589 
590 //----------------------------------------------------------------------
591 // Clear the token list.
592 //----------------------------------------------------------------------
593 void
594 Mangled::TokenList::Clear ()
595 {
596     m_tokens.clear();
597 }
598 
599 //----------------------------------------------------------------------
600 // Dump the token list to the stream "s"
601 //----------------------------------------------------------------------
602 void
603 Mangled::TokenList::Dump (Stream *s) const
604 {
605     collection::const_iterator pos;
606     collection::const_iterator beg = m_tokens.begin();
607     collection::const_iterator end = m_tokens.end();
608     for (pos = beg; pos != end; ++pos)
609     {
610         s->Indent("token[");
611         *s << (uint32_t)std::distance(beg, pos) << "] = " << *pos << "\n";
612     }
613 }
614 
615 //----------------------------------------------------------------------
616 // Find the first token in the list that has "token_type" as its
617 // type
618 //----------------------------------------------------------------------
619 const Mangled::Token *
620 Mangled::TokenList::Find (TokenType token_type) const
621 {
622     collection::const_iterator pos;
623     collection::const_iterator beg = m_tokens.begin();
624     collection::const_iterator end = m_tokens.end();
625     for (pos = beg; pos != end; ++pos)
626     {
627         if (pos->type == token_type)
628             return &(*pos);
629     }
630     return NULL;
631 }
632 
633 //----------------------------------------------------------------------
634 // Return the token at index "idx", or NULL if the index is
635 // out of range.
636 //----------------------------------------------------------------------
637 const Mangled::Token *
638 Mangled::TokenList::GetTokenAtIndex (uint32_t idx) const
639 {
640     if (idx < m_tokens.size())
641         return &m_tokens[idx];
642     return NULL;
643 }
644 
645 
646 //----------------------------------------------------------------------
647 // Given a token list, see if it matches this object's tokens.
648 // "token_list" can contain wild card values to enable powerful
649 // matching. Matching the std::string::erase(*) example that was
650 // tokenized above we could use a token list such as:
651 //
652 //      token           name
653 //      -----------     ----------------------------------------
654 //      eNameSpace      "std"
655 //      eTemplate       "basic_string"
656 //      eTemplateBeg
657 //      eInvalid        "*"
658 //      eTemplateEnd
659 //      eMethodName     "erase"
660 //      eParamsBeg
661 //      eInvalid        "*"
662 //      eParamsEnd
663 //
664 // Returns true if it "token_list" matches this object's tokens,
665 // false otherwise.
666 //----------------------------------------------------------------------
667 bool
668 Mangled::TokenList::MatchesQuery (const Mangled::TokenList &match) const
669 {
670     size_t match_count = 0;
671     collection::const_iterator pos;
672     collection::const_iterator pos_end = m_tokens.end();
673 
674     collection::const_iterator match_pos;
675     collection::const_iterator match_pos_end = match.m_tokens.end();
676     collection::const_iterator match_wildcard_pos = match_pos_end;
677     collection::const_iterator match_next_pos = match_pos_end;
678 
679     size_t template_scope_depth = 0;
680 
681     for (pos = m_tokens.begin(), match_pos = match.m_tokens.begin();
682          pos != pos_end && match_pos != match_pos_end;
683          ++match_pos)
684     {
685         match_next_pos = match_pos + 1;
686         // Is this a wildcard?
687         if (match_pos->IsWildcard())
688         {
689             if (match_wildcard_pos != match_pos_end)
690                 return false;   // Can't have two wildcards in effect at once.
691 
692             match_wildcard_pos = match_pos;
693             // Are we at the end of the MATCH token list?
694             if (match_next_pos == match_pos_end)
695             {
696                 // There is nothing more to match, return if we have any matches so far...
697                 return match_count > 0;
698             }
699         }
700 
701         if (match_pos->type == eInvalid || match_pos->type == eError)
702         {
703             return false;
704         }
705         else
706         {
707             if (match_pos->type == eTemplateBeg)
708             {
709                 ++template_scope_depth;
710             }
711             else if (match_pos->type == eTemplateEnd)
712             {
713                 assert(template_scope_depth > 0);
714                 --template_scope_depth;
715             }
716 
717             // Do we have a wildcard going right now?
718             if (match_wildcard_pos == match_pos_end)
719             {
720                 // No wildcard matching right now, just check and see if things match
721                 if (*pos == *match_pos)
722                     ++match_count;
723                 else
724                     return false;
725             }
726             else
727             {
728                 // We have a wildcard match going
729 
730                 // For template types we need to make sure to match the template depths...
731                 const size_t start_wildcard_template_scope_depth = template_scope_depth;
732                 size_t curr_wildcard_template_scope_depth = template_scope_depth;
733                 while (pos != pos_end)
734                 {
735                     if (match_wildcard_pos->type == eNameSpace && pos->type == eParamsBeg)
736                         return false;
737 
738                     if (start_wildcard_template_scope_depth == curr_wildcard_template_scope_depth)
739                     {
740                         if (*pos == *match_next_pos)
741                         {
742                             ++match_count;
743                             match_pos = match_next_pos;
744                             match_wildcard_pos = match_pos_end;
745                             break;
746                         }
747                     }
748                     if (pos->type == eTemplateBeg)
749                         ++curr_wildcard_template_scope_depth;
750                     else if (pos->type == eTemplateEnd)
751                         --curr_wildcard_template_scope_depth;
752 
753 
754                     ++pos;
755                 }
756             }
757         }
758 
759         if (pos != pos_end)
760             ++pos;
761     }
762     if (match_pos != match_pos_end)
763         return false;
764 
765     return match_count > 0;
766 }
767 
768 
769 //----------------------------------------------------------------------
770 // Return the number of tokens in the token collection
771 //----------------------------------------------------------------------
772 size_t
773 Mangled::TokenList::Size () const
774 {
775     return m_tokens.size();
776 }
777 
778 
779 //----------------------------------------------------------------------
780 // Stream out the tokens
781 //----------------------------------------------------------------------
782 Stream&
783 lldb_private::operator << (Stream& s, const Mangled::TokenList& obj)
784 {
785     obj.Dump(&s);
786     return s;
787 }
788