1 //===-- Mangled.cpp ---------------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include <cxxabi.h>
11 
12 #include "llvm/ADT/DenseMap.h"
13 
14 #include "lldb/Core/ConstString.h"
15 #include "lldb/Core/Mangled.h"
16 #include "lldb/Core/RegularExpression.h"
17 #include "lldb/Core/Stream.h"
18 #include "lldb/Core/Timer.h"
19 #include <ctype.h>
20 #include <string.h>
21 
22 using namespace lldb_private;
23 
24 #pragma mark Mangled
25 //----------------------------------------------------------------------
26 // Default constructor
27 //----------------------------------------------------------------------
28 Mangled::Mangled () :
29     m_mangled(),
30     m_demangled()
31 {
32 }
33 
34 //----------------------------------------------------------------------
35 // Constructor with an optional string and a boolean indicating if it is
36 // the mangled version.
37 //----------------------------------------------------------------------
38 Mangled::Mangled (const char *s, bool mangled) :
39     m_mangled(),
40     m_demangled()
41 {
42     if (s && s[0])
43     {
44         SetValue(s, mangled);
45     }
46 }
47 
48 //----------------------------------------------------------------------
49 // Destructor
50 //----------------------------------------------------------------------
Mangled::~Mangled ()
{
    // No explicit cleanup is performed here.
}
54 
55 //----------------------------------------------------------------------
56 // Convert to pointer operator. This allows code to check any Mangled
57 // objects to see if they contain anything valid using code such as:
58 //
59 //  Mangled mangled(...);
60 //  if (mangled)
61 //  { ...
62 //----------------------------------------------------------------------
63 Mangled::operator void* () const
64 {
65     return (m_mangled) ? const_cast<Mangled*>(this) : NULL;
66 }
67 
68 //----------------------------------------------------------------------
69 // Logical NOT operator. This allows code to check any Mangled
70 // objects to see if they are invalid using code such as:
71 //
72 //  Mangled mangled(...);
//  if (!mangled)
74 //  { ...
75 //----------------------------------------------------------------------
76 bool
77 Mangled::operator! () const
78 {
79     return !m_mangled;
80 }
81 
82 //----------------------------------------------------------------------
83 // Clear the mangled and demangled values.
84 //----------------------------------------------------------------------
85 void
86 Mangled::Clear ()
87 {
88     m_mangled.Clear();
89     m_demangled.Clear();
90 }
91 
92 
93 //----------------------------------------------------------------------
// Compare the string values.
95 //----------------------------------------------------------------------
96 int
97 Mangled::Compare (const Mangled& a, const Mangled& b)
98 {
99     return ConstString::Compare(a.GetName(ePreferMangled), a.GetName(ePreferMangled));
100 }
101 
102 
103 
104 //----------------------------------------------------------------------
// Set the string value in this object. Any previously stored mangled
// and demangled names are cleared first. If "mangled" is true, then
// the mangled name is set to the new value in "s", else the
// demangled name is set.
108 //----------------------------------------------------------------------
109 void
110 Mangled::SetValue (const char *s, bool mangled)
111 {
112     m_mangled.Clear();
113     m_demangled.Clear();
114 
115     if (s)
116     {
117         if (mangled)
118             m_mangled.SetCString (s);
119         else
120             m_demangled.SetCString(s);
121     }
122 }
123 
124 //----------------------------------------------------------------------
125 // Generate the demangled name on demand using this accessor. Code in
126 // this class will need to use this accessor if it wishes to decode
127 // the demangled name. The result is cached and will be kept until a
128 // new string value is supplied to this object, or until the end of the
129 // object's lifetime.
130 //----------------------------------------------------------------------
131 const ConstString&
132 Mangled::GetDemangledName () const
133 {
134     // Check to make sure we have a valid mangled name and that we
135     // haven't already decoded our mangled name.
136     if (m_mangled && !m_demangled)
137     {
138         // We need to generate and cache the demangled name.
139         Timer scoped_timer (__PRETTY_FUNCTION__,
140                             "Mangled::GetDemangledName (m_mangled = %s)",
141                             m_mangled.GetCString());
142 
143         // We already know mangled is valid from the above check,
144         // lets just make sure it isn't empty...
145         const char * mangled = m_mangled.AsCString();
146         // Don't bother running anything that doesn't start with _Z through the demangler
147         if (mangled[0] != '\0' && mangled[0] == '_' && mangled[1] == 'Z')
148         {
149             // Since demangling can be a costly, and since all names that go
150             // into a ConstString (like our m_mangled and m_demangled members)
151             // end up being unique "const char *" values, we can use a DenseMap
152             // to speed up our lookup. We do this because often our symbol table
153             // and our debug information both have the mangled names which they
154             // would each need to demangle. Also, with GCC we end up with the one
155             // definition rule where a lot of STL code produces symbols that are
156             // in multiple compile units and the mangled names end up being in
157             // the same binary multiple times. The performance win isn't huge,
158             // but we showed a 20% improvement on darwin.
159             typedef llvm::DenseMap<const char *, const char *> MangledToDemangledMap;
160             static MangledToDemangledMap g_mangled_to_demangled;
161 
162             // Check our mangled string pointer to demangled string pointer map first
163             MangledToDemangledMap::const_iterator pos = g_mangled_to_demangled.find (mangled);
164             if (pos != g_mangled_to_demangled.end())
165             {
166                 // We have already demangled this string, we can just use our saved result!
167                 m_demangled.SetCString(pos->second);
168             }
169             else
170             {
171                 // We didn't already mangle this name, demangle it and if all goes well
172                 // add it to our map.
173                 char *demangled_name = abi::__cxa_demangle (mangled, NULL, NULL, NULL);
174 
175                 if (demangled_name)
176                 {
177                     m_demangled.SetCString (demangled_name);
178                     // Now that the name has been uniqued, add the uniqued C string
179                     // pointer from m_mangled as the key to the uniqued C string
180                     // pointer in m_demangled.
181                     g_mangled_to_demangled.insert (std::make_pair (mangled, m_demangled.GetCString()));
182                     free (demangled_name);
183                 }
184             }
185         }
186         if (!m_demangled)
187         {
188             // Set the demangled string to the empty string to indicate we
189             // tried to parse it once and failed.
190             m_demangled.SetCString("");
191         }
192     }
193 
194     return m_demangled;
195 }
196 
197 
198 bool
199 Mangled::NameMatches (const RegularExpression& regex) const
200 {
201     if (m_mangled && regex.Execute (m_mangled.AsCString()))
202         return true;
203 
204     if (GetDemangledName() && regex.Execute (m_demangled.AsCString()))
205         return true;
206     return false;
207 }
208 
209 
210 //----------------------------------------------------------------------
211 // Mangled name get accessor
212 //----------------------------------------------------------------------
ConstString&
Mangled::GetMangledName ()
{
    // Hands out a mutable reference to the stored mangled name.
    return m_mangled;
}
218 
219 //----------------------------------------------------------------------
220 // Mangled name const get accessor
221 //----------------------------------------------------------------------
const ConstString&
Mangled::GetMangledName () const
{
    // Read-only view of the stored mangled name.
    return m_mangled;
}
227 
228 //----------------------------------------------------------------------
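// Get the name that matches "preference". When the demangled name is
// preferred, return it if there is one, else return the mangled name.
// When the mangled name is preferred, return it if there is one, else
// return the demangled name.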
230 //----------------------------------------------------------------------
231 const ConstString&
232 Mangled::GetName (Mangled::NamePreference preference) const
233 {
234     if (preference == ePreferDemangled)
235     {
236         // Call the accessor to make sure we get a demangled name in case
237         // it hasn't been demangled yet...
238         if (GetDemangledName())
239             return m_demangled;
240         return m_mangled;
241     }
242     else
243     {
244         if (m_mangled)
245             return m_mangled;
246         return GetDemangledName();
247     }
248 }
249 
250 //----------------------------------------------------------------------
251 // Generate the tokens from the demangled name.
252 //
253 // Returns the number of tokens that were parsed.
254 //----------------------------------------------------------------------
255 size_t
256 Mangled::GetTokens (Mangled::TokenList &tokens) const
257 {
258     tokens.Clear();
259     const ConstString& demangled = GetDemangledName();
260     if (demangled && !demangled.IsEmpty())
261         tokens.Parse(demangled.AsCString());
262 
263     return tokens.Size();
264 }
265 
266 //----------------------------------------------------------------------
267 // Dump a Mangled object to stream "s". We don't force our
268 // demangled name to be computed currently (we don't use the accessor).
269 //----------------------------------------------------------------------
270 void
271 Mangled::Dump (Stream *s) const
272 {
273     if (m_mangled)
274     {
275         *s << ", mangled = " << m_mangled;
276     }
277     if (m_demangled)
278     {
279         const char * demangled = m_demangled.AsCString();
280         s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>");
281     }
282 }
283 
284 //----------------------------------------------------------------------
285 // Dumps a debug version of this string with extra object and state
286 // information to stream "s".
287 //----------------------------------------------------------------------
288 void
289 Mangled::DumpDebug (Stream *s) const
290 {
291     s->Printf("%*p: Mangled mangled = ", (int)sizeof(void*) * 2, this);
292     m_mangled.DumpDebug(s);
293     s->Printf(", demangled = ");
294     m_demangled.DumpDebug(s);
295 }
296 
297 //----------------------------------------------------------------------
// Return the size in bytes that this object takes in memory. The size
299 // includes the size of the objects it owns, and not the strings that
300 // it references because they are shared strings.
301 //----------------------------------------------------------------------
302 size_t
303 Mangled::MemorySize () const
304 {
305     return m_mangled.MemorySize() + m_demangled.MemorySize();
306 }
307 
308 //----------------------------------------------------------------------
309 // Dump OBJ to the supplied stream S.
310 //----------------------------------------------------------------------
311 Stream&
312 operator << (Stream& s, const Mangled& obj)
313 {
314     if (obj.GetMangledName())
315         s << "mangled = '" << obj.GetMangledName() << "'";
316 
317     const ConstString& demangled = obj.GetDemangledName();
318     if (demangled)
319         s << ", demangled = '" << demangled << '\'';
320     else
321         s << ", demangled = <error>";
322     return s;
323 }
324 
325 
326 
327 
328 #pragma mark Mangled::Token
329 
330 //--------------------------------------------------------------
331 // Default constructor
332 //--------------------------------------------------------------
333 Mangled::Token::Token () :
334     type(eInvalid),
335     value()
336 {
337 }
338 
339 //--------------------------------------------------------------
340 // Equal to operator
341 //--------------------------------------------------------------
342 bool
343 Mangled::Token::operator== (const Token& rhs) const
344 {
345     return type == rhs.type && value == rhs.value;
346 }
347 
348 //--------------------------------------------------------------
349 // Dump the token to a stream "s"
350 //--------------------------------------------------------------
351 void
352 Mangled::Token::Dump (Stream *s) const
353 {
354     switch (type)
355     {
356     case eInvalid:      s->PutCString("invalid    "); break;
357     case eNameSpace:    s->PutCString("namespace  "); break;
358     case eMethodName:   s->PutCString("method     "); break;
359     case eType:         s->PutCString("type       "); break;
360     case eTemplate:     s->PutCString("template   "); break;
361     case eTemplateBeg:  s->PutCString("template < "); break;
362     case eTemplateEnd:  s->PutCString("template > "); break;
363     case eParamsBeg:    s->PutCString("params   ( "); break;
364     case eParamsEnd:    s->PutCString("params   ) "); break;
365     case eQualifier:    s->PutCString("qualifier  "); break;
366     case eError:        s->PutCString("ERROR      "); break;
367     default:
368         s->Printf("type = %i", type);
369         break;
370     }
371     value.DumpDebug(s);
372 }
373 
374 //--------------------------------------------------------------
375 // Returns true if this token is a wildcard
376 //--------------------------------------------------------------
377 bool
378 Mangled::Token::IsWildcard () const
379 {
380     static ConstString g_wildcard_str("*");
381     return value == g_wildcard_str;
382 }
383 
384 
385 //----------------------------------------------------------------------
386 // Dump "obj" to the supplied stream "s"
387 //----------------------------------------------------------------------
388 Stream&
lldb_private::operator << (Stream& s, const Mangled::Token& obj)
{
    // Forward to Token::Dump() and return the stream so calls can be chained.
    obj.Dump(&s);
    return s;
}
394 
395 
396 #pragma mark Mangled::TokenList
397 //----------------------------------------------------------------------
398 // Mangled::TokenList
399 //----------------------------------------------------------------------
400 
401 //--------------------------------------------------------------
402 // Default constructor. If demangled is non-NULL and not-empty
403 // the token list will parse up the demangled string it is
404 // given, else the object will initialize an empty token list.
405 //--------------------------------------------------------------
406 Mangled::TokenList::TokenList (const char *demangled) :
407     m_tokens()
408 {
409     if (demangled && demangled[0])
410     {
411         Parse(demangled);
412     }
413 }
414 
415 //----------------------------------------------------------------------
416 // Destructor
417 //----------------------------------------------------------------------
Mangled::TokenList::~TokenList ()
{
    // No explicit cleanup is performed here.
}
421 
422 //----------------------------------------------------------------------
423 // Parses "demangled" into tokens. This allows complex
424 // comparisons to be done. Comparisons can include wildcards at
425 // the namespace, method name, template, and template and
426 // parameter type levels.
427 //
428 // Example queries include:
429 // "std::basic_string<*>"   // Find all std::basic_string variants
430 // "std::basic_string<*>::erase(*)" // Find all std::basic_string::erase variants with any number of parameters
431 // "*::clear()"             // Find all functions with a method name of
432 //                          // "clear" that are in any namespace that
433 //                          // have no parameters
434 // "::printf"               // Find the printf function in the global namespace
435 // "printf"                 // Ditto
436 // "foo::*(int)"            // Find all functions in the class or namespace "foo" that take a single integer argument
437 //
438 // Returns the number of tokens that were decoded, or zero when
439 // we fail.
440 //----------------------------------------------------------------------
441 size_t
442 Mangled::TokenList::Parse (const char *s)
443 {
444     m_tokens.clear();
445 
446     Token token;
447     token.type = eNameSpace;
448 
449     TokenType max_type = eInvalid;
450     const char *p = s;
451     size_t span = 0;
452     size_t sep_size = 0;
453 
454     while (*p != '\0')
455     {
456         p = p + span + sep_size;
457         while (isspace(*p))
458             ++p;
459 
460         if (*p == '\0')
461             break;
462 
463         span = strcspn(p, ":<>(),");
464         sep_size = 1;
465         token.type = eInvalid;
466         switch (p[span])
467         {
468         case '\0':
469             break;
470 
471         case ':':
472             if (p[span+1] == ':')
473             {
474                 sep_size = 2;
475                 if (span > 0)
476                 {
477                     token.type = eNameSpace;
478                     token.value.SetCStringWithLength (p, span);
479                     m_tokens.push_back(token);
480                 }
481                 else
482                     continue;
483             }
484             break;
485 
486         case '(':
487             if (span > 0)
488             {
489                 token.type = eMethodName;
490                 token.value.SetCStringWithLength (p, span);
491                 m_tokens.push_back(token);
492             }
493 
494             token.type = eParamsBeg;
495             token.value.Clear();
496             m_tokens.push_back(token);
497             break;
498 
499         case ',':
500             if (span > 0)
501             {
502                 token.type = eType;
503                 token.value.SetCStringWithLength (p, span);
504                 m_tokens.push_back(token);
505             }
506             else
507             {
508                 continue;
509             }
510             break;
511 
512         case ')':
513             if (span > 0)
514             {
515                 token.type = eType;
516                 token.value.SetCStringWithLength (p, span);
517                 m_tokens.push_back(token);
518             }
519 
520             token.type = eParamsEnd;
521             token.value.Clear();
522             m_tokens.push_back(token);
523             break;
524 
525         case '<':
526             if (span > 0)
527             {
528                 token.type = eTemplate;
529                 token.value.SetCStringWithLength (p, span);
530                 m_tokens.push_back(token);
531             }
532 
533             token.type = eTemplateBeg;
534             token.value.Clear();
535             m_tokens.push_back(token);
536             break;
537 
538         case '>':
539             if (span > 0)
540             {
541                 token.type = eType;
542                 token.value.SetCStringWithLength (p, span);
543                 m_tokens.push_back(token);
544             }
545 
546             token.type = eTemplateEnd;
547             token.value.Clear();
548             m_tokens.push_back(token);
549             break;
550         }
551 
552         if (max_type < token.type)
553             max_type = token.type;
554 
555         if (token.type == eInvalid)
556         {
557             if (max_type >= eParamsEnd)
558             {
559                 token.type = eQualifier;
560                 token.value.SetCString(p);
561                 m_tokens.push_back(token);
562             }
563             else if (max_type >= eParamsBeg)
564             {
565                 token.type = eType;
566                 token.value.SetCString(p);
567                 m_tokens.push_back(token);
568             }
569             else
570             {
571                 token.type = eMethodName;
572                 token.value.SetCString(p);
573                 m_tokens.push_back(token);
574             }
575             break;
576         }
577     }
578     return m_tokens.size();
579 }
580 
581 
582 //----------------------------------------------------------------------
583 // Clear the token list.
584 //----------------------------------------------------------------------
585 void
Mangled::TokenList::Clear ()
{
    // Remove every token from the underlying collection.
    m_tokens.clear();
}
590 
591 //----------------------------------------------------------------------
592 // Dump the token list to the stream "s"
593 //----------------------------------------------------------------------
594 void
595 Mangled::TokenList::Dump (Stream *s) const
596 {
597     collection::const_iterator pos;
598     collection::const_iterator beg = m_tokens.begin();
599     collection::const_iterator end = m_tokens.end();
600     for (pos = beg; pos != end; ++pos)
601     {
602         s->Indent("token[");
603         *s << (uint32_t)std::distance(beg, pos) << "] = " << *pos << "\n";
604     }
605 }
606 
607 //----------------------------------------------------------------------
608 // Find the first token in the list that has "token_type" as its
609 // type
610 //----------------------------------------------------------------------
611 const Mangled::Token *
612 Mangled::TokenList::Find (TokenType token_type) const
613 {
614     collection::const_iterator pos;
615     collection::const_iterator beg = m_tokens.begin();
616     collection::const_iterator end = m_tokens.end();
617     for (pos = beg; pos != end; ++pos)
618     {
619         if (pos->type == token_type)
620             return &(*pos);
621     }
622     return NULL;
623 }
624 
625 //----------------------------------------------------------------------
626 // Return the token at index "idx", or NULL if the index is
627 // out of range.
628 //----------------------------------------------------------------------
629 const Mangled::Token *
630 Mangled::TokenList::GetTokenAtIndex (uint32_t idx) const
631 {
632     if (idx < m_tokens.size())
633         return &m_tokens[idx];
634     return NULL;
635 }
636 
637 
638 //----------------------------------------------------------------------
639 // Given a token list, see if it matches this object's tokens.
640 // "token_list" can contain wild card values to enable powerful
641 // matching. Matching the std::string::erase(*) example that was
642 // tokenized above we could use a token list such as:
643 //
644 //      token           name
645 //      -----------     ----------------------------------------
646 //      eNameSpace      "std"
647 //      eTemplate       "basic_string"
648 //      eTemplateBeg
//      eType           "*"
//      eTemplateEnd
//      eMethodName     "erase"
//      eParamsBeg
//      eType           "*"
654 //      eParamsEnd
655 //
656 // Returns true if it "token_list" matches this object's tokens,
657 // false otherwise.
658 //----------------------------------------------------------------------
659 bool
Mangled::TokenList::MatchesQuery (const Mangled::TokenList &match) const
{
    // Walks this object's tokens ("pos") and the query's tokens
    // ("match_pos") in step. A query token whose value is "*" starts a
    // wildcard that swallows tokens from this list until the query token
    // that follows the wildcard is found.
    //
    // Number of tokens that compared equal so far; at least one is needed
    // for a successful result.
    size_t match_count = 0;
    collection::const_iterator pos;
    collection::const_iterator pos_end = m_tokens.end();

    collection::const_iterator match_pos;
    collection::const_iterator match_pos_end = match.m_tokens.end();
    // Position of the wildcard currently in effect; match_pos_end means
    // that no wildcard is active.
    collection::const_iterator match_wildcard_pos = match_pos_end;
    // Query token that follows the one being processed.
    collection::const_iterator match_next_pos = match_pos_end;

    // Nesting depth of '<' ... '>' seen so far in the QUERY tokens.
    size_t template_scope_depth = 0;

    for (pos = m_tokens.begin(), match_pos = match.m_tokens.begin();
         pos != pos_end && match_pos != match_pos_end;
         ++match_pos)
    {
        match_next_pos = match_pos + 1;
        // Is this a wildcard?
        if (match_pos->IsWildcard())
        {
            if (match_wildcard_pos != match_pos_end)
                return false;   // Can't have two wildcards in effect at once.

            match_wildcard_pos = match_pos;
            // Are we at the end of the MATCH token list?
            if (match_next_pos == match_pos_end)
            {
                // There is nothing more to match, return if we have any matches so far...
                return match_count > 0;
            }
        }

        // Query tokens of type eInvalid or eError can never match.
        if (match_pos->type == eInvalid || match_pos->type == eError)
        {
            return false;
        }
        else
        {
            // Keep track of the template nesting depth in the query.
            if (match_pos->type == eTemplateBeg)
            {
                ++template_scope_depth;
            }
            else if (match_pos->type == eTemplateEnd)
            {
                assert(template_scope_depth > 0);
                --template_scope_depth;
            }

            // Do we have a wildcard going right now?
            if (match_wildcard_pos == match_pos_end)
            {
                // No wildcard matching right now, just check and see if things match
                if (*pos == *match_pos)
                    ++match_count;
                else
                    return false;
            }
            else
            {
                // We have a wildcard match going

                // For template types we need to make sure to match the template depths...
                const size_t start_wildcard_template_scope_depth = template_scope_depth;
                size_t curr_wildcard_template_scope_depth = template_scope_depth;
                // Consume tokens from this list until one equals the query
                // token that follows the wildcard, or until the list runs out.
                while (pos != pos_end)
                {
                    // A wildcard that is a namespace token is not allowed
                    // to run into the start of a parameter list.
                    if (match_wildcard_pos->type == eNameSpace && pos->type == eParamsBeg)
                        return false;

                    // The wildcard may only end at the template depth at
                    // which it started.
                    if (start_wildcard_template_scope_depth == curr_wildcard_template_scope_depth)
                    {
                        if (*pos == *match_next_pos)
                        {
                            // Found the token after the wildcard: count it,
                            // move the query position onto it and end the
                            // wildcard.
                            ++match_count;
                            match_pos = match_next_pos;
                            match_wildcard_pos = match_pos_end;
                            break;
                        }
                    }
                    // Track the template depth of the tokens the wildcard
                    // is swallowing.
                    if (pos->type == eTemplateBeg)
                        ++curr_wildcard_template_scope_depth;
                    else if (pos->type == eTemplateEnd)
                        --curr_wildcard_template_scope_depth;


                    ++pos;
                }
            }
        }

        // Advance past the token that was just handled, unless the
        // wildcard scan already reached the end of this list.
        if (pos != pos_end)
            ++pos;
    }
    // Query tokens left over mean that this list ran out first.
    if (match_pos != match_pos_end)
        return false;

    // The whole query was consumed. Tokens remaining in this list are not
    // examined any further.
    return match_count > 0;
}
759 
760 
761 //----------------------------------------------------------------------
762 // Return the number of tokens in the token collection
763 //----------------------------------------------------------------------
size_t
Mangled::TokenList::Size () const
{
    // The count comes straight from the underlying collection.
    return m_tokens.size();
}
769 
770 
771 //----------------------------------------------------------------------
772 // Stream out the tokens
773 //----------------------------------------------------------------------
Stream&
lldb_private::operator << (Stream& s, const Mangled::TokenList& obj)
{
    // Forward to TokenList::Dump() and return the stream so calls can be chained.
    obj.Dump(&s);
    return s;
}
780