1 //===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// Defines the clang::IdentifierInfo, clang::IdentifierTable, and
12 /// clang::Selector interfaces.
13 //
14 //===----------------------------------------------------------------------===//
15
16 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
18
19 #include "clang/Basic/LLVM.h"
20 #include "clang/Basic/TokenKinds.h"
21 #include "llvm/ADT/DenseMapInfo.h"
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/ADT/StringMap.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/Support/Allocator.h"
26 #include "llvm/Support/PointerLikeTypeTraits.h"
27 #include "llvm/Support/type_traits.h"
28 #include <cassert>
29 #include <cstddef>
30 #include <cstdint>
31 #include <cstring>
32 #include <string>
33 #include <utility>
34
35 namespace clang {
36
37 class DeclarationName;
38 class DeclarationNameTable;
39 class IdentifierInfo;
40 class LangOptions;
41 class MultiKeywordSelector;
42 class SourceLocation;
43
44 /// A simple pair of identifier info and location.
45 using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>;
46
/// Alignment, in bytes, required of IdentifierInfo and the related name
/// classes. Aligning to 8 bytes keeps the low 3 bits of any pointer to one
/// of these classes free, so that DeclarationName can use them.
enum { IdentifierInfoAlignment = 8 };
51
52 /// One of these records is kept for each identifier that
53 /// is lexed. This contains information about whether the token was \#define'd,
54 /// is a language keyword, or if it is a front-end token of some sort (e.g. a
55 /// variable or function name). The preprocessor keeps this information in a
56 /// set, and all tok::identifier tokens have a pointer to one of these.
57 /// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits.
alignas(IdentifierInfoAlignment)58 class alignas(IdentifierInfoAlignment) IdentifierInfo {
59 friend class IdentifierTable;
60
61 // Front-end token ID or tok::identifier.
62 unsigned TokenID : 9;
63
64 // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
65 // First NUM_OBJC_KEYWORDS values are for Objective-C,
66 // the remaining values are for builtins.
67 unsigned ObjCOrBuiltinID : 13;
68
69 // True if there is a #define for this.
70 unsigned HasMacro : 1;
71
72 // True if there was a #define for this.
73 unsigned HadMacro : 1;
74
75 // True if the identifier is a language extension.
76 unsigned IsExtension : 1;
77
78 // True if the identifier is a keyword in a newer or proposed Standard.
79 unsigned IsFutureCompatKeyword : 1;
80
81 // True if the identifier is poisoned.
82 unsigned IsPoisoned : 1;
83
84 // True if the identifier is a C++ operator keyword.
85 unsigned IsCPPOperatorKeyword : 1;
86
87 // Internal bit set by the member function RecomputeNeedsHandleIdentifier.
88 // See comment about RecomputeNeedsHandleIdentifier for more info.
89 unsigned NeedsHandleIdentifier : 1;
90
91 // True if the identifier was loaded (at least partially) from an AST file.
92 unsigned IsFromAST : 1;
93
94 // True if the identifier has changed from the definition
95 // loaded from an AST file.
96 unsigned ChangedAfterLoad : 1;
97
98 // True if the identifier's frontend information has changed from the
99 // definition loaded from an AST file.
100 unsigned FEChangedAfterLoad : 1;
101
102 // True if revertTokenIDToIdentifier was called.
103 unsigned RevertedTokenID : 1;
104
105 // True if there may be additional information about
106 // this identifier stored externally.
107 unsigned OutOfDate : 1;
108
109 // True if this is the 'import' contextual keyword.
110 unsigned IsModulesImport : 1;
111
112 // 29 bits left in a 64-bit word.
113
114 // Managed by the language front-end.
115 void *FETokenInfo = nullptr;
116
117 llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr;
118
119 IdentifierInfo()
120 : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false),
121 HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false),
122 IsPoisoned(false), IsCPPOperatorKeyword(false),
123 NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false),
124 FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false),
125 IsModulesImport(false) {}
126
127 public:
128 IdentifierInfo(const IdentifierInfo &) = delete;
129 IdentifierInfo &operator=(const IdentifierInfo &) = delete;
130 IdentifierInfo(IdentifierInfo &&) = delete;
131 IdentifierInfo &operator=(IdentifierInfo &&) = delete;
132
133 /// Return true if this is the identifier for the specified string.
134 ///
135 /// This is intended to be used for string literals only: II->isStr("foo").
136 template <std::size_t StrLen>
137 bool isStr(const char (&Str)[StrLen]) const {
138 return getLength() == StrLen-1 &&
139 memcmp(getNameStart(), Str, StrLen-1) == 0;
140 }
141
142 /// Return true if this is the identifier for the specified StringRef.
143 bool isStr(llvm::StringRef Str) const {
144 llvm::StringRef ThisStr(getNameStart(), getLength());
145 return ThisStr == Str;
146 }
147
148 /// Return the beginning of the actual null-terminated string for this
149 /// identifier.
150 const char *getNameStart() const { return Entry->getKeyData(); }
151
152 /// Efficiently return the length of this identifier info.
153 unsigned getLength() const { return Entry->getKeyLength(); }
154
155 /// Return the actual identifier string.
156 StringRef getName() const {
157 return StringRef(getNameStart(), getLength());
158 }
159
160 /// Return true if this identifier is \#defined to some other value.
161 /// \note The current definition may be in a module and not currently visible.
162 bool hasMacroDefinition() const {
163 return HasMacro;
164 }
165 void setHasMacroDefinition(bool Val) {
166 if (HasMacro == Val) return;
167
168 HasMacro = Val;
169 if (Val) {
170 NeedsHandleIdentifier = true;
171 HadMacro = true;
172 } else {
173 RecomputeNeedsHandleIdentifier();
174 }
175 }
176 /// Returns true if this identifier was \#defined to some value at any
177 /// moment. In this case there should be an entry for the identifier in the
178 /// macro history table in Preprocessor.
179 bool hadMacroDefinition() const {
180 return HadMacro;
181 }
182
183 /// If this is a source-language token (e.g. 'for'), this API
184 /// can be used to cause the lexer to map identifiers to source-language
185 /// tokens.
186 tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
187
188 /// True if revertTokenIDToIdentifier() was called.
189 bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
190
191 /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
192 /// compatibility.
193 ///
194 /// TokenID is normally read-only but there are 2 instances where we revert it
195 /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
196 /// using this method so we can inform serialization about it.
197 void revertTokenIDToIdentifier() {
198 assert(TokenID != tok::identifier && "Already at tok::identifier");
199 TokenID = tok::identifier;
200 RevertedTokenID = true;
201 }
202 void revertIdentifierToTokenID(tok::TokenKind TK) {
203 assert(TokenID == tok::identifier && "Should be at tok::identifier");
204 TokenID = TK;
205 RevertedTokenID = false;
206 }
207
208 /// Return the preprocessor keyword ID for this identifier.
209 ///
210 /// For example, "define" will return tok::pp_define.
211 tok::PPKeywordKind getPPKeywordID() const;
212
213 /// Return the Objective-C keyword ID for the this identifier.
214 ///
215 /// For example, 'class' will return tok::objc_class if ObjC is enabled.
216 tok::ObjCKeywordKind getObjCKeywordID() const {
217 if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
218 return tok::ObjCKeywordKind(ObjCOrBuiltinID);
219 else
220 return tok::objc_not_keyword;
221 }
222 void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
223
224 /// True if setNotBuiltin() was called.
225 bool hasRevertedBuiltin() const {
226 return ObjCOrBuiltinID == tok::NUM_OBJC_KEYWORDS;
227 }
228
229 /// Revert the identifier to a non-builtin identifier. We do this if
230 /// the name of a known builtin library function is used to declare that
231 /// function, but an unexpected type is specified.
232 void revertBuiltin() {
233 setBuiltinID(0);
234 }
235
236 /// Return a value indicating whether this is a builtin function.
237 ///
238 /// 0 is not-built-in. 1+ are specific builtin functions.
239 unsigned getBuiltinID() const {
240 if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
241 return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
242 else
243 return 0;
244 }
245 void setBuiltinID(unsigned ID) {
246 ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
247 assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
248 && "ID too large for field!");
249 }
250
251 unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
252 void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
253
254 /// get/setExtension - Initialize information about whether or not this
255 /// language token is an extension. This controls extension warnings, and is
256 /// only valid if a custom token ID is set.
257 bool isExtensionToken() const { return IsExtension; }
258 void setIsExtensionToken(bool Val) {
259 IsExtension = Val;
260 if (Val)
261 NeedsHandleIdentifier = true;
262 else
263 RecomputeNeedsHandleIdentifier();
264 }
265
266 /// is/setIsFutureCompatKeyword - Initialize information about whether or not
267 /// this language token is a keyword in a newer or proposed Standard. This
268 /// controls compatibility warnings, and is only true when not parsing the
269 /// corresponding Standard. Once a compatibility problem has been diagnosed
270 /// with this keyword, the flag will be cleared.
271 bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; }
272 void setIsFutureCompatKeyword(bool Val) {
273 IsFutureCompatKeyword = Val;
274 if (Val)
275 NeedsHandleIdentifier = true;
276 else
277 RecomputeNeedsHandleIdentifier();
278 }
279
280 /// setIsPoisoned - Mark this identifier as poisoned. After poisoning, the
281 /// Preprocessor will emit an error every time this token is used.
282 void setIsPoisoned(bool Value = true) {
283 IsPoisoned = Value;
284 if (Value)
285 NeedsHandleIdentifier = true;
286 else
287 RecomputeNeedsHandleIdentifier();
288 }
289
290 /// Return true if this token has been poisoned.
291 bool isPoisoned() const { return IsPoisoned; }
292
293 /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
294 /// this identifier is a C++ alternate representation of an operator.
295 void setIsCPlusPlusOperatorKeyword(bool Val = true) {
296 IsCPPOperatorKeyword = Val;
297 }
298 bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
299
300 /// Return true if this token is a keyword in the specified language.
301 bool isKeyword(const LangOptions &LangOpts) const;
302
303 /// Return true if this token is a C++ keyword in the specified
304 /// language.
305 bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
306
307 /// Get and set FETokenInfo. The language front-end is allowed to associate
308 /// arbitrary metadata with this token.
309 void *getFETokenInfo() const { return FETokenInfo; }
310 void setFETokenInfo(void *T) { FETokenInfo = T; }
311
312 /// Return true if the Preprocessor::HandleIdentifier must be called
313 /// on a token of this identifier.
314 ///
315 /// If this returns false, we know that HandleIdentifier will not affect
316 /// the token.
317 bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
318
319 /// Return true if the identifier in its current state was loaded
320 /// from an AST file.
321 bool isFromAST() const { return IsFromAST; }
322
323 void setIsFromAST() { IsFromAST = true; }
324
325 /// Determine whether this identifier has changed since it was loaded
326 /// from an AST file.
327 bool hasChangedSinceDeserialization() const {
328 return ChangedAfterLoad;
329 }
330
331 /// Note that this identifier has changed since it was loaded from
332 /// an AST file.
333 void setChangedSinceDeserialization() {
334 ChangedAfterLoad = true;
335 }
336
337 /// Determine whether the frontend token information for this
338 /// identifier has changed since it was loaded from an AST file.
339 bool hasFETokenInfoChangedSinceDeserialization() const {
340 return FEChangedAfterLoad;
341 }
342
343 /// Note that the frontend token information for this identifier has
344 /// changed since it was loaded from an AST file.
345 void setFETokenInfoChangedSinceDeserialization() {
346 FEChangedAfterLoad = true;
347 }
348
349 /// Determine whether the information for this identifier is out of
350 /// date with respect to the external source.
351 bool isOutOfDate() const { return OutOfDate; }
352
353 /// Set whether the information for this identifier is out of
354 /// date with respect to the external source.
355 void setOutOfDate(bool OOD) {
356 OutOfDate = OOD;
357 if (OOD)
358 NeedsHandleIdentifier = true;
359 else
360 RecomputeNeedsHandleIdentifier();
361 }
362
363 /// Determine whether this is the contextual keyword \c import.
364 bool isModulesImport() const { return IsModulesImport; }
365
366 /// Set whether this identifier is the contextual keyword \c import.
367 void setModulesImport(bool I) {
368 IsModulesImport = I;
369 if (I)
370 NeedsHandleIdentifier = true;
371 else
372 RecomputeNeedsHandleIdentifier();
373 }
374
375 /// Return true if this identifier is an editor placeholder.
376 ///
377 /// Editor placeholders are produced by the code-completion engine and are
378 /// represented as characters between '<#' and '#>' in the source code. An
379 /// example of auto-completed call with a placeholder parameter is shown
380 /// below:
381 /// \code
382 /// function(<#int x#>);
383 /// \endcode
384 bool isEditorPlaceholder() const {
385 return getName().startswith("<#") && getName().endswith("#>");
386 }
387
388 /// Provide less than operator for lexicographical sorting.
389 bool operator<(const IdentifierInfo &RHS) const {
390 return getName() < RHS.getName();
391 }
392
393 private:
394 /// The Preprocessor::HandleIdentifier does several special (but rare)
395 /// things to identifiers of various sorts. For example, it changes the
396 /// \c for keyword token from tok::identifier to tok::for.
397 ///
398 /// This method is very tied to the definition of HandleIdentifier. Any
399 /// change to it should be reflected here.
400 void RecomputeNeedsHandleIdentifier() {
401 NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
402 isExtensionToken() || isFutureCompatKeyword() ||
403 isOutOfDate() || isModulesImport();
404 }
405 };
406
407 /// An RAII object for [un]poisoning an identifier within a scope.
408 ///
409 /// \p II is allowed to be null, in which case objects of this type have
410 /// no effect.
411 class PoisonIdentifierRAIIObject {
412 IdentifierInfo *const II;
413 const bool OldValue;
414
415 public:
PoisonIdentifierRAIIObject(IdentifierInfo * II,bool NewValue)416 PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
417 : II(II), OldValue(II ? II->isPoisoned() : false) {
418 if(II)
419 II->setIsPoisoned(NewValue);
420 }
421
~PoisonIdentifierRAIIObject()422 ~PoisonIdentifierRAIIObject() {
423 if(II)
424 II->setIsPoisoned(OldValue);
425 }
426 };
427
428 /// An iterator that walks over all of the known identifiers
429 /// in the lookup table.
430 ///
431 /// Since this iterator uses an abstract interface via virtual
432 /// functions, it uses an object-oriented interface rather than the
433 /// more standard C++ STL iterator interface. In this OO-style
434 /// iteration, the single function \c Next() provides dereference,
435 /// advance, and end-of-sequence checking in a single
436 /// operation. Subclasses of this iterator type will provide the
437 /// actual functionality.
438 class IdentifierIterator {
439 protected:
440 IdentifierIterator() = default;
441
442 public:
443 IdentifierIterator(const IdentifierIterator &) = delete;
444 IdentifierIterator &operator=(const IdentifierIterator &) = delete;
445
446 virtual ~IdentifierIterator();
447
448 /// Retrieve the next string in the identifier table and
449 /// advances the iterator for the following string.
450 ///
451 /// \returns The next string in the identifier table. If there is
452 /// no such string, returns an empty \c StringRef.
453 virtual StringRef Next() = 0;
454 };
455
456 /// Provides lookups to, and iteration over, IdentiferInfo objects.
457 class IdentifierInfoLookup {
458 public:
459 virtual ~IdentifierInfoLookup();
460
461 /// Return the IdentifierInfo for the specified named identifier.
462 ///
463 /// Unlike the version in IdentifierTable, this returns a pointer instead
464 /// of a reference. If the pointer is null then the IdentifierInfo cannot
465 /// be found.
466 virtual IdentifierInfo* get(StringRef Name) = 0;
467
468 /// Retrieve an iterator into the set of all identifiers
469 /// known to this identifier lookup source.
470 ///
471 /// This routine provides access to all of the identifiers known to
472 /// the identifier lookup, allowing access to the contents of the
473 /// identifiers without introducing the overhead of constructing
474 /// IdentifierInfo objects for each.
475 ///
476 /// \returns A new iterator into the set of known identifiers. The
477 /// caller is responsible for deleting this iterator.
478 virtual IdentifierIterator *getIdentifiers();
479 };
480
481 /// Implements an efficient mapping from strings to IdentifierInfo nodes.
482 ///
483 /// This has no other purpose, but this is an extremely performance-critical
484 /// piece of the code, as each occurrence of every identifier goes through
485 /// here when lexed.
486 class IdentifierTable {
487 // Shark shows that using MallocAllocator is *much* slower than using this
488 // BumpPtrAllocator!
489 using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>;
490 HashTableTy HashTable;
491
492 IdentifierInfoLookup* ExternalLookup;
493
494 public:
495 /// Create the identifier table.
496 explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr);
497
498 /// Create the identifier table, populating it with info about the
499 /// language keywords for the language specified by \p LangOpts.
500 explicit IdentifierTable(const LangOptions &LangOpts,
501 IdentifierInfoLookup *ExternalLookup = nullptr);
502
503 /// Set the external identifier lookup mechanism.
setExternalIdentifierLookup(IdentifierInfoLookup * IILookup)504 void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
505 ExternalLookup = IILookup;
506 }
507
508 /// Retrieve the external identifier lookup object, if any.
getExternalIdentifierLookup()509 IdentifierInfoLookup *getExternalIdentifierLookup() const {
510 return ExternalLookup;
511 }
512
getAllocator()513 llvm::BumpPtrAllocator& getAllocator() {
514 return HashTable.getAllocator();
515 }
516
517 /// Return the identifier token info for the specified named
518 /// identifier.
get(StringRef Name)519 IdentifierInfo &get(StringRef Name) {
520 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
521
522 IdentifierInfo *&II = Entry.second;
523 if (II) return *II;
524
525 // No entry; if we have an external lookup, look there first.
526 if (ExternalLookup) {
527 II = ExternalLookup->get(Name);
528 if (II)
529 return *II;
530 }
531
532 // Lookups failed, make a new IdentifierInfo.
533 void *Mem = getAllocator().Allocate<IdentifierInfo>();
534 II = new (Mem) IdentifierInfo();
535
536 // Make sure getName() knows how to find the IdentifierInfo
537 // contents.
538 II->Entry = &Entry;
539
540 return *II;
541 }
542
get(StringRef Name,tok::TokenKind TokenCode)543 IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
544 IdentifierInfo &II = get(Name);
545 II.TokenID = TokenCode;
546 assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
547 return II;
548 }
549
550 /// Gets an IdentifierInfo for the given name without consulting
551 /// external sources.
552 ///
553 /// This is a version of get() meant for external sources that want to
554 /// introduce or modify an identifier. If they called get(), they would
555 /// likely end up in a recursion.
getOwn(StringRef Name)556 IdentifierInfo &getOwn(StringRef Name) {
557 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
558
559 IdentifierInfo *&II = Entry.second;
560 if (II)
561 return *II;
562
563 // Lookups failed, make a new IdentifierInfo.
564 void *Mem = getAllocator().Allocate<IdentifierInfo>();
565 II = new (Mem) IdentifierInfo();
566
567 // Make sure getName() knows how to find the IdentifierInfo
568 // contents.
569 II->Entry = &Entry;
570
571 // If this is the 'import' contextual keyword, mark it as such.
572 if (Name.equals("import"))
573 II->setModulesImport(true);
574
575 return *II;
576 }
577
578 using iterator = HashTableTy::const_iterator;
579 using const_iterator = HashTableTy::const_iterator;
580
begin()581 iterator begin() const { return HashTable.begin(); }
end()582 iterator end() const { return HashTable.end(); }
size()583 unsigned size() const { return HashTable.size(); }
584
585 /// Print some statistics to stderr that indicate how well the
586 /// hashing is doing.
587 void PrintStats() const;
588
589 /// Populate the identifier table with info about the language keywords
590 /// for the language specified by \p LangOpts.
591 void AddKeywords(const LangOptions &LangOpts);
592 };
593
/// A family of Objective-C methods.
///
/// The language attaches no inherent meaning to these families, but they are
/// central enough in the existing implementations to merit direct AST
/// support. In theory arbitrary methods could be grouped into families; the
/// focus here is on the methods involved in allocation and retain-count
/// management, since these are the most "core" and the most likely to be
/// useful to diverse clients without extra information.
///
/// Both selectors and actual method declarations may be classified into
/// families. Method families may impose additional restrictions beyond their
/// selector name; for example, a method called '_init' that returns void is
/// not considered to be in the 'init' family (but would be if it returned
/// 'id'). It is also possible to explicitly change or remove a method's
/// family. Therefore the method's family should be considered the single
/// source of truth.
enum ObjCMethodFamily {
  /// No particular method family.
  OMF_None,

  // Selectors in the following families may have arbitrary arity, may be
  // written with arbitrary leading underscores, and may have additional
  // CamelCase "words" in their first selector chunk after the family name.
  OMF_alloc,
  OMF_copy,
  OMF_init,
  OMF_mutableCopy,
  OMF_new,

  // The following families are singletons: each consists only of the
  // nullary selector with the given name.
  OMF_autorelease,
  OMF_dealloc,
  OMF_finalize,
  OMF_release,
  OMF_retain,
  OMF_retainCount,
  OMF_self,
  OMF_initialize,

  // performSelector families
  OMF_performSelector
};
639
/// Number of bits sufficient to store any enumerator of ObjCMethodFamily as
/// well as InvalidObjCMethodFamily.
enum { ObjCMethodFamilyBitWidth = 4 };

/// An invalid value of ObjCMethodFamily: the all-ones pattern of
/// ObjCMethodFamilyBitWidth bits.
enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 };
646
/// A family of Objective-C methods.
///
/// These are families of methods whose result type is initially 'id', but
/// which are candidates for having the result type changed to
/// 'instancetype'.
enum ObjCInstanceTypeFamily {
  OIT_None,
  OIT_Array,
  OIT_Dictionary,
  OIT_Singleton,
  OIT_Init,
  OIT_ReturnsSelf
};
659
/// String-format family of a selector, as computed by
/// Selector::getStringFormatFamily(); SFF_None means no particular family.
enum ObjCStringFormatFamily {
  SFF_None,
  SFF_NSString,
  SFF_CFString
};
665
666 /// Smart pointer class that efficiently represents Objective-C method
667 /// names.
668 ///
669 /// This class will either point to an IdentifierInfo or a
670 /// MultiKeywordSelector (which is private). This enables us to optimize
671 /// selectors that take no arguments and selectors that take 1 argument, which
672 /// accounts for 78% of all selectors in Cocoa.h.
673 class Selector {
674 friend class Diagnostic;
675 friend class SelectorTable; // only the SelectorTable can create these
676 friend class DeclarationName; // and the AST's DeclarationName.
677
678 enum IdentifierInfoFlag {
679 // Empty selector = 0. Note that these enumeration values must
680 // correspond to the enumeration values of DeclarationName::StoredNameKind
681 ZeroArg = 0x01,
682 OneArg = 0x02,
683 MultiArg = 0x07,
684 ArgFlags = 0x07
685 };
686
687 /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low
688 /// three bits of InfoPtr to store an IdentifierInfoFlag. Note that in any
689 /// case IdentifierInfo and MultiKeywordSelector are already aligned to
690 /// 8 bytes even on 32 bits archs because of DeclarationName.
691 uintptr_t InfoPtr = 0;
692
Selector(IdentifierInfo * II,unsigned nArgs)693 Selector(IdentifierInfo *II, unsigned nArgs) {
694 InfoPtr = reinterpret_cast<uintptr_t>(II);
695 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
696 assert(nArgs < 2 && "nArgs not equal to 0/1");
697 InfoPtr |= nArgs+1;
698 }
699
Selector(MultiKeywordSelector * SI)700 Selector(MultiKeywordSelector *SI) {
701 InfoPtr = reinterpret_cast<uintptr_t>(SI);
702 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
703 InfoPtr |= MultiArg;
704 }
705
getAsIdentifierInfo()706 IdentifierInfo *getAsIdentifierInfo() const {
707 if (getIdentifierInfoFlag() < MultiArg)
708 return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
709 return nullptr;
710 }
711
getMultiKeywordSelector()712 MultiKeywordSelector *getMultiKeywordSelector() const {
713 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
714 }
715
getIdentifierInfoFlag()716 unsigned getIdentifierInfoFlag() const {
717 return InfoPtr & ArgFlags;
718 }
719
720 static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
721
722 static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel);
723
724 public:
725 /// The default ctor should only be used when creating data structures that
726 /// will contain selectors.
727 Selector() = default;
Selector(uintptr_t V)728 explicit Selector(uintptr_t V) : InfoPtr(V) {}
729
730 /// operator==/!= - Indicate whether the specified selectors are identical.
731 bool operator==(Selector RHS) const {
732 return InfoPtr == RHS.InfoPtr;
733 }
734 bool operator!=(Selector RHS) const {
735 return InfoPtr != RHS.InfoPtr;
736 }
737
getAsOpaquePtr()738 void *getAsOpaquePtr() const {
739 return reinterpret_cast<void*>(InfoPtr);
740 }
741
742 /// Determine whether this is the empty selector.
isNull()743 bool isNull() const { return InfoPtr == 0; }
744
745 // Predicates to identify the selector type.
isKeywordSelector()746 bool isKeywordSelector() const {
747 return getIdentifierInfoFlag() != ZeroArg;
748 }
749
isUnarySelector()750 bool isUnarySelector() const {
751 return getIdentifierInfoFlag() == ZeroArg;
752 }
753
754 unsigned getNumArgs() const;
755
756 /// Retrieve the identifier at a given position in the selector.
757 ///
758 /// Note that the identifier pointer returned may be NULL. Clients that only
759 /// care about the text of the identifier string, and not the specific,
760 /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
761 /// an empty string when the identifier pointer would be NULL.
762 ///
763 /// \param argIndex The index for which we want to retrieve the identifier.
764 /// This index shall be less than \c getNumArgs() unless this is a keyword
765 /// selector, in which case 0 is the only permissible value.
766 ///
767 /// \returns the uniqued identifier for this slot, or NULL if this slot has
768 /// no corresponding identifier.
769 IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
770
771 /// Retrieve the name at a given position in the selector.
772 ///
773 /// \param argIndex The index for which we want to retrieve the name.
774 /// This index shall be less than \c getNumArgs() unless this is a keyword
775 /// selector, in which case 0 is the only permissible value.
776 ///
777 /// \returns the name for this slot, which may be the empty string if no
778 /// name was supplied.
779 StringRef getNameForSlot(unsigned argIndex) const;
780
781 /// Derive the full selector name (e.g. "foo:bar:") and return
782 /// it as an std::string.
783 std::string getAsString() const;
784
785 /// Prints the full selector name (e.g. "foo:bar:").
786 void print(llvm::raw_ostream &OS) const;
787
788 void dump() const;
789
790 /// Derive the conventional family of this method.
getMethodFamily()791 ObjCMethodFamily getMethodFamily() const {
792 return getMethodFamilyImpl(*this);
793 }
794
getStringFormatFamily()795 ObjCStringFormatFamily getStringFormatFamily() const {
796 return getStringFormatFamilyImpl(*this);
797 }
798
getEmptyMarker()799 static Selector getEmptyMarker() {
800 return Selector(uintptr_t(-1));
801 }
802
getTombstoneMarker()803 static Selector getTombstoneMarker() {
804 return Selector(uintptr_t(-2));
805 }
806
807 static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel);
808 };
809
810 /// This table allows us to fully hide how we implement
811 /// multi-keyword caching.
812 class SelectorTable {
813 // Actually a SelectorTableImpl
814 void *Impl;
815
816 public:
817 SelectorTable();
818 SelectorTable(const SelectorTable &) = delete;
819 SelectorTable &operator=(const SelectorTable &) = delete;
820 ~SelectorTable();
821
822 /// Can create any sort of selector.
823 ///
824 /// \p NumArgs indicates whether this is a no argument selector "foo", a
825 /// single argument selector "foo:" or multi-argument "foo:bar:".
826 Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);
827
getUnarySelector(IdentifierInfo * ID)828 Selector getUnarySelector(IdentifierInfo *ID) {
829 return Selector(ID, 1);
830 }
831
getNullarySelector(IdentifierInfo * ID)832 Selector getNullarySelector(IdentifierInfo *ID) {
833 return Selector(ID, 0);
834 }
835
836 /// Return the total amount of memory allocated for managing selectors.
837 size_t getTotalMemory() const;
838
839 /// Return the default setter name for the given identifier.
840 ///
841 /// This is "set" + \p Name where the initial character of \p Name
842 /// has been capitalized.
843 static SmallString<64> constructSetterName(StringRef Name);
844
845 /// Return the default setter selector for the given identifier.
846 ///
847 /// This is "set" + \p Name where the initial character of \p Name
848 /// has been capitalized.
849 static Selector constructSetterSelector(IdentifierTable &Idents,
850 SelectorTable &SelTable,
851 const IdentifierInfo *Name);
852
853 /// Return the property name for the given setter selector.
854 static std::string getPropertyNameFromSetterSelector(Selector Sel);
855 };
856
857 namespace detail {
858
859 /// DeclarationNameExtra is used as a base of various uncommon special names.
860 /// This class is needed since DeclarationName has not enough space to store
861 /// the kind of every possible names. Therefore the kind of common names is
862 /// stored directly in DeclarationName, and the kind of uncommon names is
863 /// stored in DeclarationNameExtra. It is aligned to 8 bytes because
864 /// DeclarationName needs the lower 3 bits to store the kind of common names.
865 /// DeclarationNameExtra is tightly coupled to DeclarationName and any change
866 /// here is very likely to require changes in DeclarationName(Table).
alignas(IdentifierInfoAlignment)867 class alignas(IdentifierInfoAlignment) DeclarationNameExtra {
868 friend class clang::DeclarationName;
869 friend class clang::DeclarationNameTable;
870
871 protected:
872 /// The kind of "extra" information stored in the DeclarationName. See
873 /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values
874 /// are used. Note that DeclarationName depends on the numerical values
875 /// of the enumerators in this enum. See DeclarationName::StoredNameKind
876 /// for more info.
877 enum ExtraKind {
878 CXXDeductionGuideName,
879 CXXLiteralOperatorName,
880 CXXUsingDirective,
881 ObjCMultiArgSelector
882 };
883
884 /// ExtraKindOrNumArgs has one of the following meaning:
885 /// * The kind of an uncommon C++ special name. This DeclarationNameExtra
886 /// is in this case in fact either a CXXDeductionGuideNameExtra or
887 /// a CXXLiteralOperatorIdName.
888 ///
889 /// * It may be also name common to C++ using-directives (CXXUsingDirective),
890 ///
891 /// * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is
892 /// the number of arguments in the Objective-C selector, in which
893 /// case the DeclarationNameExtra is also a MultiKeywordSelector.
894 unsigned ExtraKindOrNumArgs;
895
896 DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {}
897 DeclarationNameExtra(unsigned NumArgs)
898 : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {}
899
900 /// Return the corresponding ExtraKind.
901 ExtraKind getKind() const {
902 return static_cast<ExtraKind>(ExtraKindOrNumArgs >
903 (unsigned)ObjCMultiArgSelector
904 ? (unsigned)ObjCMultiArgSelector
905 : ExtraKindOrNumArgs);
906 }
907
908 /// Return the number of arguments in an ObjC selector. Only valid when this
909 /// is indeed an ObjCMultiArgSelector.
910 unsigned getNumArgs() const {
911 assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector &&
912 "getNumArgs called but this is not an ObjC selector!");
913 return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector;
914 }
915 };
916
917 } // namespace detail
918
919 } // namespace clang
920
921 namespace llvm {
922
923 /// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
924 /// DenseSets.
925 template <>
926 struct DenseMapInfo<clang::Selector> {
927 static clang::Selector getEmptyKey() {
928 return clang::Selector::getEmptyMarker();
929 }
930
931 static clang::Selector getTombstoneKey() {
932 return clang::Selector::getTombstoneMarker();
933 }
934
935 static unsigned getHashValue(clang::Selector S);
936
937 static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
938 return LHS == RHS;
939 }
940 };
941
942 template <>
943 struct isPodLike<clang::Selector> { static const bool value = true; };
944
945 template<>
946 struct PointerLikeTypeTraits<clang::Selector> {
947 static const void *getAsVoidPointer(clang::Selector P) {
948 return P.getAsOpaquePtr();
949 }
950
951 static clang::Selector getFromVoidPointer(const void *P) {
952 return clang::Selector(reinterpret_cast<uintptr_t>(P));
953 }
954
955 enum { NumLowBitsAvailable = 0 };
956 };
957
958 // Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
959 // are not guaranteed to be 8-byte aligned.
960 template<>
961 struct PointerLikeTypeTraits<clang::IdentifierInfo*> {
962 static void *getAsVoidPointer(clang::IdentifierInfo* P) {
963 return P;
964 }
965
966 static clang::IdentifierInfo *getFromVoidPointer(void *P) {
967 return static_cast<clang::IdentifierInfo*>(P);
968 }
969
970 enum { NumLowBitsAvailable = 1 };
971 };
972
973 template<>
974 struct PointerLikeTypeTraits<const clang::IdentifierInfo*> {
975 static const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
976 return P;
977 }
978
979 static const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
980 return static_cast<const clang::IdentifierInfo*>(P);
981 }
982
983 enum { NumLowBitsAvailable = 1 };
984 };
985
986 } // namespace llvm
987
988 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
989