1 //===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the clang::IdentifierInfo, clang::IdentifierTable, and
11 /// clang::Selector interfaces.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
16 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17
18 #include "clang/Basic/LLVM.h"
19 #include "clang/Basic/TokenKinds.h"
20 #include "llvm/ADT/DenseMapInfo.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/ADT/StringMap.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Allocator.h"
25 #include "llvm/Support/PointerLikeTypeTraits.h"
26 #include "llvm/Support/type_traits.h"
27 #include <cassert>
28 #include <cstddef>
29 #include <cstdint>
30 #include <cstring>
31 #include <string>
32 #include <utility>
33
34 namespace clang {
35
36 class DeclarationName;
37 class DeclarationNameTable;
38 class IdentifierInfo;
39 class LangOptions;
40 class MultiKeywordSelector;
41 class SourceLocation;
42
/// Classifies whether, and for what reason, an identifier is reserved.
///
/// The helpers isReservedAtGlobalScope() and isReservedInAllContexts()
/// interpret these values; IdentifierInfo::isReserved() produces them.
enum class ReservedIdentifierStatus {
  /// The identifier is not reserved. This is the only value for which
  /// isReservedAtGlobalScope() returns false.
  NotReserved = 0,
  // The two enumerators below are the ones isReservedInAllContexts() treats
  // as *not* reserved in every context.
  StartsWithUnderscoreAtGlobalScope,
  StartsWithUnderscoreAndIsExternC,
  // The remaining enumerators are reported as reserved in all contexts.
  StartsWithDoubleUnderscore,
  StartsWithUnderscoreFollowedByCapitalLetter,
  ContainsDoubleUnderscore,
};
51
52 /// Determine whether an identifier is reserved for use as a name at global
53 /// scope. Such identifiers might be implementation-specific global functions
54 /// or variables.
isReservedAtGlobalScope(ReservedIdentifierStatus Status)55 inline bool isReservedAtGlobalScope(ReservedIdentifierStatus Status) {
56 return Status != ReservedIdentifierStatus::NotReserved;
57 }
58
59 /// Determine whether an identifier is reserved in all contexts. Such
60 /// identifiers might be implementation-specific keywords or macros, for
61 /// example.
isReservedInAllContexts(ReservedIdentifierStatus Status)62 inline bool isReservedInAllContexts(ReservedIdentifierStatus Status) {
63 return Status != ReservedIdentifierStatus::NotReserved &&
64 Status != ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope &&
65 Status != ReservedIdentifierStatus::StartsWithUnderscoreAndIsExternC;
66 }
67
/// A simple pair of identifier info and location.
using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>;

/// IdentifierInfo and other related classes are aligned to
/// 8 bytes so that DeclarationName can use the lower 3 bits
/// of a pointer to one of these classes.
enum { IdentifierInfoAlignment = 8 };

/// Width, in bits, of the IdentifierInfo::ObjCOrBuiltinID bit-field, which
/// holds either an Objective-C keyword ID or an offset builtin ID.
static constexpr int ObjCOrBuiltinIDBits = 16;
77
78 /// One of these records is kept for each identifier that
79 /// is lexed. This contains information about whether the token was \#define'd,
80 /// is a language keyword, or if it is a front-end token of some sort (e.g. a
81 /// variable or function name). The preprocessor keeps this information in a
82 /// set, and all tok::identifier tokens have a pointer to one of these.
83 /// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits.
alignas(IdentifierInfoAlignment)84 class alignas(IdentifierInfoAlignment) IdentifierInfo {
85 friend class IdentifierTable;
86
87 // Front-end token ID or tok::identifier.
88 unsigned TokenID : 9;
89
90 // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
91 // First NUM_OBJC_KEYWORDS values are for Objective-C,
92 // the remaining values are for builtins.
93 unsigned ObjCOrBuiltinID : ObjCOrBuiltinIDBits;
94
95 // True if there is a #define for this.
96 unsigned HasMacro : 1;
97
98 // True if there was a #define for this.
99 unsigned HadMacro : 1;
100
101 // True if the identifier is a language extension.
102 unsigned IsExtension : 1;
103
104 // True if the identifier is a keyword in a newer or proposed Standard.
105 unsigned IsFutureCompatKeyword : 1;
106
107 // True if the identifier is poisoned.
108 unsigned IsPoisoned : 1;
109
110 // True if the identifier is a C++ operator keyword.
111 unsigned IsCPPOperatorKeyword : 1;
112
113 // Internal bit set by the member function RecomputeNeedsHandleIdentifier.
114 // See comment about RecomputeNeedsHandleIdentifier for more info.
115 unsigned NeedsHandleIdentifier : 1;
116
117 // True if the identifier was loaded (at least partially) from an AST file.
118 unsigned IsFromAST : 1;
119
120 // True if the identifier has changed from the definition
121 // loaded from an AST file.
122 unsigned ChangedAfterLoad : 1;
123
124 // True if the identifier's frontend information has changed from the
125 // definition loaded from an AST file.
126 unsigned FEChangedAfterLoad : 1;
127
128 // True if revertTokenIDToIdentifier was called.
129 unsigned RevertedTokenID : 1;
130
131 // True if there may be additional information about
132 // this identifier stored externally.
133 unsigned OutOfDate : 1;
134
135 // True if this is the 'import' contextual keyword.
136 unsigned IsModulesImport : 1;
137
138 // True if this is a mangled OpenMP variant name.
139 unsigned IsMangledOpenMPVariantName : 1;
140
141 // True if this is a deprecated macro.
142 unsigned IsDeprecatedMacro : 1;
143
144 // True if this macro is unsafe in headers.
145 unsigned IsRestrictExpansion : 1;
146
147 // True if this macro is final.
148 unsigned IsFinal : 1;
149
150 // 22 bits left in a 64-bit word.
151
152 // Managed by the language front-end.
153 void *FETokenInfo = nullptr;
154
155 llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr;
156
157 IdentifierInfo()
158 : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false),
159 HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false),
160 IsPoisoned(false), IsCPPOperatorKeyword(false),
161 NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false),
162 FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false),
163 IsModulesImport(false), IsMangledOpenMPVariantName(false),
164 IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {}
165
166 public:
167 IdentifierInfo(const IdentifierInfo &) = delete;
168 IdentifierInfo &operator=(const IdentifierInfo &) = delete;
169 IdentifierInfo(IdentifierInfo &&) = delete;
170 IdentifierInfo &operator=(IdentifierInfo &&) = delete;
171
172 /// Return true if this is the identifier for the specified string.
173 ///
174 /// This is intended to be used for string literals only: II->isStr("foo").
175 template <std::size_t StrLen>
176 bool isStr(const char (&Str)[StrLen]) const {
177 return getLength() == StrLen-1 &&
178 memcmp(getNameStart(), Str, StrLen-1) == 0;
179 }
180
181 /// Return true if this is the identifier for the specified StringRef.
182 bool isStr(llvm::StringRef Str) const {
183 llvm::StringRef ThisStr(getNameStart(), getLength());
184 return ThisStr == Str;
185 }
186
187 /// Return the beginning of the actual null-terminated string for this
188 /// identifier.
189 const char *getNameStart() const { return Entry->getKeyData(); }
190
191 /// Efficiently return the length of this identifier info.
192 unsigned getLength() const { return Entry->getKeyLength(); }
193
194 /// Return the actual identifier string.
195 StringRef getName() const {
196 return StringRef(getNameStart(), getLength());
197 }
198
199 /// Return true if this identifier is \#defined to some other value.
200 /// \note The current definition may be in a module and not currently visible.
201 bool hasMacroDefinition() const {
202 return HasMacro;
203 }
204 void setHasMacroDefinition(bool Val) {
205 if (HasMacro == Val) return;
206
207 HasMacro = Val;
208 if (Val) {
209 NeedsHandleIdentifier = true;
210 HadMacro = true;
211 } else {
212 // If this is a final macro, make the deprecation and header unsafe bits
213 // stick around after the undefinition so they apply to any redefinitions.
214 if (!IsFinal) {
215 // Because calling the setters of these calls recomputes, just set them
216 // manually to avoid recomputing a bunch of times.
217 IsDeprecatedMacro = false;
218 IsRestrictExpansion = false;
219 }
220 RecomputeNeedsHandleIdentifier();
221 }
222 }
223 /// Returns true if this identifier was \#defined to some value at any
224 /// moment. In this case there should be an entry for the identifier in the
225 /// macro history table in Preprocessor.
226 bool hadMacroDefinition() const {
227 return HadMacro;
228 }
229
230 bool isDeprecatedMacro() const { return IsDeprecatedMacro; }
231
232 void setIsDeprecatedMacro(bool Val) {
233 if (IsDeprecatedMacro == Val)
234 return;
235 IsDeprecatedMacro = Val;
236 if (Val)
237 NeedsHandleIdentifier = true;
238 else
239 RecomputeNeedsHandleIdentifier();
240 }
241
242 bool isRestrictExpansion() const { return IsRestrictExpansion; }
243
244 void setIsRestrictExpansion(bool Val) {
245 if (IsRestrictExpansion == Val)
246 return;
247 IsRestrictExpansion = Val;
248 if (Val)
249 NeedsHandleIdentifier = true;
250 else
251 RecomputeNeedsHandleIdentifier();
252 }
253
254 bool isFinal() const { return IsFinal; }
255
256 void setIsFinal(bool Val) { IsFinal = Val; }
257
258 /// If this is a source-language token (e.g. 'for'), this API
259 /// can be used to cause the lexer to map identifiers to source-language
260 /// tokens.
261 tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
262
263 /// True if revertTokenIDToIdentifier() was called.
264 bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
265
266 /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
267 /// compatibility.
268 ///
269 /// TokenID is normally read-only but there are 2 instances where we revert it
270 /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
271 /// using this method so we can inform serialization about it.
272 void revertTokenIDToIdentifier() {
273 assert(TokenID != tok::identifier && "Already at tok::identifier");
274 TokenID = tok::identifier;
275 RevertedTokenID = true;
276 }
277 void revertIdentifierToTokenID(tok::TokenKind TK) {
278 assert(TokenID == tok::identifier && "Should be at tok::identifier");
279 TokenID = TK;
280 RevertedTokenID = false;
281 }
282
283 /// Return the preprocessor keyword ID for this identifier.
284 ///
285 /// For example, "define" will return tok::pp_define.
286 tok::PPKeywordKind getPPKeywordID() const;
287
288 /// Return the Objective-C keyword ID for the this identifier.
289 ///
290 /// For example, 'class' will return tok::objc_class if ObjC is enabled.
291 tok::ObjCKeywordKind getObjCKeywordID() const {
292 if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
293 return tok::ObjCKeywordKind(ObjCOrBuiltinID);
294 else
295 return tok::objc_not_keyword;
296 }
297 void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
298
299 /// Return a value indicating whether this is a builtin function.
300 ///
301 /// 0 is not-built-in. 1+ are specific builtin functions.
302 unsigned getBuiltinID() const {
303 if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
304 return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
305 else
306 return 0;
307 }
308 void setBuiltinID(unsigned ID) {
309 ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
310 assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
311 && "ID too large for field!");
312 }
313
314 unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
315 void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
316
317 /// get/setExtension - Initialize information about whether or not this
318 /// language token is an extension. This controls extension warnings, and is
319 /// only valid if a custom token ID is set.
320 bool isExtensionToken() const { return IsExtension; }
321 void setIsExtensionToken(bool Val) {
322 IsExtension = Val;
323 if (Val)
324 NeedsHandleIdentifier = true;
325 else
326 RecomputeNeedsHandleIdentifier();
327 }
328
329 /// is/setIsFutureCompatKeyword - Initialize information about whether or not
330 /// this language token is a keyword in a newer or proposed Standard. This
331 /// controls compatibility warnings, and is only true when not parsing the
332 /// corresponding Standard. Once a compatibility problem has been diagnosed
333 /// with this keyword, the flag will be cleared.
334 bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; }
335 void setIsFutureCompatKeyword(bool Val) {
336 IsFutureCompatKeyword = Val;
337 if (Val)
338 NeedsHandleIdentifier = true;
339 else
340 RecomputeNeedsHandleIdentifier();
341 }
342
343 /// setIsPoisoned - Mark this identifier as poisoned. After poisoning, the
344 /// Preprocessor will emit an error every time this token is used.
345 void setIsPoisoned(bool Value = true) {
346 IsPoisoned = Value;
347 if (Value)
348 NeedsHandleIdentifier = true;
349 else
350 RecomputeNeedsHandleIdentifier();
351 }
352
353 /// Return true if this token has been poisoned.
354 bool isPoisoned() const { return IsPoisoned; }
355
356 /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
357 /// this identifier is a C++ alternate representation of an operator.
358 void setIsCPlusPlusOperatorKeyword(bool Val = true) {
359 IsCPPOperatorKeyword = Val;
360 }
361 bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
362
363 /// Return true if this token is a keyword in the specified language.
364 bool isKeyword(const LangOptions &LangOpts) const;
365
366 /// Return true if this token is a C++ keyword in the specified
367 /// language.
368 bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
369
370 /// Get and set FETokenInfo. The language front-end is allowed to associate
371 /// arbitrary metadata with this token.
372 void *getFETokenInfo() const { return FETokenInfo; }
373 void setFETokenInfo(void *T) { FETokenInfo = T; }
374
375 /// Return true if the Preprocessor::HandleIdentifier must be called
376 /// on a token of this identifier.
377 ///
378 /// If this returns false, we know that HandleIdentifier will not affect
379 /// the token.
380 bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
381
382 /// Return true if the identifier in its current state was loaded
383 /// from an AST file.
384 bool isFromAST() const { return IsFromAST; }
385
386 void setIsFromAST() { IsFromAST = true; }
387
388 /// Determine whether this identifier has changed since it was loaded
389 /// from an AST file.
390 bool hasChangedSinceDeserialization() const {
391 return ChangedAfterLoad;
392 }
393
394 /// Note that this identifier has changed since it was loaded from
395 /// an AST file.
396 void setChangedSinceDeserialization() {
397 ChangedAfterLoad = true;
398 }
399
400 /// Determine whether the frontend token information for this
401 /// identifier has changed since it was loaded from an AST file.
402 bool hasFETokenInfoChangedSinceDeserialization() const {
403 return FEChangedAfterLoad;
404 }
405
406 /// Note that the frontend token information for this identifier has
407 /// changed since it was loaded from an AST file.
408 void setFETokenInfoChangedSinceDeserialization() {
409 FEChangedAfterLoad = true;
410 }
411
412 /// Determine whether the information for this identifier is out of
413 /// date with respect to the external source.
414 bool isOutOfDate() const { return OutOfDate; }
415
416 /// Set whether the information for this identifier is out of
417 /// date with respect to the external source.
418 void setOutOfDate(bool OOD) {
419 OutOfDate = OOD;
420 if (OOD)
421 NeedsHandleIdentifier = true;
422 else
423 RecomputeNeedsHandleIdentifier();
424 }
425
426 /// Determine whether this is the contextual keyword \c import.
427 bool isModulesImport() const { return IsModulesImport; }
428
429 /// Set whether this identifier is the contextual keyword \c import.
430 void setModulesImport(bool I) {
431 IsModulesImport = I;
432 if (I)
433 NeedsHandleIdentifier = true;
434 else
435 RecomputeNeedsHandleIdentifier();
436 }
437
438 /// Determine whether this is the mangled name of an OpenMP variant.
439 bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; }
440
441 /// Set whether this is the mangled name of an OpenMP variant.
442 void setMangledOpenMPVariantName(bool I) { IsMangledOpenMPVariantName = I; }
443
444 /// Return true if this identifier is an editor placeholder.
445 ///
446 /// Editor placeholders are produced by the code-completion engine and are
447 /// represented as characters between '<#' and '#>' in the source code. An
448 /// example of auto-completed call with a placeholder parameter is shown
449 /// below:
450 /// \code
451 /// function(<#int x#>);
452 /// \endcode
453 bool isEditorPlaceholder() const {
454 return getName().startswith("<#") && getName().endswith("#>");
455 }
456
457 /// Determine whether \p this is a name reserved for the implementation (C99
458 /// 7.1.3, C++ [lib.global.names]).
459 ReservedIdentifierStatus isReserved(const LangOptions &LangOpts) const;
460
461 /// If the identifier is an "uglified" reserved name, return a cleaned form.
462 /// e.g. _Foo => Foo. Otherwise, just returns the name.
463 StringRef deuglifiedName() const;
464
465 /// Provide less than operator for lexicographical sorting.
466 bool operator<(const IdentifierInfo &RHS) const {
467 return getName() < RHS.getName();
468 }
469
470 private:
471 /// The Preprocessor::HandleIdentifier does several special (but rare)
472 /// things to identifiers of various sorts. For example, it changes the
473 /// \c for keyword token from tok::identifier to tok::for.
474 ///
475 /// This method is very tied to the definition of HandleIdentifier. Any
476 /// change to it should be reflected here.
477 void RecomputeNeedsHandleIdentifier() {
478 NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
479 isExtensionToken() || isFutureCompatKeyword() ||
480 isOutOfDate() || isModulesImport();
481 }
482 };
483
484 /// An RAII object for [un]poisoning an identifier within a scope.
485 ///
486 /// \p II is allowed to be null, in which case objects of this type have
487 /// no effect.
488 class PoisonIdentifierRAIIObject {
489 IdentifierInfo *const II;
490 const bool OldValue;
491
492 public:
PoisonIdentifierRAIIObject(IdentifierInfo * II,bool NewValue)493 PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
494 : II(II), OldValue(II ? II->isPoisoned() : false) {
495 if(II)
496 II->setIsPoisoned(NewValue);
497 }
498
~PoisonIdentifierRAIIObject()499 ~PoisonIdentifierRAIIObject() {
500 if(II)
501 II->setIsPoisoned(OldValue);
502 }
503 };
504
/// An iterator that walks over all of the known identifiers
/// in the lookup table.
///
/// Since this iterator uses an abstract interface via virtual
/// functions, it uses an object-oriented interface rather than the
/// more standard C++ STL iterator interface. In this OO-style
/// iteration, the single function \c Next() provides dereference,
/// advance, and end-of-sequence checking in a single
/// operation. Subclasses of this iterator type will provide the
/// actual functionality.
class IdentifierIterator {
protected:
  // Construction is restricted to subclasses.
  IdentifierIterator() = default;

public:
  // Iterators cannot be copied or copy-assigned.
  IdentifierIterator(const IdentifierIterator &) = delete;
  IdentifierIterator &operator=(const IdentifierIterator &) = delete;

  // Virtual so that a concrete iterator can be destroyed through a pointer
  // to this base class.
  virtual ~IdentifierIterator();

  /// Retrieve the next string in the identifier table and
  /// advances the iterator for the following string.
  ///
  /// \returns The next string in the identifier table. If there is
  /// no such string, returns an empty \c StringRef.
  virtual StringRef Next() = 0;
};
532
/// Provides lookups to, and iteration over, IdentifierInfo objects.
///
/// IdentifierTable consults an object of this type (when one is installed)
/// before creating a new IdentifierInfo for an unknown name.
class IdentifierInfoLookup {
public:
  virtual ~IdentifierInfoLookup();

  /// Return the IdentifierInfo for the specified named identifier.
  ///
  /// Unlike the version in IdentifierTable, this returns a pointer instead
  /// of a reference. If the pointer is null then the IdentifierInfo cannot
  /// be found.
  virtual IdentifierInfo* get(StringRef Name) = 0;

  /// Retrieve an iterator into the set of all identifiers
  /// known to this identifier lookup source.
  ///
  /// This routine provides access to all of the identifiers known to
  /// the identifier lookup, allowing access to the contents of the
  /// identifiers without introducing the overhead of constructing
  /// IdentifierInfo objects for each.
  ///
  /// \returns A new iterator into the set of known identifiers. The
  /// caller is responsible for deleting this iterator.
  virtual IdentifierIterator *getIdentifiers();
};
557
558 /// Implements an efficient mapping from strings to IdentifierInfo nodes.
559 ///
560 /// This has no other purpose, but this is an extremely performance-critical
561 /// piece of the code, as each occurrence of every identifier goes through
562 /// here when lexed.
563 class IdentifierTable {
564 // Shark shows that using MallocAllocator is *much* slower than using this
565 // BumpPtrAllocator!
566 using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>;
567 HashTableTy HashTable;
568
569 IdentifierInfoLookup* ExternalLookup;
570
571 public:
572 /// Create the identifier table.
573 explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr);
574
575 /// Create the identifier table, populating it with info about the
576 /// language keywords for the language specified by \p LangOpts.
577 explicit IdentifierTable(const LangOptions &LangOpts,
578 IdentifierInfoLookup *ExternalLookup = nullptr);
579
580 /// Set the external identifier lookup mechanism.
setExternalIdentifierLookup(IdentifierInfoLookup * IILookup)581 void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
582 ExternalLookup = IILookup;
583 }
584
585 /// Retrieve the external identifier lookup object, if any.
getExternalIdentifierLookup()586 IdentifierInfoLookup *getExternalIdentifierLookup() const {
587 return ExternalLookup;
588 }
589
getAllocator()590 llvm::BumpPtrAllocator& getAllocator() {
591 return HashTable.getAllocator();
592 }
593
594 /// Return the identifier token info for the specified named
595 /// identifier.
get(StringRef Name)596 IdentifierInfo &get(StringRef Name) {
597 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
598
599 IdentifierInfo *&II = Entry.second;
600 if (II) return *II;
601
602 // No entry; if we have an external lookup, look there first.
603 if (ExternalLookup) {
604 II = ExternalLookup->get(Name);
605 if (II)
606 return *II;
607 }
608
609 // Lookups failed, make a new IdentifierInfo.
610 void *Mem = getAllocator().Allocate<IdentifierInfo>();
611 II = new (Mem) IdentifierInfo();
612
613 // Make sure getName() knows how to find the IdentifierInfo
614 // contents.
615 II->Entry = &Entry;
616
617 return *II;
618 }
619
get(StringRef Name,tok::TokenKind TokenCode)620 IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
621 IdentifierInfo &II = get(Name);
622 II.TokenID = TokenCode;
623 assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
624 return II;
625 }
626
627 /// Gets an IdentifierInfo for the given name without consulting
628 /// external sources.
629 ///
630 /// This is a version of get() meant for external sources that want to
631 /// introduce or modify an identifier. If they called get(), they would
632 /// likely end up in a recursion.
getOwn(StringRef Name)633 IdentifierInfo &getOwn(StringRef Name) {
634 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
635
636 IdentifierInfo *&II = Entry.second;
637 if (II)
638 return *II;
639
640 // Lookups failed, make a new IdentifierInfo.
641 void *Mem = getAllocator().Allocate<IdentifierInfo>();
642 II = new (Mem) IdentifierInfo();
643
644 // Make sure getName() knows how to find the IdentifierInfo
645 // contents.
646 II->Entry = &Entry;
647
648 // If this is the 'import' contextual keyword, mark it as such.
649 if (Name.equals("import"))
650 II->setModulesImport(true);
651
652 return *II;
653 }
654
655 using iterator = HashTableTy::const_iterator;
656 using const_iterator = HashTableTy::const_iterator;
657
begin()658 iterator begin() const { return HashTable.begin(); }
end()659 iterator end() const { return HashTable.end(); }
size()660 unsigned size() const { return HashTable.size(); }
661
find(StringRef Name)662 iterator find(StringRef Name) const { return HashTable.find(Name); }
663
664 /// Print some statistics to stderr that indicate how well the
665 /// hashing is doing.
666 void PrintStats() const;
667
668 /// Populate the identifier table with info about the language keywords
669 /// for the language specified by \p LangOpts.
670 void AddKeywords(const LangOptions &LangOpts);
671 };
672
/// A family of Objective-C methods.
///
/// These families have no inherent meaning in the language, but are
/// nonetheless central enough in the existing implementations to
/// merit direct AST support.  While, in theory, arbitrary methods can
/// be considered to form families, we focus here on the methods
/// involving allocation and retain-count management, as these are the
/// most "core" and the most likely to be useful to diverse clients
/// without extra information.
///
/// Both selectors and actual method declarations may be classified
/// into families.  Method families may impose additional restrictions
/// beyond their selector name; for example, a method called '_init'
/// that returns void is not considered to be in the 'init' family
/// (but would be if it returned 'id').  It is also possible to
/// explicitly change or remove a method's family.  Therefore the
/// method's family should be considered the single source of truth.
///
/// Every enumerator must be representable in ObjCMethodFamilyBitWidth bits,
/// with the all-ones pattern left free for InvalidObjCMethodFamily.
enum ObjCMethodFamily {
  /// No particular method family.
  OMF_None,

  // Selectors in these families may have arbitrary arity, may be
  // written with arbitrary leading underscores, and may have
  // additional CamelCase "words" in their first selector chunk
  // following the family name.
  OMF_alloc,
  OMF_copy,
  OMF_init,
  OMF_mutableCopy,
  OMF_new,

  // These families are singletons consisting only of the nullary
  // selector with the given name.
  OMF_autorelease,
  OMF_dealloc,
  OMF_finalize,
  OMF_release,
  OMF_retain,
  OMF_retainCount,
  OMF_self,
  OMF_initialize,

  // performSelector families
  OMF_performSelector
};
718
/// Enough bits to store any enumerator in ObjCMethodFamily or
/// InvalidObjCMethodFamily.
enum { ObjCMethodFamilyBitWidth = 4 };

/// An invalid value of ObjCMethodFamily: the all-ones pattern of
/// ObjCMethodFamilyBitWidth bits (15 for a width of 4).
enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 };
725
/// A family of Objective-C methods.
///
/// These are families of methods whose result type is initially 'id', but
/// are candidates for the result type to be changed to 'instancetype'.
/// Selector::getInstTypeMethodFamily() returns a value of this type.
enum ObjCInstanceTypeFamily {
  OIT_None,
  OIT_Array,
  OIT_Dictionary,
  OIT_Singleton,
  OIT_Init,
  OIT_ReturnsSelf
};
738
/// The string-format family of a selector, as reported by
/// Selector::getStringFormatFamily().
// NOTE(review): the enumerator names suggest NSString- vs. CFString-style
// format methods; confirm against the implementation of
// getStringFormatFamilyImpl().
enum ObjCStringFormatFamily {
  SFF_None,
  SFF_NSString,
  SFF_CFString
};
744
745 /// Smart pointer class that efficiently represents Objective-C method
746 /// names.
747 ///
748 /// This class will either point to an IdentifierInfo or a
749 /// MultiKeywordSelector (which is private). This enables us to optimize
750 /// selectors that take no arguments and selectors that take 1 argument, which
751 /// accounts for 78% of all selectors in Cocoa.h.
752 class Selector {
753 friend class Diagnostic;
754 friend class SelectorTable; // only the SelectorTable can create these
755 friend class DeclarationName; // and the AST's DeclarationName.
756
757 enum IdentifierInfoFlag {
758 // Empty selector = 0. Note that these enumeration values must
759 // correspond to the enumeration values of DeclarationName::StoredNameKind
760 ZeroArg = 0x01,
761 OneArg = 0x02,
762 MultiArg = 0x07,
763 ArgFlags = 0x07
764 };
765
766 /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low
767 /// three bits of InfoPtr to store an IdentifierInfoFlag. Note that in any
768 /// case IdentifierInfo and MultiKeywordSelector are already aligned to
769 /// 8 bytes even on 32 bits archs because of DeclarationName.
770 uintptr_t InfoPtr = 0;
771
Selector(IdentifierInfo * II,unsigned nArgs)772 Selector(IdentifierInfo *II, unsigned nArgs) {
773 InfoPtr = reinterpret_cast<uintptr_t>(II);
774 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
775 assert(nArgs < 2 && "nArgs not equal to 0/1");
776 InfoPtr |= nArgs+1;
777 }
778
Selector(MultiKeywordSelector * SI)779 Selector(MultiKeywordSelector *SI) {
780 InfoPtr = reinterpret_cast<uintptr_t>(SI);
781 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
782 InfoPtr |= MultiArg;
783 }
784
getAsIdentifierInfo()785 IdentifierInfo *getAsIdentifierInfo() const {
786 if (getIdentifierInfoFlag() < MultiArg)
787 return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
788 return nullptr;
789 }
790
getMultiKeywordSelector()791 MultiKeywordSelector *getMultiKeywordSelector() const {
792 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
793 }
794
getIdentifierInfoFlag()795 unsigned getIdentifierInfoFlag() const {
796 return InfoPtr & ArgFlags;
797 }
798
799 static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
800
801 static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel);
802
803 public:
804 /// The default ctor should only be used when creating data structures that
805 /// will contain selectors.
806 Selector() = default;
Selector(uintptr_t V)807 explicit Selector(uintptr_t V) : InfoPtr(V) {}
808
809 /// operator==/!= - Indicate whether the specified selectors are identical.
810 bool operator==(Selector RHS) const {
811 return InfoPtr == RHS.InfoPtr;
812 }
813 bool operator!=(Selector RHS) const {
814 return InfoPtr != RHS.InfoPtr;
815 }
816
getAsOpaquePtr()817 void *getAsOpaquePtr() const {
818 return reinterpret_cast<void*>(InfoPtr);
819 }
820
821 /// Determine whether this is the empty selector.
isNull()822 bool isNull() const { return InfoPtr == 0; }
823
824 // Predicates to identify the selector type.
isKeywordSelector()825 bool isKeywordSelector() const {
826 return getIdentifierInfoFlag() != ZeroArg;
827 }
828
isUnarySelector()829 bool isUnarySelector() const {
830 return getIdentifierInfoFlag() == ZeroArg;
831 }
832
833 /// If this selector is the specific keyword selector described by Names.
834 bool isKeywordSelector(ArrayRef<StringRef> Names) const;
835
836 /// If this selector is the specific unary selector described by Name.
837 bool isUnarySelector(StringRef Name) const;
838
839 unsigned getNumArgs() const;
840
841 /// Retrieve the identifier at a given position in the selector.
842 ///
843 /// Note that the identifier pointer returned may be NULL. Clients that only
844 /// care about the text of the identifier string, and not the specific,
845 /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
846 /// an empty string when the identifier pointer would be NULL.
847 ///
848 /// \param argIndex The index for which we want to retrieve the identifier.
849 /// This index shall be less than \c getNumArgs() unless this is a keyword
850 /// selector, in which case 0 is the only permissible value.
851 ///
852 /// \returns the uniqued identifier for this slot, or NULL if this slot has
853 /// no corresponding identifier.
854 IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
855
/// Retrieve the name at a given position in the selector.
///
/// \param argIndex The index for which we want to retrieve the name.
/// This index shall be less than \c getNumArgs() unless this is a keyword
/// selector, in which case 0 is the only permissible value.
/// NOTE(review): the "keyword selector" exception reads oddly next to
/// \c isUnarySelector() / \c isKeywordSelector(); confirm against the
/// out-of-line definition which selectors are restricted to index 0.
///
/// \returns the name for this slot, which may be the empty string if no
/// name was supplied.
StringRef getNameForSlot(unsigned argIndex) const;
865
/// Derive the full selector name (e.g. "foo:bar:").
///
/// \returns the name as a std::string.
std::string getAsString() const;
869
/// Print the full selector name (e.g. "foo:bar:").
///
/// \param OS The stream handed to the out-of-line definition; presumably the
/// destination of the printed name -- confirm there.
void print(llvm::raw_ostream &OS) const;
872
/// NOTE(review): presumably a debugging aid that prints this selector; the
/// definition is out of line -- confirm the output destination there.
void dump() const;
874
875 /// Derive the conventional family of this method.
getMethodFamily()876 ObjCMethodFamily getMethodFamily() const {
877 return getMethodFamilyImpl(*this);
878 }
879
getStringFormatFamily()880 ObjCStringFormatFamily getStringFormatFamily() const {
881 return getStringFormatFamilyImpl(*this);
882 }
883
getEmptyMarker()884 static Selector getEmptyMarker() {
885 return Selector(uintptr_t(-1));
886 }
887
getTombstoneMarker()888 static Selector getTombstoneMarker() {
889 return Selector(uintptr_t(-2));
890 }
891
/// Map the selector \p sel to an ObjCInstanceTypeFamily.
///
/// NOTE(review): the classification rules live in the out-of-line
/// definition and are not visible here.
static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel);
893 };
894
895 /// This table allows us to fully hide how we implement
896 /// multi-keyword caching.
897 class SelectorTable {
898 // Actually a SelectorTableImpl
899 void *Impl;
900
901 public:
902 SelectorTable();
903 SelectorTable(const SelectorTable &) = delete;
904 SelectorTable &operator=(const SelectorTable &) = delete;
905 ~SelectorTable();
906
907 /// Can create any sort of selector.
908 ///
909 /// \p NumArgs indicates whether this is a no argument selector "foo", a
910 /// single argument selector "foo:" or multi-argument "foo:bar:".
911 Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);
912
getUnarySelector(IdentifierInfo * ID)913 Selector getUnarySelector(IdentifierInfo *ID) {
914 return Selector(ID, 1);
915 }
916
getNullarySelector(IdentifierInfo * ID)917 Selector getNullarySelector(IdentifierInfo *ID) {
918 return Selector(ID, 0);
919 }
920
921 /// Return the total amount of memory allocated for managing selectors.
922 size_t getTotalMemory() const;
923
924 /// Return the default setter name for the given identifier.
925 ///
926 /// This is "set" + \p Name where the initial character of \p Name
927 /// has been capitalized.
928 static SmallString<64> constructSetterName(StringRef Name);
929
930 /// Return the default setter selector for the given identifier.
931 ///
932 /// This is "set" + \p Name where the initial character of \p Name
933 /// has been capitalized.
934 static Selector constructSetterSelector(IdentifierTable &Idents,
935 SelectorTable &SelTable,
936 const IdentifierInfo *Name);
937
938 /// Return the property name for the given setter selector.
939 static std::string getPropertyNameFromSetterSelector(Selector Sel);
940 };
941
942 namespace detail {
943
944 /// DeclarationNameExtra is used as a base of various uncommon special names.
945 /// This class is needed since DeclarationName has not enough space to store
946 /// the kind of every possible names. Therefore the kind of common names is
947 /// stored directly in DeclarationName, and the kind of uncommon names is
948 /// stored in DeclarationNameExtra. It is aligned to 8 bytes because
949 /// DeclarationName needs the lower 3 bits to store the kind of common names.
950 /// DeclarationNameExtra is tightly coupled to DeclarationName and any change
951 /// here is very likely to require changes in DeclarationName(Table).
alignas(IdentifierInfoAlignment)952 class alignas(IdentifierInfoAlignment) DeclarationNameExtra {
953 friend class clang::DeclarationName;
954 friend class clang::DeclarationNameTable;
955
956 protected:
957 /// The kind of "extra" information stored in the DeclarationName. See
958 /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values
959 /// are used. Note that DeclarationName depends on the numerical values
960 /// of the enumerators in this enum. See DeclarationName::StoredNameKind
961 /// for more info.
962 enum ExtraKind {
963 CXXDeductionGuideName,
964 CXXLiteralOperatorName,
965 CXXUsingDirective,
966 ObjCMultiArgSelector
967 };
968
969 /// ExtraKindOrNumArgs has one of the following meaning:
970 /// * The kind of an uncommon C++ special name. This DeclarationNameExtra
971 /// is in this case in fact either a CXXDeductionGuideNameExtra or
972 /// a CXXLiteralOperatorIdName.
973 ///
974 /// * It may be also name common to C++ using-directives (CXXUsingDirective),
975 ///
976 /// * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is
977 /// the number of arguments in the Objective-C selector, in which
978 /// case the DeclarationNameExtra is also a MultiKeywordSelector.
979 unsigned ExtraKindOrNumArgs;
980
981 DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {}
982 DeclarationNameExtra(unsigned NumArgs)
983 : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {}
984
985 /// Return the corresponding ExtraKind.
986 ExtraKind getKind() const {
987 return static_cast<ExtraKind>(ExtraKindOrNumArgs >
988 (unsigned)ObjCMultiArgSelector
989 ? (unsigned)ObjCMultiArgSelector
990 : ExtraKindOrNumArgs);
991 }
992
993 /// Return the number of arguments in an ObjC selector. Only valid when this
994 /// is indeed an ObjCMultiArgSelector.
995 unsigned getNumArgs() const {
996 assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector &&
997 "getNumArgs called but this is not an ObjC selector!");
998 return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector;
999 }
1000 };
1001
1002 } // namespace detail
1003
1004 } // namespace clang
1005
1006 namespace llvm {
1007
1008 /// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
1009 /// DenseSets.
1010 template <>
1011 struct DenseMapInfo<clang::Selector> {
1012 static clang::Selector getEmptyKey() {
1013 return clang::Selector::getEmptyMarker();
1014 }
1015
1016 static clang::Selector getTombstoneKey() {
1017 return clang::Selector::getTombstoneMarker();
1018 }
1019
1020 static unsigned getHashValue(clang::Selector S);
1021
1022 static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
1023 return LHS == RHS;
1024 }
1025 };
1026
1027 template<>
1028 struct PointerLikeTypeTraits<clang::Selector> {
1029 static const void *getAsVoidPointer(clang::Selector P) {
1030 return P.getAsOpaquePtr();
1031 }
1032
1033 static clang::Selector getFromVoidPointer(const void *P) {
1034 return clang::Selector(reinterpret_cast<uintptr_t>(P));
1035 }
1036
1037 static constexpr int NumLowBitsAvailable = 0;
1038 };
1039
1040 // Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
1041 // are not guaranteed to be 8-byte aligned.
1042 template<>
1043 struct PointerLikeTypeTraits<clang::IdentifierInfo*> {
1044 static void *getAsVoidPointer(clang::IdentifierInfo* P) {
1045 return P;
1046 }
1047
1048 static clang::IdentifierInfo *getFromVoidPointer(void *P) {
1049 return static_cast<clang::IdentifierInfo*>(P);
1050 }
1051
1052 static constexpr int NumLowBitsAvailable = 1;
1053 };
1054
1055 template<>
1056 struct PointerLikeTypeTraits<const clang::IdentifierInfo*> {
1057 static const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
1058 return P;
1059 }
1060
1061 static const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
1062 return static_cast<const clang::IdentifierInfo*>(P);
1063 }
1064
1065 static constexpr int NumLowBitsAvailable = 1;
1066 };
1067
1068 } // namespace llvm
1069
1070 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
1071