1 //===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the IdentifierInfo, IdentifierVisitor, and 11 // IdentifierTable interfaces. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Basic/IdentifierTable.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "llvm/ADT/FoldingSet.h" 18 #include "llvm/ADT/DenseMap.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/Support/raw_ostream.h" 21 #include <cstdio> 22 23 using namespace clang; 24 25 //===----------------------------------------------------------------------===// 26 // IdentifierInfo Implementation 27 //===----------------------------------------------------------------------===// 28 29 IdentifierInfo::IdentifierInfo() { 30 TokenID = tok::identifier; 31 ObjCOrBuiltinID = 0; 32 HasMacro = false; 33 IsExtension = false; 34 IsPoisoned = false; 35 IsCPPOperatorKeyword = false; 36 NeedsHandleIdentifier = false; 37 IsFromAST = false; 38 RevertedTokenID = false; 39 FETokenInfo = 0; 40 Entry = 0; 41 } 42 43 //===----------------------------------------------------------------------===// 44 // IdentifierTable Implementation 45 //===----------------------------------------------------------------------===// 46 47 IdentifierIterator::~IdentifierIterator() { } 48 49 IdentifierInfoLookup::~IdentifierInfoLookup() {} 50 51 namespace { 52 /// \brief A simple identifier lookup iterator that represents an 53 /// empty sequence of identifiers. 54 class EmptyLookupIterator : public IdentifierIterator 55 { 56 public: 57 virtual llvm::StringRef Next() { return llvm::StringRef(); } 58 }; 59 } 60 61 IdentifierIterator *IdentifierInfoLookup::getIdentifiers() const { 62 return new EmptyLookupIterator(); 63 } 64 65 ExternalIdentifierLookup::~ExternalIdentifierLookup() {} 66 67 IdentifierTable::IdentifierTable(const LangOptions &LangOpts, 68 IdentifierInfoLookup* externalLookup) 69 : HashTable(8192), // Start with space for 8K identifiers. 70 ExternalLookup(externalLookup) { 71 72 // Populate the identifier table with info about keywords for the current 73 // language. 74 AddKeywords(LangOpts); 75 } 76 77 //===----------------------------------------------------------------------===// 78 // Language Keyword Implementation 79 //===----------------------------------------------------------------------===// 80 81 // Constants for TokenKinds.def 82 namespace { 83 enum { 84 KEYC99 = 0x1, 85 KEYCXX = 0x2, 86 KEYCXX0X = 0x4, 87 KEYGNU = 0x8, 88 KEYMS = 0x10, 89 BOOLSUPPORT = 0x20, 90 KEYALTIVEC = 0x40, 91 KEYNOCXX = 0x80, 92 KEYBORLAND = 0x100, 93 KEYOPENCL = 0x200, 94 KEYALL = 0x3ff 95 }; 96 } 97 98 /// AddKeyword - This method is used to associate a token ID with specific 99 /// identifiers because they are language keywords. This causes the lexer to 100 /// automatically map matching identifiers to specialized token codes. 101 /// 102 /// The C90/C99/CPP/CPP0x flags are set to 2 if the token should be 103 /// enabled in the specified langauge, set to 1 if it is an extension 104 /// in the specified language, and set to 0 if disabled in the 105 /// specified language. 106 static void AddKeyword(llvm::StringRef Keyword, 107 tok::TokenKind TokenCode, unsigned Flags, 108 const LangOptions &LangOpts, IdentifierTable &Table) { 109 unsigned AddResult = 0; 110 if (Flags == KEYALL) AddResult = 2; 111 else if (LangOpts.CPlusPlus && (Flags & KEYCXX)) AddResult = 2; 112 else if (LangOpts.CPlusPlus0x && (Flags & KEYCXX0X)) AddResult = 2; 113 else if (LangOpts.C99 && (Flags & KEYC99)) AddResult = 2; 114 else if (LangOpts.GNUKeywords && (Flags & KEYGNU)) AddResult = 1; 115 else if (LangOpts.Microsoft && (Flags & KEYMS)) AddResult = 1; 116 else if (LangOpts.Borland && (Flags & KEYBORLAND)) AddResult = 1; 117 else if (LangOpts.Bool && (Flags & BOOLSUPPORT)) AddResult = 2; 118 else if (LangOpts.AltiVec && (Flags & KEYALTIVEC)) AddResult = 2; 119 else if (LangOpts.OpenCL && (Flags & KEYOPENCL)) AddResult = 2; 120 else if (!LangOpts.CPlusPlus && (Flags & KEYNOCXX)) AddResult = 2; 121 122 // Don't add this keyword if disabled in this language. 123 if (AddResult == 0) return; 124 125 IdentifierInfo &Info = Table.get(Keyword, TokenCode); 126 Info.setIsExtensionToken(AddResult == 1); 127 } 128 129 /// AddCXXOperatorKeyword - Register a C++ operator keyword alternative 130 /// representations. 131 static void AddCXXOperatorKeyword(llvm::StringRef Keyword, 132 tok::TokenKind TokenCode, 133 IdentifierTable &Table) { 134 IdentifierInfo &Info = Table.get(Keyword, TokenCode); 135 Info.setIsCPlusPlusOperatorKeyword(); 136 } 137 138 /// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or 139 /// "property". 140 static void AddObjCKeyword(llvm::StringRef Name, 141 tok::ObjCKeywordKind ObjCID, 142 IdentifierTable &Table) { 143 Table.get(Name).setObjCKeywordID(ObjCID); 144 } 145 146 /// AddKeywords - Add all keywords to the symbol table. 147 /// 148 void IdentifierTable::AddKeywords(const LangOptions &LangOpts) { 149 // Add keywords and tokens for the current language. 150 #define KEYWORD(NAME, FLAGS) \ 151 AddKeyword(llvm::StringRef(#NAME), tok::kw_ ## NAME, \ 152 FLAGS, LangOpts, *this); 153 #define ALIAS(NAME, TOK, FLAGS) \ 154 AddKeyword(llvm::StringRef(NAME), tok::kw_ ## TOK, \ 155 FLAGS, LangOpts, *this); 156 #define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \ 157 if (LangOpts.CXXOperatorNames) \ 158 AddCXXOperatorKeyword(llvm::StringRef(#NAME), tok::ALIAS, *this); 159 #define OBJC1_AT_KEYWORD(NAME) \ 160 if (LangOpts.ObjC1) \ 161 AddObjCKeyword(llvm::StringRef(#NAME), tok::objc_##NAME, *this); 162 #define OBJC2_AT_KEYWORD(NAME) \ 163 if (LangOpts.ObjC2) \ 164 AddObjCKeyword(llvm::StringRef(#NAME), tok::objc_##NAME, *this); 165 #define TESTING_KEYWORD(NAME, FLAGS) 166 #include "clang/Basic/TokenKinds.def" 167 168 if (LangOpts.ParseUnknownAnytype) 169 AddKeyword("__unknown_anytype", tok::kw___unknown_anytype, KEYALL, 170 LangOpts, *this); 171 } 172 173 tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { 174 // We use a perfect hash function here involving the length of the keyword, 175 // the first and third character. For preprocessor ID's there are no 176 // collisions (if there were, the switch below would complain about duplicate 177 // case values). Note that this depends on 'if' being null terminated. 178 179 #define HASH(LEN, FIRST, THIRD) \ 180 (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31) 181 #define CASE(LEN, FIRST, THIRD, NAME) \ 182 case HASH(LEN, FIRST, THIRD): \ 183 return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME 184 185 unsigned Len = getLength(); 186 if (Len < 2) return tok::pp_not_keyword; 187 const char *Name = getNameStart(); 188 switch (HASH(Len, Name[0], Name[2])) { 189 default: return tok::pp_not_keyword; 190 CASE( 2, 'i', '\0', if); 191 CASE( 4, 'e', 'i', elif); 192 CASE( 4, 'e', 's', else); 193 CASE( 4, 'l', 'n', line); 194 CASE( 4, 's', 'c', sccs); 195 CASE( 5, 'e', 'd', endif); 196 CASE( 5, 'e', 'r', error); 197 CASE( 5, 'i', 'e', ident); 198 CASE( 5, 'i', 'd', ifdef); 199 CASE( 5, 'u', 'd', undef); 200 201 CASE( 6, 'a', 's', assert); 202 CASE( 6, 'd', 'f', define); 203 CASE( 6, 'i', 'n', ifndef); 204 CASE( 6, 'i', 'p', import); 205 CASE( 6, 'p', 'a', pragma); 206 207 CASE( 7, 'd', 'f', defined); 208 CASE( 7, 'i', 'c', include); 209 CASE( 7, 'w', 'r', warning); 210 211 CASE( 8, 'u', 'a', unassert); 212 CASE(12, 'i', 'c', include_next); 213 214 CASE(16, '_', 'i', __include_macros); 215 #undef CASE 216 #undef HASH 217 } 218 } 219 220 //===----------------------------------------------------------------------===// 221 // Stats Implementation 222 //===----------------------------------------------------------------------===// 223 224 /// PrintStats - Print statistics about how well the identifier table is doing 225 /// at hashing identifiers. 226 void IdentifierTable::PrintStats() const { 227 unsigned NumBuckets = HashTable.getNumBuckets(); 228 unsigned NumIdentifiers = HashTable.getNumItems(); 229 unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers; 230 unsigned AverageIdentifierSize = 0; 231 unsigned MaxIdentifierLength = 0; 232 233 // TODO: Figure out maximum times an identifier had to probe for -stats. 234 for (llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator>::const_iterator 235 I = HashTable.begin(), E = HashTable.end(); I != E; ++I) { 236 unsigned IdLen = I->getKeyLength(); 237 AverageIdentifierSize += IdLen; 238 if (MaxIdentifierLength < IdLen) 239 MaxIdentifierLength = IdLen; 240 } 241 242 fprintf(stderr, "\n*** Identifier Table Stats:\n"); 243 fprintf(stderr, "# Identifiers: %d\n", NumIdentifiers); 244 fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets); 245 fprintf(stderr, "Hash density (#identifiers per bucket): %f\n", 246 NumIdentifiers/(double)NumBuckets); 247 fprintf(stderr, "Ave identifier length: %f\n", 248 (AverageIdentifierSize/(double)NumIdentifiers)); 249 fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength); 250 251 // Compute statistics about the memory allocated for identifiers. 252 HashTable.getAllocator().PrintStats(); 253 } 254 255 //===----------------------------------------------------------------------===// 256 // SelectorTable Implementation 257 //===----------------------------------------------------------------------===// 258 259 unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) { 260 return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr()); 261 } 262 263 namespace clang { 264 /// MultiKeywordSelector - One of these variable length records is kept for each 265 /// selector containing more than one keyword. We use a folding set 266 /// to unique aggregate names (keyword selectors in ObjC parlance). Access to 267 /// this class is provided strictly through Selector. 268 class MultiKeywordSelector 269 : public DeclarationNameExtra, public llvm::FoldingSetNode { 270 MultiKeywordSelector(unsigned nKeys) { 271 ExtraKindOrNumArgs = NUM_EXTRA_KINDS + nKeys; 272 } 273 public: 274 // Constructor for keyword selectors. 275 MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV) { 276 assert((nKeys > 1) && "not a multi-keyword selector"); 277 ExtraKindOrNumArgs = NUM_EXTRA_KINDS + nKeys; 278 279 // Fill in the trailing keyword array. 280 IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this+1); 281 for (unsigned i = 0; i != nKeys; ++i) 282 KeyInfo[i] = IIV[i]; 283 } 284 285 // getName - Derive the full selector name and return it. 286 std::string getName() const; 287 288 unsigned getNumArgs() const { return ExtraKindOrNumArgs - NUM_EXTRA_KINDS; } 289 290 typedef IdentifierInfo *const *keyword_iterator; 291 keyword_iterator keyword_begin() const { 292 return reinterpret_cast<keyword_iterator>(this+1); 293 } 294 keyword_iterator keyword_end() const { 295 return keyword_begin()+getNumArgs(); 296 } 297 IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const { 298 assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index"); 299 return keyword_begin()[i]; 300 } 301 static void Profile(llvm::FoldingSetNodeID &ID, 302 keyword_iterator ArgTys, unsigned NumArgs) { 303 ID.AddInteger(NumArgs); 304 for (unsigned i = 0; i != NumArgs; ++i) 305 ID.AddPointer(ArgTys[i]); 306 } 307 void Profile(llvm::FoldingSetNodeID &ID) { 308 Profile(ID, keyword_begin(), getNumArgs()); 309 } 310 }; 311 } // end namespace clang. 312 313 unsigned Selector::getNumArgs() const { 314 unsigned IIF = getIdentifierInfoFlag(); 315 if (IIF == ZeroArg) 316 return 0; 317 if (IIF == OneArg) 318 return 1; 319 // We point to a MultiKeywordSelector (pointer doesn't contain any flags). 320 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr); 321 return SI->getNumArgs(); 322 } 323 324 IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const { 325 if (getIdentifierInfoFlag()) { 326 assert(argIndex == 0 && "illegal keyword index"); 327 return getAsIdentifierInfo(); 328 } 329 // We point to a MultiKeywordSelector (pointer doesn't contain any flags). 330 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr); 331 return SI->getIdentifierInfoForSlot(argIndex); 332 } 333 334 llvm::StringRef Selector::getNameForSlot(unsigned int argIndex) const { 335 IdentifierInfo *II = getIdentifierInfoForSlot(argIndex); 336 return II? II->getName() : llvm::StringRef(); 337 } 338 339 std::string MultiKeywordSelector::getName() const { 340 llvm::SmallString<256> Str; 341 llvm::raw_svector_ostream OS(Str); 342 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) { 343 if (*I) 344 OS << (*I)->getName(); 345 OS << ':'; 346 } 347 348 return OS.str(); 349 } 350 351 std::string Selector::getAsString() const { 352 if (InfoPtr == 0) 353 return "<null selector>"; 354 355 if (InfoPtr & ArgFlags) { 356 IdentifierInfo *II = getAsIdentifierInfo(); 357 358 // If the number of arguments is 0 then II is guaranteed to not be null. 359 if (getNumArgs() == 0) 360 return II->getName(); 361 362 if (!II) 363 return ":"; 364 365 return II->getName().str() + ":"; 366 } 367 368 // We have a multiple keyword selector (no embedded flags). 369 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr)->getName(); 370 } 371 372 /// Interpreting the given string using the normal CamelCase 373 /// conventions, determine whether the given string starts with the 374 /// given "word", which is assumed to end in a lowercase letter. 375 static bool startsWithWord(llvm::StringRef name, llvm::StringRef word) { 376 if (name.size() < word.size()) return false; 377 return ((name.size() == word.size() || 378 !islower(name[word.size()])) 379 && name.startswith(word)); 380 } 381 382 ObjCMethodFamily Selector::getMethodFamilyImpl(Selector sel) { 383 IdentifierInfo *first = sel.getIdentifierInfoForSlot(0); 384 if (!first) return OMF_None; 385 386 llvm::StringRef name = first->getName(); 387 if (sel.isUnarySelector()) { 388 if (name == "autorelease") return OMF_autorelease; 389 if (name == "dealloc") return OMF_dealloc; 390 if (name == "release") return OMF_release; 391 if (name == "retain") return OMF_retain; 392 if (name == "retainCount") return OMF_retainCount; 393 } 394 395 // The other method families may begin with a prefix of underscores. 396 while (!name.empty() && name.front() == '_') 397 name = name.substr(1); 398 399 if (name.empty()) return OMF_None; 400 switch (name.front()) { 401 case 'a': 402 if (startsWithWord(name, "alloc")) return OMF_alloc; 403 break; 404 case 'c': 405 if (startsWithWord(name, "copy")) return OMF_copy; 406 break; 407 case 'i': 408 if (startsWithWord(name, "init")) return OMF_init; 409 break; 410 case 'm': 411 if (startsWithWord(name, "mutableCopy")) return OMF_mutableCopy; 412 break; 413 case 'n': 414 if (startsWithWord(name, "new")) return OMF_new; 415 break; 416 default: 417 break; 418 } 419 420 return OMF_None; 421 } 422 423 namespace { 424 struct SelectorTableImpl { 425 llvm::FoldingSet<MultiKeywordSelector> Table; 426 llvm::BumpPtrAllocator Allocator; 427 }; 428 } // end anonymous namespace. 429 430 static SelectorTableImpl &getSelectorTableImpl(void *P) { 431 return *static_cast<SelectorTableImpl*>(P); 432 } 433 434 435 Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) { 436 if (nKeys < 2) 437 return Selector(IIV[0], nKeys); 438 439 SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl); 440 441 // Unique selector, to guarantee there is one per name. 442 llvm::FoldingSetNodeID ID; 443 MultiKeywordSelector::Profile(ID, IIV, nKeys); 444 445 void *InsertPos = 0; 446 if (MultiKeywordSelector *SI = 447 SelTabImpl.Table.FindNodeOrInsertPos(ID, InsertPos)) 448 return Selector(SI); 449 450 // MultiKeywordSelector objects are not allocated with new because they have a 451 // variable size array (for parameter types) at the end of them. 452 unsigned Size = sizeof(MultiKeywordSelector) + nKeys*sizeof(IdentifierInfo *); 453 MultiKeywordSelector *SI = 454 (MultiKeywordSelector*)SelTabImpl.Allocator.Allocate(Size, 455 llvm::alignOf<MultiKeywordSelector>()); 456 new (SI) MultiKeywordSelector(nKeys, IIV); 457 SelTabImpl.Table.InsertNode(SI, InsertPos); 458 return Selector(SI); 459 } 460 461 SelectorTable::SelectorTable() { 462 Impl = new SelectorTableImpl(); 463 } 464 465 SelectorTable::~SelectorTable() { 466 delete &getSelectorTableImpl(Impl); 467 } 468 469 const char *clang::getOperatorSpelling(OverloadedOperatorKind Operator) { 470 switch (Operator) { 471 case OO_None: 472 case NUM_OVERLOADED_OPERATORS: 473 return 0; 474 475 #define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \ 476 case OO_##Name: return Spelling; 477 #include "clang/Basic/OperatorKinds.def" 478 } 479 480 return 0; 481 } 482 483