1f22ef01cSRoman Divacky //===--- StringMap.cpp - String Hash table map implementation -------------===//
2f22ef01cSRoman Divacky //
3f22ef01cSRoman Divacky //                     The LLVM Compiler Infrastructure
4f22ef01cSRoman Divacky //
5f22ef01cSRoman Divacky // This file is distributed under the University of Illinois Open Source
6f22ef01cSRoman Divacky // License. See LICENSE.TXT for details.
7f22ef01cSRoman Divacky //
8f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
9f22ef01cSRoman Divacky //
10f22ef01cSRoman Divacky // This file implements the StringMap class.
11f22ef01cSRoman Divacky //
12f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
13f22ef01cSRoman Divacky 
14f22ef01cSRoman Divacky #include "llvm/ADT/StringMap.h"
15f22ef01cSRoman Divacky #include "llvm/ADT/StringExtras.h"
163861d79fSDimitry Andric #include "llvm/Support/Compiler.h"
17*4ba319b5SDimitry Andric #include "llvm/Support/DJB.h"
18d88c1a5aSDimitry Andric #include "llvm/Support/MathExtras.h"
19f22ef01cSRoman Divacky #include <cassert>
20d88c1a5aSDimitry Andric 
21f22ef01cSRoman Divacky using namespace llvm;
22f22ef01cSRoman Divacky 
233ca95b02SDimitry Andric /// Returns the number of buckets to allocate to ensure that the DenseMap can
243ca95b02SDimitry Andric /// accommodate \p NumEntries without need to grow().
getMinBucketToReserveForEntries(unsigned NumEntries)253ca95b02SDimitry Andric static unsigned getMinBucketToReserveForEntries(unsigned NumEntries) {
263ca95b02SDimitry Andric   // Ensure that "NumEntries * 4 < NumBuckets * 3"
273ca95b02SDimitry Andric   if (NumEntries == 0)
283ca95b02SDimitry Andric     return 0;
293ca95b02SDimitry Andric   // +1 is required because of the strict equality.
303ca95b02SDimitry Andric   // For example if NumEntries is 48, we need to return 401.
313ca95b02SDimitry Andric   return NextPowerOf2(NumEntries * 4 / 3 + 1);
323ca95b02SDimitry Andric }
333ca95b02SDimitry Andric 
StringMapImpl(unsigned InitSize,unsigned itemSize)34f22ef01cSRoman Divacky StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
35f22ef01cSRoman Divacky   ItemSize = itemSize;
36f22ef01cSRoman Divacky 
37f22ef01cSRoman Divacky   // If a size is specified, initialize the table with that many buckets.
38f22ef01cSRoman Divacky   if (InitSize) {
393ca95b02SDimitry Andric     // The table will grow when the number of entries reach 3/4 of the number of
403ca95b02SDimitry Andric     // buckets. To guarantee that "InitSize" number of entries can be inserted
413ca95b02SDimitry Andric     // in the table without growing, we allocate just what is needed here.
423ca95b02SDimitry Andric     init(getMinBucketToReserveForEntries(InitSize));
43f22ef01cSRoman Divacky     return;
44f22ef01cSRoman Divacky   }
45f22ef01cSRoman Divacky 
46f22ef01cSRoman Divacky   // Otherwise, initialize it with zero buckets to avoid the allocation.
4791bc56edSDimitry Andric   TheTable = nullptr;
48f22ef01cSRoman Divacky   NumBuckets = 0;
49f22ef01cSRoman Divacky   NumItems = 0;
50f22ef01cSRoman Divacky   NumTombstones = 0;
51f22ef01cSRoman Divacky }
52f22ef01cSRoman Divacky 
init(unsigned InitSize)53f22ef01cSRoman Divacky void StringMapImpl::init(unsigned InitSize) {
54f22ef01cSRoman Divacky   assert((InitSize & (InitSize-1)) == 0 &&
55f22ef01cSRoman Divacky          "Init Size must be a power of 2 or zero!");
562cab237bSDimitry Andric 
572cab237bSDimitry Andric   unsigned NewNumBuckets = InitSize ? InitSize : 16;
58f22ef01cSRoman Divacky   NumItems = 0;
59f22ef01cSRoman Divacky   NumTombstones = 0;
60f22ef01cSRoman Divacky 
61*4ba319b5SDimitry Andric   TheTable = static_cast<StringMapEntryBase **>(
62*4ba319b5SDimitry Andric       safe_calloc(NewNumBuckets+1,
63*4ba319b5SDimitry Andric                   sizeof(StringMapEntryBase **) + sizeof(unsigned)));
642cab237bSDimitry Andric 
652cab237bSDimitry Andric   // Set the member only if TheTable was successfully allocated
662cab237bSDimitry Andric   NumBuckets = NewNumBuckets;
672cab237bSDimitry Andric 
68f22ef01cSRoman Divacky   // Allocate one extra bucket, set it to look filled so the iterators stop at
69f22ef01cSRoman Divacky   // end.
70dff0c46cSDimitry Andric   TheTable[NumBuckets] = (StringMapEntryBase*)2;
71f22ef01cSRoman Divacky }
72f22ef01cSRoman Divacky 
73f22ef01cSRoman Divacky /// LookupBucketFor - Look up the bucket that the specified string should end
74f22ef01cSRoman Divacky /// up in.  If it already exists as a key in the map, the Item pointer for the
75f22ef01cSRoman Divacky /// specified bucket will be non-null.  Otherwise, it will be null.  In either
76f22ef01cSRoman Divacky /// case, the FullHashValue field of the bucket will be set to the hash value
77f22ef01cSRoman Divacky /// of the string.
LookupBucketFor(StringRef Name)78f22ef01cSRoman Divacky unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
79f22ef01cSRoman Divacky   unsigned HTSize = NumBuckets;
80f22ef01cSRoman Divacky   if (HTSize == 0) {  // Hash table unallocated so far?
81f22ef01cSRoman Divacky     init(16);
82f22ef01cSRoman Divacky     HTSize = NumBuckets;
83f22ef01cSRoman Divacky   }
84*4ba319b5SDimitry Andric   unsigned FullHashValue = djbHash(Name, 0);
85f22ef01cSRoman Divacky   unsigned BucketNo = FullHashValue & (HTSize-1);
86dff0c46cSDimitry Andric   unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
87f22ef01cSRoman Divacky 
88f22ef01cSRoman Divacky   unsigned ProbeAmt = 1;
89f22ef01cSRoman Divacky   int FirstTombstone = -1;
90d88c1a5aSDimitry Andric   while (true) {
91dff0c46cSDimitry Andric     StringMapEntryBase *BucketItem = TheTable[BucketNo];
92f22ef01cSRoman Divacky     // If we found an empty bucket, this key isn't in the table yet, return it.
9391bc56edSDimitry Andric     if (LLVM_LIKELY(!BucketItem)) {
94f22ef01cSRoman Divacky       // If we found a tombstone, we want to reuse the tombstone instead of an
95f22ef01cSRoman Divacky       // empty bucket.  This reduces probing.
96f22ef01cSRoman Divacky       if (FirstTombstone != -1) {
97dff0c46cSDimitry Andric         HashTable[FirstTombstone] = FullHashValue;
98f22ef01cSRoman Divacky         return FirstTombstone;
99f22ef01cSRoman Divacky       }
100f22ef01cSRoman Divacky 
101dff0c46cSDimitry Andric       HashTable[BucketNo] = FullHashValue;
102f22ef01cSRoman Divacky       return BucketNo;
103f22ef01cSRoman Divacky     }
104f22ef01cSRoman Divacky 
105f22ef01cSRoman Divacky     if (BucketItem == getTombstoneVal()) {
106f22ef01cSRoman Divacky       // Skip over tombstones.  However, remember the first one we see.
107f22ef01cSRoman Divacky       if (FirstTombstone == -1) FirstTombstone = BucketNo;
1083861d79fSDimitry Andric     } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
109f22ef01cSRoman Divacky       // If the full hash value matches, check deeply for a match.  The common
110f22ef01cSRoman Divacky       // case here is that we are only looking at the buckets (for item info
111f22ef01cSRoman Divacky       // being non-null and for the full hash value) not at the items.  This
112f22ef01cSRoman Divacky       // is important for cache locality.
113f22ef01cSRoman Divacky 
114f22ef01cSRoman Divacky       // Do the comparison like this because Name isn't necessarily
115f22ef01cSRoman Divacky       // null-terminated!
116f22ef01cSRoman Divacky       char *ItemStr = (char*)BucketItem+ItemSize;
117f22ef01cSRoman Divacky       if (Name == StringRef(ItemStr, BucketItem->getKeyLength())) {
118f22ef01cSRoman Divacky         // We found a match!
119f22ef01cSRoman Divacky         return BucketNo;
120f22ef01cSRoman Divacky       }
121f22ef01cSRoman Divacky     }
122f22ef01cSRoman Divacky 
123f22ef01cSRoman Divacky     // Okay, we didn't find the item.  Probe to the next bucket.
124f22ef01cSRoman Divacky     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
125f22ef01cSRoman Divacky 
126f22ef01cSRoman Divacky     // Use quadratic probing, it has fewer clumping artifacts than linear
127f22ef01cSRoman Divacky     // probing and has good cache behavior in the common case.
128f22ef01cSRoman Divacky     ++ProbeAmt;
129f22ef01cSRoman Divacky   }
130f22ef01cSRoman Divacky }
131f22ef01cSRoman Divacky 
132f22ef01cSRoman Divacky /// FindKey - Look up the bucket that contains the specified key. If it exists
133f22ef01cSRoman Divacky /// in the map, return the bucket number of the key.  Otherwise return -1.
134f22ef01cSRoman Divacky /// This does not modify the map.
FindKey(StringRef Key) const135f22ef01cSRoman Divacky int StringMapImpl::FindKey(StringRef Key) const {
136f22ef01cSRoman Divacky   unsigned HTSize = NumBuckets;
137f22ef01cSRoman Divacky   if (HTSize == 0) return -1;  // Really empty table?
138*4ba319b5SDimitry Andric   unsigned FullHashValue = djbHash(Key, 0);
139f22ef01cSRoman Divacky   unsigned BucketNo = FullHashValue & (HTSize-1);
140dff0c46cSDimitry Andric   unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
141f22ef01cSRoman Divacky 
142f22ef01cSRoman Divacky   unsigned ProbeAmt = 1;
143d88c1a5aSDimitry Andric   while (true) {
144dff0c46cSDimitry Andric     StringMapEntryBase *BucketItem = TheTable[BucketNo];
145f22ef01cSRoman Divacky     // If we found an empty bucket, this key isn't in the table yet, return.
14691bc56edSDimitry Andric     if (LLVM_LIKELY(!BucketItem))
147f22ef01cSRoman Divacky       return -1;
148f22ef01cSRoman Divacky 
149f22ef01cSRoman Divacky     if (BucketItem == getTombstoneVal()) {
150f22ef01cSRoman Divacky       // Ignore tombstones.
1513861d79fSDimitry Andric     } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
152f22ef01cSRoman Divacky       // If the full hash value matches, check deeply for a match.  The common
153f22ef01cSRoman Divacky       // case here is that we are only looking at the buckets (for item info
154f22ef01cSRoman Divacky       // being non-null and for the full hash value) not at the items.  This
155f22ef01cSRoman Divacky       // is important for cache locality.
156f22ef01cSRoman Divacky 
157f22ef01cSRoman Divacky       // Do the comparison like this because NameStart isn't necessarily
158f22ef01cSRoman Divacky       // null-terminated!
159f22ef01cSRoman Divacky       char *ItemStr = (char*)BucketItem+ItemSize;
160f22ef01cSRoman Divacky       if (Key == StringRef(ItemStr, BucketItem->getKeyLength())) {
161f22ef01cSRoman Divacky         // We found a match!
162f22ef01cSRoman Divacky         return BucketNo;
163f22ef01cSRoman Divacky       }
164f22ef01cSRoman Divacky     }
165f22ef01cSRoman Divacky 
166f22ef01cSRoman Divacky     // Okay, we didn't find the item.  Probe to the next bucket.
167f22ef01cSRoman Divacky     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
168f22ef01cSRoman Divacky 
169f22ef01cSRoman Divacky     // Use quadratic probing, it has fewer clumping artifacts than linear
170f22ef01cSRoman Divacky     // probing and has good cache behavior in the common case.
171f22ef01cSRoman Divacky     ++ProbeAmt;
172f22ef01cSRoman Divacky   }
173f22ef01cSRoman Divacky }
174f22ef01cSRoman Divacky 
175f22ef01cSRoman Divacky /// RemoveKey - Remove the specified StringMapEntry from the table, but do not
176f22ef01cSRoman Divacky /// delete it.  This aborts if the value isn't in the table.
RemoveKey(StringMapEntryBase * V)177f22ef01cSRoman Divacky void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
178f22ef01cSRoman Divacky   const char *VStr = (char*)V + ItemSize;
179f22ef01cSRoman Divacky   StringMapEntryBase *V2 = RemoveKey(StringRef(VStr, V->getKeyLength()));
1802754fe60SDimitry Andric   (void)V2;
181f22ef01cSRoman Divacky   assert(V == V2 && "Didn't find key?");
182f22ef01cSRoman Divacky }
183f22ef01cSRoman Divacky 
184f22ef01cSRoman Divacky /// RemoveKey - Remove the StringMapEntry for the specified key from the
185f22ef01cSRoman Divacky /// table, returning it.  If the key is not in the table, this returns null.
RemoveKey(StringRef Key)186f22ef01cSRoman Divacky StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
187f22ef01cSRoman Divacky   int Bucket = FindKey(Key);
18891bc56edSDimitry Andric   if (Bucket == -1) return nullptr;
189f22ef01cSRoman Divacky 
190dff0c46cSDimitry Andric   StringMapEntryBase *Result = TheTable[Bucket];
191dff0c46cSDimitry Andric   TheTable[Bucket] = getTombstoneVal();
192f22ef01cSRoman Divacky   --NumItems;
193f22ef01cSRoman Divacky   ++NumTombstones;
1943b0f4066SDimitry Andric   assert(NumItems + NumTombstones <= NumBuckets);
1953b0f4066SDimitry Andric 
196f22ef01cSRoman Divacky   return Result;
197f22ef01cSRoman Divacky }
198f22ef01cSRoman Divacky 
199f22ef01cSRoman Divacky /// RehashTable - Grow the table, redistributing values into the buckets with
200f22ef01cSRoman Divacky /// the appropriate mod-of-hashtable-size.
RehashTable(unsigned BucketNo)20191bc56edSDimitry Andric unsigned StringMapImpl::RehashTable(unsigned BucketNo) {
2023b0f4066SDimitry Andric   unsigned NewSize;
203dff0c46cSDimitry Andric   unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
2043b0f4066SDimitry Andric 
2053b0f4066SDimitry Andric   // If the hash table is now more than 3/4 full, or if fewer than 1/8 of
2063b0f4066SDimitry Andric   // the buckets are empty (meaning that many are filled with tombstones),
2073b0f4066SDimitry Andric   // grow/rehash the table.
208ff0cc061SDimitry Andric   if (LLVM_UNLIKELY(NumItems * 4 > NumBuckets * 3)) {
2093b0f4066SDimitry Andric     NewSize = NumBuckets*2;
210ff0cc061SDimitry Andric   } else if (LLVM_UNLIKELY(NumBuckets - (NumItems + NumTombstones) <=
211ff0cc061SDimitry Andric                            NumBuckets / 8)) {
2123b0f4066SDimitry Andric     NewSize = NumBuckets;
2133b0f4066SDimitry Andric   } else {
21491bc56edSDimitry Andric     return BucketNo;
2153b0f4066SDimitry Andric   }
2163b0f4066SDimitry Andric 
21791bc56edSDimitry Andric   unsigned NewBucketNo = BucketNo;
218f22ef01cSRoman Divacky   // Allocate one extra bucket which will always be non-empty.  This allows the
219f22ef01cSRoman Divacky   // iterators to stop at end.
220*4ba319b5SDimitry Andric   auto NewTableArray = static_cast<StringMapEntryBase **>(
221*4ba319b5SDimitry Andric       safe_calloc(NewSize+1, sizeof(StringMapEntryBase *) + sizeof(unsigned)));
2222cab237bSDimitry Andric 
223dff0c46cSDimitry Andric   unsigned *NewHashArray = (unsigned *)(NewTableArray + NewSize + 1);
224dff0c46cSDimitry Andric   NewTableArray[NewSize] = (StringMapEntryBase*)2;
225f22ef01cSRoman Divacky 
226f22ef01cSRoman Divacky   // Rehash all the items into their new buckets.  Luckily :) we already have
227f22ef01cSRoman Divacky   // the hash values available, so we don't have to rehash any strings.
228dff0c46cSDimitry Andric   for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
229dff0c46cSDimitry Andric     StringMapEntryBase *Bucket = TheTable[I];
230dff0c46cSDimitry Andric     if (Bucket && Bucket != getTombstoneVal()) {
231f22ef01cSRoman Divacky       // Fast case, bucket available.
232dff0c46cSDimitry Andric       unsigned FullHash = HashTable[I];
233f22ef01cSRoman Divacky       unsigned NewBucket = FullHash & (NewSize-1);
23491bc56edSDimitry Andric       if (!NewTableArray[NewBucket]) {
235dff0c46cSDimitry Andric         NewTableArray[FullHash & (NewSize-1)] = Bucket;
236dff0c46cSDimitry Andric         NewHashArray[FullHash & (NewSize-1)] = FullHash;
23791bc56edSDimitry Andric         if (I == BucketNo)
23891bc56edSDimitry Andric           NewBucketNo = NewBucket;
239f22ef01cSRoman Divacky         continue;
240f22ef01cSRoman Divacky       }
241f22ef01cSRoman Divacky 
242f22ef01cSRoman Divacky       // Otherwise probe for a spot.
243f22ef01cSRoman Divacky       unsigned ProbeSize = 1;
244f22ef01cSRoman Divacky       do {
245f22ef01cSRoman Divacky         NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
246dff0c46cSDimitry Andric       } while (NewTableArray[NewBucket]);
247f22ef01cSRoman Divacky 
248f22ef01cSRoman Divacky       // Finally found a slot.  Fill it in.
249dff0c46cSDimitry Andric       NewTableArray[NewBucket] = Bucket;
250dff0c46cSDimitry Andric       NewHashArray[NewBucket] = FullHash;
25191bc56edSDimitry Andric       if (I == BucketNo)
25291bc56edSDimitry Andric         NewBucketNo = NewBucket;
253f22ef01cSRoman Divacky     }
254f22ef01cSRoman Divacky   }
255f22ef01cSRoman Divacky 
256f22ef01cSRoman Divacky   free(TheTable);
257f22ef01cSRoman Divacky 
258f22ef01cSRoman Divacky   TheTable = NewTableArray;
259f22ef01cSRoman Divacky   NumBuckets = NewSize;
2603b0f4066SDimitry Andric   NumTombstones = 0;
26191bc56edSDimitry Andric   return NewBucketNo;
262f22ef01cSRoman Divacky }
263