1f22ef01cSRoman Divacky //===--- StringMap.cpp - String Hash table map implementation -------------===//
2f22ef01cSRoman Divacky //
3f22ef01cSRoman Divacky // The LLVM Compiler Infrastructure
4f22ef01cSRoman Divacky //
5f22ef01cSRoman Divacky // This file is distributed under the University of Illinois Open Source
6f22ef01cSRoman Divacky // License. See LICENSE.TXT for details.
7f22ef01cSRoman Divacky //
8f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
9f22ef01cSRoman Divacky //
10f22ef01cSRoman Divacky // This file implements the StringMap class.
11f22ef01cSRoman Divacky //
12f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
13f22ef01cSRoman Divacky
14f22ef01cSRoman Divacky #include "llvm/ADT/StringMap.h"
15f22ef01cSRoman Divacky #include "llvm/ADT/StringExtras.h"
163861d79fSDimitry Andric #include "llvm/Support/Compiler.h"
17*4ba319b5SDimitry Andric #include "llvm/Support/DJB.h"
18d88c1a5aSDimitry Andric #include "llvm/Support/MathExtras.h"
19f22ef01cSRoman Divacky #include <cassert>
20d88c1a5aSDimitry Andric
21f22ef01cSRoman Divacky using namespace llvm;
22f22ef01cSRoman Divacky
233ca95b02SDimitry Andric /// Returns the number of buckets to allocate to ensure that the DenseMap can
243ca95b02SDimitry Andric /// accommodate \p NumEntries without need to grow().
getMinBucketToReserveForEntries(unsigned NumEntries)253ca95b02SDimitry Andric static unsigned getMinBucketToReserveForEntries(unsigned NumEntries) {
263ca95b02SDimitry Andric // Ensure that "NumEntries * 4 < NumBuckets * 3"
273ca95b02SDimitry Andric if (NumEntries == 0)
283ca95b02SDimitry Andric return 0;
293ca95b02SDimitry Andric // +1 is required because of the strict equality.
303ca95b02SDimitry Andric // For example if NumEntries is 48, we need to return 401.
313ca95b02SDimitry Andric return NextPowerOf2(NumEntries * 4 / 3 + 1);
323ca95b02SDimitry Andric }
333ca95b02SDimitry Andric
StringMapImpl(unsigned InitSize,unsigned itemSize)34f22ef01cSRoman Divacky StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
35f22ef01cSRoman Divacky ItemSize = itemSize;
36f22ef01cSRoman Divacky
37f22ef01cSRoman Divacky // If a size is specified, initialize the table with that many buckets.
38f22ef01cSRoman Divacky if (InitSize) {
393ca95b02SDimitry Andric // The table will grow when the number of entries reach 3/4 of the number of
403ca95b02SDimitry Andric // buckets. To guarantee that "InitSize" number of entries can be inserted
413ca95b02SDimitry Andric // in the table without growing, we allocate just what is needed here.
423ca95b02SDimitry Andric init(getMinBucketToReserveForEntries(InitSize));
43f22ef01cSRoman Divacky return;
44f22ef01cSRoman Divacky }
45f22ef01cSRoman Divacky
46f22ef01cSRoman Divacky // Otherwise, initialize it with zero buckets to avoid the allocation.
4791bc56edSDimitry Andric TheTable = nullptr;
48f22ef01cSRoman Divacky NumBuckets = 0;
49f22ef01cSRoman Divacky NumItems = 0;
50f22ef01cSRoman Divacky NumTombstones = 0;
51f22ef01cSRoman Divacky }
52f22ef01cSRoman Divacky
init(unsigned InitSize)53f22ef01cSRoman Divacky void StringMapImpl::init(unsigned InitSize) {
54f22ef01cSRoman Divacky assert((InitSize & (InitSize-1)) == 0 &&
55f22ef01cSRoman Divacky "Init Size must be a power of 2 or zero!");
562cab237bSDimitry Andric
572cab237bSDimitry Andric unsigned NewNumBuckets = InitSize ? InitSize : 16;
58f22ef01cSRoman Divacky NumItems = 0;
59f22ef01cSRoman Divacky NumTombstones = 0;
60f22ef01cSRoman Divacky
61*4ba319b5SDimitry Andric TheTable = static_cast<StringMapEntryBase **>(
62*4ba319b5SDimitry Andric safe_calloc(NewNumBuckets+1,
63*4ba319b5SDimitry Andric sizeof(StringMapEntryBase **) + sizeof(unsigned)));
642cab237bSDimitry Andric
652cab237bSDimitry Andric // Set the member only if TheTable was successfully allocated
662cab237bSDimitry Andric NumBuckets = NewNumBuckets;
672cab237bSDimitry Andric
68f22ef01cSRoman Divacky // Allocate one extra bucket, set it to look filled so the iterators stop at
69f22ef01cSRoman Divacky // end.
70dff0c46cSDimitry Andric TheTable[NumBuckets] = (StringMapEntryBase*)2;
71f22ef01cSRoman Divacky }
72f22ef01cSRoman Divacky
73f22ef01cSRoman Divacky /// LookupBucketFor - Look up the bucket that the specified string should end
74f22ef01cSRoman Divacky /// up in. If it already exists as a key in the map, the Item pointer for the
75f22ef01cSRoman Divacky /// specified bucket will be non-null. Otherwise, it will be null. In either
76f22ef01cSRoman Divacky /// case, the FullHashValue field of the bucket will be set to the hash value
77f22ef01cSRoman Divacky /// of the string.
LookupBucketFor(StringRef Name)78f22ef01cSRoman Divacky unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
79f22ef01cSRoman Divacky unsigned HTSize = NumBuckets;
80f22ef01cSRoman Divacky if (HTSize == 0) { // Hash table unallocated so far?
81f22ef01cSRoman Divacky init(16);
82f22ef01cSRoman Divacky HTSize = NumBuckets;
83f22ef01cSRoman Divacky }
84*4ba319b5SDimitry Andric unsigned FullHashValue = djbHash(Name, 0);
85f22ef01cSRoman Divacky unsigned BucketNo = FullHashValue & (HTSize-1);
86dff0c46cSDimitry Andric unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
87f22ef01cSRoman Divacky
88f22ef01cSRoman Divacky unsigned ProbeAmt = 1;
89f22ef01cSRoman Divacky int FirstTombstone = -1;
90d88c1a5aSDimitry Andric while (true) {
91dff0c46cSDimitry Andric StringMapEntryBase *BucketItem = TheTable[BucketNo];
92f22ef01cSRoman Divacky // If we found an empty bucket, this key isn't in the table yet, return it.
9391bc56edSDimitry Andric if (LLVM_LIKELY(!BucketItem)) {
94f22ef01cSRoman Divacky // If we found a tombstone, we want to reuse the tombstone instead of an
95f22ef01cSRoman Divacky // empty bucket. This reduces probing.
96f22ef01cSRoman Divacky if (FirstTombstone != -1) {
97dff0c46cSDimitry Andric HashTable[FirstTombstone] = FullHashValue;
98f22ef01cSRoman Divacky return FirstTombstone;
99f22ef01cSRoman Divacky }
100f22ef01cSRoman Divacky
101dff0c46cSDimitry Andric HashTable[BucketNo] = FullHashValue;
102f22ef01cSRoman Divacky return BucketNo;
103f22ef01cSRoman Divacky }
104f22ef01cSRoman Divacky
105f22ef01cSRoman Divacky if (BucketItem == getTombstoneVal()) {
106f22ef01cSRoman Divacky // Skip over tombstones. However, remember the first one we see.
107f22ef01cSRoman Divacky if (FirstTombstone == -1) FirstTombstone = BucketNo;
1083861d79fSDimitry Andric } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
109f22ef01cSRoman Divacky // If the full hash value matches, check deeply for a match. The common
110f22ef01cSRoman Divacky // case here is that we are only looking at the buckets (for item info
111f22ef01cSRoman Divacky // being non-null and for the full hash value) not at the items. This
112f22ef01cSRoman Divacky // is important for cache locality.
113f22ef01cSRoman Divacky
114f22ef01cSRoman Divacky // Do the comparison like this because Name isn't necessarily
115f22ef01cSRoman Divacky // null-terminated!
116f22ef01cSRoman Divacky char *ItemStr = (char*)BucketItem+ItemSize;
117f22ef01cSRoman Divacky if (Name == StringRef(ItemStr, BucketItem->getKeyLength())) {
118f22ef01cSRoman Divacky // We found a match!
119f22ef01cSRoman Divacky return BucketNo;
120f22ef01cSRoman Divacky }
121f22ef01cSRoman Divacky }
122f22ef01cSRoman Divacky
123f22ef01cSRoman Divacky // Okay, we didn't find the item. Probe to the next bucket.
124f22ef01cSRoman Divacky BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
125f22ef01cSRoman Divacky
126f22ef01cSRoman Divacky // Use quadratic probing, it has fewer clumping artifacts than linear
127f22ef01cSRoman Divacky // probing and has good cache behavior in the common case.
128f22ef01cSRoman Divacky ++ProbeAmt;
129f22ef01cSRoman Divacky }
130f22ef01cSRoman Divacky }
131f22ef01cSRoman Divacky
132f22ef01cSRoman Divacky /// FindKey - Look up the bucket that contains the specified key. If it exists
133f22ef01cSRoman Divacky /// in the map, return the bucket number of the key. Otherwise return -1.
134f22ef01cSRoman Divacky /// This does not modify the map.
FindKey(StringRef Key) const135f22ef01cSRoman Divacky int StringMapImpl::FindKey(StringRef Key) const {
136f22ef01cSRoman Divacky unsigned HTSize = NumBuckets;
137f22ef01cSRoman Divacky if (HTSize == 0) return -1; // Really empty table?
138*4ba319b5SDimitry Andric unsigned FullHashValue = djbHash(Key, 0);
139f22ef01cSRoman Divacky unsigned BucketNo = FullHashValue & (HTSize-1);
140dff0c46cSDimitry Andric unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
141f22ef01cSRoman Divacky
142f22ef01cSRoman Divacky unsigned ProbeAmt = 1;
143d88c1a5aSDimitry Andric while (true) {
144dff0c46cSDimitry Andric StringMapEntryBase *BucketItem = TheTable[BucketNo];
145f22ef01cSRoman Divacky // If we found an empty bucket, this key isn't in the table yet, return.
14691bc56edSDimitry Andric if (LLVM_LIKELY(!BucketItem))
147f22ef01cSRoman Divacky return -1;
148f22ef01cSRoman Divacky
149f22ef01cSRoman Divacky if (BucketItem == getTombstoneVal()) {
150f22ef01cSRoman Divacky // Ignore tombstones.
1513861d79fSDimitry Andric } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
152f22ef01cSRoman Divacky // If the full hash value matches, check deeply for a match. The common
153f22ef01cSRoman Divacky // case here is that we are only looking at the buckets (for item info
154f22ef01cSRoman Divacky // being non-null and for the full hash value) not at the items. This
155f22ef01cSRoman Divacky // is important for cache locality.
156f22ef01cSRoman Divacky
157f22ef01cSRoman Divacky // Do the comparison like this because NameStart isn't necessarily
158f22ef01cSRoman Divacky // null-terminated!
159f22ef01cSRoman Divacky char *ItemStr = (char*)BucketItem+ItemSize;
160f22ef01cSRoman Divacky if (Key == StringRef(ItemStr, BucketItem->getKeyLength())) {
161f22ef01cSRoman Divacky // We found a match!
162f22ef01cSRoman Divacky return BucketNo;
163f22ef01cSRoman Divacky }
164f22ef01cSRoman Divacky }
165f22ef01cSRoman Divacky
166f22ef01cSRoman Divacky // Okay, we didn't find the item. Probe to the next bucket.
167f22ef01cSRoman Divacky BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
168f22ef01cSRoman Divacky
169f22ef01cSRoman Divacky // Use quadratic probing, it has fewer clumping artifacts than linear
170f22ef01cSRoman Divacky // probing and has good cache behavior in the common case.
171f22ef01cSRoman Divacky ++ProbeAmt;
172f22ef01cSRoman Divacky }
173f22ef01cSRoman Divacky }
174f22ef01cSRoman Divacky
175f22ef01cSRoman Divacky /// RemoveKey - Remove the specified StringMapEntry from the table, but do not
176f22ef01cSRoman Divacky /// delete it. This aborts if the value isn't in the table.
RemoveKey(StringMapEntryBase * V)177f22ef01cSRoman Divacky void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
178f22ef01cSRoman Divacky const char *VStr = (char*)V + ItemSize;
179f22ef01cSRoman Divacky StringMapEntryBase *V2 = RemoveKey(StringRef(VStr, V->getKeyLength()));
1802754fe60SDimitry Andric (void)V2;
181f22ef01cSRoman Divacky assert(V == V2 && "Didn't find key?");
182f22ef01cSRoman Divacky }
183f22ef01cSRoman Divacky
184f22ef01cSRoman Divacky /// RemoveKey - Remove the StringMapEntry for the specified key from the
185f22ef01cSRoman Divacky /// table, returning it. If the key is not in the table, this returns null.
RemoveKey(StringRef Key)186f22ef01cSRoman Divacky StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
187f22ef01cSRoman Divacky int Bucket = FindKey(Key);
18891bc56edSDimitry Andric if (Bucket == -1) return nullptr;
189f22ef01cSRoman Divacky
190dff0c46cSDimitry Andric StringMapEntryBase *Result = TheTable[Bucket];
191dff0c46cSDimitry Andric TheTable[Bucket] = getTombstoneVal();
192f22ef01cSRoman Divacky --NumItems;
193f22ef01cSRoman Divacky ++NumTombstones;
1943b0f4066SDimitry Andric assert(NumItems + NumTombstones <= NumBuckets);
1953b0f4066SDimitry Andric
196f22ef01cSRoman Divacky return Result;
197f22ef01cSRoman Divacky }
198f22ef01cSRoman Divacky
199f22ef01cSRoman Divacky /// RehashTable - Grow the table, redistributing values into the buckets with
200f22ef01cSRoman Divacky /// the appropriate mod-of-hashtable-size.
RehashTable(unsigned BucketNo)20191bc56edSDimitry Andric unsigned StringMapImpl::RehashTable(unsigned BucketNo) {
2023b0f4066SDimitry Andric unsigned NewSize;
203dff0c46cSDimitry Andric unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
2043b0f4066SDimitry Andric
2053b0f4066SDimitry Andric // If the hash table is now more than 3/4 full, or if fewer than 1/8 of
2063b0f4066SDimitry Andric // the buckets are empty (meaning that many are filled with tombstones),
2073b0f4066SDimitry Andric // grow/rehash the table.
208ff0cc061SDimitry Andric if (LLVM_UNLIKELY(NumItems * 4 > NumBuckets * 3)) {
2093b0f4066SDimitry Andric NewSize = NumBuckets*2;
210ff0cc061SDimitry Andric } else if (LLVM_UNLIKELY(NumBuckets - (NumItems + NumTombstones) <=
211ff0cc061SDimitry Andric NumBuckets / 8)) {
2123b0f4066SDimitry Andric NewSize = NumBuckets;
2133b0f4066SDimitry Andric } else {
21491bc56edSDimitry Andric return BucketNo;
2153b0f4066SDimitry Andric }
2163b0f4066SDimitry Andric
21791bc56edSDimitry Andric unsigned NewBucketNo = BucketNo;
218f22ef01cSRoman Divacky // Allocate one extra bucket which will always be non-empty. This allows the
219f22ef01cSRoman Divacky // iterators to stop at end.
220*4ba319b5SDimitry Andric auto NewTableArray = static_cast<StringMapEntryBase **>(
221*4ba319b5SDimitry Andric safe_calloc(NewSize+1, sizeof(StringMapEntryBase *) + sizeof(unsigned)));
2222cab237bSDimitry Andric
223dff0c46cSDimitry Andric unsigned *NewHashArray = (unsigned *)(NewTableArray + NewSize + 1);
224dff0c46cSDimitry Andric NewTableArray[NewSize] = (StringMapEntryBase*)2;
225f22ef01cSRoman Divacky
226f22ef01cSRoman Divacky // Rehash all the items into their new buckets. Luckily :) we already have
227f22ef01cSRoman Divacky // the hash values available, so we don't have to rehash any strings.
228dff0c46cSDimitry Andric for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
229dff0c46cSDimitry Andric StringMapEntryBase *Bucket = TheTable[I];
230dff0c46cSDimitry Andric if (Bucket && Bucket != getTombstoneVal()) {
231f22ef01cSRoman Divacky // Fast case, bucket available.
232dff0c46cSDimitry Andric unsigned FullHash = HashTable[I];
233f22ef01cSRoman Divacky unsigned NewBucket = FullHash & (NewSize-1);
23491bc56edSDimitry Andric if (!NewTableArray[NewBucket]) {
235dff0c46cSDimitry Andric NewTableArray[FullHash & (NewSize-1)] = Bucket;
236dff0c46cSDimitry Andric NewHashArray[FullHash & (NewSize-1)] = FullHash;
23791bc56edSDimitry Andric if (I == BucketNo)
23891bc56edSDimitry Andric NewBucketNo = NewBucket;
239f22ef01cSRoman Divacky continue;
240f22ef01cSRoman Divacky }
241f22ef01cSRoman Divacky
242f22ef01cSRoman Divacky // Otherwise probe for a spot.
243f22ef01cSRoman Divacky unsigned ProbeSize = 1;
244f22ef01cSRoman Divacky do {
245f22ef01cSRoman Divacky NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
246dff0c46cSDimitry Andric } while (NewTableArray[NewBucket]);
247f22ef01cSRoman Divacky
248f22ef01cSRoman Divacky // Finally found a slot. Fill it in.
249dff0c46cSDimitry Andric NewTableArray[NewBucket] = Bucket;
250dff0c46cSDimitry Andric NewHashArray[NewBucket] = FullHash;
25191bc56edSDimitry Andric if (I == BucketNo)
25291bc56edSDimitry Andric NewBucketNo = NewBucket;
253f22ef01cSRoman Divacky }
254f22ef01cSRoman Divacky }
255f22ef01cSRoman Divacky
256f22ef01cSRoman Divacky free(TheTable);
257f22ef01cSRoman Divacky
258f22ef01cSRoman Divacky TheTable = NewTableArray;
259f22ef01cSRoman Divacky NumBuckets = NewSize;
2603b0f4066SDimitry Andric NumTombstones = 0;
26191bc56edSDimitry Andric return NewBucketNo;
262f22ef01cSRoman Divacky }
263