xref: /sqlite-3.40.0/ext/fts2/fts2_hash.c (revision 049d487e)
1 /*
2 ** 2001 September 22
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 ** This is the implementation of generic hash-tables used in SQLite.
13 ** We've modified it slightly to serve as a standalone hash table
14 ** implementation for the full-text indexing module.
15 */
16 
17 /*
18 ** The code in this file is only compiled if:
19 **
20 **     * The FTS2 module is being built as an extension
21 **       (in which case SQLITE_CORE is not defined), or
22 **
23 **     * The FTS2 module is being built into the core of
24 **       SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
25 */
26 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
27 
28 #include <assert.h>
29 #include <stdlib.h>
30 #include <string.h>
31 
32 #include "sqlite3.h"
33 #include "sqlite3ext.h"
34 SQLITE_EXTENSION_INIT3
35 #include "fts2_hash.h"
36 
37 /*
38 ** Malloc and Free functions
39 */
fts2HashMalloc(int n)40 static void *fts2HashMalloc(int n){
41   void *p = sqlite3_malloc(n);
42   if( p ){
43     memset(p, 0, n);
44   }
45   return p;
46 }
fts2HashFree(void * p)47 static void fts2HashFree(void *p){
48   sqlite3_free(p);
49 }
50 
51 /* Turn bulk memory into a hash table object by initializing the
52 ** fields of the Hash structure.
53 **
54 ** "pNew" is a pointer to the hash table that is to be initialized.
55 ** keyClass is one of the constants
56 ** FTS2_HASH_BINARY or FTS2_HASH_STRING.  The value of keyClass
57 ** determines what kind of key the hash table will use.  "copyKey" is
58 ** true if the hash table should make its own private copy of keys and
59 ** false if it should just use the supplied pointer.
60 */
sqlite3Fts2HashInit(fts2Hash * pNew,int keyClass,int copyKey)61 void sqlite3Fts2HashInit(fts2Hash *pNew, int keyClass, int copyKey){
62   assert( pNew!=0 );
63   assert( keyClass>=FTS2_HASH_STRING && keyClass<=FTS2_HASH_BINARY );
64   pNew->keyClass = keyClass;
65   pNew->copyKey = copyKey;
66   pNew->first = 0;
67   pNew->count = 0;
68   pNew->htsize = 0;
69   pNew->ht = 0;
70 }
71 
72 /* Remove all entries from a hash table.  Reclaim all memory.
73 ** Call this routine to delete a hash table or to reset a hash table
74 ** to the empty state.
75 */
sqlite3Fts2HashClear(fts2Hash * pH)76 void sqlite3Fts2HashClear(fts2Hash *pH){
77   fts2HashElem *elem;         /* For looping over all elements of the table */
78 
79   assert( pH!=0 );
80   elem = pH->first;
81   pH->first = 0;
82   fts2HashFree(pH->ht);
83   pH->ht = 0;
84   pH->htsize = 0;
85   while( elem ){
86     fts2HashElem *next_elem = elem->next;
87     if( pH->copyKey && elem->pKey ){
88       fts2HashFree(elem->pKey);
89     }
90     fts2HashFree(elem);
91     elem = next_elem;
92   }
93   pH->count = 0;
94 }
95 
96 /*
97 ** Hash and comparison functions when the mode is FTS2_HASH_STRING
98 */
strHash(const void * pKey,int nKey)99 static int strHash(const void *pKey, int nKey){
100   const char *z = (const char *)pKey;
101   int h = 0;
102   if( nKey<=0 ) nKey = (int) strlen(z);
103   while( nKey > 0  ){
104     h = (h<<3) ^ h ^ *z++;
105     nKey--;
106   }
107   return h & 0x7fffffff;
108 }
strCompare(const void * pKey1,int n1,const void * pKey2,int n2)109 static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
110   if( n1!=n2 ) return 1;
111   return strncmp((const char*)pKey1,(const char*)pKey2,n1);
112 }
113 
114 /*
115 ** Hash and comparison functions when the mode is FTS2_HASH_BINARY
116 */
binHash(const void * pKey,int nKey)117 static int binHash(const void *pKey, int nKey){
118   int h = 0;
119   const char *z = (const char *)pKey;
120   while( nKey-- > 0 ){
121     h = (h<<3) ^ h ^ *(z++);
122   }
123   return h & 0x7fffffff;
124 }
binCompare(const void * pKey1,int n1,const void * pKey2,int n2)125 static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
126   if( n1!=n2 ) return 1;
127   return memcmp(pKey1,pKey2,n1);
128 }
129 
130 /*
131 ** Return a pointer to the appropriate hash function given the key class.
132 **
133 ** The C syntax in this function definition may be unfamilar to some
134 ** programmers, so we provide the following additional explanation:
135 **
136 ** The name of the function is "hashFunction".  The function takes a
137 ** single parameter "keyClass".  The return value of hashFunction()
138 ** is a pointer to another function.  Specifically, the return value
139 ** of hashFunction() is a pointer to a function that takes two parameters
140 ** with types "const void*" and "int" and returns an "int".
141 */
hashFunction(int keyClass)142 static int (*hashFunction(int keyClass))(const void*,int){
143   if( keyClass==FTS2_HASH_STRING ){
144     return &strHash;
145   }else{
146     assert( keyClass==FTS2_HASH_BINARY );
147     return &binHash;
148   }
149 }
150 
151 /*
152 ** Return a pointer to the appropriate hash function given the key class.
153 **
154 ** For help in interpreted the obscure C code in the function definition,
155 ** see the header comment on the previous function.
156 */
compareFunction(int keyClass)157 static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
158   if( keyClass==FTS2_HASH_STRING ){
159     return &strCompare;
160   }else{
161     assert( keyClass==FTS2_HASH_BINARY );
162     return &binCompare;
163   }
164 }
165 
166 /* Link an element into the hash table
167 */
insertElement(fts2Hash * pH,struct _fts2ht * pEntry,fts2HashElem * pNew)168 static void insertElement(
169   fts2Hash *pH,            /* The complete hash table */
170   struct _fts2ht *pEntry,  /* The entry into which pNew is inserted */
171   fts2HashElem *pNew       /* The element to be inserted */
172 ){
173   fts2HashElem *pHead;     /* First element already in pEntry */
174   pHead = pEntry->chain;
175   if( pHead ){
176     pNew->next = pHead;
177     pNew->prev = pHead->prev;
178     if( pHead->prev ){ pHead->prev->next = pNew; }
179     else             { pH->first = pNew; }
180     pHead->prev = pNew;
181   }else{
182     pNew->next = pH->first;
183     if( pH->first ){ pH->first->prev = pNew; }
184     pNew->prev = 0;
185     pH->first = pNew;
186   }
187   pEntry->count++;
188   pEntry->chain = pNew;
189 }
190 
191 
192 /* Resize the hash table so that it cantains "new_size" buckets.
193 ** "new_size" must be a power of 2.  The hash table might fail
194 ** to resize if sqliteMalloc() fails.
195 */
rehash(fts2Hash * pH,int new_size)196 static void rehash(fts2Hash *pH, int new_size){
197   struct _fts2ht *new_ht;          /* The new hash table */
198   fts2HashElem *elem, *next_elem;  /* For looping over existing elements */
199   int (*xHash)(const void*,int);   /* The hash function */
200 
201   assert( (new_size & (new_size-1))==0 );
202   new_ht = (struct _fts2ht *)fts2HashMalloc( new_size*sizeof(struct _fts2ht) );
203   if( new_ht==0 ) return;
204   fts2HashFree(pH->ht);
205   pH->ht = new_ht;
206   pH->htsize = new_size;
207   xHash = hashFunction(pH->keyClass);
208   for(elem=pH->first, pH->first=0; elem; elem = next_elem){
209     int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
210     next_elem = elem->next;
211     insertElement(pH, &new_ht[h], elem);
212   }
213 }
214 
215 /* This function (for internal use only) locates an element in an
216 ** hash table that matches the given key.  The hash for this key has
217 ** already been computed and is passed as the 4th parameter.
218 */
findElementGivenHash(const fts2Hash * pH,const void * pKey,int nKey,int h)219 static fts2HashElem *findElementGivenHash(
220   const fts2Hash *pH, /* The pH to be searched */
221   const void *pKey,   /* The key we are searching for */
222   int nKey,
223   int h               /* The hash for this key. */
224 ){
225   fts2HashElem *elem;            /* Used to loop thru the element list */
226   int count;                     /* Number of elements left to test */
227   int (*xCompare)(const void*,int,const void*,int);  /* comparison function */
228 
229   if( pH->ht ){
230     struct _fts2ht *pEntry = &pH->ht[h];
231     elem = pEntry->chain;
232     count = pEntry->count;
233     xCompare = compareFunction(pH->keyClass);
234     while( count-- && elem ){
235       if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
236         return elem;
237       }
238       elem = elem->next;
239     }
240   }
241   return 0;
242 }
243 
244 /* Remove a single entry from the hash table given a pointer to that
245 ** element and a hash on the element's key.
246 */
removeElementGivenHash(fts2Hash * pH,fts2HashElem * elem,int h)247 static void removeElementGivenHash(
248   fts2Hash *pH,         /* The pH containing "elem" */
249   fts2HashElem* elem,   /* The element to be removed from the pH */
250   int h                 /* Hash value for the element */
251 ){
252   struct _fts2ht *pEntry;
253   if( elem->prev ){
254     elem->prev->next = elem->next;
255   }else{
256     pH->first = elem->next;
257   }
258   if( elem->next ){
259     elem->next->prev = elem->prev;
260   }
261   pEntry = &pH->ht[h];
262   if( pEntry->chain==elem ){
263     pEntry->chain = elem->next;
264   }
265   pEntry->count--;
266   if( pEntry->count<=0 ){
267     pEntry->chain = 0;
268   }
269   if( pH->copyKey && elem->pKey ){
270     fts2HashFree(elem->pKey);
271   }
272   fts2HashFree( elem );
273   pH->count--;
274   if( pH->count<=0 ){
275     assert( pH->first==0 );
276     assert( pH->count==0 );
277     fts2HashClear(pH);
278   }
279 }
280 
281 /* Attempt to locate an element of the hash table pH with a key
282 ** that matches pKey,nKey.  Return the data for this element if it is
283 ** found, or NULL if there is no match.
284 */
sqlite3Fts2HashFind(const fts2Hash * pH,const void * pKey,int nKey)285 void *sqlite3Fts2HashFind(const fts2Hash *pH, const void *pKey, int nKey){
286   int h;                 /* A hash on key */
287   fts2HashElem *elem;    /* The element that matches key */
288   int (*xHash)(const void*,int);  /* The hash function */
289 
290   if( pH==0 || pH->ht==0 ) return 0;
291   xHash = hashFunction(pH->keyClass);
292   assert( xHash!=0 );
293   h = (*xHash)(pKey,nKey);
294   assert( (pH->htsize & (pH->htsize-1))==0 );
295   elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
296   return elem ? elem->data : 0;
297 }
298 
299 /* Insert an element into the hash table pH.  The key is pKey,nKey
300 ** and the data is "data".
301 **
302 ** If no element exists with a matching key, then a new
303 ** element is created.  A copy of the key is made if the copyKey
304 ** flag is set.  NULL is returned.
305 **
306 ** If another element already exists with the same key, then the
307 ** new data replaces the old data and the old data is returned.
308 ** The key is not copied in this instance.  If a malloc fails, then
309 ** the new data is returned and the hash table is unchanged.
310 **
311 ** If the "data" parameter to this function is NULL, then the
312 ** element corresponding to "key" is removed from the hash table.
313 */
sqlite3Fts2HashInsert(fts2Hash * pH,const void * pKey,int nKey,void * data)314 void *sqlite3Fts2HashInsert(
315   fts2Hash *pH,        /* The hash table to insert into */
316   const void *pKey,    /* The key */
317   int nKey,            /* Number of bytes in the key */
318   void *data           /* The data */
319 ){
320   int hraw;                 /* Raw hash value of the key */
321   int h;                    /* the hash of the key modulo hash table size */
322   fts2HashElem *elem;       /* Used to loop thru the element list */
323   fts2HashElem *new_elem;   /* New element added to the pH */
324   int (*xHash)(const void*,int);  /* The hash function */
325 
326   assert( pH!=0 );
327   xHash = hashFunction(pH->keyClass);
328   assert( xHash!=0 );
329   hraw = (*xHash)(pKey, nKey);
330   assert( (pH->htsize & (pH->htsize-1))==0 );
331   h = hraw & (pH->htsize-1);
332   elem = findElementGivenHash(pH,pKey,nKey,h);
333   if( elem ){
334     void *old_data = elem->data;
335     if( data==0 ){
336       removeElementGivenHash(pH,elem,h);
337     }else{
338       elem->data = data;
339     }
340     return old_data;
341   }
342   if( data==0 ) return 0;
343   new_elem = (fts2HashElem*)fts2HashMalloc( sizeof(fts2HashElem) );
344   if( new_elem==0 ) return data;
345   if( pH->copyKey && pKey!=0 ){
346     new_elem->pKey = fts2HashMalloc( nKey );
347     if( new_elem->pKey==0 ){
348       fts2HashFree(new_elem);
349       return data;
350     }
351     memcpy((void*)new_elem->pKey, pKey, nKey);
352   }else{
353     new_elem->pKey = (void*)pKey;
354   }
355   new_elem->nKey = nKey;
356   pH->count++;
357   if( pH->htsize==0 ){
358     rehash(pH,8);
359     if( pH->htsize==0 ){
360       pH->count = 0;
361       fts2HashFree(new_elem);
362       return data;
363     }
364   }
365   if( pH->count > pH->htsize ){
366     rehash(pH,pH->htsize*2);
367   }
368   assert( pH->htsize>0 );
369   assert( (pH->htsize & (pH->htsize-1))==0 );
370   h = hraw & (pH->htsize-1);
371   insertElement(pH, &pH->ht[h], new_elem);
372   new_elem->data = data;
373   return 0;
374 }
375 
376 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
377