1 /* Rax -- A radix tree implementation. 2 * 3 * Copyright (c) 2017-2018, Salvatore Sanfilippo <antirez at gmail dot com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are met: 8 * 9 * * Redistributions of source code must retain the above copyright notice, 10 * this list of conditions and the following disclaimer. 11 * * Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * * Neither the name of Redis nor the names of its contributors may be used 15 * to endorse or promote products derived from this software without 16 * specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <stdlib.h> 32 #include <string.h> 33 #include <assert.h> 34 #include <stdio.h> 35 #include <errno.h> 36 #include <math.h> 37 #include "rax.h" 38 39 #ifndef RAX_MALLOC_INCLUDE 40 #define RAX_MALLOC_INCLUDE "rax_malloc.h" 41 #endif 42 43 #include RAX_MALLOC_INCLUDE 44 45 /* This is a special pointer that is guaranteed to never have the same value 46 * of a radix tree node. It's used in order to report "not found" error without 47 * requiring the function to have multiple return values. */ 48 void *raxNotFound = (void*)"rax-not-found-pointer"; 49 50 /* -------------------------------- Debugging ------------------------------ */ 51 52 void raxDebugShowNode(const char *msg, raxNode *n); 53 54 /* Turn debugging messages on/off by compiling with RAX_DEBUG_MSG macro on. 55 * When RAX_DEBUG_MSG is defined by default Rax operations will emit a lot 56 * of debugging info to the standard output, however you can still turn 57 * debugging on/off in order to enable it only when you suspect there is an 58 * operation causing a bug using the function raxSetDebugMsg(). */ 59 #ifdef RAX_DEBUG_MSG 60 #define debugf(...) \ 61 if (raxDebugMsg) { \ 62 printf("%s:%s:%d:\t", __FILE__, __FUNCTION__, __LINE__); \ 63 printf(__VA_ARGS__); \ 64 fflush(stdout); \ 65 } 66 67 #define debugnode(msg,n) raxDebugShowNode(msg,n) 68 #else 69 #define debugf(...) 70 #define debugnode(msg,n) 71 #endif 72 73 /* By default log debug info if RAX_DEBUG_MSG is defined. */ 74 static int raxDebugMsg = 1; 75 76 /* When debug messages are enabled, turn them on/off dynamically. By 77 * default they are enabled. Set the state to 0 to disable, and 1 to 78 * re-enable. */ 79 void raxSetDebugMsg(int onoff) { 80 raxDebugMsg = onoff; 81 } 82 83 /* ------------------------- raxStack functions -------------------------- 84 * The raxStack is a simple stack of pointers that is capable of switching 85 * from using a stack-allocated array to dynamic heap once a given number of 86 * items are reached. It is used in order to retain the list of parent nodes 87 * while walking the radix tree in order to implement certain operations that 88 * need to navigate the tree upward. 89 * ------------------------------------------------------------------------- */ 90 91 /* Initialize the stack. */ 92 static inline void raxStackInit(raxStack *ts) { 93 ts->stack = ts->static_items; 94 ts->items = 0; 95 ts->maxitems = RAX_STACK_STATIC_ITEMS; 96 ts->oom = 0; 97 } 98 99 /* Push an item into the stack, returns 1 on success, 0 on out of memory. */ 100 static inline int raxStackPush(raxStack *ts, void *ptr) { 101 if (ts->items == ts->maxitems) { 102 if (ts->stack == ts->static_items) { 103 ts->stack = rax_malloc(sizeof(void*)*ts->maxitems*2); 104 if (ts->stack == NULL) { 105 ts->stack = ts->static_items; 106 ts->oom = 1; 107 errno = ENOMEM; 108 return 0; 109 } 110 memcpy(ts->stack,ts->static_items,sizeof(void*)*ts->maxitems); 111 } else { 112 void **newalloc = rax_realloc(ts->stack,sizeof(void*)*ts->maxitems*2); 113 if (newalloc == NULL) { 114 ts->oom = 1; 115 errno = ENOMEM; 116 return 0; 117 } 118 ts->stack = newalloc; 119 } 120 ts->maxitems *= 2; 121 } 122 ts->stack[ts->items] = ptr; 123 ts->items++; 124 return 1; 125 } 126 127 /* Pop an item from the stack, the function returns NULL if there are no 128 * items to pop. */ 129 static inline void *raxStackPop(raxStack *ts) { 130 if (ts->items == 0) return NULL; 131 ts->items--; 132 return ts->stack[ts->items]; 133 } 134 135 /* Return the stack item at the top of the stack without actually consuming 136 * it. */ 137 static inline void *raxStackPeek(raxStack *ts) { 138 if (ts->items == 0) return NULL; 139 return ts->stack[ts->items-1]; 140 } 141 142 /* Free the stack in case we used heap allocation. */ 143 static inline void raxStackFree(raxStack *ts) { 144 if (ts->stack != ts->static_items) rax_free(ts->stack); 145 } 146 147 /* ---------------------------------------------------------------------------- 148 * Radix tree implementation 149 * --------------------------------------------------------------------------*/ 150 151 /* Return the padding needed in the characters section of a node having size 152 * 'nodesize'. The padding is needed to store the child pointers to aligned 153 * addresses. Note that we add 4 to the node size because the node has a four 154 * bytes header. */ 155 #define raxPadding(nodesize) ((sizeof(void*)-((nodesize+4) % sizeof(void*))) & (sizeof(void*)-1)) 156 157 /* Return the pointer to the last child pointer in a node. For the compressed 158 * nodes this is the only child pointer. */ 159 #define raxNodeLastChildPtr(n) ((raxNode**) ( \ 160 ((char*)(n)) + \ 161 raxNodeCurrentLength(n) - \ 162 sizeof(raxNode*) - \ 163 (((n)->iskey && !(n)->isnull) ? sizeof(void*) : 0) \ 164 )) 165 166 /* Return the pointer to the first child pointer. */ 167 #define raxNodeFirstChildPtr(n) ((raxNode**) ( \ 168 (n)->data + \ 169 (n)->size + \ 170 raxPadding((n)->size))) 171 172 /* Return the current total size of the node. Note that the second line 173 * computes the padding after the string of characters, needed in order to 174 * save pointers to aligned addresses. */ 175 #define raxNodeCurrentLength(n) ( \ 176 sizeof(raxNode)+(n)->size+ \ 177 raxPadding((n)->size)+ \ 178 ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ \ 179 (((n)->iskey && !(n)->isnull)*sizeof(void*)) \ 180 ) 181 182 /* Allocate a new non compressed node with the specified number of children. 183 * If datafiled is true, the allocation is made large enough to hold the 184 * associated data pointer. 185 * Returns the new node pointer. On out of memory NULL is returned. */ 186 raxNode *raxNewNode(size_t children, int datafield) { 187 size_t nodesize = sizeof(raxNode)+children+raxPadding(children)+ 188 sizeof(raxNode*)*children; 189 if (datafield) nodesize += sizeof(void*); 190 raxNode *node = rax_malloc(nodesize); 191 if (node == NULL) return NULL; 192 node->iskey = 0; 193 node->isnull = 0; 194 node->iscompr = 0; 195 node->size = children; 196 return node; 197 } 198 199 /* Allocate a new rax and return its pointer. On out of memory the function 200 * returns NULL. */ 201 rax *raxNew(void) { 202 rax *rax = rax_malloc(sizeof(*rax)); 203 if (rax == NULL) return NULL; 204 rax->numele = 0; 205 rax->numnodes = 1; 206 rax->head = raxNewNode(0,0); 207 if (rax->head == NULL) { 208 rax_free(rax); 209 return NULL; 210 } else { 211 return rax; 212 } 213 } 214 215 /* realloc the node to make room for auxiliary data in order 216 * to store an item in that node. On out of memory NULL is returned. */ 217 raxNode *raxReallocForData(raxNode *n, void *data) { 218 if (data == NULL) return n; /* No reallocation needed, setting isnull=1 */ 219 size_t curlen = raxNodeCurrentLength(n); 220 return rax_realloc(n,curlen+sizeof(void*)); 221 } 222 223 /* Set the node auxiliary data to the specified pointer. */ 224 void raxSetData(raxNode *n, void *data) { 225 n->iskey = 1; 226 if (data != NULL) { 227 n->isnull = 0; 228 void **ndata = (void**) 229 ((char*)n+raxNodeCurrentLength(n)-sizeof(void*)); 230 memcpy(ndata,&data,sizeof(data)); 231 } else { 232 n->isnull = 1; 233 } 234 } 235 236 /* Get the node auxiliary data. */ 237 void *raxGetData(raxNode *n) { 238 if (n->isnull) return NULL; 239 void **ndata =(void**)((char*)n+raxNodeCurrentLength(n)-sizeof(void*)); 240 void *data; 241 memcpy(&data,ndata,sizeof(data)); 242 return data; 243 } 244 245 /* Add a new child to the node 'n' representing the character 'c' and return 246 * its new pointer, as well as the child pointer by reference. Additionally 247 * '***parentlink' is populated with the raxNode pointer-to-pointer of where 248 * the new child was stored, which is useful for the caller to replace the 249 * child pointer if it gets reallocated. 250 * 251 * On success the new parent node pointer is returned (it may change because 252 * of the realloc, so the caller should discard 'n' and use the new value). 253 * On out of memory NULL is returned, and the old node is still valid. */ 254 raxNode *raxAddChild(raxNode *n, unsigned char c, raxNode **childptr, raxNode ***parentlink) { 255 assert(n->iscompr == 0); 256 257 size_t curlen = raxNodeCurrentLength(n); 258 n->size++; 259 size_t newlen = raxNodeCurrentLength(n); 260 n->size--; /* For now restore the orignal size. We'll update it only on 261 success at the end. */ 262 263 /* Alloc the new child we will link to 'n'. */ 264 raxNode *child = raxNewNode(0,0); 265 if (child == NULL) return NULL; 266 267 /* Make space in the original node. */ 268 raxNode *newn = rax_realloc(n,newlen); 269 if (newn == NULL) { 270 rax_free(child); 271 return NULL; 272 } 273 n = newn; 274 275 /* After the reallocation, we have up to 8/16 (depending on the system 276 * pointer size, and the required node padding) bytes at the end, that is, 277 * the additional char in the 'data' section, plus one pointer to the new 278 * child, plus the padding needed in order to store addresses into aligned 279 * locations. 280 * 281 * So if we start with the following node, having "abde" edges. 282 * 283 * Note: 284 * - We assume 4 bytes pointer for simplicity. 285 * - Each space below corresponds to one byte 286 * 287 * [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP| 288 * 289 * After the reallocation we need: 1 byte for the new edge character 290 * plus 4 bytes for a new child pointer (assuming 32 bit machine). 291 * However after adding 1 byte to the edge char, the header + the edge 292 * characters are no longer aligned, so we also need 3 bytes of padding. 293 * In total the reallocation will add 1+4+3 bytes = 8 bytes: 294 * 295 * (Blank bytes are represented by ".") 296 * 297 * [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP|[....][....] 298 * 299 * Let's find where to insert the new child in order to make sure 300 * it is inserted in-place lexicographically. Assuming we are adding 301 * a child "c" in our case pos will be = 2 after the end of the following 302 * loop. */ 303 int pos; 304 for (pos = 0; pos < n->size; pos++) { 305 if (n->data[pos] > c) break; 306 } 307 308 /* Now, if present, move auxiliary data pointer at the end 309 * so that we can mess with the other data without overwriting it. 310 * We will obtain something like that: 311 * 312 * [HDR*][abde][Aptr][Bptr][Dptr][Eptr][....][....]|AUXP| 313 */ 314 unsigned char *src, *dst; 315 if (n->iskey && !n->isnull) { 316 src = ((unsigned char*)n+curlen-sizeof(void*)); 317 dst = ((unsigned char*)n+newlen-sizeof(void*)); 318 memmove(dst,src,sizeof(void*)); 319 } 320 321 /* Compute the "shift", that is, how many bytes we need to move the 322 * pointers section forward because of the addition of the new child 323 * byte in the string section. Note that if we had no padding, that 324 * would be always "1", since we are adding a single byte in the string 325 * section of the node (where now there is "abde" basically). 326 * 327 * However we have padding, so it could be zero, or up to 8. 328 * 329 * Another way to think at the shift is, how many bytes we need to 330 * move child pointers forward *other than* the obvious sizeof(void*) 331 * needed for the additional pointer itself. */ 332 size_t shift = newlen - curlen - sizeof(void*); 333 334 /* We said we are adding a node with edge 'c'. The insertion 335 * point is between 'b' and 'd', so the 'pos' variable value is 336 * the index of the first child pointer that we need to move forward 337 * to make space for our new pointer. 338 * 339 * To start, move all the child pointers after the insertion point 340 * of shift+sizeof(pointer) bytes on the right, to obtain: 341 * 342 * [HDR*][abde][Aptr][Bptr][....][....][Dptr][Eptr]|AUXP| 343 */ 344 src = n->data+n->size+ 345 raxPadding(n->size)+ 346 sizeof(raxNode*)*pos; 347 memmove(src+shift+sizeof(raxNode*),src,sizeof(raxNode*)*(n->size-pos)); 348 349 /* Move the pointers to the left of the insertion position as well. Often 350 * we don't need to do anything if there was already some padding to use. In 351 * that case the final destination of the pointers will be the same, however 352 * in our example there was no pre-existing padding, so we added one byte 353 * plus thre bytes of padding. After the next memmove() things will look 354 * like thata: 355 * 356 * [HDR*][abde][....][Aptr][Bptr][....][Dptr][Eptr]|AUXP| 357 */ 358 if (shift) { 359 src = (unsigned char*) raxNodeFirstChildPtr(n); 360 memmove(src+shift,src,sizeof(raxNode*)*pos); 361 } 362 363 /* Now make the space for the additional char in the data section, 364 * but also move the pointers before the insertion point to the right 365 * by shift bytes, in order to obtain the following: 366 * 367 * [HDR*][ab.d][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP| 368 */ 369 src = n->data+pos; 370 memmove(src+1,src,n->size-pos); 371 372 /* We can now set the character and its child node pointer to get: 373 * 374 * [HDR*][abcd][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP| 375 * [HDR*][abcd][e...][Aptr][Bptr][Cptr][Dptr][Eptr]|AUXP| 376 */ 377 n->data[pos] = c; 378 n->size++; 379 src = (unsigned char*) raxNodeFirstChildPtr(n); 380 raxNode **childfield = (raxNode**)(src+sizeof(raxNode*)*pos); 381 memcpy(childfield,&child,sizeof(child)); 382 *childptr = child; 383 *parentlink = childfield; 384 return n; 385 } 386 387 /* Turn the node 'n', that must be a node without any children, into a 388 * compressed node representing a set of nodes linked one after the other 389 * and having exactly one child each. The node can be a key or not: this 390 * property and the associated value if any will be preserved. 391 * 392 * The function also returns a child node, since the last node of the 393 * compressed chain cannot be part of the chain: it has zero children while 394 * we can only compress inner nodes with exactly one child each. */ 395 raxNode *raxCompressNode(raxNode *n, unsigned char *s, size_t len, raxNode **child) { 396 assert(n->size == 0 && n->iscompr == 0); 397 void *data = NULL; /* Initialized only to avoid warnings. */ 398 size_t newsize; 399 400 debugf("Compress node: %.*s\n", (int)len,s); 401 402 /* Allocate the child to link to this node. */ 403 *child = raxNewNode(0,0); 404 if (*child == NULL) return NULL; 405 406 /* Make space in the parent node. */ 407 newsize = sizeof(raxNode)+len+raxPadding(len)+sizeof(raxNode*); 408 if (n->iskey) { 409 data = raxGetData(n); /* To restore it later. */ 410 if (!n->isnull) newsize += sizeof(void*); 411 } 412 raxNode *newn = rax_realloc(n,newsize); 413 if (newn == NULL) { 414 rax_free(*child); 415 return NULL; 416 } 417 n = newn; 418 419 n->iscompr = 1; 420 n->size = len; 421 memcpy(n->data,s,len); 422 if (n->iskey) raxSetData(n,data); 423 raxNode **childfield = raxNodeLastChildPtr(n); 424 memcpy(childfield,child,sizeof(*child)); 425 return n; 426 } 427 428 /* Low level function that walks the tree looking for the string 429 * 's' of 'len' bytes. The function returns the number of characters 430 * of the key that was possible to process: if the returned integer 431 * is the same as 'len', then it means that the node corresponding to the 432 * string was found (however it may not be a key in case the node->iskey is 433 * zero or if simply we stopped in the middle of a compressed node, so that 434 * 'splitpos' is non zero). 435 * 436 * Otherwise if the returned integer is not the same as 'len', there was an 437 * early stop during the tree walk because of a character mismatch. 438 * 439 * The node where the search ended (because the full string was processed 440 * or because there was an early stop) is returned by reference as 441 * '*stopnode' if the passed pointer is not NULL. This node link in the 442 * parent's node is returned as '*plink' if not NULL. Finally, if the 443 * search stopped in a compressed node, '*splitpos' returns the index 444 * inside the compressed node where the search ended. This is useful to 445 * know where to split the node for insertion. 446 * 447 * Note that when we stop in the middle of a compressed node with 448 * a perfect match, this function will return a length equal to the 449 * 'len' argument (all the key matched), and will return a *splitpos which is 450 * always positive (that will represent the index of the character immediately 451 * *after* the last match in the current compressed node). 452 * 453 * When instead we stop at a compressed node and *splitpos is zero, it 454 * means that the current node represents the key (that is, none of the 455 * compressed node characters are needed to represent the key, just all 456 * its parents nodes). */ 457 static inline size_t raxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode **stopnode, raxNode ***plink, int *splitpos, raxStack *ts) { 458 raxNode *h = rax->head; 459 raxNode **parentlink = &rax->head; 460 461 size_t i = 0; /* Position in the string. */ 462 size_t j = 0; /* Position in the node children (or bytes if compressed).*/ 463 while(h->size && i < len) { 464 debugnode("Lookup current node",h); 465 unsigned char *v = h->data; 466 467 if (h->iscompr) { 468 for (j = 0; j < h->size && i < len; j++, i++) { 469 if (v[j] != s[i]) break; 470 } 471 if (j != h->size) break; 472 } else { 473 /* Even when h->size is large, linear scan provides good 474 * performances compared to other approaches that are in theory 475 * more sounding, like performing a binary search. */ 476 for (j = 0; j < h->size; j++) { 477 if (v[j] == s[i]) break; 478 } 479 if (j == h->size) break; 480 i++; 481 } 482 483 if (ts) raxStackPush(ts,h); /* Save stack of parent nodes. */ 484 raxNode **children = raxNodeFirstChildPtr(h); 485 if (h->iscompr) j = 0; /* Compressed node only child is at index 0. */ 486 memcpy(&h,children+j,sizeof(h)); 487 parentlink = children+j; 488 j = 0; /* If the new node is compressed and we do not 489 iterate again (since i == l) set the split 490 position to 0 to signal this node represents 491 the searched key. */ 492 } 493 debugnode("Lookup stop node is",h); 494 if (stopnode) *stopnode = h; 495 if (plink) *plink = parentlink; 496 if (splitpos && h->iscompr) *splitpos = j; 497 return i; 498 } 499 500 /* Insert the element 's' of size 'len', setting as auxiliary data 501 * the pointer 'data'. If the element is already present, the associated 502 * data is updated (only if 'overwrite' is set to 1), and 0 is returned, 503 * otherwise the element is inserted and 1 is returned. On out of memory the 504 * function returns 0 as well but sets errno to ENOMEM, otherwise errno will 505 * be set to 0. 506 */ 507 int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old, int overwrite) { 508 size_t i; 509 int j = 0; /* Split position. If raxLowWalk() stops in a compressed 510 node, the index 'j' represents the char we stopped within the 511 compressed node, that is, the position where to split the 512 node for insertion. */ 513 raxNode *h, **parentlink; 514 515 debugf("### Insert %.*s with value %p\n", (int)len, s, data); 516 i = raxLowWalk(rax,s,len,&h,&parentlink,&j,NULL); 517 518 /* If i == len we walked following the whole string. If we are not 519 * in the middle of a compressed node, the string is either already 520 * inserted or this middle node is currently not a key, but can represent 521 * our key. We have just to reallocate the node and make space for the 522 * data pointer. */ 523 if (i == len && (!h->iscompr || j == 0 /* not in the middle if j is 0 */)) { 524 debugf("### Insert: node representing key exists\n"); 525 /* Make space for the value pointer if needed. */ 526 if (!h->iskey || (h->isnull && overwrite)) { 527 h = raxReallocForData(h,data); 528 if (h) memcpy(parentlink,&h,sizeof(h)); 529 } 530 if (h == NULL) { 531 errno = ENOMEM; 532 return 0; 533 } 534 535 /* Update the existing key if there is already one. */ 536 if (h->iskey) { 537 if (old) *old = raxGetData(h); 538 if (overwrite) raxSetData(h,data); 539 errno = 0; 540 return 0; /* Element already exists. */ 541 } 542 543 /* Otherwise set the node as a key. Note that raxSetData() 544 * will set h->iskey. */ 545 raxSetData(h,data); 546 rax->numele++; 547 return 1; /* Element inserted. */ 548 } 549 550 /* If the node we stopped at is a compressed node, we need to 551 * split it before to continue. 552 * 553 * Splitting a compressed node have a few possible cases. 554 * Imagine that the node 'h' we are currently at is a compressed 555 * node contaning the string "ANNIBALE" (it means that it represents 556 * nodes A -> N -> N -> I -> B -> A -> L -> E with the only child 557 * pointer of this node pointing at the 'E' node, because remember that 558 * we have characters at the edges of the graph, not inside the nodes 559 * themselves. 560 * 561 * In order to show a real case imagine our node to also point to 562 * another compressed node, that finally points at the node without 563 * children, representing 'O': 564 * 565 * "ANNIBALE" -> "SCO" -> [] 566 * 567 * When inserting we may face the following cases. Note that all the cases 568 * require the insertion of a non compressed node with exactly two 569 * children, except for the last case which just requires splitting a 570 * compressed node. 571 * 572 * 1) Inserting "ANNIENTARE" 573 * 574 * |B| -> "ALE" -> "SCO" -> [] 575 * "ANNI" -> |-| 576 * |E| -> (... continue algo ...) "NTARE" -> [] 577 * 578 * 2) Inserting "ANNIBALI" 579 * 580 * |E| -> "SCO" -> [] 581 * "ANNIBAL" -> |-| 582 * |I| -> (... continue algo ...) [] 583 * 584 * 3) Inserting "AGO" (Like case 1, but set iscompr = 0 into original node) 585 * 586 * |N| -> "NIBALE" -> "SCO" -> [] 587 * |A| -> |-| 588 * |G| -> (... continue algo ...) |O| -> [] 589 * 590 * 4) Inserting "CIAO" 591 * 592 * |A| -> "NNIBALE" -> "SCO" -> [] 593 * |-| 594 * |C| -> (... continue algo ...) "IAO" -> [] 595 * 596 * 5) Inserting "ANNI" 597 * 598 * "ANNI" -> "BALE" -> "SCO" -> [] 599 * 600 * The final algorithm for insertion covering all the above cases is as 601 * follows. 602 * 603 * ============================= ALGO 1 ============================= 604 * 605 * For the above cases 1 to 4, that is, all cases where we stopped in 606 * the middle of a compressed node for a character mismatch, do: 607 * 608 * Let $SPLITPOS be the zero-based index at which, in the 609 * compressed node array of characters, we found the mismatching 610 * character. For example if the node contains "ANNIBALE" and we add 611 * "ANNIENTARE" the $SPLITPOS is 4, that is, the index at which the 612 * mismatching character is found. 613 * 614 * 1. Save the current compressed node $NEXT pointer (the pointer to the 615 * child element, that is always present in compressed nodes). 616 * 617 * 2. Create "split node" having as child the non common letter 618 * at the compressed node. The other non common letter (at the key) 619 * will be added later as we continue the normal insertion algorithm 620 * at step "6". 621 * 622 * 3a. IF $SPLITPOS == 0: 623 * Replace the old node with the split node, by copying the auxiliary 624 * data if any. Fix parent's reference. Free old node eventually 625 * (we still need its data for the next steps of the algorithm). 626 * 627 * 3b. IF $SPLITPOS != 0: 628 * Trim the compressed node (reallocating it as well) in order to 629 * contain $splitpos characters. Change chilid pointer in order to link 630 * to the split node. If new compressed node len is just 1, set 631 * iscompr to 0 (layout is the same). Fix parent's reference. 632 * 633 * 4a. IF the postfix len (the length of the remaining string of the 634 * original compressed node after the split character) is non zero, 635 * create a "postfix node". If the postfix node has just one character 636 * set iscompr to 0, otherwise iscompr to 1. Set the postfix node 637 * child pointer to $NEXT. 638 * 639 * 4b. IF the postfix len is zero, just use $NEXT as postfix pointer. 640 * 641 * 5. Set child[0] of split node to postfix node. 642 * 643 * 6. Set the split node as the current node, set current index at child[1] 644 * and continue insertion algorithm as usually. 645 * 646 * ============================= ALGO 2 ============================= 647 * 648 * For case 5, that is, if we stopped in the middle of a compressed 649 * node but no mismatch was found, do: 650 * 651 * Let $SPLITPOS be the zero-based index at which, in the 652 * compressed node array of characters, we stopped iterating because 653 * there were no more keys character to match. So in the example of 654 * the node "ANNIBALE", addig the string "ANNI", the $SPLITPOS is 4. 655 * 656 * 1. Save the current compressed node $NEXT pointer (the pointer to the 657 * child element, that is always present in compressed nodes). 658 * 659 * 2. Create a "postfix node" containing all the characters from $SPLITPOS 660 * to the end. Use $NEXT as the postfix node child pointer. 661 * If the postfix node length is 1, set iscompr to 0. 662 * Set the node as a key with the associated value of the new 663 * inserted key. 664 * 665 * 3. Trim the current node to contain the first $SPLITPOS characters. 666 * As usually if the new node length is just 1, set iscompr to 0. 667 * Take the iskey / associated value as it was in the orignal node. 668 * Fix the parent's reference. 669 * 670 * 4. Set the postfix node as the only child pointer of the trimmed 671 * node created at step 1. 672 */ 673 674 /* ------------------------- ALGORITHM 1 --------------------------- */ 675 if (h->iscompr && i != len) { 676 debugf("ALGO 1: Stopped at compressed node %.*s (%p)\n", 677 h->size, h->data, (void*)h); 678 debugf("Still to insert: %.*s\n", (int)(len-i), s+i); 679 debugf("Splitting at %d: '%c'\n", j, ((char*)h->data)[j]); 680 debugf("Other (key) letter is '%c'\n", s[i]); 681 682 /* 1: Save next pointer. */ 683 raxNode **childfield = raxNodeLastChildPtr(h); 684 raxNode *next; 685 memcpy(&next,childfield,sizeof(next)); 686 debugf("Next is %p\n", (void*)next); 687 debugf("iskey %d\n", h->iskey); 688 if (h->iskey) { 689 debugf("key value is %p\n", raxGetData(h)); 690 } 691 692 /* Set the length of the additional nodes we will need. */ 693 size_t trimmedlen = j; 694 size_t postfixlen = h->size - j - 1; 695 int split_node_is_key = !trimmedlen && h->iskey && !h->isnull; 696 size_t nodesize; 697 698 /* 2: Create the split node. Also allocate the other nodes we'll need 699 * ASAP, so that it will be simpler to handle OOM. */ 700 raxNode *splitnode = raxNewNode(1, split_node_is_key); 701 raxNode *trimmed = NULL; 702 raxNode *postfix = NULL; 703 704 if (trimmedlen) { 705 nodesize = sizeof(raxNode)+trimmedlen+raxPadding(trimmedlen)+ 706 sizeof(raxNode*); 707 if (h->iskey && !h->isnull) nodesize += sizeof(void*); 708 trimmed = rax_malloc(nodesize); 709 } 710 711 if (postfixlen) { 712 nodesize = sizeof(raxNode)+postfixlen+raxPadding(postfixlen)+ 713 sizeof(raxNode*); 714 postfix = rax_malloc(nodesize); 715 } 716 717 /* OOM? Abort now that the tree is untouched. */ 718 if (splitnode == NULL || 719 (trimmedlen && trimmed == NULL) || 720 (postfixlen && postfix == NULL)) 721 { 722 rax_free(splitnode); 723 rax_free(trimmed); 724 rax_free(postfix); 725 errno = ENOMEM; 726 return 0; 727 } 728 splitnode->data[0] = h->data[j]; 729 730 if (j == 0) { 731 /* 3a: Replace the old node with the split node. */ 732 if (h->iskey) { 733 void *ndata = raxGetData(h); 734 raxSetData(splitnode,ndata); 735 } 736 memcpy(parentlink,&splitnode,sizeof(splitnode)); 737 } else { 738 /* 3b: Trim the compressed node. */ 739 trimmed->size = j; 740 memcpy(trimmed->data,h->data,j); 741 trimmed->iscompr = j > 1 ? 1 : 0; 742 trimmed->iskey = h->iskey; 743 trimmed->isnull = h->isnull; 744 if (h->iskey && !h->isnull) { 745 void *ndata = raxGetData(h); 746 raxSetData(trimmed,ndata); 747 } 748 raxNode **cp = raxNodeLastChildPtr(trimmed); 749 memcpy(cp,&splitnode,sizeof(splitnode)); 750 memcpy(parentlink,&trimmed,sizeof(trimmed)); 751 parentlink = cp; /* Set parentlink to splitnode parent. */ 752 rax->numnodes++; 753 } 754 755 /* 4: Create the postfix node: what remains of the original 756 * compressed node after the split. */ 757 if (postfixlen) { 758 /* 4a: create a postfix node. */ 759 postfix->iskey = 0; 760 postfix->isnull = 0; 761 postfix->size = postfixlen; 762 postfix->iscompr = postfixlen > 1; 763 memcpy(postfix->data,h->data+j+1,postfixlen); 764 raxNode **cp = raxNodeLastChildPtr(postfix); 765 memcpy(cp,&next,sizeof(next)); 766 rax->numnodes++; 767 } else { 768 /* 4b: just use next as postfix node. */ 769 postfix = next; 770 } 771 772 /* 5: Set splitnode first child as the postfix node. */ 773 raxNode **splitchild = raxNodeLastChildPtr(splitnode); 774 memcpy(splitchild,&postfix,sizeof(postfix)); 775 776 /* 6. Continue insertion: this will cause the splitnode to 777 * get a new child (the non common character at the currently 778 * inserted key). */ 779 rax_free(h); 780 h = splitnode; 781 } else if (h->iscompr && i == len) { 782 /* ------------------------- ALGORITHM 2 --------------------------- */ 783 debugf("ALGO 2: Stopped at compressed node %.*s (%p) j = %d\n", 784 h->size, h->data, (void*)h, j); 785 786 /* Allocate postfix & trimmed nodes ASAP to fail for OOM gracefully. */ 787 size_t postfixlen = h->size - j; 788 size_t nodesize = sizeof(raxNode)+postfixlen+raxPadding(postfixlen)+ 789 sizeof(raxNode*); 790 if (data != NULL) nodesize += sizeof(void*); 791 raxNode *postfix = rax_malloc(nodesize); 792 793 nodesize = sizeof(raxNode)+j+raxPadding(j)+sizeof(raxNode*); 794 if (h->iskey && !h->isnull) nodesize += sizeof(void*); 795 raxNode *trimmed = rax_malloc(nodesize); 796 797 if (postfix == NULL || trimmed == NULL) { 798 rax_free(postfix); 799 rax_free(trimmed); 800 errno = ENOMEM; 801 return 0; 802 } 803 804 /* 1: Save next pointer. */ 805 raxNode **childfield = raxNodeLastChildPtr(h); 806 raxNode *next; 807 memcpy(&next,childfield,sizeof(next)); 808 809 /* 2: Create the postfix node. */ 810 postfix->size = postfixlen; 811 postfix->iscompr = postfixlen > 1; 812 postfix->iskey = 1; 813 postfix->isnull = 0; 814 memcpy(postfix->data,h->data+j,postfixlen); 815 raxSetData(postfix,data); 816 raxNode **cp = raxNodeLastChildPtr(postfix); 817 memcpy(cp,&next,sizeof(next)); 818 rax->numnodes++; 819 820 /* 3: Trim the compressed node. */ 821 trimmed->size = j; 822 trimmed->iscompr = j > 1; 823 trimmed->iskey = 0; 824 trimmed->isnull = 0; 825 memcpy(trimmed->data,h->data,j); 826 memcpy(parentlink,&trimmed,sizeof(trimmed)); 827 if (h->iskey) { 828 void *aux = raxGetData(h); 829 raxSetData(trimmed,aux); 830 } 831 832 /* Fix the trimmed node child pointer to point to 833 * the postfix node. */ 834 cp = raxNodeLastChildPtr(trimmed); 835 memcpy(cp,&postfix,sizeof(postfix)); 836 837 /* Finish! We don't need to continue with the insertion 838 * algorithm for ALGO 2. The key is already inserted. */ 839 rax->numele++; 840 rax_free(h); 841 return 1; /* Key inserted. */ 842 } 843 844 /* We walked the radix tree as far as we could, but still there are left 845 * chars in our string. We need to insert the missing nodes. */ 846 while(i < len) { 847 raxNode *child; 848 849 /* If this node is going to have a single child, and there 850 * are other characters, so that that would result in a chain 851 * of single-childed nodes, turn it into a compressed node. */ 852 if (h->size == 0 && len-i > 1) { 853 debugf("Inserting compressed node\n"); 854 size_t comprsize = len-i; 855 if (comprsize > RAX_NODE_MAX_SIZE) 856 comprsize = RAX_NODE_MAX_SIZE; 857 raxNode *newh = raxCompressNode(h,s+i,comprsize,&child); 858 if (newh == NULL) goto oom; 859 h = newh; 860 memcpy(parentlink,&h,sizeof(h)); 861 parentlink = raxNodeLastChildPtr(h); 862 i += comprsize; 863 } else { 864 debugf("Inserting normal node\n"); 865 raxNode **new_parentlink; 866 raxNode *newh = raxAddChild(h,s[i],&child,&new_parentlink); 867 if (newh == NULL) goto oom; 868 h = newh; 869 memcpy(parentlink,&h,sizeof(h)); 870 parentlink = new_parentlink; 871 i++; 872 } 873 rax->numnodes++; 874 h = child; 875 } 876 raxNode *newh = raxReallocForData(h,data); 877 if (newh == NULL) goto oom; 878 h = newh; 879 if (!h->iskey) rax->numele++; 880 raxSetData(h,data); 881 memcpy(parentlink,&h,sizeof(h)); 882 return 1; /* Element inserted. */ 883 884 oom: 885 /* This code path handles out of memory after part of the sub-tree was 886 * already modified. Set the node as a key, and then remove it. However we 887 * do that only if the node is a terminal node, otherwise if the OOM 888 * happened reallocating a node in the middle, we don't need to free 889 * anything. */ 890 if (h->size == 0) { 891 h->isnull = 1; 892 h->iskey = 1; 893 rax->numele++; /* Compensate the next remove. */ 894 assert(raxRemove(rax,s,i,NULL) != 0); 895 } 896 errno = ENOMEM; 897 return 0; 898 } 899 900 /* Overwriting insert. Just a wrapper for raxGenericInsert() that will 901 * update the element if there is already one for the same key. */ 902 int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) { 903 return raxGenericInsert(rax,s,len,data,old,1); 904 } 905 906 /* Non overwriting insert function: this if an element with the same key 907 * exists, the value is not updated and the function returns 0. 908 * This is a just a wrapper for raxGenericInsert(). */ 909 int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) { 910 return raxGenericInsert(rax,s,len,data,old,0); 911 } 912 913 /* Find a key in the rax, returns raxNotFound special void pointer value 914 * if the item was not found, otherwise the value associated with the 915 * item is returned. */ 916 void *raxFind(rax *rax, unsigned char *s, size_t len) { 917 raxNode *h; 918 919 debugf("### Lookup: %.*s\n", (int)len, s); 920 int splitpos = 0; 921 size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,NULL); 922 if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) 923 return raxNotFound; 924 return raxGetData(h); 925 } 926 927 /* Return the memory address where the 'parent' node stores the specified 928 * 'child' pointer, so that the caller can update the pointer with another 929 * one if needed. The function assumes it will find a match, otherwise the 930 * operation is an undefined behavior (it will continue scanning the 931 * memory without any bound checking). */ 932 raxNode **raxFindParentLink(raxNode *parent, raxNode *child) { 933 raxNode **cp = raxNodeFirstChildPtr(parent); 934 raxNode *c; 935 while(1) { 936 memcpy(&c,cp,sizeof(c)); 937 if (c == child) break; 938 cp++; 939 } 940 return cp; 941 } 942 943 /* Low level child removal from node. The new node pointer (after the child 944 * removal) is returned. Note that this function does not fix the pointer 945 * of the parent node in its parent, so this task is up to the caller. 946 * The function never fails for out of memory. */ 947 raxNode *raxRemoveChild(raxNode *parent, raxNode *child) { 948 debugnode("raxRemoveChild before", parent); 949 /* If parent is a compressed node (having a single child, as for definition 950 * of the data structure), the removal of the child consists into turning 951 * it into a normal node without children. */ 952 if (parent->iscompr) { 953 void *data = NULL; 954 if (parent->iskey) data = raxGetData(parent); 955 parent->isnull = 0; 956 parent->iscompr = 0; 957 parent->size = 0; 958 if (parent->iskey) raxSetData(parent,data); 959 debugnode("raxRemoveChild after", parent); 960 return parent; 961 } 962 963 /* Otherwise we need to scan for the child pointer and memmove() 964 * accordingly. 965 * 966 * 1. To start we seek the first element in both the children 967 * pointers and edge bytes in the node. */ 968 raxNode **cp = raxNodeFirstChildPtr(parent); 969 raxNode **c = cp; 970 unsigned char *e = parent->data; 971 972 /* 2. Search the child pointer to remove inside the array of children 973 * pointers. */ 974 while(1) { 975 raxNode *aux; 976 memcpy(&aux,c,sizeof(aux)); 977 if (aux == child) break; 978 c++; 979 e++; 980 } 981 982 /* 3. Remove the edge and the pointer by memmoving the remaining children 983 * pointer and edge bytes one position before. */ 984 int taillen = parent->size - (e - parent->data) - 1; 985 debugf("raxRemoveChild tail len: %d\n", taillen); 986 memmove(e,e+1,taillen); 987 988 /* Compute the shift, that is the amount of bytes we should move our 989 * child pointers to the left, since the removal of one edge character 990 * and the corresponding padding change, may change the layout. 991 * We just check if in the old version of the node there was at the 992 * end just a single byte and all padding: in that case removing one char 993 * will remove a whole sizeof(void*) word. */ 994 size_t shift = ((parent->size+4) % sizeof(void*)) == 1 ? sizeof(void*) : 0; 995 996 /* Move the children pointers before the deletion point. */ 997 if (shift) 998 memmove(((char*)cp)-shift,cp,(parent->size-taillen-1)*sizeof(raxNode**)); 999 1000 /* Move the remaining "tail" pointers at the right position as well. */ 1001 size_t valuelen = (parent->iskey && !parent->isnull) ? sizeof(void*) : 0; 1002 memmove(((char*)c)-shift,c+1,taillen*sizeof(raxNode**)+valuelen); 1003 1004 /* 4. Update size. */ 1005 parent->size--; 1006 1007 /* realloc the node according to the theoretical memory usage, to free 1008 * data if we are over-allocating right now. */ 1009 raxNode *newnode = rax_realloc(parent,raxNodeCurrentLength(parent)); 1010 if (newnode) { 1011 debugnode("raxRemoveChild after", newnode); 1012 } 1013 /* Note: if rax_realloc() fails we just return the old address, which 1014 * is valid. */ 1015 return newnode ? newnode : parent; 1016 } 1017 1018 /* Remove the specified item. Returns 1 if the item was found and 1019 * deleted, 0 otherwise. */ 1020 int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) { 1021 raxNode *h; 1022 raxStack ts; 1023 1024 debugf("### Delete: %.*s\n", (int)len, s); 1025 raxStackInit(&ts); 1026 int splitpos = 0; 1027 size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,&ts); 1028 if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) { 1029 raxStackFree(&ts); 1030 return 0; 1031 } 1032 if (old) *old = raxGetData(h); 1033 h->iskey = 0; 1034 rax->numele--; 1035 1036 /* If this node has no children, the deletion needs to reclaim the 1037 * no longer used nodes. This is an iterative process that needs to 1038 * walk the three upward, deleting all the nodes with just one child 1039 * that are not keys, until the head of the rax is reached or the first 1040 * node with more than one child is found. */ 1041 1042 int trycompress = 0; /* Will be set to 1 if we should try to optimize the 1043 tree resulting from the deletion. */ 1044 1045 if (h->size == 0) { 1046 debugf("Key deleted in node without children. Cleanup needed.\n"); 1047 raxNode *child = NULL; 1048 while(h != rax->head) { 1049 child = h; 1050 debugf("Freeing child %p [%.*s] key:%d\n", (void*)child, 1051 (int)child->size, (char*)child->data, child->iskey); 1052 rax_free(child); 1053 rax->numnodes--; 1054 h = raxStackPop(&ts); 1055 /* If this node has more then one child, or actually holds 1056 * a key, stop here. */ 1057 if (h->iskey || (!h->iscompr && h->size != 1)) break; 1058 } 1059 if (child) { 1060 debugf("Unlinking child %p from parent %p\n", 1061 (void*)child, (void*)h); 1062 raxNode *new = raxRemoveChild(h,child); 1063 if (new != h) { 1064 raxNode *parent = raxStackPeek(&ts); 1065 raxNode **parentlink; 1066 if (parent == NULL) { 1067 parentlink = &rax->head; 1068 } else { 1069 parentlink = raxFindParentLink(parent,h); 1070 } 1071 memcpy(parentlink,&new,sizeof(new)); 1072 } 1073 1074 /* If after the removal the node has just a single child 1075 * and is not a key, we need to try to compress it. */ 1076 if (new->size == 1 && new->iskey == 0) { 1077 trycompress = 1; 1078 h = new; 1079 } 1080 } 1081 } else if (h->size == 1) { 1082 /* If the node had just one child, after the removal of the key 1083 * further compression with adjacent nodes is pontentially possible. */ 1084 trycompress = 1; 1085 } 1086 1087 /* Don't try node compression if our nodes pointers stack is not 1088 * complete because of OOM while executing raxLowWalk() */ 1089 if (trycompress && ts.oom) trycompress = 0; 1090 1091 /* Recompression: if trycompress is true, 'h' points to a radix tree node 1092 * that changed in a way that could allow to compress nodes in this 1093 * sub-branch. Compressed nodes represent chains of nodes that are not 1094 * keys and have a single child, so there are two deletion events that 1095 * may alter the tree so that further compression is needed: 1096 * 1097 * 1) A node with a single child was a key and now no longer is a key. 1098 * 2) A node with two children now has just one child. 1099 * 1100 * We try to navigate upward till there are other nodes that can be 1101 * compressed, when we reach the upper node which is not a key and has 1102 * a single child, we scan the chain of children to collect the 1103 * compressable part of the tree, and replace the current node with the 1104 * new one, fixing the child pointer to reference the first non 1105 * compressable node. 1106 * 1107 * Example of case "1". A tree stores the keys "FOO" = 1 and 1108 * "FOOBAR" = 2: 1109 * 1110 * 1111 * "FOO" -> "BAR" -> [] (2) 1112 * (1) 1113 * 1114 * After the removal of "FOO" the tree can be compressed as: 1115 * 1116 * "FOOBAR" -> [] (2) 1117 * 1118 * 1119 * Example of case "2". A tree stores the keys "FOOBAR" = 1 and 1120 * "FOOTER" = 2: 1121 * 1122 * |B| -> "AR" -> [] (1) 1123 * "FOO" -> |-| 1124 * |T| -> "ER" -> [] (2) 1125 * 1126 * After the removal of "FOOTER" the resulting tree is: 1127 * 1128 * "FOO" -> |B| -> "AR" -> [] (1) 1129 * 1130 * That can be compressed into: 1131 * 1132 * "FOOBAR" -> [] (1) 1133 */ 1134 if (trycompress) { 1135 debugf("After removing %.*s:\n", (int)len, s); 1136 debugnode("Compression may be needed",h); 1137 debugf("Seek start node\n"); 1138 1139 /* Try to reach the upper node that is compressible. 1140 * At the end of the loop 'h' will point to the first node we 1141 * can try to compress and 'parent' to its parent. */ 1142 raxNode *parent; 1143 while(1) { 1144 parent = raxStackPop(&ts); 1145 if (!parent || parent->iskey || 1146 (!parent->iscompr && parent->size != 1)) break; 1147 h = parent; 1148 debugnode("Going up to",h); 1149 } 1150 raxNode *start = h; /* Compression starting node. */ 1151 1152 /* Scan chain of nodes we can compress. */ 1153 size_t comprsize = h->size; 1154 int nodes = 1; 1155 while(h->size != 0) { 1156 raxNode **cp = raxNodeLastChildPtr(h); 1157 memcpy(&h,cp,sizeof(h)); 1158 if (h->iskey || (!h->iscompr && h->size != 1)) break; 1159 /* Stop here if going to the next node would result into 1160 * a compressed node larger than h->size can hold. */ 1161 if (comprsize + h->size > RAX_NODE_MAX_SIZE) break; 1162 nodes++; 1163 comprsize += h->size; 1164 } 1165 if (nodes > 1) { 1166 /* If we can compress, create the new node and populate it. */ 1167 size_t nodesize = 1168 sizeof(raxNode)+comprsize+raxPadding(comprsize)+sizeof(raxNode*); 1169 raxNode *new = rax_malloc(nodesize); 1170 /* An out of memory here just means we cannot optimize this 1171 * node, but the tree is left in a consistent state. */ 1172 if (new == NULL) { 1173 raxStackFree(&ts); 1174 return 1; 1175 } 1176 new->iskey = 0; 1177 new->isnull = 0; 1178 new->iscompr = 1; 1179 new->size = comprsize; 1180 rax->numnodes++; 1181 1182 /* Scan again, this time to populate the new node content and 1183 * to fix the new node child pointer. At the same time we free 1184 * all the nodes that we'll no longer use. */ 1185 comprsize = 0; 1186 h = start; 1187 while(h->size != 0) { 1188 memcpy(new->data+comprsize,h->data,h->size); 1189 comprsize += h->size; 1190 raxNode **cp = raxNodeLastChildPtr(h); 1191 raxNode *tofree = h; 1192 memcpy(&h,cp,sizeof(h)); 1193 rax_free(tofree); rax->numnodes--; 1194 if (h->iskey || (!h->iscompr && h->size != 1)) break; 1195 } 1196 debugnode("New node",new); 1197 1198 /* Now 'h' points to the first node that we still need to use, 1199 * so our new node child pointer will point to it. */ 1200 raxNode **cp = raxNodeLastChildPtr(new); 1201 memcpy(cp,&h,sizeof(h)); 1202 1203 /* Fix parent link. */ 1204 if (parent) { 1205 raxNode **parentlink = raxFindParentLink(parent,start); 1206 memcpy(parentlink,&new,sizeof(new)); 1207 } else { 1208 rax->head = new; 1209 } 1210 1211 debugf("Compressed %d nodes, %d total bytes\n", 1212 nodes, (int)comprsize); 1213 } 1214 } 1215 raxStackFree(&ts); 1216 return 1; 1217 } 1218 1219 /* This is the core of raxFree(): performs a depth-first scan of the 1220 * tree and releases all the nodes found. */ 1221 void raxRecursiveFree(rax *rax, raxNode *n, void (*free_callback)(void*)) { 1222 debugnode("free traversing",n); 1223 int numchildren = n->iscompr ? 1 : n->size; 1224 raxNode **cp = raxNodeLastChildPtr(n); 1225 while(numchildren--) { 1226 raxNode *child; 1227 memcpy(&child,cp,sizeof(child)); 1228 raxRecursiveFree(rax,child,free_callback); 1229 cp--; 1230 } 1231 debugnode("free depth-first",n); 1232 if (free_callback && n->iskey && !n->isnull) 1233 free_callback(raxGetData(n)); 1234 rax_free(n); 1235 rax->numnodes--; 1236 } 1237 1238 /* Free a whole radix tree, calling the specified callback in order to 1239 * free the auxiliary data. */ 1240 void raxFreeWithCallback(rax *rax, void (*free_callback)(void*)) { 1241 raxRecursiveFree(rax,rax->head,free_callback); 1242 assert(rax->numnodes == 0); 1243 rax_free(rax); 1244 } 1245 1246 /* Free a whole radix tree. */ 1247 void raxFree(rax *rax) { 1248 raxFreeWithCallback(rax,NULL); 1249 } 1250 1251 /* ------------------------------- Iterator --------------------------------- */ 1252 1253 /* Initialize a Rax iterator. This call should be performed a single time 1254 * to initialize the iterator, and must be followed by a raxSeek() call, 1255 * otherwise the raxPrev()/raxNext() functions will just return EOF. */ 1256 void raxStart(raxIterator *it, rax *rt) { 1257 it->flags = RAX_ITER_EOF; /* No crash if the iterator is not seeked. */ 1258 it->rt = rt; 1259 it->key_len = 0; 1260 it->key = it->key_static_string; 1261 it->key_max = RAX_ITER_STATIC_LEN; 1262 it->data = NULL; 1263 it->node_cb = NULL; 1264 raxStackInit(&it->stack); 1265 } 1266 1267 /* Append characters at the current key string of the iterator 'it'. This 1268 * is a low level function used to implement the iterator, not callable by 1269 * the user. Returns 0 on out of memory, otherwise 1 is returned. */ 1270 int raxIteratorAddChars(raxIterator *it, unsigned char *s, size_t len) { 1271 if (it->key_max < it->key_len+len) { 1272 unsigned char *old = (it->key == it->key_static_string) ? NULL : 1273 it->key; 1274 size_t new_max = (it->key_len+len)*2; 1275 it->key = rax_realloc(old,new_max); 1276 if (it->key == NULL) { 1277 it->key = (!old) ? it->key_static_string : old; 1278 errno = ENOMEM; 1279 return 0; 1280 } 1281 if (old == NULL) memcpy(it->key,it->key_static_string,it->key_len); 1282 it->key_max = new_max; 1283 } 1284 /* Use memmove since there could be an overlap between 's' and 1285 * it->key when we use the current key in order to re-seek. */ 1286 memmove(it->key+it->key_len,s,len); 1287 it->key_len += len; 1288 return 1; 1289 } 1290 1291 /* Remove the specified number of chars from the right of the current 1292 * iterator key. */ 1293 void raxIteratorDelChars(raxIterator *it, size_t count) { 1294 it->key_len -= count; 1295 } 1296 1297 /* Do an iteration step towards the next element. At the end of the step the 1298 * iterator key will represent the (new) current key. If it is not possible 1299 * to step in the specified direction since there are no longer elements, the 1300 * iterator is flagged with RAX_ITER_EOF. 1301 * 1302 * If 'noup' is true the function starts directly scanning for the next 1303 * lexicographically smaller children, and the current node is already assumed 1304 * to be the parent of the last key node, so the first operation to go back to 1305 * the parent will be skipped. This option is used by raxSeek() when 1306 * implementing seeking a non existing element with the ">" or "<" options: 1307 * the starting node is not a key in that particular case, so we start the scan 1308 * from a node that does not represent the key set. 1309 * 1310 * The function returns 1 on success or 0 on out of memory. */ 1311 int raxIteratorNextStep(raxIterator *it, int noup) { 1312 if (it->flags & RAX_ITER_EOF) { 1313 return 1; 1314 } else if (it->flags & RAX_ITER_JUST_SEEKED) { 1315 it->flags &= ~RAX_ITER_JUST_SEEKED; 1316 return 1; 1317 } 1318 1319 /* Save key len, stack items and the node where we are currently 1320 * so that on iterator EOF we can restore the current key and state. */ 1321 size_t orig_key_len = it->key_len; 1322 size_t orig_stack_items = it->stack.items; 1323 raxNode *orig_node = it->node; 1324 1325 while(1) { 1326 int children = it->node->iscompr ? 1 : it->node->size; 1327 if (!noup && children) { 1328 debugf("GO DEEPER\n"); 1329 /* Seek the lexicographically smaller key in this subtree, which 1330 * is the first one found always going torwards the first child 1331 * of every successive node. */ 1332 if (!raxStackPush(&it->stack,it->node)) return 0; 1333 raxNode **cp = raxNodeFirstChildPtr(it->node); 1334 if (!raxIteratorAddChars(it,it->node->data, 1335 it->node->iscompr ? it->node->size : 1)) return 0; 1336 memcpy(&it->node,cp,sizeof(it->node)); 1337 /* Call the node callback if any, and replace the node pointer 1338 * if the callback returns true. */ 1339 if (it->node_cb && it->node_cb(&it->node)) 1340 memcpy(cp,&it->node,sizeof(it->node)); 1341 /* For "next" step, stop every time we find a key along the 1342 * way, since the key is lexicograhically smaller compared to 1343 * what follows in the sub-children. */ 1344 if (it->node->iskey) { 1345 it->data = raxGetData(it->node); 1346 return 1; 1347 } 1348 } else { 1349 /* If we finished exporing the previous sub-tree, switch to the 1350 * new one: go upper until a node is found where there are 1351 * children representing keys lexicographically greater than the 1352 * current key. */ 1353 while(1) { 1354 int old_noup = noup; 1355 1356 /* Already on head? Can't go up, iteration finished. */ 1357 if (!noup && it->node == it->rt->head) { 1358 it->flags |= RAX_ITER_EOF; 1359 it->stack.items = orig_stack_items; 1360 it->key_len = orig_key_len; 1361 it->node = orig_node; 1362 return 1; 1363 } 1364 /* If there are no children at the current node, try parent's 1365 * next child. */ 1366 unsigned char prevchild = it->key[it->key_len-1]; 1367 if (!noup) { 1368 it->node = raxStackPop(&it->stack); 1369 } else { 1370 noup = 0; 1371 } 1372 /* Adjust the current key to represent the node we are 1373 * at. */ 1374 int todel = it->node->iscompr ? it->node->size : 1; 1375 raxIteratorDelChars(it,todel); 1376 1377 /* Try visiting the next child if there was at least one 1378 * additional child. */ 1379 if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) { 1380 raxNode **cp = raxNodeFirstChildPtr(it->node); 1381 int i = 0; 1382 while (i < it->node->size) { 1383 debugf("SCAN NEXT %c\n", it->node->data[i]); 1384 if (it->node->data[i] > prevchild) break; 1385 i++; 1386 cp++; 1387 } 1388 if (i != it->node->size) { 1389 debugf("SCAN found a new node\n"); 1390 raxIteratorAddChars(it,it->node->data+i,1); 1391 if (!raxStackPush(&it->stack,it->node)) return 0; 1392 memcpy(&it->node,cp,sizeof(it->node)); 1393 /* Call the node callback if any, and replace the node 1394 * pointer if the callback returns true. */ 1395 if (it->node_cb && it->node_cb(&it->node)) 1396 memcpy(cp,&it->node,sizeof(it->node)); 1397 if (it->node->iskey) { 1398 it->data = raxGetData(it->node); 1399 return 1; 1400 } 1401 break; 1402 } 1403 } 1404 } 1405 } 1406 } 1407 } 1408 1409 /* Seek the grestest key in the subtree at the current node. Return 0 on 1410 * out of memory, otherwise 1. This is an helper function for different 1411 * iteration functions below. */ 1412 int raxSeekGreatest(raxIterator *it) { 1413 while(it->node->size) { 1414 if (it->node->iscompr) { 1415 if (!raxIteratorAddChars(it,it->node->data, 1416 it->node->size)) return 0; 1417 } else { 1418 if (!raxIteratorAddChars(it,it->node->data+it->node->size-1,1)) 1419 return 0; 1420 } 1421 raxNode **cp = raxNodeLastChildPtr(it->node); 1422 if (!raxStackPush(&it->stack,it->node)) return 0; 1423 memcpy(&it->node,cp,sizeof(it->node)); 1424 } 1425 return 1; 1426 } 1427 1428 /* Like raxIteratorNextStep() but implements an iteration step moving 1429 * to the lexicographically previous element. The 'noup' option has a similar 1430 * effect to the one of raxIteratorNextStep(). */ 1431 int raxIteratorPrevStep(raxIterator *it, int noup) { 1432 if (it->flags & RAX_ITER_EOF) { 1433 return 1; 1434 } else if (it->flags & RAX_ITER_JUST_SEEKED) { 1435 it->flags &= ~RAX_ITER_JUST_SEEKED; 1436 return 1; 1437 } 1438 1439 /* Save key len, stack items and the node where we are currently 1440 * so that on iterator EOF we can restore the current key and state. */ 1441 size_t orig_key_len = it->key_len; 1442 size_t orig_stack_items = it->stack.items; 1443 raxNode *orig_node = it->node; 1444 1445 while(1) { 1446 int old_noup = noup; 1447 1448 /* Already on head? Can't go up, iteration finished. */ 1449 if (!noup && it->node == it->rt->head) { 1450 it->flags |= RAX_ITER_EOF; 1451 it->stack.items = orig_stack_items; 1452 it->key_len = orig_key_len; 1453 it->node = orig_node; 1454 return 1; 1455 } 1456 1457 unsigned char prevchild = it->key[it->key_len-1]; 1458 if (!noup) { 1459 it->node = raxStackPop(&it->stack); 1460 } else { 1461 noup = 0; 1462 } 1463 1464 /* Adjust the current key to represent the node we are 1465 * at. */ 1466 int todel = it->node->iscompr ? it->node->size : 1; 1467 raxIteratorDelChars(it,todel); 1468 1469 /* Try visiting the prev child if there is at least one 1470 * child. */ 1471 if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) { 1472 raxNode **cp = raxNodeLastChildPtr(it->node); 1473 int i = it->node->size-1; 1474 while (i >= 0) { 1475 debugf("SCAN PREV %c\n", it->node->data[i]); 1476 if (it->node->data[i] < prevchild) break; 1477 i--; 1478 cp--; 1479 } 1480 /* If we found a new subtree to explore in this node, 1481 * go deeper following all the last children in order to 1482 * find the key lexicographically greater. */ 1483 if (i != -1) { 1484 debugf("SCAN found a new node\n"); 1485 /* Enter the node we just found. */ 1486 if (!raxIteratorAddChars(it,it->node->data+i,1)) return 0; 1487 if (!raxStackPush(&it->stack,it->node)) return 0; 1488 memcpy(&it->node,cp,sizeof(it->node)); 1489 /* Seek sub-tree max. */ 1490 if (!raxSeekGreatest(it)) return 0; 1491 } 1492 } 1493 1494 /* Return the key: this could be the key we found scanning a new 1495 * subtree, or if we did not find a new subtree to explore here, 1496 * before giving up with this node, check if it's a key itself. */ 1497 if (it->node->iskey) { 1498 it->data = raxGetData(it->node); 1499 return 1; 1500 } 1501 } 1502 } 1503 1504 /* Seek an iterator at the specified element. 1505 * Return 0 if the seek failed for syntax error or out of memory. Otherwise 1506 * 1 is returned. When 0 is returned for out of memory, errno is set to 1507 * the ENOMEM value. */ 1508 int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len) { 1509 int eq = 0, lt = 0, gt = 0, first = 0, last = 0; 1510 1511 it->stack.items = 0; /* Just resetting. Intialized by raxStart(). */ 1512 it->flags |= RAX_ITER_JUST_SEEKED; 1513 it->flags &= ~RAX_ITER_EOF; 1514 it->key_len = 0; 1515 it->node = NULL; 1516 1517 /* Set flags according to the operator used to perform the seek. */ 1518 if (op[0] == '>') { 1519 gt = 1; 1520 if (op[1] == '=') eq = 1; 1521 } else if (op[0] == '<') { 1522 lt = 1; 1523 if (op[1] == '=') eq = 1; 1524 } else if (op[0] == '=') { 1525 eq = 1; 1526 } else if (op[0] == '^') { 1527 first = 1; 1528 } else if (op[0] == '$') { 1529 last = 1; 1530 } else { 1531 errno = 0; 1532 return 0; /* Error. */ 1533 } 1534 1535 /* If there are no elements, set the EOF condition immediately and 1536 * return. */ 1537 if (it->rt->numele == 0) { 1538 it->flags |= RAX_ITER_EOF; 1539 return 1; 1540 } 1541 1542 if (first) { 1543 /* Seeking the first key greater or equal to the empty string 1544 * is equivalent to seeking the smaller key available. */ 1545 return raxSeek(it,">=",NULL,0); 1546 } 1547 1548 if (last) { 1549 /* Find the greatest key taking always the last child till a 1550 * final node is found. */ 1551 it->node = it->rt->head; 1552 if (!raxSeekGreatest(it)) return 0; 1553 assert(it->node->iskey); 1554 it->data = raxGetData(it->node); 1555 return 1; 1556 } 1557 1558 /* We need to seek the specified key. What we do here is to actually 1559 * perform a lookup, and later invoke the prev/next key code that 1560 * we already use for iteration. */ 1561 int splitpos = 0; 1562 size_t i = raxLowWalk(it->rt,ele,len,&it->node,NULL,&splitpos,&it->stack); 1563 1564 /* Return OOM on incomplete stack info. */ 1565 if (it->stack.oom) return 0; 1566 1567 if (eq && i == len && (!it->node->iscompr || splitpos == 0) && 1568 it->node->iskey) 1569 { 1570 /* We found our node, since the key matches and we have an 1571 * "equal" condition. */ 1572 if (!raxIteratorAddChars(it,ele,len)) return 0; /* OOM. */ 1573 it->data = raxGetData(it->node); 1574 } else if (lt || gt) { 1575 /* Exact key not found or eq flag not set. We have to set as current 1576 * key the one represented by the node we stopped at, and perform 1577 * a next/prev operation to seek. To reconstruct the key at this node 1578 * we start from the parent and go to the current node, accumulating 1579 * the characters found along the way. */ 1580 if (!raxStackPush(&it->stack,it->node)) return 0; 1581 for (size_t j = 1; j < it->stack.items; j++) { 1582 raxNode *parent = it->stack.stack[j-1]; 1583 raxNode *child = it->stack.stack[j]; 1584 if (parent->iscompr) { 1585 if (!raxIteratorAddChars(it,parent->data,parent->size)) 1586 return 0; 1587 } else { 1588 raxNode **cp = raxNodeFirstChildPtr(parent); 1589 unsigned char *p = parent->data; 1590 while(1) { 1591 raxNode *aux; 1592 memcpy(&aux,cp,sizeof(aux)); 1593 if (aux == child) break; 1594 cp++; 1595 p++; 1596 } 1597 if (!raxIteratorAddChars(it,p,1)) return 0; 1598 } 1599 } 1600 raxStackPop(&it->stack); 1601 1602 /* We need to set the iterator in the correct state to call next/prev 1603 * step in order to seek the desired element. */ 1604 debugf("After initial seek: i=%d len=%d key=%.*s\n", 1605 (int)i, (int)len, (int)it->key_len, it->key); 1606 if (i != len && !it->node->iscompr) { 1607 /* If we stopped in the middle of a normal node because of a 1608 * mismatch, add the mismatching character to the current key 1609 * and call the iterator with the 'noup' flag so that it will try 1610 * to seek the next/prev child in the current node directly based 1611 * on the mismatching character. */ 1612 if (!raxIteratorAddChars(it,ele+i,1)) return 0; 1613 debugf("Seek normal node on mismatch: %.*s\n", 1614 (int)it->key_len, (char*)it->key); 1615 1616 it->flags &= ~RAX_ITER_JUST_SEEKED; 1617 if (lt && !raxIteratorPrevStep(it,1)) return 0; 1618 if (gt && !raxIteratorNextStep(it,1)) return 0; 1619 it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */ 1620 } else if (i != len && it->node->iscompr) { 1621 debugf("Compressed mismatch: %.*s\n", 1622 (int)it->key_len, (char*)it->key); 1623 /* In case of a mismatch within a compressed node. */ 1624 int nodechar = it->node->data[splitpos]; 1625 int keychar = ele[i]; 1626 it->flags &= ~RAX_ITER_JUST_SEEKED; 1627 if (gt) { 1628 /* If the key the compressed node represents is greater 1629 * than our seek element, continue forward, otherwise set the 1630 * state in order to go back to the next sub-tree. */ 1631 if (nodechar > keychar) { 1632 if (!raxIteratorNextStep(it,0)) return 0; 1633 } else { 1634 if (!raxIteratorAddChars(it,it->node->data,it->node->size)) 1635 return 0; 1636 if (!raxIteratorNextStep(it,1)) return 0; 1637 } 1638 } 1639 if (lt) { 1640 /* If the key the compressed node represents is smaller 1641 * than our seek element, seek the greater key in this 1642 * subtree, otherwise set the state in order to go back to 1643 * the previous sub-tree. */ 1644 if (nodechar < keychar) { 1645 if (!raxSeekGreatest(it)) return 0; 1646 it->data = raxGetData(it->node); 1647 } else { 1648 if (!raxIteratorAddChars(it,it->node->data,it->node->size)) 1649 return 0; 1650 if (!raxIteratorPrevStep(it,1)) return 0; 1651 } 1652 } 1653 it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */ 1654 } else { 1655 debugf("No mismatch: %.*s\n", 1656 (int)it->key_len, (char*)it->key); 1657 /* If there was no mismatch we are into a node representing the 1658 * key, (but which is not a key or the seek operator does not 1659 * include 'eq'), or we stopped in the middle of a compressed node 1660 * after processing all the key. Continue iterating as this was 1661 * a legitimate key we stopped at. */ 1662 it->flags &= ~RAX_ITER_JUST_SEEKED; 1663 if (it->node->iscompr && it->node->iskey && splitpos && lt) { 1664 /* If we stopped in the middle of a compressed node with 1665 * perfect match, and the condition is to seek a key "<" than 1666 * the specified one, then if this node is a key it already 1667 * represents our match. For instance we may have nodes: 1668 * 1669 * "f" -> "oobar" = 1 -> "" = 2 1670 * 1671 * Representing keys "f" = 1, "foobar" = 2. A seek for 1672 * the key < "foo" will stop in the middle of the "oobar" 1673 * node, but will be our match, representing the key "f". 1674 * 1675 * So in that case, we don't seek backward. */ 1676 } else { 1677 if (gt && !raxIteratorNextStep(it,0)) return 0; 1678 if (lt && !raxIteratorPrevStep(it,0)) return 0; 1679 } 1680 it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */ 1681 } 1682 } else { 1683 /* If we are here just eq was set but no match was found. */ 1684 it->flags |= RAX_ITER_EOF; 1685 return 1; 1686 } 1687 return 1; 1688 } 1689 1690 /* Go to the next element in the scope of the iterator 'it'. 1691 * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is 1692 * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */ 1693 int raxNext(raxIterator *it) { 1694 if (!raxIteratorNextStep(it,0)) { 1695 errno = ENOMEM; 1696 return 0; 1697 } 1698 if (it->flags & RAX_ITER_EOF) { 1699 errno = 0; 1700 return 0; 1701 } 1702 return 1; 1703 } 1704 1705 /* Go to the previous element in the scope of the iterator 'it'. 1706 * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is 1707 * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */ 1708 int raxPrev(raxIterator *it) { 1709 if (!raxIteratorPrevStep(it,0)) { 1710 errno = ENOMEM; 1711 return 0; 1712 } 1713 if (it->flags & RAX_ITER_EOF) { 1714 errno = 0; 1715 return 0; 1716 } 1717 return 1; 1718 } 1719 1720 /* Perform a random walk starting in the current position of the iterator. 1721 * Return 0 if the tree is empty or on out of memory. Otherwise 1 is returned 1722 * and the iterator is set to the node reached after doing a random walk 1723 * of 'steps' steps. If the 'steps' argument is 0, the random walk is performed 1724 * using a random number of steps between 1 and two times the logarithm of 1725 * the number of elements. 1726 * 1727 * NOTE: if you use this function to generate random elements from the radix 1728 * tree, expect a disappointing distribution. A random walk produces good 1729 * random elements if the tree is not sparse, however in the case of a radix 1730 * tree certain keys will be reported much more often than others. At least 1731 * this function should be able to expore every possible element eventually. */ 1732 int raxRandomWalk(raxIterator *it, size_t steps) { 1733 if (it->rt->numele == 0) { 1734 it->flags |= RAX_ITER_EOF; 1735 return 0; 1736 } 1737 1738 if (steps == 0) { 1739 size_t fle = floor(log(it->rt->numele)); 1740 fle *= 2; 1741 steps = 1 + rand() % fle; 1742 } 1743 1744 raxNode *n = it->node; 1745 while(steps > 0 || !n->iskey) { 1746 int numchildren = n->iscompr ? 1 : n->size; 1747 int r = rand() % (numchildren+(n != it->rt->head)); 1748 1749 if (r == numchildren) { 1750 /* Go up to parent. */ 1751 n = raxStackPop(&it->stack); 1752 int todel = n->iscompr ? n->size : 1; 1753 raxIteratorDelChars(it,todel); 1754 } else { 1755 /* Select a random child. */ 1756 if (n->iscompr) { 1757 if (!raxIteratorAddChars(it,n->data,n->size)) return 0; 1758 } else { 1759 if (!raxIteratorAddChars(it,n->data+r,1)) return 0; 1760 } 1761 raxNode **cp = raxNodeFirstChildPtr(n)+r; 1762 if (!raxStackPush(&it->stack,n)) return 0; 1763 memcpy(&n,cp,sizeof(n)); 1764 } 1765 if (n->iskey) steps--; 1766 } 1767 it->node = n; 1768 return 1; 1769 } 1770 1771 /* Compare the key currently pointed by the iterator to the specified 1772 * key according to the specified operator. Returns 1 if the comparison is 1773 * true, otherwise 0 is returned. */ 1774 int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len) { 1775 int eq = 0, lt = 0, gt = 0; 1776 1777 if (op[0] == '=' || op[1] == '=') eq = 1; 1778 if (op[0] == '>') gt = 1; 1779 else if (op[0] == '<') lt = 1; 1780 else if (op[1] != '=') return 0; /* Syntax error. */ 1781 1782 size_t minlen = key_len < iter->key_len ? key_len : iter->key_len; 1783 int cmp = memcmp(iter->key,key,minlen); 1784 1785 /* Handle == */ 1786 if (lt == 0 && gt == 0) return cmp == 0 && key_len == iter->key_len; 1787 1788 /* Handle >, >=, <, <= */ 1789 if (cmp == 0) { 1790 /* Same prefix: longer wins. */ 1791 if (eq && key_len == iter->key_len) return 1; 1792 else if (lt) return iter->key_len < key_len; 1793 else if (gt) return iter->key_len > key_len; 1794 } if (cmp > 0) { 1795 return gt ? 1 : 0; 1796 } else /* (cmp < 0) */ { 1797 return lt ? 1 : 0; 1798 } 1799 } 1800 1801 /* Free the iterator. */ 1802 void raxStop(raxIterator *it) { 1803 if (it->key != it->key_static_string) rax_free(it->key); 1804 raxStackFree(&it->stack); 1805 } 1806 1807 /* Return if the iterator is in an EOF state. This happens when raxSeek() 1808 * failed to seek an appropriate element, so that raxNext() or raxPrev() 1809 * will return zero, or when an EOF condition was reached while iterating 1810 * with raxNext() and raxPrev(). */ 1811 int raxEOF(raxIterator *it) { 1812 return it->flags & RAX_ITER_EOF; 1813 } 1814 1815 /* Return the number of elements inside the radix tree. */ 1816 uint64_t raxSize(rax *rax) { 1817 return rax->numele; 1818 } 1819 1820 /* ----------------------------- Introspection ------------------------------ */ 1821 1822 /* This function is mostly used for debugging and learning purposes. 1823 * It shows an ASCII representation of a tree on standard output, outling 1824 * all the nodes and the contained keys. 1825 * 1826 * The representation is as follow: 1827 * 1828 * "foobar" (compressed node) 1829 * [abc] (normal node with three children) 1830 * [abc]=0x12345678 (node is a key, pointing to value 0x12345678) 1831 * [] (a normal empty node) 1832 * 1833 * Children are represented in new idented lines, each children prefixed by 1834 * the "`-(x)" string, where "x" is the edge byte. 1835 * 1836 * [abc] 1837 * `-(a) "ladin" 1838 * `-(b) [kj] 1839 * `-(c) [] 1840 * 1841 * However when a node has a single child the following representation 1842 * is used instead: 1843 * 1844 * [abc] -> "ladin" -> [] 1845 */ 1846 1847 /* The actual implementation of raxShow(). */ 1848 void raxRecursiveShow(int level, int lpad, raxNode *n) { 1849 char s = n->iscompr ? '"' : '['; 1850 char e = n->iscompr ? '"' : ']'; 1851 1852 int numchars = printf("%c%.*s%c", s, n->size, n->data, e); 1853 if (n->iskey) { 1854 numchars += printf("=%p",raxGetData(n)); 1855 } 1856 1857 int numchildren = n->iscompr ? 1 : n->size; 1858 /* Note that 7 and 4 magic constants are the string length 1859 * of " `-(x) " and " -> " respectively. */ 1860 if (level) { 1861 lpad += (numchildren > 1) ? 7 : 4; 1862 if (numchildren == 1) lpad += numchars; 1863 } 1864 raxNode **cp = raxNodeFirstChildPtr(n); 1865 for (int i = 0; i < numchildren; i++) { 1866 char *branch = " `-(%c) "; 1867 if (numchildren > 1) { 1868 printf("\n"); 1869 for (int j = 0; j < lpad; j++) putchar(' '); 1870 printf(branch,n->data[i]); 1871 } else { 1872 printf(" -> "); 1873 } 1874 raxNode *child; 1875 memcpy(&child,cp,sizeof(child)); 1876 raxRecursiveShow(level+1,lpad,child); 1877 cp++; 1878 } 1879 } 1880 1881 /* Show a tree, as outlined in the comment above. */ 1882 void raxShow(rax *rax) { 1883 raxRecursiveShow(0,0,rax->head); 1884 putchar('\n'); 1885 } 1886 1887 /* Used by debugnode() macro to show info about a given node. */ 1888 void raxDebugShowNode(const char *msg, raxNode *n) { 1889 if (raxDebugMsg == 0) return; 1890 printf("%s: %p [%.*s] key:%d size:%d children:", 1891 msg, (void*)n, (int)n->size, (char*)n->data, n->iskey, n->size); 1892 int numcld = n->iscompr ? 1 : n->size; 1893 raxNode **cldptr = raxNodeLastChildPtr(n) - (numcld-1); 1894 while(numcld--) { 1895 raxNode *child; 1896 memcpy(&child,cldptr,sizeof(child)); 1897 cldptr++; 1898 printf("%p ", (void*)child); 1899 } 1900 printf("\n"); 1901 fflush(stdout); 1902 } 1903 1904 /* Touch all the nodes of a tree returning a check sum. This is useful 1905 * in order to make Valgrind detect if there is something wrong while 1906 * reading the data structure. 1907 * 1908 * This function was used in order to identify Rax bugs after a big refactoring 1909 * using this technique: 1910 * 1911 * 1. The rax-test is executed using Valgrind, adding a printf() so that for 1912 * the fuzz tester we see what iteration in the loop we are in. 1913 * 2. After every modification of the radix tree made by the fuzz tester 1914 * in rax-test.c, we add a call to raxTouch(). 1915 * 3. Now as soon as an operation will corrupt the tree, raxTouch() will 1916 * detect it (via Valgrind) immediately. We can add more calls to narrow 1917 * the state. 1918 * 4. At this point a good idea is to enable Rax debugging messages immediately 1919 * before the moment the tree is corrupted, to see what happens. 1920 */ 1921 unsigned long raxTouch(raxNode *n) { 1922 debugf("Touching %p\n", (void*)n); 1923 unsigned long sum = 0; 1924 if (n->iskey) { 1925 sum += (unsigned long)raxGetData(n); 1926 } 1927 1928 int numchildren = n->iscompr ? 1 : n->size; 1929 raxNode **cp = raxNodeFirstChildPtr(n); 1930 int count = 0; 1931 for (int i = 0; i < numchildren; i++) { 1932 if (numchildren > 1) { 1933 sum += (long)n->data[i]; 1934 } 1935 raxNode *child; 1936 memcpy(&child,cp,sizeof(child)); 1937 if (child == (void*)0x65d1760) count++; 1938 if (count > 1) exit(1); 1939 sum += raxTouch(child); 1940 cp++; 1941 } 1942 return sum; 1943 } 1944