1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
28 * Copyright (c) 2015 by Delphix. All rights reserved.
29 */
30
31 /*
32 * AVL - generic AVL tree implementation for kernel use
33 *
34 * A complete description of AVL trees can be found in many CS textbooks.
35 *
36 * Here is a very brief overview. An AVL tree is a binary search tree that is
37 * almost perfectly balanced. By "almost" perfectly balanced, we mean that at
38 * any given node, the left and right subtrees are allowed to differ in height
39 * by at most 1 level.
40 *
41 * This relaxation from a perfectly balanced binary tree allows doing
42 * insertion and deletion relatively efficiently. Searching the tree is
43 * still a fast operation, roughly O(log(N)).
44 *
45 * The key to insertion and deletion is a set of tree manipulations called
46 * rotations, which bring unbalanced subtrees back into the semi-balanced state.
47 *
48 * This implementation of AVL trees has the following peculiarities:
49 *
50 * - The AVL specific data structures are physically embedded as fields
51 * in the "using" data structures. To maintain generality the code
52 * must constantly translate between "avl_node_t *" and containing
53 * data structure "void *"s by adding/subtracting the avl_offset.
54 *
55 * - Since the AVL data is always embedded in other structures, there is
56 * no locking or memory allocation in the AVL routines. This must be
57 * provided for by the enclosing data structure's semantics. Typically,
58 * avl_insert()/_add()/_remove()/avl_insert_here() require some kind of
59 * exclusive write lock. Other operations require a read lock.
60 *
61 * - The implementation uses iteration instead of explicit recursion,
62 * since it is intended to run on limited size kernel stacks. Since
63 * there is no recursion stack present to move "up" in the tree,
64 * there is an explicit "parent" link in the avl_node_t.
65 *
66 * - The left/right children pointers of a node are in an array.
67 * In the code, variables (instead of constants) are used to represent
68 * left and right indices. The implementation is written as if it only
69 * dealt with left handed manipulations. By changing the value assigned
70 * to "left", the code also works for right handed trees. The
71 * following variables/terms are frequently used:
72 *
73 * int left; // 0 when dealing with left children,
74 * // 1 for dealing with right children
75 *
76 * int left_heavy; // -1 when left subtree is taller at some node,
77 * // +1 when right subtree is taller
78 *
79 * int right; // will be the opposite of left (0 or 1)
80 * int right_heavy;// will be the opposite of left_heavy (-1 or 1)
81 *
82 * int direction; // 0 for "<" (ie. left child); 1 for ">" (right)
83 *
84 * Though it is a little more confusing to read the code, the approach
85 * allows using half as much code (and hence cache footprint) for tree
86 * manipulations and eliminates many conditional branches.
87 *
88 * - The avl_index_t is an opaque "cookie" used to find nodes at or
89 * adjacent to where a new value would be inserted in the tree. The value
 *   is a modified "avl_node_t *".  The bottom bit (normally 0 for a
 *   pointer) is set to indicate that the new node has a value greater
 *   than the value of the indicated "avl_node_t *".
93 *
94 * Note - in addition to userland (e.g. libavl and libutil) and the kernel
95 * (e.g. genunix), avl.c is compiled into ld.so and kmdb's genunix module,
96 * which each have their own compilation environments and subsequent
97 * requirements. Each of these environments must be considered when adding
98 * dependencies from avl.c.
99 *
100 * Link to Illumos.org for more information on avl function:
101 * [1] https://illumos.org/man/9f/avl
102 */
103
104 #include <sys/types.h>
105 #include <sys/param.h>
106 #include <sys/debug.h>
107 #include <sys/avl.h>
108 #include <sys/cmn_err.h>
109 #include <sys/mod.h>
110
111 /*
112 * Small arrays to translate between balance (or diff) values and child indices.
113 *
114 * Code that deals with binary tree data structures will randomly use
115 * left and right children when examining a tree. C "if()" statements
116 * which evaluate randomly suffer from very poor hardware branch prediction.
117 * In this code we avoid some of the branch mispredictions by using the
118 * following translation arrays. They replace random branches with an
119 * additional memory reference. Since the translation arrays are both very
120 * small the data should remain efficiently in cache.
121 */
/* Indexed by child slot (0 = left, 1 = right); yields the balance delta. */
static const int avl_child2balance[2] = { -1, 1 };
/* Indexed by (1 + v) for v in {-1, 0, +1}; yields the child slot to use. */
static const int avl_balance2child[3] = { 0, 0, 1 };
124
125
126 /*
127 * Walk from one node to the previous valued node (ie. an infix walk
128 * towards the left). At any given node we do one of 2 things:
129 *
130 * - If there is a left child, go to it, then to it's rightmost descendant.
131 *
132 * - otherwise we return through parent nodes until we've come from a right
133 * child.
134 *
135 * Return Value:
136 * NULL - if at the end of the nodes
137 * otherwise next node
138 */
139 void *
avl_walk(avl_tree_t * tree,void * oldnode,int left)140 avl_walk(avl_tree_t *tree, void *oldnode, int left)
141 {
142 size_t off = tree->avl_offset;
143 avl_node_t *node = AVL_DATA2NODE(oldnode, off);
144 int right = 1 - left;
145 int was_child;
146
147
148 /*
149 * nowhere to walk to if tree is empty
150 */
151 if (node == NULL)
152 return (NULL);
153
154 /*
155 * Visit the previous valued node. There are two possibilities:
156 *
157 * If this node has a left child, go down one left, then all
158 * the way right.
159 */
160 if (node->avl_child[left] != NULL) {
161 for (node = node->avl_child[left];
162 node->avl_child[right] != NULL;
163 node = node->avl_child[right])
164 ;
165 /*
166 * Otherwise, return through left children as far as we can.
167 */
168 } else {
169 for (;;) {
170 was_child = AVL_XCHILD(node);
171 node = AVL_XPARENT(node);
172 if (node == NULL)
173 return (NULL);
174 if (was_child == right)
175 break;
176 }
177 }
178
179 return (AVL_NODE2DATA(node, off));
180 }
181
182 /*
183 * Return the lowest valued node in a tree or NULL.
184 * (leftmost child from root of tree)
185 */
186 void *
avl_first(avl_tree_t * tree)187 avl_first(avl_tree_t *tree)
188 {
189 avl_node_t *node;
190 avl_node_t *prev = NULL;
191 size_t off = tree->avl_offset;
192
193 for (node = tree->avl_root; node != NULL; node = node->avl_child[0])
194 prev = node;
195
196 if (prev != NULL)
197 return (AVL_NODE2DATA(prev, off));
198 return (NULL);
199 }
200
201 /*
202 * Return the highest valued node in a tree or NULL.
203 * (rightmost child from root of tree)
204 */
205 void *
avl_last(avl_tree_t * tree)206 avl_last(avl_tree_t *tree)
207 {
208 avl_node_t *node;
209 avl_node_t *prev = NULL;
210 size_t off = tree->avl_offset;
211
212 for (node = tree->avl_root; node != NULL; node = node->avl_child[1])
213 prev = node;
214
215 if (prev != NULL)
216 return (AVL_NODE2DATA(prev, off));
217 return (NULL);
218 }
219
220 /*
221 * Access the node immediately before or after an insertion point.
222 *
223 * "avl_index_t" is a (avl_node_t *) with the bottom bit indicating a child
224 *
225 * Return value:
226 * NULL: no node in the given direction
227 * "void *" of the found tree node
228 */
229 void *
avl_nearest(avl_tree_t * tree,avl_index_t where,int direction)230 avl_nearest(avl_tree_t *tree, avl_index_t where, int direction)
231 {
232 int child = AVL_INDEX2CHILD(where);
233 avl_node_t *node = AVL_INDEX2NODE(where);
234 void *data;
235 size_t off = tree->avl_offset;
236
237 if (node == NULL) {
238 ASSERT(tree->avl_root == NULL);
239 return (NULL);
240 }
241 data = AVL_NODE2DATA(node, off);
242 if (child != direction)
243 return (data);
244
245 return (avl_walk(tree, data, direction));
246 }
247
248
249 /*
250 * Search for the node which contains "value". The algorithm is a
251 * simple binary tree search.
252 *
253 * return value:
254 * NULL: the value is not in the AVL tree
255 * *where (if not NULL) is set to indicate the insertion point
256 * "void *" of the found tree node
257 */
258 void *
avl_find(avl_tree_t * tree,const void * value,avl_index_t * where)259 avl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
260 {
261 avl_node_t *node;
262 avl_node_t *prev = NULL;
263 int child = 0;
264 int diff;
265 size_t off = tree->avl_offset;
266
267 for (node = tree->avl_root; node != NULL;
268 node = node->avl_child[child]) {
269
270 prev = node;
271
272 diff = tree->avl_compar(value, AVL_NODE2DATA(node, off));
273 ASSERT(-1 <= diff && diff <= 1);
274 if (diff == 0) {
275 #ifdef ZFS_DEBUG
276 if (where != NULL)
277 *where = 0;
278 #endif
279 return (AVL_NODE2DATA(node, off));
280 }
281 child = avl_balance2child[1 + diff];
282
283 }
284
285 if (where != NULL)
286 *where = AVL_MKINDEX(prev, child);
287
288 return (NULL);
289 }
290
291
/*
 * Perform a rotation to restore balance at the subtree rooted at "node".
 *
 * This routine is used by both insertion and deletion.  The return value
 * indicates:
 *	 0 : subtree did not change height
 *	!0 : subtree was reduced in height
 *
 * The code is written as if handling left rotations, right rotations are
 * symmetric and handled by swapping values of variables right/left[_heavy]
 *
 * On input balance is the "new" balance at "node". This value is either
 * -2 or +2.
 *
 * The link that pointed at "node" (its parent's child slot, or the tree
 * root when it has no parent) is updated to point at the new subtree root.
 */
static int
avl_rotation(avl_tree_t *tree, avl_node_t *node, int balance)
{
	int left = !(balance < 0);	/* when balance = -2, left will be 0 */
	int right = 1 - left;
	/*
	 * -2 >> 1 is expected to yield -1 here; note that C leaves right
	 * shifts of negative values implementation-defined.
	 */
	int left_heavy = balance >> 1;
	int right_heavy = -left_heavy;
	avl_node_t *parent = AVL_XPARENT(node);
	avl_node_t *child = node->avl_child[left];
	avl_node_t *cright;
	avl_node_t *gchild;
	avl_node_t *gright;
	avl_node_t *gleft;
	int which_child = AVL_XCHILD(node);
	int child_bal = AVL_XBALANCE(child);

	/* BEGIN CSTYLED */
	/*
	 * case 1 : node is overly left heavy, the left child is balanced or
	 * also left heavy. This requires the following rotation.
	 *
	 *                   (node bal:-2)
	 *                    /           \
	 *                   /             \
	 *              (child bal:0 or -1)
	 *              /    \
	 *             /      \
	 *                     cright
	 *
	 * becomes:
	 *
	 *              (child bal:1 or 0)
	 *              /        \
	 *             /          \
	 *                        (node bal:-1 or 0)
	 *                         /     \
	 *                        /       \
	 *                     cright
	 *
	 * we detect this situation by noting that child's balance is not
	 * right_heavy.
	 */
	/* END CSTYLED */
	if (child_bal != right_heavy) {

		/*
		 * compute new balance of nodes
		 *
		 * If child used to be left heavy (now balanced) we reduced
		 * the height of this sub-tree -- used in "return...;" below
		 */
		child_bal += right_heavy; /* adjust towards right */

		/*
		 * move "cright" to be node's left child
		 */
		cright = child->avl_child[right];
		node->avl_child[left] = cright;
		if (cright != NULL) {
			AVL_SETPARENT(cright, node);
			AVL_SETCHILD(cright, left);
		}

		/*
		 * move node to be child's right child
		 */
		child->avl_child[right] = node;
		AVL_SETBALANCE(node, -child_bal);
		AVL_SETCHILD(node, right);
		AVL_SETPARENT(node, child);

		/*
		 * update the pointer into this subtree
		 */
		AVL_SETBALANCE(child, child_bal);
		AVL_SETCHILD(child, which_child);
		AVL_SETPARENT(child, parent);
		if (parent != NULL)
			parent->avl_child[which_child] = child;
		else
			tree->avl_root = child;

		return (child_bal == 0);
	}

	/* BEGIN CSTYLED */
	/*
	 * case 2 : When node is left heavy, but child is right heavy we use
	 * a different rotation.
	 *
	 *                   (node b:-2)
	 *                    /   \
	 *                   /     \
	 *                  /       \
	 *             (child b:+1)
	 *              /     \
	 *             /       \
	 *                   (gchild b: != 0)
	 *                     /  \
	 *                    /    \
	 *                 gleft   gright
	 *
	 * becomes:
	 *
	 *              (gchild b:0)
	 *              /       \
	 *             /         \
	 *            /           \
	 *        (child b:?)   (node b:?)
	 *         /  \          /   \
	 *        /    \        /     \
	 *            gleft   gright
	 *
	 * computing the new balances is more complicated. As an example:
	 *	if gchild was right_heavy, then child is now left heavy
	 *		else it is balanced
	 */
	/* END CSTYLED */
	gchild = child->avl_child[right];
	gleft = gchild->avl_child[left];
	gright = gchild->avl_child[right];

	/*
	 * move gright to left child of node and
	 *
	 * move gleft to right child of child
	 */
	node->avl_child[left] = gright;
	if (gright != NULL) {
		AVL_SETPARENT(gright, node);
		AVL_SETCHILD(gright, left);
	}

	child->avl_child[right] = gleft;
	if (gleft != NULL) {
		AVL_SETPARENT(gleft, child);
		AVL_SETCHILD(gleft, right);
	}

	/*
	 * move child to left child of gchild and
	 *
	 * move node to right child of gchild and
	 *
	 * fixup parent of all this to point to gchild
	 *
	 * "balance" is reused from here on to hold gchild's old balance,
	 * from which the new balances of child and node are derived.
	 */
	balance = AVL_XBALANCE(gchild);
	gchild->avl_child[left] = child;
	AVL_SETBALANCE(child, (balance == right_heavy ? left_heavy : 0));
	AVL_SETPARENT(child, gchild);
	AVL_SETCHILD(child, left);

	gchild->avl_child[right] = node;
	AVL_SETBALANCE(node, (balance == left_heavy ? right_heavy : 0));
	AVL_SETPARENT(node, gchild);
	AVL_SETCHILD(node, right);

	AVL_SETBALANCE(gchild, 0);
	AVL_SETPARENT(gchild, parent);
	AVL_SETCHILD(gchild, which_child);
	if (parent != NULL)
		parent->avl_child[which_child] = gchild;
	else
		tree->avl_root = gchild;

	return (1);	/* the new tree is always shorter */
}
473
474
475 /*
476 * Insert a new node into an AVL tree at the specified (from avl_find()) place.
477 *
478 * Newly inserted nodes are always leaf nodes in the tree, since avl_find()
479 * searches out to the leaf positions. The avl_index_t indicates the node
480 * which will be the parent of the new node.
481 *
482 * After the node is inserted, a single rotation further up the tree may
483 * be necessary to maintain an acceptable AVL balance.
484 */
485 void
avl_insert(avl_tree_t * tree,void * new_data,avl_index_t where)486 avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
487 {
488 avl_node_t *node;
489 avl_node_t *parent = AVL_INDEX2NODE(where);
490 int old_balance;
491 int new_balance;
492 int which_child = AVL_INDEX2CHILD(where);
493 size_t off = tree->avl_offset;
494
495 ASSERT(tree);
496 #ifdef _LP64
497 ASSERT(((uintptr_t)new_data & 0x7) == 0);
498 #endif
499
500 node = AVL_DATA2NODE(new_data, off);
501
502 /*
503 * First, add the node to the tree at the indicated position.
504 */
505 ++tree->avl_numnodes;
506
507 node->avl_child[0] = NULL;
508 node->avl_child[1] = NULL;
509
510 AVL_SETCHILD(node, which_child);
511 AVL_SETBALANCE(node, 0);
512 AVL_SETPARENT(node, parent);
513 if (parent != NULL) {
514 ASSERT(parent->avl_child[which_child] == NULL);
515 parent->avl_child[which_child] = node;
516 } else {
517 ASSERT(tree->avl_root == NULL);
518 tree->avl_root = node;
519 }
520 /*
521 * Now, back up the tree modifying the balance of all nodes above the
522 * insertion point. If we get to a highly unbalanced ancestor, we
523 * need to do a rotation. If we back out of the tree we are done.
524 * If we brought any subtree into perfect balance (0), we are also done.
525 */
526 for (;;) {
527 node = parent;
528 if (node == NULL)
529 return;
530
531 /*
532 * Compute the new balance
533 */
534 old_balance = AVL_XBALANCE(node);
535 new_balance = old_balance + avl_child2balance[which_child];
536
537 /*
538 * If we introduced equal balance, then we are done immediately
539 */
540 if (new_balance == 0) {
541 AVL_SETBALANCE(node, 0);
542 return;
543 }
544
545 /*
546 * If both old and new are not zero we went
547 * from -1 to -2 balance, do a rotation.
548 */
549 if (old_balance != 0)
550 break;
551
552 AVL_SETBALANCE(node, new_balance);
553 parent = AVL_XPARENT(node);
554 which_child = AVL_XCHILD(node);
555 }
556
557 /*
558 * perform a rotation to fix the tree and return
559 */
560 (void) avl_rotation(tree, node, new_balance);
561 }
562
563 /*
564 * Insert "new_data" in "tree" in the given "direction" either after or
565 * before (AVL_AFTER, AVL_BEFORE) the data "here".
566 *
567 * Insertions can only be done at empty leaf points in the tree, therefore
568 * if the given child of the node is already present we move to either
569 * the AVL_PREV or AVL_NEXT and reverse the insertion direction. Since
570 * every other node in the tree is a leaf, this always works.
571 *
572 * To help developers using this interface, we assert that the new node
573 * is correctly ordered at every step of the way in DEBUG kernels.
574 */
575 void
avl_insert_here(avl_tree_t * tree,void * new_data,void * here,int direction)576 avl_insert_here(
577 avl_tree_t *tree,
578 void *new_data,
579 void *here,
580 int direction)
581 {
582 avl_node_t *node;
583 int child = direction; /* rely on AVL_BEFORE == 0, AVL_AFTER == 1 */
584 #ifdef ZFS_DEBUG
585 int diff;
586 #endif
587
588 ASSERT(tree != NULL);
589 ASSERT(new_data != NULL);
590 ASSERT(here != NULL);
591 ASSERT(direction == AVL_BEFORE || direction == AVL_AFTER);
592
593 /*
594 * If corresponding child of node is not NULL, go to the neighboring
595 * node and reverse the insertion direction.
596 */
597 node = AVL_DATA2NODE(here, tree->avl_offset);
598
599 #ifdef ZFS_DEBUG
600 diff = tree->avl_compar(new_data, here);
601 ASSERT(-1 <= diff && diff <= 1);
602 ASSERT(diff != 0);
603 ASSERT(diff > 0 ? child == 1 : child == 0);
604 #endif
605
606 if (node->avl_child[child] != NULL) {
607 node = node->avl_child[child];
608 child = 1 - child;
609 while (node->avl_child[child] != NULL) {
610 #ifdef ZFS_DEBUG
611 diff = tree->avl_compar(new_data,
612 AVL_NODE2DATA(node, tree->avl_offset));
613 ASSERT(-1 <= diff && diff <= 1);
614 ASSERT(diff != 0);
615 ASSERT(diff > 0 ? child == 1 : child == 0);
616 #endif
617 node = node->avl_child[child];
618 }
619 #ifdef ZFS_DEBUG
620 diff = tree->avl_compar(new_data,
621 AVL_NODE2DATA(node, tree->avl_offset));
622 ASSERT(-1 <= diff && diff <= 1);
623 ASSERT(diff != 0);
624 ASSERT(diff > 0 ? child == 1 : child == 0);
625 #endif
626 }
627 ASSERT(node->avl_child[child] == NULL);
628
629 avl_insert(tree, new_data, AVL_MKINDEX(node, child));
630 }
631
632 /*
633 * Add a new node to an AVL tree. Strictly enforce that no duplicates can
634 * be added to the tree with a VERIFY which is enabled for non-DEBUG builds.
635 */
636 void
avl_add(avl_tree_t * tree,void * new_node)637 avl_add(avl_tree_t *tree, void *new_node)
638 {
639 avl_index_t where = 0;
640
641 VERIFY(avl_find(tree, new_node, &where) == NULL);
642
643 avl_insert(tree, new_node, where);
644 }
645
/*
 * Delete a node from the AVL tree.  Deletion is similar to insertion, but
 * with 2 complications.
 *
 * First, we may be deleting an interior node. Consider the following subtree:
 *
 *     d           c            c
 *    / \         / \          / \
 *   b   e       b   e        b   e
 *  / \         / \          /
 * a   c       a            a
 *
 * When we are deleting node (d), we find and bring up an adjacent valued leaf
 * node, say (c), to take the interior node's place. In the code this is
 * handled by temporarily swapping (d) and (c) in the tree and then using
 * common code to delete (d) from the leaf position.
 *
 * Secondly, an interior deletion from a deep tree may require more than one
 * rotation to fix the balance. This is handled by moving up the tree through
 * parents and applying rotations as needed. The return value from
 * avl_rotation() is used to detect when a subtree did not change overall
 * height due to a rotation.
 *
 * Parameters:
 *	tree	- tree to remove from
 *	data	- data structure (embedding the avl_node_t) to unlink
 */
void
avl_remove(avl_tree_t *tree, void *data)
{
	avl_node_t *delete;
	avl_node_t *parent;
	avl_node_t *node;
	avl_node_t tmp;
	int old_balance;
	int new_balance;
	int left;
	int right;
	int which_child;
	size_t off = tree->avl_offset;

	/*
	 * NOTE(review): "tree" has already been dereferenced by the
	 * initializer above by the time this assertion runs.
	 */
	ASSERT(tree);

	delete = AVL_DATA2NODE(data, off);

	/*
	 * Deletion is easiest with a node that has at most 1 child.
	 * We swap a node with 2 children with a sequentially valued
	 * neighbor node. That node will have at most 1 child. Note this
	 * has no effect on the ordering of the remaining nodes.
	 *
	 * As an optimization, we choose the greater neighbor if the tree
	 * is right heavy, otherwise the left neighbor. This reduces the
	 * number of rotations needed.
	 */
	if (delete->avl_child[0] != NULL && delete->avl_child[1] != NULL) {

		/*
		 * choose node to swap from whichever side is taller
		 */
		old_balance = AVL_XBALANCE(delete);
		left = avl_balance2child[old_balance + 1];
		right = 1 - left;

		/*
		 * get to the previous value'd node
		 * (down 1 left, as far as possible right)
		 */
		for (node = delete->avl_child[left];
		    node->avl_child[right] != NULL;
		    node = node->avl_child[right])
			;

		/*
		 * create a temp placeholder for 'node'
		 * move 'node' to delete's spot in the tree
		 */
		tmp = *node;

		/*
		 * If 'node' was delete's direct child, the child pointer just
		 * copied from delete refers to 'node' itself; redirect it to
		 * the placeholder.
		 */
		*node = *delete;
		if (node->avl_child[left] == node)
			node->avl_child[left] = &tmp;

		parent = AVL_XPARENT(node);
		if (parent != NULL)
			parent->avl_child[AVL_XCHILD(node)] = node;
		else
			tree->avl_root = node;
		AVL_SETPARENT(node->avl_child[left], node);
		AVL_SETPARENT(node->avl_child[right], node);

		/*
		 * Put tmp where node used to be (just temporary).
		 * It always has a parent and at most 1 child.
		 */
		delete = &tmp;
		parent = AVL_XPARENT(delete);
		parent->avl_child[AVL_XCHILD(delete)] = delete;
		which_child = (delete->avl_child[1] != 0);
		if (delete->avl_child[which_child] != NULL)
			AVL_SETPARENT(delete->avl_child[which_child], delete);
	}


	/*
	 * Here we know "delete" is at least partially a leaf node. It can
	 * be easily removed from the tree.
	 */
	ASSERT(tree->avl_numnodes > 0);
	--tree->avl_numnodes;
	parent = AVL_XPARENT(delete);
	which_child = AVL_XCHILD(delete);
	if (delete->avl_child[0] != NULL)
		node = delete->avl_child[0];
	else
		node = delete->avl_child[1];

	/*
	 * Connect parent directly to node (leaving out delete).
	 */
	if (node != NULL) {
		AVL_SETPARENT(node, parent);
		AVL_SETCHILD(node, which_child);
	}
	if (parent == NULL) {
		/* delete was the root: nothing above it to rebalance */
		tree->avl_root = node;
		return;
	}
	parent->avl_child[which_child] = node;


	/*
	 * Since the subtree is now shorter, begin adjusting parent balances
	 * and performing any needed rotations.
	 */
	do {

		/*
		 * Move up the tree and adjust the balance
		 *
		 * Capture the parent and which_child values for the next
		 * iteration before any rotations occur.
		 */
		node = parent;
		old_balance = AVL_XBALANCE(node);
		new_balance = old_balance - avl_child2balance[which_child];
		parent = AVL_XPARENT(node);
		which_child = AVL_XCHILD(node);

		/*
		 * If a node was in perfect balance but isn't anymore then
		 * we can stop, since the height didn't change above this point
		 * due to a deletion.
		 */
		if (old_balance == 0) {
			AVL_SETBALANCE(node, new_balance);
			break;
		}

		/*
		 * If the new balance is zero, we don't need to rotate
		 * else
		 * need a rotation to fix the balance.
		 * If the rotation doesn't change the height
		 * of the sub-tree we have finished adjusting.
		 */
		if (new_balance == 0)
			AVL_SETBALANCE(node, new_balance);
		else if (!avl_rotation(tree, node, new_balance))
			break;
	} while (parent != NULL);
}
814
/*
 * Remove "obj" from "tree" and add it back, so that it ends up at the
 * position its current ordering dictates.  The two calls are wrapped in
 * do { } while (0) so that the macro always behaves as a single statement,
 * even when used as the unbraced body of an if/else.
 */
#define	AVL_REINSERT(tree, obj)			\
	do {					\
		avl_remove((tree), (obj));	\
		avl_add((tree), (obj));		\
	} while (0)
818
819 boolean_t
avl_update_lt(avl_tree_t * t,void * obj)820 avl_update_lt(avl_tree_t *t, void *obj)
821 {
822 void *neighbor;
823
824 ASSERT(((neighbor = AVL_NEXT(t, obj)) == NULL) ||
825 (t->avl_compar(obj, neighbor) <= 0));
826
827 neighbor = AVL_PREV(t, obj);
828 if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
829 AVL_REINSERT(t, obj);
830 return (B_TRUE);
831 }
832
833 return (B_FALSE);
834 }
835
836 boolean_t
avl_update_gt(avl_tree_t * t,void * obj)837 avl_update_gt(avl_tree_t *t, void *obj)
838 {
839 void *neighbor;
840
841 ASSERT(((neighbor = AVL_PREV(t, obj)) == NULL) ||
842 (t->avl_compar(obj, neighbor) >= 0));
843
844 neighbor = AVL_NEXT(t, obj);
845 if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
846 AVL_REINSERT(t, obj);
847 return (B_TRUE);
848 }
849
850 return (B_FALSE);
851 }
852
853 boolean_t
avl_update(avl_tree_t * t,void * obj)854 avl_update(avl_tree_t *t, void *obj)
855 {
856 void *neighbor;
857
858 neighbor = AVL_PREV(t, obj);
859 if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
860 AVL_REINSERT(t, obj);
861 return (B_TRUE);
862 }
863
864 neighbor = AVL_NEXT(t, obj);
865 if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
866 AVL_REINSERT(t, obj);
867 return (B_TRUE);
868 }
869
870 return (B_FALSE);
871 }
872
873 void
avl_swap(avl_tree_t * tree1,avl_tree_t * tree2)874 avl_swap(avl_tree_t *tree1, avl_tree_t *tree2)
875 {
876 avl_node_t *temp_node;
877 ulong_t temp_numnodes;
878
879 ASSERT3P(tree1->avl_compar, ==, tree2->avl_compar);
880 ASSERT3U(tree1->avl_offset, ==, tree2->avl_offset);
881 ASSERT3U(tree1->avl_size, ==, tree2->avl_size);
882
883 temp_node = tree1->avl_root;
884 temp_numnodes = tree1->avl_numnodes;
885 tree1->avl_root = tree2->avl_root;
886 tree1->avl_numnodes = tree2->avl_numnodes;
887 tree2->avl_root = temp_node;
888 tree2->avl_numnodes = temp_numnodes;
889 }
890
891 /*
892 * initialize a new AVL tree
893 */
894 void
avl_create(avl_tree_t * tree,int (* compar)(const void *,const void *),size_t size,size_t offset)895 avl_create(avl_tree_t *tree, int (*compar) (const void *, const void *),
896 size_t size, size_t offset)
897 {
898 ASSERT(tree);
899 ASSERT(compar);
900 ASSERT(size > 0);
901 ASSERT(size >= offset + sizeof (avl_node_t));
902 #ifdef _LP64
903 ASSERT((offset & 0x7) == 0);
904 #endif
905
906 tree->avl_compar = compar;
907 tree->avl_root = NULL;
908 tree->avl_numnodes = 0;
909 tree->avl_size = size;
910 tree->avl_offset = offset;
911 }
912
/*
 * Delete a tree.
 *
 * Nothing is released here; the function only asserts that the tree has
 * already been emptied (no nodes, no root).  When ASSERT compiles to
 * nothing the argument is unused, hence the ARGSUSED annotation.
 */
/* ARGSUSED */
void
avl_destroy(avl_tree_t *tree)
{
	ASSERT(tree);
	ASSERT(tree->avl_numnodes == 0);
	ASSERT(tree->avl_root == NULL);
}
924
925
926 /*
927 * Return the number of nodes in an AVL tree.
928 */
929 ulong_t
avl_numnodes(avl_tree_t * tree)930 avl_numnodes(avl_tree_t *tree)
931 {
932 ASSERT(tree);
933 return (tree->avl_numnodes);
934 }
935
936 boolean_t
avl_is_empty(avl_tree_t * tree)937 avl_is_empty(avl_tree_t *tree)
938 {
939 ASSERT(tree);
940 return (tree->avl_numnodes == 0);
941 }
942
/* low bit of the cookie: which child of the recorded parent was handed out */
#define	CHILDBIT	(1L)

/*
 * Post-order tree walk used to visit all tree nodes and destroy the tree
 * in post order. This is used for removing all the nodes from a tree without
 * paying any cost for rebalancing it.
 *
 * example:
 *
 *	void *cookie = NULL;
 *	my_data_t *node;
 *
 *	while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
 *		free(node);
 *	avl_destroy(tree);
 *
 * The cookie is really an avl_node_t to the current node's parent and
 * an indication of which child you looked at last.
 *
 * On input, a cookie value of CHILDBIT indicates the tree is done.
 *
 * Each call returns the data structure of the next node to dispose of, or
 * NULL once every node has been handed out.  The link to a returned node is
 * cleared, and the node count decremented, on the following call.
 */
void *
avl_destroy_nodes(avl_tree_t *tree, void **cookie)
{
	avl_node_t	*node;
	avl_node_t	*parent;
	int		child;
	void		*first;
	size_t		off = tree->avl_offset;

	/*
	 * Initial calls go to the first node or its right descendant.
	 */
	if (*cookie == NULL) {
		first = avl_first(tree);

		/*
		 * deal with an empty tree
		 */
		if (first == NULL) {
			*cookie = (void *)CHILDBIT;
			return (NULL);
		}

		node = AVL_DATA2NODE(first, off);
		parent = AVL_XPARENT(node);
		goto check_right_side;
	}

	/*
	 * If there is no parent to return to we are done.
	 * (The node handed out last was the root; drop it from the tree.)
	 */
	parent = (avl_node_t *)((uintptr_t)(*cookie) & ~CHILDBIT);
	if (parent == NULL) {
		if (tree->avl_root != NULL) {
			ASSERT(tree->avl_numnodes == 1);
			tree->avl_root = NULL;
			tree->avl_numnodes = 0;
		}
		return (NULL);
	}

	/*
	 * Remove the child pointer we just visited from the parent and tree.
	 */
	child = (uintptr_t)(*cookie) & CHILDBIT;
	parent->avl_child[child] = NULL;
	ASSERT(tree->avl_numnodes > 1);
	--tree->avl_numnodes;

	/*
	 * If we just did a right child or there isn't one, go up to parent.
	 */
	if (child == 1 || parent->avl_child[1] == NULL) {
		node = parent;
		parent = AVL_XPARENT(parent);
		goto done;
	}

	/*
	 * Do parent's right child, then leftmost descendant.
	 */
	node = parent->avl_child[1];
	while (node->avl_child[0] != NULL) {
		parent = node;
		node = node->avl_child[0];
	}

	/*
	 * If here, we moved to a left child. It may have one
	 * child on the right (when balance == +1).
	 */
check_right_side:
	if (node->avl_child[1] != NULL) {
		ASSERT(AVL_XBALANCE(node) == 1);
		parent = node;
		node = node->avl_child[1];
		ASSERT(node->avl_child[0] == NULL &&
		    node->avl_child[1] == NULL);
	} else {
		ASSERT(AVL_XBALANCE(node) <= 0);
	}

	/*
	 * Record where to resume: the parent of the node being returned plus
	 * which of its children that node is, or CHILDBIT alone when the
	 * node being returned has no parent.
	 */
done:
	if (parent == NULL) {
		*cookie = (void *)CHILDBIT;
		ASSERT(node == tree->avl_root);
	} else {
		*cookie = (void *)((uintptr_t)parent | AVL_XCHILD(node));
	}

	return (AVL_NODE2DATA(node, off));
}
1056
#if defined(_KERNEL)

/*
 * Kernel module entry point.  There is nothing to set up; always
 * reports success.
 */
static int __init
avl_init(void)
{
	return (0);
}

/*
 * Kernel module exit point.  There is nothing to tear down.
 */
static void __exit
avl_fini(void)
{
}

/* register the entry/exit points above (kernel builds only) */
module_init(avl_init);
module_exit(avl_fini);
#endif

/* module metadata */
ZFS_MODULE_DESCRIPTION("Generic AVL tree implementation");
ZFS_MODULE_AUTHOR(ZFS_META_AUTHOR);
ZFS_MODULE_LICENSE(ZFS_META_LICENSE);
ZFS_MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);

/* AVL routines exported for use outside this module */
EXPORT_SYMBOL(avl_create);
EXPORT_SYMBOL(avl_find);
EXPORT_SYMBOL(avl_insert);
EXPORT_SYMBOL(avl_insert_here);
EXPORT_SYMBOL(avl_walk);
EXPORT_SYMBOL(avl_first);
EXPORT_SYMBOL(avl_last);
EXPORT_SYMBOL(avl_nearest);
EXPORT_SYMBOL(avl_add);
EXPORT_SYMBOL(avl_swap);
EXPORT_SYMBOL(avl_is_empty);
EXPORT_SYMBOL(avl_remove);
EXPORT_SYMBOL(avl_numnodes);
EXPORT_SYMBOL(avl_destroy_nodes);
EXPORT_SYMBOL(avl_destroy);
EXPORT_SYMBOL(avl_update_lt);
EXPORT_SYMBOL(avl_update_gt);
EXPORT_SYMBOL(avl_update);
1097