1 /* Generic associative array implementation. 2 * 3 * See Documentation/assoc_array.txt for information. 4 * 5 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. 6 * Written by David Howells ([email protected]) 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public Licence 10 * as published by the Free Software Foundation; either version 11 * 2 of the Licence, or (at your option) any later version. 12 */ 13 //#define DEBUG 14 #include <linux/slab.h> 15 #include <linux/assoc_array_priv.h> 16 17 /* 18 * Iterate over an associative array. The caller must hold the RCU read lock 19 * or better. 20 */ 21 static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root, 22 const struct assoc_array_ptr *stop, 23 int (*iterator)(const void *leaf, 24 void *iterator_data), 25 void *iterator_data) 26 { 27 const struct assoc_array_shortcut *shortcut; 28 const struct assoc_array_node *node; 29 const struct assoc_array_ptr *cursor, *ptr, *parent; 30 unsigned long has_meta; 31 int slot, ret; 32 33 cursor = root; 34 35 begin_node: 36 if (assoc_array_ptr_is_shortcut(cursor)) { 37 /* Descend through a shortcut */ 38 shortcut = assoc_array_ptr_to_shortcut(cursor); 39 smp_read_barrier_depends(); 40 cursor = ACCESS_ONCE(shortcut->next_node); 41 } 42 43 node = assoc_array_ptr_to_node(cursor); 44 smp_read_barrier_depends(); 45 slot = 0; 46 47 /* We perform two passes of each node. 48 * 49 * The first pass does all the leaves in this node. This means we 50 * don't miss any leaves if the node is split up by insertion whilst 51 * we're iterating over the branches rooted here (we may, however, see 52 * some leaves twice). 53 */ 54 has_meta = 0; 55 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 56 ptr = ACCESS_ONCE(node->slots[slot]); 57 has_meta |= (unsigned long)ptr; 58 if (ptr && assoc_array_ptr_is_leaf(ptr)) { 59 /* We need a barrier between the read of the pointer 60 * and dereferencing the pointer - but only if we are 61 * actually going to dereference it. 62 */ 63 smp_read_barrier_depends(); 64 65 /* Invoke the callback */ 66 ret = iterator(assoc_array_ptr_to_leaf(ptr), 67 iterator_data); 68 if (ret) 69 return ret; 70 } 71 } 72 73 /* The second pass attends to all the metadata pointers. If we follow 74 * one of these we may find that we don't come back here, but rather go 75 * back to a replacement node with the leaves in a different layout. 76 * 77 * We are guaranteed to make progress, however, as the slot number for 78 * a particular portion of the key space cannot change - and we 79 * continue at the back pointer + 1. 80 */ 81 if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE)) 82 goto finished_node; 83 slot = 0; 84 85 continue_node: 86 node = assoc_array_ptr_to_node(cursor); 87 smp_read_barrier_depends(); 88 89 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 90 ptr = ACCESS_ONCE(node->slots[slot]); 91 if (assoc_array_ptr_is_meta(ptr)) { 92 cursor = ptr; 93 goto begin_node; 94 } 95 } 96 97 finished_node: 98 /* Move up to the parent (may need to skip back over a shortcut) */ 99 parent = ACCESS_ONCE(node->back_pointer); 100 slot = node->parent_slot; 101 if (parent == stop) 102 return 0; 103 104 if (assoc_array_ptr_is_shortcut(parent)) { 105 shortcut = assoc_array_ptr_to_shortcut(parent); 106 smp_read_barrier_depends(); 107 cursor = parent; 108 parent = ACCESS_ONCE(shortcut->back_pointer); 109 slot = shortcut->parent_slot; 110 if (parent == stop) 111 return 0; 112 } 113 114 /* Ascend to next slot in parent node */ 115 cursor = parent; 116 slot++; 117 goto continue_node; 118 } 119 120 /** 121 * assoc_array_iterate - Pass all objects in the array to a callback 122 * @array: The array to iterate over. 123 * @iterator: The callback function. 124 * @iterator_data: Private data for the callback function. 125 * 126 * Iterate over all the objects in an associative array. Each one will be 127 * presented to the iterator function. 128 * 129 * If the array is being modified concurrently with the iteration then it is 130 * possible that some objects in the array will be passed to the iterator 131 * callback more than once - though every object should be passed at least 132 * once. If this is undesirable then the caller must lock against modification 133 * for the duration of this function. 134 * 135 * The function will return 0 if no objects were in the array or else it will 136 * return the result of the last iterator function called. Iteration stops 137 * immediately if any call to the iteration function results in a non-zero 138 * return. 139 * 140 * The caller should hold the RCU read lock or better if concurrent 141 * modification is possible. 142 */ 143 int assoc_array_iterate(const struct assoc_array *array, 144 int (*iterator)(const void *object, 145 void *iterator_data), 146 void *iterator_data) 147 { 148 struct assoc_array_ptr *root = ACCESS_ONCE(array->root); 149 150 if (!root) 151 return 0; 152 return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data); 153 } 154 155 enum assoc_array_walk_status { 156 assoc_array_walk_tree_empty, 157 assoc_array_walk_found_terminal_node, 158 assoc_array_walk_found_wrong_shortcut, 159 } status; 160 161 struct assoc_array_walk_result { 162 struct { 163 struct assoc_array_node *node; /* Node in which leaf might be found */ 164 int level; 165 int slot; 166 } terminal_node; 167 struct { 168 struct assoc_array_shortcut *shortcut; 169 int level; 170 int sc_level; 171 unsigned long sc_segments; 172 unsigned long dissimilarity; 173 } wrong_shortcut; 174 }; 175 176 /* 177 * Navigate through the internal tree looking for the closest node to the key. 178 */ 179 static enum assoc_array_walk_status 180 assoc_array_walk(const struct assoc_array *array, 181 const struct assoc_array_ops *ops, 182 const void *index_key, 183 struct assoc_array_walk_result *result) 184 { 185 struct assoc_array_shortcut *shortcut; 186 struct assoc_array_node *node; 187 struct assoc_array_ptr *cursor, *ptr; 188 unsigned long sc_segments, dissimilarity; 189 unsigned long segments; 190 int level, sc_level, next_sc_level; 191 int slot; 192 193 pr_devel("-->%s()\n", __func__); 194 195 cursor = ACCESS_ONCE(array->root); 196 if (!cursor) 197 return assoc_array_walk_tree_empty; 198 199 level = 0; 200 201 /* Use segments from the key for the new leaf to navigate through the 202 * internal tree, skipping through nodes and shortcuts that are on 203 * route to the destination. Eventually we'll come to a slot that is 204 * either empty or contains a leaf at which point we've found a node in 205 * which the leaf we're looking for might be found or into which it 206 * should be inserted. 207 */ 208 jumped: 209 segments = ops->get_key_chunk(index_key, level); 210 pr_devel("segments[%d]: %lx\n", level, segments); 211 212 if (assoc_array_ptr_is_shortcut(cursor)) 213 goto follow_shortcut; 214 215 consider_node: 216 node = assoc_array_ptr_to_node(cursor); 217 smp_read_barrier_depends(); 218 219 slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); 220 slot &= ASSOC_ARRAY_FAN_MASK; 221 ptr = ACCESS_ONCE(node->slots[slot]); 222 223 pr_devel("consider slot %x [ix=%d type=%lu]\n", 224 slot, level, (unsigned long)ptr & 3); 225 226 if (!assoc_array_ptr_is_meta(ptr)) { 227 /* The node doesn't have a node/shortcut pointer in the slot 228 * corresponding to the index key that we have to follow. 229 */ 230 result->terminal_node.node = node; 231 result->terminal_node.level = level; 232 result->terminal_node.slot = slot; 233 pr_devel("<--%s() = terminal_node\n", __func__); 234 return assoc_array_walk_found_terminal_node; 235 } 236 237 if (assoc_array_ptr_is_node(ptr)) { 238 /* There is a pointer to a node in the slot corresponding to 239 * this index key segment, so we need to follow it. 240 */ 241 cursor = ptr; 242 level += ASSOC_ARRAY_LEVEL_STEP; 243 if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) 244 goto consider_node; 245 goto jumped; 246 } 247 248 /* There is a shortcut in the slot corresponding to the index key 249 * segment. We follow the shortcut if its partial index key matches 250 * this leaf's. Otherwise we need to split the shortcut. 251 */ 252 cursor = ptr; 253 follow_shortcut: 254 shortcut = assoc_array_ptr_to_shortcut(cursor); 255 smp_read_barrier_depends(); 256 pr_devel("shortcut to %d\n", shortcut->skip_to_level); 257 sc_level = level + ASSOC_ARRAY_LEVEL_STEP; 258 BUG_ON(sc_level > shortcut->skip_to_level); 259 260 do { 261 /* Check the leaf against the shortcut's index key a word at a 262 * time, trimming the final word (the shortcut stores the index 263 * key completely from the root to the shortcut's target). 264 */ 265 if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0) 266 segments = ops->get_key_chunk(index_key, sc_level); 267 268 sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT]; 269 dissimilarity = segments ^ sc_segments; 270 271 if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) { 272 /* Trim segments that are beyond the shortcut */ 273 int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK; 274 dissimilarity &= ~(ULONG_MAX << shift); 275 next_sc_level = shortcut->skip_to_level; 276 } else { 277 next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE; 278 next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 279 } 280 281 if (dissimilarity != 0) { 282 /* This shortcut points elsewhere */ 283 result->wrong_shortcut.shortcut = shortcut; 284 result->wrong_shortcut.level = level; 285 result->wrong_shortcut.sc_level = sc_level; 286 result->wrong_shortcut.sc_segments = sc_segments; 287 result->wrong_shortcut.dissimilarity = dissimilarity; 288 return assoc_array_walk_found_wrong_shortcut; 289 } 290 291 sc_level = next_sc_level; 292 } while (sc_level < shortcut->skip_to_level); 293 294 /* The shortcut matches the leaf's index to this point. */ 295 cursor = ACCESS_ONCE(shortcut->next_node); 296 if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { 297 level = sc_level; 298 goto jumped; 299 } else { 300 level = sc_level; 301 goto consider_node; 302 } 303 } 304 305 /** 306 * assoc_array_find - Find an object by index key 307 * @array: The associative array to search. 308 * @ops: The operations to use. 309 * @index_key: The key to the object. 310 * 311 * Find an object in an associative array by walking through the internal tree 312 * to the node that should contain the object and then searching the leaves 313 * there. NULL is returned if the requested object was not found in the array. 314 * 315 * The caller must hold the RCU read lock or better. 316 */ 317 void *assoc_array_find(const struct assoc_array *array, 318 const struct assoc_array_ops *ops, 319 const void *index_key) 320 { 321 struct assoc_array_walk_result result; 322 const struct assoc_array_node *node; 323 const struct assoc_array_ptr *ptr; 324 const void *leaf; 325 int slot; 326 327 if (assoc_array_walk(array, ops, index_key, &result) != 328 assoc_array_walk_found_terminal_node) 329 return NULL; 330 331 node = result.terminal_node.node; 332 smp_read_barrier_depends(); 333 334 /* If the target key is available to us, it's has to be pointed to by 335 * the terminal node. 336 */ 337 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 338 ptr = ACCESS_ONCE(node->slots[slot]); 339 if (ptr && assoc_array_ptr_is_leaf(ptr)) { 340 /* We need a barrier between the read of the pointer 341 * and dereferencing the pointer - but only if we are 342 * actually going to dereference it. 343 */ 344 leaf = assoc_array_ptr_to_leaf(ptr); 345 smp_read_barrier_depends(); 346 if (ops->compare_object(leaf, index_key)) 347 return (void *)leaf; 348 } 349 } 350 351 return NULL; 352 } 353 354 /* 355 * Destructively iterate over an associative array. The caller must prevent 356 * other simultaneous accesses. 357 */ 358 static void assoc_array_destroy_subtree(struct assoc_array_ptr *root, 359 const struct assoc_array_ops *ops) 360 { 361 struct assoc_array_shortcut *shortcut; 362 struct assoc_array_node *node; 363 struct assoc_array_ptr *cursor, *parent = NULL; 364 int slot = -1; 365 366 pr_devel("-->%s()\n", __func__); 367 368 cursor = root; 369 if (!cursor) { 370 pr_devel("empty\n"); 371 return; 372 } 373 374 move_to_meta: 375 if (assoc_array_ptr_is_shortcut(cursor)) { 376 /* Descend through a shortcut */ 377 pr_devel("[%d] shortcut\n", slot); 378 BUG_ON(!assoc_array_ptr_is_shortcut(cursor)); 379 shortcut = assoc_array_ptr_to_shortcut(cursor); 380 BUG_ON(shortcut->back_pointer != parent); 381 BUG_ON(slot != -1 && shortcut->parent_slot != slot); 382 parent = cursor; 383 cursor = shortcut->next_node; 384 slot = -1; 385 BUG_ON(!assoc_array_ptr_is_node(cursor)); 386 } 387 388 pr_devel("[%d] node\n", slot); 389 node = assoc_array_ptr_to_node(cursor); 390 BUG_ON(node->back_pointer != parent); 391 BUG_ON(slot != -1 && node->parent_slot != slot); 392 slot = 0; 393 394 continue_node: 395 pr_devel("Node %p [back=%p]\n", node, node->back_pointer); 396 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 397 struct assoc_array_ptr *ptr = node->slots[slot]; 398 if (!ptr) 399 continue; 400 if (assoc_array_ptr_is_meta(ptr)) { 401 parent = cursor; 402 cursor = ptr; 403 goto move_to_meta; 404 } 405 406 if (ops) { 407 pr_devel("[%d] free leaf\n", slot); 408 ops->free_object(assoc_array_ptr_to_leaf(ptr)); 409 } 410 } 411 412 parent = node->back_pointer; 413 slot = node->parent_slot; 414 pr_devel("free node\n"); 415 kfree(node); 416 if (!parent) 417 return; /* Done */ 418 419 /* Move back up to the parent (may need to free a shortcut on 420 * the way up) */ 421 if (assoc_array_ptr_is_shortcut(parent)) { 422 shortcut = assoc_array_ptr_to_shortcut(parent); 423 BUG_ON(shortcut->next_node != cursor); 424 cursor = parent; 425 parent = shortcut->back_pointer; 426 slot = shortcut->parent_slot; 427 pr_devel("free shortcut\n"); 428 kfree(shortcut); 429 if (!parent) 430 return; 431 432 BUG_ON(!assoc_array_ptr_is_node(parent)); 433 } 434 435 /* Ascend to next slot in parent node */ 436 pr_devel("ascend to %p[%d]\n", parent, slot); 437 cursor = parent; 438 node = assoc_array_ptr_to_node(cursor); 439 slot++; 440 goto continue_node; 441 } 442 443 /** 444 * assoc_array_destroy - Destroy an associative array 445 * @array: The array to destroy. 446 * @ops: The operations to use. 447 * 448 * Discard all metadata and free all objects in an associative array. The 449 * array will be empty and ready to use again upon completion. This function 450 * cannot fail. 451 * 452 * The caller must prevent all other accesses whilst this takes place as no 453 * attempt is made to adjust pointers gracefully to permit RCU readlock-holding 454 * accesses to continue. On the other hand, no memory allocation is required. 455 */ 456 void assoc_array_destroy(struct assoc_array *array, 457 const struct assoc_array_ops *ops) 458 { 459 assoc_array_destroy_subtree(array->root, ops); 460 array->root = NULL; 461 } 462 463 /* 464 * Handle insertion into an empty tree. 465 */ 466 static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit) 467 { 468 struct assoc_array_node *new_n0; 469 470 pr_devel("-->%s()\n", __func__); 471 472 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 473 if (!new_n0) 474 return false; 475 476 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 477 edit->leaf_p = &new_n0->slots[0]; 478 edit->adjust_count_on = new_n0; 479 edit->set[0].ptr = &edit->array->root; 480 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 481 482 pr_devel("<--%s() = ok [no root]\n", __func__); 483 return true; 484 } 485 486 /* 487 * Handle insertion into a terminal node. 488 */ 489 static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, 490 const struct assoc_array_ops *ops, 491 const void *index_key, 492 struct assoc_array_walk_result *result) 493 { 494 struct assoc_array_shortcut *shortcut, *new_s0; 495 struct assoc_array_node *node, *new_n0, *new_n1, *side; 496 struct assoc_array_ptr *ptr; 497 unsigned long dissimilarity, base_seg, blank; 498 size_t keylen; 499 bool have_meta; 500 int level, diff; 501 int slot, next_slot, free_slot, i, j; 502 503 node = result->terminal_node.node; 504 level = result->terminal_node.level; 505 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot; 506 507 pr_devel("-->%s()\n", __func__); 508 509 /* We arrived at a node which doesn't have an onward node or shortcut 510 * pointer that we have to follow. This means that (a) the leaf we 511 * want must go here (either by insertion or replacement) or (b) we 512 * need to split this node and insert in one of the fragments. 513 */ 514 free_slot = -1; 515 516 /* Firstly, we have to check the leaves in this node to see if there's 517 * a matching one we should replace in place. 518 */ 519 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 520 ptr = node->slots[i]; 521 if (!ptr) { 522 free_slot = i; 523 continue; 524 } 525 if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) { 526 pr_devel("replace in slot %d\n", i); 527 edit->leaf_p = &node->slots[i]; 528 edit->dead_leaf = node->slots[i]; 529 pr_devel("<--%s() = ok [replace]\n", __func__); 530 return true; 531 } 532 } 533 534 /* If there is a free slot in this node then we can just insert the 535 * leaf here. 536 */ 537 if (free_slot >= 0) { 538 pr_devel("insert in free slot %d\n", free_slot); 539 edit->leaf_p = &node->slots[free_slot]; 540 edit->adjust_count_on = node; 541 pr_devel("<--%s() = ok [insert]\n", __func__); 542 return true; 543 } 544 545 /* The node has no spare slots - so we're either going to have to split 546 * it or insert another node before it. 547 * 548 * Whatever, we're going to need at least two new nodes - so allocate 549 * those now. We may also need a new shortcut, but we deal with that 550 * when we need it. 551 */ 552 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 553 if (!new_n0) 554 return false; 555 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 556 new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 557 if (!new_n1) 558 return false; 559 edit->new_meta[1] = assoc_array_node_to_ptr(new_n1); 560 561 /* We need to find out how similar the leaves are. */ 562 pr_devel("no spare slots\n"); 563 have_meta = false; 564 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 565 ptr = node->slots[i]; 566 if (assoc_array_ptr_is_meta(ptr)) { 567 edit->segment_cache[i] = 0xff; 568 have_meta = true; 569 continue; 570 } 571 base_seg = ops->get_object_key_chunk( 572 assoc_array_ptr_to_leaf(ptr), level); 573 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 574 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; 575 } 576 577 if (have_meta) { 578 pr_devel("have meta\n"); 579 goto split_node; 580 } 581 582 /* The node contains only leaves */ 583 dissimilarity = 0; 584 base_seg = edit->segment_cache[0]; 585 for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++) 586 dissimilarity |= edit->segment_cache[i] ^ base_seg; 587 588 pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity); 589 590 if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) { 591 /* The old leaves all cluster in the same slot. We will need 592 * to insert a shortcut if the new node wants to cluster with them. 593 */ 594 if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0) 595 goto all_leaves_cluster_together; 596 597 /* Otherwise we can just insert a new node ahead of the old 598 * one. 599 */ 600 goto present_leaves_cluster_but_not_new_leaf; 601 } 602 603 split_node: 604 pr_devel("split node\n"); 605 606 /* We need to split the current node; we know that the node doesn't 607 * simply contain a full set of leaves that cluster together (it 608 * contains meta pointers and/or non-clustering leaves). 609 * 610 * We need to expel at least two leaves out of a set consisting of the 611 * leaves in the node and the new leaf. 612 * 613 * We need a new node (n0) to replace the current one and a new node to 614 * take the expelled nodes (n1). 615 */ 616 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 617 new_n0->back_pointer = node->back_pointer; 618 new_n0->parent_slot = node->parent_slot; 619 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); 620 new_n1->parent_slot = -1; /* Need to calculate this */ 621 622 do_split_node: 623 pr_devel("do_split_node\n"); 624 625 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; 626 new_n1->nr_leaves_on_branch = 0; 627 628 /* Begin by finding two matching leaves. There have to be at least two 629 * that match - even if there are meta pointers - because any leaf that 630 * would match a slot with a meta pointer in it must be somewhere 631 * behind that meta pointer and cannot be here. Further, given N 632 * remaining leaf slots, we now have N+1 leaves to go in them. 633 */ 634 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 635 slot = edit->segment_cache[i]; 636 if (slot != 0xff) 637 for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++) 638 if (edit->segment_cache[j] == slot) 639 goto found_slot_for_multiple_occupancy; 640 } 641 found_slot_for_multiple_occupancy: 642 pr_devel("same slot: %x %x [%02x]\n", i, j, slot); 643 BUG_ON(i >= ASSOC_ARRAY_FAN_OUT); 644 BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1); 645 BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT); 646 647 new_n1->parent_slot = slot; 648 649 /* Metadata pointers cannot change slot */ 650 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) 651 if (assoc_array_ptr_is_meta(node->slots[i])) 652 new_n0->slots[i] = node->slots[i]; 653 else 654 new_n0->slots[i] = NULL; 655 BUG_ON(new_n0->slots[slot] != NULL); 656 new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1); 657 658 /* Filter the leaf pointers between the new nodes */ 659 free_slot = -1; 660 next_slot = 0; 661 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 662 if (assoc_array_ptr_is_meta(node->slots[i])) 663 continue; 664 if (edit->segment_cache[i] == slot) { 665 new_n1->slots[next_slot++] = node->slots[i]; 666 new_n1->nr_leaves_on_branch++; 667 } else { 668 do { 669 free_slot++; 670 } while (new_n0->slots[free_slot] != NULL); 671 new_n0->slots[free_slot] = node->slots[i]; 672 } 673 } 674 675 pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot); 676 677 if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) { 678 do { 679 free_slot++; 680 } while (new_n0->slots[free_slot] != NULL); 681 edit->leaf_p = &new_n0->slots[free_slot]; 682 edit->adjust_count_on = new_n0; 683 } else { 684 edit->leaf_p = &new_n1->slots[next_slot++]; 685 edit->adjust_count_on = new_n1; 686 } 687 688 BUG_ON(next_slot <= 1); 689 690 edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0); 691 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 692 if (edit->segment_cache[i] == 0xff) { 693 ptr = node->slots[i]; 694 BUG_ON(assoc_array_ptr_is_leaf(ptr)); 695 if (assoc_array_ptr_is_node(ptr)) { 696 side = assoc_array_ptr_to_node(ptr); 697 edit->set_backpointers[i] = &side->back_pointer; 698 } else { 699 shortcut = assoc_array_ptr_to_shortcut(ptr); 700 edit->set_backpointers[i] = &shortcut->back_pointer; 701 } 702 } 703 } 704 705 ptr = node->back_pointer; 706 if (!ptr) 707 edit->set[0].ptr = &edit->array->root; 708 else if (assoc_array_ptr_is_node(ptr)) 709 edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot]; 710 else 711 edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node; 712 edit->excised_meta[0] = assoc_array_node_to_ptr(node); 713 pr_devel("<--%s() = ok [split node]\n", __func__); 714 return true; 715 716 present_leaves_cluster_but_not_new_leaf: 717 /* All the old leaves cluster in the same slot, but the new leaf wants 718 * to go into a different slot, so we create a new node to hold the new 719 * leaf and a pointer to a new node holding all the old leaves. 720 */ 721 pr_devel("present leaves cluster but not new leaf\n"); 722 723 new_n0->back_pointer = node->back_pointer; 724 new_n0->parent_slot = node->parent_slot; 725 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; 726 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); 727 new_n1->parent_slot = edit->segment_cache[0]; 728 new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch; 729 edit->adjust_count_on = new_n0; 730 731 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) 732 new_n1->slots[i] = node->slots[i]; 733 734 new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n0); 735 edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]]; 736 737 edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot]; 738 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 739 edit->excised_meta[0] = assoc_array_node_to_ptr(node); 740 pr_devel("<--%s() = ok [insert node before]\n", __func__); 741 return true; 742 743 all_leaves_cluster_together: 744 /* All the leaves, new and old, want to cluster together in this node 745 * in the same slot, so we have to replace this node with a shortcut to 746 * skip over the identical parts of the key and then place a pair of 747 * nodes, one inside the other, at the end of the shortcut and 748 * distribute the keys between them. 749 * 750 * Firstly we need to work out where the leaves start diverging as a 751 * bit position into their keys so that we know how big the shortcut 752 * needs to be. 753 * 754 * We only need to make a single pass of N of the N+1 leaves because if 755 * any keys differ between themselves at bit X then at least one of 756 * them must also differ with the base key at bit X or before. 757 */ 758 pr_devel("all leaves cluster together\n"); 759 diff = INT_MAX; 760 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 761 int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf), 762 assoc_array_ptr_to_leaf(node->slots[i])); 763 if (x < diff) { 764 BUG_ON(x < 0); 765 diff = x; 766 } 767 } 768 BUG_ON(diff == INT_MAX); 769 BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP); 770 771 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); 772 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 773 774 new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + 775 keylen * sizeof(unsigned long), GFP_KERNEL); 776 if (!new_s0) 777 return false; 778 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0); 779 780 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); 781 new_s0->back_pointer = node->back_pointer; 782 new_s0->parent_slot = node->parent_slot; 783 new_s0->next_node = assoc_array_node_to_ptr(new_n0); 784 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); 785 new_n0->parent_slot = 0; 786 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); 787 new_n1->parent_slot = -1; /* Need to calculate this */ 788 789 new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK; 790 pr_devel("skip_to_level = %d [diff %d]\n", level, diff); 791 BUG_ON(level <= 0); 792 793 for (i = 0; i < keylen; i++) 794 new_s0->index_key[i] = 795 ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE); 796 797 blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); 798 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); 799 new_s0->index_key[keylen - 1] &= ~blank; 800 801 /* This now reduces to a node splitting exercise for which we'll need 802 * to regenerate the disparity table. 803 */ 804 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 805 ptr = node->slots[i]; 806 base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr), 807 level); 808 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 809 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; 810 } 811 812 base_seg = ops->get_key_chunk(index_key, level); 813 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 814 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK; 815 goto do_split_node; 816 } 817 818 /* 819 * Handle insertion into the middle of a shortcut. 820 */ 821 static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit, 822 const struct assoc_array_ops *ops, 823 struct assoc_array_walk_result *result) 824 { 825 struct assoc_array_shortcut *shortcut, *new_s0, *new_s1; 826 struct assoc_array_node *node, *new_n0, *side; 827 unsigned long sc_segments, dissimilarity, blank; 828 size_t keylen; 829 int level, sc_level, diff; 830 int sc_slot; 831 832 shortcut = result->wrong_shortcut.shortcut; 833 level = result->wrong_shortcut.level; 834 sc_level = result->wrong_shortcut.sc_level; 835 sc_segments = result->wrong_shortcut.sc_segments; 836 dissimilarity = result->wrong_shortcut.dissimilarity; 837 838 pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n", 839 __func__, level, dissimilarity, sc_level); 840 841 /* We need to split a shortcut and insert a node between the two 842 * pieces. Zero-length pieces will be dispensed with entirely. 843 * 844 * First of all, we need to find out in which level the first 845 * difference was. 846 */ 847 diff = __ffs(dissimilarity); 848 diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK; 849 diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK; 850 pr_devel("diff=%d\n", diff); 851 852 if (!shortcut->back_pointer) { 853 edit->set[0].ptr = &edit->array->root; 854 } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) { 855 node = assoc_array_ptr_to_node(shortcut->back_pointer); 856 edit->set[0].ptr = &node->slots[shortcut->parent_slot]; 857 } else { 858 BUG(); 859 } 860 861 edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut); 862 863 /* Create a new node now since we're going to need it anyway */ 864 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 865 if (!new_n0) 866 return false; 867 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 868 edit->adjust_count_on = new_n0; 869 870 /* Insert a new shortcut before the new node if this segment isn't of 871 * zero length - otherwise we just connect the new node directly to the 872 * parent. 873 */ 874 level += ASSOC_ARRAY_LEVEL_STEP; 875 if (diff > level) { 876 pr_devel("pre-shortcut %d...%d\n", level, diff); 877 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); 878 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 879 880 new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + 881 keylen * sizeof(unsigned long), GFP_KERNEL); 882 if (!new_s0) 883 return false; 884 edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0); 885 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); 886 new_s0->back_pointer = shortcut->back_pointer; 887 new_s0->parent_slot = shortcut->parent_slot; 888 new_s0->next_node = assoc_array_node_to_ptr(new_n0); 889 new_s0->skip_to_level = diff; 890 891 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); 892 new_n0->parent_slot = 0; 893 894 memcpy(new_s0->index_key, shortcut->index_key, 895 keylen * sizeof(unsigned long)); 896 897 blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); 898 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank); 899 new_s0->index_key[keylen - 1] &= ~blank; 900 } else { 901 pr_devel("no pre-shortcut\n"); 902 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 903 new_n0->back_pointer = shortcut->back_pointer; 904 new_n0->parent_slot = shortcut->parent_slot; 905 } 906 907 side = assoc_array_ptr_to_node(shortcut->next_node); 908 new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch; 909 910 /* We need to know which slot in the new node is going to take a 911 * metadata pointer. 912 */ 913 sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); 914 sc_slot &= ASSOC_ARRAY_FAN_MASK; 915 916 pr_devel("new slot %lx >> %d -> %d\n", 917 sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot); 918 919 /* Determine whether we need to follow the new node with a replacement 920 * for the current shortcut. We could in theory reuse the current 921 * shortcut if its parent slot number doesn't change - but that's a 922 * 1-in-16 chance so not worth expending the code upon. 923 */ 924 level = diff + ASSOC_ARRAY_LEVEL_STEP; 925 if (level < shortcut->skip_to_level) { 926 pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level); 927 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 928 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 929 930 new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) + 931 keylen * sizeof(unsigned long), GFP_KERNEL); 932 if (!new_s1) 933 return false; 934 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1); 935 936 new_s1->back_pointer = assoc_array_node_to_ptr(new_n0); 937 new_s1->parent_slot = sc_slot; 938 new_s1->next_node = shortcut->next_node; 939 new_s1->skip_to_level = shortcut->skip_to_level; 940 941 new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1); 942 943 memcpy(new_s1->index_key, shortcut->index_key, 944 keylen * sizeof(unsigned long)); 945 946 edit->set[1].ptr = &side->back_pointer; 947 edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1); 948 } else { 949 pr_devel("no post-shortcut\n"); 950 951 /* We don't have to replace the pointed-to node as long as we 952 * use memory barriers to make sure the parent slot number is 953 * changed before the back pointer (the parent slot number is 954 * irrelevant to the old parent shortcut). 955 */ 956 new_n0->slots[sc_slot] = shortcut->next_node; 957 edit->set_parent_slot[0].p = &side->parent_slot; 958 edit->set_parent_slot[0].to = sc_slot; 959 edit->set[1].ptr = &side->back_pointer; 960 edit->set[1].to = assoc_array_node_to_ptr(new_n0); 961 } 962 963 /* Install the new leaf in a spare slot in the new node. */ 964 if (sc_slot == 0) 965 edit->leaf_p = &new_n0->slots[1]; 966 else 967 edit->leaf_p = &new_n0->slots[0]; 968 969 pr_devel("<--%s() = ok [split shortcut]\n", __func__); 970 return edit; 971 } 972 973 /** 974 * assoc_array_insert - Script insertion of an object into an associative array 975 * @array: The array to insert into. 976 * @ops: The operations to use. 977 * @index_key: The key to insert at. 978 * @object: The object to insert. 979 * 980 * Precalculate and preallocate a script for the insertion or replacement of an 981 * object in an associative array. This results in an edit script that can 982 * either be applied or cancelled. 983 * 984 * The function returns a pointer to an edit script or -ENOMEM. 985 * 986 * The caller should lock against other modifications and must continue to hold 987 * the lock until assoc_array_apply_edit() has been called. 988 * 989 * Accesses to the tree may take place concurrently with this function, 990 * provided they hold the RCU read lock. 991 */ 992 struct assoc_array_edit *assoc_array_insert(struct assoc_array *array, 993 const struct assoc_array_ops *ops, 994 const void *index_key, 995 void *object) 996 { 997 struct assoc_array_walk_result result; 998 struct assoc_array_edit *edit; 999 1000 pr_devel("-->%s()\n", __func__); 1001 1002 /* The leaf pointer we're given must not have the bottom bit set as we 1003 * use those for type-marking the pointer. NULL pointers are also not 1004 * allowed as they indicate an empty slot but we have to allow them 1005 * here as they can be updated later. 1006 */ 1007 BUG_ON(assoc_array_ptr_is_meta(object)); 1008 1009 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1010 if (!edit) 1011 return ERR_PTR(-ENOMEM); 1012 edit->array = array; 1013 edit->ops = ops; 1014 edit->leaf = assoc_array_leaf_to_ptr(object); 1015 edit->adjust_count_by = 1; 1016 1017 switch (assoc_array_walk(array, ops, index_key, &result)) { 1018 case assoc_array_walk_tree_empty: 1019 /* Allocate a root node if there isn't one yet */ 1020 if (!assoc_array_insert_in_empty_tree(edit)) 1021 goto enomem; 1022 return edit; 1023 1024 case assoc_array_walk_found_terminal_node: 1025 /* We found a node that doesn't have a node/shortcut pointer in 1026 * the slot corresponding to the index key that we have to 1027 * follow. 1028 */ 1029 if (!assoc_array_insert_into_terminal_node(edit, ops, index_key, 1030 &result)) 1031 goto enomem; 1032 return edit; 1033 1034 case assoc_array_walk_found_wrong_shortcut: 1035 /* We found a shortcut that didn't match our key in a slot we 1036 * needed to follow. 1037 */ 1038 if (!assoc_array_insert_mid_shortcut(edit, ops, &result)) 1039 goto enomem; 1040 return edit; 1041 } 1042 1043 enomem: 1044 /* Clean up after an out of memory error */ 1045 pr_devel("enomem\n"); 1046 assoc_array_cancel_edit(edit); 1047 return ERR_PTR(-ENOMEM); 1048 } 1049 1050 /** 1051 * assoc_array_insert_set_object - Set the new object pointer in an edit script 1052 * @edit: The edit script to modify. 1053 * @object: The object pointer to set. 1054 * 1055 * Change the object to be inserted in an edit script. The object pointed to 1056 * by the old object is not freed. This must be done prior to applying the 1057 * script. 1058 */ 1059 void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object) 1060 { 1061 BUG_ON(!object); 1062 edit->leaf = assoc_array_leaf_to_ptr(object); 1063 } 1064 1065 struct assoc_array_delete_collapse_context { 1066 struct assoc_array_node *node; 1067 const void *skip_leaf; 1068 int slot; 1069 }; 1070 1071 /* 1072 * Subtree collapse to node iterator. 1073 */ 1074 static int assoc_array_delete_collapse_iterator(const void *leaf, 1075 void *iterator_data) 1076 { 1077 struct assoc_array_delete_collapse_context *collapse = iterator_data; 1078 1079 if (leaf == collapse->skip_leaf) 1080 return 0; 1081 1082 BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT); 1083 1084 collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf); 1085 return 0; 1086 } 1087 1088 /** 1089 * assoc_array_delete - Script deletion of an object from an associative array 1090 * @array: The array to search. 1091 * @ops: The operations to use. 1092 * @index_key: The key to the object. 1093 * 1094 * Precalculate and preallocate a script for the deletion of an object from an 1095 * associative array. This results in an edit script that can either be 1096 * applied or cancelled. 1097 * 1098 * The function returns a pointer to an edit script if the object was found, 1099 * NULL if the object was not found or -ENOMEM. 1100 * 1101 * The caller should lock against other modifications and must continue to hold 1102 * the lock until assoc_array_apply_edit() has been called. 1103 * 1104 * Accesses to the tree may take place concurrently with this function, 1105 * provided they hold the RCU read lock. 1106 */ 1107 struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, 1108 const struct assoc_array_ops *ops, 1109 const void *index_key) 1110 { 1111 struct assoc_array_delete_collapse_context collapse; 1112 struct assoc_array_walk_result result; 1113 struct assoc_array_node *node, *new_n0; 1114 struct assoc_array_edit *edit; 1115 struct assoc_array_ptr *ptr; 1116 bool has_meta; 1117 int slot, i; 1118 1119 pr_devel("-->%s()\n", __func__); 1120 1121 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1122 if (!edit) 1123 return ERR_PTR(-ENOMEM); 1124 edit->array = array; 1125 edit->ops = ops; 1126 edit->adjust_count_by = -1; 1127 1128 switch (assoc_array_walk(array, ops, index_key, &result)) { 1129 case assoc_array_walk_found_terminal_node: 1130 /* We found a node that should contain the leaf we've been 1131 * asked to remove - *if* it's in the tree. 1132 */ 1133 pr_devel("terminal_node\n"); 1134 node = result.terminal_node.node; 1135 1136 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1137 ptr = node->slots[slot]; 1138 if (ptr && 1139 assoc_array_ptr_is_leaf(ptr) && 1140 ops->compare_object(assoc_array_ptr_to_leaf(ptr), 1141 index_key)) 1142 goto found_leaf; 1143 } 1144 case assoc_array_walk_tree_empty: 1145 case assoc_array_walk_found_wrong_shortcut: 1146 default: 1147 assoc_array_cancel_edit(edit); 1148 pr_devel("not found\n"); 1149 return NULL; 1150 } 1151 1152 found_leaf: 1153 BUG_ON(array->nr_leaves_on_tree <= 0); 1154 1155 /* In the simplest form of deletion we just clear the slot and release 1156 * the leaf after a suitable interval. 1157 */ 1158 edit->dead_leaf = node->slots[slot]; 1159 edit->set[0].ptr = &node->slots[slot]; 1160 edit->set[0].to = NULL; 1161 edit->adjust_count_on = node; 1162 1163 /* If that concludes erasure of the last leaf, then delete the entire 1164 * internal array. 1165 */ 1166 if (array->nr_leaves_on_tree == 1) { 1167 edit->set[1].ptr = &array->root; 1168 edit->set[1].to = NULL; 1169 edit->adjust_count_on = NULL; 1170 edit->excised_subtree = array->root; 1171 pr_devel("all gone\n"); 1172 return edit; 1173 } 1174 1175 /* However, we'd also like to clear up some metadata blocks if we 1176 * possibly can. 1177 * 1178 * We go for a simple algorithm of: if this node has FAN_OUT or fewer 1179 * leaves in it, then attempt to collapse it - and attempt to 1180 * recursively collapse up the tree. 1181 * 1182 * We could also try and collapse in partially filled subtrees to take 1183 * up space in this node. 1184 */ 1185 if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { 1186 struct assoc_array_node *parent, *grandparent; 1187 struct assoc_array_ptr *ptr; 1188 1189 /* First of all, we need to know if this node has metadata so 1190 * that we don't try collapsing if all the leaves are already 1191 * here. 1192 */ 1193 has_meta = false; 1194 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 1195 ptr = node->slots[i]; 1196 if (assoc_array_ptr_is_meta(ptr)) { 1197 has_meta = true; 1198 break; 1199 } 1200 } 1201 1202 pr_devel("leaves: %ld [m=%d]\n", 1203 node->nr_leaves_on_branch - 1, has_meta); 1204 1205 /* Look further up the tree to see if we can collapse this node 1206 * into a more proximal node too. 1207 */ 1208 parent = node; 1209 collapse_up: 1210 pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch); 1211 1212 ptr = parent->back_pointer; 1213 if (!ptr) 1214 goto do_collapse; 1215 if (assoc_array_ptr_is_shortcut(ptr)) { 1216 struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr); 1217 ptr = s->back_pointer; 1218 if (!ptr) 1219 goto do_collapse; 1220 } 1221 1222 grandparent = assoc_array_ptr_to_node(ptr); 1223 if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { 1224 parent = grandparent; 1225 goto collapse_up; 1226 } 1227 1228 do_collapse: 1229 /* There's no point collapsing if the original node has no meta 1230 * pointers to discard and if we didn't merge into one of that 1231 * node's ancestry. 1232 */ 1233 if (has_meta || parent != node) { 1234 node = parent; 1235 1236 /* Create a new node to collapse into */ 1237 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 1238 if (!new_n0) 1239 goto enomem; 1240 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 1241 1242 new_n0->back_pointer = node->back_pointer; 1243 new_n0->parent_slot = node->parent_slot; 1244 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; 1245 edit->adjust_count_on = new_n0; 1246 1247 collapse.node = new_n0; 1248 collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf); 1249 collapse.slot = 0; 1250 assoc_array_subtree_iterate(assoc_array_node_to_ptr(node), 1251 node->back_pointer, 1252 assoc_array_delete_collapse_iterator, 1253 &collapse); 1254 pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch); 1255 BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1); 1256 1257 if (!node->back_pointer) { 1258 edit->set[1].ptr = &array->root; 1259 } else if (assoc_array_ptr_is_leaf(node->back_pointer)) { 1260 BUG(); 1261 } else if (assoc_array_ptr_is_node(node->back_pointer)) { 1262 struct assoc_array_node *p = 1263 assoc_array_ptr_to_node(node->back_pointer); 1264 edit->set[1].ptr = &p->slots[node->parent_slot]; 1265 } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) { 1266 struct assoc_array_shortcut *s = 1267 assoc_array_ptr_to_shortcut(node->back_pointer); 1268 edit->set[1].ptr = &s->next_node; 1269 } 1270 edit->set[1].to = assoc_array_node_to_ptr(new_n0); 1271 edit->excised_subtree = assoc_array_node_to_ptr(node); 1272 } 1273 } 1274 1275 return edit; 1276 1277 enomem: 1278 /* Clean up after an out of memory error */ 1279 pr_devel("enomem\n"); 1280 assoc_array_cancel_edit(edit); 1281 return ERR_PTR(-ENOMEM); 1282 } 1283 1284 /** 1285 * assoc_array_clear - Script deletion of all objects from an associative array 1286 * @array: The array to clear. 1287 * @ops: The operations to use. 1288 * 1289 * Precalculate and preallocate a script for the deletion of all the objects 1290 * from an associative array. This results in an edit script that can either 1291 * be applied or cancelled. 1292 * 1293 * The function returns a pointer to an edit script if there are objects to be 1294 * deleted, NULL if there are no objects in the array or -ENOMEM. 1295 * 1296 * The caller should lock against other modifications and must continue to hold 1297 * the lock until assoc_array_apply_edit() has been called. 1298 * 1299 * Accesses to the tree may take place concurrently with this function, 1300 * provided they hold the RCU read lock. 1301 */ 1302 struct assoc_array_edit *assoc_array_clear(struct assoc_array *array, 1303 const struct assoc_array_ops *ops) 1304 { 1305 struct assoc_array_edit *edit; 1306 1307 pr_devel("-->%s()\n", __func__); 1308 1309 if (!array->root) 1310 return NULL; 1311 1312 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1313 if (!edit) 1314 return ERR_PTR(-ENOMEM); 1315 edit->array = array; 1316 edit->ops = ops; 1317 edit->set[1].ptr = &array->root; 1318 edit->set[1].to = NULL; 1319 edit->excised_subtree = array->root; 1320 edit->ops_for_excised_subtree = ops; 1321 pr_devel("all gone\n"); 1322 return edit; 1323 } 1324 1325 /* 1326 * Handle the deferred destruction after an applied edit. 1327 */ 1328 static void assoc_array_rcu_cleanup(struct rcu_head *head) 1329 { 1330 struct assoc_array_edit *edit = 1331 container_of(head, struct assoc_array_edit, rcu); 1332 int i; 1333 1334 pr_devel("-->%s()\n", __func__); 1335 1336 if (edit->dead_leaf) 1337 edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf)); 1338 for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++) 1339 if (edit->excised_meta[i]) 1340 kfree(assoc_array_ptr_to_node(edit->excised_meta[i])); 1341 1342 if (edit->excised_subtree) { 1343 BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree)); 1344 if (assoc_array_ptr_is_node(edit->excised_subtree)) { 1345 struct assoc_array_node *n = 1346 assoc_array_ptr_to_node(edit->excised_subtree); 1347 n->back_pointer = NULL; 1348 } else { 1349 struct assoc_array_shortcut *s = 1350 assoc_array_ptr_to_shortcut(edit->excised_subtree); 1351 s->back_pointer = NULL; 1352 } 1353 assoc_array_destroy_subtree(edit->excised_subtree, 1354 edit->ops_for_excised_subtree); 1355 } 1356 1357 kfree(edit); 1358 } 1359 1360 /** 1361 * assoc_array_apply_edit - Apply an edit script to an associative array 1362 * @edit: The script to apply. 1363 * 1364 * Apply an edit script to an associative array to effect an insertion, 1365 * deletion or clearance. As the edit script includes preallocated memory, 1366 * this is guaranteed not to fail. 1367 * 1368 * The edit script, dead objects and dead metadata will be scheduled for 1369 * destruction after an RCU grace period to permit those doing read-only 1370 * accesses on the array to continue to do so under the RCU read lock whilst 1371 * the edit is taking place. 1372 */ 1373 void assoc_array_apply_edit(struct assoc_array_edit *edit) 1374 { 1375 struct assoc_array_shortcut *shortcut; 1376 struct assoc_array_node *node; 1377 struct assoc_array_ptr *ptr; 1378 int i; 1379 1380 pr_devel("-->%s()\n", __func__); 1381 1382 smp_wmb(); 1383 if (edit->leaf_p) 1384 *edit->leaf_p = edit->leaf; 1385 1386 smp_wmb(); 1387 for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++) 1388 if (edit->set_parent_slot[i].p) 1389 *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to; 1390 1391 smp_wmb(); 1392 for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++) 1393 if (edit->set_backpointers[i]) 1394 *edit->set_backpointers[i] = edit->set_backpointers_to; 1395 1396 smp_wmb(); 1397 for (i = 0; i < ARRAY_SIZE(edit->set); i++) 1398 if (edit->set[i].ptr) 1399 *edit->set[i].ptr = edit->set[i].to; 1400 1401 if (edit->array->root == NULL) { 1402 edit->array->nr_leaves_on_tree = 0; 1403 } else if (edit->adjust_count_on) { 1404 node = edit->adjust_count_on; 1405 for (;;) { 1406 node->nr_leaves_on_branch += edit->adjust_count_by; 1407 1408 ptr = node->back_pointer; 1409 if (!ptr) 1410 break; 1411 if (assoc_array_ptr_is_shortcut(ptr)) { 1412 shortcut = assoc_array_ptr_to_shortcut(ptr); 1413 ptr = shortcut->back_pointer; 1414 if (!ptr) 1415 break; 1416 } 1417 BUG_ON(!assoc_array_ptr_is_node(ptr)); 1418 node = assoc_array_ptr_to_node(ptr); 1419 } 1420 1421 edit->array->nr_leaves_on_tree += edit->adjust_count_by; 1422 } 1423 1424 call_rcu(&edit->rcu, assoc_array_rcu_cleanup); 1425 } 1426 1427 /** 1428 * assoc_array_cancel_edit - Discard an edit script. 1429 * @edit: The script to discard. 1430 * 1431 * Free an edit script and all the preallocated data it holds without making 1432 * any changes to the associative array it was intended for. 1433 * 1434 * NOTE! In the case of an insertion script, this does _not_ release the leaf 1435 * that was to be inserted. That is left to the caller. 1436 */ 1437 void assoc_array_cancel_edit(struct assoc_array_edit *edit) 1438 { 1439 struct assoc_array_ptr *ptr; 1440 int i; 1441 1442 pr_devel("-->%s()\n", __func__); 1443 1444 /* Clean up after an out of memory error */ 1445 for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) { 1446 ptr = edit->new_meta[i]; 1447 if (ptr) { 1448 if (assoc_array_ptr_is_node(ptr)) 1449 kfree(assoc_array_ptr_to_node(ptr)); 1450 else 1451 kfree(assoc_array_ptr_to_shortcut(ptr)); 1452 } 1453 } 1454 kfree(edit); 1455 } 1456 1457 /** 1458 * assoc_array_gc - Garbage collect an associative array. 1459 * @array: The array to clean. 1460 * @ops: The operations to use. 1461 * @iterator: A callback function to pass judgement on each object. 1462 * @iterator_data: Private data for the callback function. 1463 * 1464 * Collect garbage from an associative array and pack down the internal tree to 1465 * save memory. 1466 * 1467 * The iterator function is asked to pass judgement upon each object in the 1468 * array. If it returns false, the object is discard and if it returns true, 1469 * the object is kept. If it returns true, it must increment the object's 1470 * usage count (or whatever it needs to do to retain it) before returning. 1471 * 1472 * This function returns 0 if successful or -ENOMEM if out of memory. In the 1473 * latter case, the array is not changed. 1474 * 1475 * The caller should lock against other modifications and must continue to hold 1476 * the lock until assoc_array_apply_edit() has been called. 1477 * 1478 * Accesses to the tree may take place concurrently with this function, 1479 * provided they hold the RCU read lock. 1480 */ 1481 int assoc_array_gc(struct assoc_array *array, 1482 const struct assoc_array_ops *ops, 1483 bool (*iterator)(void *object, void *iterator_data), 1484 void *iterator_data) 1485 { 1486 struct assoc_array_shortcut *shortcut, *new_s; 1487 struct assoc_array_node *node, *new_n; 1488 struct assoc_array_edit *edit; 1489 struct assoc_array_ptr *cursor, *ptr; 1490 struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp; 1491 unsigned long nr_leaves_on_tree; 1492 int keylen, slot, nr_free, next_slot, i; 1493 1494 pr_devel("-->%s()\n", __func__); 1495 1496 if (!array->root) 1497 return 0; 1498 1499 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1500 if (!edit) 1501 return -ENOMEM; 1502 edit->array = array; 1503 edit->ops = ops; 1504 edit->ops_for_excised_subtree = ops; 1505 edit->set[0].ptr = &array->root; 1506 edit->excised_subtree = array->root; 1507 1508 new_root = new_parent = NULL; 1509 new_ptr_pp = &new_root; 1510 cursor = array->root; 1511 1512 descend: 1513 /* If this point is a shortcut, then we need to duplicate it and 1514 * advance the target cursor. 1515 */ 1516 if (assoc_array_ptr_is_shortcut(cursor)) { 1517 shortcut = assoc_array_ptr_to_shortcut(cursor); 1518 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 1519 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 1520 new_s = kmalloc(sizeof(struct assoc_array_shortcut) + 1521 keylen * sizeof(unsigned long), GFP_KERNEL); 1522 if (!new_s) 1523 goto enomem; 1524 pr_devel("dup shortcut %p -> %p\n", shortcut, new_s); 1525 memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) + 1526 keylen * sizeof(unsigned long))); 1527 new_s->back_pointer = new_parent; 1528 new_s->parent_slot = shortcut->parent_slot; 1529 *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s); 1530 new_ptr_pp = &new_s->next_node; 1531 cursor = shortcut->next_node; 1532 } 1533 1534 /* Duplicate the node at this position */ 1535 node = assoc_array_ptr_to_node(cursor); 1536 new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 1537 if (!new_n) 1538 goto enomem; 1539 pr_devel("dup node %p -> %p\n", node, new_n); 1540 new_n->back_pointer = new_parent; 1541 new_n->parent_slot = node->parent_slot; 1542 *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n); 1543 new_ptr_pp = NULL; 1544 slot = 0; 1545 1546 continue_node: 1547 /* Filter across any leaves and gc any subtrees */ 1548 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1549 ptr = node->slots[slot]; 1550 if (!ptr) 1551 continue; 1552 1553 if (assoc_array_ptr_is_leaf(ptr)) { 1554 if (iterator(assoc_array_ptr_to_leaf(ptr), 1555 iterator_data)) 1556 /* The iterator will have done any reference 1557 * counting on the object for us. 1558 */ 1559 new_n->slots[slot] = ptr; 1560 continue; 1561 } 1562 1563 new_ptr_pp = &new_n->slots[slot]; 1564 cursor = ptr; 1565 goto descend; 1566 } 1567 1568 pr_devel("-- compress node %p --\n", new_n); 1569 1570 /* Count up the number of empty slots in this node and work out the 1571 * subtree leaf count. 1572 */ 1573 new_n->nr_leaves_on_branch = 0; 1574 nr_free = 0; 1575 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1576 ptr = new_n->slots[slot]; 1577 if (!ptr) 1578 nr_free++; 1579 else if (assoc_array_ptr_is_leaf(ptr)) 1580 new_n->nr_leaves_on_branch++; 1581 } 1582 pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch); 1583 1584 /* See what we can fold in */ 1585 next_slot = 0; 1586 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1587 struct assoc_array_shortcut *s; 1588 struct assoc_array_node *child; 1589 1590 ptr = new_n->slots[slot]; 1591 if (!ptr || assoc_array_ptr_is_leaf(ptr)) 1592 continue; 1593 1594 s = NULL; 1595 if (assoc_array_ptr_is_shortcut(ptr)) { 1596 s = assoc_array_ptr_to_shortcut(ptr); 1597 ptr = s->next_node; 1598 } 1599 1600 child = assoc_array_ptr_to_node(ptr); 1601 new_n->nr_leaves_on_branch += child->nr_leaves_on_branch; 1602 1603 if (child->nr_leaves_on_branch <= nr_free + 1) { 1604 /* Fold the child node into this one */ 1605 pr_devel("[%d] fold node %lu/%d [nx %d]\n", 1606 slot, child->nr_leaves_on_branch, nr_free + 1, 1607 next_slot); 1608 1609 /* We would already have reaped an intervening shortcut 1610 * on the way back up the tree. 1611 */ 1612 BUG_ON(s); 1613 1614 new_n->slots[slot] = NULL; 1615 nr_free++; 1616 if (slot < next_slot) 1617 next_slot = slot; 1618 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 1619 struct assoc_array_ptr *p = child->slots[i]; 1620 if (!p) 1621 continue; 1622 BUG_ON(assoc_array_ptr_is_meta(p)); 1623 while (new_n->slots[next_slot]) 1624 next_slot++; 1625 BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT); 1626 new_n->slots[next_slot++] = p; 1627 nr_free--; 1628 } 1629 kfree(child); 1630 } else { 1631 pr_devel("[%d] retain node %lu/%d [nx %d]\n", 1632 slot, child->nr_leaves_on_branch, nr_free + 1, 1633 next_slot); 1634 } 1635 } 1636 1637 pr_devel("after: %lu\n", new_n->nr_leaves_on_branch); 1638 1639 nr_leaves_on_tree = new_n->nr_leaves_on_branch; 1640 1641 /* Excise this node if it is singly occupied by a shortcut */ 1642 if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) { 1643 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) 1644 if ((ptr = new_n->slots[slot])) 1645 break; 1646 1647 if (assoc_array_ptr_is_meta(ptr) && 1648 assoc_array_ptr_is_shortcut(ptr)) { 1649 pr_devel("excise node %p with 1 shortcut\n", new_n); 1650 new_s = assoc_array_ptr_to_shortcut(ptr); 1651 new_parent = new_n->back_pointer; 1652 slot = new_n->parent_slot; 1653 kfree(new_n); 1654 if (!new_parent) { 1655 new_s->back_pointer = NULL; 1656 new_s->parent_slot = 0; 1657 new_root = ptr; 1658 goto gc_complete; 1659 } 1660 1661 if (assoc_array_ptr_is_shortcut(new_parent)) { 1662 /* We can discard any preceding shortcut also */ 1663 struct assoc_array_shortcut *s = 1664 assoc_array_ptr_to_shortcut(new_parent); 1665 1666 pr_devel("excise preceding shortcut\n"); 1667 1668 new_parent = new_s->back_pointer = s->back_pointer; 1669 slot = new_s->parent_slot = s->parent_slot; 1670 kfree(s); 1671 if (!new_parent) { 1672 new_s->back_pointer = NULL; 1673 new_s->parent_slot = 0; 1674 new_root = ptr; 1675 goto gc_complete; 1676 } 1677 } 1678 1679 new_s->back_pointer = new_parent; 1680 new_s->parent_slot = slot; 1681 new_n = assoc_array_ptr_to_node(new_parent); 1682 new_n->slots[slot] = ptr; 1683 goto ascend_old_tree; 1684 } 1685 } 1686 1687 /* Excise any shortcuts we might encounter that point to nodes that 1688 * only contain leaves. 1689 */ 1690 ptr = new_n->back_pointer; 1691 if (!ptr) 1692 goto gc_complete; 1693 1694 if (assoc_array_ptr_is_shortcut(ptr)) { 1695 new_s = assoc_array_ptr_to_shortcut(ptr); 1696 new_parent = new_s->back_pointer; 1697 slot = new_s->parent_slot; 1698 1699 if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { 1700 struct assoc_array_node *n; 1701 1702 pr_devel("excise shortcut\n"); 1703 new_n->back_pointer = new_parent; 1704 new_n->parent_slot = slot; 1705 kfree(new_s); 1706 if (!new_parent) { 1707 new_root = assoc_array_node_to_ptr(new_n); 1708 goto gc_complete; 1709 } 1710 1711 n = assoc_array_ptr_to_node(new_parent); 1712 n->slots[slot] = assoc_array_node_to_ptr(new_n); 1713 } 1714 } else { 1715 new_parent = ptr; 1716 } 1717 new_n = assoc_array_ptr_to_node(new_parent); 1718 1719 ascend_old_tree: 1720 ptr = node->back_pointer; 1721 if (assoc_array_ptr_is_shortcut(ptr)) { 1722 shortcut = assoc_array_ptr_to_shortcut(ptr); 1723 slot = shortcut->parent_slot; 1724 cursor = shortcut->back_pointer; 1725 } else { 1726 slot = node->parent_slot; 1727 cursor = ptr; 1728 } 1729 BUG_ON(!ptr); 1730 node = assoc_array_ptr_to_node(cursor); 1731 slot++; 1732 goto continue_node; 1733 1734 gc_complete: 1735 edit->set[0].to = new_root; 1736 assoc_array_apply_edit(edit); 1737 edit->array->nr_leaves_on_tree = nr_leaves_on_tree; 1738 return 0; 1739 1740 enomem: 1741 pr_devel("enomem\n"); 1742 assoc_array_destroy_subtree(new_root, edit->ops); 1743 kfree(edit); 1744 return -ENOMEM; 1745 } 1746