1 /* Generic associative array implementation. 2 * 3 * See Documentation/assoc_array.txt for information. 4 * 5 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. 6 * Written by David Howells (dhowells@redhat.com) 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public Licence 10 * as published by the Free Software Foundation; either version 11 * 2 of the Licence, or (at your option) any later version. 12 */ 13 //#define DEBUG 14 #include <linux/rcupdate.h> 15 #include <linux/slab.h> 16 #include <linux/err.h> 17 #include <linux/assoc_array_priv.h> 18 19 /* 20 * Iterate over an associative array. The caller must hold the RCU read lock 21 * or better. 22 */ 23 static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root, 24 const struct assoc_array_ptr *stop, 25 int (*iterator)(const void *leaf, 26 void *iterator_data), 27 void *iterator_data) 28 { 29 const struct assoc_array_shortcut *shortcut; 30 const struct assoc_array_node *node; 31 const struct assoc_array_ptr *cursor, *ptr, *parent; 32 unsigned long has_meta; 33 int slot, ret; 34 35 cursor = root; 36 37 begin_node: 38 if (assoc_array_ptr_is_shortcut(cursor)) { 39 /* Descend through a shortcut */ 40 shortcut = assoc_array_ptr_to_shortcut(cursor); 41 smp_read_barrier_depends(); 42 cursor = ACCESS_ONCE(shortcut->next_node); 43 } 44 45 node = assoc_array_ptr_to_node(cursor); 46 smp_read_barrier_depends(); 47 slot = 0; 48 49 /* We perform two passes of each node. 50 * 51 * The first pass does all the leaves in this node. This means we 52 * don't miss any leaves if the node is split up by insertion whilst 53 * we're iterating over the branches rooted here (we may, however, see 54 * some leaves twice). 55 */ 56 has_meta = 0; 57 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 58 ptr = ACCESS_ONCE(node->slots[slot]); 59 has_meta |= (unsigned long)ptr; 60 if (ptr && assoc_array_ptr_is_leaf(ptr)) { 61 /* We need a barrier between the read of the pointer 62 * and dereferencing the pointer - but only if we are 63 * actually going to dereference it. 64 */ 65 smp_read_barrier_depends(); 66 67 /* Invoke the callback */ 68 ret = iterator(assoc_array_ptr_to_leaf(ptr), 69 iterator_data); 70 if (ret) 71 return ret; 72 } 73 } 74 75 /* The second pass attends to all the metadata pointers. If we follow 76 * one of these we may find that we don't come back here, but rather go 77 * back to a replacement node with the leaves in a different layout. 78 * 79 * We are guaranteed to make progress, however, as the slot number for 80 * a particular portion of the key space cannot change - and we 81 * continue at the back pointer + 1. 82 */ 83 if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE)) 84 goto finished_node; 85 slot = 0; 86 87 continue_node: 88 node = assoc_array_ptr_to_node(cursor); 89 smp_read_barrier_depends(); 90 91 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 92 ptr = ACCESS_ONCE(node->slots[slot]); 93 if (assoc_array_ptr_is_meta(ptr)) { 94 cursor = ptr; 95 goto begin_node; 96 } 97 } 98 99 finished_node: 100 /* Move up to the parent (may need to skip back over a shortcut) */ 101 parent = ACCESS_ONCE(node->back_pointer); 102 slot = node->parent_slot; 103 if (parent == stop) 104 return 0; 105 106 if (assoc_array_ptr_is_shortcut(parent)) { 107 shortcut = assoc_array_ptr_to_shortcut(parent); 108 smp_read_barrier_depends(); 109 cursor = parent; 110 parent = ACCESS_ONCE(shortcut->back_pointer); 111 slot = shortcut->parent_slot; 112 if (parent == stop) 113 return 0; 114 } 115 116 /* Ascend to next slot in parent node */ 117 cursor = parent; 118 slot++; 119 goto continue_node; 120 } 121 122 /** 123 * assoc_array_iterate - Pass all objects in the array to a callback 124 * @array: The array to iterate over. 125 * @iterator: The callback function. 126 * @iterator_data: Private data for the callback function. 127 * 128 * Iterate over all the objects in an associative array. Each one will be 129 * presented to the iterator function. 130 * 131 * If the array is being modified concurrently with the iteration then it is 132 * possible that some objects in the array will be passed to the iterator 133 * callback more than once - though every object should be passed at least 134 * once. If this is undesirable then the caller must lock against modification 135 * for the duration of this function. 136 * 137 * The function will return 0 if no objects were in the array or else it will 138 * return the result of the last iterator function called. Iteration stops 139 * immediately if any call to the iteration function results in a non-zero 140 * return. 141 * 142 * The caller should hold the RCU read lock or better if concurrent 143 * modification is possible. 144 */ 145 int assoc_array_iterate(const struct assoc_array *array, 146 int (*iterator)(const void *object, 147 void *iterator_data), 148 void *iterator_data) 149 { 150 struct assoc_array_ptr *root = ACCESS_ONCE(array->root); 151 152 if (!root) 153 return 0; 154 return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data); 155 } 156 157 enum assoc_array_walk_status { 158 assoc_array_walk_tree_empty, 159 assoc_array_walk_found_terminal_node, 160 assoc_array_walk_found_wrong_shortcut, 161 }; 162 163 struct assoc_array_walk_result { 164 struct { 165 struct assoc_array_node *node; /* Node in which leaf might be found */ 166 int level; 167 int slot; 168 } terminal_node; 169 struct { 170 struct assoc_array_shortcut *shortcut; 171 int level; 172 int sc_level; 173 unsigned long sc_segments; 174 unsigned long dissimilarity; 175 } wrong_shortcut; 176 }; 177 178 /* 179 * Navigate through the internal tree looking for the closest node to the key. 180 */ 181 static enum assoc_array_walk_status 182 assoc_array_walk(const struct assoc_array *array, 183 const struct assoc_array_ops *ops, 184 const void *index_key, 185 struct assoc_array_walk_result *result) 186 { 187 struct assoc_array_shortcut *shortcut; 188 struct assoc_array_node *node; 189 struct assoc_array_ptr *cursor, *ptr; 190 unsigned long sc_segments, dissimilarity; 191 unsigned long segments; 192 int level, sc_level, next_sc_level; 193 int slot; 194 195 pr_devel("-->%s()\n", __func__); 196 197 cursor = ACCESS_ONCE(array->root); 198 if (!cursor) 199 return assoc_array_walk_tree_empty; 200 201 level = 0; 202 203 /* Use segments from the key for the new leaf to navigate through the 204 * internal tree, skipping through nodes and shortcuts that are on 205 * route to the destination. Eventually we'll come to a slot that is 206 * either empty or contains a leaf at which point we've found a node in 207 * which the leaf we're looking for might be found or into which it 208 * should be inserted. 209 */ 210 jumped: 211 segments = ops->get_key_chunk(index_key, level); 212 pr_devel("segments[%d]: %lx\n", level, segments); 213 214 if (assoc_array_ptr_is_shortcut(cursor)) 215 goto follow_shortcut; 216 217 consider_node: 218 node = assoc_array_ptr_to_node(cursor); 219 smp_read_barrier_depends(); 220 221 slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); 222 slot &= ASSOC_ARRAY_FAN_MASK; 223 ptr = ACCESS_ONCE(node->slots[slot]); 224 225 pr_devel("consider slot %x [ix=%d type=%lu]\n", 226 slot, level, (unsigned long)ptr & 3); 227 228 if (!assoc_array_ptr_is_meta(ptr)) { 229 /* The node doesn't have a node/shortcut pointer in the slot 230 * corresponding to the index key that we have to follow. 231 */ 232 result->terminal_node.node = node; 233 result->terminal_node.level = level; 234 result->terminal_node.slot = slot; 235 pr_devel("<--%s() = terminal_node\n", __func__); 236 return assoc_array_walk_found_terminal_node; 237 } 238 239 if (assoc_array_ptr_is_node(ptr)) { 240 /* There is a pointer to a node in the slot corresponding to 241 * this index key segment, so we need to follow it. 242 */ 243 cursor = ptr; 244 level += ASSOC_ARRAY_LEVEL_STEP; 245 if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) 246 goto consider_node; 247 goto jumped; 248 } 249 250 /* There is a shortcut in the slot corresponding to the index key 251 * segment. We follow the shortcut if its partial index key matches 252 * this leaf's. Otherwise we need to split the shortcut. 253 */ 254 cursor = ptr; 255 follow_shortcut: 256 shortcut = assoc_array_ptr_to_shortcut(cursor); 257 smp_read_barrier_depends(); 258 pr_devel("shortcut to %d\n", shortcut->skip_to_level); 259 sc_level = level + ASSOC_ARRAY_LEVEL_STEP; 260 BUG_ON(sc_level > shortcut->skip_to_level); 261 262 do { 263 /* Check the leaf against the shortcut's index key a word at a 264 * time, trimming the final word (the shortcut stores the index 265 * key completely from the root to the shortcut's target). 266 */ 267 if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0) 268 segments = ops->get_key_chunk(index_key, sc_level); 269 270 sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT]; 271 dissimilarity = segments ^ sc_segments; 272 273 if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) { 274 /* Trim segments that are beyond the shortcut */ 275 int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK; 276 dissimilarity &= ~(ULONG_MAX << shift); 277 next_sc_level = shortcut->skip_to_level; 278 } else { 279 next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE; 280 next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 281 } 282 283 if (dissimilarity != 0) { 284 /* This shortcut points elsewhere */ 285 result->wrong_shortcut.shortcut = shortcut; 286 result->wrong_shortcut.level = level; 287 result->wrong_shortcut.sc_level = sc_level; 288 result->wrong_shortcut.sc_segments = sc_segments; 289 result->wrong_shortcut.dissimilarity = dissimilarity; 290 return assoc_array_walk_found_wrong_shortcut; 291 } 292 293 sc_level = next_sc_level; 294 } while (sc_level < shortcut->skip_to_level); 295 296 /* The shortcut matches the leaf's index to this point. */ 297 cursor = ACCESS_ONCE(shortcut->next_node); 298 if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { 299 level = sc_level; 300 goto jumped; 301 } else { 302 level = sc_level; 303 goto consider_node; 304 } 305 } 306 307 /** 308 * assoc_array_find - Find an object by index key 309 * @array: The associative array to search. 310 * @ops: The operations to use. 311 * @index_key: The key to the object. 312 * 313 * Find an object in an associative array by walking through the internal tree 314 * to the node that should contain the object and then searching the leaves 315 * there. NULL is returned if the requested object was not found in the array. 316 * 317 * The caller must hold the RCU read lock or better. 318 */ 319 void *assoc_array_find(const struct assoc_array *array, 320 const struct assoc_array_ops *ops, 321 const void *index_key) 322 { 323 struct assoc_array_walk_result result; 324 const struct assoc_array_node *node; 325 const struct assoc_array_ptr *ptr; 326 const void *leaf; 327 int slot; 328 329 if (assoc_array_walk(array, ops, index_key, &result) != 330 assoc_array_walk_found_terminal_node) 331 return NULL; 332 333 node = result.terminal_node.node; 334 smp_read_barrier_depends(); 335 336 /* If the target key is available to us, it's has to be pointed to by 337 * the terminal node. 338 */ 339 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 340 ptr = ACCESS_ONCE(node->slots[slot]); 341 if (ptr && assoc_array_ptr_is_leaf(ptr)) { 342 /* We need a barrier between the read of the pointer 343 * and dereferencing the pointer - but only if we are 344 * actually going to dereference it. 345 */ 346 leaf = assoc_array_ptr_to_leaf(ptr); 347 smp_read_barrier_depends(); 348 if (ops->compare_object(leaf, index_key)) 349 return (void *)leaf; 350 } 351 } 352 353 return NULL; 354 } 355 356 /* 357 * Destructively iterate over an associative array. The caller must prevent 358 * other simultaneous accesses. 359 */ 360 static void assoc_array_destroy_subtree(struct assoc_array_ptr *root, 361 const struct assoc_array_ops *ops) 362 { 363 struct assoc_array_shortcut *shortcut; 364 struct assoc_array_node *node; 365 struct assoc_array_ptr *cursor, *parent = NULL; 366 int slot = -1; 367 368 pr_devel("-->%s()\n", __func__); 369 370 cursor = root; 371 if (!cursor) { 372 pr_devel("empty\n"); 373 return; 374 } 375 376 move_to_meta: 377 if (assoc_array_ptr_is_shortcut(cursor)) { 378 /* Descend through a shortcut */ 379 pr_devel("[%d] shortcut\n", slot); 380 BUG_ON(!assoc_array_ptr_is_shortcut(cursor)); 381 shortcut = assoc_array_ptr_to_shortcut(cursor); 382 BUG_ON(shortcut->back_pointer != parent); 383 BUG_ON(slot != -1 && shortcut->parent_slot != slot); 384 parent = cursor; 385 cursor = shortcut->next_node; 386 slot = -1; 387 BUG_ON(!assoc_array_ptr_is_node(cursor)); 388 } 389 390 pr_devel("[%d] node\n", slot); 391 node = assoc_array_ptr_to_node(cursor); 392 BUG_ON(node->back_pointer != parent); 393 BUG_ON(slot != -1 && node->parent_slot != slot); 394 slot = 0; 395 396 continue_node: 397 pr_devel("Node %p [back=%p]\n", node, node->back_pointer); 398 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 399 struct assoc_array_ptr *ptr = node->slots[slot]; 400 if (!ptr) 401 continue; 402 if (assoc_array_ptr_is_meta(ptr)) { 403 parent = cursor; 404 cursor = ptr; 405 goto move_to_meta; 406 } 407 408 if (ops) { 409 pr_devel("[%d] free leaf\n", slot); 410 ops->free_object(assoc_array_ptr_to_leaf(ptr)); 411 } 412 } 413 414 parent = node->back_pointer; 415 slot = node->parent_slot; 416 pr_devel("free node\n"); 417 kfree(node); 418 if (!parent) 419 return; /* Done */ 420 421 /* Move back up to the parent (may need to free a shortcut on 422 * the way up) */ 423 if (assoc_array_ptr_is_shortcut(parent)) { 424 shortcut = assoc_array_ptr_to_shortcut(parent); 425 BUG_ON(shortcut->next_node != cursor); 426 cursor = parent; 427 parent = shortcut->back_pointer; 428 slot = shortcut->parent_slot; 429 pr_devel("free shortcut\n"); 430 kfree(shortcut); 431 if (!parent) 432 return; 433 434 BUG_ON(!assoc_array_ptr_is_node(parent)); 435 } 436 437 /* Ascend to next slot in parent node */ 438 pr_devel("ascend to %p[%d]\n", parent, slot); 439 cursor = parent; 440 node = assoc_array_ptr_to_node(cursor); 441 slot++; 442 goto continue_node; 443 } 444 445 /** 446 * assoc_array_destroy - Destroy an associative array 447 * @array: The array to destroy. 448 * @ops: The operations to use. 449 * 450 * Discard all metadata and free all objects in an associative array. The 451 * array will be empty and ready to use again upon completion. This function 452 * cannot fail. 453 * 454 * The caller must prevent all other accesses whilst this takes place as no 455 * attempt is made to adjust pointers gracefully to permit RCU readlock-holding 456 * accesses to continue. On the other hand, no memory allocation is required. 457 */ 458 void assoc_array_destroy(struct assoc_array *array, 459 const struct assoc_array_ops *ops) 460 { 461 assoc_array_destroy_subtree(array->root, ops); 462 array->root = NULL; 463 } 464 465 /* 466 * Handle insertion into an empty tree. 467 */ 468 static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit) 469 { 470 struct assoc_array_node *new_n0; 471 472 pr_devel("-->%s()\n", __func__); 473 474 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 475 if (!new_n0) 476 return false; 477 478 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 479 edit->leaf_p = &new_n0->slots[0]; 480 edit->adjust_count_on = new_n0; 481 edit->set[0].ptr = &edit->array->root; 482 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 483 484 pr_devel("<--%s() = ok [no root]\n", __func__); 485 return true; 486 } 487 488 /* 489 * Handle insertion into a terminal node. 490 */ 491 static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, 492 const struct assoc_array_ops *ops, 493 const void *index_key, 494 struct assoc_array_walk_result *result) 495 { 496 struct assoc_array_shortcut *shortcut, *new_s0; 497 struct assoc_array_node *node, *new_n0, *new_n1, *side; 498 struct assoc_array_ptr *ptr; 499 unsigned long dissimilarity, base_seg, blank; 500 size_t keylen; 501 bool have_meta; 502 int level, diff; 503 int slot, next_slot, free_slot, i, j; 504 505 node = result->terminal_node.node; 506 level = result->terminal_node.level; 507 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot; 508 509 pr_devel("-->%s()\n", __func__); 510 511 /* We arrived at a node which doesn't have an onward node or shortcut 512 * pointer that we have to follow. This means that (a) the leaf we 513 * want must go here (either by insertion or replacement) or (b) we 514 * need to split this node and insert in one of the fragments. 515 */ 516 free_slot = -1; 517 518 /* Firstly, we have to check the leaves in this node to see if there's 519 * a matching one we should replace in place. 520 */ 521 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 522 ptr = node->slots[i]; 523 if (!ptr) { 524 free_slot = i; 525 continue; 526 } 527 if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) { 528 pr_devel("replace in slot %d\n", i); 529 edit->leaf_p = &node->slots[i]; 530 edit->dead_leaf = node->slots[i]; 531 pr_devel("<--%s() = ok [replace]\n", __func__); 532 return true; 533 } 534 } 535 536 /* If there is a free slot in this node then we can just insert the 537 * leaf here. 538 */ 539 if (free_slot >= 0) { 540 pr_devel("insert in free slot %d\n", free_slot); 541 edit->leaf_p = &node->slots[free_slot]; 542 edit->adjust_count_on = node; 543 pr_devel("<--%s() = ok [insert]\n", __func__); 544 return true; 545 } 546 547 /* The node has no spare slots - so we're either going to have to split 548 * it or insert another node before it. 549 * 550 * Whatever, we're going to need at least two new nodes - so allocate 551 * those now. We may also need a new shortcut, but we deal with that 552 * when we need it. 553 */ 554 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 555 if (!new_n0) 556 return false; 557 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 558 new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 559 if (!new_n1) 560 return false; 561 edit->new_meta[1] = assoc_array_node_to_ptr(new_n1); 562 563 /* We need to find out how similar the leaves are. */ 564 pr_devel("no spare slots\n"); 565 have_meta = false; 566 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 567 ptr = node->slots[i]; 568 if (assoc_array_ptr_is_meta(ptr)) { 569 edit->segment_cache[i] = 0xff; 570 have_meta = true; 571 continue; 572 } 573 base_seg = ops->get_object_key_chunk( 574 assoc_array_ptr_to_leaf(ptr), level); 575 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 576 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; 577 } 578 579 if (have_meta) { 580 pr_devel("have meta\n"); 581 goto split_node; 582 } 583 584 /* The node contains only leaves */ 585 dissimilarity = 0; 586 base_seg = edit->segment_cache[0]; 587 for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++) 588 dissimilarity |= edit->segment_cache[i] ^ base_seg; 589 590 pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity); 591 592 if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) { 593 /* The old leaves all cluster in the same slot. We will need 594 * to insert a shortcut if the new node wants to cluster with them. 595 */ 596 if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0) 597 goto all_leaves_cluster_together; 598 599 /* Otherwise we can just insert a new node ahead of the old 600 * one. 601 */ 602 goto present_leaves_cluster_but_not_new_leaf; 603 } 604 605 split_node: 606 pr_devel("split node\n"); 607 608 /* We need to split the current node; we know that the node doesn't 609 * simply contain a full set of leaves that cluster together (it 610 * contains meta pointers and/or non-clustering leaves). 611 * 612 * We need to expel at least two leaves out of a set consisting of the 613 * leaves in the node and the new leaf. 614 * 615 * We need a new node (n0) to replace the current one and a new node to 616 * take the expelled nodes (n1). 617 */ 618 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 619 new_n0->back_pointer = node->back_pointer; 620 new_n0->parent_slot = node->parent_slot; 621 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); 622 new_n1->parent_slot = -1; /* Need to calculate this */ 623 624 do_split_node: 625 pr_devel("do_split_node\n"); 626 627 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; 628 new_n1->nr_leaves_on_branch = 0; 629 630 /* Begin by finding two matching leaves. There have to be at least two 631 * that match - even if there are meta pointers - because any leaf that 632 * would match a slot with a meta pointer in it must be somewhere 633 * behind that meta pointer and cannot be here. Further, given N 634 * remaining leaf slots, we now have N+1 leaves to go in them. 635 */ 636 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 637 slot = edit->segment_cache[i]; 638 if (slot != 0xff) 639 for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++) 640 if (edit->segment_cache[j] == slot) 641 goto found_slot_for_multiple_occupancy; 642 } 643 found_slot_for_multiple_occupancy: 644 pr_devel("same slot: %x %x [%02x]\n", i, j, slot); 645 BUG_ON(i >= ASSOC_ARRAY_FAN_OUT); 646 BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1); 647 BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT); 648 649 new_n1->parent_slot = slot; 650 651 /* Metadata pointers cannot change slot */ 652 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) 653 if (assoc_array_ptr_is_meta(node->slots[i])) 654 new_n0->slots[i] = node->slots[i]; 655 else 656 new_n0->slots[i] = NULL; 657 BUG_ON(new_n0->slots[slot] != NULL); 658 new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1); 659 660 /* Filter the leaf pointers between the new nodes */ 661 free_slot = -1; 662 next_slot = 0; 663 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 664 if (assoc_array_ptr_is_meta(node->slots[i])) 665 continue; 666 if (edit->segment_cache[i] == slot) { 667 new_n1->slots[next_slot++] = node->slots[i]; 668 new_n1->nr_leaves_on_branch++; 669 } else { 670 do { 671 free_slot++; 672 } while (new_n0->slots[free_slot] != NULL); 673 new_n0->slots[free_slot] = node->slots[i]; 674 } 675 } 676 677 pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot); 678 679 if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) { 680 do { 681 free_slot++; 682 } while (new_n0->slots[free_slot] != NULL); 683 edit->leaf_p = &new_n0->slots[free_slot]; 684 edit->adjust_count_on = new_n0; 685 } else { 686 edit->leaf_p = &new_n1->slots[next_slot++]; 687 edit->adjust_count_on = new_n1; 688 } 689 690 BUG_ON(next_slot <= 1); 691 692 edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0); 693 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 694 if (edit->segment_cache[i] == 0xff) { 695 ptr = node->slots[i]; 696 BUG_ON(assoc_array_ptr_is_leaf(ptr)); 697 if (assoc_array_ptr_is_node(ptr)) { 698 side = assoc_array_ptr_to_node(ptr); 699 edit->set_backpointers[i] = &side->back_pointer; 700 } else { 701 shortcut = assoc_array_ptr_to_shortcut(ptr); 702 edit->set_backpointers[i] = &shortcut->back_pointer; 703 } 704 } 705 } 706 707 ptr = node->back_pointer; 708 if (!ptr) 709 edit->set[0].ptr = &edit->array->root; 710 else if (assoc_array_ptr_is_node(ptr)) 711 edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot]; 712 else 713 edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node; 714 edit->excised_meta[0] = assoc_array_node_to_ptr(node); 715 pr_devel("<--%s() = ok [split node]\n", __func__); 716 return true; 717 718 present_leaves_cluster_but_not_new_leaf: 719 /* All the old leaves cluster in the same slot, but the new leaf wants 720 * to go into a different slot, so we create a new node to hold the new 721 * leaf and a pointer to a new node holding all the old leaves. 722 */ 723 pr_devel("present leaves cluster but not new leaf\n"); 724 725 new_n0->back_pointer = node->back_pointer; 726 new_n0->parent_slot = node->parent_slot; 727 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; 728 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); 729 new_n1->parent_slot = edit->segment_cache[0]; 730 new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch; 731 edit->adjust_count_on = new_n0; 732 733 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) 734 new_n1->slots[i] = node->slots[i]; 735 736 new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n0); 737 edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]]; 738 739 edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot]; 740 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 741 edit->excised_meta[0] = assoc_array_node_to_ptr(node); 742 pr_devel("<--%s() = ok [insert node before]\n", __func__); 743 return true; 744 745 all_leaves_cluster_together: 746 /* All the leaves, new and old, want to cluster together in this node 747 * in the same slot, so we have to replace this node with a shortcut to 748 * skip over the identical parts of the key and then place a pair of 749 * nodes, one inside the other, at the end of the shortcut and 750 * distribute the keys between them. 751 * 752 * Firstly we need to work out where the leaves start diverging as a 753 * bit position into their keys so that we know how big the shortcut 754 * needs to be. 755 * 756 * We only need to make a single pass of N of the N+1 leaves because if 757 * any keys differ between themselves at bit X then at least one of 758 * them must also differ with the base key at bit X or before. 759 */ 760 pr_devel("all leaves cluster together\n"); 761 diff = INT_MAX; 762 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 763 int x = ops->diff_objects(assoc_array_ptr_to_leaf(node->slots[i]), 764 index_key); 765 if (x < diff) { 766 BUG_ON(x < 0); 767 diff = x; 768 } 769 } 770 BUG_ON(diff == INT_MAX); 771 BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP); 772 773 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); 774 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 775 776 new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + 777 keylen * sizeof(unsigned long), GFP_KERNEL); 778 if (!new_s0) 779 return false; 780 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0); 781 782 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); 783 new_s0->back_pointer = node->back_pointer; 784 new_s0->parent_slot = node->parent_slot; 785 new_s0->next_node = assoc_array_node_to_ptr(new_n0); 786 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); 787 new_n0->parent_slot = 0; 788 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); 789 new_n1->parent_slot = -1; /* Need to calculate this */ 790 791 new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK; 792 pr_devel("skip_to_level = %d [diff %d]\n", level, diff); 793 BUG_ON(level <= 0); 794 795 for (i = 0; i < keylen; i++) 796 new_s0->index_key[i] = 797 ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE); 798 799 blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); 800 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); 801 new_s0->index_key[keylen - 1] &= ~blank; 802 803 /* This now reduces to a node splitting exercise for which we'll need 804 * to regenerate the disparity table. 805 */ 806 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 807 ptr = node->slots[i]; 808 base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr), 809 level); 810 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 811 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; 812 } 813 814 base_seg = ops->get_key_chunk(index_key, level); 815 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 816 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK; 817 goto do_split_node; 818 } 819 820 /* 821 * Handle insertion into the middle of a shortcut. 822 */ 823 static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit, 824 const struct assoc_array_ops *ops, 825 struct assoc_array_walk_result *result) 826 { 827 struct assoc_array_shortcut *shortcut, *new_s0, *new_s1; 828 struct assoc_array_node *node, *new_n0, *side; 829 unsigned long sc_segments, dissimilarity, blank; 830 size_t keylen; 831 int level, sc_level, diff; 832 int sc_slot; 833 834 shortcut = result->wrong_shortcut.shortcut; 835 level = result->wrong_shortcut.level; 836 sc_level = result->wrong_shortcut.sc_level; 837 sc_segments = result->wrong_shortcut.sc_segments; 838 dissimilarity = result->wrong_shortcut.dissimilarity; 839 840 pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n", 841 __func__, level, dissimilarity, sc_level); 842 843 /* We need to split a shortcut and insert a node between the two 844 * pieces. Zero-length pieces will be dispensed with entirely. 845 * 846 * First of all, we need to find out in which level the first 847 * difference was. 848 */ 849 diff = __ffs(dissimilarity); 850 diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK; 851 diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK; 852 pr_devel("diff=%d\n", diff); 853 854 if (!shortcut->back_pointer) { 855 edit->set[0].ptr = &edit->array->root; 856 } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) { 857 node = assoc_array_ptr_to_node(shortcut->back_pointer); 858 edit->set[0].ptr = &node->slots[shortcut->parent_slot]; 859 } else { 860 BUG(); 861 } 862 863 edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut); 864 865 /* Create a new node now since we're going to need it anyway */ 866 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 867 if (!new_n0) 868 return false; 869 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 870 edit->adjust_count_on = new_n0; 871 872 /* Insert a new shortcut before the new node if this segment isn't of 873 * zero length - otherwise we just connect the new node directly to the 874 * parent. 875 */ 876 level += ASSOC_ARRAY_LEVEL_STEP; 877 if (diff > level) { 878 pr_devel("pre-shortcut %d...%d\n", level, diff); 879 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); 880 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 881 882 new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + 883 keylen * sizeof(unsigned long), GFP_KERNEL); 884 if (!new_s0) 885 return false; 886 edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0); 887 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); 888 new_s0->back_pointer = shortcut->back_pointer; 889 new_s0->parent_slot = shortcut->parent_slot; 890 new_s0->next_node = assoc_array_node_to_ptr(new_n0); 891 new_s0->skip_to_level = diff; 892 893 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); 894 new_n0->parent_slot = 0; 895 896 memcpy(new_s0->index_key, shortcut->index_key, 897 keylen * sizeof(unsigned long)); 898 899 blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); 900 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank); 901 new_s0->index_key[keylen - 1] &= ~blank; 902 } else { 903 pr_devel("no pre-shortcut\n"); 904 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 905 new_n0->back_pointer = shortcut->back_pointer; 906 new_n0->parent_slot = shortcut->parent_slot; 907 } 908 909 side = assoc_array_ptr_to_node(shortcut->next_node); 910 new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch; 911 912 /* We need to know which slot in the new node is going to take a 913 * metadata pointer. 914 */ 915 sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); 916 sc_slot &= ASSOC_ARRAY_FAN_MASK; 917 918 pr_devel("new slot %lx >> %d -> %d\n", 919 sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot); 920 921 /* Determine whether we need to follow the new node with a replacement 922 * for the current shortcut. We could in theory reuse the current 923 * shortcut if its parent slot number doesn't change - but that's a 924 * 1-in-16 chance so not worth expending the code upon. 925 */ 926 level = diff + ASSOC_ARRAY_LEVEL_STEP; 927 if (level < shortcut->skip_to_level) { 928 pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level); 929 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 930 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 931 932 new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) + 933 keylen * sizeof(unsigned long), GFP_KERNEL); 934 if (!new_s1) 935 return false; 936 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1); 937 938 new_s1->back_pointer = assoc_array_node_to_ptr(new_n0); 939 new_s1->parent_slot = sc_slot; 940 new_s1->next_node = shortcut->next_node; 941 new_s1->skip_to_level = shortcut->skip_to_level; 942 943 new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1); 944 945 memcpy(new_s1->index_key, shortcut->index_key, 946 keylen * sizeof(unsigned long)); 947 948 edit->set[1].ptr = &side->back_pointer; 949 edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1); 950 } else { 951 pr_devel("no post-shortcut\n"); 952 953 /* We don't have to replace the pointed-to node as long as we 954 * use memory barriers to make sure the parent slot number is 955 * changed before the back pointer (the parent slot number is 956 * irrelevant to the old parent shortcut). 957 */ 958 new_n0->slots[sc_slot] = shortcut->next_node; 959 edit->set_parent_slot[0].p = &side->parent_slot; 960 edit->set_parent_slot[0].to = sc_slot; 961 edit->set[1].ptr = &side->back_pointer; 962 edit->set[1].to = assoc_array_node_to_ptr(new_n0); 963 } 964 965 /* Install the new leaf in a spare slot in the new node. */ 966 if (sc_slot == 0) 967 edit->leaf_p = &new_n0->slots[1]; 968 else 969 edit->leaf_p = &new_n0->slots[0]; 970 971 pr_devel("<--%s() = ok [split shortcut]\n", __func__); 972 return edit; 973 } 974 975 /** 976 * assoc_array_insert - Script insertion of an object into an associative array 977 * @array: The array to insert into. 978 * @ops: The operations to use. 979 * @index_key: The key to insert at. 980 * @object: The object to insert. 981 * 982 * Precalculate and preallocate a script for the insertion or replacement of an 983 * object in an associative array. This results in an edit script that can 984 * either be applied or cancelled. 985 * 986 * The function returns a pointer to an edit script or -ENOMEM. 987 * 988 * The caller should lock against other modifications and must continue to hold 989 * the lock until assoc_array_apply_edit() has been called. 990 * 991 * Accesses to the tree may take place concurrently with this function, 992 * provided they hold the RCU read lock. 993 */ 994 struct assoc_array_edit *assoc_array_insert(struct assoc_array *array, 995 const struct assoc_array_ops *ops, 996 const void *index_key, 997 void *object) 998 { 999 struct assoc_array_walk_result result; 1000 struct assoc_array_edit *edit; 1001 1002 pr_devel("-->%s()\n", __func__); 1003 1004 /* The leaf pointer we're given must not have the bottom bit set as we 1005 * use those for type-marking the pointer. NULL pointers are also not 1006 * allowed as they indicate an empty slot but we have to allow them 1007 * here as they can be updated later. 1008 */ 1009 BUG_ON(assoc_array_ptr_is_meta(object)); 1010 1011 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1012 if (!edit) 1013 return ERR_PTR(-ENOMEM); 1014 edit->array = array; 1015 edit->ops = ops; 1016 edit->leaf = assoc_array_leaf_to_ptr(object); 1017 edit->adjust_count_by = 1; 1018 1019 switch (assoc_array_walk(array, ops, index_key, &result)) { 1020 case assoc_array_walk_tree_empty: 1021 /* Allocate a root node if there isn't one yet */ 1022 if (!assoc_array_insert_in_empty_tree(edit)) 1023 goto enomem; 1024 return edit; 1025 1026 case assoc_array_walk_found_terminal_node: 1027 /* We found a node that doesn't have a node/shortcut pointer in 1028 * the slot corresponding to the index key that we have to 1029 * follow. 1030 */ 1031 if (!assoc_array_insert_into_terminal_node(edit, ops, index_key, 1032 &result)) 1033 goto enomem; 1034 return edit; 1035 1036 case assoc_array_walk_found_wrong_shortcut: 1037 /* We found a shortcut that didn't match our key in a slot we 1038 * needed to follow. 1039 */ 1040 if (!assoc_array_insert_mid_shortcut(edit, ops, &result)) 1041 goto enomem; 1042 return edit; 1043 } 1044 1045 enomem: 1046 /* Clean up after an out of memory error */ 1047 pr_devel("enomem\n"); 1048 assoc_array_cancel_edit(edit); 1049 return ERR_PTR(-ENOMEM); 1050 } 1051 1052 /** 1053 * assoc_array_insert_set_object - Set the new object pointer in an edit script 1054 * @edit: The edit script to modify. 1055 * @object: The object pointer to set. 1056 * 1057 * Change the object to be inserted in an edit script. The object pointed to 1058 * by the old object is not freed. This must be done prior to applying the 1059 * script. 1060 */ 1061 void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object) 1062 { 1063 BUG_ON(!object); 1064 edit->leaf = assoc_array_leaf_to_ptr(object); 1065 } 1066 1067 struct assoc_array_delete_collapse_context { 1068 struct assoc_array_node *node; 1069 const void *skip_leaf; 1070 int slot; 1071 }; 1072 1073 /* 1074 * Subtree collapse to node iterator. 1075 */ 1076 static int assoc_array_delete_collapse_iterator(const void *leaf, 1077 void *iterator_data) 1078 { 1079 struct assoc_array_delete_collapse_context *collapse = iterator_data; 1080 1081 if (leaf == collapse->skip_leaf) 1082 return 0; 1083 1084 BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT); 1085 1086 collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf); 1087 return 0; 1088 } 1089 1090 /** 1091 * assoc_array_delete - Script deletion of an object from an associative array 1092 * @array: The array to search. 1093 * @ops: The operations to use. 1094 * @index_key: The key to the object. 1095 * 1096 * Precalculate and preallocate a script for the deletion of an object from an 1097 * associative array. This results in an edit script that can either be 1098 * applied or cancelled. 1099 * 1100 * The function returns a pointer to an edit script if the object was found, 1101 * NULL if the object was not found or -ENOMEM. 1102 * 1103 * The caller should lock against other modifications and must continue to hold 1104 * the lock until assoc_array_apply_edit() has been called. 1105 * 1106 * Accesses to the tree may take place concurrently with this function, 1107 * provided they hold the RCU read lock. 1108 */ 1109 struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, 1110 const struct assoc_array_ops *ops, 1111 const void *index_key) 1112 { 1113 struct assoc_array_delete_collapse_context collapse; 1114 struct assoc_array_walk_result result; 1115 struct assoc_array_node *node, *new_n0; 1116 struct assoc_array_edit *edit; 1117 struct assoc_array_ptr *ptr; 1118 bool has_meta; 1119 int slot, i; 1120 1121 pr_devel("-->%s()\n", __func__); 1122 1123 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1124 if (!edit) 1125 return ERR_PTR(-ENOMEM); 1126 edit->array = array; 1127 edit->ops = ops; 1128 edit->adjust_count_by = -1; 1129 1130 switch (assoc_array_walk(array, ops, index_key, &result)) { 1131 case assoc_array_walk_found_terminal_node: 1132 /* We found a node that should contain the leaf we've been 1133 * asked to remove - *if* it's in the tree. 1134 */ 1135 pr_devel("terminal_node\n"); 1136 node = result.terminal_node.node; 1137 1138 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1139 ptr = node->slots[slot]; 1140 if (ptr && 1141 assoc_array_ptr_is_leaf(ptr) && 1142 ops->compare_object(assoc_array_ptr_to_leaf(ptr), 1143 index_key)) 1144 goto found_leaf; 1145 } 1146 case assoc_array_walk_tree_empty: 1147 case assoc_array_walk_found_wrong_shortcut: 1148 default: 1149 assoc_array_cancel_edit(edit); 1150 pr_devel("not found\n"); 1151 return NULL; 1152 } 1153 1154 found_leaf: 1155 BUG_ON(array->nr_leaves_on_tree <= 0); 1156 1157 /* In the simplest form of deletion we just clear the slot and release 1158 * the leaf after a suitable interval. 1159 */ 1160 edit->dead_leaf = node->slots[slot]; 1161 edit->set[0].ptr = &node->slots[slot]; 1162 edit->set[0].to = NULL; 1163 edit->adjust_count_on = node; 1164 1165 /* If that concludes erasure of the last leaf, then delete the entire 1166 * internal array. 1167 */ 1168 if (array->nr_leaves_on_tree == 1) { 1169 edit->set[1].ptr = &array->root; 1170 edit->set[1].to = NULL; 1171 edit->adjust_count_on = NULL; 1172 edit->excised_subtree = array->root; 1173 pr_devel("all gone\n"); 1174 return edit; 1175 } 1176 1177 /* However, we'd also like to clear up some metadata blocks if we 1178 * possibly can. 1179 * 1180 * We go for a simple algorithm of: if this node has FAN_OUT or fewer 1181 * leaves in it, then attempt to collapse it - and attempt to 1182 * recursively collapse up the tree. 1183 * 1184 * We could also try and collapse in partially filled subtrees to take 1185 * up space in this node. 1186 */ 1187 if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { 1188 struct assoc_array_node *parent, *grandparent; 1189 struct assoc_array_ptr *ptr; 1190 1191 /* First of all, we need to know if this node has metadata so 1192 * that we don't try collapsing if all the leaves are already 1193 * here. 1194 */ 1195 has_meta = false; 1196 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 1197 ptr = node->slots[i]; 1198 if (assoc_array_ptr_is_meta(ptr)) { 1199 has_meta = true; 1200 break; 1201 } 1202 } 1203 1204 pr_devel("leaves: %ld [m=%d]\n", 1205 node->nr_leaves_on_branch - 1, has_meta); 1206 1207 /* Look further up the tree to see if we can collapse this node 1208 * into a more proximal node too. 1209 */ 1210 parent = node; 1211 collapse_up: 1212 pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch); 1213 1214 ptr = parent->back_pointer; 1215 if (!ptr) 1216 goto do_collapse; 1217 if (assoc_array_ptr_is_shortcut(ptr)) { 1218 struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr); 1219 ptr = s->back_pointer; 1220 if (!ptr) 1221 goto do_collapse; 1222 } 1223 1224 grandparent = assoc_array_ptr_to_node(ptr); 1225 if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { 1226 parent = grandparent; 1227 goto collapse_up; 1228 } 1229 1230 do_collapse: 1231 /* There's no point collapsing if the original node has no meta 1232 * pointers to discard and if we didn't merge into one of that 1233 * node's ancestry. 1234 */ 1235 if (has_meta || parent != node) { 1236 node = parent; 1237 1238 /* Create a new node to collapse into */ 1239 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 1240 if (!new_n0) 1241 goto enomem; 1242 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 1243 1244 new_n0->back_pointer = node->back_pointer; 1245 new_n0->parent_slot = node->parent_slot; 1246 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; 1247 edit->adjust_count_on = new_n0; 1248 1249 collapse.node = new_n0; 1250 collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf); 1251 collapse.slot = 0; 1252 assoc_array_subtree_iterate(assoc_array_node_to_ptr(node), 1253 node->back_pointer, 1254 assoc_array_delete_collapse_iterator, 1255 &collapse); 1256 pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch); 1257 BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1); 1258 1259 if (!node->back_pointer) { 1260 edit->set[1].ptr = &array->root; 1261 } else if (assoc_array_ptr_is_leaf(node->back_pointer)) { 1262 BUG(); 1263 } else if (assoc_array_ptr_is_node(node->back_pointer)) { 1264 struct assoc_array_node *p = 1265 assoc_array_ptr_to_node(node->back_pointer); 1266 edit->set[1].ptr = &p->slots[node->parent_slot]; 1267 } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) { 1268 struct assoc_array_shortcut *s = 1269 assoc_array_ptr_to_shortcut(node->back_pointer); 1270 edit->set[1].ptr = &s->next_node; 1271 } 1272 edit->set[1].to = assoc_array_node_to_ptr(new_n0); 1273 edit->excised_subtree = assoc_array_node_to_ptr(node); 1274 } 1275 } 1276 1277 return edit; 1278 1279 enomem: 1280 /* Clean up after an out of memory error */ 1281 pr_devel("enomem\n"); 1282 assoc_array_cancel_edit(edit); 1283 return ERR_PTR(-ENOMEM); 1284 } 1285 1286 /** 1287 * assoc_array_clear - Script deletion of all objects from an associative array 1288 * @array: The array to clear. 1289 * @ops: The operations to use. 1290 * 1291 * Precalculate and preallocate a script for the deletion of all the objects 1292 * from an associative array. This results in an edit script that can either 1293 * be applied or cancelled. 1294 * 1295 * The function returns a pointer to an edit script if there are objects to be 1296 * deleted, NULL if there are no objects in the array or -ENOMEM. 1297 * 1298 * The caller should lock against other modifications and must continue to hold 1299 * the lock until assoc_array_apply_edit() has been called. 1300 * 1301 * Accesses to the tree may take place concurrently with this function, 1302 * provided they hold the RCU read lock. 1303 */ 1304 struct assoc_array_edit *assoc_array_clear(struct assoc_array *array, 1305 const struct assoc_array_ops *ops) 1306 { 1307 struct assoc_array_edit *edit; 1308 1309 pr_devel("-->%s()\n", __func__); 1310 1311 if (!array->root) 1312 return NULL; 1313 1314 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1315 if (!edit) 1316 return ERR_PTR(-ENOMEM); 1317 edit->array = array; 1318 edit->ops = ops; 1319 edit->set[1].ptr = &array->root; 1320 edit->set[1].to = NULL; 1321 edit->excised_subtree = array->root; 1322 edit->ops_for_excised_subtree = ops; 1323 pr_devel("all gone\n"); 1324 return edit; 1325 } 1326 1327 /* 1328 * Handle the deferred destruction after an applied edit. 1329 */ 1330 static void assoc_array_rcu_cleanup(struct rcu_head *head) 1331 { 1332 struct assoc_array_edit *edit = 1333 container_of(head, struct assoc_array_edit, rcu); 1334 int i; 1335 1336 pr_devel("-->%s()\n", __func__); 1337 1338 if (edit->dead_leaf) 1339 edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf)); 1340 for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++) 1341 if (edit->excised_meta[i]) 1342 kfree(assoc_array_ptr_to_node(edit->excised_meta[i])); 1343 1344 if (edit->excised_subtree) { 1345 BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree)); 1346 if (assoc_array_ptr_is_node(edit->excised_subtree)) { 1347 struct assoc_array_node *n = 1348 assoc_array_ptr_to_node(edit->excised_subtree); 1349 n->back_pointer = NULL; 1350 } else { 1351 struct assoc_array_shortcut *s = 1352 assoc_array_ptr_to_shortcut(edit->excised_subtree); 1353 s->back_pointer = NULL; 1354 } 1355 assoc_array_destroy_subtree(edit->excised_subtree, 1356 edit->ops_for_excised_subtree); 1357 } 1358 1359 kfree(edit); 1360 } 1361 1362 /** 1363 * assoc_array_apply_edit - Apply an edit script to an associative array 1364 * @edit: The script to apply. 1365 * 1366 * Apply an edit script to an associative array to effect an insertion, 1367 * deletion or clearance. As the edit script includes preallocated memory, 1368 * this is guaranteed not to fail. 1369 * 1370 * The edit script, dead objects and dead metadata will be scheduled for 1371 * destruction after an RCU grace period to permit those doing read-only 1372 * accesses on the array to continue to do so under the RCU read lock whilst 1373 * the edit is taking place. 1374 */ 1375 void assoc_array_apply_edit(struct assoc_array_edit *edit) 1376 { 1377 struct assoc_array_shortcut *shortcut; 1378 struct assoc_array_node *node; 1379 struct assoc_array_ptr *ptr; 1380 int i; 1381 1382 pr_devel("-->%s()\n", __func__); 1383 1384 smp_wmb(); 1385 if (edit->leaf_p) 1386 *edit->leaf_p = edit->leaf; 1387 1388 smp_wmb(); 1389 for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++) 1390 if (edit->set_parent_slot[i].p) 1391 *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to; 1392 1393 smp_wmb(); 1394 for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++) 1395 if (edit->set_backpointers[i]) 1396 *edit->set_backpointers[i] = edit->set_backpointers_to; 1397 1398 smp_wmb(); 1399 for (i = 0; i < ARRAY_SIZE(edit->set); i++) 1400 if (edit->set[i].ptr) 1401 *edit->set[i].ptr = edit->set[i].to; 1402 1403 if (edit->array->root == NULL) { 1404 edit->array->nr_leaves_on_tree = 0; 1405 } else if (edit->adjust_count_on) { 1406 node = edit->adjust_count_on; 1407 for (;;) { 1408 node->nr_leaves_on_branch += edit->adjust_count_by; 1409 1410 ptr = node->back_pointer; 1411 if (!ptr) 1412 break; 1413 if (assoc_array_ptr_is_shortcut(ptr)) { 1414 shortcut = assoc_array_ptr_to_shortcut(ptr); 1415 ptr = shortcut->back_pointer; 1416 if (!ptr) 1417 break; 1418 } 1419 BUG_ON(!assoc_array_ptr_is_node(ptr)); 1420 node = assoc_array_ptr_to_node(ptr); 1421 } 1422 1423 edit->array->nr_leaves_on_tree += edit->adjust_count_by; 1424 } 1425 1426 call_rcu(&edit->rcu, assoc_array_rcu_cleanup); 1427 } 1428 1429 /** 1430 * assoc_array_cancel_edit - Discard an edit script. 1431 * @edit: The script to discard. 1432 * 1433 * Free an edit script and all the preallocated data it holds without making 1434 * any changes to the associative array it was intended for. 1435 * 1436 * NOTE! In the case of an insertion script, this does _not_ release the leaf 1437 * that was to be inserted. That is left to the caller. 1438 */ 1439 void assoc_array_cancel_edit(struct assoc_array_edit *edit) 1440 { 1441 struct assoc_array_ptr *ptr; 1442 int i; 1443 1444 pr_devel("-->%s()\n", __func__); 1445 1446 /* Clean up after an out of memory error */ 1447 for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) { 1448 ptr = edit->new_meta[i]; 1449 if (ptr) { 1450 if (assoc_array_ptr_is_node(ptr)) 1451 kfree(assoc_array_ptr_to_node(ptr)); 1452 else 1453 kfree(assoc_array_ptr_to_shortcut(ptr)); 1454 } 1455 } 1456 kfree(edit); 1457 } 1458 1459 /** 1460 * assoc_array_gc - Garbage collect an associative array. 1461 * @array: The array to clean. 1462 * @ops: The operations to use. 1463 * @iterator: A callback function to pass judgement on each object. 1464 * @iterator_data: Private data for the callback function. 1465 * 1466 * Collect garbage from an associative array and pack down the internal tree to 1467 * save memory. 1468 * 1469 * The iterator function is asked to pass judgement upon each object in the 1470 * array. If it returns false, the object is discard and if it returns true, 1471 * the object is kept. If it returns true, it must increment the object's 1472 * usage count (or whatever it needs to do to retain it) before returning. 1473 * 1474 * This function returns 0 if successful or -ENOMEM if out of memory. In the 1475 * latter case, the array is not changed. 1476 * 1477 * The caller should lock against other modifications and must continue to hold 1478 * the lock until assoc_array_apply_edit() has been called. 1479 * 1480 * Accesses to the tree may take place concurrently with this function, 1481 * provided they hold the RCU read lock. 1482 */ 1483 int assoc_array_gc(struct assoc_array *array, 1484 const struct assoc_array_ops *ops, 1485 bool (*iterator)(void *object, void *iterator_data), 1486 void *iterator_data) 1487 { 1488 struct assoc_array_shortcut *shortcut, *new_s; 1489 struct assoc_array_node *node, *new_n; 1490 struct assoc_array_edit *edit; 1491 struct assoc_array_ptr *cursor, *ptr; 1492 struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp; 1493 unsigned long nr_leaves_on_tree; 1494 int keylen, slot, nr_free, next_slot, i; 1495 1496 pr_devel("-->%s()\n", __func__); 1497 1498 if (!array->root) 1499 return 0; 1500 1501 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1502 if (!edit) 1503 return -ENOMEM; 1504 edit->array = array; 1505 edit->ops = ops; 1506 edit->ops_for_excised_subtree = ops; 1507 edit->set[0].ptr = &array->root; 1508 edit->excised_subtree = array->root; 1509 1510 new_root = new_parent = NULL; 1511 new_ptr_pp = &new_root; 1512 cursor = array->root; 1513 1514 descend: 1515 /* If this point is a shortcut, then we need to duplicate it and 1516 * advance the target cursor. 1517 */ 1518 if (assoc_array_ptr_is_shortcut(cursor)) { 1519 shortcut = assoc_array_ptr_to_shortcut(cursor); 1520 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 1521 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 1522 new_s = kmalloc(sizeof(struct assoc_array_shortcut) + 1523 keylen * sizeof(unsigned long), GFP_KERNEL); 1524 if (!new_s) 1525 goto enomem; 1526 pr_devel("dup shortcut %p -> %p\n", shortcut, new_s); 1527 memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) + 1528 keylen * sizeof(unsigned long))); 1529 new_s->back_pointer = new_parent; 1530 new_s->parent_slot = shortcut->parent_slot; 1531 *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s); 1532 new_ptr_pp = &new_s->next_node; 1533 cursor = shortcut->next_node; 1534 } 1535 1536 /* Duplicate the node at this position */ 1537 node = assoc_array_ptr_to_node(cursor); 1538 new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 1539 if (!new_n) 1540 goto enomem; 1541 pr_devel("dup node %p -> %p\n", node, new_n); 1542 new_n->back_pointer = new_parent; 1543 new_n->parent_slot = node->parent_slot; 1544 *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n); 1545 new_ptr_pp = NULL; 1546 slot = 0; 1547 1548 continue_node: 1549 /* Filter across any leaves and gc any subtrees */ 1550 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1551 ptr = node->slots[slot]; 1552 if (!ptr) 1553 continue; 1554 1555 if (assoc_array_ptr_is_leaf(ptr)) { 1556 if (iterator(assoc_array_ptr_to_leaf(ptr), 1557 iterator_data)) 1558 /* The iterator will have done any reference 1559 * counting on the object for us. 1560 */ 1561 new_n->slots[slot] = ptr; 1562 continue; 1563 } 1564 1565 new_ptr_pp = &new_n->slots[slot]; 1566 cursor = ptr; 1567 goto descend; 1568 } 1569 1570 pr_devel("-- compress node %p --\n", new_n); 1571 1572 /* Count up the number of empty slots in this node and work out the 1573 * subtree leaf count. 1574 */ 1575 new_n->nr_leaves_on_branch = 0; 1576 nr_free = 0; 1577 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1578 ptr = new_n->slots[slot]; 1579 if (!ptr) 1580 nr_free++; 1581 else if (assoc_array_ptr_is_leaf(ptr)) 1582 new_n->nr_leaves_on_branch++; 1583 } 1584 pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch); 1585 1586 /* See what we can fold in */ 1587 next_slot = 0; 1588 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1589 struct assoc_array_shortcut *s; 1590 struct assoc_array_node *child; 1591 1592 ptr = new_n->slots[slot]; 1593 if (!ptr || assoc_array_ptr_is_leaf(ptr)) 1594 continue; 1595 1596 s = NULL; 1597 if (assoc_array_ptr_is_shortcut(ptr)) { 1598 s = assoc_array_ptr_to_shortcut(ptr); 1599 ptr = s->next_node; 1600 } 1601 1602 child = assoc_array_ptr_to_node(ptr); 1603 new_n->nr_leaves_on_branch += child->nr_leaves_on_branch; 1604 1605 if (child->nr_leaves_on_branch <= nr_free + 1) { 1606 /* Fold the child node into this one */ 1607 pr_devel("[%d] fold node %lu/%d [nx %d]\n", 1608 slot, child->nr_leaves_on_branch, nr_free + 1, 1609 next_slot); 1610 1611 /* We would already have reaped an intervening shortcut 1612 * on the way back up the tree. 1613 */ 1614 BUG_ON(s); 1615 1616 new_n->slots[slot] = NULL; 1617 nr_free++; 1618 if (slot < next_slot) 1619 next_slot = slot; 1620 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 1621 struct assoc_array_ptr *p = child->slots[i]; 1622 if (!p) 1623 continue; 1624 BUG_ON(assoc_array_ptr_is_meta(p)); 1625 while (new_n->slots[next_slot]) 1626 next_slot++; 1627 BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT); 1628 new_n->slots[next_slot++] = p; 1629 nr_free--; 1630 } 1631 kfree(child); 1632 } else { 1633 pr_devel("[%d] retain node %lu/%d [nx %d]\n", 1634 slot, child->nr_leaves_on_branch, nr_free + 1, 1635 next_slot); 1636 } 1637 } 1638 1639 pr_devel("after: %lu\n", new_n->nr_leaves_on_branch); 1640 1641 nr_leaves_on_tree = new_n->nr_leaves_on_branch; 1642 1643 /* Excise this node if it is singly occupied by a shortcut */ 1644 if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) { 1645 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) 1646 if ((ptr = new_n->slots[slot])) 1647 break; 1648 1649 if (assoc_array_ptr_is_meta(ptr) && 1650 assoc_array_ptr_is_shortcut(ptr)) { 1651 pr_devel("excise node %p with 1 shortcut\n", new_n); 1652 new_s = assoc_array_ptr_to_shortcut(ptr); 1653 new_parent = new_n->back_pointer; 1654 slot = new_n->parent_slot; 1655 kfree(new_n); 1656 if (!new_parent) { 1657 new_s->back_pointer = NULL; 1658 new_s->parent_slot = 0; 1659 new_root = ptr; 1660 goto gc_complete; 1661 } 1662 1663 if (assoc_array_ptr_is_shortcut(new_parent)) { 1664 /* We can discard any preceding shortcut also */ 1665 struct assoc_array_shortcut *s = 1666 assoc_array_ptr_to_shortcut(new_parent); 1667 1668 pr_devel("excise preceding shortcut\n"); 1669 1670 new_parent = new_s->back_pointer = s->back_pointer; 1671 slot = new_s->parent_slot = s->parent_slot; 1672 kfree(s); 1673 if (!new_parent) { 1674 new_s->back_pointer = NULL; 1675 new_s->parent_slot = 0; 1676 new_root = ptr; 1677 goto gc_complete; 1678 } 1679 } 1680 1681 new_s->back_pointer = new_parent; 1682 new_s->parent_slot = slot; 1683 new_n = assoc_array_ptr_to_node(new_parent); 1684 new_n->slots[slot] = ptr; 1685 goto ascend_old_tree; 1686 } 1687 } 1688 1689 /* Excise any shortcuts we might encounter that point to nodes that 1690 * only contain leaves. 1691 */ 1692 ptr = new_n->back_pointer; 1693 if (!ptr) 1694 goto gc_complete; 1695 1696 if (assoc_array_ptr_is_shortcut(ptr)) { 1697 new_s = assoc_array_ptr_to_shortcut(ptr); 1698 new_parent = new_s->back_pointer; 1699 slot = new_s->parent_slot; 1700 1701 if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { 1702 struct assoc_array_node *n; 1703 1704 pr_devel("excise shortcut\n"); 1705 new_n->back_pointer = new_parent; 1706 new_n->parent_slot = slot; 1707 kfree(new_s); 1708 if (!new_parent) { 1709 new_root = assoc_array_node_to_ptr(new_n); 1710 goto gc_complete; 1711 } 1712 1713 n = assoc_array_ptr_to_node(new_parent); 1714 n->slots[slot] = assoc_array_node_to_ptr(new_n); 1715 } 1716 } else { 1717 new_parent = ptr; 1718 } 1719 new_n = assoc_array_ptr_to_node(new_parent); 1720 1721 ascend_old_tree: 1722 ptr = node->back_pointer; 1723 if (assoc_array_ptr_is_shortcut(ptr)) { 1724 shortcut = assoc_array_ptr_to_shortcut(ptr); 1725 slot = shortcut->parent_slot; 1726 cursor = shortcut->back_pointer; 1727 if (!cursor) 1728 goto gc_complete; 1729 } else { 1730 slot = node->parent_slot; 1731 cursor = ptr; 1732 } 1733 BUG_ON(!cursor); 1734 node = assoc_array_ptr_to_node(cursor); 1735 slot++; 1736 goto continue_node; 1737 1738 gc_complete: 1739 edit->set[0].to = new_root; 1740 assoc_array_apply_edit(edit); 1741 array->nr_leaves_on_tree = nr_leaves_on_tree; 1742 return 0; 1743 1744 enomem: 1745 pr_devel("enomem\n"); 1746 assoc_array_destroy_subtree(new_root, edit->ops); 1747 kfree(edit); 1748 return -ENOMEM; 1749 } 1750