1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Generic associative array implementation. 3 * 4 * See Documentation/core-api/assoc_array.rst for information. 5 * 6 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. 7 * Written by David Howells (dhowells@redhat.com) 8 */ 9 //#define DEBUG 10 #include <linux/rcupdate.h> 11 #include <linux/slab.h> 12 #include <linux/err.h> 13 #include <linux/assoc_array_priv.h> 14 15 /* 16 * Iterate over an associative array. The caller must hold the RCU read lock 17 * or better. 18 */ 19 static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root, 20 const struct assoc_array_ptr *stop, 21 int (*iterator)(const void *leaf, 22 void *iterator_data), 23 void *iterator_data) 24 { 25 const struct assoc_array_shortcut *shortcut; 26 const struct assoc_array_node *node; 27 const struct assoc_array_ptr *cursor, *ptr, *parent; 28 unsigned long has_meta; 29 int slot, ret; 30 31 cursor = root; 32 33 begin_node: 34 if (assoc_array_ptr_is_shortcut(cursor)) { 35 /* Descend through a shortcut */ 36 shortcut = assoc_array_ptr_to_shortcut(cursor); 37 cursor = READ_ONCE(shortcut->next_node); /* Address dependency. */ 38 } 39 40 node = assoc_array_ptr_to_node(cursor); 41 slot = 0; 42 43 /* We perform two passes of each node. 44 * 45 * The first pass does all the leaves in this node. This means we 46 * don't miss any leaves if the node is split up by insertion whilst 47 * we're iterating over the branches rooted here (we may, however, see 48 * some leaves twice). 49 */ 50 has_meta = 0; 51 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 52 ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ 53 has_meta |= (unsigned long)ptr; 54 if (ptr && assoc_array_ptr_is_leaf(ptr)) { 55 /* We need a barrier between the read of the pointer, 56 * which is supplied by the above READ_ONCE(). 57 */ 58 /* Invoke the callback */ 59 ret = iterator(assoc_array_ptr_to_leaf(ptr), 60 iterator_data); 61 if (ret) 62 return ret; 63 } 64 } 65 66 /* The second pass attends to all the metadata pointers. If we follow 67 * one of these we may find that we don't come back here, but rather go 68 * back to a replacement node with the leaves in a different layout. 69 * 70 * We are guaranteed to make progress, however, as the slot number for 71 * a particular portion of the key space cannot change - and we 72 * continue at the back pointer + 1. 73 */ 74 if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE)) 75 goto finished_node; 76 slot = 0; 77 78 continue_node: 79 node = assoc_array_ptr_to_node(cursor); 80 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 81 ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ 82 if (assoc_array_ptr_is_meta(ptr)) { 83 cursor = ptr; 84 goto begin_node; 85 } 86 } 87 88 finished_node: 89 /* Move up to the parent (may need to skip back over a shortcut) */ 90 parent = READ_ONCE(node->back_pointer); /* Address dependency. */ 91 slot = node->parent_slot; 92 if (parent == stop) 93 return 0; 94 95 if (assoc_array_ptr_is_shortcut(parent)) { 96 shortcut = assoc_array_ptr_to_shortcut(parent); 97 cursor = parent; 98 parent = READ_ONCE(shortcut->back_pointer); /* Address dependency. */ 99 slot = shortcut->parent_slot; 100 if (parent == stop) 101 return 0; 102 } 103 104 /* Ascend to next slot in parent node */ 105 cursor = parent; 106 slot++; 107 goto continue_node; 108 } 109 110 /** 111 * assoc_array_iterate - Pass all objects in the array to a callback 112 * @array: The array to iterate over. 113 * @iterator: The callback function. 114 * @iterator_data: Private data for the callback function. 115 * 116 * Iterate over all the objects in an associative array. Each one will be 117 * presented to the iterator function. 118 * 119 * If the array is being modified concurrently with the iteration then it is 120 * possible that some objects in the array will be passed to the iterator 121 * callback more than once - though every object should be passed at least 122 * once. If this is undesirable then the caller must lock against modification 123 * for the duration of this function. 124 * 125 * The function will return 0 if no objects were in the array or else it will 126 * return the result of the last iterator function called. Iteration stops 127 * immediately if any call to the iteration function results in a non-zero 128 * return. 129 * 130 * The caller should hold the RCU read lock or better if concurrent 131 * modification is possible. 132 */ 133 int assoc_array_iterate(const struct assoc_array *array, 134 int (*iterator)(const void *object, 135 void *iterator_data), 136 void *iterator_data) 137 { 138 struct assoc_array_ptr *root = READ_ONCE(array->root); /* Address dependency. */ 139 140 if (!root) 141 return 0; 142 return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data); 143 } 144 145 enum assoc_array_walk_status { 146 assoc_array_walk_tree_empty, 147 assoc_array_walk_found_terminal_node, 148 assoc_array_walk_found_wrong_shortcut, 149 }; 150 151 struct assoc_array_walk_result { 152 struct { 153 struct assoc_array_node *node; /* Node in which leaf might be found */ 154 int level; 155 int slot; 156 } terminal_node; 157 struct { 158 struct assoc_array_shortcut *shortcut; 159 int level; 160 int sc_level; 161 unsigned long sc_segments; 162 unsigned long dissimilarity; 163 } wrong_shortcut; 164 }; 165 166 /* 167 * Navigate through the internal tree looking for the closest node to the key. 168 */ 169 static enum assoc_array_walk_status 170 assoc_array_walk(const struct assoc_array *array, 171 const struct assoc_array_ops *ops, 172 const void *index_key, 173 struct assoc_array_walk_result *result) 174 { 175 struct assoc_array_shortcut *shortcut; 176 struct assoc_array_node *node; 177 struct assoc_array_ptr *cursor, *ptr; 178 unsigned long sc_segments, dissimilarity; 179 unsigned long segments; 180 int level, sc_level, next_sc_level; 181 int slot; 182 183 pr_devel("-->%s()\n", __func__); 184 185 cursor = READ_ONCE(array->root); /* Address dependency. */ 186 if (!cursor) 187 return assoc_array_walk_tree_empty; 188 189 level = 0; 190 191 /* Use segments from the key for the new leaf to navigate through the 192 * internal tree, skipping through nodes and shortcuts that are on 193 * route to the destination. Eventually we'll come to a slot that is 194 * either empty or contains a leaf at which point we've found a node in 195 * which the leaf we're looking for might be found or into which it 196 * should be inserted. 197 */ 198 jumped: 199 segments = ops->get_key_chunk(index_key, level); 200 pr_devel("segments[%d]: %lx\n", level, segments); 201 202 if (assoc_array_ptr_is_shortcut(cursor)) 203 goto follow_shortcut; 204 205 consider_node: 206 node = assoc_array_ptr_to_node(cursor); 207 slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); 208 slot &= ASSOC_ARRAY_FAN_MASK; 209 ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ 210 211 pr_devel("consider slot %x [ix=%d type=%lu]\n", 212 slot, level, (unsigned long)ptr & 3); 213 214 if (!assoc_array_ptr_is_meta(ptr)) { 215 /* The node doesn't have a node/shortcut pointer in the slot 216 * corresponding to the index key that we have to follow. 217 */ 218 result->terminal_node.node = node; 219 result->terminal_node.level = level; 220 result->terminal_node.slot = slot; 221 pr_devel("<--%s() = terminal_node\n", __func__); 222 return assoc_array_walk_found_terminal_node; 223 } 224 225 if (assoc_array_ptr_is_node(ptr)) { 226 /* There is a pointer to a node in the slot corresponding to 227 * this index key segment, so we need to follow it. 228 */ 229 cursor = ptr; 230 level += ASSOC_ARRAY_LEVEL_STEP; 231 if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) 232 goto consider_node; 233 goto jumped; 234 } 235 236 /* There is a shortcut in the slot corresponding to the index key 237 * segment. We follow the shortcut if its partial index key matches 238 * this leaf's. Otherwise we need to split the shortcut. 239 */ 240 cursor = ptr; 241 follow_shortcut: 242 shortcut = assoc_array_ptr_to_shortcut(cursor); 243 pr_devel("shortcut to %d\n", shortcut->skip_to_level); 244 sc_level = level + ASSOC_ARRAY_LEVEL_STEP; 245 BUG_ON(sc_level > shortcut->skip_to_level); 246 247 do { 248 /* Check the leaf against the shortcut's index key a word at a 249 * time, trimming the final word (the shortcut stores the index 250 * key completely from the root to the shortcut's target). 251 */ 252 if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0) 253 segments = ops->get_key_chunk(index_key, sc_level); 254 255 sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT]; 256 dissimilarity = segments ^ sc_segments; 257 258 if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) { 259 /* Trim segments that are beyond the shortcut */ 260 int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK; 261 dissimilarity &= ~(ULONG_MAX << shift); 262 next_sc_level = shortcut->skip_to_level; 263 } else { 264 next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE; 265 next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 266 } 267 268 if (dissimilarity != 0) { 269 /* This shortcut points elsewhere */ 270 result->wrong_shortcut.shortcut = shortcut; 271 result->wrong_shortcut.level = level; 272 result->wrong_shortcut.sc_level = sc_level; 273 result->wrong_shortcut.sc_segments = sc_segments; 274 result->wrong_shortcut.dissimilarity = dissimilarity; 275 return assoc_array_walk_found_wrong_shortcut; 276 } 277 278 sc_level = next_sc_level; 279 } while (sc_level < shortcut->skip_to_level); 280 281 /* The shortcut matches the leaf's index to this point. */ 282 cursor = READ_ONCE(shortcut->next_node); /* Address dependency. */ 283 if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { 284 level = sc_level; 285 goto jumped; 286 } else { 287 level = sc_level; 288 goto consider_node; 289 } 290 } 291 292 /** 293 * assoc_array_find - Find an object by index key 294 * @array: The associative array to search. 295 * @ops: The operations to use. 296 * @index_key: The key to the object. 297 * 298 * Find an object in an associative array by walking through the internal tree 299 * to the node that should contain the object and then searching the leaves 300 * there. NULL is returned if the requested object was not found in the array. 301 * 302 * The caller must hold the RCU read lock or better. 303 */ 304 void *assoc_array_find(const struct assoc_array *array, 305 const struct assoc_array_ops *ops, 306 const void *index_key) 307 { 308 struct assoc_array_walk_result result; 309 const struct assoc_array_node *node; 310 const struct assoc_array_ptr *ptr; 311 const void *leaf; 312 int slot; 313 314 if (assoc_array_walk(array, ops, index_key, &result) != 315 assoc_array_walk_found_terminal_node) 316 return NULL; 317 318 node = result.terminal_node.node; 319 320 /* If the target key is available to us, it's has to be pointed to by 321 * the terminal node. 322 */ 323 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 324 ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ 325 if (ptr && assoc_array_ptr_is_leaf(ptr)) { 326 /* We need a barrier between the read of the pointer 327 * and dereferencing the pointer - but only if we are 328 * actually going to dereference it. 329 */ 330 leaf = assoc_array_ptr_to_leaf(ptr); 331 if (ops->compare_object(leaf, index_key)) 332 return (void *)leaf; 333 } 334 } 335 336 return NULL; 337 } 338 339 /* 340 * Destructively iterate over an associative array. The caller must prevent 341 * other simultaneous accesses. 342 */ 343 static void assoc_array_destroy_subtree(struct assoc_array_ptr *root, 344 const struct assoc_array_ops *ops) 345 { 346 struct assoc_array_shortcut *shortcut; 347 struct assoc_array_node *node; 348 struct assoc_array_ptr *cursor, *parent = NULL; 349 int slot = -1; 350 351 pr_devel("-->%s()\n", __func__); 352 353 cursor = root; 354 if (!cursor) { 355 pr_devel("empty\n"); 356 return; 357 } 358 359 move_to_meta: 360 if (assoc_array_ptr_is_shortcut(cursor)) { 361 /* Descend through a shortcut */ 362 pr_devel("[%d] shortcut\n", slot); 363 BUG_ON(!assoc_array_ptr_is_shortcut(cursor)); 364 shortcut = assoc_array_ptr_to_shortcut(cursor); 365 BUG_ON(shortcut->back_pointer != parent); 366 BUG_ON(slot != -1 && shortcut->parent_slot != slot); 367 parent = cursor; 368 cursor = shortcut->next_node; 369 slot = -1; 370 BUG_ON(!assoc_array_ptr_is_node(cursor)); 371 } 372 373 pr_devel("[%d] node\n", slot); 374 node = assoc_array_ptr_to_node(cursor); 375 BUG_ON(node->back_pointer != parent); 376 BUG_ON(slot != -1 && node->parent_slot != slot); 377 slot = 0; 378 379 continue_node: 380 pr_devel("Node %p [back=%p]\n", node, node->back_pointer); 381 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 382 struct assoc_array_ptr *ptr = node->slots[slot]; 383 if (!ptr) 384 continue; 385 if (assoc_array_ptr_is_meta(ptr)) { 386 parent = cursor; 387 cursor = ptr; 388 goto move_to_meta; 389 } 390 391 if (ops) { 392 pr_devel("[%d] free leaf\n", slot); 393 ops->free_object(assoc_array_ptr_to_leaf(ptr)); 394 } 395 } 396 397 parent = node->back_pointer; 398 slot = node->parent_slot; 399 pr_devel("free node\n"); 400 kfree(node); 401 if (!parent) 402 return; /* Done */ 403 404 /* Move back up to the parent (may need to free a shortcut on 405 * the way up) */ 406 if (assoc_array_ptr_is_shortcut(parent)) { 407 shortcut = assoc_array_ptr_to_shortcut(parent); 408 BUG_ON(shortcut->next_node != cursor); 409 cursor = parent; 410 parent = shortcut->back_pointer; 411 slot = shortcut->parent_slot; 412 pr_devel("free shortcut\n"); 413 kfree(shortcut); 414 if (!parent) 415 return; 416 417 BUG_ON(!assoc_array_ptr_is_node(parent)); 418 } 419 420 /* Ascend to next slot in parent node */ 421 pr_devel("ascend to %p[%d]\n", parent, slot); 422 cursor = parent; 423 node = assoc_array_ptr_to_node(cursor); 424 slot++; 425 goto continue_node; 426 } 427 428 /** 429 * assoc_array_destroy - Destroy an associative array 430 * @array: The array to destroy. 431 * @ops: The operations to use. 432 * 433 * Discard all metadata and free all objects in an associative array. The 434 * array will be empty and ready to use again upon completion. This function 435 * cannot fail. 436 * 437 * The caller must prevent all other accesses whilst this takes place as no 438 * attempt is made to adjust pointers gracefully to permit RCU readlock-holding 439 * accesses to continue. On the other hand, no memory allocation is required. 440 */ 441 void assoc_array_destroy(struct assoc_array *array, 442 const struct assoc_array_ops *ops) 443 { 444 assoc_array_destroy_subtree(array->root, ops); 445 array->root = NULL; 446 } 447 448 /* 449 * Handle insertion into an empty tree. 450 */ 451 static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit) 452 { 453 struct assoc_array_node *new_n0; 454 455 pr_devel("-->%s()\n", __func__); 456 457 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 458 if (!new_n0) 459 return false; 460 461 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 462 edit->leaf_p = &new_n0->slots[0]; 463 edit->adjust_count_on = new_n0; 464 edit->set[0].ptr = &edit->array->root; 465 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 466 467 pr_devel("<--%s() = ok [no root]\n", __func__); 468 return true; 469 } 470 471 /* 472 * Handle insertion into a terminal node. 473 */ 474 static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, 475 const struct assoc_array_ops *ops, 476 const void *index_key, 477 struct assoc_array_walk_result *result) 478 { 479 struct assoc_array_shortcut *shortcut, *new_s0; 480 struct assoc_array_node *node, *new_n0, *new_n1, *side; 481 struct assoc_array_ptr *ptr; 482 unsigned long dissimilarity, base_seg, blank; 483 size_t keylen; 484 bool have_meta; 485 int level, diff; 486 int slot, next_slot, free_slot, i, j; 487 488 node = result->terminal_node.node; 489 level = result->terminal_node.level; 490 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot; 491 492 pr_devel("-->%s()\n", __func__); 493 494 /* We arrived at a node which doesn't have an onward node or shortcut 495 * pointer that we have to follow. This means that (a) the leaf we 496 * want must go here (either by insertion or replacement) or (b) we 497 * need to split this node and insert in one of the fragments. 498 */ 499 free_slot = -1; 500 501 /* Firstly, we have to check the leaves in this node to see if there's 502 * a matching one we should replace in place. 503 */ 504 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 505 ptr = node->slots[i]; 506 if (!ptr) { 507 free_slot = i; 508 continue; 509 } 510 if (assoc_array_ptr_is_leaf(ptr) && 511 ops->compare_object(assoc_array_ptr_to_leaf(ptr), 512 index_key)) { 513 pr_devel("replace in slot %d\n", i); 514 edit->leaf_p = &node->slots[i]; 515 edit->dead_leaf = node->slots[i]; 516 pr_devel("<--%s() = ok [replace]\n", __func__); 517 return true; 518 } 519 } 520 521 /* If there is a free slot in this node then we can just insert the 522 * leaf here. 523 */ 524 if (free_slot >= 0) { 525 pr_devel("insert in free slot %d\n", free_slot); 526 edit->leaf_p = &node->slots[free_slot]; 527 edit->adjust_count_on = node; 528 pr_devel("<--%s() = ok [insert]\n", __func__); 529 return true; 530 } 531 532 /* The node has no spare slots - so we're either going to have to split 533 * it or insert another node before it. 534 * 535 * Whatever, we're going to need at least two new nodes - so allocate 536 * those now. We may also need a new shortcut, but we deal with that 537 * when we need it. 538 */ 539 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 540 if (!new_n0) 541 return false; 542 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 543 new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 544 if (!new_n1) 545 return false; 546 edit->new_meta[1] = assoc_array_node_to_ptr(new_n1); 547 548 /* We need to find out how similar the leaves are. */ 549 pr_devel("no spare slots\n"); 550 have_meta = false; 551 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 552 ptr = node->slots[i]; 553 if (assoc_array_ptr_is_meta(ptr)) { 554 edit->segment_cache[i] = 0xff; 555 have_meta = true; 556 continue; 557 } 558 base_seg = ops->get_object_key_chunk( 559 assoc_array_ptr_to_leaf(ptr), level); 560 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 561 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; 562 } 563 564 if (have_meta) { 565 pr_devel("have meta\n"); 566 goto split_node; 567 } 568 569 /* The node contains only leaves */ 570 dissimilarity = 0; 571 base_seg = edit->segment_cache[0]; 572 for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++) 573 dissimilarity |= edit->segment_cache[i] ^ base_seg; 574 575 pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity); 576 577 if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) { 578 /* The old leaves all cluster in the same slot. We will need 579 * to insert a shortcut if the new node wants to cluster with them. 580 */ 581 if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0) 582 goto all_leaves_cluster_together; 583 584 /* Otherwise all the old leaves cluster in the same slot, but 585 * the new leaf wants to go into a different slot - so we 586 * create a new node (n0) to hold the new leaf and a pointer to 587 * a new node (n1) holding all the old leaves. 588 * 589 * This can be done by falling through to the node splitting 590 * path. 591 */ 592 pr_devel("present leaves cluster but not new leaf\n"); 593 } 594 595 split_node: 596 pr_devel("split node\n"); 597 598 /* We need to split the current node. The node must contain anything 599 * from a single leaf (in the one leaf case, this leaf will cluster 600 * with the new leaf) and the rest meta-pointers, to all leaves, some 601 * of which may cluster. 602 * 603 * It won't contain the case in which all the current leaves plus the 604 * new leaves want to cluster in the same slot. 605 * 606 * We need to expel at least two leaves out of a set consisting of the 607 * leaves in the node and the new leaf. The current meta pointers can 608 * just be copied as they shouldn't cluster with any of the leaves. 609 * 610 * We need a new node (n0) to replace the current one and a new node to 611 * take the expelled nodes (n1). 612 */ 613 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 614 new_n0->back_pointer = node->back_pointer; 615 new_n0->parent_slot = node->parent_slot; 616 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); 617 new_n1->parent_slot = -1; /* Need to calculate this */ 618 619 do_split_node: 620 pr_devel("do_split_node\n"); 621 622 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; 623 new_n1->nr_leaves_on_branch = 0; 624 625 /* Begin by finding two matching leaves. There have to be at least two 626 * that match - even if there are meta pointers - because any leaf that 627 * would match a slot with a meta pointer in it must be somewhere 628 * behind that meta pointer and cannot be here. Further, given N 629 * remaining leaf slots, we now have N+1 leaves to go in them. 630 */ 631 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 632 slot = edit->segment_cache[i]; 633 if (slot != 0xff) 634 for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++) 635 if (edit->segment_cache[j] == slot) 636 goto found_slot_for_multiple_occupancy; 637 } 638 found_slot_for_multiple_occupancy: 639 pr_devel("same slot: %x %x [%02x]\n", i, j, slot); 640 BUG_ON(i >= ASSOC_ARRAY_FAN_OUT); 641 BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1); 642 BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT); 643 644 new_n1->parent_slot = slot; 645 646 /* Metadata pointers cannot change slot */ 647 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) 648 if (assoc_array_ptr_is_meta(node->slots[i])) 649 new_n0->slots[i] = node->slots[i]; 650 else 651 new_n0->slots[i] = NULL; 652 BUG_ON(new_n0->slots[slot] != NULL); 653 new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1); 654 655 /* Filter the leaf pointers between the new nodes */ 656 free_slot = -1; 657 next_slot = 0; 658 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 659 if (assoc_array_ptr_is_meta(node->slots[i])) 660 continue; 661 if (edit->segment_cache[i] == slot) { 662 new_n1->slots[next_slot++] = node->slots[i]; 663 new_n1->nr_leaves_on_branch++; 664 } else { 665 do { 666 free_slot++; 667 } while (new_n0->slots[free_slot] != NULL); 668 new_n0->slots[free_slot] = node->slots[i]; 669 } 670 } 671 672 pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot); 673 674 if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) { 675 do { 676 free_slot++; 677 } while (new_n0->slots[free_slot] != NULL); 678 edit->leaf_p = &new_n0->slots[free_slot]; 679 edit->adjust_count_on = new_n0; 680 } else { 681 edit->leaf_p = &new_n1->slots[next_slot++]; 682 edit->adjust_count_on = new_n1; 683 } 684 685 BUG_ON(next_slot <= 1); 686 687 edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0); 688 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 689 if (edit->segment_cache[i] == 0xff) { 690 ptr = node->slots[i]; 691 BUG_ON(assoc_array_ptr_is_leaf(ptr)); 692 if (assoc_array_ptr_is_node(ptr)) { 693 side = assoc_array_ptr_to_node(ptr); 694 edit->set_backpointers[i] = &side->back_pointer; 695 } else { 696 shortcut = assoc_array_ptr_to_shortcut(ptr); 697 edit->set_backpointers[i] = &shortcut->back_pointer; 698 } 699 } 700 } 701 702 ptr = node->back_pointer; 703 if (!ptr) 704 edit->set[0].ptr = &edit->array->root; 705 else if (assoc_array_ptr_is_node(ptr)) 706 edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot]; 707 else 708 edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node; 709 edit->excised_meta[0] = assoc_array_node_to_ptr(node); 710 pr_devel("<--%s() = ok [split node]\n", __func__); 711 return true; 712 713 all_leaves_cluster_together: 714 /* All the leaves, new and old, want to cluster together in this node 715 * in the same slot, so we have to replace this node with a shortcut to 716 * skip over the identical parts of the key and then place a pair of 717 * nodes, one inside the other, at the end of the shortcut and 718 * distribute the keys between them. 719 * 720 * Firstly we need to work out where the leaves start diverging as a 721 * bit position into their keys so that we know how big the shortcut 722 * needs to be. 723 * 724 * We only need to make a single pass of N of the N+1 leaves because if 725 * any keys differ between themselves at bit X then at least one of 726 * them must also differ with the base key at bit X or before. 727 */ 728 pr_devel("all leaves cluster together\n"); 729 diff = INT_MAX; 730 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 731 int x = ops->diff_objects(assoc_array_ptr_to_leaf(node->slots[i]), 732 index_key); 733 if (x < diff) { 734 BUG_ON(x < 0); 735 diff = x; 736 } 737 } 738 BUG_ON(diff == INT_MAX); 739 BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP); 740 741 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); 742 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 743 744 new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + 745 keylen * sizeof(unsigned long), GFP_KERNEL); 746 if (!new_s0) 747 return false; 748 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0); 749 750 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); 751 new_s0->back_pointer = node->back_pointer; 752 new_s0->parent_slot = node->parent_slot; 753 new_s0->next_node = assoc_array_node_to_ptr(new_n0); 754 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); 755 new_n0->parent_slot = 0; 756 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); 757 new_n1->parent_slot = -1; /* Need to calculate this */ 758 759 new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK; 760 pr_devel("skip_to_level = %d [diff %d]\n", level, diff); 761 BUG_ON(level <= 0); 762 763 for (i = 0; i < keylen; i++) 764 new_s0->index_key[i] = 765 ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE); 766 767 if (level & ASSOC_ARRAY_KEY_CHUNK_MASK) { 768 blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); 769 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); 770 new_s0->index_key[keylen - 1] &= ~blank; 771 } 772 773 /* This now reduces to a node splitting exercise for which we'll need 774 * to regenerate the disparity table. 775 */ 776 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 777 ptr = node->slots[i]; 778 base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr), 779 level); 780 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 781 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; 782 } 783 784 base_seg = ops->get_key_chunk(index_key, level); 785 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 786 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK; 787 goto do_split_node; 788 } 789 790 /* 791 * Handle insertion into the middle of a shortcut. 792 */ 793 static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit, 794 const struct assoc_array_ops *ops, 795 struct assoc_array_walk_result *result) 796 { 797 struct assoc_array_shortcut *shortcut, *new_s0, *new_s1; 798 struct assoc_array_node *node, *new_n0, *side; 799 unsigned long sc_segments, dissimilarity, blank; 800 size_t keylen; 801 int level, sc_level, diff; 802 int sc_slot; 803 804 shortcut = result->wrong_shortcut.shortcut; 805 level = result->wrong_shortcut.level; 806 sc_level = result->wrong_shortcut.sc_level; 807 sc_segments = result->wrong_shortcut.sc_segments; 808 dissimilarity = result->wrong_shortcut.dissimilarity; 809 810 pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n", 811 __func__, level, dissimilarity, sc_level); 812 813 /* We need to split a shortcut and insert a node between the two 814 * pieces. Zero-length pieces will be dispensed with entirely. 815 * 816 * First of all, we need to find out in which level the first 817 * difference was. 818 */ 819 diff = __ffs(dissimilarity); 820 diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK; 821 diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK; 822 pr_devel("diff=%d\n", diff); 823 824 if (!shortcut->back_pointer) { 825 edit->set[0].ptr = &edit->array->root; 826 } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) { 827 node = assoc_array_ptr_to_node(shortcut->back_pointer); 828 edit->set[0].ptr = &node->slots[shortcut->parent_slot]; 829 } else { 830 BUG(); 831 } 832 833 edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut); 834 835 /* Create a new node now since we're going to need it anyway */ 836 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 837 if (!new_n0) 838 return false; 839 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 840 edit->adjust_count_on = new_n0; 841 842 /* Insert a new shortcut before the new node if this segment isn't of 843 * zero length - otherwise we just connect the new node directly to the 844 * parent. 845 */ 846 level += ASSOC_ARRAY_LEVEL_STEP; 847 if (diff > level) { 848 pr_devel("pre-shortcut %d...%d\n", level, diff); 849 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); 850 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 851 852 new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + 853 keylen * sizeof(unsigned long), GFP_KERNEL); 854 if (!new_s0) 855 return false; 856 edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0); 857 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); 858 new_s0->back_pointer = shortcut->back_pointer; 859 new_s0->parent_slot = shortcut->parent_slot; 860 new_s0->next_node = assoc_array_node_to_ptr(new_n0); 861 new_s0->skip_to_level = diff; 862 863 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); 864 new_n0->parent_slot = 0; 865 866 memcpy(new_s0->index_key, shortcut->index_key, 867 keylen * sizeof(unsigned long)); 868 869 blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); 870 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank); 871 new_s0->index_key[keylen - 1] &= ~blank; 872 } else { 873 pr_devel("no pre-shortcut\n"); 874 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 875 new_n0->back_pointer = shortcut->back_pointer; 876 new_n0->parent_slot = shortcut->parent_slot; 877 } 878 879 side = assoc_array_ptr_to_node(shortcut->next_node); 880 new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch; 881 882 /* We need to know which slot in the new node is going to take a 883 * metadata pointer. 884 */ 885 sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); 886 sc_slot &= ASSOC_ARRAY_FAN_MASK; 887 888 pr_devel("new slot %lx >> %d -> %d\n", 889 sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot); 890 891 /* Determine whether we need to follow the new node with a replacement 892 * for the current shortcut. We could in theory reuse the current 893 * shortcut if its parent slot number doesn't change - but that's a 894 * 1-in-16 chance so not worth expending the code upon. 895 */ 896 level = diff + ASSOC_ARRAY_LEVEL_STEP; 897 if (level < shortcut->skip_to_level) { 898 pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level); 899 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 900 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 901 902 new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) + 903 keylen * sizeof(unsigned long), GFP_KERNEL); 904 if (!new_s1) 905 return false; 906 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1); 907 908 new_s1->back_pointer = assoc_array_node_to_ptr(new_n0); 909 new_s1->parent_slot = sc_slot; 910 new_s1->next_node = shortcut->next_node; 911 new_s1->skip_to_level = shortcut->skip_to_level; 912 913 new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1); 914 915 memcpy(new_s1->index_key, shortcut->index_key, 916 keylen * sizeof(unsigned long)); 917 918 edit->set[1].ptr = &side->back_pointer; 919 edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1); 920 } else { 921 pr_devel("no post-shortcut\n"); 922 923 /* We don't have to replace the pointed-to node as long as we 924 * use memory barriers to make sure the parent slot number is 925 * changed before the back pointer (the parent slot number is 926 * irrelevant to the old parent shortcut). 927 */ 928 new_n0->slots[sc_slot] = shortcut->next_node; 929 edit->set_parent_slot[0].p = &side->parent_slot; 930 edit->set_parent_slot[0].to = sc_slot; 931 edit->set[1].ptr = &side->back_pointer; 932 edit->set[1].to = assoc_array_node_to_ptr(new_n0); 933 } 934 935 /* Install the new leaf in a spare slot in the new node. */ 936 if (sc_slot == 0) 937 edit->leaf_p = &new_n0->slots[1]; 938 else 939 edit->leaf_p = &new_n0->slots[0]; 940 941 pr_devel("<--%s() = ok [split shortcut]\n", __func__); 942 return edit; 943 } 944 945 /** 946 * assoc_array_insert - Script insertion of an object into an associative array 947 * @array: The array to insert into. 948 * @ops: The operations to use. 949 * @index_key: The key to insert at. 950 * @object: The object to insert. 951 * 952 * Precalculate and preallocate a script for the insertion or replacement of an 953 * object in an associative array. This results in an edit script that can 954 * either be applied or cancelled. 955 * 956 * The function returns a pointer to an edit script or -ENOMEM. 957 * 958 * The caller should lock against other modifications and must continue to hold 959 * the lock until assoc_array_apply_edit() has been called. 960 * 961 * Accesses to the tree may take place concurrently with this function, 962 * provided they hold the RCU read lock. 963 */ 964 struct assoc_array_edit *assoc_array_insert(struct assoc_array *array, 965 const struct assoc_array_ops *ops, 966 const void *index_key, 967 void *object) 968 { 969 struct assoc_array_walk_result result; 970 struct assoc_array_edit *edit; 971 972 pr_devel("-->%s()\n", __func__); 973 974 /* The leaf pointer we're given must not have the bottom bit set as we 975 * use those for type-marking the pointer. NULL pointers are also not 976 * allowed as they indicate an empty slot but we have to allow them 977 * here as they can be updated later. 978 */ 979 BUG_ON(assoc_array_ptr_is_meta(object)); 980 981 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 982 if (!edit) 983 return ERR_PTR(-ENOMEM); 984 edit->array = array; 985 edit->ops = ops; 986 edit->leaf = assoc_array_leaf_to_ptr(object); 987 edit->adjust_count_by = 1; 988 989 switch (assoc_array_walk(array, ops, index_key, &result)) { 990 case assoc_array_walk_tree_empty: 991 /* Allocate a root node if there isn't one yet */ 992 if (!assoc_array_insert_in_empty_tree(edit)) 993 goto enomem; 994 return edit; 995 996 case assoc_array_walk_found_terminal_node: 997 /* We found a node that doesn't have a node/shortcut pointer in 998 * the slot corresponding to the index key that we have to 999 * follow. 1000 */ 1001 if (!assoc_array_insert_into_terminal_node(edit, ops, index_key, 1002 &result)) 1003 goto enomem; 1004 return edit; 1005 1006 case assoc_array_walk_found_wrong_shortcut: 1007 /* We found a shortcut that didn't match our key in a slot we 1008 * needed to follow. 1009 */ 1010 if (!assoc_array_insert_mid_shortcut(edit, ops, &result)) 1011 goto enomem; 1012 return edit; 1013 } 1014 1015 enomem: 1016 /* Clean up after an out of memory error */ 1017 pr_devel("enomem\n"); 1018 assoc_array_cancel_edit(edit); 1019 return ERR_PTR(-ENOMEM); 1020 } 1021 1022 /** 1023 * assoc_array_insert_set_object - Set the new object pointer in an edit script 1024 * @edit: The edit script to modify. 1025 * @object: The object pointer to set. 1026 * 1027 * Change the object to be inserted in an edit script. The object pointed to 1028 * by the old object is not freed. This must be done prior to applying the 1029 * script. 1030 */ 1031 void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object) 1032 { 1033 BUG_ON(!object); 1034 edit->leaf = assoc_array_leaf_to_ptr(object); 1035 } 1036 1037 struct assoc_array_delete_collapse_context { 1038 struct assoc_array_node *node; 1039 const void *skip_leaf; 1040 int slot; 1041 }; 1042 1043 /* 1044 * Subtree collapse to node iterator. 1045 */ 1046 static int assoc_array_delete_collapse_iterator(const void *leaf, 1047 void *iterator_data) 1048 { 1049 struct assoc_array_delete_collapse_context *collapse = iterator_data; 1050 1051 if (leaf == collapse->skip_leaf) 1052 return 0; 1053 1054 BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT); 1055 1056 collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf); 1057 return 0; 1058 } 1059 1060 /** 1061 * assoc_array_delete - Script deletion of an object from an associative array 1062 * @array: The array to search. 1063 * @ops: The operations to use. 1064 * @index_key: The key to the object. 1065 * 1066 * Precalculate and preallocate a script for the deletion of an object from an 1067 * associative array. This results in an edit script that can either be 1068 * applied or cancelled. 1069 * 1070 * The function returns a pointer to an edit script if the object was found, 1071 * NULL if the object was not found or -ENOMEM. 1072 * 1073 * The caller should lock against other modifications and must continue to hold 1074 * the lock until assoc_array_apply_edit() has been called. 1075 * 1076 * Accesses to the tree may take place concurrently with this function, 1077 * provided they hold the RCU read lock. 1078 */ 1079 struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, 1080 const struct assoc_array_ops *ops, 1081 const void *index_key) 1082 { 1083 struct assoc_array_delete_collapse_context collapse; 1084 struct assoc_array_walk_result result; 1085 struct assoc_array_node *node, *new_n0; 1086 struct assoc_array_edit *edit; 1087 struct assoc_array_ptr *ptr; 1088 bool has_meta; 1089 int slot, i; 1090 1091 pr_devel("-->%s()\n", __func__); 1092 1093 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1094 if (!edit) 1095 return ERR_PTR(-ENOMEM); 1096 edit->array = array; 1097 edit->ops = ops; 1098 edit->adjust_count_by = -1; 1099 1100 switch (assoc_array_walk(array, ops, index_key, &result)) { 1101 case assoc_array_walk_found_terminal_node: 1102 /* We found a node that should contain the leaf we've been 1103 * asked to remove - *if* it's in the tree. 1104 */ 1105 pr_devel("terminal_node\n"); 1106 node = result.terminal_node.node; 1107 1108 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1109 ptr = node->slots[slot]; 1110 if (ptr && 1111 assoc_array_ptr_is_leaf(ptr) && 1112 ops->compare_object(assoc_array_ptr_to_leaf(ptr), 1113 index_key)) 1114 goto found_leaf; 1115 } 1116 fallthrough; 1117 case assoc_array_walk_tree_empty: 1118 case assoc_array_walk_found_wrong_shortcut: 1119 default: 1120 assoc_array_cancel_edit(edit); 1121 pr_devel("not found\n"); 1122 return NULL; 1123 } 1124 1125 found_leaf: 1126 BUG_ON(array->nr_leaves_on_tree <= 0); 1127 1128 /* In the simplest form of deletion we just clear the slot and release 1129 * the leaf after a suitable interval. 1130 */ 1131 edit->dead_leaf = node->slots[slot]; 1132 edit->set[0].ptr = &node->slots[slot]; 1133 edit->set[0].to = NULL; 1134 edit->adjust_count_on = node; 1135 1136 /* If that concludes erasure of the last leaf, then delete the entire 1137 * internal array. 1138 */ 1139 if (array->nr_leaves_on_tree == 1) { 1140 edit->set[1].ptr = &array->root; 1141 edit->set[1].to = NULL; 1142 edit->adjust_count_on = NULL; 1143 edit->excised_subtree = array->root; 1144 pr_devel("all gone\n"); 1145 return edit; 1146 } 1147 1148 /* However, we'd also like to clear up some metadata blocks if we 1149 * possibly can. 1150 * 1151 * We go for a simple algorithm of: if this node has FAN_OUT or fewer 1152 * leaves in it, then attempt to collapse it - and attempt to 1153 * recursively collapse up the tree. 1154 * 1155 * We could also try and collapse in partially filled subtrees to take 1156 * up space in this node. 1157 */ 1158 if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { 1159 struct assoc_array_node *parent, *grandparent; 1160 struct assoc_array_ptr *ptr; 1161 1162 /* First of all, we need to know if this node has metadata so 1163 * that we don't try collapsing if all the leaves are already 1164 * here. 1165 */ 1166 has_meta = false; 1167 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 1168 ptr = node->slots[i]; 1169 if (assoc_array_ptr_is_meta(ptr)) { 1170 has_meta = true; 1171 break; 1172 } 1173 } 1174 1175 pr_devel("leaves: %ld [m=%d]\n", 1176 node->nr_leaves_on_branch - 1, has_meta); 1177 1178 /* Look further up the tree to see if we can collapse this node 1179 * into a more proximal node too. 1180 */ 1181 parent = node; 1182 collapse_up: 1183 pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch); 1184 1185 ptr = parent->back_pointer; 1186 if (!ptr) 1187 goto do_collapse; 1188 if (assoc_array_ptr_is_shortcut(ptr)) { 1189 struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr); 1190 ptr = s->back_pointer; 1191 if (!ptr) 1192 goto do_collapse; 1193 } 1194 1195 grandparent = assoc_array_ptr_to_node(ptr); 1196 if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { 1197 parent = grandparent; 1198 goto collapse_up; 1199 } 1200 1201 do_collapse: 1202 /* There's no point collapsing if the original node has no meta 1203 * pointers to discard and if we didn't merge into one of that 1204 * node's ancestry. 1205 */ 1206 if (has_meta || parent != node) { 1207 node = parent; 1208 1209 /* Create a new node to collapse into */ 1210 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 1211 if (!new_n0) 1212 goto enomem; 1213 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 1214 1215 new_n0->back_pointer = node->back_pointer; 1216 new_n0->parent_slot = node->parent_slot; 1217 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; 1218 edit->adjust_count_on = new_n0; 1219 1220 collapse.node = new_n0; 1221 collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf); 1222 collapse.slot = 0; 1223 assoc_array_subtree_iterate(assoc_array_node_to_ptr(node), 1224 node->back_pointer, 1225 assoc_array_delete_collapse_iterator, 1226 &collapse); 1227 pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch); 1228 BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1); 1229 1230 if (!node->back_pointer) { 1231 edit->set[1].ptr = &array->root; 1232 } else if (assoc_array_ptr_is_leaf(node->back_pointer)) { 1233 BUG(); 1234 } else if (assoc_array_ptr_is_node(node->back_pointer)) { 1235 struct assoc_array_node *p = 1236 assoc_array_ptr_to_node(node->back_pointer); 1237 edit->set[1].ptr = &p->slots[node->parent_slot]; 1238 } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) { 1239 struct assoc_array_shortcut *s = 1240 assoc_array_ptr_to_shortcut(node->back_pointer); 1241 edit->set[1].ptr = &s->next_node; 1242 } 1243 edit->set[1].to = assoc_array_node_to_ptr(new_n0); 1244 edit->excised_subtree = assoc_array_node_to_ptr(node); 1245 } 1246 } 1247 1248 return edit; 1249 1250 enomem: 1251 /* Clean up after an out of memory error */ 1252 pr_devel("enomem\n"); 1253 assoc_array_cancel_edit(edit); 1254 return ERR_PTR(-ENOMEM); 1255 } 1256 1257 /** 1258 * assoc_array_clear - Script deletion of all objects from an associative array 1259 * @array: The array to clear. 1260 * @ops: The operations to use. 1261 * 1262 * Precalculate and preallocate a script for the deletion of all the objects 1263 * from an associative array. This results in an edit script that can either 1264 * be applied or cancelled. 1265 * 1266 * The function returns a pointer to an edit script if there are objects to be 1267 * deleted, NULL if there are no objects in the array or -ENOMEM. 1268 * 1269 * The caller should lock against other modifications and must continue to hold 1270 * the lock until assoc_array_apply_edit() has been called. 1271 * 1272 * Accesses to the tree may take place concurrently with this function, 1273 * provided they hold the RCU read lock. 1274 */ 1275 struct assoc_array_edit *assoc_array_clear(struct assoc_array *array, 1276 const struct assoc_array_ops *ops) 1277 { 1278 struct assoc_array_edit *edit; 1279 1280 pr_devel("-->%s()\n", __func__); 1281 1282 if (!array->root) 1283 return NULL; 1284 1285 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1286 if (!edit) 1287 return ERR_PTR(-ENOMEM); 1288 edit->array = array; 1289 edit->ops = ops; 1290 edit->set[1].ptr = &array->root; 1291 edit->set[1].to = NULL; 1292 edit->excised_subtree = array->root; 1293 edit->ops_for_excised_subtree = ops; 1294 pr_devel("all gone\n"); 1295 return edit; 1296 } 1297 1298 /* 1299 * Handle the deferred destruction after an applied edit. 1300 */ 1301 static void assoc_array_rcu_cleanup(struct rcu_head *head) 1302 { 1303 struct assoc_array_edit *edit = 1304 container_of(head, struct assoc_array_edit, rcu); 1305 int i; 1306 1307 pr_devel("-->%s()\n", __func__); 1308 1309 if (edit->dead_leaf) 1310 edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf)); 1311 for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++) 1312 if (edit->excised_meta[i]) 1313 kfree(assoc_array_ptr_to_node(edit->excised_meta[i])); 1314 1315 if (edit->excised_subtree) { 1316 BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree)); 1317 if (assoc_array_ptr_is_node(edit->excised_subtree)) { 1318 struct assoc_array_node *n = 1319 assoc_array_ptr_to_node(edit->excised_subtree); 1320 n->back_pointer = NULL; 1321 } else { 1322 struct assoc_array_shortcut *s = 1323 assoc_array_ptr_to_shortcut(edit->excised_subtree); 1324 s->back_pointer = NULL; 1325 } 1326 assoc_array_destroy_subtree(edit->excised_subtree, 1327 edit->ops_for_excised_subtree); 1328 } 1329 1330 kfree(edit); 1331 } 1332 1333 /** 1334 * assoc_array_apply_edit - Apply an edit script to an associative array 1335 * @edit: The script to apply. 1336 * 1337 * Apply an edit script to an associative array to effect an insertion, 1338 * deletion or clearance. As the edit script includes preallocated memory, 1339 * this is guaranteed not to fail. 1340 * 1341 * The edit script, dead objects and dead metadata will be scheduled for 1342 * destruction after an RCU grace period to permit those doing read-only 1343 * accesses on the array to continue to do so under the RCU read lock whilst 1344 * the edit is taking place. 1345 */ 1346 void assoc_array_apply_edit(struct assoc_array_edit *edit) 1347 { 1348 struct assoc_array_shortcut *shortcut; 1349 struct assoc_array_node *node; 1350 struct assoc_array_ptr *ptr; 1351 int i; 1352 1353 pr_devel("-->%s()\n", __func__); 1354 1355 smp_wmb(); 1356 if (edit->leaf_p) 1357 *edit->leaf_p = edit->leaf; 1358 1359 smp_wmb(); 1360 for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++) 1361 if (edit->set_parent_slot[i].p) 1362 *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to; 1363 1364 smp_wmb(); 1365 for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++) 1366 if (edit->set_backpointers[i]) 1367 *edit->set_backpointers[i] = edit->set_backpointers_to; 1368 1369 smp_wmb(); 1370 for (i = 0; i < ARRAY_SIZE(edit->set); i++) 1371 if (edit->set[i].ptr) 1372 *edit->set[i].ptr = edit->set[i].to; 1373 1374 if (edit->array->root == NULL) { 1375 edit->array->nr_leaves_on_tree = 0; 1376 } else if (edit->adjust_count_on) { 1377 node = edit->adjust_count_on; 1378 for (;;) { 1379 node->nr_leaves_on_branch += edit->adjust_count_by; 1380 1381 ptr = node->back_pointer; 1382 if (!ptr) 1383 break; 1384 if (assoc_array_ptr_is_shortcut(ptr)) { 1385 shortcut = assoc_array_ptr_to_shortcut(ptr); 1386 ptr = shortcut->back_pointer; 1387 if (!ptr) 1388 break; 1389 } 1390 BUG_ON(!assoc_array_ptr_is_node(ptr)); 1391 node = assoc_array_ptr_to_node(ptr); 1392 } 1393 1394 edit->array->nr_leaves_on_tree += edit->adjust_count_by; 1395 } 1396 1397 call_rcu(&edit->rcu, assoc_array_rcu_cleanup); 1398 } 1399 1400 /** 1401 * assoc_array_cancel_edit - Discard an edit script. 1402 * @edit: The script to discard. 1403 * 1404 * Free an edit script and all the preallocated data it holds without making 1405 * any changes to the associative array it was intended for. 1406 * 1407 * NOTE! In the case of an insertion script, this does _not_ release the leaf 1408 * that was to be inserted. That is left to the caller. 1409 */ 1410 void assoc_array_cancel_edit(struct assoc_array_edit *edit) 1411 { 1412 struct assoc_array_ptr *ptr; 1413 int i; 1414 1415 pr_devel("-->%s()\n", __func__); 1416 1417 /* Clean up after an out of memory error */ 1418 for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) { 1419 ptr = edit->new_meta[i]; 1420 if (ptr) { 1421 if (assoc_array_ptr_is_node(ptr)) 1422 kfree(assoc_array_ptr_to_node(ptr)); 1423 else 1424 kfree(assoc_array_ptr_to_shortcut(ptr)); 1425 } 1426 } 1427 kfree(edit); 1428 } 1429 1430 /** 1431 * assoc_array_gc - Garbage collect an associative array. 1432 * @array: The array to clean. 1433 * @ops: The operations to use. 1434 * @iterator: A callback function to pass judgement on each object. 1435 * @iterator_data: Private data for the callback function. 1436 * 1437 * Collect garbage from an associative array and pack down the internal tree to 1438 * save memory. 1439 * 1440 * The iterator function is asked to pass judgement upon each object in the 1441 * array. If it returns false, the object is discard and if it returns true, 1442 * the object is kept. If it returns true, it must increment the object's 1443 * usage count (or whatever it needs to do to retain it) before returning. 1444 * 1445 * This function returns 0 if successful or -ENOMEM if out of memory. In the 1446 * latter case, the array is not changed. 1447 * 1448 * The caller should lock against other modifications and must continue to hold 1449 * the lock until assoc_array_apply_edit() has been called. 1450 * 1451 * Accesses to the tree may take place concurrently with this function, 1452 * provided they hold the RCU read lock. 1453 */ 1454 int assoc_array_gc(struct assoc_array *array, 1455 const struct assoc_array_ops *ops, 1456 bool (*iterator)(void *object, void *iterator_data), 1457 void *iterator_data) 1458 { 1459 struct assoc_array_shortcut *shortcut, *new_s; 1460 struct assoc_array_node *node, *new_n; 1461 struct assoc_array_edit *edit; 1462 struct assoc_array_ptr *cursor, *ptr; 1463 struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp; 1464 unsigned long nr_leaves_on_tree; 1465 int keylen, slot, nr_free, next_slot, i; 1466 1467 pr_devel("-->%s()\n", __func__); 1468 1469 if (!array->root) 1470 return 0; 1471 1472 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1473 if (!edit) 1474 return -ENOMEM; 1475 edit->array = array; 1476 edit->ops = ops; 1477 edit->ops_for_excised_subtree = ops; 1478 edit->set[0].ptr = &array->root; 1479 edit->excised_subtree = array->root; 1480 1481 new_root = new_parent = NULL; 1482 new_ptr_pp = &new_root; 1483 cursor = array->root; 1484 1485 descend: 1486 /* If this point is a shortcut, then we need to duplicate it and 1487 * advance the target cursor. 1488 */ 1489 if (assoc_array_ptr_is_shortcut(cursor)) { 1490 shortcut = assoc_array_ptr_to_shortcut(cursor); 1491 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 1492 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 1493 new_s = kmalloc(sizeof(struct assoc_array_shortcut) + 1494 keylen * sizeof(unsigned long), GFP_KERNEL); 1495 if (!new_s) 1496 goto enomem; 1497 pr_devel("dup shortcut %p -> %p\n", shortcut, new_s); 1498 memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) + 1499 keylen * sizeof(unsigned long))); 1500 new_s->back_pointer = new_parent; 1501 new_s->parent_slot = shortcut->parent_slot; 1502 *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s); 1503 new_ptr_pp = &new_s->next_node; 1504 cursor = shortcut->next_node; 1505 } 1506 1507 /* Duplicate the node at this position */ 1508 node = assoc_array_ptr_to_node(cursor); 1509 new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 1510 if (!new_n) 1511 goto enomem; 1512 pr_devel("dup node %p -> %p\n", node, new_n); 1513 new_n->back_pointer = new_parent; 1514 new_n->parent_slot = node->parent_slot; 1515 *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n); 1516 new_ptr_pp = NULL; 1517 slot = 0; 1518 1519 continue_node: 1520 /* Filter across any leaves and gc any subtrees */ 1521 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1522 ptr = node->slots[slot]; 1523 if (!ptr) 1524 continue; 1525 1526 if (assoc_array_ptr_is_leaf(ptr)) { 1527 if (iterator(assoc_array_ptr_to_leaf(ptr), 1528 iterator_data)) 1529 /* The iterator will have done any reference 1530 * counting on the object for us. 1531 */ 1532 new_n->slots[slot] = ptr; 1533 continue; 1534 } 1535 1536 new_ptr_pp = &new_n->slots[slot]; 1537 cursor = ptr; 1538 goto descend; 1539 } 1540 1541 pr_devel("-- compress node %p --\n", new_n); 1542 1543 /* Count up the number of empty slots in this node and work out the 1544 * subtree leaf count. 1545 */ 1546 new_n->nr_leaves_on_branch = 0; 1547 nr_free = 0; 1548 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1549 ptr = new_n->slots[slot]; 1550 if (!ptr) 1551 nr_free++; 1552 else if (assoc_array_ptr_is_leaf(ptr)) 1553 new_n->nr_leaves_on_branch++; 1554 } 1555 pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch); 1556 1557 /* See what we can fold in */ 1558 next_slot = 0; 1559 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1560 struct assoc_array_shortcut *s; 1561 struct assoc_array_node *child; 1562 1563 ptr = new_n->slots[slot]; 1564 if (!ptr || assoc_array_ptr_is_leaf(ptr)) 1565 continue; 1566 1567 s = NULL; 1568 if (assoc_array_ptr_is_shortcut(ptr)) { 1569 s = assoc_array_ptr_to_shortcut(ptr); 1570 ptr = s->next_node; 1571 } 1572 1573 child = assoc_array_ptr_to_node(ptr); 1574 new_n->nr_leaves_on_branch += child->nr_leaves_on_branch; 1575 1576 if (child->nr_leaves_on_branch <= nr_free + 1) { 1577 /* Fold the child node into this one */ 1578 pr_devel("[%d] fold node %lu/%d [nx %d]\n", 1579 slot, child->nr_leaves_on_branch, nr_free + 1, 1580 next_slot); 1581 1582 /* We would already have reaped an intervening shortcut 1583 * on the way back up the tree. 1584 */ 1585 BUG_ON(s); 1586 1587 new_n->slots[slot] = NULL; 1588 nr_free++; 1589 if (slot < next_slot) 1590 next_slot = slot; 1591 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 1592 struct assoc_array_ptr *p = child->slots[i]; 1593 if (!p) 1594 continue; 1595 BUG_ON(assoc_array_ptr_is_meta(p)); 1596 while (new_n->slots[next_slot]) 1597 next_slot++; 1598 BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT); 1599 new_n->slots[next_slot++] = p; 1600 nr_free--; 1601 } 1602 kfree(child); 1603 } else { 1604 pr_devel("[%d] retain node %lu/%d [nx %d]\n", 1605 slot, child->nr_leaves_on_branch, nr_free + 1, 1606 next_slot); 1607 } 1608 } 1609 1610 pr_devel("after: %lu\n", new_n->nr_leaves_on_branch); 1611 1612 nr_leaves_on_tree = new_n->nr_leaves_on_branch; 1613 1614 /* Excise this node if it is singly occupied by a shortcut */ 1615 if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) { 1616 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) 1617 if ((ptr = new_n->slots[slot])) 1618 break; 1619 1620 if (assoc_array_ptr_is_meta(ptr) && 1621 assoc_array_ptr_is_shortcut(ptr)) { 1622 pr_devel("excise node %p with 1 shortcut\n", new_n); 1623 new_s = assoc_array_ptr_to_shortcut(ptr); 1624 new_parent = new_n->back_pointer; 1625 slot = new_n->parent_slot; 1626 kfree(new_n); 1627 if (!new_parent) { 1628 new_s->back_pointer = NULL; 1629 new_s->parent_slot = 0; 1630 new_root = ptr; 1631 goto gc_complete; 1632 } 1633 1634 if (assoc_array_ptr_is_shortcut(new_parent)) { 1635 /* We can discard any preceding shortcut also */ 1636 struct assoc_array_shortcut *s = 1637 assoc_array_ptr_to_shortcut(new_parent); 1638 1639 pr_devel("excise preceding shortcut\n"); 1640 1641 new_parent = new_s->back_pointer = s->back_pointer; 1642 slot = new_s->parent_slot = s->parent_slot; 1643 kfree(s); 1644 if (!new_parent) { 1645 new_s->back_pointer = NULL; 1646 new_s->parent_slot = 0; 1647 new_root = ptr; 1648 goto gc_complete; 1649 } 1650 } 1651 1652 new_s->back_pointer = new_parent; 1653 new_s->parent_slot = slot; 1654 new_n = assoc_array_ptr_to_node(new_parent); 1655 new_n->slots[slot] = ptr; 1656 goto ascend_old_tree; 1657 } 1658 } 1659 1660 /* Excise any shortcuts we might encounter that point to nodes that 1661 * only contain leaves. 1662 */ 1663 ptr = new_n->back_pointer; 1664 if (!ptr) 1665 goto gc_complete; 1666 1667 if (assoc_array_ptr_is_shortcut(ptr)) { 1668 new_s = assoc_array_ptr_to_shortcut(ptr); 1669 new_parent = new_s->back_pointer; 1670 slot = new_s->parent_slot; 1671 1672 if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { 1673 struct assoc_array_node *n; 1674 1675 pr_devel("excise shortcut\n"); 1676 new_n->back_pointer = new_parent; 1677 new_n->parent_slot = slot; 1678 kfree(new_s); 1679 if (!new_parent) { 1680 new_root = assoc_array_node_to_ptr(new_n); 1681 goto gc_complete; 1682 } 1683 1684 n = assoc_array_ptr_to_node(new_parent); 1685 n->slots[slot] = assoc_array_node_to_ptr(new_n); 1686 } 1687 } else { 1688 new_parent = ptr; 1689 } 1690 new_n = assoc_array_ptr_to_node(new_parent); 1691 1692 ascend_old_tree: 1693 ptr = node->back_pointer; 1694 if (assoc_array_ptr_is_shortcut(ptr)) { 1695 shortcut = assoc_array_ptr_to_shortcut(ptr); 1696 slot = shortcut->parent_slot; 1697 cursor = shortcut->back_pointer; 1698 if (!cursor) 1699 goto gc_complete; 1700 } else { 1701 slot = node->parent_slot; 1702 cursor = ptr; 1703 } 1704 BUG_ON(!cursor); 1705 node = assoc_array_ptr_to_node(cursor); 1706 slot++; 1707 goto continue_node; 1708 1709 gc_complete: 1710 edit->set[0].to = new_root; 1711 assoc_array_apply_edit(edit); 1712 array->nr_leaves_on_tree = nr_leaves_on_tree; 1713 return 0; 1714 1715 enomem: 1716 pr_devel("enomem\n"); 1717 assoc_array_destroy_subtree(new_root, edit->ops); 1718 kfree(edit); 1719 return -ENOMEM; 1720 } 1721