1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Generic associative array implementation. 3 * 4 * See Documentation/core-api/assoc_array.rst for information. 5 * 6 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. 7 * Written by David Howells (dhowells@redhat.com) 8 */ 9 //#define DEBUG 10 #include <linux/rcupdate.h> 11 #include <linux/slab.h> 12 #include <linux/err.h> 13 #include <linux/assoc_array_priv.h> 14 15 /* 16 * Iterate over an associative array. The caller must hold the RCU read lock 17 * or better. 18 */ 19 static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root, 20 const struct assoc_array_ptr *stop, 21 int (*iterator)(const void *leaf, 22 void *iterator_data), 23 void *iterator_data) 24 { 25 const struct assoc_array_shortcut *shortcut; 26 const struct assoc_array_node *node; 27 const struct assoc_array_ptr *cursor, *ptr, *parent; 28 unsigned long has_meta; 29 int slot, ret; 30 31 cursor = root; 32 33 begin_node: 34 if (assoc_array_ptr_is_shortcut(cursor)) { 35 /* Descend through a shortcut */ 36 shortcut = assoc_array_ptr_to_shortcut(cursor); 37 cursor = READ_ONCE(shortcut->next_node); /* Address dependency. */ 38 } 39 40 node = assoc_array_ptr_to_node(cursor); 41 slot = 0; 42 43 /* We perform two passes of each node. 44 * 45 * The first pass does all the leaves in this node. This means we 46 * don't miss any leaves if the node is split up by insertion whilst 47 * we're iterating over the branches rooted here (we may, however, see 48 * some leaves twice). 49 */ 50 has_meta = 0; 51 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 52 ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ 53 has_meta |= (unsigned long)ptr; 54 if (ptr && assoc_array_ptr_is_leaf(ptr)) { 55 /* We need a barrier between the read of the pointer, 56 * which is supplied by the above READ_ONCE(). 57 */ 58 /* Invoke the callback */ 59 ret = iterator(assoc_array_ptr_to_leaf(ptr), 60 iterator_data); 61 if (ret) 62 return ret; 63 } 64 } 65 66 /* The second pass attends to all the metadata pointers. If we follow 67 * one of these we may find that we don't come back here, but rather go 68 * back to a replacement node with the leaves in a different layout. 69 * 70 * We are guaranteed to make progress, however, as the slot number for 71 * a particular portion of the key space cannot change - and we 72 * continue at the back pointer + 1. 73 */ 74 if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE)) 75 goto finished_node; 76 slot = 0; 77 78 continue_node: 79 node = assoc_array_ptr_to_node(cursor); 80 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 81 ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ 82 if (assoc_array_ptr_is_meta(ptr)) { 83 cursor = ptr; 84 goto begin_node; 85 } 86 } 87 88 finished_node: 89 /* Move up to the parent (may need to skip back over a shortcut) */ 90 parent = READ_ONCE(node->back_pointer); /* Address dependency. */ 91 slot = node->parent_slot; 92 if (parent == stop) 93 return 0; 94 95 if (assoc_array_ptr_is_shortcut(parent)) { 96 shortcut = assoc_array_ptr_to_shortcut(parent); 97 cursor = parent; 98 parent = READ_ONCE(shortcut->back_pointer); /* Address dependency. */ 99 slot = shortcut->parent_slot; 100 if (parent == stop) 101 return 0; 102 } 103 104 /* Ascend to next slot in parent node */ 105 cursor = parent; 106 slot++; 107 goto continue_node; 108 } 109 110 /** 111 * assoc_array_iterate - Pass all objects in the array to a callback 112 * @array: The array to iterate over. 113 * @iterator: The callback function. 114 * @iterator_data: Private data for the callback function. 115 * 116 * Iterate over all the objects in an associative array. Each one will be 117 * presented to the iterator function. 118 * 119 * If the array is being modified concurrently with the iteration then it is 120 * possible that some objects in the array will be passed to the iterator 121 * callback more than once - though every object should be passed at least 122 * once. If this is undesirable then the caller must lock against modification 123 * for the duration of this function. 124 * 125 * The function will return 0 if no objects were in the array or else it will 126 * return the result of the last iterator function called. Iteration stops 127 * immediately if any call to the iteration function results in a non-zero 128 * return. 129 * 130 * The caller should hold the RCU read lock or better if concurrent 131 * modification is possible. 132 */ 133 int assoc_array_iterate(const struct assoc_array *array, 134 int (*iterator)(const void *object, 135 void *iterator_data), 136 void *iterator_data) 137 { 138 struct assoc_array_ptr *root = READ_ONCE(array->root); /* Address dependency. */ 139 140 if (!root) 141 return 0; 142 return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data); 143 } 144 145 enum assoc_array_walk_status { 146 assoc_array_walk_tree_empty, 147 assoc_array_walk_found_terminal_node, 148 assoc_array_walk_found_wrong_shortcut, 149 }; 150 151 struct assoc_array_walk_result { 152 struct { 153 struct assoc_array_node *node; /* Node in which leaf might be found */ 154 int level; 155 int slot; 156 } terminal_node; 157 struct { 158 struct assoc_array_shortcut *shortcut; 159 int level; 160 int sc_level; 161 unsigned long sc_segments; 162 unsigned long dissimilarity; 163 } wrong_shortcut; 164 }; 165 166 /* 167 * Navigate through the internal tree looking for the closest node to the key. 168 */ 169 static enum assoc_array_walk_status 170 assoc_array_walk(const struct assoc_array *array, 171 const struct assoc_array_ops *ops, 172 const void *index_key, 173 struct assoc_array_walk_result *result) 174 { 175 struct assoc_array_shortcut *shortcut; 176 struct assoc_array_node *node; 177 struct assoc_array_ptr *cursor, *ptr; 178 unsigned long sc_segments, dissimilarity; 179 unsigned long segments; 180 int level, sc_level, next_sc_level; 181 int slot; 182 183 pr_devel("-->%s()\n", __func__); 184 185 cursor = READ_ONCE(array->root); /* Address dependency. */ 186 if (!cursor) 187 return assoc_array_walk_tree_empty; 188 189 level = 0; 190 191 /* Use segments from the key for the new leaf to navigate through the 192 * internal tree, skipping through nodes and shortcuts that are on 193 * route to the destination. Eventually we'll come to a slot that is 194 * either empty or contains a leaf at which point we've found a node in 195 * which the leaf we're looking for might be found or into which it 196 * should be inserted. 197 */ 198 jumped: 199 segments = ops->get_key_chunk(index_key, level); 200 pr_devel("segments[%d]: %lx\n", level, segments); 201 202 if (assoc_array_ptr_is_shortcut(cursor)) 203 goto follow_shortcut; 204 205 consider_node: 206 node = assoc_array_ptr_to_node(cursor); 207 slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); 208 slot &= ASSOC_ARRAY_FAN_MASK; 209 ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ 210 211 pr_devel("consider slot %x [ix=%d type=%lu]\n", 212 slot, level, (unsigned long)ptr & 3); 213 214 if (!assoc_array_ptr_is_meta(ptr)) { 215 /* The node doesn't have a node/shortcut pointer in the slot 216 * corresponding to the index key that we have to follow. 217 */ 218 result->terminal_node.node = node; 219 result->terminal_node.level = level; 220 result->terminal_node.slot = slot; 221 pr_devel("<--%s() = terminal_node\n", __func__); 222 return assoc_array_walk_found_terminal_node; 223 } 224 225 if (assoc_array_ptr_is_node(ptr)) { 226 /* There is a pointer to a node in the slot corresponding to 227 * this index key segment, so we need to follow it. 228 */ 229 cursor = ptr; 230 level += ASSOC_ARRAY_LEVEL_STEP; 231 if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) 232 goto consider_node; 233 goto jumped; 234 } 235 236 /* There is a shortcut in the slot corresponding to the index key 237 * segment. We follow the shortcut if its partial index key matches 238 * this leaf's. Otherwise we need to split the shortcut. 239 */ 240 cursor = ptr; 241 follow_shortcut: 242 shortcut = assoc_array_ptr_to_shortcut(cursor); 243 pr_devel("shortcut to %d\n", shortcut->skip_to_level); 244 sc_level = level + ASSOC_ARRAY_LEVEL_STEP; 245 BUG_ON(sc_level > shortcut->skip_to_level); 246 247 do { 248 /* Check the leaf against the shortcut's index key a word at a 249 * time, trimming the final word (the shortcut stores the index 250 * key completely from the root to the shortcut's target). 251 */ 252 if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0) 253 segments = ops->get_key_chunk(index_key, sc_level); 254 255 sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT]; 256 dissimilarity = segments ^ sc_segments; 257 258 if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) { 259 /* Trim segments that are beyond the shortcut */ 260 int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK; 261 dissimilarity &= ~(ULONG_MAX << shift); 262 next_sc_level = shortcut->skip_to_level; 263 } else { 264 next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE; 265 next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 266 } 267 268 if (dissimilarity != 0) { 269 /* This shortcut points elsewhere */ 270 result->wrong_shortcut.shortcut = shortcut; 271 result->wrong_shortcut.level = level; 272 result->wrong_shortcut.sc_level = sc_level; 273 result->wrong_shortcut.sc_segments = sc_segments; 274 result->wrong_shortcut.dissimilarity = dissimilarity; 275 return assoc_array_walk_found_wrong_shortcut; 276 } 277 278 sc_level = next_sc_level; 279 } while (sc_level < shortcut->skip_to_level); 280 281 /* The shortcut matches the leaf's index to this point. */ 282 cursor = READ_ONCE(shortcut->next_node); /* Address dependency. */ 283 if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { 284 level = sc_level; 285 goto jumped; 286 } else { 287 level = sc_level; 288 goto consider_node; 289 } 290 } 291 292 /** 293 * assoc_array_find - Find an object by index key 294 * @array: The associative array to search. 295 * @ops: The operations to use. 296 * @index_key: The key to the object. 297 * 298 * Find an object in an associative array by walking through the internal tree 299 * to the node that should contain the object and then searching the leaves 300 * there. NULL is returned if the requested object was not found in the array. 301 * 302 * The caller must hold the RCU read lock or better. 303 */ 304 void *assoc_array_find(const struct assoc_array *array, 305 const struct assoc_array_ops *ops, 306 const void *index_key) 307 { 308 struct assoc_array_walk_result result; 309 const struct assoc_array_node *node; 310 const struct assoc_array_ptr *ptr; 311 const void *leaf; 312 int slot; 313 314 if (assoc_array_walk(array, ops, index_key, &result) != 315 assoc_array_walk_found_terminal_node) 316 return NULL; 317 318 node = result.terminal_node.node; 319 320 /* If the target key is available to us, it's has to be pointed to by 321 * the terminal node. 322 */ 323 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 324 ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ 325 if (ptr && assoc_array_ptr_is_leaf(ptr)) { 326 /* We need a barrier between the read of the pointer 327 * and dereferencing the pointer - but only if we are 328 * actually going to dereference it. 329 */ 330 leaf = assoc_array_ptr_to_leaf(ptr); 331 if (ops->compare_object(leaf, index_key)) 332 return (void *)leaf; 333 } 334 } 335 336 return NULL; 337 } 338 339 /* 340 * Destructively iterate over an associative array. The caller must prevent 341 * other simultaneous accesses. 342 */ 343 static void assoc_array_destroy_subtree(struct assoc_array_ptr *root, 344 const struct assoc_array_ops *ops) 345 { 346 struct assoc_array_shortcut *shortcut; 347 struct assoc_array_node *node; 348 struct assoc_array_ptr *cursor, *parent = NULL; 349 int slot = -1; 350 351 pr_devel("-->%s()\n", __func__); 352 353 cursor = root; 354 if (!cursor) { 355 pr_devel("empty\n"); 356 return; 357 } 358 359 move_to_meta: 360 if (assoc_array_ptr_is_shortcut(cursor)) { 361 /* Descend through a shortcut */ 362 pr_devel("[%d] shortcut\n", slot); 363 BUG_ON(!assoc_array_ptr_is_shortcut(cursor)); 364 shortcut = assoc_array_ptr_to_shortcut(cursor); 365 BUG_ON(shortcut->back_pointer != parent); 366 BUG_ON(slot != -1 && shortcut->parent_slot != slot); 367 parent = cursor; 368 cursor = shortcut->next_node; 369 slot = -1; 370 BUG_ON(!assoc_array_ptr_is_node(cursor)); 371 } 372 373 pr_devel("[%d] node\n", slot); 374 node = assoc_array_ptr_to_node(cursor); 375 BUG_ON(node->back_pointer != parent); 376 BUG_ON(slot != -1 && node->parent_slot != slot); 377 slot = 0; 378 379 continue_node: 380 pr_devel("Node %p [back=%p]\n", node, node->back_pointer); 381 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 382 struct assoc_array_ptr *ptr = node->slots[slot]; 383 if (!ptr) 384 continue; 385 if (assoc_array_ptr_is_meta(ptr)) { 386 parent = cursor; 387 cursor = ptr; 388 goto move_to_meta; 389 } 390 391 if (ops) { 392 pr_devel("[%d] free leaf\n", slot); 393 ops->free_object(assoc_array_ptr_to_leaf(ptr)); 394 } 395 } 396 397 parent = node->back_pointer; 398 slot = node->parent_slot; 399 pr_devel("free node\n"); 400 kfree(node); 401 if (!parent) 402 return; /* Done */ 403 404 /* Move back up to the parent (may need to free a shortcut on 405 * the way up) */ 406 if (assoc_array_ptr_is_shortcut(parent)) { 407 shortcut = assoc_array_ptr_to_shortcut(parent); 408 BUG_ON(shortcut->next_node != cursor); 409 cursor = parent; 410 parent = shortcut->back_pointer; 411 slot = shortcut->parent_slot; 412 pr_devel("free shortcut\n"); 413 kfree(shortcut); 414 if (!parent) 415 return; 416 417 BUG_ON(!assoc_array_ptr_is_node(parent)); 418 } 419 420 /* Ascend to next slot in parent node */ 421 pr_devel("ascend to %p[%d]\n", parent, slot); 422 cursor = parent; 423 node = assoc_array_ptr_to_node(cursor); 424 slot++; 425 goto continue_node; 426 } 427 428 /** 429 * assoc_array_destroy - Destroy an associative array 430 * @array: The array to destroy. 431 * @ops: The operations to use. 432 * 433 * Discard all metadata and free all objects in an associative array. The 434 * array will be empty and ready to use again upon completion. This function 435 * cannot fail. 436 * 437 * The caller must prevent all other accesses whilst this takes place as no 438 * attempt is made to adjust pointers gracefully to permit RCU readlock-holding 439 * accesses to continue. On the other hand, no memory allocation is required. 440 */ 441 void assoc_array_destroy(struct assoc_array *array, 442 const struct assoc_array_ops *ops) 443 { 444 assoc_array_destroy_subtree(array->root, ops); 445 array->root = NULL; 446 } 447 448 /* 449 * Handle insertion into an empty tree. 450 */ 451 static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit) 452 { 453 struct assoc_array_node *new_n0; 454 455 pr_devel("-->%s()\n", __func__); 456 457 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 458 if (!new_n0) 459 return false; 460 461 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 462 edit->leaf_p = &new_n0->slots[0]; 463 edit->adjust_count_on = new_n0; 464 edit->set[0].ptr = &edit->array->root; 465 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 466 467 pr_devel("<--%s() = ok [no root]\n", __func__); 468 return true; 469 } 470 471 /* 472 * Handle insertion into a terminal node. 473 */ 474 static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, 475 const struct assoc_array_ops *ops, 476 const void *index_key, 477 struct assoc_array_walk_result *result) 478 { 479 struct assoc_array_shortcut *shortcut, *new_s0; 480 struct assoc_array_node *node, *new_n0, *new_n1, *side; 481 struct assoc_array_ptr *ptr; 482 unsigned long dissimilarity, base_seg, blank; 483 size_t keylen; 484 bool have_meta; 485 int level, diff; 486 int slot, next_slot, free_slot, i, j; 487 488 node = result->terminal_node.node; 489 level = result->terminal_node.level; 490 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot; 491 492 pr_devel("-->%s()\n", __func__); 493 494 /* We arrived at a node which doesn't have an onward node or shortcut 495 * pointer that we have to follow. This means that (a) the leaf we 496 * want must go here (either by insertion or replacement) or (b) we 497 * need to split this node and insert in one of the fragments. 498 */ 499 free_slot = -1; 500 501 /* Firstly, we have to check the leaves in this node to see if there's 502 * a matching one we should replace in place. 503 */ 504 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 505 ptr = node->slots[i]; 506 if (!ptr) { 507 free_slot = i; 508 continue; 509 } 510 if (assoc_array_ptr_is_leaf(ptr) && 511 ops->compare_object(assoc_array_ptr_to_leaf(ptr), 512 index_key)) { 513 pr_devel("replace in slot %d\n", i); 514 edit->leaf_p = &node->slots[i]; 515 edit->dead_leaf = node->slots[i]; 516 pr_devel("<--%s() = ok [replace]\n", __func__); 517 return true; 518 } 519 } 520 521 /* If there is a free slot in this node then we can just insert the 522 * leaf here. 523 */ 524 if (free_slot >= 0) { 525 pr_devel("insert in free slot %d\n", free_slot); 526 edit->leaf_p = &node->slots[free_slot]; 527 edit->adjust_count_on = node; 528 pr_devel("<--%s() = ok [insert]\n", __func__); 529 return true; 530 } 531 532 /* The node has no spare slots - so we're either going to have to split 533 * it or insert another node before it. 534 * 535 * Whatever, we're going to need at least two new nodes - so allocate 536 * those now. We may also need a new shortcut, but we deal with that 537 * when we need it. 538 */ 539 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 540 if (!new_n0) 541 return false; 542 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 543 new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 544 if (!new_n1) 545 return false; 546 edit->new_meta[1] = assoc_array_node_to_ptr(new_n1); 547 548 /* We need to find out how similar the leaves are. */ 549 pr_devel("no spare slots\n"); 550 have_meta = false; 551 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 552 ptr = node->slots[i]; 553 if (assoc_array_ptr_is_meta(ptr)) { 554 edit->segment_cache[i] = 0xff; 555 have_meta = true; 556 continue; 557 } 558 base_seg = ops->get_object_key_chunk( 559 assoc_array_ptr_to_leaf(ptr), level); 560 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 561 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; 562 } 563 564 if (have_meta) { 565 pr_devel("have meta\n"); 566 goto split_node; 567 } 568 569 /* The node contains only leaves */ 570 dissimilarity = 0; 571 base_seg = edit->segment_cache[0]; 572 for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++) 573 dissimilarity |= edit->segment_cache[i] ^ base_seg; 574 575 pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity); 576 577 if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) { 578 /* The old leaves all cluster in the same slot. We will need 579 * to insert a shortcut if the new node wants to cluster with them. 580 */ 581 if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0) 582 goto all_leaves_cluster_together; 583 584 /* Otherwise all the old leaves cluster in the same slot, but 585 * the new leaf wants to go into a different slot - so we 586 * create a new node (n0) to hold the new leaf and a pointer to 587 * a new node (n1) holding all the old leaves. 588 * 589 * This can be done by falling through to the node splitting 590 * path. 591 */ 592 pr_devel("present leaves cluster but not new leaf\n"); 593 } 594 595 split_node: 596 pr_devel("split node\n"); 597 598 /* We need to split the current node. The node must contain anything 599 * from a single leaf (in the one leaf case, this leaf will cluster 600 * with the new leaf) and the rest meta-pointers, to all leaves, some 601 * of which may cluster. 602 * 603 * It won't contain the case in which all the current leaves plus the 604 * new leaves want to cluster in the same slot. 605 * 606 * We need to expel at least two leaves out of a set consisting of the 607 * leaves in the node and the new leaf. The current meta pointers can 608 * just be copied as they shouldn't cluster with any of the leaves. 609 * 610 * We need a new node (n0) to replace the current one and a new node to 611 * take the expelled nodes (n1). 612 */ 613 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 614 new_n0->back_pointer = node->back_pointer; 615 new_n0->parent_slot = node->parent_slot; 616 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); 617 new_n1->parent_slot = -1; /* Need to calculate this */ 618 619 do_split_node: 620 pr_devel("do_split_node\n"); 621 622 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; 623 new_n1->nr_leaves_on_branch = 0; 624 625 /* Begin by finding two matching leaves. There have to be at least two 626 * that match - even if there are meta pointers - because any leaf that 627 * would match a slot with a meta pointer in it must be somewhere 628 * behind that meta pointer and cannot be here. Further, given N 629 * remaining leaf slots, we now have N+1 leaves to go in them. 630 */ 631 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 632 slot = edit->segment_cache[i]; 633 if (slot != 0xff) 634 for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++) 635 if (edit->segment_cache[j] == slot) 636 goto found_slot_for_multiple_occupancy; 637 } 638 found_slot_for_multiple_occupancy: 639 pr_devel("same slot: %x %x [%02x]\n", i, j, slot); 640 BUG_ON(i >= ASSOC_ARRAY_FAN_OUT); 641 BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1); 642 BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT); 643 644 new_n1->parent_slot = slot; 645 646 /* Metadata pointers cannot change slot */ 647 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) 648 if (assoc_array_ptr_is_meta(node->slots[i])) 649 new_n0->slots[i] = node->slots[i]; 650 else 651 new_n0->slots[i] = NULL; 652 BUG_ON(new_n0->slots[slot] != NULL); 653 new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1); 654 655 /* Filter the leaf pointers between the new nodes */ 656 free_slot = -1; 657 next_slot = 0; 658 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 659 if (assoc_array_ptr_is_meta(node->slots[i])) 660 continue; 661 if (edit->segment_cache[i] == slot) { 662 new_n1->slots[next_slot++] = node->slots[i]; 663 new_n1->nr_leaves_on_branch++; 664 } else { 665 do { 666 free_slot++; 667 } while (new_n0->slots[free_slot] != NULL); 668 new_n0->slots[free_slot] = node->slots[i]; 669 } 670 } 671 672 pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot); 673 674 if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) { 675 do { 676 free_slot++; 677 } while (new_n0->slots[free_slot] != NULL); 678 edit->leaf_p = &new_n0->slots[free_slot]; 679 edit->adjust_count_on = new_n0; 680 } else { 681 edit->leaf_p = &new_n1->slots[next_slot++]; 682 edit->adjust_count_on = new_n1; 683 } 684 685 BUG_ON(next_slot <= 1); 686 687 edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0); 688 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 689 if (edit->segment_cache[i] == 0xff) { 690 ptr = node->slots[i]; 691 BUG_ON(assoc_array_ptr_is_leaf(ptr)); 692 if (assoc_array_ptr_is_node(ptr)) { 693 side = assoc_array_ptr_to_node(ptr); 694 edit->set_backpointers[i] = &side->back_pointer; 695 } else { 696 shortcut = assoc_array_ptr_to_shortcut(ptr); 697 edit->set_backpointers[i] = &shortcut->back_pointer; 698 } 699 } 700 } 701 702 ptr = node->back_pointer; 703 if (!ptr) 704 edit->set[0].ptr = &edit->array->root; 705 else if (assoc_array_ptr_is_node(ptr)) 706 edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot]; 707 else 708 edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node; 709 edit->excised_meta[0] = assoc_array_node_to_ptr(node); 710 pr_devel("<--%s() = ok [split node]\n", __func__); 711 return true; 712 713 all_leaves_cluster_together: 714 /* All the leaves, new and old, want to cluster together in this node 715 * in the same slot, so we have to replace this node with a shortcut to 716 * skip over the identical parts of the key and then place a pair of 717 * nodes, one inside the other, at the end of the shortcut and 718 * distribute the keys between them. 719 * 720 * Firstly we need to work out where the leaves start diverging as a 721 * bit position into their keys so that we know how big the shortcut 722 * needs to be. 723 * 724 * We only need to make a single pass of N of the N+1 leaves because if 725 * any keys differ between themselves at bit X then at least one of 726 * them must also differ with the base key at bit X or before. 727 */ 728 pr_devel("all leaves cluster together\n"); 729 diff = INT_MAX; 730 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 731 int x = ops->diff_objects(assoc_array_ptr_to_leaf(node->slots[i]), 732 index_key); 733 if (x < diff) { 734 BUG_ON(x < 0); 735 diff = x; 736 } 737 } 738 BUG_ON(diff == INT_MAX); 739 BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP); 740 741 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); 742 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 743 744 new_s0 = kzalloc(struct_size(new_s0, index_key, keylen), GFP_KERNEL); 745 if (!new_s0) 746 return false; 747 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0); 748 749 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); 750 new_s0->back_pointer = node->back_pointer; 751 new_s0->parent_slot = node->parent_slot; 752 new_s0->next_node = assoc_array_node_to_ptr(new_n0); 753 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); 754 new_n0->parent_slot = 0; 755 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); 756 new_n1->parent_slot = -1; /* Need to calculate this */ 757 758 new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK; 759 pr_devel("skip_to_level = %d [diff %d]\n", level, diff); 760 BUG_ON(level <= 0); 761 762 for (i = 0; i < keylen; i++) 763 new_s0->index_key[i] = 764 ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE); 765 766 if (level & ASSOC_ARRAY_KEY_CHUNK_MASK) { 767 blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); 768 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); 769 new_s0->index_key[keylen - 1] &= ~blank; 770 } 771 772 /* This now reduces to a node splitting exercise for which we'll need 773 * to regenerate the disparity table. 774 */ 775 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 776 ptr = node->slots[i]; 777 base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr), 778 level); 779 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 780 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; 781 } 782 783 base_seg = ops->get_key_chunk(index_key, level); 784 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 785 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK; 786 goto do_split_node; 787 } 788 789 /* 790 * Handle insertion into the middle of a shortcut. 791 */ 792 static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit, 793 const struct assoc_array_ops *ops, 794 struct assoc_array_walk_result *result) 795 { 796 struct assoc_array_shortcut *shortcut, *new_s0, *new_s1; 797 struct assoc_array_node *node, *new_n0, *side; 798 unsigned long sc_segments, dissimilarity, blank; 799 size_t keylen; 800 int level, sc_level, diff; 801 int sc_slot; 802 803 shortcut = result->wrong_shortcut.shortcut; 804 level = result->wrong_shortcut.level; 805 sc_level = result->wrong_shortcut.sc_level; 806 sc_segments = result->wrong_shortcut.sc_segments; 807 dissimilarity = result->wrong_shortcut.dissimilarity; 808 809 pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n", 810 __func__, level, dissimilarity, sc_level); 811 812 /* We need to split a shortcut and insert a node between the two 813 * pieces. Zero-length pieces will be dispensed with entirely. 814 * 815 * First of all, we need to find out in which level the first 816 * difference was. 817 */ 818 diff = __ffs(dissimilarity); 819 diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK; 820 diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK; 821 pr_devel("diff=%d\n", diff); 822 823 if (!shortcut->back_pointer) { 824 edit->set[0].ptr = &edit->array->root; 825 } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) { 826 node = assoc_array_ptr_to_node(shortcut->back_pointer); 827 edit->set[0].ptr = &node->slots[shortcut->parent_slot]; 828 } else { 829 BUG(); 830 } 831 832 edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut); 833 834 /* Create a new node now since we're going to need it anyway */ 835 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 836 if (!new_n0) 837 return false; 838 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 839 edit->adjust_count_on = new_n0; 840 841 /* Insert a new shortcut before the new node if this segment isn't of 842 * zero length - otherwise we just connect the new node directly to the 843 * parent. 844 */ 845 level += ASSOC_ARRAY_LEVEL_STEP; 846 if (diff > level) { 847 pr_devel("pre-shortcut %d...%d\n", level, diff); 848 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); 849 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 850 851 new_s0 = kzalloc(struct_size(new_s0, index_key, keylen), 852 GFP_KERNEL); 853 if (!new_s0) 854 return false; 855 edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0); 856 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); 857 new_s0->back_pointer = shortcut->back_pointer; 858 new_s0->parent_slot = shortcut->parent_slot; 859 new_s0->next_node = assoc_array_node_to_ptr(new_n0); 860 new_s0->skip_to_level = diff; 861 862 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); 863 new_n0->parent_slot = 0; 864 865 memcpy(new_s0->index_key, shortcut->index_key, 866 flex_array_size(new_s0, index_key, keylen)); 867 868 blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); 869 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank); 870 new_s0->index_key[keylen - 1] &= ~blank; 871 } else { 872 pr_devel("no pre-shortcut\n"); 873 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 874 new_n0->back_pointer = shortcut->back_pointer; 875 new_n0->parent_slot = shortcut->parent_slot; 876 } 877 878 side = assoc_array_ptr_to_node(shortcut->next_node); 879 new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch; 880 881 /* We need to know which slot in the new node is going to take a 882 * metadata pointer. 883 */ 884 sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); 885 sc_slot &= ASSOC_ARRAY_FAN_MASK; 886 887 pr_devel("new slot %lx >> %d -> %d\n", 888 sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot); 889 890 /* Determine whether we need to follow the new node with a replacement 891 * for the current shortcut. We could in theory reuse the current 892 * shortcut if its parent slot number doesn't change - but that's a 893 * 1-in-16 chance so not worth expending the code upon. 894 */ 895 level = diff + ASSOC_ARRAY_LEVEL_STEP; 896 if (level < shortcut->skip_to_level) { 897 pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level); 898 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 899 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 900 901 new_s1 = kzalloc(struct_size(new_s1, index_key, keylen), 902 GFP_KERNEL); 903 if (!new_s1) 904 return false; 905 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1); 906 907 new_s1->back_pointer = assoc_array_node_to_ptr(new_n0); 908 new_s1->parent_slot = sc_slot; 909 new_s1->next_node = shortcut->next_node; 910 new_s1->skip_to_level = shortcut->skip_to_level; 911 912 new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1); 913 914 memcpy(new_s1->index_key, shortcut->index_key, 915 flex_array_size(new_s1, index_key, keylen)); 916 917 edit->set[1].ptr = &side->back_pointer; 918 edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1); 919 } else { 920 pr_devel("no post-shortcut\n"); 921 922 /* We don't have to replace the pointed-to node as long as we 923 * use memory barriers to make sure the parent slot number is 924 * changed before the back pointer (the parent slot number is 925 * irrelevant to the old parent shortcut). 926 */ 927 new_n0->slots[sc_slot] = shortcut->next_node; 928 edit->set_parent_slot[0].p = &side->parent_slot; 929 edit->set_parent_slot[0].to = sc_slot; 930 edit->set[1].ptr = &side->back_pointer; 931 edit->set[1].to = assoc_array_node_to_ptr(new_n0); 932 } 933 934 /* Install the new leaf in a spare slot in the new node. */ 935 if (sc_slot == 0) 936 edit->leaf_p = &new_n0->slots[1]; 937 else 938 edit->leaf_p = &new_n0->slots[0]; 939 940 pr_devel("<--%s() = ok [split shortcut]\n", __func__); 941 return edit; 942 } 943 944 /** 945 * assoc_array_insert - Script insertion of an object into an associative array 946 * @array: The array to insert into. 947 * @ops: The operations to use. 948 * @index_key: The key to insert at. 949 * @object: The object to insert. 950 * 951 * Precalculate and preallocate a script for the insertion or replacement of an 952 * object in an associative array. This results in an edit script that can 953 * either be applied or cancelled. 954 * 955 * The function returns a pointer to an edit script or -ENOMEM. 956 * 957 * The caller should lock against other modifications and must continue to hold 958 * the lock until assoc_array_apply_edit() has been called. 959 * 960 * Accesses to the tree may take place concurrently with this function, 961 * provided they hold the RCU read lock. 962 */ 963 struct assoc_array_edit *assoc_array_insert(struct assoc_array *array, 964 const struct assoc_array_ops *ops, 965 const void *index_key, 966 void *object) 967 { 968 struct assoc_array_walk_result result; 969 struct assoc_array_edit *edit; 970 971 pr_devel("-->%s()\n", __func__); 972 973 /* The leaf pointer we're given must not have the bottom bit set as we 974 * use those for type-marking the pointer. NULL pointers are also not 975 * allowed as they indicate an empty slot but we have to allow them 976 * here as they can be updated later. 977 */ 978 BUG_ON(assoc_array_ptr_is_meta(object)); 979 980 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 981 if (!edit) 982 return ERR_PTR(-ENOMEM); 983 edit->array = array; 984 edit->ops = ops; 985 edit->leaf = assoc_array_leaf_to_ptr(object); 986 edit->adjust_count_by = 1; 987 988 switch (assoc_array_walk(array, ops, index_key, &result)) { 989 case assoc_array_walk_tree_empty: 990 /* Allocate a root node if there isn't one yet */ 991 if (!assoc_array_insert_in_empty_tree(edit)) 992 goto enomem; 993 return edit; 994 995 case assoc_array_walk_found_terminal_node: 996 /* We found a node that doesn't have a node/shortcut pointer in 997 * the slot corresponding to the index key that we have to 998 * follow. 999 */ 1000 if (!assoc_array_insert_into_terminal_node(edit, ops, index_key, 1001 &result)) 1002 goto enomem; 1003 return edit; 1004 1005 case assoc_array_walk_found_wrong_shortcut: 1006 /* We found a shortcut that didn't match our key in a slot we 1007 * needed to follow. 1008 */ 1009 if (!assoc_array_insert_mid_shortcut(edit, ops, &result)) 1010 goto enomem; 1011 return edit; 1012 } 1013 1014 enomem: 1015 /* Clean up after an out of memory error */ 1016 pr_devel("enomem\n"); 1017 assoc_array_cancel_edit(edit); 1018 return ERR_PTR(-ENOMEM); 1019 } 1020 1021 /** 1022 * assoc_array_insert_set_object - Set the new object pointer in an edit script 1023 * @edit: The edit script to modify. 1024 * @object: The object pointer to set. 1025 * 1026 * Change the object to be inserted in an edit script. The object pointed to 1027 * by the old object is not freed. This must be done prior to applying the 1028 * script. 1029 */ 1030 void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object) 1031 { 1032 BUG_ON(!object); 1033 edit->leaf = assoc_array_leaf_to_ptr(object); 1034 } 1035 1036 struct assoc_array_delete_collapse_context { 1037 struct assoc_array_node *node; 1038 const void *skip_leaf; 1039 int slot; 1040 }; 1041 1042 /* 1043 * Subtree collapse to node iterator. 1044 */ 1045 static int assoc_array_delete_collapse_iterator(const void *leaf, 1046 void *iterator_data) 1047 { 1048 struct assoc_array_delete_collapse_context *collapse = iterator_data; 1049 1050 if (leaf == collapse->skip_leaf) 1051 return 0; 1052 1053 BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT); 1054 1055 collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf); 1056 return 0; 1057 } 1058 1059 /** 1060 * assoc_array_delete - Script deletion of an object from an associative array 1061 * @array: The array to search. 1062 * @ops: The operations to use. 1063 * @index_key: The key to the object. 1064 * 1065 * Precalculate and preallocate a script for the deletion of an object from an 1066 * associative array. This results in an edit script that can either be 1067 * applied or cancelled. 1068 * 1069 * The function returns a pointer to an edit script if the object was found, 1070 * NULL if the object was not found or -ENOMEM. 1071 * 1072 * The caller should lock against other modifications and must continue to hold 1073 * the lock until assoc_array_apply_edit() has been called. 1074 * 1075 * Accesses to the tree may take place concurrently with this function, 1076 * provided they hold the RCU read lock. 1077 */ 1078 struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, 1079 const struct assoc_array_ops *ops, 1080 const void *index_key) 1081 { 1082 struct assoc_array_delete_collapse_context collapse; 1083 struct assoc_array_walk_result result; 1084 struct assoc_array_node *node, *new_n0; 1085 struct assoc_array_edit *edit; 1086 struct assoc_array_ptr *ptr; 1087 bool has_meta; 1088 int slot, i; 1089 1090 pr_devel("-->%s()\n", __func__); 1091 1092 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1093 if (!edit) 1094 return ERR_PTR(-ENOMEM); 1095 edit->array = array; 1096 edit->ops = ops; 1097 edit->adjust_count_by = -1; 1098 1099 switch (assoc_array_walk(array, ops, index_key, &result)) { 1100 case assoc_array_walk_found_terminal_node: 1101 /* We found a node that should contain the leaf we've been 1102 * asked to remove - *if* it's in the tree. 1103 */ 1104 pr_devel("terminal_node\n"); 1105 node = result.terminal_node.node; 1106 1107 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1108 ptr = node->slots[slot]; 1109 if (ptr && 1110 assoc_array_ptr_is_leaf(ptr) && 1111 ops->compare_object(assoc_array_ptr_to_leaf(ptr), 1112 index_key)) 1113 goto found_leaf; 1114 } 1115 fallthrough; 1116 case assoc_array_walk_tree_empty: 1117 case assoc_array_walk_found_wrong_shortcut: 1118 default: 1119 assoc_array_cancel_edit(edit); 1120 pr_devel("not found\n"); 1121 return NULL; 1122 } 1123 1124 found_leaf: 1125 BUG_ON(array->nr_leaves_on_tree <= 0); 1126 1127 /* In the simplest form of deletion we just clear the slot and release 1128 * the leaf after a suitable interval. 1129 */ 1130 edit->dead_leaf = node->slots[slot]; 1131 edit->set[0].ptr = &node->slots[slot]; 1132 edit->set[0].to = NULL; 1133 edit->adjust_count_on = node; 1134 1135 /* If that concludes erasure of the last leaf, then delete the entire 1136 * internal array. 1137 */ 1138 if (array->nr_leaves_on_tree == 1) { 1139 edit->set[1].ptr = &array->root; 1140 edit->set[1].to = NULL; 1141 edit->adjust_count_on = NULL; 1142 edit->excised_subtree = array->root; 1143 pr_devel("all gone\n"); 1144 return edit; 1145 } 1146 1147 /* However, we'd also like to clear up some metadata blocks if we 1148 * possibly can. 1149 * 1150 * We go for a simple algorithm of: if this node has FAN_OUT or fewer 1151 * leaves in it, then attempt to collapse it - and attempt to 1152 * recursively collapse up the tree. 1153 * 1154 * We could also try and collapse in partially filled subtrees to take 1155 * up space in this node. 1156 */ 1157 if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { 1158 struct assoc_array_node *parent, *grandparent; 1159 struct assoc_array_ptr *ptr; 1160 1161 /* First of all, we need to know if this node has metadata so 1162 * that we don't try collapsing if all the leaves are already 1163 * here. 1164 */ 1165 has_meta = false; 1166 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 1167 ptr = node->slots[i]; 1168 if (assoc_array_ptr_is_meta(ptr)) { 1169 has_meta = true; 1170 break; 1171 } 1172 } 1173 1174 pr_devel("leaves: %ld [m=%d]\n", 1175 node->nr_leaves_on_branch - 1, has_meta); 1176 1177 /* Look further up the tree to see if we can collapse this node 1178 * into a more proximal node too. 1179 */ 1180 parent = node; 1181 collapse_up: 1182 pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch); 1183 1184 ptr = parent->back_pointer; 1185 if (!ptr) 1186 goto do_collapse; 1187 if (assoc_array_ptr_is_shortcut(ptr)) { 1188 struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr); 1189 ptr = s->back_pointer; 1190 if (!ptr) 1191 goto do_collapse; 1192 } 1193 1194 grandparent = assoc_array_ptr_to_node(ptr); 1195 if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { 1196 parent = grandparent; 1197 goto collapse_up; 1198 } 1199 1200 do_collapse: 1201 /* There's no point collapsing if the original node has no meta 1202 * pointers to discard and if we didn't merge into one of that 1203 * node's ancestry. 1204 */ 1205 if (has_meta || parent != node) { 1206 node = parent; 1207 1208 /* Create a new node to collapse into */ 1209 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 1210 if (!new_n0) 1211 goto enomem; 1212 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 1213 1214 new_n0->back_pointer = node->back_pointer; 1215 new_n0->parent_slot = node->parent_slot; 1216 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; 1217 edit->adjust_count_on = new_n0; 1218 1219 collapse.node = new_n0; 1220 collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf); 1221 collapse.slot = 0; 1222 assoc_array_subtree_iterate(assoc_array_node_to_ptr(node), 1223 node->back_pointer, 1224 assoc_array_delete_collapse_iterator, 1225 &collapse); 1226 pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch); 1227 BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1); 1228 1229 if (!node->back_pointer) { 1230 edit->set[1].ptr = &array->root; 1231 } else if (assoc_array_ptr_is_leaf(node->back_pointer)) { 1232 BUG(); 1233 } else if (assoc_array_ptr_is_node(node->back_pointer)) { 1234 struct assoc_array_node *p = 1235 assoc_array_ptr_to_node(node->back_pointer); 1236 edit->set[1].ptr = &p->slots[node->parent_slot]; 1237 } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) { 1238 struct assoc_array_shortcut *s = 1239 assoc_array_ptr_to_shortcut(node->back_pointer); 1240 edit->set[1].ptr = &s->next_node; 1241 } 1242 edit->set[1].to = assoc_array_node_to_ptr(new_n0); 1243 edit->excised_subtree = assoc_array_node_to_ptr(node); 1244 } 1245 } 1246 1247 return edit; 1248 1249 enomem: 1250 /* Clean up after an out of memory error */ 1251 pr_devel("enomem\n"); 1252 assoc_array_cancel_edit(edit); 1253 return ERR_PTR(-ENOMEM); 1254 } 1255 1256 /** 1257 * assoc_array_clear - Script deletion of all objects from an associative array 1258 * @array: The array to clear. 1259 * @ops: The operations to use. 1260 * 1261 * Precalculate and preallocate a script for the deletion of all the objects 1262 * from an associative array. This results in an edit script that can either 1263 * be applied or cancelled. 1264 * 1265 * The function returns a pointer to an edit script if there are objects to be 1266 * deleted, NULL if there are no objects in the array or -ENOMEM. 1267 * 1268 * The caller should lock against other modifications and must continue to hold 1269 * the lock until assoc_array_apply_edit() has been called. 1270 * 1271 * Accesses to the tree may take place concurrently with this function, 1272 * provided they hold the RCU read lock. 1273 */ 1274 struct assoc_array_edit *assoc_array_clear(struct assoc_array *array, 1275 const struct assoc_array_ops *ops) 1276 { 1277 struct assoc_array_edit *edit; 1278 1279 pr_devel("-->%s()\n", __func__); 1280 1281 if (!array->root) 1282 return NULL; 1283 1284 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1285 if (!edit) 1286 return ERR_PTR(-ENOMEM); 1287 edit->array = array; 1288 edit->ops = ops; 1289 edit->set[1].ptr = &array->root; 1290 edit->set[1].to = NULL; 1291 edit->excised_subtree = array->root; 1292 edit->ops_for_excised_subtree = ops; 1293 pr_devel("all gone\n"); 1294 return edit; 1295 } 1296 1297 /* 1298 * Handle the deferred destruction after an applied edit. 1299 */ 1300 static void assoc_array_rcu_cleanup(struct rcu_head *head) 1301 { 1302 struct assoc_array_edit *edit = 1303 container_of(head, struct assoc_array_edit, rcu); 1304 int i; 1305 1306 pr_devel("-->%s()\n", __func__); 1307 1308 if (edit->dead_leaf) 1309 edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf)); 1310 for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++) 1311 if (edit->excised_meta[i]) 1312 kfree(assoc_array_ptr_to_node(edit->excised_meta[i])); 1313 1314 if (edit->excised_subtree) { 1315 BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree)); 1316 if (assoc_array_ptr_is_node(edit->excised_subtree)) { 1317 struct assoc_array_node *n = 1318 assoc_array_ptr_to_node(edit->excised_subtree); 1319 n->back_pointer = NULL; 1320 } else { 1321 struct assoc_array_shortcut *s = 1322 assoc_array_ptr_to_shortcut(edit->excised_subtree); 1323 s->back_pointer = NULL; 1324 } 1325 assoc_array_destroy_subtree(edit->excised_subtree, 1326 edit->ops_for_excised_subtree); 1327 } 1328 1329 kfree(edit); 1330 } 1331 1332 /** 1333 * assoc_array_apply_edit - Apply an edit script to an associative array 1334 * @edit: The script to apply. 1335 * 1336 * Apply an edit script to an associative array to effect an insertion, 1337 * deletion or clearance. As the edit script includes preallocated memory, 1338 * this is guaranteed not to fail. 1339 * 1340 * The edit script, dead objects and dead metadata will be scheduled for 1341 * destruction after an RCU grace period to permit those doing read-only 1342 * accesses on the array to continue to do so under the RCU read lock whilst 1343 * the edit is taking place. 1344 */ 1345 void assoc_array_apply_edit(struct assoc_array_edit *edit) 1346 { 1347 struct assoc_array_shortcut *shortcut; 1348 struct assoc_array_node *node; 1349 struct assoc_array_ptr *ptr; 1350 int i; 1351 1352 pr_devel("-->%s()\n", __func__); 1353 1354 smp_wmb(); 1355 if (edit->leaf_p) 1356 *edit->leaf_p = edit->leaf; 1357 1358 smp_wmb(); 1359 for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++) 1360 if (edit->set_parent_slot[i].p) 1361 *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to; 1362 1363 smp_wmb(); 1364 for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++) 1365 if (edit->set_backpointers[i]) 1366 *edit->set_backpointers[i] = edit->set_backpointers_to; 1367 1368 smp_wmb(); 1369 for (i = 0; i < ARRAY_SIZE(edit->set); i++) 1370 if (edit->set[i].ptr) 1371 *edit->set[i].ptr = edit->set[i].to; 1372 1373 if (edit->array->root == NULL) { 1374 edit->array->nr_leaves_on_tree = 0; 1375 } else if (edit->adjust_count_on) { 1376 node = edit->adjust_count_on; 1377 for (;;) { 1378 node->nr_leaves_on_branch += edit->adjust_count_by; 1379 1380 ptr = node->back_pointer; 1381 if (!ptr) 1382 break; 1383 if (assoc_array_ptr_is_shortcut(ptr)) { 1384 shortcut = assoc_array_ptr_to_shortcut(ptr); 1385 ptr = shortcut->back_pointer; 1386 if (!ptr) 1387 break; 1388 } 1389 BUG_ON(!assoc_array_ptr_is_node(ptr)); 1390 node = assoc_array_ptr_to_node(ptr); 1391 } 1392 1393 edit->array->nr_leaves_on_tree += edit->adjust_count_by; 1394 } 1395 1396 call_rcu(&edit->rcu, assoc_array_rcu_cleanup); 1397 } 1398 1399 /** 1400 * assoc_array_cancel_edit - Discard an edit script. 1401 * @edit: The script to discard. 1402 * 1403 * Free an edit script and all the preallocated data it holds without making 1404 * any changes to the associative array it was intended for. 1405 * 1406 * NOTE! In the case of an insertion script, this does _not_ release the leaf 1407 * that was to be inserted. That is left to the caller. 1408 */ 1409 void assoc_array_cancel_edit(struct assoc_array_edit *edit) 1410 { 1411 struct assoc_array_ptr *ptr; 1412 int i; 1413 1414 pr_devel("-->%s()\n", __func__); 1415 1416 /* Clean up after an out of memory error */ 1417 for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) { 1418 ptr = edit->new_meta[i]; 1419 if (ptr) { 1420 if (assoc_array_ptr_is_node(ptr)) 1421 kfree(assoc_array_ptr_to_node(ptr)); 1422 else 1423 kfree(assoc_array_ptr_to_shortcut(ptr)); 1424 } 1425 } 1426 kfree(edit); 1427 } 1428 1429 /** 1430 * assoc_array_gc - Garbage collect an associative array. 1431 * @array: The array to clean. 1432 * @ops: The operations to use. 1433 * @iterator: A callback function to pass judgement on each object. 1434 * @iterator_data: Private data for the callback function. 1435 * 1436 * Collect garbage from an associative array and pack down the internal tree to 1437 * save memory. 1438 * 1439 * The iterator function is asked to pass judgement upon each object in the 1440 * array. If it returns false, the object is discard and if it returns true, 1441 * the object is kept. If it returns true, it must increment the object's 1442 * usage count (or whatever it needs to do to retain it) before returning. 1443 * 1444 * This function returns 0 if successful or -ENOMEM if out of memory. In the 1445 * latter case, the array is not changed. 1446 * 1447 * The caller should lock against other modifications and must continue to hold 1448 * the lock until assoc_array_apply_edit() has been called. 1449 * 1450 * Accesses to the tree may take place concurrently with this function, 1451 * provided they hold the RCU read lock. 1452 */ 1453 int assoc_array_gc(struct assoc_array *array, 1454 const struct assoc_array_ops *ops, 1455 bool (*iterator)(void *object, void *iterator_data), 1456 void *iterator_data) 1457 { 1458 struct assoc_array_shortcut *shortcut, *new_s; 1459 struct assoc_array_node *node, *new_n; 1460 struct assoc_array_edit *edit; 1461 struct assoc_array_ptr *cursor, *ptr; 1462 struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp; 1463 unsigned long nr_leaves_on_tree; 1464 bool retained; 1465 int keylen, slot, nr_free, next_slot, i; 1466 1467 pr_devel("-->%s()\n", __func__); 1468 1469 if (!array->root) 1470 return 0; 1471 1472 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1473 if (!edit) 1474 return -ENOMEM; 1475 edit->array = array; 1476 edit->ops = ops; 1477 edit->ops_for_excised_subtree = ops; 1478 edit->set[0].ptr = &array->root; 1479 edit->excised_subtree = array->root; 1480 1481 new_root = new_parent = NULL; 1482 new_ptr_pp = &new_root; 1483 cursor = array->root; 1484 1485 descend: 1486 /* If this point is a shortcut, then we need to duplicate it and 1487 * advance the target cursor. 1488 */ 1489 if (assoc_array_ptr_is_shortcut(cursor)) { 1490 shortcut = assoc_array_ptr_to_shortcut(cursor); 1491 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 1492 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 1493 new_s = kmalloc(struct_size(new_s, index_key, keylen), 1494 GFP_KERNEL); 1495 if (!new_s) 1496 goto enomem; 1497 pr_devel("dup shortcut %p -> %p\n", shortcut, new_s); 1498 memcpy(new_s, shortcut, struct_size(new_s, index_key, keylen)); 1499 new_s->back_pointer = new_parent; 1500 new_s->parent_slot = shortcut->parent_slot; 1501 *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s); 1502 new_ptr_pp = &new_s->next_node; 1503 cursor = shortcut->next_node; 1504 } 1505 1506 /* Duplicate the node at this position */ 1507 node = assoc_array_ptr_to_node(cursor); 1508 new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 1509 if (!new_n) 1510 goto enomem; 1511 pr_devel("dup node %p -> %p\n", node, new_n); 1512 new_n->back_pointer = new_parent; 1513 new_n->parent_slot = node->parent_slot; 1514 *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n); 1515 new_ptr_pp = NULL; 1516 slot = 0; 1517 1518 continue_node: 1519 /* Filter across any leaves and gc any subtrees */ 1520 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1521 ptr = node->slots[slot]; 1522 if (!ptr) 1523 continue; 1524 1525 if (assoc_array_ptr_is_leaf(ptr)) { 1526 if (iterator(assoc_array_ptr_to_leaf(ptr), 1527 iterator_data)) 1528 /* The iterator will have done any reference 1529 * counting on the object for us. 1530 */ 1531 new_n->slots[slot] = ptr; 1532 continue; 1533 } 1534 1535 new_ptr_pp = &new_n->slots[slot]; 1536 cursor = ptr; 1537 goto descend; 1538 } 1539 1540 retry_compress: 1541 pr_devel("-- compress node %p --\n", new_n); 1542 1543 /* Count up the number of empty slots in this node and work out the 1544 * subtree leaf count. 1545 */ 1546 new_n->nr_leaves_on_branch = 0; 1547 nr_free = 0; 1548 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1549 ptr = new_n->slots[slot]; 1550 if (!ptr) 1551 nr_free++; 1552 else if (assoc_array_ptr_is_leaf(ptr)) 1553 new_n->nr_leaves_on_branch++; 1554 } 1555 pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch); 1556 1557 /* See what we can fold in */ 1558 retained = false; 1559 next_slot = 0; 1560 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1561 struct assoc_array_shortcut *s; 1562 struct assoc_array_node *child; 1563 1564 ptr = new_n->slots[slot]; 1565 if (!ptr || assoc_array_ptr_is_leaf(ptr)) 1566 continue; 1567 1568 s = NULL; 1569 if (assoc_array_ptr_is_shortcut(ptr)) { 1570 s = assoc_array_ptr_to_shortcut(ptr); 1571 ptr = s->next_node; 1572 } 1573 1574 child = assoc_array_ptr_to_node(ptr); 1575 new_n->nr_leaves_on_branch += child->nr_leaves_on_branch; 1576 1577 if (child->nr_leaves_on_branch <= nr_free + 1) { 1578 /* Fold the child node into this one */ 1579 pr_devel("[%d] fold node %lu/%d [nx %d]\n", 1580 slot, child->nr_leaves_on_branch, nr_free + 1, 1581 next_slot); 1582 1583 /* We would already have reaped an intervening shortcut 1584 * on the way back up the tree. 1585 */ 1586 BUG_ON(s); 1587 1588 new_n->slots[slot] = NULL; 1589 nr_free++; 1590 if (slot < next_slot) 1591 next_slot = slot; 1592 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 1593 struct assoc_array_ptr *p = child->slots[i]; 1594 if (!p) 1595 continue; 1596 BUG_ON(assoc_array_ptr_is_meta(p)); 1597 while (new_n->slots[next_slot]) 1598 next_slot++; 1599 BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT); 1600 new_n->slots[next_slot++] = p; 1601 nr_free--; 1602 } 1603 kfree(child); 1604 } else { 1605 pr_devel("[%d] retain node %lu/%d [nx %d]\n", 1606 slot, child->nr_leaves_on_branch, nr_free + 1, 1607 next_slot); 1608 retained = true; 1609 } 1610 } 1611 1612 if (retained && new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { 1613 pr_devel("internal nodes remain despite enough space, retrying\n"); 1614 goto retry_compress; 1615 } 1616 pr_devel("after: %lu\n", new_n->nr_leaves_on_branch); 1617 1618 nr_leaves_on_tree = new_n->nr_leaves_on_branch; 1619 1620 /* Excise this node if it is singly occupied by a shortcut */ 1621 if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) { 1622 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) 1623 if ((ptr = new_n->slots[slot])) 1624 break; 1625 1626 if (assoc_array_ptr_is_meta(ptr) && 1627 assoc_array_ptr_is_shortcut(ptr)) { 1628 pr_devel("excise node %p with 1 shortcut\n", new_n); 1629 new_s = assoc_array_ptr_to_shortcut(ptr); 1630 new_parent = new_n->back_pointer; 1631 slot = new_n->parent_slot; 1632 kfree(new_n); 1633 if (!new_parent) { 1634 new_s->back_pointer = NULL; 1635 new_s->parent_slot = 0; 1636 new_root = ptr; 1637 goto gc_complete; 1638 } 1639 1640 if (assoc_array_ptr_is_shortcut(new_parent)) { 1641 /* We can discard any preceding shortcut also */ 1642 struct assoc_array_shortcut *s = 1643 assoc_array_ptr_to_shortcut(new_parent); 1644 1645 pr_devel("excise preceding shortcut\n"); 1646 1647 new_parent = new_s->back_pointer = s->back_pointer; 1648 slot = new_s->parent_slot = s->parent_slot; 1649 kfree(s); 1650 if (!new_parent) { 1651 new_s->back_pointer = NULL; 1652 new_s->parent_slot = 0; 1653 new_root = ptr; 1654 goto gc_complete; 1655 } 1656 } 1657 1658 new_s->back_pointer = new_parent; 1659 new_s->parent_slot = slot; 1660 new_n = assoc_array_ptr_to_node(new_parent); 1661 new_n->slots[slot] = ptr; 1662 goto ascend_old_tree; 1663 } 1664 } 1665 1666 /* Excise any shortcuts we might encounter that point to nodes that 1667 * only contain leaves. 1668 */ 1669 ptr = new_n->back_pointer; 1670 if (!ptr) 1671 goto gc_complete; 1672 1673 if (assoc_array_ptr_is_shortcut(ptr)) { 1674 new_s = assoc_array_ptr_to_shortcut(ptr); 1675 new_parent = new_s->back_pointer; 1676 slot = new_s->parent_slot; 1677 1678 if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { 1679 struct assoc_array_node *n; 1680 1681 pr_devel("excise shortcut\n"); 1682 new_n->back_pointer = new_parent; 1683 new_n->parent_slot = slot; 1684 kfree(new_s); 1685 if (!new_parent) { 1686 new_root = assoc_array_node_to_ptr(new_n); 1687 goto gc_complete; 1688 } 1689 1690 n = assoc_array_ptr_to_node(new_parent); 1691 n->slots[slot] = assoc_array_node_to_ptr(new_n); 1692 } 1693 } else { 1694 new_parent = ptr; 1695 } 1696 new_n = assoc_array_ptr_to_node(new_parent); 1697 1698 ascend_old_tree: 1699 ptr = node->back_pointer; 1700 if (assoc_array_ptr_is_shortcut(ptr)) { 1701 shortcut = assoc_array_ptr_to_shortcut(ptr); 1702 slot = shortcut->parent_slot; 1703 cursor = shortcut->back_pointer; 1704 if (!cursor) 1705 goto gc_complete; 1706 } else { 1707 slot = node->parent_slot; 1708 cursor = ptr; 1709 } 1710 BUG_ON(!cursor); 1711 node = assoc_array_ptr_to_node(cursor); 1712 slot++; 1713 goto continue_node; 1714 1715 gc_complete: 1716 edit->set[0].to = new_root; 1717 assoc_array_apply_edit(edit); 1718 array->nr_leaves_on_tree = nr_leaves_on_tree; 1719 return 0; 1720 1721 enomem: 1722 pr_devel("enomem\n"); 1723 assoc_array_destroy_subtree(new_root, edit->ops); 1724 kfree(edit); 1725 return -ENOMEM; 1726 } 1727