1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Maple Tree implementation 4 * Copyright (c) 2018-2022 Oracle Corporation 5 * Authors: Liam R. Howlett <Liam.Howlett@oracle.com> 6 * Matthew Wilcox <willy@infradead.org> 7 */ 8 9 /* 10 * DOC: Interesting implementation details of the Maple Tree 11 * 12 * Each node type has a number of slots for entries and a number of slots for 13 * pivots. In the case of dense nodes, the pivots are implied by the position 14 * and are simply the slot index + the minimum of the node. 15 * 16 * In regular B-Tree terms, pivots are called keys. The term pivot is used to 17 * indicate that the tree is specifying ranges, Pivots may appear in the 18 * subtree with an entry attached to the value where as keys are unique to a 19 * specific position of a B-tree. Pivot values are inclusive of the slot with 20 * the same index. 21 * 22 * 23 * The following illustrates the layout of a range64 nodes slots and pivots. 24 * 25 * 26 * Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 | 27 * ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬ 28 * │ │ │ │ │ │ │ │ └─ Implied maximum 29 * │ │ │ │ │ │ │ └─ Pivot 14 30 * │ │ │ │ │ │ └─ Pivot 13 31 * │ │ │ │ │ └─ Pivot 12 32 * │ │ │ │ └─ Pivot 11 33 * │ │ │ └─ Pivot 2 34 * │ │ └─ Pivot 1 35 * │ └─ Pivot 0 36 * └─ Implied minimum 37 * 38 * Slot contents: 39 * Internal (non-leaf) nodes contain pointers to other nodes. 40 * Leaf nodes contain entries. 41 * 42 * The location of interest is often referred to as an offset. All offsets have 43 * a slot, but the last offset has an implied pivot from the node above (or 44 * UINT_MAX for the root node. 45 * 46 * Ranges complicate certain write activities. When modifying any of 47 * the B-tree variants, it is known that one entry will either be added or 48 * deleted. When modifying the Maple Tree, one store operation may overwrite 49 * the entire data set, or one half of the tree, or the middle half of the tree. 50 * 51 */ 52 53 54 #include <linux/maple_tree.h> 55 #include <linux/xarray.h> 56 #include <linux/types.h> 57 #include <linux/export.h> 58 #include <linux/slab.h> 59 #include <linux/limits.h> 60 #include <asm/barrier.h> 61 62 #define CREATE_TRACE_POINTS 63 #include <trace/events/maple_tree.h> 64 65 #define MA_ROOT_PARENT 1 66 67 /* 68 * Maple state flags 69 * * MA_STATE_BULK - Bulk insert mode 70 * * MA_STATE_REBALANCE - Indicate a rebalance during bulk insert 71 * * MA_STATE_PREALLOC - Preallocated nodes, WARN_ON allocation 72 */ 73 #define MA_STATE_BULK 1 74 #define MA_STATE_REBALANCE 2 75 #define MA_STATE_PREALLOC 4 76 77 #define ma_parent_ptr(x) ((struct maple_pnode *)(x)) 78 #define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT) 79 #define ma_mnode_ptr(x) ((struct maple_node *)(x)) 80 #define ma_enode_ptr(x) ((struct maple_enode *)(x)) 81 static struct kmem_cache *maple_node_cache; 82 83 #ifdef CONFIG_DEBUG_MAPLE_TREE 84 static const unsigned long mt_max[] = { 85 [maple_dense] = MAPLE_NODE_SLOTS, 86 [maple_leaf_64] = ULONG_MAX, 87 [maple_range_64] = ULONG_MAX, 88 [maple_arange_64] = ULONG_MAX, 89 }; 90 #define mt_node_max(x) mt_max[mte_node_type(x)] 91 #endif 92 93 static const unsigned char mt_slots[] = { 94 [maple_dense] = MAPLE_NODE_SLOTS, 95 [maple_leaf_64] = MAPLE_RANGE64_SLOTS, 96 [maple_range_64] = MAPLE_RANGE64_SLOTS, 97 [maple_arange_64] = MAPLE_ARANGE64_SLOTS, 98 }; 99 #define mt_slot_count(x) mt_slots[mte_node_type(x)] 100 101 static const unsigned char mt_pivots[] = { 102 [maple_dense] = 0, 103 [maple_leaf_64] = MAPLE_RANGE64_SLOTS - 1, 104 [maple_range_64] = MAPLE_RANGE64_SLOTS - 1, 105 [maple_arange_64] = MAPLE_ARANGE64_SLOTS - 1, 106 }; 107 #define mt_pivot_count(x) mt_pivots[mte_node_type(x)] 108 109 static const unsigned char mt_min_slots[] = { 110 [maple_dense] = MAPLE_NODE_SLOTS / 2, 111 [maple_leaf_64] = (MAPLE_RANGE64_SLOTS / 2) - 2, 112 [maple_range_64] = (MAPLE_RANGE64_SLOTS / 2) - 2, 113 [maple_arange_64] = (MAPLE_ARANGE64_SLOTS / 2) - 1, 114 }; 115 #define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)] 116 117 #define MAPLE_BIG_NODE_SLOTS (MAPLE_RANGE64_SLOTS * 2 + 2) 118 #define MAPLE_BIG_NODE_GAPS (MAPLE_ARANGE64_SLOTS * 2 + 1) 119 120 struct maple_big_node { 121 struct maple_pnode *parent; 122 unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1]; 123 union { 124 struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS]; 125 struct { 126 unsigned long padding[MAPLE_BIG_NODE_GAPS]; 127 unsigned long gap[MAPLE_BIG_NODE_GAPS]; 128 }; 129 }; 130 unsigned char b_end; 131 enum maple_type type; 132 }; 133 134 /* 135 * The maple_subtree_state is used to build a tree to replace a segment of an 136 * existing tree in a more atomic way. Any walkers of the older tree will hit a 137 * dead node and restart on updates. 138 */ 139 struct maple_subtree_state { 140 struct ma_state *orig_l; /* Original left side of subtree */ 141 struct ma_state *orig_r; /* Original right side of subtree */ 142 struct ma_state *l; /* New left side of subtree */ 143 struct ma_state *m; /* New middle of subtree (rare) */ 144 struct ma_state *r; /* New right side of subtree */ 145 struct ma_topiary *free; /* nodes to be freed */ 146 struct ma_topiary *destroy; /* Nodes to be destroyed (walked and freed) */ 147 struct maple_big_node *bn; 148 }; 149 150 #ifdef CONFIG_KASAN_STACK 151 /* Prevent mas_wr_bnode() from exceeding the stack frame limit */ 152 #define noinline_for_kasan noinline_for_stack 153 #else 154 #define noinline_for_kasan inline 155 #endif 156 157 /* Functions */ 158 static inline struct maple_node *mt_alloc_one(gfp_t gfp) 159 { 160 return kmem_cache_alloc(maple_node_cache, gfp); 161 } 162 163 static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes) 164 { 165 return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes); 166 } 167 168 static inline void mt_free_bulk(size_t size, void __rcu **nodes) 169 { 170 kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes); 171 } 172 173 static void mt_free_rcu(struct rcu_head *head) 174 { 175 struct maple_node *node = container_of(head, struct maple_node, rcu); 176 177 kmem_cache_free(maple_node_cache, node); 178 } 179 180 /* 181 * ma_free_rcu() - Use rcu callback to free a maple node 182 * @node: The node to free 183 * 184 * The maple tree uses the parent pointer to indicate this node is no longer in 185 * use and will be freed. 186 */ 187 static void ma_free_rcu(struct maple_node *node) 188 { 189 WARN_ON(node->parent != ma_parent_ptr(node)); 190 call_rcu(&node->rcu, mt_free_rcu); 191 } 192 193 static void mas_set_height(struct ma_state *mas) 194 { 195 unsigned int new_flags = mas->tree->ma_flags; 196 197 new_flags &= ~MT_FLAGS_HEIGHT_MASK; 198 MAS_BUG_ON(mas, mas->depth > MAPLE_HEIGHT_MAX); 199 new_flags |= mas->depth << MT_FLAGS_HEIGHT_OFFSET; 200 mas->tree->ma_flags = new_flags; 201 } 202 203 static unsigned int mas_mt_height(struct ma_state *mas) 204 { 205 return mt_height(mas->tree); 206 } 207 208 static inline enum maple_type mte_node_type(const struct maple_enode *entry) 209 { 210 return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) & 211 MAPLE_NODE_TYPE_MASK; 212 } 213 214 static inline bool ma_is_dense(const enum maple_type type) 215 { 216 return type < maple_leaf_64; 217 } 218 219 static inline bool ma_is_leaf(const enum maple_type type) 220 { 221 return type < maple_range_64; 222 } 223 224 static inline bool mte_is_leaf(const struct maple_enode *entry) 225 { 226 return ma_is_leaf(mte_node_type(entry)); 227 } 228 229 /* 230 * We also reserve values with the bottom two bits set to '10' which are 231 * below 4096 232 */ 233 static inline bool mt_is_reserved(const void *entry) 234 { 235 return ((unsigned long)entry < MAPLE_RESERVED_RANGE) && 236 xa_is_internal(entry); 237 } 238 239 static inline void mas_set_err(struct ma_state *mas, long err) 240 { 241 mas->node = MA_ERROR(err); 242 } 243 244 static inline bool mas_is_ptr(const struct ma_state *mas) 245 { 246 return mas->node == MAS_ROOT; 247 } 248 249 static inline bool mas_is_start(const struct ma_state *mas) 250 { 251 return mas->node == MAS_START; 252 } 253 254 bool mas_is_err(struct ma_state *mas) 255 { 256 return xa_is_err(mas->node); 257 } 258 259 static __always_inline bool mas_is_overflow(struct ma_state *mas) 260 { 261 if (unlikely(mas->node == MAS_OVERFLOW)) 262 return true; 263 264 return false; 265 } 266 267 static __always_inline bool mas_is_underflow(struct ma_state *mas) 268 { 269 if (unlikely(mas->node == MAS_UNDERFLOW)) 270 return true; 271 272 return false; 273 } 274 275 static inline bool mas_searchable(struct ma_state *mas) 276 { 277 if (mas_is_none(mas)) 278 return false; 279 280 if (mas_is_ptr(mas)) 281 return false; 282 283 return true; 284 } 285 286 static inline struct maple_node *mte_to_node(const struct maple_enode *entry) 287 { 288 return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK); 289 } 290 291 /* 292 * mte_to_mat() - Convert a maple encoded node to a maple topiary node. 293 * @entry: The maple encoded node 294 * 295 * Return: a maple topiary pointer 296 */ 297 static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry) 298 { 299 return (struct maple_topiary *) 300 ((unsigned long)entry & ~MAPLE_NODE_MASK); 301 } 302 303 /* 304 * mas_mn() - Get the maple state node. 305 * @mas: The maple state 306 * 307 * Return: the maple node (not encoded - bare pointer). 308 */ 309 static inline struct maple_node *mas_mn(const struct ma_state *mas) 310 { 311 return mte_to_node(mas->node); 312 } 313 314 /* 315 * mte_set_node_dead() - Set a maple encoded node as dead. 316 * @mn: The maple encoded node. 317 */ 318 static inline void mte_set_node_dead(struct maple_enode *mn) 319 { 320 mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn)); 321 smp_wmb(); /* Needed for RCU */ 322 } 323 324 /* Bit 1 indicates the root is a node */ 325 #define MAPLE_ROOT_NODE 0x02 326 /* maple_type stored bit 3-6 */ 327 #define MAPLE_ENODE_TYPE_SHIFT 0x03 328 /* Bit 2 means a NULL somewhere below */ 329 #define MAPLE_ENODE_NULL 0x04 330 331 static inline struct maple_enode *mt_mk_node(const struct maple_node *node, 332 enum maple_type type) 333 { 334 return (void *)((unsigned long)node | 335 (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL); 336 } 337 338 static inline void *mte_mk_root(const struct maple_enode *node) 339 { 340 return (void *)((unsigned long)node | MAPLE_ROOT_NODE); 341 } 342 343 static inline void *mte_safe_root(const struct maple_enode *node) 344 { 345 return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE); 346 } 347 348 static inline void *mte_set_full(const struct maple_enode *node) 349 { 350 return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL); 351 } 352 353 static inline void *mte_clear_full(const struct maple_enode *node) 354 { 355 return (void *)((unsigned long)node | MAPLE_ENODE_NULL); 356 } 357 358 static inline bool mte_has_null(const struct maple_enode *node) 359 { 360 return (unsigned long)node & MAPLE_ENODE_NULL; 361 } 362 363 static inline bool ma_is_root(struct maple_node *node) 364 { 365 return ((unsigned long)node->parent & MA_ROOT_PARENT); 366 } 367 368 static inline bool mte_is_root(const struct maple_enode *node) 369 { 370 return ma_is_root(mte_to_node(node)); 371 } 372 373 static inline bool mas_is_root_limits(const struct ma_state *mas) 374 { 375 return !mas->min && mas->max == ULONG_MAX; 376 } 377 378 static inline bool mt_is_alloc(struct maple_tree *mt) 379 { 380 return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE); 381 } 382 383 /* 384 * The Parent Pointer 385 * Excluding root, the parent pointer is 256B aligned like all other tree nodes. 386 * When storing a 32 or 64 bit values, the offset can fit into 5 bits. The 16 387 * bit values need an extra bit to store the offset. This extra bit comes from 388 * a reuse of the last bit in the node type. This is possible by using bit 1 to 389 * indicate if bit 2 is part of the type or the slot. 390 * 391 * Note types: 392 * 0x??1 = Root 393 * 0x?00 = 16 bit nodes 394 * 0x010 = 32 bit nodes 395 * 0x110 = 64 bit nodes 396 * 397 * Slot size and alignment 398 * 0b??1 : Root 399 * 0b?00 : 16 bit values, type in 0-1, slot in 2-7 400 * 0b010 : 32 bit values, type in 0-2, slot in 3-7 401 * 0b110 : 64 bit values, type in 0-2, slot in 3-7 402 */ 403 404 #define MAPLE_PARENT_ROOT 0x01 405 406 #define MAPLE_PARENT_SLOT_SHIFT 0x03 407 #define MAPLE_PARENT_SLOT_MASK 0xF8 408 409 #define MAPLE_PARENT_16B_SLOT_SHIFT 0x02 410 #define MAPLE_PARENT_16B_SLOT_MASK 0xFC 411 412 #define MAPLE_PARENT_RANGE64 0x06 413 #define MAPLE_PARENT_RANGE32 0x04 414 #define MAPLE_PARENT_NOT_RANGE16 0x02 415 416 /* 417 * mte_parent_shift() - Get the parent shift for the slot storage. 418 * @parent: The parent pointer cast as an unsigned long 419 * Return: The shift into that pointer to the star to of the slot 420 */ 421 static inline unsigned long mte_parent_shift(unsigned long parent) 422 { 423 /* Note bit 1 == 0 means 16B */ 424 if (likely(parent & MAPLE_PARENT_NOT_RANGE16)) 425 return MAPLE_PARENT_SLOT_SHIFT; 426 427 return MAPLE_PARENT_16B_SLOT_SHIFT; 428 } 429 430 /* 431 * mte_parent_slot_mask() - Get the slot mask for the parent. 432 * @parent: The parent pointer cast as an unsigned long. 433 * Return: The slot mask for that parent. 434 */ 435 static inline unsigned long mte_parent_slot_mask(unsigned long parent) 436 { 437 /* Note bit 1 == 0 means 16B */ 438 if (likely(parent & MAPLE_PARENT_NOT_RANGE16)) 439 return MAPLE_PARENT_SLOT_MASK; 440 441 return MAPLE_PARENT_16B_SLOT_MASK; 442 } 443 444 /* 445 * mas_parent_type() - Return the maple_type of the parent from the stored 446 * parent type. 447 * @mas: The maple state 448 * @enode: The maple_enode to extract the parent's enum 449 * Return: The node->parent maple_type 450 */ 451 static inline 452 enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode) 453 { 454 unsigned long p_type; 455 456 p_type = (unsigned long)mte_to_node(enode)->parent; 457 if (WARN_ON(p_type & MAPLE_PARENT_ROOT)) 458 return 0; 459 460 p_type &= MAPLE_NODE_MASK; 461 p_type &= ~mte_parent_slot_mask(p_type); 462 switch (p_type) { 463 case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */ 464 if (mt_is_alloc(mas->tree)) 465 return maple_arange_64; 466 return maple_range_64; 467 } 468 469 return 0; 470 } 471 472 /* 473 * mas_set_parent() - Set the parent node and encode the slot 474 * @enode: The encoded maple node. 475 * @parent: The encoded maple node that is the parent of @enode. 476 * @slot: The slot that @enode resides in @parent. 477 * 478 * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the 479 * parent type. 480 */ 481 static inline 482 void mas_set_parent(struct ma_state *mas, struct maple_enode *enode, 483 const struct maple_enode *parent, unsigned char slot) 484 { 485 unsigned long val = (unsigned long)parent; 486 unsigned long shift; 487 unsigned long type; 488 enum maple_type p_type = mte_node_type(parent); 489 490 MAS_BUG_ON(mas, p_type == maple_dense); 491 MAS_BUG_ON(mas, p_type == maple_leaf_64); 492 493 switch (p_type) { 494 case maple_range_64: 495 case maple_arange_64: 496 shift = MAPLE_PARENT_SLOT_SHIFT; 497 type = MAPLE_PARENT_RANGE64; 498 break; 499 default: 500 case maple_dense: 501 case maple_leaf_64: 502 shift = type = 0; 503 break; 504 } 505 506 val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */ 507 val |= (slot << shift) | type; 508 mte_to_node(enode)->parent = ma_parent_ptr(val); 509 } 510 511 /* 512 * mte_parent_slot() - get the parent slot of @enode. 513 * @enode: The encoded maple node. 514 * 515 * Return: The slot in the parent node where @enode resides. 516 */ 517 static inline unsigned int mte_parent_slot(const struct maple_enode *enode) 518 { 519 unsigned long val = (unsigned long)mte_to_node(enode)->parent; 520 521 if (val & MA_ROOT_PARENT) 522 return 0; 523 524 /* 525 * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost 526 * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT 527 */ 528 return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val); 529 } 530 531 /* 532 * mte_parent() - Get the parent of @node. 533 * @node: The encoded maple node. 534 * 535 * Return: The parent maple node. 536 */ 537 static inline struct maple_node *mte_parent(const struct maple_enode *enode) 538 { 539 return (void *)((unsigned long) 540 (mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK); 541 } 542 543 /* 544 * ma_dead_node() - check if the @enode is dead. 545 * @enode: The encoded maple node 546 * 547 * Return: true if dead, false otherwise. 548 */ 549 static inline bool ma_dead_node(const struct maple_node *node) 550 { 551 struct maple_node *parent; 552 553 /* Do not reorder reads from the node prior to the parent check */ 554 smp_rmb(); 555 parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK); 556 return (parent == node); 557 } 558 559 /* 560 * mte_dead_node() - check if the @enode is dead. 561 * @enode: The encoded maple node 562 * 563 * Return: true if dead, false otherwise. 564 */ 565 static inline bool mte_dead_node(const struct maple_enode *enode) 566 { 567 struct maple_node *parent, *node; 568 569 node = mte_to_node(enode); 570 /* Do not reorder reads from the node prior to the parent check */ 571 smp_rmb(); 572 parent = mte_parent(enode); 573 return (parent == node); 574 } 575 576 /* 577 * mas_allocated() - Get the number of nodes allocated in a maple state. 578 * @mas: The maple state 579 * 580 * The ma_state alloc member is overloaded to hold a pointer to the first 581 * allocated node or to the number of requested nodes to allocate. If bit 0 is 582 * set, then the alloc contains the number of requested nodes. If there is an 583 * allocated node, then the total allocated nodes is in that node. 584 * 585 * Return: The total number of nodes allocated 586 */ 587 static inline unsigned long mas_allocated(const struct ma_state *mas) 588 { 589 if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) 590 return 0; 591 592 return mas->alloc->total; 593 } 594 595 /* 596 * mas_set_alloc_req() - Set the requested number of allocations. 597 * @mas: the maple state 598 * @count: the number of allocations. 599 * 600 * The requested number of allocations is either in the first allocated node, 601 * located in @mas->alloc->request_count, or directly in @mas->alloc if there is 602 * no allocated node. Set the request either in the node or do the necessary 603 * encoding to store in @mas->alloc directly. 604 */ 605 static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count) 606 { 607 if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) { 608 if (!count) 609 mas->alloc = NULL; 610 else 611 mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U); 612 return; 613 } 614 615 mas->alloc->request_count = count; 616 } 617 618 /* 619 * mas_alloc_req() - get the requested number of allocations. 620 * @mas: The maple state 621 * 622 * The alloc count is either stored directly in @mas, or in 623 * @mas->alloc->request_count if there is at least one node allocated. Decode 624 * the request count if it's stored directly in @mas->alloc. 625 * 626 * Return: The allocation request count. 627 */ 628 static inline unsigned int mas_alloc_req(const struct ma_state *mas) 629 { 630 if ((unsigned long)mas->alloc & 0x1) 631 return (unsigned long)(mas->alloc) >> 1; 632 else if (mas->alloc) 633 return mas->alloc->request_count; 634 return 0; 635 } 636 637 /* 638 * ma_pivots() - Get a pointer to the maple node pivots. 639 * @node - the maple node 640 * @type - the node type 641 * 642 * In the event of a dead node, this array may be %NULL 643 * 644 * Return: A pointer to the maple node pivots 645 */ 646 static inline unsigned long *ma_pivots(struct maple_node *node, 647 enum maple_type type) 648 { 649 switch (type) { 650 case maple_arange_64: 651 return node->ma64.pivot; 652 case maple_range_64: 653 case maple_leaf_64: 654 return node->mr64.pivot; 655 case maple_dense: 656 return NULL; 657 } 658 return NULL; 659 } 660 661 /* 662 * ma_gaps() - Get a pointer to the maple node gaps. 663 * @node - the maple node 664 * @type - the node type 665 * 666 * Return: A pointer to the maple node gaps 667 */ 668 static inline unsigned long *ma_gaps(struct maple_node *node, 669 enum maple_type type) 670 { 671 switch (type) { 672 case maple_arange_64: 673 return node->ma64.gap; 674 case maple_range_64: 675 case maple_leaf_64: 676 case maple_dense: 677 return NULL; 678 } 679 return NULL; 680 } 681 682 /* 683 * mas_pivot() - Get the pivot at @piv of the maple encoded node. 684 * @mas: The maple state. 685 * @piv: The pivot. 686 * 687 * Return: the pivot at @piv of @mn. 688 */ 689 static inline unsigned long mas_pivot(struct ma_state *mas, unsigned char piv) 690 { 691 struct maple_node *node = mas_mn(mas); 692 enum maple_type type = mte_node_type(mas->node); 693 694 if (MAS_WARN_ON(mas, piv >= mt_pivots[type])) { 695 mas_set_err(mas, -EIO); 696 return 0; 697 } 698 699 switch (type) { 700 case maple_arange_64: 701 return node->ma64.pivot[piv]; 702 case maple_range_64: 703 case maple_leaf_64: 704 return node->mr64.pivot[piv]; 705 case maple_dense: 706 return 0; 707 } 708 return 0; 709 } 710 711 /* 712 * mas_safe_pivot() - get the pivot at @piv or mas->max. 713 * @mas: The maple state 714 * @pivots: The pointer to the maple node pivots 715 * @piv: The pivot to fetch 716 * @type: The maple node type 717 * 718 * Return: The pivot at @piv within the limit of the @pivots array, @mas->max 719 * otherwise. 720 */ 721 static inline unsigned long 722 mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots, 723 unsigned char piv, enum maple_type type) 724 { 725 if (piv >= mt_pivots[type]) 726 return mas->max; 727 728 return pivots[piv]; 729 } 730 731 /* 732 * mas_safe_min() - Return the minimum for a given offset. 733 * @mas: The maple state 734 * @pivots: The pointer to the maple node pivots 735 * @offset: The offset into the pivot array 736 * 737 * Return: The minimum range value that is contained in @offset. 738 */ 739 static inline unsigned long 740 mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset) 741 { 742 if (likely(offset)) 743 return pivots[offset - 1] + 1; 744 745 return mas->min; 746 } 747 748 /* 749 * mte_set_pivot() - Set a pivot to a value in an encoded maple node. 750 * @mn: The encoded maple node 751 * @piv: The pivot offset 752 * @val: The value of the pivot 753 */ 754 static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv, 755 unsigned long val) 756 { 757 struct maple_node *node = mte_to_node(mn); 758 enum maple_type type = mte_node_type(mn); 759 760 BUG_ON(piv >= mt_pivots[type]); 761 switch (type) { 762 default: 763 case maple_range_64: 764 case maple_leaf_64: 765 node->mr64.pivot[piv] = val; 766 break; 767 case maple_arange_64: 768 node->ma64.pivot[piv] = val; 769 break; 770 case maple_dense: 771 break; 772 } 773 774 } 775 776 /* 777 * ma_slots() - Get a pointer to the maple node slots. 778 * @mn: The maple node 779 * @mt: The maple node type 780 * 781 * Return: A pointer to the maple node slots 782 */ 783 static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt) 784 { 785 switch (mt) { 786 default: 787 case maple_arange_64: 788 return mn->ma64.slot; 789 case maple_range_64: 790 case maple_leaf_64: 791 return mn->mr64.slot; 792 case maple_dense: 793 return mn->slot; 794 } 795 } 796 797 static inline bool mt_write_locked(const struct maple_tree *mt) 798 { 799 return mt_external_lock(mt) ? mt_write_lock_is_held(mt) : 800 lockdep_is_held(&mt->ma_lock); 801 } 802 803 static inline bool mt_locked(const struct maple_tree *mt) 804 { 805 return mt_external_lock(mt) ? mt_lock_is_held(mt) : 806 lockdep_is_held(&mt->ma_lock); 807 } 808 809 static inline void *mt_slot(const struct maple_tree *mt, 810 void __rcu **slots, unsigned char offset) 811 { 812 return rcu_dereference_check(slots[offset], mt_locked(mt)); 813 } 814 815 static inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slots, 816 unsigned char offset) 817 { 818 return rcu_dereference_protected(slots[offset], mt_write_locked(mt)); 819 } 820 /* 821 * mas_slot_locked() - Get the slot value when holding the maple tree lock. 822 * @mas: The maple state 823 * @slots: The pointer to the slots 824 * @offset: The offset into the slots array to fetch 825 * 826 * Return: The entry stored in @slots at the @offset. 827 */ 828 static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots, 829 unsigned char offset) 830 { 831 return mt_slot_locked(mas->tree, slots, offset); 832 } 833 834 /* 835 * mas_slot() - Get the slot value when not holding the maple tree lock. 836 * @mas: The maple state 837 * @slots: The pointer to the slots 838 * @offset: The offset into the slots array to fetch 839 * 840 * Return: The entry stored in @slots at the @offset 841 */ 842 static inline void *mas_slot(struct ma_state *mas, void __rcu **slots, 843 unsigned char offset) 844 { 845 return mt_slot(mas->tree, slots, offset); 846 } 847 848 /* 849 * mas_root() - Get the maple tree root. 850 * @mas: The maple state. 851 * 852 * Return: The pointer to the root of the tree 853 */ 854 static inline void *mas_root(struct ma_state *mas) 855 { 856 return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree)); 857 } 858 859 static inline void *mt_root_locked(struct maple_tree *mt) 860 { 861 return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt)); 862 } 863 864 /* 865 * mas_root_locked() - Get the maple tree root when holding the maple tree lock. 866 * @mas: The maple state. 867 * 868 * Return: The pointer to the root of the tree 869 */ 870 static inline void *mas_root_locked(struct ma_state *mas) 871 { 872 return mt_root_locked(mas->tree); 873 } 874 875 static inline struct maple_metadata *ma_meta(struct maple_node *mn, 876 enum maple_type mt) 877 { 878 switch (mt) { 879 case maple_arange_64: 880 return &mn->ma64.meta; 881 default: 882 return &mn->mr64.meta; 883 } 884 } 885 886 /* 887 * ma_set_meta() - Set the metadata information of a node. 888 * @mn: The maple node 889 * @mt: The maple node type 890 * @offset: The offset of the highest sub-gap in this node. 891 * @end: The end of the data in this node. 892 */ 893 static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt, 894 unsigned char offset, unsigned char end) 895 { 896 struct maple_metadata *meta = ma_meta(mn, mt); 897 898 meta->gap = offset; 899 meta->end = end; 900 } 901 902 /* 903 * mt_clear_meta() - clear the metadata information of a node, if it exists 904 * @mt: The maple tree 905 * @mn: The maple node 906 * @type: The maple node type 907 * @offset: The offset of the highest sub-gap in this node. 908 * @end: The end of the data in this node. 909 */ 910 static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn, 911 enum maple_type type) 912 { 913 struct maple_metadata *meta; 914 unsigned long *pivots; 915 void __rcu **slots; 916 void *next; 917 918 switch (type) { 919 case maple_range_64: 920 pivots = mn->mr64.pivot; 921 if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { 922 slots = mn->mr64.slot; 923 next = mt_slot_locked(mt, slots, 924 MAPLE_RANGE64_SLOTS - 1); 925 if (unlikely((mte_to_node(next) && 926 mte_node_type(next)))) 927 return; /* no metadata, could be node */ 928 } 929 fallthrough; 930 case maple_arange_64: 931 meta = ma_meta(mn, type); 932 break; 933 default: 934 return; 935 } 936 937 meta->gap = 0; 938 meta->end = 0; 939 } 940 941 /* 942 * ma_meta_end() - Get the data end of a node from the metadata 943 * @mn: The maple node 944 * @mt: The maple node type 945 */ 946 static inline unsigned char ma_meta_end(struct maple_node *mn, 947 enum maple_type mt) 948 { 949 struct maple_metadata *meta = ma_meta(mn, mt); 950 951 return meta->end; 952 } 953 954 /* 955 * ma_meta_gap() - Get the largest gap location of a node from the metadata 956 * @mn: The maple node 957 * @mt: The maple node type 958 */ 959 static inline unsigned char ma_meta_gap(struct maple_node *mn, 960 enum maple_type mt) 961 { 962 return mn->ma64.meta.gap; 963 } 964 965 /* 966 * ma_set_meta_gap() - Set the largest gap location in a nodes metadata 967 * @mn: The maple node 968 * @mn: The maple node type 969 * @offset: The location of the largest gap. 970 */ 971 static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt, 972 unsigned char offset) 973 { 974 975 struct maple_metadata *meta = ma_meta(mn, mt); 976 977 meta->gap = offset; 978 } 979 980 /* 981 * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes. 982 * @mat - the ma_topiary, a linked list of dead nodes. 983 * @dead_enode - the node to be marked as dead and added to the tail of the list 984 * 985 * Add the @dead_enode to the linked list in @mat. 986 */ 987 static inline void mat_add(struct ma_topiary *mat, 988 struct maple_enode *dead_enode) 989 { 990 mte_set_node_dead(dead_enode); 991 mte_to_mat(dead_enode)->next = NULL; 992 if (!mat->tail) { 993 mat->tail = mat->head = dead_enode; 994 return; 995 } 996 997 mte_to_mat(mat->tail)->next = dead_enode; 998 mat->tail = dead_enode; 999 } 1000 1001 static void mt_free_walk(struct rcu_head *head); 1002 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 1003 bool free); 1004 /* 1005 * mas_mat_destroy() - Free all nodes and subtrees in a dead list. 1006 * @mas - the maple state 1007 * @mat - the ma_topiary linked list of dead nodes to free. 1008 * 1009 * Destroy walk a dead list. 1010 */ 1011 static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat) 1012 { 1013 struct maple_enode *next; 1014 struct maple_node *node; 1015 bool in_rcu = mt_in_rcu(mas->tree); 1016 1017 while (mat->head) { 1018 next = mte_to_mat(mat->head)->next; 1019 node = mte_to_node(mat->head); 1020 mt_destroy_walk(mat->head, mas->tree, !in_rcu); 1021 if (in_rcu) 1022 call_rcu(&node->rcu, mt_free_walk); 1023 mat->head = next; 1024 } 1025 } 1026 /* 1027 * mas_descend() - Descend into the slot stored in the ma_state. 1028 * @mas - the maple state. 1029 * 1030 * Note: Not RCU safe, only use in write side or debug code. 1031 */ 1032 static inline void mas_descend(struct ma_state *mas) 1033 { 1034 enum maple_type type; 1035 unsigned long *pivots; 1036 struct maple_node *node; 1037 void __rcu **slots; 1038 1039 node = mas_mn(mas); 1040 type = mte_node_type(mas->node); 1041 pivots = ma_pivots(node, type); 1042 slots = ma_slots(node, type); 1043 1044 if (mas->offset) 1045 mas->min = pivots[mas->offset - 1] + 1; 1046 mas->max = mas_safe_pivot(mas, pivots, mas->offset, type); 1047 mas->node = mas_slot(mas, slots, mas->offset); 1048 } 1049 1050 /* 1051 * mte_set_gap() - Set a maple node gap. 1052 * @mn: The encoded maple node 1053 * @gap: The offset of the gap to set 1054 * @val: The gap value 1055 */ 1056 static inline void mte_set_gap(const struct maple_enode *mn, 1057 unsigned char gap, unsigned long val) 1058 { 1059 switch (mte_node_type(mn)) { 1060 default: 1061 break; 1062 case maple_arange_64: 1063 mte_to_node(mn)->ma64.gap[gap] = val; 1064 break; 1065 } 1066 } 1067 1068 /* 1069 * mas_ascend() - Walk up a level of the tree. 1070 * @mas: The maple state 1071 * 1072 * Sets the @mas->max and @mas->min to the correct values when walking up. This 1073 * may cause several levels of walking up to find the correct min and max. 1074 * May find a dead node which will cause a premature return. 1075 * Return: 1 on dead node, 0 otherwise 1076 */ 1077 static int mas_ascend(struct ma_state *mas) 1078 { 1079 struct maple_enode *p_enode; /* parent enode. */ 1080 struct maple_enode *a_enode; /* ancestor enode. */ 1081 struct maple_node *a_node; /* ancestor node. */ 1082 struct maple_node *p_node; /* parent node. */ 1083 unsigned char a_slot; 1084 enum maple_type a_type; 1085 unsigned long min, max; 1086 unsigned long *pivots; 1087 bool set_max = false, set_min = false; 1088 1089 a_node = mas_mn(mas); 1090 if (ma_is_root(a_node)) { 1091 mas->offset = 0; 1092 return 0; 1093 } 1094 1095 p_node = mte_parent(mas->node); 1096 if (unlikely(a_node == p_node)) 1097 return 1; 1098 1099 a_type = mas_parent_type(mas, mas->node); 1100 mas->offset = mte_parent_slot(mas->node); 1101 a_enode = mt_mk_node(p_node, a_type); 1102 1103 /* Check to make sure all parent information is still accurate */ 1104 if (p_node != mte_parent(mas->node)) 1105 return 1; 1106 1107 mas->node = a_enode; 1108 1109 if (mte_is_root(a_enode)) { 1110 mas->max = ULONG_MAX; 1111 mas->min = 0; 1112 return 0; 1113 } 1114 1115 if (!mas->min) 1116 set_min = true; 1117 1118 if (mas->max == ULONG_MAX) 1119 set_max = true; 1120 1121 min = 0; 1122 max = ULONG_MAX; 1123 do { 1124 p_enode = a_enode; 1125 a_type = mas_parent_type(mas, p_enode); 1126 a_node = mte_parent(p_enode); 1127 a_slot = mte_parent_slot(p_enode); 1128 a_enode = mt_mk_node(a_node, a_type); 1129 pivots = ma_pivots(a_node, a_type); 1130 1131 if (unlikely(ma_dead_node(a_node))) 1132 return 1; 1133 1134 if (!set_min && a_slot) { 1135 set_min = true; 1136 min = pivots[a_slot - 1] + 1; 1137 } 1138 1139 if (!set_max && a_slot < mt_pivots[a_type]) { 1140 set_max = true; 1141 max = pivots[a_slot]; 1142 } 1143 1144 if (unlikely(ma_dead_node(a_node))) 1145 return 1; 1146 1147 if (unlikely(ma_is_root(a_node))) 1148 break; 1149 1150 } while (!set_min || !set_max); 1151 1152 mas->max = max; 1153 mas->min = min; 1154 return 0; 1155 } 1156 1157 /* 1158 * mas_pop_node() - Get a previously allocated maple node from the maple state. 1159 * @mas: The maple state 1160 * 1161 * Return: A pointer to a maple node. 1162 */ 1163 static inline struct maple_node *mas_pop_node(struct ma_state *mas) 1164 { 1165 struct maple_alloc *ret, *node = mas->alloc; 1166 unsigned long total = mas_allocated(mas); 1167 unsigned int req = mas_alloc_req(mas); 1168 1169 /* nothing or a request pending. */ 1170 if (WARN_ON(!total)) 1171 return NULL; 1172 1173 if (total == 1) { 1174 /* single allocation in this ma_state */ 1175 mas->alloc = NULL; 1176 ret = node; 1177 goto single_node; 1178 } 1179 1180 if (node->node_count == 1) { 1181 /* Single allocation in this node. */ 1182 mas->alloc = node->slot[0]; 1183 mas->alloc->total = node->total - 1; 1184 ret = node; 1185 goto new_head; 1186 } 1187 node->total--; 1188 ret = node->slot[--node->node_count]; 1189 node->slot[node->node_count] = NULL; 1190 1191 single_node: 1192 new_head: 1193 if (req) { 1194 req++; 1195 mas_set_alloc_req(mas, req); 1196 } 1197 1198 memset(ret, 0, sizeof(*ret)); 1199 return (struct maple_node *)ret; 1200 } 1201 1202 /* 1203 * mas_push_node() - Push a node back on the maple state allocation. 1204 * @mas: The maple state 1205 * @used: The used maple node 1206 * 1207 * Stores the maple node back into @mas->alloc for reuse. Updates allocated and 1208 * requested node count as necessary. 1209 */ 1210 static inline void mas_push_node(struct ma_state *mas, struct maple_node *used) 1211 { 1212 struct maple_alloc *reuse = (struct maple_alloc *)used; 1213 struct maple_alloc *head = mas->alloc; 1214 unsigned long count; 1215 unsigned int requested = mas_alloc_req(mas); 1216 1217 count = mas_allocated(mas); 1218 1219 reuse->request_count = 0; 1220 reuse->node_count = 0; 1221 if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) { 1222 head->slot[head->node_count++] = reuse; 1223 head->total++; 1224 goto done; 1225 } 1226 1227 reuse->total = 1; 1228 if ((head) && !((unsigned long)head & 0x1)) { 1229 reuse->slot[0] = head; 1230 reuse->node_count = 1; 1231 reuse->total += head->total; 1232 } 1233 1234 mas->alloc = reuse; 1235 done: 1236 if (requested > 1) 1237 mas_set_alloc_req(mas, requested - 1); 1238 } 1239 1240 /* 1241 * mas_alloc_nodes() - Allocate nodes into a maple state 1242 * @mas: The maple state 1243 * @gfp: The GFP Flags 1244 */ 1245 static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) 1246 { 1247 struct maple_alloc *node; 1248 unsigned long allocated = mas_allocated(mas); 1249 unsigned int requested = mas_alloc_req(mas); 1250 unsigned int count; 1251 void **slots = NULL; 1252 unsigned int max_req = 0; 1253 1254 if (!requested) 1255 return; 1256 1257 mas_set_alloc_req(mas, 0); 1258 if (mas->mas_flags & MA_STATE_PREALLOC) { 1259 if (allocated) 1260 return; 1261 WARN_ON(!allocated); 1262 } 1263 1264 if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) { 1265 node = (struct maple_alloc *)mt_alloc_one(gfp); 1266 if (!node) 1267 goto nomem_one; 1268 1269 if (allocated) { 1270 node->slot[0] = mas->alloc; 1271 node->node_count = 1; 1272 } else { 1273 node->node_count = 0; 1274 } 1275 1276 mas->alloc = node; 1277 node->total = ++allocated; 1278 requested--; 1279 } 1280 1281 node = mas->alloc; 1282 node->request_count = 0; 1283 while (requested) { 1284 max_req = MAPLE_ALLOC_SLOTS - node->node_count; 1285 slots = (void **)&node->slot[node->node_count]; 1286 max_req = min(requested, max_req); 1287 count = mt_alloc_bulk(gfp, max_req, slots); 1288 if (!count) 1289 goto nomem_bulk; 1290 1291 if (node->node_count == 0) { 1292 node->slot[0]->node_count = 0; 1293 node->slot[0]->request_count = 0; 1294 } 1295 1296 node->node_count += count; 1297 allocated += count; 1298 node = node->slot[0]; 1299 requested -= count; 1300 } 1301 mas->alloc->total = allocated; 1302 return; 1303 1304 nomem_bulk: 1305 /* Clean up potential freed allocations on bulk failure */ 1306 memset(slots, 0, max_req * sizeof(unsigned long)); 1307 nomem_one: 1308 mas_set_alloc_req(mas, requested); 1309 if (mas->alloc && !(((unsigned long)mas->alloc & 0x1))) 1310 mas->alloc->total = allocated; 1311 mas_set_err(mas, -ENOMEM); 1312 } 1313 1314 /* 1315 * mas_free() - Free an encoded maple node 1316 * @mas: The maple state 1317 * @used: The encoded maple node to free. 1318 * 1319 * Uses rcu free if necessary, pushes @used back on the maple state allocations 1320 * otherwise. 1321 */ 1322 static inline void mas_free(struct ma_state *mas, struct maple_enode *used) 1323 { 1324 struct maple_node *tmp = mte_to_node(used); 1325 1326 if (mt_in_rcu(mas->tree)) 1327 ma_free_rcu(tmp); 1328 else 1329 mas_push_node(mas, tmp); 1330 } 1331 1332 /* 1333 * mas_node_count() - Check if enough nodes are allocated and request more if 1334 * there is not enough nodes. 1335 * @mas: The maple state 1336 * @count: The number of nodes needed 1337 * @gfp: the gfp flags 1338 */ 1339 static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp) 1340 { 1341 unsigned long allocated = mas_allocated(mas); 1342 1343 if (allocated < count) { 1344 mas_set_alloc_req(mas, count - allocated); 1345 mas_alloc_nodes(mas, gfp); 1346 } 1347 } 1348 1349 /* 1350 * mas_node_count() - Check if enough nodes are allocated and request more if 1351 * there is not enough nodes. 1352 * @mas: The maple state 1353 * @count: The number of nodes needed 1354 * 1355 * Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags. 1356 */ 1357 static void mas_node_count(struct ma_state *mas, int count) 1358 { 1359 return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN); 1360 } 1361 1362 /* 1363 * mas_start() - Sets up maple state for operations. 1364 * @mas: The maple state. 1365 * 1366 * If mas->node == MAS_START, then set the min, max and depth to 1367 * defaults. 1368 * 1369 * Return: 1370 * - If mas->node is an error or not MAS_START, return NULL. 1371 * - If it's an empty tree: NULL & mas->node == MAS_NONE 1372 * - If it's a single entry: The entry & mas->node == MAS_ROOT 1373 * - If it's a tree: NULL & mas->node == safe root node. 1374 */ 1375 static inline struct maple_enode *mas_start(struct ma_state *mas) 1376 { 1377 if (likely(mas_is_start(mas))) { 1378 struct maple_enode *root; 1379 1380 mas->min = 0; 1381 mas->max = ULONG_MAX; 1382 1383 retry: 1384 mas->depth = 0; 1385 root = mas_root(mas); 1386 /* Tree with nodes */ 1387 if (likely(xa_is_node(root))) { 1388 mas->depth = 1; 1389 mas->node = mte_safe_root(root); 1390 mas->offset = 0; 1391 if (mte_dead_node(mas->node)) 1392 goto retry; 1393 1394 return NULL; 1395 } 1396 1397 /* empty tree */ 1398 if (unlikely(!root)) { 1399 mas->node = MAS_NONE; 1400 mas->offset = MAPLE_NODE_SLOTS; 1401 return NULL; 1402 } 1403 1404 /* Single entry tree */ 1405 mas->node = MAS_ROOT; 1406 mas->offset = MAPLE_NODE_SLOTS; 1407 1408 /* Single entry tree. */ 1409 if (mas->index > 0) 1410 return NULL; 1411 1412 return root; 1413 } 1414 1415 return NULL; 1416 } 1417 1418 /* 1419 * ma_data_end() - Find the end of the data in a node. 1420 * @node: The maple node 1421 * @type: The maple node type 1422 * @pivots: The array of pivots in the node 1423 * @max: The maximum value in the node 1424 * 1425 * Uses metadata to find the end of the data when possible. 1426 * Return: The zero indexed last slot with data (may be null). 1427 */ 1428 static inline unsigned char ma_data_end(struct maple_node *node, 1429 enum maple_type type, 1430 unsigned long *pivots, 1431 unsigned long max) 1432 { 1433 unsigned char offset; 1434 1435 if (!pivots) 1436 return 0; 1437 1438 if (type == maple_arange_64) 1439 return ma_meta_end(node, type); 1440 1441 offset = mt_pivots[type] - 1; 1442 if (likely(!pivots[offset])) 1443 return ma_meta_end(node, type); 1444 1445 if (likely(pivots[offset] == max)) 1446 return offset; 1447 1448 return mt_pivots[type]; 1449 } 1450 1451 /* 1452 * mas_data_end() - Find the end of the data (slot). 1453 * @mas: the maple state 1454 * 1455 * This method is optimized to check the metadata of a node if the node type 1456 * supports data end metadata. 1457 * 1458 * Return: The zero indexed last slot with data (may be null). 1459 */ 1460 static inline unsigned char mas_data_end(struct ma_state *mas) 1461 { 1462 enum maple_type type; 1463 struct maple_node *node; 1464 unsigned char offset; 1465 unsigned long *pivots; 1466 1467 type = mte_node_type(mas->node); 1468 node = mas_mn(mas); 1469 if (type == maple_arange_64) 1470 return ma_meta_end(node, type); 1471 1472 pivots = ma_pivots(node, type); 1473 if (unlikely(ma_dead_node(node))) 1474 return 0; 1475 1476 offset = mt_pivots[type] - 1; 1477 if (likely(!pivots[offset])) 1478 return ma_meta_end(node, type); 1479 1480 if (likely(pivots[offset] == mas->max)) 1481 return offset; 1482 1483 return mt_pivots[type]; 1484 } 1485 1486 /* 1487 * mas_leaf_max_gap() - Returns the largest gap in a leaf node 1488 * @mas - the maple state 1489 * 1490 * Return: The maximum gap in the leaf. 1491 */ 1492 static unsigned long mas_leaf_max_gap(struct ma_state *mas) 1493 { 1494 enum maple_type mt; 1495 unsigned long pstart, gap, max_gap; 1496 struct maple_node *mn; 1497 unsigned long *pivots; 1498 void __rcu **slots; 1499 unsigned char i; 1500 unsigned char max_piv; 1501 1502 mt = mte_node_type(mas->node); 1503 mn = mas_mn(mas); 1504 slots = ma_slots(mn, mt); 1505 max_gap = 0; 1506 if (unlikely(ma_is_dense(mt))) { 1507 gap = 0; 1508 for (i = 0; i < mt_slots[mt]; i++) { 1509 if (slots[i]) { 1510 if (gap > max_gap) 1511 max_gap = gap; 1512 gap = 0; 1513 } else { 1514 gap++; 1515 } 1516 } 1517 if (gap > max_gap) 1518 max_gap = gap; 1519 return max_gap; 1520 } 1521 1522 /* 1523 * Check the first implied pivot optimizes the loop below and slot 1 may 1524 * be skipped if there is a gap in slot 0. 1525 */ 1526 pivots = ma_pivots(mn, mt); 1527 if (likely(!slots[0])) { 1528 max_gap = pivots[0] - mas->min + 1; 1529 i = 2; 1530 } else { 1531 i = 1; 1532 } 1533 1534 /* reduce max_piv as the special case is checked before the loop */ 1535 max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1; 1536 /* 1537 * Check end implied pivot which can only be a gap on the right most 1538 * node. 1539 */ 1540 if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) { 1541 gap = ULONG_MAX - pivots[max_piv]; 1542 if (gap > max_gap) 1543 max_gap = gap; 1544 } 1545 1546 for (; i <= max_piv; i++) { 1547 /* data == no gap. */ 1548 if (likely(slots[i])) 1549 continue; 1550 1551 pstart = pivots[i - 1]; 1552 gap = pivots[i] - pstart; 1553 if (gap > max_gap) 1554 max_gap = gap; 1555 1556 /* There cannot be two gaps in a row. */ 1557 i++; 1558 } 1559 return max_gap; 1560 } 1561 1562 /* 1563 * ma_max_gap() - Get the maximum gap in a maple node (non-leaf) 1564 * @node: The maple node 1565 * @gaps: The pointer to the gaps 1566 * @mt: The maple node type 1567 * @*off: Pointer to store the offset location of the gap. 1568 * 1569 * Uses the metadata data end to scan backwards across set gaps. 1570 * 1571 * Return: The maximum gap value 1572 */ 1573 static inline unsigned long 1574 ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt, 1575 unsigned char *off) 1576 { 1577 unsigned char offset, i; 1578 unsigned long max_gap = 0; 1579 1580 i = offset = ma_meta_end(node, mt); 1581 do { 1582 if (gaps[i] > max_gap) { 1583 max_gap = gaps[i]; 1584 offset = i; 1585 } 1586 } while (i--); 1587 1588 *off = offset; 1589 return max_gap; 1590 } 1591 1592 /* 1593 * mas_max_gap() - find the largest gap in a non-leaf node and set the slot. 1594 * @mas: The maple state. 1595 * 1596 * Return: The gap value. 1597 */ 1598 static inline unsigned long mas_max_gap(struct ma_state *mas) 1599 { 1600 unsigned long *gaps; 1601 unsigned char offset; 1602 enum maple_type mt; 1603 struct maple_node *node; 1604 1605 mt = mte_node_type(mas->node); 1606 if (ma_is_leaf(mt)) 1607 return mas_leaf_max_gap(mas); 1608 1609 node = mas_mn(mas); 1610 MAS_BUG_ON(mas, mt != maple_arange_64); 1611 offset = ma_meta_gap(node, mt); 1612 gaps = ma_gaps(node, mt); 1613 return gaps[offset]; 1614 } 1615 1616 /* 1617 * mas_parent_gap() - Set the parent gap and any gaps above, as needed 1618 * @mas: The maple state 1619 * @offset: The gap offset in the parent to set 1620 * @new: The new gap value. 1621 * 1622 * Set the parent gap then continue to set the gap upwards, using the metadata 1623 * of the parent to see if it is necessary to check the node above. 1624 */ 1625 static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset, 1626 unsigned long new) 1627 { 1628 unsigned long meta_gap = 0; 1629 struct maple_node *pnode; 1630 struct maple_enode *penode; 1631 unsigned long *pgaps; 1632 unsigned char meta_offset; 1633 enum maple_type pmt; 1634 1635 pnode = mte_parent(mas->node); 1636 pmt = mas_parent_type(mas, mas->node); 1637 penode = mt_mk_node(pnode, pmt); 1638 pgaps = ma_gaps(pnode, pmt); 1639 1640 ascend: 1641 MAS_BUG_ON(mas, pmt != maple_arange_64); 1642 meta_offset = ma_meta_gap(pnode, pmt); 1643 meta_gap = pgaps[meta_offset]; 1644 1645 pgaps[offset] = new; 1646 1647 if (meta_gap == new) 1648 return; 1649 1650 if (offset != meta_offset) { 1651 if (meta_gap > new) 1652 return; 1653 1654 ma_set_meta_gap(pnode, pmt, offset); 1655 } else if (new < meta_gap) { 1656 new = ma_max_gap(pnode, pgaps, pmt, &meta_offset); 1657 ma_set_meta_gap(pnode, pmt, meta_offset); 1658 } 1659 1660 if (ma_is_root(pnode)) 1661 return; 1662 1663 /* Go to the parent node. */ 1664 pnode = mte_parent(penode); 1665 pmt = mas_parent_type(mas, penode); 1666 pgaps = ma_gaps(pnode, pmt); 1667 offset = mte_parent_slot(penode); 1668 penode = mt_mk_node(pnode, pmt); 1669 goto ascend; 1670 } 1671 1672 /* 1673 * mas_update_gap() - Update a nodes gaps and propagate up if necessary. 1674 * @mas - the maple state. 1675 */ 1676 static inline void mas_update_gap(struct ma_state *mas) 1677 { 1678 unsigned char pslot; 1679 unsigned long p_gap; 1680 unsigned long max_gap; 1681 1682 if (!mt_is_alloc(mas->tree)) 1683 return; 1684 1685 if (mte_is_root(mas->node)) 1686 return; 1687 1688 max_gap = mas_max_gap(mas); 1689 1690 pslot = mte_parent_slot(mas->node); 1691 p_gap = ma_gaps(mte_parent(mas->node), 1692 mas_parent_type(mas, mas->node))[pslot]; 1693 1694 if (p_gap != max_gap) 1695 mas_parent_gap(mas, pslot, max_gap); 1696 } 1697 1698 /* 1699 * mas_adopt_children() - Set the parent pointer of all nodes in @parent to 1700 * @parent with the slot encoded. 1701 * @mas - the maple state (for the tree) 1702 * @parent - the maple encoded node containing the children. 1703 */ 1704 static inline void mas_adopt_children(struct ma_state *mas, 1705 struct maple_enode *parent) 1706 { 1707 enum maple_type type = mte_node_type(parent); 1708 struct maple_node *node = mte_to_node(parent); 1709 void __rcu **slots = ma_slots(node, type); 1710 unsigned long *pivots = ma_pivots(node, type); 1711 struct maple_enode *child; 1712 unsigned char offset; 1713 1714 offset = ma_data_end(node, type, pivots, mas->max); 1715 do { 1716 child = mas_slot_locked(mas, slots, offset); 1717 mas_set_parent(mas, child, parent, offset); 1718 } while (offset--); 1719 } 1720 1721 /* 1722 * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old 1723 * node as dead. 1724 * @mas - the maple state with the new node 1725 * @old_enode - The old maple encoded node to replace. 1726 */ 1727 static inline void mas_put_in_tree(struct ma_state *mas, 1728 struct maple_enode *old_enode) 1729 __must_hold(mas->tree->ma_lock) 1730 { 1731 unsigned char offset; 1732 void __rcu **slots; 1733 1734 if (mte_is_root(mas->node)) { 1735 mas_mn(mas)->parent = ma_parent_ptr(mas_tree_parent(mas)); 1736 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 1737 mas_set_height(mas); 1738 } else { 1739 1740 offset = mte_parent_slot(mas->node); 1741 slots = ma_slots(mte_parent(mas->node), 1742 mas_parent_type(mas, mas->node)); 1743 rcu_assign_pointer(slots[offset], mas->node); 1744 } 1745 1746 mte_set_node_dead(old_enode); 1747 } 1748 1749 /* 1750 * mas_replace_node() - Replace a node by putting it in the tree, marking it 1751 * dead, and freeing it. 1752 * the parent encoding to locate the maple node in the tree. 1753 * @mas - the ma_state with @mas->node pointing to the new node. 1754 * @old_enode - The old maple encoded node. 1755 */ 1756 static inline void mas_replace_node(struct ma_state *mas, 1757 struct maple_enode *old_enode) 1758 __must_hold(mas->tree->ma_lock) 1759 { 1760 mas_put_in_tree(mas, old_enode); 1761 mas_free(mas, old_enode); 1762 } 1763 1764 /* 1765 * mas_find_child() - Find a child who has the parent @mas->node. 1766 * @mas: the maple state with the parent. 1767 * @child: the maple state to store the child. 1768 */ 1769 static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child) 1770 __must_hold(mas->tree->ma_lock) 1771 { 1772 enum maple_type mt; 1773 unsigned char offset; 1774 unsigned char end; 1775 unsigned long *pivots; 1776 struct maple_enode *entry; 1777 struct maple_node *node; 1778 void __rcu **slots; 1779 1780 mt = mte_node_type(mas->node); 1781 node = mas_mn(mas); 1782 slots = ma_slots(node, mt); 1783 pivots = ma_pivots(node, mt); 1784 end = ma_data_end(node, mt, pivots, mas->max); 1785 for (offset = mas->offset; offset <= end; offset++) { 1786 entry = mas_slot_locked(mas, slots, offset); 1787 if (mte_parent(entry) == node) { 1788 *child = *mas; 1789 mas->offset = offset + 1; 1790 child->offset = offset; 1791 mas_descend(child); 1792 child->offset = 0; 1793 return true; 1794 } 1795 } 1796 return false; 1797 } 1798 1799 /* 1800 * mab_shift_right() - Shift the data in mab right. Note, does not clean out the 1801 * old data or set b_node->b_end. 1802 * @b_node: the maple_big_node 1803 * @shift: the shift count 1804 */ 1805 static inline void mab_shift_right(struct maple_big_node *b_node, 1806 unsigned char shift) 1807 { 1808 unsigned long size = b_node->b_end * sizeof(unsigned long); 1809 1810 memmove(b_node->pivot + shift, b_node->pivot, size); 1811 memmove(b_node->slot + shift, b_node->slot, size); 1812 if (b_node->type == maple_arange_64) 1813 memmove(b_node->gap + shift, b_node->gap, size); 1814 } 1815 1816 /* 1817 * mab_middle_node() - Check if a middle node is needed (unlikely) 1818 * @b_node: the maple_big_node that contains the data. 1819 * @size: the amount of data in the b_node 1820 * @split: the potential split location 1821 * @slot_count: the size that can be stored in a single node being considered. 1822 * 1823 * Return: true if a middle node is required. 1824 */ 1825 static inline bool mab_middle_node(struct maple_big_node *b_node, int split, 1826 unsigned char slot_count) 1827 { 1828 unsigned char size = b_node->b_end; 1829 1830 if (size >= 2 * slot_count) 1831 return true; 1832 1833 if (!b_node->slot[split] && (size >= 2 * slot_count - 1)) 1834 return true; 1835 1836 return false; 1837 } 1838 1839 /* 1840 * mab_no_null_split() - ensure the split doesn't fall on a NULL 1841 * @b_node: the maple_big_node with the data 1842 * @split: the suggested split location 1843 * @slot_count: the number of slots in the node being considered. 1844 * 1845 * Return: the split location. 1846 */ 1847 static inline int mab_no_null_split(struct maple_big_node *b_node, 1848 unsigned char split, unsigned char slot_count) 1849 { 1850 if (!b_node->slot[split]) { 1851 /* 1852 * If the split is less than the max slot && the right side will 1853 * still be sufficient, then increment the split on NULL. 1854 */ 1855 if ((split < slot_count - 1) && 1856 (b_node->b_end - split) > (mt_min_slots[b_node->type])) 1857 split++; 1858 else 1859 split--; 1860 } 1861 return split; 1862 } 1863 1864 /* 1865 * mab_calc_split() - Calculate the split location and if there needs to be two 1866 * splits. 1867 * @bn: The maple_big_node with the data 1868 * @mid_split: The second split, if required. 0 otherwise. 1869 * 1870 * Return: The first split location. The middle split is set in @mid_split. 1871 */ 1872 static inline int mab_calc_split(struct ma_state *mas, 1873 struct maple_big_node *bn, unsigned char *mid_split, unsigned long min) 1874 { 1875 unsigned char b_end = bn->b_end; 1876 int split = b_end / 2; /* Assume equal split. */ 1877 unsigned char slot_min, slot_count = mt_slots[bn->type]; 1878 1879 /* 1880 * To support gap tracking, all NULL entries are kept together and a node cannot 1881 * end on a NULL entry, with the exception of the left-most leaf. The 1882 * limitation means that the split of a node must be checked for this condition 1883 * and be able to put more data in one direction or the other. 1884 */ 1885 if (unlikely((mas->mas_flags & MA_STATE_BULK))) { 1886 *mid_split = 0; 1887 split = b_end - mt_min_slots[bn->type]; 1888 1889 if (!ma_is_leaf(bn->type)) 1890 return split; 1891 1892 mas->mas_flags |= MA_STATE_REBALANCE; 1893 if (!bn->slot[split]) 1894 split--; 1895 return split; 1896 } 1897 1898 /* 1899 * Although extremely rare, it is possible to enter what is known as the 3-way 1900 * split scenario. The 3-way split comes about by means of a store of a range 1901 * that overwrites the end and beginning of two full nodes. The result is a set 1902 * of entries that cannot be stored in 2 nodes. Sometimes, these two nodes can 1903 * also be located in different parent nodes which are also full. This can 1904 * carry upwards all the way to the root in the worst case. 1905 */ 1906 if (unlikely(mab_middle_node(bn, split, slot_count))) { 1907 split = b_end / 3; 1908 *mid_split = split * 2; 1909 } else { 1910 slot_min = mt_min_slots[bn->type]; 1911 1912 *mid_split = 0; 1913 /* 1914 * Avoid having a range less than the slot count unless it 1915 * causes one node to be deficient. 1916 * NOTE: mt_min_slots is 1 based, b_end and split are zero. 1917 */ 1918 while ((split < slot_count - 1) && 1919 ((bn->pivot[split] - min) < slot_count - 1) && 1920 (b_end - split > slot_min)) 1921 split++; 1922 } 1923 1924 /* Avoid ending a node on a NULL entry */ 1925 split = mab_no_null_split(bn, split, slot_count); 1926 1927 if (unlikely(*mid_split)) 1928 *mid_split = mab_no_null_split(bn, *mid_split, slot_count); 1929 1930 return split; 1931 } 1932 1933 /* 1934 * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node 1935 * and set @b_node->b_end to the next free slot. 1936 * @mas: The maple state 1937 * @mas_start: The starting slot to copy 1938 * @mas_end: The end slot to copy (inclusively) 1939 * @b_node: The maple_big_node to place the data 1940 * @mab_start: The starting location in maple_big_node to store the data. 1941 */ 1942 static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start, 1943 unsigned char mas_end, struct maple_big_node *b_node, 1944 unsigned char mab_start) 1945 { 1946 enum maple_type mt; 1947 struct maple_node *node; 1948 void __rcu **slots; 1949 unsigned long *pivots, *gaps; 1950 int i = mas_start, j = mab_start; 1951 unsigned char piv_end; 1952 1953 node = mas_mn(mas); 1954 mt = mte_node_type(mas->node); 1955 pivots = ma_pivots(node, mt); 1956 if (!i) { 1957 b_node->pivot[j] = pivots[i++]; 1958 if (unlikely(i > mas_end)) 1959 goto complete; 1960 j++; 1961 } 1962 1963 piv_end = min(mas_end, mt_pivots[mt]); 1964 for (; i < piv_end; i++, j++) { 1965 b_node->pivot[j] = pivots[i]; 1966 if (unlikely(!b_node->pivot[j])) 1967 break; 1968 1969 if (unlikely(mas->max == b_node->pivot[j])) 1970 goto complete; 1971 } 1972 1973 if (likely(i <= mas_end)) 1974 b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); 1975 1976 complete: 1977 b_node->b_end = ++j; 1978 j -= mab_start; 1979 slots = ma_slots(node, mt); 1980 memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j); 1981 if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) { 1982 gaps = ma_gaps(node, mt); 1983 memcpy(b_node->gap + mab_start, gaps + mas_start, 1984 sizeof(unsigned long) * j); 1985 } 1986 } 1987 1988 /* 1989 * mas_leaf_set_meta() - Set the metadata of a leaf if possible. 1990 * @mas: The maple state 1991 * @node: The maple node 1992 * @pivots: pointer to the maple node pivots 1993 * @mt: The maple type 1994 * @end: The assumed end 1995 * 1996 * Note, end may be incremented within this function but not modified at the 1997 * source. This is fine since the metadata is the last thing to be stored in a 1998 * node during a write. 1999 */ 2000 static inline void mas_leaf_set_meta(struct ma_state *mas, 2001 struct maple_node *node, unsigned long *pivots, 2002 enum maple_type mt, unsigned char end) 2003 { 2004 /* There is no room for metadata already */ 2005 if (mt_pivots[mt] <= end) 2006 return; 2007 2008 if (pivots[end] && pivots[end] < mas->max) 2009 end++; 2010 2011 if (end < mt_slots[mt] - 1) 2012 ma_set_meta(node, mt, 0, end); 2013 } 2014 2015 /* 2016 * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node. 2017 * @b_node: the maple_big_node that has the data 2018 * @mab_start: the start location in @b_node. 2019 * @mab_end: The end location in @b_node (inclusively) 2020 * @mas: The maple state with the maple encoded node. 2021 */ 2022 static inline void mab_mas_cp(struct maple_big_node *b_node, 2023 unsigned char mab_start, unsigned char mab_end, 2024 struct ma_state *mas, bool new_max) 2025 { 2026 int i, j = 0; 2027 enum maple_type mt = mte_node_type(mas->node); 2028 struct maple_node *node = mte_to_node(mas->node); 2029 void __rcu **slots = ma_slots(node, mt); 2030 unsigned long *pivots = ma_pivots(node, mt); 2031 unsigned long *gaps = NULL; 2032 unsigned char end; 2033 2034 if (mab_end - mab_start > mt_pivots[mt]) 2035 mab_end--; 2036 2037 if (!pivots[mt_pivots[mt] - 1]) 2038 slots[mt_pivots[mt]] = NULL; 2039 2040 i = mab_start; 2041 do { 2042 pivots[j++] = b_node->pivot[i++]; 2043 } while (i <= mab_end && likely(b_node->pivot[i])); 2044 2045 memcpy(slots, b_node->slot + mab_start, 2046 sizeof(void *) * (i - mab_start)); 2047 2048 if (new_max) 2049 mas->max = b_node->pivot[i - 1]; 2050 2051 end = j - 1; 2052 if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) { 2053 unsigned long max_gap = 0; 2054 unsigned char offset = 0; 2055 2056 gaps = ma_gaps(node, mt); 2057 do { 2058 gaps[--j] = b_node->gap[--i]; 2059 if (gaps[j] > max_gap) { 2060 offset = j; 2061 max_gap = gaps[j]; 2062 } 2063 } while (j); 2064 2065 ma_set_meta(node, mt, offset, end); 2066 } else { 2067 mas_leaf_set_meta(mas, node, pivots, mt, end); 2068 } 2069 } 2070 2071 /* 2072 * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert. 2073 * @mas: The maple state 2074 * @end: The maple node end 2075 * @mt: The maple node type 2076 */ 2077 static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end, 2078 enum maple_type mt) 2079 { 2080 if (!(mas->mas_flags & MA_STATE_BULK)) 2081 return; 2082 2083 if (mte_is_root(mas->node)) 2084 return; 2085 2086 if (end > mt_min_slots[mt]) { 2087 mas->mas_flags &= ~MA_STATE_REBALANCE; 2088 return; 2089 } 2090 } 2091 2092 /* 2093 * mas_store_b_node() - Store an @entry into the b_node while also copying the 2094 * data from a maple encoded node. 2095 * @wr_mas: the maple write state 2096 * @b_node: the maple_big_node to fill with data 2097 * @offset_end: the offset to end copying 2098 * 2099 * Return: The actual end of the data stored in @b_node 2100 */ 2101 static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas, 2102 struct maple_big_node *b_node, unsigned char offset_end) 2103 { 2104 unsigned char slot; 2105 unsigned char b_end; 2106 /* Possible underflow of piv will wrap back to 0 before use. */ 2107 unsigned long piv; 2108 struct ma_state *mas = wr_mas->mas; 2109 2110 b_node->type = wr_mas->type; 2111 b_end = 0; 2112 slot = mas->offset; 2113 if (slot) { 2114 /* Copy start data up to insert. */ 2115 mas_mab_cp(mas, 0, slot - 1, b_node, 0); 2116 b_end = b_node->b_end; 2117 piv = b_node->pivot[b_end - 1]; 2118 } else 2119 piv = mas->min - 1; 2120 2121 if (piv + 1 < mas->index) { 2122 /* Handle range starting after old range */ 2123 b_node->slot[b_end] = wr_mas->content; 2124 if (!wr_mas->content) 2125 b_node->gap[b_end] = mas->index - 1 - piv; 2126 b_node->pivot[b_end++] = mas->index - 1; 2127 } 2128 2129 /* Store the new entry. */ 2130 mas->offset = b_end; 2131 b_node->slot[b_end] = wr_mas->entry; 2132 b_node->pivot[b_end] = mas->last; 2133 2134 /* Appended. */ 2135 if (mas->last >= mas->max) 2136 goto b_end; 2137 2138 /* Handle new range ending before old range ends */ 2139 piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type); 2140 if (piv > mas->last) { 2141 if (piv == ULONG_MAX) 2142 mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type); 2143 2144 if (offset_end != slot) 2145 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 2146 offset_end); 2147 2148 b_node->slot[++b_end] = wr_mas->content; 2149 if (!wr_mas->content) 2150 b_node->gap[b_end] = piv - mas->last + 1; 2151 b_node->pivot[b_end] = piv; 2152 } 2153 2154 slot = offset_end + 1; 2155 if (slot > wr_mas->node_end) 2156 goto b_end; 2157 2158 /* Copy end data to the end of the node. */ 2159 mas_mab_cp(mas, slot, wr_mas->node_end + 1, b_node, ++b_end); 2160 b_node->b_end--; 2161 return; 2162 2163 b_end: 2164 b_node->b_end = b_end; 2165 } 2166 2167 /* 2168 * mas_prev_sibling() - Find the previous node with the same parent. 2169 * @mas: the maple state 2170 * 2171 * Return: True if there is a previous sibling, false otherwise. 2172 */ 2173 static inline bool mas_prev_sibling(struct ma_state *mas) 2174 { 2175 unsigned int p_slot = mte_parent_slot(mas->node); 2176 2177 if (mte_is_root(mas->node)) 2178 return false; 2179 2180 if (!p_slot) 2181 return false; 2182 2183 mas_ascend(mas); 2184 mas->offset = p_slot - 1; 2185 mas_descend(mas); 2186 return true; 2187 } 2188 2189 /* 2190 * mas_next_sibling() - Find the next node with the same parent. 2191 * @mas: the maple state 2192 * 2193 * Return: true if there is a next sibling, false otherwise. 2194 */ 2195 static inline bool mas_next_sibling(struct ma_state *mas) 2196 { 2197 MA_STATE(parent, mas->tree, mas->index, mas->last); 2198 2199 if (mte_is_root(mas->node)) 2200 return false; 2201 2202 parent = *mas; 2203 mas_ascend(&parent); 2204 parent.offset = mte_parent_slot(mas->node) + 1; 2205 if (parent.offset > mas_data_end(&parent)) 2206 return false; 2207 2208 *mas = parent; 2209 mas_descend(mas); 2210 return true; 2211 } 2212 2213 /* 2214 * mte_node_or_node() - Return the encoded node or MAS_NONE. 2215 * @enode: The encoded maple node. 2216 * 2217 * Shorthand to avoid setting %NULLs in the tree or maple_subtree_state. 2218 * 2219 * Return: @enode or MAS_NONE 2220 */ 2221 static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode) 2222 { 2223 if (enode) 2224 return enode; 2225 2226 return ma_enode_ptr(MAS_NONE); 2227 } 2228 2229 /* 2230 * mas_wr_node_walk() - Find the correct offset for the index in the @mas. 2231 * If @mas->index cannot be found within the containing 2232 * node, we traverse to the last entry in the node. 2233 * @wr_mas: The maple write state 2234 * 2235 * Uses mas_slot_locked() and does not need to worry about dead nodes. 2236 */ 2237 static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) 2238 { 2239 struct ma_state *mas = wr_mas->mas; 2240 unsigned char count, offset; 2241 2242 if (unlikely(ma_is_dense(wr_mas->type))) { 2243 wr_mas->r_max = wr_mas->r_min = mas->index; 2244 mas->offset = mas->index = mas->min; 2245 return; 2246 } 2247 2248 wr_mas->node = mas_mn(wr_mas->mas); 2249 wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type); 2250 count = wr_mas->node_end = ma_data_end(wr_mas->node, wr_mas->type, 2251 wr_mas->pivots, mas->max); 2252 offset = mas->offset; 2253 2254 while (offset < count && mas->index > wr_mas->pivots[offset]) 2255 offset++; 2256 2257 wr_mas->r_max = offset < count ? wr_mas->pivots[offset] : mas->max; 2258 wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset); 2259 wr_mas->offset_end = mas->offset = offset; 2260 } 2261 2262 /* 2263 * mast_rebalance_next() - Rebalance against the next node 2264 * @mast: The maple subtree state 2265 * @old_r: The encoded maple node to the right (next node). 2266 */ 2267 static inline void mast_rebalance_next(struct maple_subtree_state *mast) 2268 { 2269 unsigned char b_end = mast->bn->b_end; 2270 2271 mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node), 2272 mast->bn, b_end); 2273 mast->orig_r->last = mast->orig_r->max; 2274 } 2275 2276 /* 2277 * mast_rebalance_prev() - Rebalance against the previous node 2278 * @mast: The maple subtree state 2279 * @old_l: The encoded maple node to the left (previous node) 2280 */ 2281 static inline void mast_rebalance_prev(struct maple_subtree_state *mast) 2282 { 2283 unsigned char end = mas_data_end(mast->orig_l) + 1; 2284 unsigned char b_end = mast->bn->b_end; 2285 2286 mab_shift_right(mast->bn, end); 2287 mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0); 2288 mast->l->min = mast->orig_l->min; 2289 mast->orig_l->index = mast->orig_l->min; 2290 mast->bn->b_end = end + b_end; 2291 mast->l->offset += end; 2292 } 2293 2294 /* 2295 * mast_spanning_rebalance() - Rebalance nodes with nearest neighbour favouring 2296 * the node to the right. Checking the nodes to the right then the left at each 2297 * level upwards until root is reached. 2298 * Data is copied into the @mast->bn. 2299 * @mast: The maple_subtree_state. 2300 */ 2301 static inline 2302 bool mast_spanning_rebalance(struct maple_subtree_state *mast) 2303 { 2304 struct ma_state r_tmp = *mast->orig_r; 2305 struct ma_state l_tmp = *mast->orig_l; 2306 unsigned char depth = 0; 2307 2308 r_tmp = *mast->orig_r; 2309 l_tmp = *mast->orig_l; 2310 do { 2311 mas_ascend(mast->orig_r); 2312 mas_ascend(mast->orig_l); 2313 depth++; 2314 if (mast->orig_r->offset < mas_data_end(mast->orig_r)) { 2315 mast->orig_r->offset++; 2316 do { 2317 mas_descend(mast->orig_r); 2318 mast->orig_r->offset = 0; 2319 } while (--depth); 2320 2321 mast_rebalance_next(mast); 2322 *mast->orig_l = l_tmp; 2323 return true; 2324 } else if (mast->orig_l->offset != 0) { 2325 mast->orig_l->offset--; 2326 do { 2327 mas_descend(mast->orig_l); 2328 mast->orig_l->offset = 2329 mas_data_end(mast->orig_l); 2330 } while (--depth); 2331 2332 mast_rebalance_prev(mast); 2333 *mast->orig_r = r_tmp; 2334 return true; 2335 } 2336 } while (!mte_is_root(mast->orig_r->node)); 2337 2338 *mast->orig_r = r_tmp; 2339 *mast->orig_l = l_tmp; 2340 return false; 2341 } 2342 2343 /* 2344 * mast_ascend() - Ascend the original left and right maple states. 2345 * @mast: the maple subtree state. 2346 * 2347 * Ascend the original left and right sides. Set the offsets to point to the 2348 * data already in the new tree (@mast->l and @mast->r). 2349 */ 2350 static inline void mast_ascend(struct maple_subtree_state *mast) 2351 { 2352 MA_WR_STATE(wr_mas, mast->orig_r, NULL); 2353 mas_ascend(mast->orig_l); 2354 mas_ascend(mast->orig_r); 2355 2356 mast->orig_r->offset = 0; 2357 mast->orig_r->index = mast->r->max; 2358 /* last should be larger than or equal to index */ 2359 if (mast->orig_r->last < mast->orig_r->index) 2360 mast->orig_r->last = mast->orig_r->index; 2361 2362 wr_mas.type = mte_node_type(mast->orig_r->node); 2363 mas_wr_node_walk(&wr_mas); 2364 /* Set up the left side of things */ 2365 mast->orig_l->offset = 0; 2366 mast->orig_l->index = mast->l->min; 2367 wr_mas.mas = mast->orig_l; 2368 wr_mas.type = mte_node_type(mast->orig_l->node); 2369 mas_wr_node_walk(&wr_mas); 2370 2371 mast->bn->type = wr_mas.type; 2372 } 2373 2374 /* 2375 * mas_new_ma_node() - Create and return a new maple node. Helper function. 2376 * @mas: the maple state with the allocations. 2377 * @b_node: the maple_big_node with the type encoding. 2378 * 2379 * Use the node type from the maple_big_node to allocate a new node from the 2380 * ma_state. This function exists mainly for code readability. 2381 * 2382 * Return: A new maple encoded node 2383 */ 2384 static inline struct maple_enode 2385 *mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node) 2386 { 2387 return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type); 2388 } 2389 2390 /* 2391 * mas_mab_to_node() - Set up right and middle nodes 2392 * 2393 * @mas: the maple state that contains the allocations. 2394 * @b_node: the node which contains the data. 2395 * @left: The pointer which will have the left node 2396 * @right: The pointer which may have the right node 2397 * @middle: the pointer which may have the middle node (rare) 2398 * @mid_split: the split location for the middle node 2399 * 2400 * Return: the split of left. 2401 */ 2402 static inline unsigned char mas_mab_to_node(struct ma_state *mas, 2403 struct maple_big_node *b_node, struct maple_enode **left, 2404 struct maple_enode **right, struct maple_enode **middle, 2405 unsigned char *mid_split, unsigned long min) 2406 { 2407 unsigned char split = 0; 2408 unsigned char slot_count = mt_slots[b_node->type]; 2409 2410 *left = mas_new_ma_node(mas, b_node); 2411 *right = NULL; 2412 *middle = NULL; 2413 *mid_split = 0; 2414 2415 if (b_node->b_end < slot_count) { 2416 split = b_node->b_end; 2417 } else { 2418 split = mab_calc_split(mas, b_node, mid_split, min); 2419 *right = mas_new_ma_node(mas, b_node); 2420 } 2421 2422 if (*mid_split) 2423 *middle = mas_new_ma_node(mas, b_node); 2424 2425 return split; 2426 2427 } 2428 2429 /* 2430 * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end 2431 * pointer. 2432 * @b_node - the big node to add the entry 2433 * @mas - the maple state to get the pivot (mas->max) 2434 * @entry - the entry to add, if NULL nothing happens. 2435 */ 2436 static inline void mab_set_b_end(struct maple_big_node *b_node, 2437 struct ma_state *mas, 2438 void *entry) 2439 { 2440 if (!entry) 2441 return; 2442 2443 b_node->slot[b_node->b_end] = entry; 2444 if (mt_is_alloc(mas->tree)) 2445 b_node->gap[b_node->b_end] = mas_max_gap(mas); 2446 b_node->pivot[b_node->b_end++] = mas->max; 2447 } 2448 2449 /* 2450 * mas_set_split_parent() - combine_then_separate helper function. Sets the parent 2451 * of @mas->node to either @left or @right, depending on @slot and @split 2452 * 2453 * @mas - the maple state with the node that needs a parent 2454 * @left - possible parent 1 2455 * @right - possible parent 2 2456 * @slot - the slot the mas->node was placed 2457 * @split - the split location between @left and @right 2458 */ 2459 static inline void mas_set_split_parent(struct ma_state *mas, 2460 struct maple_enode *left, 2461 struct maple_enode *right, 2462 unsigned char *slot, unsigned char split) 2463 { 2464 if (mas_is_none(mas)) 2465 return; 2466 2467 if ((*slot) <= split) 2468 mas_set_parent(mas, mas->node, left, *slot); 2469 else if (right) 2470 mas_set_parent(mas, mas->node, right, (*slot) - split - 1); 2471 2472 (*slot)++; 2473 } 2474 2475 /* 2476 * mte_mid_split_check() - Check if the next node passes the mid-split 2477 * @**l: Pointer to left encoded maple node. 2478 * @**m: Pointer to middle encoded maple node. 2479 * @**r: Pointer to right encoded maple node. 2480 * @slot: The offset 2481 * @*split: The split location. 2482 * @mid_split: The middle split. 2483 */ 2484 static inline void mte_mid_split_check(struct maple_enode **l, 2485 struct maple_enode **r, 2486 struct maple_enode *right, 2487 unsigned char slot, 2488 unsigned char *split, 2489 unsigned char mid_split) 2490 { 2491 if (*r == right) 2492 return; 2493 2494 if (slot < mid_split) 2495 return; 2496 2497 *l = *r; 2498 *r = right; 2499 *split = mid_split; 2500 } 2501 2502 /* 2503 * mast_set_split_parents() - Helper function to set three nodes parents. Slot 2504 * is taken from @mast->l. 2505 * @mast - the maple subtree state 2506 * @left - the left node 2507 * @right - the right node 2508 * @split - the split location. 2509 */ 2510 static inline void mast_set_split_parents(struct maple_subtree_state *mast, 2511 struct maple_enode *left, 2512 struct maple_enode *middle, 2513 struct maple_enode *right, 2514 unsigned char split, 2515 unsigned char mid_split) 2516 { 2517 unsigned char slot; 2518 struct maple_enode *l = left; 2519 struct maple_enode *r = right; 2520 2521 if (mas_is_none(mast->l)) 2522 return; 2523 2524 if (middle) 2525 r = middle; 2526 2527 slot = mast->l->offset; 2528 2529 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2530 mas_set_split_parent(mast->l, l, r, &slot, split); 2531 2532 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2533 mas_set_split_parent(mast->m, l, r, &slot, split); 2534 2535 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2536 mas_set_split_parent(mast->r, l, r, &slot, split); 2537 } 2538 2539 /* 2540 * mas_topiary_node() - Dispose of a singe node 2541 * @mas: The maple state for pushing nodes 2542 * @enode: The encoded maple node 2543 * @in_rcu: If the tree is in rcu mode 2544 * 2545 * The node will either be RCU freed or pushed back on the maple state. 2546 */ 2547 static inline void mas_topiary_node(struct ma_state *mas, 2548 struct maple_enode *enode, bool in_rcu) 2549 { 2550 struct maple_node *tmp; 2551 2552 if (enode == MAS_NONE) 2553 return; 2554 2555 tmp = mte_to_node(enode); 2556 mte_set_node_dead(enode); 2557 if (in_rcu) 2558 ma_free_rcu(tmp); 2559 else 2560 mas_push_node(mas, tmp); 2561 } 2562 2563 /* 2564 * mas_topiary_replace() - Replace the data with new data, then repair the 2565 * parent links within the new tree. Iterate over the dead sub-tree and collect 2566 * the dead subtrees and topiary the nodes that are no longer of use. 2567 * 2568 * The new tree will have up to three children with the correct parent. Keep 2569 * track of the new entries as they need to be followed to find the next level 2570 * of new entries. 2571 * 2572 * The old tree will have up to three children with the old parent. Keep track 2573 * of the old entries as they may have more nodes below replaced. Nodes within 2574 * [index, last] are dead subtrees, others need to be freed and followed. 2575 * 2576 * @mas: The maple state pointing at the new data 2577 * @old_enode: The maple encoded node being replaced 2578 * 2579 */ 2580 static inline void mas_topiary_replace(struct ma_state *mas, 2581 struct maple_enode *old_enode) 2582 { 2583 struct ma_state tmp[3], tmp_next[3]; 2584 MA_TOPIARY(subtrees, mas->tree); 2585 bool in_rcu; 2586 int i, n; 2587 2588 /* Place data in tree & then mark node as old */ 2589 mas_put_in_tree(mas, old_enode); 2590 2591 /* Update the parent pointers in the tree */ 2592 tmp[0] = *mas; 2593 tmp[0].offset = 0; 2594 tmp[1].node = MAS_NONE; 2595 tmp[2].node = MAS_NONE; 2596 while (!mte_is_leaf(tmp[0].node)) { 2597 n = 0; 2598 for (i = 0; i < 3; i++) { 2599 if (mas_is_none(&tmp[i])) 2600 continue; 2601 2602 while (n < 3) { 2603 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2604 break; 2605 n++; 2606 } 2607 2608 mas_adopt_children(&tmp[i], tmp[i].node); 2609 } 2610 2611 if (MAS_WARN_ON(mas, n == 0)) 2612 break; 2613 2614 while (n < 3) 2615 tmp_next[n++].node = MAS_NONE; 2616 2617 for (i = 0; i < 3; i++) 2618 tmp[i] = tmp_next[i]; 2619 } 2620 2621 /* Collect the old nodes that need to be discarded */ 2622 if (mte_is_leaf(old_enode)) 2623 return mas_free(mas, old_enode); 2624 2625 tmp[0] = *mas; 2626 tmp[0].offset = 0; 2627 tmp[0].node = old_enode; 2628 tmp[1].node = MAS_NONE; 2629 tmp[2].node = MAS_NONE; 2630 in_rcu = mt_in_rcu(mas->tree); 2631 do { 2632 n = 0; 2633 for (i = 0; i < 3; i++) { 2634 if (mas_is_none(&tmp[i])) 2635 continue; 2636 2637 while (n < 3) { 2638 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2639 break; 2640 2641 if ((tmp_next[n].min >= tmp_next->index) && 2642 (tmp_next[n].max <= tmp_next->last)) { 2643 mat_add(&subtrees, tmp_next[n].node); 2644 tmp_next[n].node = MAS_NONE; 2645 } else { 2646 n++; 2647 } 2648 } 2649 } 2650 2651 if (MAS_WARN_ON(mas, n == 0)) 2652 break; 2653 2654 while (n < 3) 2655 tmp_next[n++].node = MAS_NONE; 2656 2657 for (i = 0; i < 3; i++) { 2658 mas_topiary_node(mas, tmp[i].node, in_rcu); 2659 tmp[i] = tmp_next[i]; 2660 } 2661 } while (!mte_is_leaf(tmp[0].node)); 2662 2663 for (i = 0; i < 3; i++) 2664 mas_topiary_node(mas, tmp[i].node, in_rcu); 2665 2666 mas_mat_destroy(mas, &subtrees); 2667 } 2668 2669 /* 2670 * mas_wmb_replace() - Write memory barrier and replace 2671 * @mas: The maple state 2672 * @old: The old maple encoded node that is being replaced. 2673 * 2674 * Updates gap as necessary. 2675 */ 2676 static inline void mas_wmb_replace(struct ma_state *mas, 2677 struct maple_enode *old_enode) 2678 { 2679 /* Insert the new data in the tree */ 2680 mas_topiary_replace(mas, old_enode); 2681 2682 if (mte_is_leaf(mas->node)) 2683 return; 2684 2685 mas_update_gap(mas); 2686 } 2687 2688 /* 2689 * mast_cp_to_nodes() - Copy data out to nodes. 2690 * @mast: The maple subtree state 2691 * @left: The left encoded maple node 2692 * @middle: The middle encoded maple node 2693 * @right: The right encoded maple node 2694 * @split: The location to split between left and (middle ? middle : right) 2695 * @mid_split: The location to split between middle and right. 2696 */ 2697 static inline void mast_cp_to_nodes(struct maple_subtree_state *mast, 2698 struct maple_enode *left, struct maple_enode *middle, 2699 struct maple_enode *right, unsigned char split, unsigned char mid_split) 2700 { 2701 bool new_lmax = true; 2702 2703 mast->l->node = mte_node_or_none(left); 2704 mast->m->node = mte_node_or_none(middle); 2705 mast->r->node = mte_node_or_none(right); 2706 2707 mast->l->min = mast->orig_l->min; 2708 if (split == mast->bn->b_end) { 2709 mast->l->max = mast->orig_r->max; 2710 new_lmax = false; 2711 } 2712 2713 mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax); 2714 2715 if (middle) { 2716 mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true); 2717 mast->m->min = mast->bn->pivot[split] + 1; 2718 split = mid_split; 2719 } 2720 2721 mast->r->max = mast->orig_r->max; 2722 if (right) { 2723 mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false); 2724 mast->r->min = mast->bn->pivot[split] + 1; 2725 } 2726 } 2727 2728 /* 2729 * mast_combine_cp_left - Copy in the original left side of the tree into the 2730 * combined data set in the maple subtree state big node. 2731 * @mast: The maple subtree state 2732 */ 2733 static inline void mast_combine_cp_left(struct maple_subtree_state *mast) 2734 { 2735 unsigned char l_slot = mast->orig_l->offset; 2736 2737 if (!l_slot) 2738 return; 2739 2740 mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0); 2741 } 2742 2743 /* 2744 * mast_combine_cp_right: Copy in the original right side of the tree into the 2745 * combined data set in the maple subtree state big node. 2746 * @mast: The maple subtree state 2747 */ 2748 static inline void mast_combine_cp_right(struct maple_subtree_state *mast) 2749 { 2750 if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max) 2751 return; 2752 2753 mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1, 2754 mt_slot_count(mast->orig_r->node), mast->bn, 2755 mast->bn->b_end); 2756 mast->orig_r->last = mast->orig_r->max; 2757 } 2758 2759 /* 2760 * mast_sufficient: Check if the maple subtree state has enough data in the big 2761 * node to create at least one sufficient node 2762 * @mast: the maple subtree state 2763 */ 2764 static inline bool mast_sufficient(struct maple_subtree_state *mast) 2765 { 2766 if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node)) 2767 return true; 2768 2769 return false; 2770 } 2771 2772 /* 2773 * mast_overflow: Check if there is too much data in the subtree state for a 2774 * single node. 2775 * @mast: The maple subtree state 2776 */ 2777 static inline bool mast_overflow(struct maple_subtree_state *mast) 2778 { 2779 if (mast->bn->b_end >= mt_slot_count(mast->orig_l->node)) 2780 return true; 2781 2782 return false; 2783 } 2784 2785 static inline void *mtree_range_walk(struct ma_state *mas) 2786 { 2787 unsigned long *pivots; 2788 unsigned char offset; 2789 struct maple_node *node; 2790 struct maple_enode *next, *last; 2791 enum maple_type type; 2792 void __rcu **slots; 2793 unsigned char end; 2794 unsigned long max, min; 2795 unsigned long prev_max, prev_min; 2796 2797 next = mas->node; 2798 min = mas->min; 2799 max = mas->max; 2800 do { 2801 offset = 0; 2802 last = next; 2803 node = mte_to_node(next); 2804 type = mte_node_type(next); 2805 pivots = ma_pivots(node, type); 2806 end = ma_data_end(node, type, pivots, max); 2807 if (unlikely(ma_dead_node(node))) 2808 goto dead_node; 2809 2810 if (pivots[offset] >= mas->index) { 2811 prev_max = max; 2812 prev_min = min; 2813 max = pivots[offset]; 2814 goto next; 2815 } 2816 2817 do { 2818 offset++; 2819 } while ((offset < end) && (pivots[offset] < mas->index)); 2820 2821 prev_min = min; 2822 min = pivots[offset - 1] + 1; 2823 prev_max = max; 2824 if (likely(offset < end && pivots[offset])) 2825 max = pivots[offset]; 2826 2827 next: 2828 slots = ma_slots(node, type); 2829 next = mt_slot(mas->tree, slots, offset); 2830 if (unlikely(ma_dead_node(node))) 2831 goto dead_node; 2832 } while (!ma_is_leaf(type)); 2833 2834 mas->offset = offset; 2835 mas->index = min; 2836 mas->last = max; 2837 mas->min = prev_min; 2838 mas->max = prev_max; 2839 mas->node = last; 2840 return (void *)next; 2841 2842 dead_node: 2843 mas_reset(mas); 2844 return NULL; 2845 } 2846 2847 /* 2848 * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers. 2849 * @mas: The starting maple state 2850 * @mast: The maple_subtree_state, keeps track of 4 maple states. 2851 * @count: The estimated count of iterations needed. 2852 * 2853 * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root 2854 * is hit. First @b_node is split into two entries which are inserted into the 2855 * next iteration of the loop. @b_node is returned populated with the final 2856 * iteration. @mas is used to obtain allocations. orig_l_mas keeps track of the 2857 * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last 2858 * to account of what has been copied into the new sub-tree. The update of 2859 * orig_l_mas->last is used in mas_consume to find the slots that will need to 2860 * be either freed or destroyed. orig_l_mas->depth keeps track of the height of 2861 * the new sub-tree in case the sub-tree becomes the full tree. 2862 * 2863 * Return: the number of elements in b_node during the last loop. 2864 */ 2865 static int mas_spanning_rebalance(struct ma_state *mas, 2866 struct maple_subtree_state *mast, unsigned char count) 2867 { 2868 unsigned char split, mid_split; 2869 unsigned char slot = 0; 2870 struct maple_enode *left = NULL, *middle = NULL, *right = NULL; 2871 struct maple_enode *old_enode; 2872 2873 MA_STATE(l_mas, mas->tree, mas->index, mas->index); 2874 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2875 MA_STATE(m_mas, mas->tree, mas->index, mas->index); 2876 2877 /* 2878 * The tree needs to be rebalanced and leaves need to be kept at the same level. 2879 * Rebalancing is done by use of the ``struct maple_topiary``. 2880 */ 2881 mast->l = &l_mas; 2882 mast->m = &m_mas; 2883 mast->r = &r_mas; 2884 l_mas.node = r_mas.node = m_mas.node = MAS_NONE; 2885 2886 /* Check if this is not root and has sufficient data. */ 2887 if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) && 2888 unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type])) 2889 mast_spanning_rebalance(mast); 2890 2891 l_mas.depth = 0; 2892 2893 /* 2894 * Each level of the tree is examined and balanced, pushing data to the left or 2895 * right, or rebalancing against left or right nodes is employed to avoid 2896 * rippling up the tree to limit the amount of churn. Once a new sub-section of 2897 * the tree is created, there may be a mix of new and old nodes. The old nodes 2898 * will have the incorrect parent pointers and currently be in two trees: the 2899 * original tree and the partially new tree. To remedy the parent pointers in 2900 * the old tree, the new data is swapped into the active tree and a walk down 2901 * the tree is performed and the parent pointers are updated. 2902 * See mas_topiary_replace() for more information. 2903 */ 2904 while (count--) { 2905 mast->bn->b_end--; 2906 mast->bn->type = mte_node_type(mast->orig_l->node); 2907 split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle, 2908 &mid_split, mast->orig_l->min); 2909 mast_set_split_parents(mast, left, middle, right, split, 2910 mid_split); 2911 mast_cp_to_nodes(mast, left, middle, right, split, mid_split); 2912 2913 /* 2914 * Copy data from next level in the tree to mast->bn from next 2915 * iteration 2916 */ 2917 memset(mast->bn, 0, sizeof(struct maple_big_node)); 2918 mast->bn->type = mte_node_type(left); 2919 l_mas.depth++; 2920 2921 /* Root already stored in l->node. */ 2922 if (mas_is_root_limits(mast->l)) 2923 goto new_root; 2924 2925 mast_ascend(mast); 2926 mast_combine_cp_left(mast); 2927 l_mas.offset = mast->bn->b_end; 2928 mab_set_b_end(mast->bn, &l_mas, left); 2929 mab_set_b_end(mast->bn, &m_mas, middle); 2930 mab_set_b_end(mast->bn, &r_mas, right); 2931 2932 /* Copy anything necessary out of the right node. */ 2933 mast_combine_cp_right(mast); 2934 mast->orig_l->last = mast->orig_l->max; 2935 2936 if (mast_sufficient(mast)) 2937 continue; 2938 2939 if (mast_overflow(mast)) 2940 continue; 2941 2942 /* May be a new root stored in mast->bn */ 2943 if (mas_is_root_limits(mast->orig_l)) 2944 break; 2945 2946 mast_spanning_rebalance(mast); 2947 2948 /* rebalancing from other nodes may require another loop. */ 2949 if (!count) 2950 count++; 2951 } 2952 2953 l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), 2954 mte_node_type(mast->orig_l->node)); 2955 l_mas.depth++; 2956 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true); 2957 mas_set_parent(mas, left, l_mas.node, slot); 2958 if (middle) 2959 mas_set_parent(mas, middle, l_mas.node, ++slot); 2960 2961 if (right) 2962 mas_set_parent(mas, right, l_mas.node, ++slot); 2963 2964 if (mas_is_root_limits(mast->l)) { 2965 new_root: 2966 mas_mn(mast->l)->parent = ma_parent_ptr(mas_tree_parent(mas)); 2967 while (!mte_is_root(mast->orig_l->node)) 2968 mast_ascend(mast); 2969 } else { 2970 mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent; 2971 } 2972 2973 old_enode = mast->orig_l->node; 2974 mas->depth = l_mas.depth; 2975 mas->node = l_mas.node; 2976 mas->min = l_mas.min; 2977 mas->max = l_mas.max; 2978 mas->offset = l_mas.offset; 2979 mas_wmb_replace(mas, old_enode); 2980 mtree_range_walk(mas); 2981 return mast->bn->b_end; 2982 } 2983 2984 /* 2985 * mas_rebalance() - Rebalance a given node. 2986 * @mas: The maple state 2987 * @b_node: The big maple node. 2988 * 2989 * Rebalance two nodes into a single node or two new nodes that are sufficient. 2990 * Continue upwards until tree is sufficient. 2991 * 2992 * Return: the number of elements in b_node during the last loop. 2993 */ 2994 static inline int mas_rebalance(struct ma_state *mas, 2995 struct maple_big_node *b_node) 2996 { 2997 char empty_count = mas_mt_height(mas); 2998 struct maple_subtree_state mast; 2999 unsigned char shift, b_end = ++b_node->b_end; 3000 3001 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 3002 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 3003 3004 trace_ma_op(__func__, mas); 3005 3006 /* 3007 * Rebalancing occurs if a node is insufficient. Data is rebalanced 3008 * against the node to the right if it exists, otherwise the node to the 3009 * left of this node is rebalanced against this node. If rebalancing 3010 * causes just one node to be produced instead of two, then the parent 3011 * is also examined and rebalanced if it is insufficient. Every level 3012 * tries to combine the data in the same way. If one node contains the 3013 * entire range of the tree, then that node is used as a new root node. 3014 */ 3015 mas_node_count(mas, empty_count * 2 - 1); 3016 if (mas_is_err(mas)) 3017 return 0; 3018 3019 mast.orig_l = &l_mas; 3020 mast.orig_r = &r_mas; 3021 mast.bn = b_node; 3022 mast.bn->type = mte_node_type(mas->node); 3023 3024 l_mas = r_mas = *mas; 3025 3026 if (mas_next_sibling(&r_mas)) { 3027 mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end); 3028 r_mas.last = r_mas.index = r_mas.max; 3029 } else { 3030 mas_prev_sibling(&l_mas); 3031 shift = mas_data_end(&l_mas) + 1; 3032 mab_shift_right(b_node, shift); 3033 mas->offset += shift; 3034 mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0); 3035 b_node->b_end = shift + b_end; 3036 l_mas.index = l_mas.last = l_mas.min; 3037 } 3038 3039 return mas_spanning_rebalance(mas, &mast, empty_count); 3040 } 3041 3042 /* 3043 * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple 3044 * state. 3045 * @mas: The maple state 3046 * @end: The end of the left-most node. 3047 * 3048 * During a mass-insert event (such as forking), it may be necessary to 3049 * rebalance the left-most node when it is not sufficient. 3050 */ 3051 static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end) 3052 { 3053 enum maple_type mt = mte_node_type(mas->node); 3054 struct maple_node reuse, *newnode, *parent, *new_left, *left, *node; 3055 struct maple_enode *eparent, *old_eparent; 3056 unsigned char offset, tmp, split = mt_slots[mt] / 2; 3057 void __rcu **l_slots, **slots; 3058 unsigned long *l_pivs, *pivs, gap; 3059 bool in_rcu = mt_in_rcu(mas->tree); 3060 3061 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 3062 3063 l_mas = *mas; 3064 mas_prev_sibling(&l_mas); 3065 3066 /* set up node. */ 3067 if (in_rcu) { 3068 /* Allocate for both left and right as well as parent. */ 3069 mas_node_count(mas, 3); 3070 if (mas_is_err(mas)) 3071 return; 3072 3073 newnode = mas_pop_node(mas); 3074 } else { 3075 newnode = &reuse; 3076 } 3077 3078 node = mas_mn(mas); 3079 newnode->parent = node->parent; 3080 slots = ma_slots(newnode, mt); 3081 pivs = ma_pivots(newnode, mt); 3082 left = mas_mn(&l_mas); 3083 l_slots = ma_slots(left, mt); 3084 l_pivs = ma_pivots(left, mt); 3085 if (!l_slots[split]) 3086 split++; 3087 tmp = mas_data_end(&l_mas) - split; 3088 3089 memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp); 3090 memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp); 3091 pivs[tmp] = l_mas.max; 3092 memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end); 3093 memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end); 3094 3095 l_mas.max = l_pivs[split]; 3096 mas->min = l_mas.max + 1; 3097 old_eparent = mt_mk_node(mte_parent(l_mas.node), 3098 mas_parent_type(&l_mas, l_mas.node)); 3099 tmp += end; 3100 if (!in_rcu) { 3101 unsigned char max_p = mt_pivots[mt]; 3102 unsigned char max_s = mt_slots[mt]; 3103 3104 if (tmp < max_p) 3105 memset(pivs + tmp, 0, 3106 sizeof(unsigned long) * (max_p - tmp)); 3107 3108 if (tmp < mt_slots[mt]) 3109 memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp)); 3110 3111 memcpy(node, newnode, sizeof(struct maple_node)); 3112 ma_set_meta(node, mt, 0, tmp - 1); 3113 mte_set_pivot(old_eparent, mte_parent_slot(l_mas.node), 3114 l_pivs[split]); 3115 3116 /* Remove data from l_pivs. */ 3117 tmp = split + 1; 3118 memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp)); 3119 memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp)); 3120 ma_set_meta(left, mt, 0, split); 3121 eparent = old_eparent; 3122 3123 goto done; 3124 } 3125 3126 /* RCU requires replacing both l_mas, mas, and parent. */ 3127 mas->node = mt_mk_node(newnode, mt); 3128 ma_set_meta(newnode, mt, 0, tmp); 3129 3130 new_left = mas_pop_node(mas); 3131 new_left->parent = left->parent; 3132 mt = mte_node_type(l_mas.node); 3133 slots = ma_slots(new_left, mt); 3134 pivs = ma_pivots(new_left, mt); 3135 memcpy(slots, l_slots, sizeof(void *) * split); 3136 memcpy(pivs, l_pivs, sizeof(unsigned long) * split); 3137 ma_set_meta(new_left, mt, 0, split); 3138 l_mas.node = mt_mk_node(new_left, mt); 3139 3140 /* replace parent. */ 3141 offset = mte_parent_slot(mas->node); 3142 mt = mas_parent_type(&l_mas, l_mas.node); 3143 parent = mas_pop_node(mas); 3144 slots = ma_slots(parent, mt); 3145 pivs = ma_pivots(parent, mt); 3146 memcpy(parent, mte_to_node(old_eparent), sizeof(struct maple_node)); 3147 rcu_assign_pointer(slots[offset], mas->node); 3148 rcu_assign_pointer(slots[offset - 1], l_mas.node); 3149 pivs[offset - 1] = l_mas.max; 3150 eparent = mt_mk_node(parent, mt); 3151 done: 3152 gap = mas_leaf_max_gap(mas); 3153 mte_set_gap(eparent, mte_parent_slot(mas->node), gap); 3154 gap = mas_leaf_max_gap(&l_mas); 3155 mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap); 3156 mas_ascend(mas); 3157 3158 if (in_rcu) { 3159 mas_replace_node(mas, old_eparent); 3160 mas_adopt_children(mas, mas->node); 3161 } 3162 3163 mas_update_gap(mas); 3164 } 3165 3166 /* 3167 * mas_split_final_node() - Split the final node in a subtree operation. 3168 * @mast: the maple subtree state 3169 * @mas: The maple state 3170 * @height: The height of the tree in case it's a new root. 3171 */ 3172 static inline bool mas_split_final_node(struct maple_subtree_state *mast, 3173 struct ma_state *mas, int height) 3174 { 3175 struct maple_enode *ancestor; 3176 3177 if (mte_is_root(mas->node)) { 3178 if (mt_is_alloc(mas->tree)) 3179 mast->bn->type = maple_arange_64; 3180 else 3181 mast->bn->type = maple_range_64; 3182 mas->depth = height; 3183 } 3184 /* 3185 * Only a single node is used here, could be root. 3186 * The Big_node data should just fit in a single node. 3187 */ 3188 ancestor = mas_new_ma_node(mas, mast->bn); 3189 mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset); 3190 mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset); 3191 mte_to_node(ancestor)->parent = mas_mn(mas)->parent; 3192 3193 mast->l->node = ancestor; 3194 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true); 3195 mas->offset = mast->bn->b_end - 1; 3196 return true; 3197 } 3198 3199 /* 3200 * mast_fill_bnode() - Copy data into the big node in the subtree state 3201 * @mast: The maple subtree state 3202 * @mas: the maple state 3203 * @skip: The number of entries to skip for new nodes insertion. 3204 */ 3205 static inline void mast_fill_bnode(struct maple_subtree_state *mast, 3206 struct ma_state *mas, 3207 unsigned char skip) 3208 { 3209 bool cp = true; 3210 unsigned char split; 3211 3212 memset(mast->bn->gap, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->gap)); 3213 memset(mast->bn->slot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->slot)); 3214 memset(mast->bn->pivot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->pivot)); 3215 mast->bn->b_end = 0; 3216 3217 if (mte_is_root(mas->node)) { 3218 cp = false; 3219 } else { 3220 mas_ascend(mas); 3221 mas->offset = mte_parent_slot(mas->node); 3222 } 3223 3224 if (cp && mast->l->offset) 3225 mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0); 3226 3227 split = mast->bn->b_end; 3228 mab_set_b_end(mast->bn, mast->l, mast->l->node); 3229 mast->r->offset = mast->bn->b_end; 3230 mab_set_b_end(mast->bn, mast->r, mast->r->node); 3231 if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max) 3232 cp = false; 3233 3234 if (cp) 3235 mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1, 3236 mast->bn, mast->bn->b_end); 3237 3238 mast->bn->b_end--; 3239 mast->bn->type = mte_node_type(mas->node); 3240 } 3241 3242 /* 3243 * mast_split_data() - Split the data in the subtree state big node into regular 3244 * nodes. 3245 * @mast: The maple subtree state 3246 * @mas: The maple state 3247 * @split: The location to split the big node 3248 */ 3249 static inline void mast_split_data(struct maple_subtree_state *mast, 3250 struct ma_state *mas, unsigned char split) 3251 { 3252 unsigned char p_slot; 3253 3254 mab_mas_cp(mast->bn, 0, split, mast->l, true); 3255 mte_set_pivot(mast->r->node, 0, mast->r->max); 3256 mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false); 3257 mast->l->offset = mte_parent_slot(mas->node); 3258 mast->l->max = mast->bn->pivot[split]; 3259 mast->r->min = mast->l->max + 1; 3260 if (mte_is_leaf(mas->node)) 3261 return; 3262 3263 p_slot = mast->orig_l->offset; 3264 mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node, 3265 &p_slot, split); 3266 mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node, 3267 &p_slot, split); 3268 } 3269 3270 /* 3271 * mas_push_data() - Instead of splitting a node, it is beneficial to push the 3272 * data to the right or left node if there is room. 3273 * @mas: The maple state 3274 * @height: The current height of the maple state 3275 * @mast: The maple subtree state 3276 * @left: Push left or not. 3277 * 3278 * Keeping the height of the tree low means faster lookups. 3279 * 3280 * Return: True if pushed, false otherwise. 3281 */ 3282 static inline bool mas_push_data(struct ma_state *mas, int height, 3283 struct maple_subtree_state *mast, bool left) 3284 { 3285 unsigned char slot_total = mast->bn->b_end; 3286 unsigned char end, space, split; 3287 3288 MA_STATE(tmp_mas, mas->tree, mas->index, mas->last); 3289 tmp_mas = *mas; 3290 tmp_mas.depth = mast->l->depth; 3291 3292 if (left && !mas_prev_sibling(&tmp_mas)) 3293 return false; 3294 else if (!left && !mas_next_sibling(&tmp_mas)) 3295 return false; 3296 3297 end = mas_data_end(&tmp_mas); 3298 slot_total += end; 3299 space = 2 * mt_slot_count(mas->node) - 2; 3300 /* -2 instead of -1 to ensure there isn't a triple split */ 3301 if (ma_is_leaf(mast->bn->type)) 3302 space--; 3303 3304 if (mas->max == ULONG_MAX) 3305 space--; 3306 3307 if (slot_total >= space) 3308 return false; 3309 3310 /* Get the data; Fill mast->bn */ 3311 mast->bn->b_end++; 3312 if (left) { 3313 mab_shift_right(mast->bn, end + 1); 3314 mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0); 3315 mast->bn->b_end = slot_total + 1; 3316 } else { 3317 mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end); 3318 } 3319 3320 /* Configure mast for splitting of mast->bn */ 3321 split = mt_slots[mast->bn->type] - 2; 3322 if (left) { 3323 /* Switch mas to prev node */ 3324 *mas = tmp_mas; 3325 /* Start using mast->l for the left side. */ 3326 tmp_mas.node = mast->l->node; 3327 *mast->l = tmp_mas; 3328 } else { 3329 tmp_mas.node = mast->r->node; 3330 *mast->r = tmp_mas; 3331 split = slot_total - split; 3332 } 3333 split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]); 3334 /* Update parent slot for split calculation. */ 3335 if (left) 3336 mast->orig_l->offset += end + 1; 3337 3338 mast_split_data(mast, mas, split); 3339 mast_fill_bnode(mast, mas, 2); 3340 mas_split_final_node(mast, mas, height + 1); 3341 return true; 3342 } 3343 3344 /* 3345 * mas_split() - Split data that is too big for one node into two. 3346 * @mas: The maple state 3347 * @b_node: The maple big node 3348 * Return: 1 on success, 0 on failure. 3349 */ 3350 static int mas_split(struct ma_state *mas, struct maple_big_node *b_node) 3351 { 3352 struct maple_subtree_state mast; 3353 int height = 0; 3354 unsigned char mid_split, split = 0; 3355 struct maple_enode *old; 3356 3357 /* 3358 * Splitting is handled differently from any other B-tree; the Maple 3359 * Tree splits upwards. Splitting up means that the split operation 3360 * occurs when the walk of the tree hits the leaves and not on the way 3361 * down. The reason for splitting up is that it is impossible to know 3362 * how much space will be needed until the leaf is (or leaves are) 3363 * reached. Since overwriting data is allowed and a range could 3364 * overwrite more than one range or result in changing one entry into 3 3365 * entries, it is impossible to know if a split is required until the 3366 * data is examined. 3367 * 3368 * Splitting is a balancing act between keeping allocations to a minimum 3369 * and avoiding a 'jitter' event where a tree is expanded to make room 3370 * for an entry followed by a contraction when the entry is removed. To 3371 * accomplish the balance, there are empty slots remaining in both left 3372 * and right nodes after a split. 3373 */ 3374 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 3375 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 3376 MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last); 3377 MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last); 3378 3379 trace_ma_op(__func__, mas); 3380 mas->depth = mas_mt_height(mas); 3381 /* Allocation failures will happen early. */ 3382 mas_node_count(mas, 1 + mas->depth * 2); 3383 if (mas_is_err(mas)) 3384 return 0; 3385 3386 mast.l = &l_mas; 3387 mast.r = &r_mas; 3388 mast.orig_l = &prev_l_mas; 3389 mast.orig_r = &prev_r_mas; 3390 mast.bn = b_node; 3391 3392 while (height++ <= mas->depth) { 3393 if (mt_slots[b_node->type] > b_node->b_end) { 3394 mas_split_final_node(&mast, mas, height); 3395 break; 3396 } 3397 3398 l_mas = r_mas = *mas; 3399 l_mas.node = mas_new_ma_node(mas, b_node); 3400 r_mas.node = mas_new_ma_node(mas, b_node); 3401 /* 3402 * Another way that 'jitter' is avoided is to terminate a split up early if the 3403 * left or right node has space to spare. This is referred to as "pushing left" 3404 * or "pushing right" and is similar to the B* tree, except the nodes left or 3405 * right can rarely be reused due to RCU, but the ripple upwards is halted which 3406 * is a significant savings. 3407 */ 3408 /* Try to push left. */ 3409 if (mas_push_data(mas, height, &mast, true)) 3410 break; 3411 3412 /* Try to push right. */ 3413 if (mas_push_data(mas, height, &mast, false)) 3414 break; 3415 3416 split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min); 3417 mast_split_data(&mast, mas, split); 3418 /* 3419 * Usually correct, mab_mas_cp in the above call overwrites 3420 * r->max. 3421 */ 3422 mast.r->max = mas->max; 3423 mast_fill_bnode(&mast, mas, 1); 3424 prev_l_mas = *mast.l; 3425 prev_r_mas = *mast.r; 3426 } 3427 3428 /* Set the original node as dead */ 3429 old = mas->node; 3430 mas->node = l_mas.node; 3431 mas_wmb_replace(mas, old); 3432 mtree_range_walk(mas); 3433 return 1; 3434 } 3435 3436 /* 3437 * mas_reuse_node() - Reuse the node to store the data. 3438 * @wr_mas: The maple write state 3439 * @bn: The maple big node 3440 * @end: The end of the data. 3441 * 3442 * Will always return false in RCU mode. 3443 * 3444 * Return: True if node was reused, false otherwise. 3445 */ 3446 static inline bool mas_reuse_node(struct ma_wr_state *wr_mas, 3447 struct maple_big_node *bn, unsigned char end) 3448 { 3449 /* Need to be rcu safe. */ 3450 if (mt_in_rcu(wr_mas->mas->tree)) 3451 return false; 3452 3453 if (end > bn->b_end) { 3454 int clear = mt_slots[wr_mas->type] - bn->b_end; 3455 3456 memset(wr_mas->slots + bn->b_end, 0, sizeof(void *) * clear--); 3457 memset(wr_mas->pivots + bn->b_end, 0, sizeof(void *) * clear); 3458 } 3459 mab_mas_cp(bn, 0, bn->b_end, wr_mas->mas, false); 3460 return true; 3461 } 3462 3463 /* 3464 * mas_commit_b_node() - Commit the big node into the tree. 3465 * @wr_mas: The maple write state 3466 * @b_node: The maple big node 3467 * @end: The end of the data. 3468 */ 3469 static noinline_for_kasan int mas_commit_b_node(struct ma_wr_state *wr_mas, 3470 struct maple_big_node *b_node, unsigned char end) 3471 { 3472 struct maple_node *node; 3473 struct maple_enode *old_enode; 3474 unsigned char b_end = b_node->b_end; 3475 enum maple_type b_type = b_node->type; 3476 3477 old_enode = wr_mas->mas->node; 3478 if ((b_end < mt_min_slots[b_type]) && 3479 (!mte_is_root(old_enode)) && 3480 (mas_mt_height(wr_mas->mas) > 1)) 3481 return mas_rebalance(wr_mas->mas, b_node); 3482 3483 if (b_end >= mt_slots[b_type]) 3484 return mas_split(wr_mas->mas, b_node); 3485 3486 if (mas_reuse_node(wr_mas, b_node, end)) 3487 goto reuse_node; 3488 3489 mas_node_count(wr_mas->mas, 1); 3490 if (mas_is_err(wr_mas->mas)) 3491 return 0; 3492 3493 node = mas_pop_node(wr_mas->mas); 3494 node->parent = mas_mn(wr_mas->mas)->parent; 3495 wr_mas->mas->node = mt_mk_node(node, b_type); 3496 mab_mas_cp(b_node, 0, b_end, wr_mas->mas, false); 3497 mas_replace_node(wr_mas->mas, old_enode); 3498 reuse_node: 3499 mas_update_gap(wr_mas->mas); 3500 return 1; 3501 } 3502 3503 /* 3504 * mas_root_expand() - Expand a root to a node 3505 * @mas: The maple state 3506 * @entry: The entry to store into the tree 3507 */ 3508 static inline int mas_root_expand(struct ma_state *mas, void *entry) 3509 { 3510 void *contents = mas_root_locked(mas); 3511 enum maple_type type = maple_leaf_64; 3512 struct maple_node *node; 3513 void __rcu **slots; 3514 unsigned long *pivots; 3515 int slot = 0; 3516 3517 mas_node_count(mas, 1); 3518 if (unlikely(mas_is_err(mas))) 3519 return 0; 3520 3521 node = mas_pop_node(mas); 3522 pivots = ma_pivots(node, type); 3523 slots = ma_slots(node, type); 3524 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3525 mas->node = mt_mk_node(node, type); 3526 3527 if (mas->index) { 3528 if (contents) { 3529 rcu_assign_pointer(slots[slot], contents); 3530 if (likely(mas->index > 1)) 3531 slot++; 3532 } 3533 pivots[slot++] = mas->index - 1; 3534 } 3535 3536 rcu_assign_pointer(slots[slot], entry); 3537 mas->offset = slot; 3538 pivots[slot] = mas->last; 3539 if (mas->last != ULONG_MAX) 3540 pivots[++slot] = ULONG_MAX; 3541 3542 mas->depth = 1; 3543 mas_set_height(mas); 3544 ma_set_meta(node, maple_leaf_64, 0, slot); 3545 /* swap the new root into the tree */ 3546 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3547 return slot; 3548 } 3549 3550 /* 3551 * mas_store_root() - Storing value into root. 3552 * @mas: The maple state 3553 * @entry: The entry to store. 3554 * 3555 * There is no root node now and we are storing a value into the root - this 3556 * function either assigns the pointer or expands into a node. 3557 */ 3558 static inline void mas_store_root(struct ma_state *mas, void *entry) 3559 { 3560 if (!entry) { 3561 if (!mas->index) 3562 rcu_assign_pointer(mas->tree->ma_root, NULL); 3563 } else if (likely((mas->last != 0) || (mas->index != 0))) 3564 mas_root_expand(mas, entry); 3565 else if (((unsigned long) (entry) & 3) == 2) 3566 mas_root_expand(mas, entry); 3567 else { 3568 rcu_assign_pointer(mas->tree->ma_root, entry); 3569 mas->node = MAS_START; 3570 } 3571 } 3572 3573 /* 3574 * mas_is_span_wr() - Check if the write needs to be treated as a write that 3575 * spans the node. 3576 * @mas: The maple state 3577 * @piv: The pivot value being written 3578 * @type: The maple node type 3579 * @entry: The data to write 3580 * 3581 * Spanning writes are writes that start in one node and end in another OR if 3582 * the write of a %NULL will cause the node to end with a %NULL. 3583 * 3584 * Return: True if this is a spanning write, false otherwise. 3585 */ 3586 static bool mas_is_span_wr(struct ma_wr_state *wr_mas) 3587 { 3588 unsigned long max = wr_mas->r_max; 3589 unsigned long last = wr_mas->mas->last; 3590 enum maple_type type = wr_mas->type; 3591 void *entry = wr_mas->entry; 3592 3593 /* Contained in this pivot, fast path */ 3594 if (last < max) 3595 return false; 3596 3597 if (ma_is_leaf(type)) { 3598 max = wr_mas->mas->max; 3599 if (last < max) 3600 return false; 3601 } 3602 3603 if (last == max) { 3604 /* 3605 * The last entry of leaf node cannot be NULL unless it is the 3606 * rightmost node (writing ULONG_MAX), otherwise it spans slots. 3607 */ 3608 if (entry || last == ULONG_MAX) 3609 return false; 3610 } 3611 3612 trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry); 3613 return true; 3614 } 3615 3616 static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas) 3617 { 3618 wr_mas->type = mte_node_type(wr_mas->mas->node); 3619 mas_wr_node_walk(wr_mas); 3620 wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type); 3621 } 3622 3623 static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas) 3624 { 3625 wr_mas->mas->max = wr_mas->r_max; 3626 wr_mas->mas->min = wr_mas->r_min; 3627 wr_mas->mas->node = wr_mas->content; 3628 wr_mas->mas->offset = 0; 3629 wr_mas->mas->depth++; 3630 } 3631 /* 3632 * mas_wr_walk() - Walk the tree for a write. 3633 * @wr_mas: The maple write state 3634 * 3635 * Uses mas_slot_locked() and does not need to worry about dead nodes. 3636 * 3637 * Return: True if it's contained in a node, false on spanning write. 3638 */ 3639 static bool mas_wr_walk(struct ma_wr_state *wr_mas) 3640 { 3641 struct ma_state *mas = wr_mas->mas; 3642 3643 while (true) { 3644 mas_wr_walk_descend(wr_mas); 3645 if (unlikely(mas_is_span_wr(wr_mas))) 3646 return false; 3647 3648 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3649 mas->offset); 3650 if (ma_is_leaf(wr_mas->type)) 3651 return true; 3652 3653 mas_wr_walk_traverse(wr_mas); 3654 } 3655 3656 return true; 3657 } 3658 3659 static void mas_wr_walk_index(struct ma_wr_state *wr_mas) 3660 { 3661 struct ma_state *mas = wr_mas->mas; 3662 3663 while (true) { 3664 mas_wr_walk_descend(wr_mas); 3665 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3666 mas->offset); 3667 if (ma_is_leaf(wr_mas->type)) 3668 return; 3669 mas_wr_walk_traverse(wr_mas); 3670 } 3671 } 3672 /* 3673 * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs. 3674 * @l_wr_mas: The left maple write state 3675 * @r_wr_mas: The right maple write state 3676 */ 3677 static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas, 3678 struct ma_wr_state *r_wr_mas) 3679 { 3680 struct ma_state *r_mas = r_wr_mas->mas; 3681 struct ma_state *l_mas = l_wr_mas->mas; 3682 unsigned char l_slot; 3683 3684 l_slot = l_mas->offset; 3685 if (!l_wr_mas->content) 3686 l_mas->index = l_wr_mas->r_min; 3687 3688 if ((l_mas->index == l_wr_mas->r_min) && 3689 (l_slot && 3690 !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) { 3691 if (l_slot > 1) 3692 l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1; 3693 else 3694 l_mas->index = l_mas->min; 3695 3696 l_mas->offset = l_slot - 1; 3697 } 3698 3699 if (!r_wr_mas->content) { 3700 if (r_mas->last < r_wr_mas->r_max) 3701 r_mas->last = r_wr_mas->r_max; 3702 r_mas->offset++; 3703 } else if ((r_mas->last == r_wr_mas->r_max) && 3704 (r_mas->last < r_mas->max) && 3705 !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) { 3706 r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots, 3707 r_wr_mas->type, r_mas->offset + 1); 3708 r_mas->offset++; 3709 } 3710 } 3711 3712 static inline void *mas_state_walk(struct ma_state *mas) 3713 { 3714 void *entry; 3715 3716 entry = mas_start(mas); 3717 if (mas_is_none(mas)) 3718 return NULL; 3719 3720 if (mas_is_ptr(mas)) 3721 return entry; 3722 3723 return mtree_range_walk(mas); 3724 } 3725 3726 /* 3727 * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up 3728 * to date. 3729 * 3730 * @mas: The maple state. 3731 * 3732 * Note: Leaves mas in undesirable state. 3733 * Return: The entry for @mas->index or %NULL on dead node. 3734 */ 3735 static inline void *mtree_lookup_walk(struct ma_state *mas) 3736 { 3737 unsigned long *pivots; 3738 unsigned char offset; 3739 struct maple_node *node; 3740 struct maple_enode *next; 3741 enum maple_type type; 3742 void __rcu **slots; 3743 unsigned char end; 3744 unsigned long max; 3745 3746 next = mas->node; 3747 max = ULONG_MAX; 3748 do { 3749 offset = 0; 3750 node = mte_to_node(next); 3751 type = mte_node_type(next); 3752 pivots = ma_pivots(node, type); 3753 end = ma_data_end(node, type, pivots, max); 3754 if (unlikely(ma_dead_node(node))) 3755 goto dead_node; 3756 do { 3757 if (pivots[offset] >= mas->index) { 3758 max = pivots[offset]; 3759 break; 3760 } 3761 } while (++offset < end); 3762 3763 slots = ma_slots(node, type); 3764 next = mt_slot(mas->tree, slots, offset); 3765 if (unlikely(ma_dead_node(node))) 3766 goto dead_node; 3767 } while (!ma_is_leaf(type)); 3768 3769 return (void *)next; 3770 3771 dead_node: 3772 mas_reset(mas); 3773 return NULL; 3774 } 3775 3776 static void mte_destroy_walk(struct maple_enode *, struct maple_tree *); 3777 /* 3778 * mas_new_root() - Create a new root node that only contains the entry passed 3779 * in. 3780 * @mas: The maple state 3781 * @entry: The entry to store. 3782 * 3783 * Only valid when the index == 0 and the last == ULONG_MAX 3784 * 3785 * Return 0 on error, 1 on success. 3786 */ 3787 static inline int mas_new_root(struct ma_state *mas, void *entry) 3788 { 3789 struct maple_enode *root = mas_root_locked(mas); 3790 enum maple_type type = maple_leaf_64; 3791 struct maple_node *node; 3792 void __rcu **slots; 3793 unsigned long *pivots; 3794 3795 if (!entry && !mas->index && mas->last == ULONG_MAX) { 3796 mas->depth = 0; 3797 mas_set_height(mas); 3798 rcu_assign_pointer(mas->tree->ma_root, entry); 3799 mas->node = MAS_START; 3800 goto done; 3801 } 3802 3803 mas_node_count(mas, 1); 3804 if (mas_is_err(mas)) 3805 return 0; 3806 3807 node = mas_pop_node(mas); 3808 pivots = ma_pivots(node, type); 3809 slots = ma_slots(node, type); 3810 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3811 mas->node = mt_mk_node(node, type); 3812 rcu_assign_pointer(slots[0], entry); 3813 pivots[0] = mas->last; 3814 mas->depth = 1; 3815 mas_set_height(mas); 3816 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3817 3818 done: 3819 if (xa_is_node(root)) 3820 mte_destroy_walk(root, mas->tree); 3821 3822 return 1; 3823 } 3824 /* 3825 * mas_wr_spanning_store() - Create a subtree with the store operation completed 3826 * and new nodes where necessary, then place the sub-tree in the actual tree. 3827 * Note that mas is expected to point to the node which caused the store to 3828 * span. 3829 * @wr_mas: The maple write state 3830 * 3831 * Return: 0 on error, positive on success. 3832 */ 3833 static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas) 3834 { 3835 struct maple_subtree_state mast; 3836 struct maple_big_node b_node; 3837 struct ma_state *mas; 3838 unsigned char height; 3839 3840 /* Left and Right side of spanning store */ 3841 MA_STATE(l_mas, NULL, 0, 0); 3842 MA_STATE(r_mas, NULL, 0, 0); 3843 MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry); 3844 MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry); 3845 3846 /* 3847 * A store operation that spans multiple nodes is called a spanning 3848 * store and is handled early in the store call stack by the function 3849 * mas_is_span_wr(). When a spanning store is identified, the maple 3850 * state is duplicated. The first maple state walks the left tree path 3851 * to ``index``, the duplicate walks the right tree path to ``last``. 3852 * The data in the two nodes are combined into a single node, two nodes, 3853 * or possibly three nodes (see the 3-way split above). A ``NULL`` 3854 * written to the last entry of a node is considered a spanning store as 3855 * a rebalance is required for the operation to complete and an overflow 3856 * of data may happen. 3857 */ 3858 mas = wr_mas->mas; 3859 trace_ma_op(__func__, mas); 3860 3861 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 3862 return mas_new_root(mas, wr_mas->entry); 3863 /* 3864 * Node rebalancing may occur due to this store, so there may be three new 3865 * entries per level plus a new root. 3866 */ 3867 height = mas_mt_height(mas); 3868 mas_node_count(mas, 1 + height * 3); 3869 if (mas_is_err(mas)) 3870 return 0; 3871 3872 /* 3873 * Set up right side. Need to get to the next offset after the spanning 3874 * store to ensure it's not NULL and to combine both the next node and 3875 * the node with the start together. 3876 */ 3877 r_mas = *mas; 3878 /* Avoid overflow, walk to next slot in the tree. */ 3879 if (r_mas.last + 1) 3880 r_mas.last++; 3881 3882 r_mas.index = r_mas.last; 3883 mas_wr_walk_index(&r_wr_mas); 3884 r_mas.last = r_mas.index = mas->last; 3885 3886 /* Set up left side. */ 3887 l_mas = *mas; 3888 mas_wr_walk_index(&l_wr_mas); 3889 3890 if (!wr_mas->entry) { 3891 mas_extend_spanning_null(&l_wr_mas, &r_wr_mas); 3892 mas->offset = l_mas.offset; 3893 mas->index = l_mas.index; 3894 mas->last = l_mas.last = r_mas.last; 3895 } 3896 3897 /* expanding NULLs may make this cover the entire range */ 3898 if (!l_mas.index && r_mas.last == ULONG_MAX) { 3899 mas_set_range(mas, 0, ULONG_MAX); 3900 return mas_new_root(mas, wr_mas->entry); 3901 } 3902 3903 memset(&b_node, 0, sizeof(struct maple_big_node)); 3904 /* Copy l_mas and store the value in b_node. */ 3905 mas_store_b_node(&l_wr_mas, &b_node, l_wr_mas.node_end); 3906 /* Copy r_mas into b_node if there is anything to copy. */ 3907 if (r_mas.max > r_mas.last) 3908 mas_mab_cp(&r_mas, r_mas.offset, r_wr_mas.node_end, 3909 &b_node, b_node.b_end + 1); 3910 else 3911 b_node.b_end++; 3912 3913 /* Stop spanning searches by searching for just index. */ 3914 l_mas.index = l_mas.last = mas->index; 3915 3916 mast.bn = &b_node; 3917 mast.orig_l = &l_mas; 3918 mast.orig_r = &r_mas; 3919 /* Combine l_mas and r_mas and split them up evenly again. */ 3920 return mas_spanning_rebalance(mas, &mast, height + 1); 3921 } 3922 3923 /* 3924 * mas_wr_node_store() - Attempt to store the value in a node 3925 * @wr_mas: The maple write state 3926 * 3927 * Attempts to reuse the node, but may allocate. 3928 * 3929 * Return: True if stored, false otherwise 3930 */ 3931 static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas, 3932 unsigned char new_end) 3933 { 3934 struct ma_state *mas = wr_mas->mas; 3935 void __rcu **dst_slots; 3936 unsigned long *dst_pivots; 3937 unsigned char dst_offset, offset_end = wr_mas->offset_end; 3938 struct maple_node reuse, *newnode; 3939 unsigned char copy_size, node_pivots = mt_pivots[wr_mas->type]; 3940 bool in_rcu = mt_in_rcu(mas->tree); 3941 3942 /* Check if there is enough data. The room is enough. */ 3943 if (!mte_is_root(mas->node) && (new_end <= mt_min_slots[wr_mas->type]) && 3944 !(mas->mas_flags & MA_STATE_BULK)) 3945 return false; 3946 3947 if (mas->last == wr_mas->end_piv) 3948 offset_end++; /* don't copy this offset */ 3949 else if (unlikely(wr_mas->r_max == ULONG_MAX)) 3950 mas_bulk_rebalance(mas, wr_mas->node_end, wr_mas->type); 3951 3952 /* set up node. */ 3953 if (in_rcu) { 3954 mas_node_count(mas, 1); 3955 if (mas_is_err(mas)) 3956 return false; 3957 3958 newnode = mas_pop_node(mas); 3959 } else { 3960 memset(&reuse, 0, sizeof(struct maple_node)); 3961 newnode = &reuse; 3962 } 3963 3964 newnode->parent = mas_mn(mas)->parent; 3965 dst_pivots = ma_pivots(newnode, wr_mas->type); 3966 dst_slots = ma_slots(newnode, wr_mas->type); 3967 /* Copy from start to insert point */ 3968 memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset); 3969 memcpy(dst_slots, wr_mas->slots, sizeof(void *) * mas->offset); 3970 3971 /* Handle insert of new range starting after old range */ 3972 if (wr_mas->r_min < mas->index) { 3973 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content); 3974 dst_pivots[mas->offset++] = mas->index - 1; 3975 } 3976 3977 /* Store the new entry and range end. */ 3978 if (mas->offset < node_pivots) 3979 dst_pivots[mas->offset] = mas->last; 3980 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->entry); 3981 3982 /* 3983 * this range wrote to the end of the node or it overwrote the rest of 3984 * the data 3985 */ 3986 if (offset_end > wr_mas->node_end) 3987 goto done; 3988 3989 dst_offset = mas->offset + 1; 3990 /* Copy to the end of node if necessary. */ 3991 copy_size = wr_mas->node_end - offset_end + 1; 3992 memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end, 3993 sizeof(void *) * copy_size); 3994 memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end, 3995 sizeof(unsigned long) * (copy_size - 1)); 3996 3997 if (new_end < node_pivots) 3998 dst_pivots[new_end] = mas->max; 3999 4000 done: 4001 mas_leaf_set_meta(mas, newnode, dst_pivots, maple_leaf_64, new_end); 4002 if (in_rcu) { 4003 struct maple_enode *old_enode = mas->node; 4004 4005 mas->node = mt_mk_node(newnode, wr_mas->type); 4006 mas_replace_node(mas, old_enode); 4007 } else { 4008 memcpy(wr_mas->node, newnode, sizeof(struct maple_node)); 4009 } 4010 trace_ma_write(__func__, mas, 0, wr_mas->entry); 4011 mas_update_gap(mas); 4012 return true; 4013 } 4014 4015 /* 4016 * mas_wr_slot_store: Attempt to store a value in a slot. 4017 * @wr_mas: the maple write state 4018 * 4019 * Return: True if stored, false otherwise 4020 */ 4021 static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas) 4022 { 4023 struct ma_state *mas = wr_mas->mas; 4024 unsigned char offset = mas->offset; 4025 void __rcu **slots = wr_mas->slots; 4026 bool gap = false; 4027 4028 gap |= !mt_slot_locked(mas->tree, slots, offset); 4029 gap |= !mt_slot_locked(mas->tree, slots, offset + 1); 4030 4031 if (wr_mas->offset_end - offset == 1) { 4032 if (mas->index == wr_mas->r_min) { 4033 /* Overwriting the range and a part of the next one */ 4034 rcu_assign_pointer(slots[offset], wr_mas->entry); 4035 wr_mas->pivots[offset] = mas->last; 4036 } else { 4037 /* Overwriting a part of the range and the next one */ 4038 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 4039 wr_mas->pivots[offset] = mas->index - 1; 4040 mas->offset++; /* Keep mas accurate. */ 4041 } 4042 } else if (!mt_in_rcu(mas->tree)) { 4043 /* 4044 * Expand the range, only partially overwriting the previous and 4045 * next ranges 4046 */ 4047 gap |= !mt_slot_locked(mas->tree, slots, offset + 2); 4048 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 4049 wr_mas->pivots[offset] = mas->index - 1; 4050 wr_mas->pivots[offset + 1] = mas->last; 4051 mas->offset++; /* Keep mas accurate. */ 4052 } else { 4053 return false; 4054 } 4055 4056 trace_ma_write(__func__, mas, 0, wr_mas->entry); 4057 /* 4058 * Only update gap when the new entry is empty or there is an empty 4059 * entry in the original two ranges. 4060 */ 4061 if (!wr_mas->entry || gap) 4062 mas_update_gap(mas); 4063 4064 return true; 4065 } 4066 4067 static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas) 4068 { 4069 struct ma_state *mas = wr_mas->mas; 4070 4071 if (!wr_mas->slots[wr_mas->offset_end]) { 4072 /* If this one is null, the next and prev are not */ 4073 mas->last = wr_mas->end_piv; 4074 } else { 4075 /* Check next slot(s) if we are overwriting the end */ 4076 if ((mas->last == wr_mas->end_piv) && 4077 (wr_mas->node_end != wr_mas->offset_end) && 4078 !wr_mas->slots[wr_mas->offset_end + 1]) { 4079 wr_mas->offset_end++; 4080 if (wr_mas->offset_end == wr_mas->node_end) 4081 mas->last = mas->max; 4082 else 4083 mas->last = wr_mas->pivots[wr_mas->offset_end]; 4084 wr_mas->end_piv = mas->last; 4085 } 4086 } 4087 4088 if (!wr_mas->content) { 4089 /* If this one is null, the next and prev are not */ 4090 mas->index = wr_mas->r_min; 4091 } else { 4092 /* Check prev slot if we are overwriting the start */ 4093 if (mas->index == wr_mas->r_min && mas->offset && 4094 !wr_mas->slots[mas->offset - 1]) { 4095 mas->offset--; 4096 wr_mas->r_min = mas->index = 4097 mas_safe_min(mas, wr_mas->pivots, mas->offset); 4098 wr_mas->r_max = wr_mas->pivots[mas->offset]; 4099 } 4100 } 4101 } 4102 4103 static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas) 4104 { 4105 while ((wr_mas->offset_end < wr_mas->node_end) && 4106 (wr_mas->mas->last > wr_mas->pivots[wr_mas->offset_end])) 4107 wr_mas->offset_end++; 4108 4109 if (wr_mas->offset_end < wr_mas->node_end) 4110 wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end]; 4111 else 4112 wr_mas->end_piv = wr_mas->mas->max; 4113 4114 if (!wr_mas->entry) 4115 mas_wr_extend_null(wr_mas); 4116 } 4117 4118 static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas) 4119 { 4120 struct ma_state *mas = wr_mas->mas; 4121 unsigned char new_end = wr_mas->node_end + 2; 4122 4123 new_end -= wr_mas->offset_end - mas->offset; 4124 if (wr_mas->r_min == mas->index) 4125 new_end--; 4126 4127 if (wr_mas->end_piv == mas->last) 4128 new_end--; 4129 4130 return new_end; 4131 } 4132 4133 /* 4134 * mas_wr_append: Attempt to append 4135 * @wr_mas: the maple write state 4136 * @new_end: The end of the node after the modification 4137 * 4138 * This is currently unsafe in rcu mode since the end of the node may be cached 4139 * by readers while the node contents may be updated which could result in 4140 * inaccurate information. 4141 * 4142 * Return: True if appended, false otherwise 4143 */ 4144 static inline bool mas_wr_append(struct ma_wr_state *wr_mas, 4145 unsigned char new_end) 4146 { 4147 struct ma_state *mas; 4148 void __rcu **slots; 4149 unsigned char end; 4150 4151 mas = wr_mas->mas; 4152 if (mt_in_rcu(mas->tree)) 4153 return false; 4154 4155 if (mas->offset != wr_mas->node_end) 4156 return false; 4157 4158 end = wr_mas->node_end; 4159 if (mas->offset != end) 4160 return false; 4161 4162 if (new_end < mt_pivots[wr_mas->type]) { 4163 wr_mas->pivots[new_end] = wr_mas->pivots[end]; 4164 ma_set_meta(wr_mas->node, wr_mas->type, 0, new_end); 4165 } 4166 4167 slots = wr_mas->slots; 4168 if (new_end == end + 1) { 4169 if (mas->last == wr_mas->r_max) { 4170 /* Append to end of range */ 4171 rcu_assign_pointer(slots[new_end], wr_mas->entry); 4172 wr_mas->pivots[end] = mas->index - 1; 4173 mas->offset = new_end; 4174 } else { 4175 /* Append to start of range */ 4176 rcu_assign_pointer(slots[new_end], wr_mas->content); 4177 wr_mas->pivots[end] = mas->last; 4178 rcu_assign_pointer(slots[end], wr_mas->entry); 4179 } 4180 } else { 4181 /* Append to the range without touching any boundaries. */ 4182 rcu_assign_pointer(slots[new_end], wr_mas->content); 4183 wr_mas->pivots[end + 1] = mas->last; 4184 rcu_assign_pointer(slots[end + 1], wr_mas->entry); 4185 wr_mas->pivots[end] = mas->index - 1; 4186 mas->offset = end + 1; 4187 } 4188 4189 if (!wr_mas->content || !wr_mas->entry) 4190 mas_update_gap(mas); 4191 4192 trace_ma_write(__func__, mas, new_end, wr_mas->entry); 4193 return true; 4194 } 4195 4196 /* 4197 * mas_wr_bnode() - Slow path for a modification. 4198 * @wr_mas: The write maple state 4199 * 4200 * This is where split, rebalance end up. 4201 */ 4202 static void mas_wr_bnode(struct ma_wr_state *wr_mas) 4203 { 4204 struct maple_big_node b_node; 4205 4206 trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry); 4207 memset(&b_node, 0, sizeof(struct maple_big_node)); 4208 mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end); 4209 mas_commit_b_node(wr_mas, &b_node, wr_mas->node_end); 4210 } 4211 4212 static inline void mas_wr_modify(struct ma_wr_state *wr_mas) 4213 { 4214 struct ma_state *mas = wr_mas->mas; 4215 unsigned char new_end; 4216 4217 /* Direct replacement */ 4218 if (wr_mas->r_min == mas->index && wr_mas->r_max == mas->last) { 4219 rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry); 4220 if (!!wr_mas->entry ^ !!wr_mas->content) 4221 mas_update_gap(mas); 4222 return; 4223 } 4224 4225 /* 4226 * new_end exceeds the size of the maple node and cannot enter the fast 4227 * path. 4228 */ 4229 new_end = mas_wr_new_end(wr_mas); 4230 if (new_end >= mt_slots[wr_mas->type]) 4231 goto slow_path; 4232 4233 /* Attempt to append */ 4234 if (mas_wr_append(wr_mas, new_end)) 4235 return; 4236 4237 if (new_end == wr_mas->node_end && mas_wr_slot_store(wr_mas)) 4238 return; 4239 4240 if (mas_wr_node_store(wr_mas, new_end)) 4241 return; 4242 4243 if (mas_is_err(mas)) 4244 return; 4245 4246 slow_path: 4247 mas_wr_bnode(wr_mas); 4248 } 4249 4250 /* 4251 * mas_wr_store_entry() - Internal call to store a value 4252 * @mas: The maple state 4253 * @entry: The entry to store. 4254 * 4255 * Return: The contents that was stored at the index. 4256 */ 4257 static inline void *mas_wr_store_entry(struct ma_wr_state *wr_mas) 4258 { 4259 struct ma_state *mas = wr_mas->mas; 4260 4261 wr_mas->content = mas_start(mas); 4262 if (mas_is_none(mas) || mas_is_ptr(mas)) { 4263 mas_store_root(mas, wr_mas->entry); 4264 return wr_mas->content; 4265 } 4266 4267 if (unlikely(!mas_wr_walk(wr_mas))) { 4268 mas_wr_spanning_store(wr_mas); 4269 return wr_mas->content; 4270 } 4271 4272 /* At this point, we are at the leaf node that needs to be altered. */ 4273 mas_wr_end_piv(wr_mas); 4274 /* New root for a single pointer */ 4275 if (unlikely(!mas->index && mas->last == ULONG_MAX)) { 4276 mas_new_root(mas, wr_mas->entry); 4277 return wr_mas->content; 4278 } 4279 4280 mas_wr_modify(wr_mas); 4281 return wr_mas->content; 4282 } 4283 4284 /** 4285 * mas_insert() - Internal call to insert a value 4286 * @mas: The maple state 4287 * @entry: The entry to store 4288 * 4289 * Return: %NULL or the contents that already exists at the requested index 4290 * otherwise. The maple state needs to be checked for error conditions. 4291 */ 4292 static inline void *mas_insert(struct ma_state *mas, void *entry) 4293 { 4294 MA_WR_STATE(wr_mas, mas, entry); 4295 4296 /* 4297 * Inserting a new range inserts either 0, 1, or 2 pivots within the 4298 * tree. If the insert fits exactly into an existing gap with a value 4299 * of NULL, then the slot only needs to be written with the new value. 4300 * If the range being inserted is adjacent to another range, then only a 4301 * single pivot needs to be inserted (as well as writing the entry). If 4302 * the new range is within a gap but does not touch any other ranges, 4303 * then two pivots need to be inserted: the start - 1, and the end. As 4304 * usual, the entry must be written. Most operations require a new node 4305 * to be allocated and replace an existing node to ensure RCU safety, 4306 * when in RCU mode. The exception to requiring a newly allocated node 4307 * is when inserting at the end of a node (appending). When done 4308 * carefully, appending can reuse the node in place. 4309 */ 4310 wr_mas.content = mas_start(mas); 4311 if (wr_mas.content) 4312 goto exists; 4313 4314 if (mas_is_none(mas) || mas_is_ptr(mas)) { 4315 mas_store_root(mas, entry); 4316 return NULL; 4317 } 4318 4319 /* spanning writes always overwrite something */ 4320 if (!mas_wr_walk(&wr_mas)) 4321 goto exists; 4322 4323 /* At this point, we are at the leaf node that needs to be altered. */ 4324 wr_mas.offset_end = mas->offset; 4325 wr_mas.end_piv = wr_mas.r_max; 4326 4327 if (wr_mas.content || (mas->last > wr_mas.r_max)) 4328 goto exists; 4329 4330 if (!entry) 4331 return NULL; 4332 4333 mas_wr_modify(&wr_mas); 4334 return wr_mas.content; 4335 4336 exists: 4337 mas_set_err(mas, -EEXIST); 4338 return wr_mas.content; 4339 4340 } 4341 4342 static inline void mas_rewalk(struct ma_state *mas, unsigned long index) 4343 { 4344 retry: 4345 mas_set(mas, index); 4346 mas_state_walk(mas); 4347 if (mas_is_start(mas)) 4348 goto retry; 4349 } 4350 4351 static inline bool mas_rewalk_if_dead(struct ma_state *mas, 4352 struct maple_node *node, const unsigned long index) 4353 { 4354 if (unlikely(ma_dead_node(node))) { 4355 mas_rewalk(mas, index); 4356 return true; 4357 } 4358 return false; 4359 } 4360 4361 /* 4362 * mas_prev_node() - Find the prev non-null entry at the same level in the 4363 * tree. The prev value will be mas->node[mas->offset] or MAS_NONE. 4364 * @mas: The maple state 4365 * @min: The lower limit to search 4366 * 4367 * The prev node value will be mas->node[mas->offset] or MAS_NONE. 4368 * Return: 1 if the node is dead, 0 otherwise. 4369 */ 4370 static inline int mas_prev_node(struct ma_state *mas, unsigned long min) 4371 { 4372 enum maple_type mt; 4373 int offset, level; 4374 void __rcu **slots; 4375 struct maple_node *node; 4376 unsigned long *pivots; 4377 unsigned long max; 4378 4379 node = mas_mn(mas); 4380 if (!mas->min) 4381 goto no_entry; 4382 4383 max = mas->min - 1; 4384 if (max < min) 4385 goto no_entry; 4386 4387 level = 0; 4388 do { 4389 if (ma_is_root(node)) 4390 goto no_entry; 4391 4392 /* Walk up. */ 4393 if (unlikely(mas_ascend(mas))) 4394 return 1; 4395 offset = mas->offset; 4396 level++; 4397 node = mas_mn(mas); 4398 } while (!offset); 4399 4400 offset--; 4401 mt = mte_node_type(mas->node); 4402 while (level > 1) { 4403 level--; 4404 slots = ma_slots(node, mt); 4405 mas->node = mas_slot(mas, slots, offset); 4406 if (unlikely(ma_dead_node(node))) 4407 return 1; 4408 4409 mt = mte_node_type(mas->node); 4410 node = mas_mn(mas); 4411 pivots = ma_pivots(node, mt); 4412 offset = ma_data_end(node, mt, pivots, max); 4413 if (unlikely(ma_dead_node(node))) 4414 return 1; 4415 } 4416 4417 slots = ma_slots(node, mt); 4418 mas->node = mas_slot(mas, slots, offset); 4419 pivots = ma_pivots(node, mt); 4420 if (unlikely(ma_dead_node(node))) 4421 return 1; 4422 4423 if (likely(offset)) 4424 mas->min = pivots[offset - 1] + 1; 4425 mas->max = max; 4426 mas->offset = mas_data_end(mas); 4427 if (unlikely(mte_dead_node(mas->node))) 4428 return 1; 4429 4430 return 0; 4431 4432 no_entry: 4433 if (unlikely(ma_dead_node(node))) 4434 return 1; 4435 4436 mas->node = MAS_NONE; 4437 return 0; 4438 } 4439 4440 /* 4441 * mas_prev_slot() - Get the entry in the previous slot 4442 * 4443 * @mas: The maple state 4444 * @max: The minimum starting range 4445 * @empty: Can be empty 4446 * @set_underflow: Set the @mas->node to underflow state on limit. 4447 * 4448 * Return: The entry in the previous slot which is possibly NULL 4449 */ 4450 static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty, 4451 bool set_underflow) 4452 { 4453 void *entry; 4454 void __rcu **slots; 4455 unsigned long pivot; 4456 enum maple_type type; 4457 unsigned long *pivots; 4458 struct maple_node *node; 4459 unsigned long save_point = mas->index; 4460 4461 retry: 4462 node = mas_mn(mas); 4463 type = mte_node_type(mas->node); 4464 pivots = ma_pivots(node, type); 4465 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4466 goto retry; 4467 4468 if (mas->min <= min) { 4469 pivot = mas_safe_min(mas, pivots, mas->offset); 4470 4471 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4472 goto retry; 4473 4474 if (pivot <= min) 4475 goto underflow; 4476 } 4477 4478 again: 4479 if (likely(mas->offset)) { 4480 mas->offset--; 4481 mas->last = mas->index - 1; 4482 mas->index = mas_safe_min(mas, pivots, mas->offset); 4483 } else { 4484 if (mas_prev_node(mas, min)) { 4485 mas_rewalk(mas, save_point); 4486 goto retry; 4487 } 4488 4489 if (mas_is_none(mas)) 4490 goto underflow; 4491 4492 mas->last = mas->max; 4493 node = mas_mn(mas); 4494 type = mte_node_type(mas->node); 4495 pivots = ma_pivots(node, type); 4496 mas->index = pivots[mas->offset - 1] + 1; 4497 } 4498 4499 slots = ma_slots(node, type); 4500 entry = mas_slot(mas, slots, mas->offset); 4501 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4502 goto retry; 4503 4504 if (likely(entry)) 4505 return entry; 4506 4507 if (!empty) { 4508 if (mas->index <= min) 4509 goto underflow; 4510 4511 goto again; 4512 } 4513 4514 return entry; 4515 4516 underflow: 4517 if (set_underflow) 4518 mas->node = MAS_UNDERFLOW; 4519 return NULL; 4520 } 4521 4522 /* 4523 * mas_next_node() - Get the next node at the same level in the tree. 4524 * @mas: The maple state 4525 * @max: The maximum pivot value to check. 4526 * 4527 * The next value will be mas->node[mas->offset] or MAS_NONE. 4528 * Return: 1 on dead node, 0 otherwise. 4529 */ 4530 static inline int mas_next_node(struct ma_state *mas, struct maple_node *node, 4531 unsigned long max) 4532 { 4533 unsigned long min; 4534 unsigned long *pivots; 4535 struct maple_enode *enode; 4536 int level = 0; 4537 unsigned char node_end; 4538 enum maple_type mt; 4539 void __rcu **slots; 4540 4541 if (mas->max >= max) 4542 goto no_entry; 4543 4544 min = mas->max + 1; 4545 level = 0; 4546 do { 4547 if (ma_is_root(node)) 4548 goto no_entry; 4549 4550 /* Walk up. */ 4551 if (unlikely(mas_ascend(mas))) 4552 return 1; 4553 4554 level++; 4555 node = mas_mn(mas); 4556 mt = mte_node_type(mas->node); 4557 pivots = ma_pivots(node, mt); 4558 node_end = ma_data_end(node, mt, pivots, mas->max); 4559 if (unlikely(ma_dead_node(node))) 4560 return 1; 4561 4562 } while (unlikely(mas->offset == node_end)); 4563 4564 slots = ma_slots(node, mt); 4565 mas->offset++; 4566 enode = mas_slot(mas, slots, mas->offset); 4567 if (unlikely(ma_dead_node(node))) 4568 return 1; 4569 4570 if (level > 1) 4571 mas->offset = 0; 4572 4573 while (unlikely(level > 1)) { 4574 level--; 4575 mas->node = enode; 4576 node = mas_mn(mas); 4577 mt = mte_node_type(mas->node); 4578 slots = ma_slots(node, mt); 4579 enode = mas_slot(mas, slots, 0); 4580 if (unlikely(ma_dead_node(node))) 4581 return 1; 4582 } 4583 4584 if (!mas->offset) 4585 pivots = ma_pivots(node, mt); 4586 4587 mas->max = mas_safe_pivot(mas, pivots, mas->offset, mt); 4588 if (unlikely(ma_dead_node(node))) 4589 return 1; 4590 4591 mas->node = enode; 4592 mas->min = min; 4593 return 0; 4594 4595 no_entry: 4596 if (unlikely(ma_dead_node(node))) 4597 return 1; 4598 4599 mas->node = MAS_NONE; 4600 return 0; 4601 } 4602 4603 /* 4604 * mas_next_slot() - Get the entry in the next slot 4605 * 4606 * @mas: The maple state 4607 * @max: The maximum starting range 4608 * @empty: Can be empty 4609 * @set_overflow: Should @mas->node be set to overflow when the limit is 4610 * reached. 4611 * 4612 * Return: The entry in the next slot which is possibly NULL 4613 */ 4614 static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty, 4615 bool set_overflow) 4616 { 4617 void __rcu **slots; 4618 unsigned long *pivots; 4619 unsigned long pivot; 4620 enum maple_type type; 4621 struct maple_node *node; 4622 unsigned char data_end; 4623 unsigned long save_point = mas->last; 4624 void *entry; 4625 4626 retry: 4627 node = mas_mn(mas); 4628 type = mte_node_type(mas->node); 4629 pivots = ma_pivots(node, type); 4630 data_end = ma_data_end(node, type, pivots, mas->max); 4631 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4632 goto retry; 4633 4634 if (mas->max >= max) { 4635 if (likely(mas->offset < data_end)) 4636 pivot = pivots[mas->offset]; 4637 else 4638 goto overflow; 4639 4640 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4641 goto retry; 4642 4643 if (pivot >= max) 4644 goto overflow; 4645 } 4646 4647 if (likely(mas->offset < data_end)) { 4648 mas->index = pivots[mas->offset] + 1; 4649 again: 4650 mas->offset++; 4651 if (likely(mas->offset < data_end)) 4652 mas->last = pivots[mas->offset]; 4653 else 4654 mas->last = mas->max; 4655 } else { 4656 if (mas_next_node(mas, node, max)) { 4657 mas_rewalk(mas, save_point); 4658 goto retry; 4659 } 4660 4661 if (WARN_ON_ONCE(mas_is_none(mas))) { 4662 mas->node = MAS_OVERFLOW; 4663 return NULL; 4664 goto overflow; 4665 } 4666 4667 mas->offset = 0; 4668 mas->index = mas->min; 4669 node = mas_mn(mas); 4670 type = mte_node_type(mas->node); 4671 pivots = ma_pivots(node, type); 4672 mas->last = pivots[0]; 4673 } 4674 4675 slots = ma_slots(node, type); 4676 entry = mt_slot(mas->tree, slots, mas->offset); 4677 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4678 goto retry; 4679 4680 if (entry) 4681 return entry; 4682 4683 if (!empty) { 4684 if (mas->last >= max) 4685 goto overflow; 4686 4687 mas->index = mas->last + 1; 4688 /* Node cannot end on NULL, so it's safe to short-cut here */ 4689 goto again; 4690 } 4691 4692 return entry; 4693 4694 overflow: 4695 if (set_overflow) 4696 mas->node = MAS_OVERFLOW; 4697 return NULL; 4698 } 4699 4700 /* 4701 * mas_next_entry() - Internal function to get the next entry. 4702 * @mas: The maple state 4703 * @limit: The maximum range start. 4704 * 4705 * Set the @mas->node to the next entry and the range_start to 4706 * the beginning value for the entry. Does not check beyond @limit. 4707 * Sets @mas->index and @mas->last to the range, Does not update @mas->index and 4708 * @mas->last on overflow. 4709 * Restarts on dead nodes. 4710 * 4711 * Return: the next entry or %NULL. 4712 */ 4713 static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit) 4714 { 4715 if (mas->last >= limit) { 4716 mas->node = MAS_OVERFLOW; 4717 return NULL; 4718 } 4719 4720 return mas_next_slot(mas, limit, false, true); 4721 } 4722 4723 /* 4724 * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the 4725 * highest gap address of a given size in a given node and descend. 4726 * @mas: The maple state 4727 * @size: The needed size. 4728 * 4729 * Return: True if found in a leaf, false otherwise. 4730 * 4731 */ 4732 static bool mas_rev_awalk(struct ma_state *mas, unsigned long size, 4733 unsigned long *gap_min, unsigned long *gap_max) 4734 { 4735 enum maple_type type = mte_node_type(mas->node); 4736 struct maple_node *node = mas_mn(mas); 4737 unsigned long *pivots, *gaps; 4738 void __rcu **slots; 4739 unsigned long gap = 0; 4740 unsigned long max, min; 4741 unsigned char offset; 4742 4743 if (unlikely(mas_is_err(mas))) 4744 return true; 4745 4746 if (ma_is_dense(type)) { 4747 /* dense nodes. */ 4748 mas->offset = (unsigned char)(mas->index - mas->min); 4749 return true; 4750 } 4751 4752 pivots = ma_pivots(node, type); 4753 slots = ma_slots(node, type); 4754 gaps = ma_gaps(node, type); 4755 offset = mas->offset; 4756 min = mas_safe_min(mas, pivots, offset); 4757 /* Skip out of bounds. */ 4758 while (mas->last < min) 4759 min = mas_safe_min(mas, pivots, --offset); 4760 4761 max = mas_safe_pivot(mas, pivots, offset, type); 4762 while (mas->index <= max) { 4763 gap = 0; 4764 if (gaps) 4765 gap = gaps[offset]; 4766 else if (!mas_slot(mas, slots, offset)) 4767 gap = max - min + 1; 4768 4769 if (gap) { 4770 if ((size <= gap) && (size <= mas->last - min + 1)) 4771 break; 4772 4773 if (!gaps) { 4774 /* Skip the next slot, it cannot be a gap. */ 4775 if (offset < 2) 4776 goto ascend; 4777 4778 offset -= 2; 4779 max = pivots[offset]; 4780 min = mas_safe_min(mas, pivots, offset); 4781 continue; 4782 } 4783 } 4784 4785 if (!offset) 4786 goto ascend; 4787 4788 offset--; 4789 max = min - 1; 4790 min = mas_safe_min(mas, pivots, offset); 4791 } 4792 4793 if (unlikely((mas->index > max) || (size - 1 > max - mas->index))) 4794 goto no_space; 4795 4796 if (unlikely(ma_is_leaf(type))) { 4797 mas->offset = offset; 4798 *gap_min = min; 4799 *gap_max = min + gap - 1; 4800 return true; 4801 } 4802 4803 /* descend, only happens under lock. */ 4804 mas->node = mas_slot(mas, slots, offset); 4805 mas->min = min; 4806 mas->max = max; 4807 mas->offset = mas_data_end(mas); 4808 return false; 4809 4810 ascend: 4811 if (!mte_is_root(mas->node)) 4812 return false; 4813 4814 no_space: 4815 mas_set_err(mas, -EBUSY); 4816 return false; 4817 } 4818 4819 static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) 4820 { 4821 enum maple_type type = mte_node_type(mas->node); 4822 unsigned long pivot, min, gap = 0; 4823 unsigned char offset, data_end; 4824 unsigned long *gaps, *pivots; 4825 void __rcu **slots; 4826 struct maple_node *node; 4827 bool found = false; 4828 4829 if (ma_is_dense(type)) { 4830 mas->offset = (unsigned char)(mas->index - mas->min); 4831 return true; 4832 } 4833 4834 node = mas_mn(mas); 4835 pivots = ma_pivots(node, type); 4836 slots = ma_slots(node, type); 4837 gaps = ma_gaps(node, type); 4838 offset = mas->offset; 4839 min = mas_safe_min(mas, pivots, offset); 4840 data_end = ma_data_end(node, type, pivots, mas->max); 4841 for (; offset <= data_end; offset++) { 4842 pivot = mas_safe_pivot(mas, pivots, offset, type); 4843 4844 /* Not within lower bounds */ 4845 if (mas->index > pivot) 4846 goto next_slot; 4847 4848 if (gaps) 4849 gap = gaps[offset]; 4850 else if (!mas_slot(mas, slots, offset)) 4851 gap = min(pivot, mas->last) - max(mas->index, min) + 1; 4852 else 4853 goto next_slot; 4854 4855 if (gap >= size) { 4856 if (ma_is_leaf(type)) { 4857 found = true; 4858 goto done; 4859 } 4860 if (mas->index <= pivot) { 4861 mas->node = mas_slot(mas, slots, offset); 4862 mas->min = min; 4863 mas->max = pivot; 4864 offset = 0; 4865 break; 4866 } 4867 } 4868 next_slot: 4869 min = pivot + 1; 4870 if (mas->last <= pivot) { 4871 mas_set_err(mas, -EBUSY); 4872 return true; 4873 } 4874 } 4875 4876 if (mte_is_root(mas->node)) 4877 found = true; 4878 done: 4879 mas->offset = offset; 4880 return found; 4881 } 4882 4883 /** 4884 * mas_walk() - Search for @mas->index in the tree. 4885 * @mas: The maple state. 4886 * 4887 * mas->index and mas->last will be set to the range if there is a value. If 4888 * mas->node is MAS_NONE, reset to MAS_START. 4889 * 4890 * Return: the entry at the location or %NULL. 4891 */ 4892 void *mas_walk(struct ma_state *mas) 4893 { 4894 void *entry; 4895 4896 if (!mas_is_active(mas) || !mas_is_start(mas)) 4897 mas->node = MAS_START; 4898 retry: 4899 entry = mas_state_walk(mas); 4900 if (mas_is_start(mas)) { 4901 goto retry; 4902 } else if (mas_is_none(mas)) { 4903 mas->index = 0; 4904 mas->last = ULONG_MAX; 4905 } else if (mas_is_ptr(mas)) { 4906 if (!mas->index) { 4907 mas->last = 0; 4908 return entry; 4909 } 4910 4911 mas->index = 1; 4912 mas->last = ULONG_MAX; 4913 mas->node = MAS_NONE; 4914 return NULL; 4915 } 4916 4917 return entry; 4918 } 4919 EXPORT_SYMBOL_GPL(mas_walk); 4920 4921 static inline bool mas_rewind_node(struct ma_state *mas) 4922 { 4923 unsigned char slot; 4924 4925 do { 4926 if (mte_is_root(mas->node)) { 4927 slot = mas->offset; 4928 if (!slot) 4929 return false; 4930 } else { 4931 mas_ascend(mas); 4932 slot = mas->offset; 4933 } 4934 } while (!slot); 4935 4936 mas->offset = --slot; 4937 return true; 4938 } 4939 4940 /* 4941 * mas_skip_node() - Internal function. Skip over a node. 4942 * @mas: The maple state. 4943 * 4944 * Return: true if there is another node, false otherwise. 4945 */ 4946 static inline bool mas_skip_node(struct ma_state *mas) 4947 { 4948 if (mas_is_err(mas)) 4949 return false; 4950 4951 do { 4952 if (mte_is_root(mas->node)) { 4953 if (mas->offset >= mas_data_end(mas)) { 4954 mas_set_err(mas, -EBUSY); 4955 return false; 4956 } 4957 } else { 4958 mas_ascend(mas); 4959 } 4960 } while (mas->offset >= mas_data_end(mas)); 4961 4962 mas->offset++; 4963 return true; 4964 } 4965 4966 /* 4967 * mas_awalk() - Allocation walk. Search from low address to high, for a gap of 4968 * @size 4969 * @mas: The maple state 4970 * @size: The size of the gap required 4971 * 4972 * Search between @mas->index and @mas->last for a gap of @size. 4973 */ 4974 static inline void mas_awalk(struct ma_state *mas, unsigned long size) 4975 { 4976 struct maple_enode *last = NULL; 4977 4978 /* 4979 * There are 4 options: 4980 * go to child (descend) 4981 * go back to parent (ascend) 4982 * no gap found. (return, slot == MAPLE_NODE_SLOTS) 4983 * found the gap. (return, slot != MAPLE_NODE_SLOTS) 4984 */ 4985 while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) { 4986 if (last == mas->node) 4987 mas_skip_node(mas); 4988 else 4989 last = mas->node; 4990 } 4991 } 4992 4993 /* 4994 * mas_sparse_area() - Internal function. Return upper or lower limit when 4995 * searching for a gap in an empty tree. 4996 * @mas: The maple state 4997 * @min: the minimum range 4998 * @max: The maximum range 4999 * @size: The size of the gap 5000 * @fwd: Searching forward or back 5001 */ 5002 static inline int mas_sparse_area(struct ma_state *mas, unsigned long min, 5003 unsigned long max, unsigned long size, bool fwd) 5004 { 5005 if (!unlikely(mas_is_none(mas)) && min == 0) { 5006 min++; 5007 /* 5008 * At this time, min is increased, we need to recheck whether 5009 * the size is satisfied. 5010 */ 5011 if (min > max || max - min + 1 < size) 5012 return -EBUSY; 5013 } 5014 /* mas_is_ptr */ 5015 5016 if (fwd) { 5017 mas->index = min; 5018 mas->last = min + size - 1; 5019 } else { 5020 mas->last = max; 5021 mas->index = max - size + 1; 5022 } 5023 return 0; 5024 } 5025 5026 /* 5027 * mas_empty_area() - Get the lowest address within the range that is 5028 * sufficient for the size requested. 5029 * @mas: The maple state 5030 * @min: The lowest value of the range 5031 * @max: The highest value of the range 5032 * @size: The size needed 5033 */ 5034 int mas_empty_area(struct ma_state *mas, unsigned long min, 5035 unsigned long max, unsigned long size) 5036 { 5037 unsigned char offset; 5038 unsigned long *pivots; 5039 enum maple_type mt; 5040 5041 if (min > max) 5042 return -EINVAL; 5043 5044 if (size == 0 || max - min < size - 1) 5045 return -EINVAL; 5046 5047 if (mas_is_start(mas)) 5048 mas_start(mas); 5049 else if (mas->offset >= 2) 5050 mas->offset -= 2; 5051 else if (!mas_skip_node(mas)) 5052 return -EBUSY; 5053 5054 /* Empty set */ 5055 if (mas_is_none(mas) || mas_is_ptr(mas)) 5056 return mas_sparse_area(mas, min, max, size, true); 5057 5058 /* The start of the window can only be within these values */ 5059 mas->index = min; 5060 mas->last = max; 5061 mas_awalk(mas, size); 5062 5063 if (unlikely(mas_is_err(mas))) 5064 return xa_err(mas->node); 5065 5066 offset = mas->offset; 5067 if (unlikely(offset == MAPLE_NODE_SLOTS)) 5068 return -EBUSY; 5069 5070 mt = mte_node_type(mas->node); 5071 pivots = ma_pivots(mas_mn(mas), mt); 5072 min = mas_safe_min(mas, pivots, offset); 5073 if (mas->index < min) 5074 mas->index = min; 5075 mas->last = mas->index + size - 1; 5076 return 0; 5077 } 5078 EXPORT_SYMBOL_GPL(mas_empty_area); 5079 5080 /* 5081 * mas_empty_area_rev() - Get the highest address within the range that is 5082 * sufficient for the size requested. 5083 * @mas: The maple state 5084 * @min: The lowest value of the range 5085 * @max: The highest value of the range 5086 * @size: The size needed 5087 */ 5088 int mas_empty_area_rev(struct ma_state *mas, unsigned long min, 5089 unsigned long max, unsigned long size) 5090 { 5091 struct maple_enode *last = mas->node; 5092 5093 if (min > max) 5094 return -EINVAL; 5095 5096 if (size == 0 || max - min < size - 1) 5097 return -EINVAL; 5098 5099 if (mas_is_start(mas)) 5100 mas_start(mas); 5101 else if ((mas->offset < 2) && (!mas_rewind_node(mas))) 5102 return -EBUSY; 5103 5104 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) 5105 return mas_sparse_area(mas, min, max, size, false); 5106 else if (mas->offset >= 2) 5107 mas->offset -= 2; 5108 else 5109 mas->offset = mas_data_end(mas); 5110 5111 5112 /* The start of the window can only be within these values. */ 5113 mas->index = min; 5114 mas->last = max; 5115 5116 while (!mas_rev_awalk(mas, size, &min, &max)) { 5117 if (last == mas->node) { 5118 if (!mas_rewind_node(mas)) 5119 return -EBUSY; 5120 } else { 5121 last = mas->node; 5122 } 5123 } 5124 5125 if (mas_is_err(mas)) 5126 return xa_err(mas->node); 5127 5128 if (unlikely(mas->offset == MAPLE_NODE_SLOTS)) 5129 return -EBUSY; 5130 5131 /* Trim the upper limit to the max. */ 5132 if (max < mas->last) 5133 mas->last = max; 5134 5135 mas->index = mas->last - size + 1; 5136 return 0; 5137 } 5138 EXPORT_SYMBOL_GPL(mas_empty_area_rev); 5139 5140 /* 5141 * mte_dead_leaves() - Mark all leaves of a node as dead. 5142 * @mas: The maple state 5143 * @slots: Pointer to the slot array 5144 * @type: The maple node type 5145 * 5146 * Must hold the write lock. 5147 * 5148 * Return: The number of leaves marked as dead. 5149 */ 5150 static inline 5151 unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt, 5152 void __rcu **slots) 5153 { 5154 struct maple_node *node; 5155 enum maple_type type; 5156 void *entry; 5157 int offset; 5158 5159 for (offset = 0; offset < mt_slot_count(enode); offset++) { 5160 entry = mt_slot(mt, slots, offset); 5161 type = mte_node_type(entry); 5162 node = mte_to_node(entry); 5163 /* Use both node and type to catch LE & BE metadata */ 5164 if (!node || !type) 5165 break; 5166 5167 mte_set_node_dead(entry); 5168 node->type = type; 5169 rcu_assign_pointer(slots[offset], node); 5170 } 5171 5172 return offset; 5173 } 5174 5175 /** 5176 * mte_dead_walk() - Walk down a dead tree to just before the leaves 5177 * @enode: The maple encoded node 5178 * @offset: The starting offset 5179 * 5180 * Note: This can only be used from the RCU callback context. 5181 */ 5182 static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset) 5183 { 5184 struct maple_node *node, *next; 5185 void __rcu **slots = NULL; 5186 5187 next = mte_to_node(*enode); 5188 do { 5189 *enode = ma_enode_ptr(next); 5190 node = mte_to_node(*enode); 5191 slots = ma_slots(node, node->type); 5192 next = rcu_dereference_protected(slots[offset], 5193 lock_is_held(&rcu_callback_map)); 5194 offset = 0; 5195 } while (!ma_is_leaf(next->type)); 5196 5197 return slots; 5198 } 5199 5200 /** 5201 * mt_free_walk() - Walk & free a tree in the RCU callback context 5202 * @head: The RCU head that's within the node. 5203 * 5204 * Note: This can only be used from the RCU callback context. 5205 */ 5206 static void mt_free_walk(struct rcu_head *head) 5207 { 5208 void __rcu **slots; 5209 struct maple_node *node, *start; 5210 struct maple_enode *enode; 5211 unsigned char offset; 5212 enum maple_type type; 5213 5214 node = container_of(head, struct maple_node, rcu); 5215 5216 if (ma_is_leaf(node->type)) 5217 goto free_leaf; 5218 5219 start = node; 5220 enode = mt_mk_node(node, node->type); 5221 slots = mte_dead_walk(&enode, 0); 5222 node = mte_to_node(enode); 5223 do { 5224 mt_free_bulk(node->slot_len, slots); 5225 offset = node->parent_slot + 1; 5226 enode = node->piv_parent; 5227 if (mte_to_node(enode) == node) 5228 goto free_leaf; 5229 5230 type = mte_node_type(enode); 5231 slots = ma_slots(mte_to_node(enode), type); 5232 if ((offset < mt_slots[type]) && 5233 rcu_dereference_protected(slots[offset], 5234 lock_is_held(&rcu_callback_map))) 5235 slots = mte_dead_walk(&enode, offset); 5236 node = mte_to_node(enode); 5237 } while ((node != start) || (node->slot_len < offset)); 5238 5239 slots = ma_slots(node, node->type); 5240 mt_free_bulk(node->slot_len, slots); 5241 5242 free_leaf: 5243 mt_free_rcu(&node->rcu); 5244 } 5245 5246 static inline void __rcu **mte_destroy_descend(struct maple_enode **enode, 5247 struct maple_tree *mt, struct maple_enode *prev, unsigned char offset) 5248 { 5249 struct maple_node *node; 5250 struct maple_enode *next = *enode; 5251 void __rcu **slots = NULL; 5252 enum maple_type type; 5253 unsigned char next_offset = 0; 5254 5255 do { 5256 *enode = next; 5257 node = mte_to_node(*enode); 5258 type = mte_node_type(*enode); 5259 slots = ma_slots(node, type); 5260 next = mt_slot_locked(mt, slots, next_offset); 5261 if ((mte_dead_node(next))) 5262 next = mt_slot_locked(mt, slots, ++next_offset); 5263 5264 mte_set_node_dead(*enode); 5265 node->type = type; 5266 node->piv_parent = prev; 5267 node->parent_slot = offset; 5268 offset = next_offset; 5269 next_offset = 0; 5270 prev = *enode; 5271 } while (!mte_is_leaf(next)); 5272 5273 return slots; 5274 } 5275 5276 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 5277 bool free) 5278 { 5279 void __rcu **slots; 5280 struct maple_node *node = mte_to_node(enode); 5281 struct maple_enode *start; 5282 5283 if (mte_is_leaf(enode)) { 5284 node->type = mte_node_type(enode); 5285 goto free_leaf; 5286 } 5287 5288 start = enode; 5289 slots = mte_destroy_descend(&enode, mt, start, 0); 5290 node = mte_to_node(enode); // Updated in the above call. 5291 do { 5292 enum maple_type type; 5293 unsigned char offset; 5294 struct maple_enode *parent, *tmp; 5295 5296 node->slot_len = mte_dead_leaves(enode, mt, slots); 5297 if (free) 5298 mt_free_bulk(node->slot_len, slots); 5299 offset = node->parent_slot + 1; 5300 enode = node->piv_parent; 5301 if (mte_to_node(enode) == node) 5302 goto free_leaf; 5303 5304 type = mte_node_type(enode); 5305 slots = ma_slots(mte_to_node(enode), type); 5306 if (offset >= mt_slots[type]) 5307 goto next; 5308 5309 tmp = mt_slot_locked(mt, slots, offset); 5310 if (mte_node_type(tmp) && mte_to_node(tmp)) { 5311 parent = enode; 5312 enode = tmp; 5313 slots = mte_destroy_descend(&enode, mt, parent, offset); 5314 } 5315 next: 5316 node = mte_to_node(enode); 5317 } while (start != enode); 5318 5319 node = mte_to_node(enode); 5320 node->slot_len = mte_dead_leaves(enode, mt, slots); 5321 if (free) 5322 mt_free_bulk(node->slot_len, slots); 5323 5324 free_leaf: 5325 if (free) 5326 mt_free_rcu(&node->rcu); 5327 else 5328 mt_clear_meta(mt, node, node->type); 5329 } 5330 5331 /* 5332 * mte_destroy_walk() - Free a tree or sub-tree. 5333 * @enode: the encoded maple node (maple_enode) to start 5334 * @mt: the tree to free - needed for node types. 5335 * 5336 * Must hold the write lock. 5337 */ 5338 static inline void mte_destroy_walk(struct maple_enode *enode, 5339 struct maple_tree *mt) 5340 { 5341 struct maple_node *node = mte_to_node(enode); 5342 5343 if (mt_in_rcu(mt)) { 5344 mt_destroy_walk(enode, mt, false); 5345 call_rcu(&node->rcu, mt_free_walk); 5346 } else { 5347 mt_destroy_walk(enode, mt, true); 5348 } 5349 } 5350 5351 static void mas_wr_store_setup(struct ma_wr_state *wr_mas) 5352 { 5353 if (!mas_is_active(wr_mas->mas)) { 5354 if (mas_is_start(wr_mas->mas)) 5355 return; 5356 5357 if (unlikely(mas_is_paused(wr_mas->mas))) 5358 goto reset; 5359 5360 if (unlikely(mas_is_none(wr_mas->mas))) 5361 goto reset; 5362 5363 if (unlikely(mas_is_overflow(wr_mas->mas))) 5364 goto reset; 5365 5366 if (unlikely(mas_is_underflow(wr_mas->mas))) 5367 goto reset; 5368 } 5369 5370 /* 5371 * A less strict version of mas_is_span_wr() where we allow spanning 5372 * writes within this node. This is to stop partial walks in 5373 * mas_prealloc() from being reset. 5374 */ 5375 if (wr_mas->mas->last > wr_mas->mas->max) 5376 goto reset; 5377 5378 if (wr_mas->entry) 5379 return; 5380 5381 if (mte_is_leaf(wr_mas->mas->node) && 5382 wr_mas->mas->last == wr_mas->mas->max) 5383 goto reset; 5384 5385 return; 5386 5387 reset: 5388 mas_reset(wr_mas->mas); 5389 } 5390 5391 /* Interface */ 5392 5393 /** 5394 * mas_store() - Store an @entry. 5395 * @mas: The maple state. 5396 * @entry: The entry to store. 5397 * 5398 * The @mas->index and @mas->last is used to set the range for the @entry. 5399 * Note: The @mas should have pre-allocated entries to ensure there is memory to 5400 * store the entry. Please see mas_expected_entries()/mas_destroy() for more details. 5401 * 5402 * Return: the first entry between mas->index and mas->last or %NULL. 5403 */ 5404 void *mas_store(struct ma_state *mas, void *entry) 5405 { 5406 MA_WR_STATE(wr_mas, mas, entry); 5407 5408 trace_ma_write(__func__, mas, 0, entry); 5409 #ifdef CONFIG_DEBUG_MAPLE_TREE 5410 if (MAS_WARN_ON(mas, mas->index > mas->last)) 5411 pr_err("Error %lX > %lX %p\n", mas->index, mas->last, entry); 5412 5413 if (mas->index > mas->last) { 5414 mas_set_err(mas, -EINVAL); 5415 return NULL; 5416 } 5417 5418 #endif 5419 5420 /* 5421 * Storing is the same operation as insert with the added caveat that it 5422 * can overwrite entries. Although this seems simple enough, one may 5423 * want to examine what happens if a single store operation was to 5424 * overwrite multiple entries within a self-balancing B-Tree. 5425 */ 5426 mas_wr_store_setup(&wr_mas); 5427 mas_wr_store_entry(&wr_mas); 5428 return wr_mas.content; 5429 } 5430 EXPORT_SYMBOL_GPL(mas_store); 5431 5432 /** 5433 * mas_store_gfp() - Store a value into the tree. 5434 * @mas: The maple state 5435 * @entry: The entry to store 5436 * @gfp: The GFP_FLAGS to use for allocations if necessary. 5437 * 5438 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 5439 * be allocated. 5440 */ 5441 int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp) 5442 { 5443 MA_WR_STATE(wr_mas, mas, entry); 5444 5445 mas_wr_store_setup(&wr_mas); 5446 trace_ma_write(__func__, mas, 0, entry); 5447 retry: 5448 mas_wr_store_entry(&wr_mas); 5449 if (unlikely(mas_nomem(mas, gfp))) 5450 goto retry; 5451 5452 if (unlikely(mas_is_err(mas))) 5453 return xa_err(mas->node); 5454 5455 return 0; 5456 } 5457 EXPORT_SYMBOL_GPL(mas_store_gfp); 5458 5459 /** 5460 * mas_store_prealloc() - Store a value into the tree using memory 5461 * preallocated in the maple state. 5462 * @mas: The maple state 5463 * @entry: The entry to store. 5464 */ 5465 void mas_store_prealloc(struct ma_state *mas, void *entry) 5466 { 5467 MA_WR_STATE(wr_mas, mas, entry); 5468 5469 mas_wr_store_setup(&wr_mas); 5470 trace_ma_write(__func__, mas, 0, entry); 5471 mas_wr_store_entry(&wr_mas); 5472 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); 5473 mas_destroy(mas); 5474 } 5475 EXPORT_SYMBOL_GPL(mas_store_prealloc); 5476 5477 /** 5478 * mas_preallocate() - Preallocate enough nodes for a store operation 5479 * @mas: The maple state 5480 * @entry: The entry that will be stored 5481 * @gfp: The GFP_FLAGS to use for allocations. 5482 * 5483 * Return: 0 on success, -ENOMEM if memory could not be allocated. 5484 */ 5485 int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) 5486 { 5487 MA_WR_STATE(wr_mas, mas, entry); 5488 unsigned char node_size; 5489 int request = 1; 5490 int ret; 5491 5492 5493 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 5494 goto ask_now; 5495 5496 mas_wr_store_setup(&wr_mas); 5497 wr_mas.content = mas_start(mas); 5498 /* Root expand */ 5499 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) 5500 goto ask_now; 5501 5502 if (unlikely(!mas_wr_walk(&wr_mas))) { 5503 /* Spanning store, use worst case for now */ 5504 request = 1 + mas_mt_height(mas) * 3; 5505 goto ask_now; 5506 } 5507 5508 /* At this point, we are at the leaf node that needs to be altered. */ 5509 /* Exact fit, no nodes needed. */ 5510 if (wr_mas.r_min == mas->index && wr_mas.r_max == mas->last) 5511 return 0; 5512 5513 mas_wr_end_piv(&wr_mas); 5514 node_size = mas_wr_new_end(&wr_mas); 5515 5516 /* Slot store, does not require additional nodes */ 5517 if (node_size == wr_mas.node_end) { 5518 /* reuse node */ 5519 if (!mt_in_rcu(mas->tree)) 5520 return 0; 5521 /* shifting boundary */ 5522 if (wr_mas.offset_end - mas->offset == 1) 5523 return 0; 5524 } 5525 5526 if (node_size >= mt_slots[wr_mas.type]) { 5527 /* Split, worst case for now. */ 5528 request = 1 + mas_mt_height(mas) * 2; 5529 goto ask_now; 5530 } 5531 5532 /* New root needs a singe node */ 5533 if (unlikely(mte_is_root(mas->node))) 5534 goto ask_now; 5535 5536 /* Potential spanning rebalance collapsing a node, use worst-case */ 5537 if (node_size - 1 <= mt_min_slots[wr_mas.type]) 5538 request = mas_mt_height(mas) * 2 - 1; 5539 5540 /* node store, slot store needs one node */ 5541 ask_now: 5542 mas_node_count_gfp(mas, request, gfp); 5543 mas->mas_flags |= MA_STATE_PREALLOC; 5544 if (likely(!mas_is_err(mas))) 5545 return 0; 5546 5547 mas_set_alloc_req(mas, 0); 5548 ret = xa_err(mas->node); 5549 mas_reset(mas); 5550 mas_destroy(mas); 5551 mas_reset(mas); 5552 return ret; 5553 } 5554 EXPORT_SYMBOL_GPL(mas_preallocate); 5555 5556 /* 5557 * mas_destroy() - destroy a maple state. 5558 * @mas: The maple state 5559 * 5560 * Upon completion, check the left-most node and rebalance against the node to 5561 * the right if necessary. Frees any allocated nodes associated with this maple 5562 * state. 5563 */ 5564 void mas_destroy(struct ma_state *mas) 5565 { 5566 struct maple_alloc *node; 5567 unsigned long total; 5568 5569 /* 5570 * When using mas_for_each() to insert an expected number of elements, 5571 * it is possible that the number inserted is less than the expected 5572 * number. To fix an invalid final node, a check is performed here to 5573 * rebalance the previous node with the final node. 5574 */ 5575 if (mas->mas_flags & MA_STATE_REBALANCE) { 5576 unsigned char end; 5577 5578 mas_start(mas); 5579 mtree_range_walk(mas); 5580 end = mas_data_end(mas) + 1; 5581 if (end < mt_min_slot_count(mas->node) - 1) 5582 mas_destroy_rebalance(mas, end); 5583 5584 mas->mas_flags &= ~MA_STATE_REBALANCE; 5585 } 5586 mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC); 5587 5588 total = mas_allocated(mas); 5589 while (total) { 5590 node = mas->alloc; 5591 mas->alloc = node->slot[0]; 5592 if (node->node_count > 1) { 5593 size_t count = node->node_count - 1; 5594 5595 mt_free_bulk(count, (void __rcu **)&node->slot[1]); 5596 total -= count; 5597 } 5598 kmem_cache_free(maple_node_cache, node); 5599 total--; 5600 } 5601 5602 mas->alloc = NULL; 5603 } 5604 EXPORT_SYMBOL_GPL(mas_destroy); 5605 5606 /* 5607 * mas_expected_entries() - Set the expected number of entries that will be inserted. 5608 * @mas: The maple state 5609 * @nr_entries: The number of expected entries. 5610 * 5611 * This will attempt to pre-allocate enough nodes to store the expected number 5612 * of entries. The allocations will occur using the bulk allocator interface 5613 * for speed. Please call mas_destroy() on the @mas after inserting the entries 5614 * to ensure any unused nodes are freed. 5615 * 5616 * Return: 0 on success, -ENOMEM if memory could not be allocated. 5617 */ 5618 int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries) 5619 { 5620 int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2; 5621 struct maple_enode *enode = mas->node; 5622 int nr_nodes; 5623 int ret; 5624 5625 /* 5626 * Sometimes it is necessary to duplicate a tree to a new tree, such as 5627 * forking a process and duplicating the VMAs from one tree to a new 5628 * tree. When such a situation arises, it is known that the new tree is 5629 * not going to be used until the entire tree is populated. For 5630 * performance reasons, it is best to use a bulk load with RCU disabled. 5631 * This allows for optimistic splitting that favours the left and reuse 5632 * of nodes during the operation. 5633 */ 5634 5635 /* Optimize splitting for bulk insert in-order */ 5636 mas->mas_flags |= MA_STATE_BULK; 5637 5638 /* 5639 * Avoid overflow, assume a gap between each entry and a trailing null. 5640 * If this is wrong, it just means allocation can happen during 5641 * insertion of entries. 5642 */ 5643 nr_nodes = max(nr_entries, nr_entries * 2 + 1); 5644 if (!mt_is_alloc(mas->tree)) 5645 nonleaf_cap = MAPLE_RANGE64_SLOTS - 2; 5646 5647 /* Leaves; reduce slots to keep space for expansion */ 5648 nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2); 5649 /* Internal nodes */ 5650 nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap); 5651 /* Add working room for split (2 nodes) + new parents */ 5652 mas_node_count_gfp(mas, nr_nodes + 3, GFP_KERNEL); 5653 5654 /* Detect if allocations run out */ 5655 mas->mas_flags |= MA_STATE_PREALLOC; 5656 5657 if (!mas_is_err(mas)) 5658 return 0; 5659 5660 ret = xa_err(mas->node); 5661 mas->node = enode; 5662 mas_destroy(mas); 5663 return ret; 5664 5665 } 5666 EXPORT_SYMBOL_GPL(mas_expected_entries); 5667 5668 static inline bool mas_next_setup(struct ma_state *mas, unsigned long max, 5669 void **entry) 5670 { 5671 bool was_none = mas_is_none(mas); 5672 5673 if (unlikely(mas->last >= max)) { 5674 mas->node = MAS_OVERFLOW; 5675 return true; 5676 } 5677 5678 if (mas_is_active(mas)) 5679 return false; 5680 5681 if (mas_is_none(mas) || mas_is_paused(mas)) { 5682 mas->node = MAS_START; 5683 } else if (mas_is_overflow(mas)) { 5684 /* Overflowed before, but the max changed */ 5685 mas->node = MAS_START; 5686 } else if (mas_is_underflow(mas)) { 5687 mas->node = MAS_START; 5688 *entry = mas_walk(mas); 5689 if (*entry) 5690 return true; 5691 } 5692 5693 if (mas_is_start(mas)) 5694 *entry = mas_walk(mas); /* Retries on dead nodes handled by mas_walk */ 5695 5696 if (mas_is_ptr(mas)) { 5697 *entry = NULL; 5698 if (was_none && mas->index == 0) { 5699 mas->index = mas->last = 0; 5700 return true; 5701 } 5702 mas->index = 1; 5703 mas->last = ULONG_MAX; 5704 mas->node = MAS_NONE; 5705 return true; 5706 } 5707 5708 if (mas_is_none(mas)) 5709 return true; 5710 5711 return false; 5712 } 5713 5714 /** 5715 * mas_next() - Get the next entry. 5716 * @mas: The maple state 5717 * @max: The maximum index to check. 5718 * 5719 * Returns the next entry after @mas->index. 5720 * Must hold rcu_read_lock or the write lock. 5721 * Can return the zero entry. 5722 * 5723 * Return: The next entry or %NULL 5724 */ 5725 void *mas_next(struct ma_state *mas, unsigned long max) 5726 { 5727 void *entry = NULL; 5728 5729 if (mas_next_setup(mas, max, &entry)) 5730 return entry; 5731 5732 /* Retries on dead nodes handled by mas_next_slot */ 5733 return mas_next_slot(mas, max, false, true); 5734 } 5735 EXPORT_SYMBOL_GPL(mas_next); 5736 5737 /** 5738 * mas_next_range() - Advance the maple state to the next range 5739 * @mas: The maple state 5740 * @max: The maximum index to check. 5741 * 5742 * Sets @mas->index and @mas->last to the range. 5743 * Must hold rcu_read_lock or the write lock. 5744 * Can return the zero entry. 5745 * 5746 * Return: The next entry or %NULL 5747 */ 5748 void *mas_next_range(struct ma_state *mas, unsigned long max) 5749 { 5750 void *entry = NULL; 5751 5752 if (mas_next_setup(mas, max, &entry)) 5753 return entry; 5754 5755 /* Retries on dead nodes handled by mas_next_slot */ 5756 return mas_next_slot(mas, max, true, true); 5757 } 5758 EXPORT_SYMBOL_GPL(mas_next_range); 5759 5760 /** 5761 * mt_next() - get the next value in the maple tree 5762 * @mt: The maple tree 5763 * @index: The start index 5764 * @max: The maximum index to check 5765 * 5766 * Takes RCU read lock internally to protect the search, which does not 5767 * protect the returned pointer after dropping RCU read lock. 5768 * See also: Documentation/core-api/maple_tree.rst 5769 * 5770 * Return: The entry higher than @index or %NULL if nothing is found. 5771 */ 5772 void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max) 5773 { 5774 void *entry = NULL; 5775 MA_STATE(mas, mt, index, index); 5776 5777 rcu_read_lock(); 5778 entry = mas_next(&mas, max); 5779 rcu_read_unlock(); 5780 return entry; 5781 } 5782 EXPORT_SYMBOL_GPL(mt_next); 5783 5784 static inline bool mas_prev_setup(struct ma_state *mas, unsigned long min, 5785 void **entry) 5786 { 5787 if (unlikely(mas->index <= min)) { 5788 mas->node = MAS_UNDERFLOW; 5789 return true; 5790 } 5791 5792 if (mas_is_active(mas)) 5793 return false; 5794 5795 if (mas_is_overflow(mas)) { 5796 mas->node = MAS_START; 5797 *entry = mas_walk(mas); 5798 if (*entry) 5799 return true; 5800 } 5801 5802 if (mas_is_none(mas) || mas_is_paused(mas)) { 5803 mas->node = MAS_START; 5804 } else if (mas_is_underflow(mas)) { 5805 /* underflowed before but the min changed */ 5806 mas->node = MAS_START; 5807 } 5808 5809 if (mas_is_start(mas)) 5810 mas_walk(mas); 5811 5812 if (unlikely(mas_is_ptr(mas))) { 5813 if (!mas->index) 5814 goto none; 5815 mas->index = mas->last = 0; 5816 *entry = mas_root(mas); 5817 return true; 5818 } 5819 5820 if (mas_is_none(mas)) { 5821 if (mas->index) { 5822 /* Walked to out-of-range pointer? */ 5823 mas->index = mas->last = 0; 5824 mas->node = MAS_ROOT; 5825 *entry = mas_root(mas); 5826 return true; 5827 } 5828 return true; 5829 } 5830 5831 return false; 5832 5833 none: 5834 mas->node = MAS_NONE; 5835 return true; 5836 } 5837 5838 /** 5839 * mas_prev() - Get the previous entry 5840 * @mas: The maple state 5841 * @min: The minimum value to check. 5842 * 5843 * Must hold rcu_read_lock or the write lock. 5844 * Will reset mas to MAS_START if the node is MAS_NONE. Will stop on not 5845 * searchable nodes. 5846 * 5847 * Return: the previous value or %NULL. 5848 */ 5849 void *mas_prev(struct ma_state *mas, unsigned long min) 5850 { 5851 void *entry = NULL; 5852 5853 if (mas_prev_setup(mas, min, &entry)) 5854 return entry; 5855 5856 return mas_prev_slot(mas, min, false, true); 5857 } 5858 EXPORT_SYMBOL_GPL(mas_prev); 5859 5860 /** 5861 * mas_prev_range() - Advance to the previous range 5862 * @mas: The maple state 5863 * @min: The minimum value to check. 5864 * 5865 * Sets @mas->index and @mas->last to the range. 5866 * Must hold rcu_read_lock or the write lock. 5867 * Will reset mas to MAS_START if the node is MAS_NONE. Will stop on not 5868 * searchable nodes. 5869 * 5870 * Return: the previous value or %NULL. 5871 */ 5872 void *mas_prev_range(struct ma_state *mas, unsigned long min) 5873 { 5874 void *entry = NULL; 5875 5876 if (mas_prev_setup(mas, min, &entry)) 5877 return entry; 5878 5879 return mas_prev_slot(mas, min, true, true); 5880 } 5881 EXPORT_SYMBOL_GPL(mas_prev_range); 5882 5883 /** 5884 * mt_prev() - get the previous value in the maple tree 5885 * @mt: The maple tree 5886 * @index: The start index 5887 * @min: The minimum index to check 5888 * 5889 * Takes RCU read lock internally to protect the search, which does not 5890 * protect the returned pointer after dropping RCU read lock. 5891 * See also: Documentation/core-api/maple_tree.rst 5892 * 5893 * Return: The entry before @index or %NULL if nothing is found. 5894 */ 5895 void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min) 5896 { 5897 void *entry = NULL; 5898 MA_STATE(mas, mt, index, index); 5899 5900 rcu_read_lock(); 5901 entry = mas_prev(&mas, min); 5902 rcu_read_unlock(); 5903 return entry; 5904 } 5905 EXPORT_SYMBOL_GPL(mt_prev); 5906 5907 /** 5908 * mas_pause() - Pause a mas_find/mas_for_each to drop the lock. 5909 * @mas: The maple state to pause 5910 * 5911 * Some users need to pause a walk and drop the lock they're holding in 5912 * order to yield to a higher priority thread or carry out an operation 5913 * on an entry. Those users should call this function before they drop 5914 * the lock. It resets the @mas to be suitable for the next iteration 5915 * of the loop after the user has reacquired the lock. If most entries 5916 * found during a walk require you to call mas_pause(), the mt_for_each() 5917 * iterator may be more appropriate. 5918 * 5919 */ 5920 void mas_pause(struct ma_state *mas) 5921 { 5922 mas->node = MAS_PAUSE; 5923 } 5924 EXPORT_SYMBOL_GPL(mas_pause); 5925 5926 /** 5927 * mas_find_setup() - Internal function to set up mas_find*(). 5928 * @mas: The maple state 5929 * @max: The maximum index 5930 * @entry: Pointer to the entry 5931 * 5932 * Returns: True if entry is the answer, false otherwise. 5933 */ 5934 static inline bool mas_find_setup(struct ma_state *mas, unsigned long max, 5935 void **entry) 5936 { 5937 if (mas_is_active(mas)) { 5938 if (mas->last < max) 5939 return false; 5940 5941 return true; 5942 } 5943 5944 if (mas_is_paused(mas)) { 5945 if (unlikely(mas->last >= max)) 5946 return true; 5947 5948 mas->index = ++mas->last; 5949 mas->node = MAS_START; 5950 } else if (mas_is_none(mas)) { 5951 if (unlikely(mas->last >= max)) 5952 return true; 5953 5954 mas->index = mas->last; 5955 mas->node = MAS_START; 5956 } else if (mas_is_overflow(mas) || mas_is_underflow(mas)) { 5957 if (mas->index > max) { 5958 mas->node = MAS_OVERFLOW; 5959 return true; 5960 } 5961 5962 mas->node = MAS_START; 5963 } 5964 5965 if (mas_is_start(mas)) { 5966 /* First run or continue */ 5967 if (mas->index > max) 5968 return true; 5969 5970 *entry = mas_walk(mas); 5971 if (*entry) 5972 return true; 5973 5974 } 5975 5976 if (unlikely(!mas_searchable(mas))) { 5977 if (unlikely(mas_is_ptr(mas))) 5978 goto ptr_out_of_range; 5979 5980 return true; 5981 } 5982 5983 if (mas->index == max) 5984 return true; 5985 5986 return false; 5987 5988 ptr_out_of_range: 5989 mas->node = MAS_NONE; 5990 mas->index = 1; 5991 mas->last = ULONG_MAX; 5992 return true; 5993 } 5994 5995 /** 5996 * mas_find() - On the first call, find the entry at or after mas->index up to 5997 * %max. Otherwise, find the entry after mas->index. 5998 * @mas: The maple state 5999 * @max: The maximum value to check. 6000 * 6001 * Must hold rcu_read_lock or the write lock. 6002 * If an entry exists, last and index are updated accordingly. 6003 * May set @mas->node to MAS_NONE. 6004 * 6005 * Return: The entry or %NULL. 6006 */ 6007 void *mas_find(struct ma_state *mas, unsigned long max) 6008 { 6009 void *entry = NULL; 6010 6011 if (mas_find_setup(mas, max, &entry)) 6012 return entry; 6013 6014 /* Retries on dead nodes handled by mas_next_slot */ 6015 return mas_next_slot(mas, max, false, false); 6016 } 6017 EXPORT_SYMBOL_GPL(mas_find); 6018 6019 /** 6020 * mas_find_range() - On the first call, find the entry at or after 6021 * mas->index up to %max. Otherwise, advance to the next slot mas->index. 6022 * @mas: The maple state 6023 * @max: The maximum value to check. 6024 * 6025 * Must hold rcu_read_lock or the write lock. 6026 * If an entry exists, last and index are updated accordingly. 6027 * May set @mas->node to MAS_NONE. 6028 * 6029 * Return: The entry or %NULL. 6030 */ 6031 void *mas_find_range(struct ma_state *mas, unsigned long max) 6032 { 6033 void *entry = NULL; 6034 6035 if (mas_find_setup(mas, max, &entry)) 6036 return entry; 6037 6038 /* Retries on dead nodes handled by mas_next_slot */ 6039 return mas_next_slot(mas, max, true, false); 6040 } 6041 EXPORT_SYMBOL_GPL(mas_find_range); 6042 6043 /** 6044 * mas_find_rev_setup() - Internal function to set up mas_find_*_rev() 6045 * @mas: The maple state 6046 * @min: The minimum index 6047 * @entry: Pointer to the entry 6048 * 6049 * Returns: True if entry is the answer, false otherwise. 6050 */ 6051 static inline bool mas_find_rev_setup(struct ma_state *mas, unsigned long min, 6052 void **entry) 6053 { 6054 if (mas_is_active(mas)) { 6055 if (mas->index > min) 6056 return false; 6057 6058 return true; 6059 } 6060 6061 if (mas_is_paused(mas)) { 6062 if (unlikely(mas->index <= min)) { 6063 mas->node = MAS_NONE; 6064 return true; 6065 } 6066 mas->node = MAS_START; 6067 mas->last = --mas->index; 6068 } else if (mas_is_none(mas)) { 6069 if (mas->index <= min) 6070 goto none; 6071 6072 mas->last = mas->index; 6073 mas->node = MAS_START; 6074 } else if (mas_is_underflow(mas) || mas_is_overflow(mas)) { 6075 if (mas->last <= min) { 6076 mas->node = MAS_UNDERFLOW; 6077 return true; 6078 } 6079 6080 mas->node = MAS_START; 6081 } 6082 6083 if (mas_is_start(mas)) { 6084 /* First run or continue */ 6085 if (mas->index < min) 6086 return true; 6087 6088 *entry = mas_walk(mas); 6089 if (*entry) 6090 return true; 6091 } 6092 6093 if (unlikely(!mas_searchable(mas))) { 6094 if (mas_is_ptr(mas)) 6095 goto none; 6096 6097 if (mas_is_none(mas)) { 6098 /* 6099 * Walked to the location, and there was nothing so the 6100 * previous location is 0. 6101 */ 6102 mas->last = mas->index = 0; 6103 mas->node = MAS_ROOT; 6104 *entry = mas_root(mas); 6105 return true; 6106 } 6107 } 6108 6109 if (mas->index < min) 6110 return true; 6111 6112 return false; 6113 6114 none: 6115 mas->node = MAS_NONE; 6116 return true; 6117 } 6118 6119 /** 6120 * mas_find_rev: On the first call, find the first non-null entry at or below 6121 * mas->index down to %min. Otherwise find the first non-null entry below 6122 * mas->index down to %min. 6123 * @mas: The maple state 6124 * @min: The minimum value to check. 6125 * 6126 * Must hold rcu_read_lock or the write lock. 6127 * If an entry exists, last and index are updated accordingly. 6128 * May set @mas->node to MAS_NONE. 6129 * 6130 * Return: The entry or %NULL. 6131 */ 6132 void *mas_find_rev(struct ma_state *mas, unsigned long min) 6133 { 6134 void *entry = NULL; 6135 6136 if (mas_find_rev_setup(mas, min, &entry)) 6137 return entry; 6138 6139 /* Retries on dead nodes handled by mas_prev_slot */ 6140 return mas_prev_slot(mas, min, false, false); 6141 6142 } 6143 EXPORT_SYMBOL_GPL(mas_find_rev); 6144 6145 /** 6146 * mas_find_range_rev: On the first call, find the first non-null entry at or 6147 * below mas->index down to %min. Otherwise advance to the previous slot after 6148 * mas->index down to %min. 6149 * @mas: The maple state 6150 * @min: The minimum value to check. 6151 * 6152 * Must hold rcu_read_lock or the write lock. 6153 * If an entry exists, last and index are updated accordingly. 6154 * May set @mas->node to MAS_NONE. 6155 * 6156 * Return: The entry or %NULL. 6157 */ 6158 void *mas_find_range_rev(struct ma_state *mas, unsigned long min) 6159 { 6160 void *entry = NULL; 6161 6162 if (mas_find_rev_setup(mas, min, &entry)) 6163 return entry; 6164 6165 /* Retries on dead nodes handled by mas_prev_slot */ 6166 return mas_prev_slot(mas, min, true, false); 6167 } 6168 EXPORT_SYMBOL_GPL(mas_find_range_rev); 6169 6170 /** 6171 * mas_erase() - Find the range in which index resides and erase the entire 6172 * range. 6173 * @mas: The maple state 6174 * 6175 * Must hold the write lock. 6176 * Searches for @mas->index, sets @mas->index and @mas->last to the range and 6177 * erases that range. 6178 * 6179 * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated. 6180 */ 6181 void *mas_erase(struct ma_state *mas) 6182 { 6183 void *entry; 6184 MA_WR_STATE(wr_mas, mas, NULL); 6185 6186 if (mas_is_none(mas) || mas_is_paused(mas)) 6187 mas->node = MAS_START; 6188 6189 /* Retry unnecessary when holding the write lock. */ 6190 entry = mas_state_walk(mas); 6191 if (!entry) 6192 return NULL; 6193 6194 write_retry: 6195 /* Must reset to ensure spanning writes of last slot are detected */ 6196 mas_reset(mas); 6197 mas_wr_store_setup(&wr_mas); 6198 mas_wr_store_entry(&wr_mas); 6199 if (mas_nomem(mas, GFP_KERNEL)) 6200 goto write_retry; 6201 6202 return entry; 6203 } 6204 EXPORT_SYMBOL_GPL(mas_erase); 6205 6206 /** 6207 * mas_nomem() - Check if there was an error allocating and do the allocation 6208 * if necessary If there are allocations, then free them. 6209 * @mas: The maple state 6210 * @gfp: The GFP_FLAGS to use for allocations 6211 * Return: true on allocation, false otherwise. 6212 */ 6213 bool mas_nomem(struct ma_state *mas, gfp_t gfp) 6214 __must_hold(mas->tree->ma_lock) 6215 { 6216 if (likely(mas->node != MA_ERROR(-ENOMEM))) { 6217 mas_destroy(mas); 6218 return false; 6219 } 6220 6221 if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) { 6222 mtree_unlock(mas->tree); 6223 mas_alloc_nodes(mas, gfp); 6224 mtree_lock(mas->tree); 6225 } else { 6226 mas_alloc_nodes(mas, gfp); 6227 } 6228 6229 if (!mas_allocated(mas)) 6230 return false; 6231 6232 mas->node = MAS_START; 6233 return true; 6234 } 6235 6236 void __init maple_tree_init(void) 6237 { 6238 maple_node_cache = kmem_cache_create("maple_node", 6239 sizeof(struct maple_node), sizeof(struct maple_node), 6240 SLAB_PANIC, NULL); 6241 } 6242 6243 /** 6244 * mtree_load() - Load a value stored in a maple tree 6245 * @mt: The maple tree 6246 * @index: The index to load 6247 * 6248 * Return: the entry or %NULL 6249 */ 6250 void *mtree_load(struct maple_tree *mt, unsigned long index) 6251 { 6252 MA_STATE(mas, mt, index, index); 6253 void *entry; 6254 6255 trace_ma_read(__func__, &mas); 6256 rcu_read_lock(); 6257 retry: 6258 entry = mas_start(&mas); 6259 if (unlikely(mas_is_none(&mas))) 6260 goto unlock; 6261 6262 if (unlikely(mas_is_ptr(&mas))) { 6263 if (index) 6264 entry = NULL; 6265 6266 goto unlock; 6267 } 6268 6269 entry = mtree_lookup_walk(&mas); 6270 if (!entry && unlikely(mas_is_start(&mas))) 6271 goto retry; 6272 unlock: 6273 rcu_read_unlock(); 6274 if (xa_is_zero(entry)) 6275 return NULL; 6276 6277 return entry; 6278 } 6279 EXPORT_SYMBOL(mtree_load); 6280 6281 /** 6282 * mtree_store_range() - Store an entry at a given range. 6283 * @mt: The maple tree 6284 * @index: The start of the range 6285 * @last: The end of the range 6286 * @entry: The entry to store 6287 * @gfp: The GFP_FLAGS to use for allocations 6288 * 6289 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 6290 * be allocated. 6291 */ 6292 int mtree_store_range(struct maple_tree *mt, unsigned long index, 6293 unsigned long last, void *entry, gfp_t gfp) 6294 { 6295 MA_STATE(mas, mt, index, last); 6296 MA_WR_STATE(wr_mas, &mas, entry); 6297 6298 trace_ma_write(__func__, &mas, 0, entry); 6299 if (WARN_ON_ONCE(xa_is_advanced(entry))) 6300 return -EINVAL; 6301 6302 if (index > last) 6303 return -EINVAL; 6304 6305 mtree_lock(mt); 6306 retry: 6307 mas_wr_store_entry(&wr_mas); 6308 if (mas_nomem(&mas, gfp)) 6309 goto retry; 6310 6311 mtree_unlock(mt); 6312 if (mas_is_err(&mas)) 6313 return xa_err(mas.node); 6314 6315 return 0; 6316 } 6317 EXPORT_SYMBOL(mtree_store_range); 6318 6319 /** 6320 * mtree_store() - Store an entry at a given index. 6321 * @mt: The maple tree 6322 * @index: The index to store the value 6323 * @entry: The entry to store 6324 * @gfp: The GFP_FLAGS to use for allocations 6325 * 6326 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 6327 * be allocated. 6328 */ 6329 int mtree_store(struct maple_tree *mt, unsigned long index, void *entry, 6330 gfp_t gfp) 6331 { 6332 return mtree_store_range(mt, index, index, entry, gfp); 6333 } 6334 EXPORT_SYMBOL(mtree_store); 6335 6336 /** 6337 * mtree_insert_range() - Insert an entry at a given range if there is no value. 6338 * @mt: The maple tree 6339 * @first: The start of the range 6340 * @last: The end of the range 6341 * @entry: The entry to store 6342 * @gfp: The GFP_FLAGS to use for allocations. 6343 * 6344 * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid 6345 * request, -ENOMEM if memory could not be allocated. 6346 */ 6347 int mtree_insert_range(struct maple_tree *mt, unsigned long first, 6348 unsigned long last, void *entry, gfp_t gfp) 6349 { 6350 MA_STATE(ms, mt, first, last); 6351 6352 if (WARN_ON_ONCE(xa_is_advanced(entry))) 6353 return -EINVAL; 6354 6355 if (first > last) 6356 return -EINVAL; 6357 6358 mtree_lock(mt); 6359 retry: 6360 mas_insert(&ms, entry); 6361 if (mas_nomem(&ms, gfp)) 6362 goto retry; 6363 6364 mtree_unlock(mt); 6365 if (mas_is_err(&ms)) 6366 return xa_err(ms.node); 6367 6368 return 0; 6369 } 6370 EXPORT_SYMBOL(mtree_insert_range); 6371 6372 /** 6373 * mtree_insert() - Insert an entry at a given index if there is no value. 6374 * @mt: The maple tree 6375 * @index : The index to store the value 6376 * @entry: The entry to store 6377 * @gfp: The GFP_FLAGS to use for allocations. 6378 * 6379 * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid 6380 * request, -ENOMEM if memory could not be allocated. 6381 */ 6382 int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry, 6383 gfp_t gfp) 6384 { 6385 return mtree_insert_range(mt, index, index, entry, gfp); 6386 } 6387 EXPORT_SYMBOL(mtree_insert); 6388 6389 int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, 6390 void *entry, unsigned long size, unsigned long min, 6391 unsigned long max, gfp_t gfp) 6392 { 6393 int ret = 0; 6394 6395 MA_STATE(mas, mt, 0, 0); 6396 if (!mt_is_alloc(mt)) 6397 return -EINVAL; 6398 6399 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6400 return -EINVAL; 6401 6402 mtree_lock(mt); 6403 retry: 6404 ret = mas_empty_area(&mas, min, max, size); 6405 if (ret) 6406 goto unlock; 6407 6408 mas_insert(&mas, entry); 6409 /* 6410 * mas_nomem() may release the lock, causing the allocated area 6411 * to be unavailable, so try to allocate a free area again. 6412 */ 6413 if (mas_nomem(&mas, gfp)) 6414 goto retry; 6415 6416 if (mas_is_err(&mas)) 6417 ret = xa_err(mas.node); 6418 else 6419 *startp = mas.index; 6420 6421 unlock: 6422 mtree_unlock(mt); 6423 return ret; 6424 } 6425 EXPORT_SYMBOL(mtree_alloc_range); 6426 6427 int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, 6428 void *entry, unsigned long size, unsigned long min, 6429 unsigned long max, gfp_t gfp) 6430 { 6431 int ret = 0; 6432 6433 MA_STATE(mas, mt, 0, 0); 6434 if (!mt_is_alloc(mt)) 6435 return -EINVAL; 6436 6437 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6438 return -EINVAL; 6439 6440 mtree_lock(mt); 6441 retry: 6442 ret = mas_empty_area_rev(&mas, min, max, size); 6443 if (ret) 6444 goto unlock; 6445 6446 mas_insert(&mas, entry); 6447 /* 6448 * mas_nomem() may release the lock, causing the allocated area 6449 * to be unavailable, so try to allocate a free area again. 6450 */ 6451 if (mas_nomem(&mas, gfp)) 6452 goto retry; 6453 6454 if (mas_is_err(&mas)) 6455 ret = xa_err(mas.node); 6456 else 6457 *startp = mas.index; 6458 6459 unlock: 6460 mtree_unlock(mt); 6461 return ret; 6462 } 6463 EXPORT_SYMBOL(mtree_alloc_rrange); 6464 6465 /** 6466 * mtree_erase() - Find an index and erase the entire range. 6467 * @mt: The maple tree 6468 * @index: The index to erase 6469 * 6470 * Erasing is the same as a walk to an entry then a store of a NULL to that 6471 * ENTIRE range. In fact, it is implemented as such using the advanced API. 6472 * 6473 * Return: The entry stored at the @index or %NULL 6474 */ 6475 void *mtree_erase(struct maple_tree *mt, unsigned long index) 6476 { 6477 void *entry = NULL; 6478 6479 MA_STATE(mas, mt, index, index); 6480 trace_ma_op(__func__, &mas); 6481 6482 mtree_lock(mt); 6483 entry = mas_erase(&mas); 6484 mtree_unlock(mt); 6485 6486 return entry; 6487 } 6488 EXPORT_SYMBOL(mtree_erase); 6489 6490 /** 6491 * __mt_destroy() - Walk and free all nodes of a locked maple tree. 6492 * @mt: The maple tree 6493 * 6494 * Note: Does not handle locking. 6495 */ 6496 void __mt_destroy(struct maple_tree *mt) 6497 { 6498 void *root = mt_root_locked(mt); 6499 6500 rcu_assign_pointer(mt->ma_root, NULL); 6501 if (xa_is_node(root)) 6502 mte_destroy_walk(root, mt); 6503 6504 mt->ma_flags = 0; 6505 } 6506 EXPORT_SYMBOL_GPL(__mt_destroy); 6507 6508 /** 6509 * mtree_destroy() - Destroy a maple tree 6510 * @mt: The maple tree 6511 * 6512 * Frees all resources used by the tree. Handles locking. 6513 */ 6514 void mtree_destroy(struct maple_tree *mt) 6515 { 6516 mtree_lock(mt); 6517 __mt_destroy(mt); 6518 mtree_unlock(mt); 6519 } 6520 EXPORT_SYMBOL(mtree_destroy); 6521 6522 /** 6523 * mt_find() - Search from the start up until an entry is found. 6524 * @mt: The maple tree 6525 * @index: Pointer which contains the start location of the search 6526 * @max: The maximum value of the search range 6527 * 6528 * Takes RCU read lock internally to protect the search, which does not 6529 * protect the returned pointer after dropping RCU read lock. 6530 * See also: Documentation/core-api/maple_tree.rst 6531 * 6532 * In case that an entry is found @index is updated to point to the next 6533 * possible entry independent whether the found entry is occupying a 6534 * single index or a range if indices. 6535 * 6536 * Return: The entry at or after the @index or %NULL 6537 */ 6538 void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max) 6539 { 6540 MA_STATE(mas, mt, *index, *index); 6541 void *entry; 6542 #ifdef CONFIG_DEBUG_MAPLE_TREE 6543 unsigned long copy = *index; 6544 #endif 6545 6546 trace_ma_read(__func__, &mas); 6547 6548 if ((*index) > max) 6549 return NULL; 6550 6551 rcu_read_lock(); 6552 retry: 6553 entry = mas_state_walk(&mas); 6554 if (mas_is_start(&mas)) 6555 goto retry; 6556 6557 if (unlikely(xa_is_zero(entry))) 6558 entry = NULL; 6559 6560 if (entry) 6561 goto unlock; 6562 6563 while (mas_searchable(&mas) && (mas.last < max)) { 6564 entry = mas_next_entry(&mas, max); 6565 if (likely(entry && !xa_is_zero(entry))) 6566 break; 6567 } 6568 6569 if (unlikely(xa_is_zero(entry))) 6570 entry = NULL; 6571 unlock: 6572 rcu_read_unlock(); 6573 if (likely(entry)) { 6574 *index = mas.last + 1; 6575 #ifdef CONFIG_DEBUG_MAPLE_TREE 6576 if (MT_WARN_ON(mt, (*index) && ((*index) <= copy))) 6577 pr_err("index not increased! %lx <= %lx\n", 6578 *index, copy); 6579 #endif 6580 } 6581 6582 return entry; 6583 } 6584 EXPORT_SYMBOL(mt_find); 6585 6586 /** 6587 * mt_find_after() - Search from the start up until an entry is found. 6588 * @mt: The maple tree 6589 * @index: Pointer which contains the start location of the search 6590 * @max: The maximum value to check 6591 * 6592 * Same as mt_find() except that it checks @index for 0 before 6593 * searching. If @index == 0, the search is aborted. This covers a wrap 6594 * around of @index to 0 in an iterator loop. 6595 * 6596 * Return: The entry at or after the @index or %NULL 6597 */ 6598 void *mt_find_after(struct maple_tree *mt, unsigned long *index, 6599 unsigned long max) 6600 { 6601 if (!(*index)) 6602 return NULL; 6603 6604 return mt_find(mt, index, max); 6605 } 6606 EXPORT_SYMBOL(mt_find_after); 6607 6608 #ifdef CONFIG_DEBUG_MAPLE_TREE 6609 atomic_t maple_tree_tests_run; 6610 EXPORT_SYMBOL_GPL(maple_tree_tests_run); 6611 atomic_t maple_tree_tests_passed; 6612 EXPORT_SYMBOL_GPL(maple_tree_tests_passed); 6613 6614 #ifndef __KERNEL__ 6615 extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int); 6616 void mt_set_non_kernel(unsigned int val) 6617 { 6618 kmem_cache_set_non_kernel(maple_node_cache, val); 6619 } 6620 6621 extern unsigned long kmem_cache_get_alloc(struct kmem_cache *); 6622 unsigned long mt_get_alloc_size(void) 6623 { 6624 return kmem_cache_get_alloc(maple_node_cache); 6625 } 6626 6627 extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *); 6628 void mt_zero_nr_tallocated(void) 6629 { 6630 kmem_cache_zero_nr_tallocated(maple_node_cache); 6631 } 6632 6633 extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *); 6634 unsigned int mt_nr_tallocated(void) 6635 { 6636 return kmem_cache_nr_tallocated(maple_node_cache); 6637 } 6638 6639 extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *); 6640 unsigned int mt_nr_allocated(void) 6641 { 6642 return kmem_cache_nr_allocated(maple_node_cache); 6643 } 6644 6645 /* 6646 * mas_dead_node() - Check if the maple state is pointing to a dead node. 6647 * @mas: The maple state 6648 * @index: The index to restore in @mas. 6649 * 6650 * Used in test code. 6651 * Return: 1 if @mas has been reset to MAS_START, 0 otherwise. 6652 */ 6653 static inline int mas_dead_node(struct ma_state *mas, unsigned long index) 6654 { 6655 if (unlikely(!mas_searchable(mas) || mas_is_start(mas))) 6656 return 0; 6657 6658 if (likely(!mte_dead_node(mas->node))) 6659 return 0; 6660 6661 mas_rewalk(mas, index); 6662 return 1; 6663 } 6664 6665 void mt_cache_shrink(void) 6666 { 6667 } 6668 #else 6669 /* 6670 * mt_cache_shrink() - For testing, don't use this. 6671 * 6672 * Certain testcases can trigger an OOM when combined with other memory 6673 * debugging configuration options. This function is used to reduce the 6674 * possibility of an out of memory even due to kmem_cache objects remaining 6675 * around for longer than usual. 6676 */ 6677 void mt_cache_shrink(void) 6678 { 6679 kmem_cache_shrink(maple_node_cache); 6680 6681 } 6682 EXPORT_SYMBOL_GPL(mt_cache_shrink); 6683 6684 #endif /* not defined __KERNEL__ */ 6685 /* 6686 * mas_get_slot() - Get the entry in the maple state node stored at @offset. 6687 * @mas: The maple state 6688 * @offset: The offset into the slot array to fetch. 6689 * 6690 * Return: The entry stored at @offset. 6691 */ 6692 static inline struct maple_enode *mas_get_slot(struct ma_state *mas, 6693 unsigned char offset) 6694 { 6695 return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)), 6696 offset); 6697 } 6698 6699 /* Depth first search, post-order */ 6700 static void mas_dfs_postorder(struct ma_state *mas, unsigned long max) 6701 { 6702 6703 struct maple_enode *p = MAS_NONE, *mn = mas->node; 6704 unsigned long p_min, p_max; 6705 6706 mas_next_node(mas, mas_mn(mas), max); 6707 if (!mas_is_none(mas)) 6708 return; 6709 6710 if (mte_is_root(mn)) 6711 return; 6712 6713 mas->node = mn; 6714 mas_ascend(mas); 6715 do { 6716 p = mas->node; 6717 p_min = mas->min; 6718 p_max = mas->max; 6719 mas_prev_node(mas, 0); 6720 } while (!mas_is_none(mas)); 6721 6722 mas->node = p; 6723 mas->max = p_max; 6724 mas->min = p_min; 6725 } 6726 6727 /* Tree validations */ 6728 static void mt_dump_node(const struct maple_tree *mt, void *entry, 6729 unsigned long min, unsigned long max, unsigned int depth, 6730 enum mt_dump_format format); 6731 static void mt_dump_range(unsigned long min, unsigned long max, 6732 unsigned int depth, enum mt_dump_format format) 6733 { 6734 static const char spaces[] = " "; 6735 6736 switch(format) { 6737 case mt_dump_hex: 6738 if (min == max) 6739 pr_info("%.*s%lx: ", depth * 2, spaces, min); 6740 else 6741 pr_info("%.*s%lx-%lx: ", depth * 2, spaces, min, max); 6742 break; 6743 default: 6744 case mt_dump_dec: 6745 if (min == max) 6746 pr_info("%.*s%lu: ", depth * 2, spaces, min); 6747 else 6748 pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max); 6749 } 6750 } 6751 6752 static void mt_dump_entry(void *entry, unsigned long min, unsigned long max, 6753 unsigned int depth, enum mt_dump_format format) 6754 { 6755 mt_dump_range(min, max, depth, format); 6756 6757 if (xa_is_value(entry)) 6758 pr_cont("value %ld (0x%lx) [%p]\n", xa_to_value(entry), 6759 xa_to_value(entry), entry); 6760 else if (xa_is_zero(entry)) 6761 pr_cont("zero (%ld)\n", xa_to_internal(entry)); 6762 else if (mt_is_reserved(entry)) 6763 pr_cont("UNKNOWN ENTRY (%p)\n", entry); 6764 else 6765 pr_cont("%p\n", entry); 6766 } 6767 6768 static void mt_dump_range64(const struct maple_tree *mt, void *entry, 6769 unsigned long min, unsigned long max, unsigned int depth, 6770 enum mt_dump_format format) 6771 { 6772 struct maple_range_64 *node = &mte_to_node(entry)->mr64; 6773 bool leaf = mte_is_leaf(entry); 6774 unsigned long first = min; 6775 int i; 6776 6777 pr_cont(" contents: "); 6778 for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) { 6779 switch(format) { 6780 case mt_dump_hex: 6781 pr_cont("%p %lX ", node->slot[i], node->pivot[i]); 6782 break; 6783 default: 6784 case mt_dump_dec: 6785 pr_cont("%p %lu ", node->slot[i], node->pivot[i]); 6786 } 6787 } 6788 pr_cont("%p\n", node->slot[i]); 6789 for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) { 6790 unsigned long last = max; 6791 6792 if (i < (MAPLE_RANGE64_SLOTS - 1)) 6793 last = node->pivot[i]; 6794 else if (!node->slot[i] && max != mt_node_max(entry)) 6795 break; 6796 if (last == 0 && i > 0) 6797 break; 6798 if (leaf) 6799 mt_dump_entry(mt_slot(mt, node->slot, i), 6800 first, last, depth + 1, format); 6801 else if (node->slot[i]) 6802 mt_dump_node(mt, mt_slot(mt, node->slot, i), 6803 first, last, depth + 1, format); 6804 6805 if (last == max) 6806 break; 6807 if (last > max) { 6808 switch(format) { 6809 case mt_dump_hex: 6810 pr_err("node %p last (%lx) > max (%lx) at pivot %d!\n", 6811 node, last, max, i); 6812 break; 6813 default: 6814 case mt_dump_dec: 6815 pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", 6816 node, last, max, i); 6817 } 6818 } 6819 first = last + 1; 6820 } 6821 } 6822 6823 static void mt_dump_arange64(const struct maple_tree *mt, void *entry, 6824 unsigned long min, unsigned long max, unsigned int depth, 6825 enum mt_dump_format format) 6826 { 6827 struct maple_arange_64 *node = &mte_to_node(entry)->ma64; 6828 bool leaf = mte_is_leaf(entry); 6829 unsigned long first = min; 6830 int i; 6831 6832 pr_cont(" contents: "); 6833 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 6834 switch (format) { 6835 case mt_dump_hex: 6836 pr_cont("%lx ", node->gap[i]); 6837 break; 6838 default: 6839 case mt_dump_dec: 6840 pr_cont("%lu ", node->gap[i]); 6841 } 6842 } 6843 pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap); 6844 for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) { 6845 switch (format) { 6846 case mt_dump_hex: 6847 pr_cont("%p %lX ", node->slot[i], node->pivot[i]); 6848 break; 6849 default: 6850 case mt_dump_dec: 6851 pr_cont("%p %lu ", node->slot[i], node->pivot[i]); 6852 } 6853 } 6854 pr_cont("%p\n", node->slot[i]); 6855 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 6856 unsigned long last = max; 6857 6858 if (i < (MAPLE_ARANGE64_SLOTS - 1)) 6859 last = node->pivot[i]; 6860 else if (!node->slot[i]) 6861 break; 6862 if (last == 0 && i > 0) 6863 break; 6864 if (leaf) 6865 mt_dump_entry(mt_slot(mt, node->slot, i), 6866 first, last, depth + 1, format); 6867 else if (node->slot[i]) 6868 mt_dump_node(mt, mt_slot(mt, node->slot, i), 6869 first, last, depth + 1, format); 6870 6871 if (last == max) 6872 break; 6873 if (last > max) { 6874 pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", 6875 node, last, max, i); 6876 break; 6877 } 6878 first = last + 1; 6879 } 6880 } 6881 6882 static void mt_dump_node(const struct maple_tree *mt, void *entry, 6883 unsigned long min, unsigned long max, unsigned int depth, 6884 enum mt_dump_format format) 6885 { 6886 struct maple_node *node = mte_to_node(entry); 6887 unsigned int type = mte_node_type(entry); 6888 unsigned int i; 6889 6890 mt_dump_range(min, max, depth, format); 6891 6892 pr_cont("node %p depth %d type %d parent %p", node, depth, type, 6893 node ? node->parent : NULL); 6894 switch (type) { 6895 case maple_dense: 6896 pr_cont("\n"); 6897 for (i = 0; i < MAPLE_NODE_SLOTS; i++) { 6898 if (min + i > max) 6899 pr_cont("OUT OF RANGE: "); 6900 mt_dump_entry(mt_slot(mt, node->slot, i), 6901 min + i, min + i, depth, format); 6902 } 6903 break; 6904 case maple_leaf_64: 6905 case maple_range_64: 6906 mt_dump_range64(mt, entry, min, max, depth, format); 6907 break; 6908 case maple_arange_64: 6909 mt_dump_arange64(mt, entry, min, max, depth, format); 6910 break; 6911 6912 default: 6913 pr_cont(" UNKNOWN TYPE\n"); 6914 } 6915 } 6916 6917 void mt_dump(const struct maple_tree *mt, enum mt_dump_format format) 6918 { 6919 void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt)); 6920 6921 pr_info("maple_tree(%p) flags %X, height %u root %p\n", 6922 mt, mt->ma_flags, mt_height(mt), entry); 6923 if (!xa_is_node(entry)) 6924 mt_dump_entry(entry, 0, 0, 0, format); 6925 else if (entry) 6926 mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format); 6927 } 6928 EXPORT_SYMBOL_GPL(mt_dump); 6929 6930 /* 6931 * Calculate the maximum gap in a node and check if that's what is reported in 6932 * the parent (unless root). 6933 */ 6934 static void mas_validate_gaps(struct ma_state *mas) 6935 { 6936 struct maple_enode *mte = mas->node; 6937 struct maple_node *p_mn, *node = mte_to_node(mte); 6938 enum maple_type mt = mte_node_type(mas->node); 6939 unsigned long gap = 0, max_gap = 0; 6940 unsigned long p_end, p_start = mas->min; 6941 unsigned char p_slot, offset; 6942 unsigned long *gaps = NULL; 6943 unsigned long *pivots = ma_pivots(node, mt); 6944 unsigned int i; 6945 6946 if (ma_is_dense(mt)) { 6947 for (i = 0; i < mt_slot_count(mte); i++) { 6948 if (mas_get_slot(mas, i)) { 6949 if (gap > max_gap) 6950 max_gap = gap; 6951 gap = 0; 6952 continue; 6953 } 6954 gap++; 6955 } 6956 goto counted; 6957 } 6958 6959 gaps = ma_gaps(node, mt); 6960 for (i = 0; i < mt_slot_count(mte); i++) { 6961 p_end = mas_safe_pivot(mas, pivots, i, mt); 6962 6963 if (!gaps) { 6964 if (!mas_get_slot(mas, i)) 6965 gap = p_end - p_start + 1; 6966 } else { 6967 void *entry = mas_get_slot(mas, i); 6968 6969 gap = gaps[i]; 6970 MT_BUG_ON(mas->tree, !entry); 6971 6972 if (gap > p_end - p_start + 1) { 6973 pr_err("%p[%u] %lu >= %lu - %lu + 1 (%lu)\n", 6974 mas_mn(mas), i, gap, p_end, p_start, 6975 p_end - p_start + 1); 6976 MT_BUG_ON(mas->tree, gap > p_end - p_start + 1); 6977 } 6978 } 6979 6980 if (gap > max_gap) 6981 max_gap = gap; 6982 6983 p_start = p_end + 1; 6984 if (p_end >= mas->max) 6985 break; 6986 } 6987 6988 counted: 6989 if (mt == maple_arange_64) { 6990 offset = ma_meta_gap(node, mt); 6991 if (offset > i) { 6992 pr_err("gap offset %p[%u] is invalid\n", node, offset); 6993 MT_BUG_ON(mas->tree, 1); 6994 } 6995 6996 if (gaps[offset] != max_gap) { 6997 pr_err("gap %p[%u] is not the largest gap %lu\n", 6998 node, offset, max_gap); 6999 MT_BUG_ON(mas->tree, 1); 7000 } 7001 7002 MT_BUG_ON(mas->tree, !gaps); 7003 for (i++ ; i < mt_slot_count(mte); i++) { 7004 if (gaps[i] != 0) { 7005 pr_err("gap %p[%u] beyond node limit != 0\n", 7006 node, i); 7007 MT_BUG_ON(mas->tree, 1); 7008 } 7009 } 7010 } 7011 7012 if (mte_is_root(mte)) 7013 return; 7014 7015 p_slot = mte_parent_slot(mas->node); 7016 p_mn = mte_parent(mte); 7017 MT_BUG_ON(mas->tree, max_gap > mas->max); 7018 if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) { 7019 pr_err("gap %p[%u] != %lu\n", p_mn, p_slot, max_gap); 7020 mt_dump(mas->tree, mt_dump_hex); 7021 MT_BUG_ON(mas->tree, 1); 7022 } 7023 } 7024 7025 static void mas_validate_parent_slot(struct ma_state *mas) 7026 { 7027 struct maple_node *parent; 7028 struct maple_enode *node; 7029 enum maple_type p_type; 7030 unsigned char p_slot; 7031 void __rcu **slots; 7032 int i; 7033 7034 if (mte_is_root(mas->node)) 7035 return; 7036 7037 p_slot = mte_parent_slot(mas->node); 7038 p_type = mas_parent_type(mas, mas->node); 7039 parent = mte_parent(mas->node); 7040 slots = ma_slots(parent, p_type); 7041 MT_BUG_ON(mas->tree, mas_mn(mas) == parent); 7042 7043 /* Check prev/next parent slot for duplicate node entry */ 7044 7045 for (i = 0; i < mt_slots[p_type]; i++) { 7046 node = mas_slot(mas, slots, i); 7047 if (i == p_slot) { 7048 if (node != mas->node) 7049 pr_err("parent %p[%u] does not have %p\n", 7050 parent, i, mas_mn(mas)); 7051 MT_BUG_ON(mas->tree, node != mas->node); 7052 } else if (node == mas->node) { 7053 pr_err("Invalid child %p at parent %p[%u] p_slot %u\n", 7054 mas_mn(mas), parent, i, p_slot); 7055 MT_BUG_ON(mas->tree, node == mas->node); 7056 } 7057 } 7058 } 7059 7060 static void mas_validate_child_slot(struct ma_state *mas) 7061 { 7062 enum maple_type type = mte_node_type(mas->node); 7063 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 7064 unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type); 7065 struct maple_enode *child; 7066 unsigned char i; 7067 7068 if (mte_is_leaf(mas->node)) 7069 return; 7070 7071 for (i = 0; i < mt_slots[type]; i++) { 7072 child = mas_slot(mas, slots, i); 7073 7074 if (!child) { 7075 pr_err("Non-leaf node lacks child at %p[%u]\n", 7076 mas_mn(mas), i); 7077 MT_BUG_ON(mas->tree, 1); 7078 } 7079 7080 if (mte_parent_slot(child) != i) { 7081 pr_err("Slot error at %p[%u]: child %p has pslot %u\n", 7082 mas_mn(mas), i, mte_to_node(child), 7083 mte_parent_slot(child)); 7084 MT_BUG_ON(mas->tree, 1); 7085 } 7086 7087 if (mte_parent(child) != mte_to_node(mas->node)) { 7088 pr_err("child %p has parent %p not %p\n", 7089 mte_to_node(child), mte_parent(child), 7090 mte_to_node(mas->node)); 7091 MT_BUG_ON(mas->tree, 1); 7092 } 7093 7094 if (i < mt_pivots[type] && pivots[i] == mas->max) 7095 break; 7096 } 7097 } 7098 7099 /* 7100 * Validate all pivots are within mas->min and mas->max, check metadata ends 7101 * where the maximum ends and ensure there is no slots or pivots set outside of 7102 * the end of the data. 7103 */ 7104 static void mas_validate_limits(struct ma_state *mas) 7105 { 7106 int i; 7107 unsigned long prev_piv = 0; 7108 enum maple_type type = mte_node_type(mas->node); 7109 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 7110 unsigned long *pivots = ma_pivots(mas_mn(mas), type); 7111 7112 for (i = 0; i < mt_slots[type]; i++) { 7113 unsigned long piv; 7114 7115 piv = mas_safe_pivot(mas, pivots, i, type); 7116 7117 if (!piv && (i != 0)) { 7118 pr_err("Missing node limit pivot at %p[%u]", 7119 mas_mn(mas), i); 7120 MAS_WARN_ON(mas, 1); 7121 } 7122 7123 if (prev_piv > piv) { 7124 pr_err("%p[%u] piv %lu < prev_piv %lu\n", 7125 mas_mn(mas), i, piv, prev_piv); 7126 MAS_WARN_ON(mas, piv < prev_piv); 7127 } 7128 7129 if (piv < mas->min) { 7130 pr_err("%p[%u] %lu < %lu\n", mas_mn(mas), i, 7131 piv, mas->min); 7132 MAS_WARN_ON(mas, piv < mas->min); 7133 } 7134 if (piv > mas->max) { 7135 pr_err("%p[%u] %lu > %lu\n", mas_mn(mas), i, 7136 piv, mas->max); 7137 MAS_WARN_ON(mas, piv > mas->max); 7138 } 7139 prev_piv = piv; 7140 if (piv == mas->max) 7141 break; 7142 } 7143 7144 if (mas_data_end(mas) != i) { 7145 pr_err("node%p: data_end %u != the last slot offset %u\n", 7146 mas_mn(mas), mas_data_end(mas), i); 7147 MT_BUG_ON(mas->tree, 1); 7148 } 7149 7150 for (i += 1; i < mt_slots[type]; i++) { 7151 void *entry = mas_slot(mas, slots, i); 7152 7153 if (entry && (i != mt_slots[type] - 1)) { 7154 pr_err("%p[%u] should not have entry %p\n", mas_mn(mas), 7155 i, entry); 7156 MT_BUG_ON(mas->tree, entry != NULL); 7157 } 7158 7159 if (i < mt_pivots[type]) { 7160 unsigned long piv = pivots[i]; 7161 7162 if (!piv) 7163 continue; 7164 7165 pr_err("%p[%u] should not have piv %lu\n", 7166 mas_mn(mas), i, piv); 7167 MAS_WARN_ON(mas, i < mt_pivots[type] - 1); 7168 } 7169 } 7170 } 7171 7172 static void mt_validate_nulls(struct maple_tree *mt) 7173 { 7174 void *entry, *last = (void *)1; 7175 unsigned char offset = 0; 7176 void __rcu **slots; 7177 MA_STATE(mas, mt, 0, 0); 7178 7179 mas_start(&mas); 7180 if (mas_is_none(&mas) || (mas.node == MAS_ROOT)) 7181 return; 7182 7183 while (!mte_is_leaf(mas.node)) 7184 mas_descend(&mas); 7185 7186 slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node)); 7187 do { 7188 entry = mas_slot(&mas, slots, offset); 7189 if (!last && !entry) { 7190 pr_err("Sequential nulls end at %p[%u]\n", 7191 mas_mn(&mas), offset); 7192 } 7193 MT_BUG_ON(mt, !last && !entry); 7194 last = entry; 7195 if (offset == mas_data_end(&mas)) { 7196 mas_next_node(&mas, mas_mn(&mas), ULONG_MAX); 7197 if (mas_is_none(&mas)) 7198 return; 7199 offset = 0; 7200 slots = ma_slots(mte_to_node(mas.node), 7201 mte_node_type(mas.node)); 7202 } else { 7203 offset++; 7204 } 7205 7206 } while (!mas_is_none(&mas)); 7207 } 7208 7209 /* 7210 * validate a maple tree by checking: 7211 * 1. The limits (pivots are within mas->min to mas->max) 7212 * 2. The gap is correctly set in the parents 7213 */ 7214 void mt_validate(struct maple_tree *mt) 7215 { 7216 unsigned char end; 7217 7218 MA_STATE(mas, mt, 0, 0); 7219 rcu_read_lock(); 7220 mas_start(&mas); 7221 if (!mas_searchable(&mas)) 7222 goto done; 7223 7224 while (!mte_is_leaf(mas.node)) 7225 mas_descend(&mas); 7226 7227 while (!mas_is_none(&mas)) { 7228 MAS_WARN_ON(&mas, mte_dead_node(mas.node)); 7229 end = mas_data_end(&mas); 7230 if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) && 7231 (mas.max != ULONG_MAX))) { 7232 pr_err("Invalid size %u of %p\n", end, mas_mn(&mas)); 7233 } 7234 7235 mas_validate_parent_slot(&mas); 7236 mas_validate_limits(&mas); 7237 mas_validate_child_slot(&mas); 7238 if (mt_is_alloc(mt)) 7239 mas_validate_gaps(&mas); 7240 mas_dfs_postorder(&mas, ULONG_MAX); 7241 } 7242 mt_validate_nulls(mt); 7243 done: 7244 rcu_read_unlock(); 7245 7246 } 7247 EXPORT_SYMBOL_GPL(mt_validate); 7248 7249 void mas_dump(const struct ma_state *mas) 7250 { 7251 pr_err("MAS: tree=%p enode=%p ", mas->tree, mas->node); 7252 if (mas_is_none(mas)) 7253 pr_err("(MAS_NONE) "); 7254 else if (mas_is_ptr(mas)) 7255 pr_err("(MAS_ROOT) "); 7256 else if (mas_is_start(mas)) 7257 pr_err("(MAS_START) "); 7258 else if (mas_is_paused(mas)) 7259 pr_err("(MAS_PAUSED) "); 7260 7261 pr_err("[%u] index=%lx last=%lx\n", mas->offset, mas->index, mas->last); 7262 pr_err(" min=%lx max=%lx alloc=%p, depth=%u, flags=%x\n", 7263 mas->min, mas->max, mas->alloc, mas->depth, mas->mas_flags); 7264 if (mas->index > mas->last) 7265 pr_err("Check index & last\n"); 7266 } 7267 EXPORT_SYMBOL_GPL(mas_dump); 7268 7269 void mas_wr_dump(const struct ma_wr_state *wr_mas) 7270 { 7271 pr_err("WR_MAS: node=%p r_min=%lx r_max=%lx\n", 7272 wr_mas->node, wr_mas->r_min, wr_mas->r_max); 7273 pr_err(" type=%u off_end=%u, node_end=%u, end_piv=%lx\n", 7274 wr_mas->type, wr_mas->offset_end, wr_mas->node_end, 7275 wr_mas->end_piv); 7276 } 7277 EXPORT_SYMBOL_GPL(mas_wr_dump); 7278 7279 #endif /* CONFIG_DEBUG_MAPLE_TREE */ 7280