1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Maple Tree implementation 4 * Copyright (c) 2018-2022 Oracle Corporation 5 * Authors: Liam R. Howlett <Liam.Howlett@oracle.com> 6 * Matthew Wilcox <willy@infradead.org> 7 */ 8 9 /* 10 * DOC: Interesting implementation details of the Maple Tree 11 * 12 * Each node type has a number of slots for entries and a number of slots for 13 * pivots. In the case of dense nodes, the pivots are implied by the position 14 * and are simply the slot index + the minimum of the node. 15 * 16 * In regular B-Tree terms, pivots are called keys. The term pivot is used to 17 * indicate that the tree is specifying ranges, Pivots may appear in the 18 * subtree with an entry attached to the value where as keys are unique to a 19 * specific position of a B-tree. Pivot values are inclusive of the slot with 20 * the same index. 21 * 22 * 23 * The following illustrates the layout of a range64 nodes slots and pivots. 24 * 25 * 26 * Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 | 27 * ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬ 28 * │ │ │ │ │ │ │ │ └─ Implied maximum 29 * │ │ │ │ │ │ │ └─ Pivot 14 30 * │ │ │ │ │ │ └─ Pivot 13 31 * │ │ │ │ │ └─ Pivot 12 32 * │ │ │ │ └─ Pivot 11 33 * │ │ │ └─ Pivot 2 34 * │ │ └─ Pivot 1 35 * │ └─ Pivot 0 36 * └─ Implied minimum 37 * 38 * Slot contents: 39 * Internal (non-leaf) nodes contain pointers to other nodes. 40 * Leaf nodes contain entries. 41 * 42 * The location of interest is often referred to as an offset. All offsets have 43 * a slot, but the last offset has an implied pivot from the node above (or 44 * UINT_MAX for the root node. 45 * 46 * Ranges complicate certain write activities. When modifying any of 47 * the B-tree variants, it is known that one entry will either be added or 48 * deleted. When modifying the Maple Tree, one store operation may overwrite 49 * the entire data set, or one half of the tree, or the middle half of the tree. 50 * 51 */ 52 53 54 #include <linux/maple_tree.h> 55 #include <linux/xarray.h> 56 #include <linux/types.h> 57 #include <linux/export.h> 58 #include <linux/slab.h> 59 #include <linux/limits.h> 60 #include <asm/barrier.h> 61 62 #define CREATE_TRACE_POINTS 63 #include <trace/events/maple_tree.h> 64 65 #define MA_ROOT_PARENT 1 66 67 /* 68 * Maple state flags 69 * * MA_STATE_BULK - Bulk insert mode 70 * * MA_STATE_REBALANCE - Indicate a rebalance during bulk insert 71 * * MA_STATE_PREALLOC - Preallocated nodes, WARN_ON allocation 72 */ 73 #define MA_STATE_BULK 1 74 #define MA_STATE_REBALANCE 2 75 #define MA_STATE_PREALLOC 4 76 77 #define ma_parent_ptr(x) ((struct maple_pnode *)(x)) 78 #define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT) 79 #define ma_mnode_ptr(x) ((struct maple_node *)(x)) 80 #define ma_enode_ptr(x) ((struct maple_enode *)(x)) 81 static struct kmem_cache *maple_node_cache; 82 83 #ifdef CONFIG_DEBUG_MAPLE_TREE 84 static const unsigned long mt_max[] = { 85 [maple_dense] = MAPLE_NODE_SLOTS, 86 [maple_leaf_64] = ULONG_MAX, 87 [maple_range_64] = ULONG_MAX, 88 [maple_arange_64] = ULONG_MAX, 89 }; 90 #define mt_node_max(x) mt_max[mte_node_type(x)] 91 #endif 92 93 static const unsigned char mt_slots[] = { 94 [maple_dense] = MAPLE_NODE_SLOTS, 95 [maple_leaf_64] = MAPLE_RANGE64_SLOTS, 96 [maple_range_64] = MAPLE_RANGE64_SLOTS, 97 [maple_arange_64] = MAPLE_ARANGE64_SLOTS, 98 }; 99 #define mt_slot_count(x) mt_slots[mte_node_type(x)] 100 101 static const unsigned char mt_pivots[] = { 102 [maple_dense] = 0, 103 [maple_leaf_64] = MAPLE_RANGE64_SLOTS - 1, 104 [maple_range_64] = MAPLE_RANGE64_SLOTS - 1, 105 [maple_arange_64] = MAPLE_ARANGE64_SLOTS - 1, 106 }; 107 #define mt_pivot_count(x) mt_pivots[mte_node_type(x)] 108 109 static const unsigned char mt_min_slots[] = { 110 [maple_dense] = MAPLE_NODE_SLOTS / 2, 111 [maple_leaf_64] = (MAPLE_RANGE64_SLOTS / 2) - 2, 112 [maple_range_64] = (MAPLE_RANGE64_SLOTS / 2) - 2, 113 [maple_arange_64] = (MAPLE_ARANGE64_SLOTS / 2) - 1, 114 }; 115 #define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)] 116 117 #define MAPLE_BIG_NODE_SLOTS (MAPLE_RANGE64_SLOTS * 2 + 2) 118 #define MAPLE_BIG_NODE_GAPS (MAPLE_ARANGE64_SLOTS * 2 + 1) 119 120 struct maple_big_node { 121 struct maple_pnode *parent; 122 unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1]; 123 union { 124 struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS]; 125 struct { 126 unsigned long padding[MAPLE_BIG_NODE_GAPS]; 127 unsigned long gap[MAPLE_BIG_NODE_GAPS]; 128 }; 129 }; 130 unsigned char b_end; 131 enum maple_type type; 132 }; 133 134 /* 135 * The maple_subtree_state is used to build a tree to replace a segment of an 136 * existing tree in a more atomic way. Any walkers of the older tree will hit a 137 * dead node and restart on updates. 138 */ 139 struct maple_subtree_state { 140 struct ma_state *orig_l; /* Original left side of subtree */ 141 struct ma_state *orig_r; /* Original right side of subtree */ 142 struct ma_state *l; /* New left side of subtree */ 143 struct ma_state *m; /* New middle of subtree (rare) */ 144 struct ma_state *r; /* New right side of subtree */ 145 struct ma_topiary *free; /* nodes to be freed */ 146 struct ma_topiary *destroy; /* Nodes to be destroyed (walked and freed) */ 147 struct maple_big_node *bn; 148 }; 149 150 #ifdef CONFIG_KASAN_STACK 151 /* Prevent mas_wr_bnode() from exceeding the stack frame limit */ 152 #define noinline_for_kasan noinline_for_stack 153 #else 154 #define noinline_for_kasan inline 155 #endif 156 157 /* Functions */ 158 static inline struct maple_node *mt_alloc_one(gfp_t gfp) 159 { 160 return kmem_cache_alloc(maple_node_cache, gfp); 161 } 162 163 static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes) 164 { 165 return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes); 166 } 167 168 static inline void mt_free_bulk(size_t size, void __rcu **nodes) 169 { 170 kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes); 171 } 172 173 static void mt_free_rcu(struct rcu_head *head) 174 { 175 struct maple_node *node = container_of(head, struct maple_node, rcu); 176 177 kmem_cache_free(maple_node_cache, node); 178 } 179 180 /* 181 * ma_free_rcu() - Use rcu callback to free a maple node 182 * @node: The node to free 183 * 184 * The maple tree uses the parent pointer to indicate this node is no longer in 185 * use and will be freed. 186 */ 187 static void ma_free_rcu(struct maple_node *node) 188 { 189 WARN_ON(node->parent != ma_parent_ptr(node)); 190 call_rcu(&node->rcu, mt_free_rcu); 191 } 192 193 static void mas_set_height(struct ma_state *mas) 194 { 195 unsigned int new_flags = mas->tree->ma_flags; 196 197 new_flags &= ~MT_FLAGS_HEIGHT_MASK; 198 MAS_BUG_ON(mas, mas->depth > MAPLE_HEIGHT_MAX); 199 new_flags |= mas->depth << MT_FLAGS_HEIGHT_OFFSET; 200 mas->tree->ma_flags = new_flags; 201 } 202 203 static unsigned int mas_mt_height(struct ma_state *mas) 204 { 205 return mt_height(mas->tree); 206 } 207 208 static inline enum maple_type mte_node_type(const struct maple_enode *entry) 209 { 210 return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) & 211 MAPLE_NODE_TYPE_MASK; 212 } 213 214 static inline bool ma_is_dense(const enum maple_type type) 215 { 216 return type < maple_leaf_64; 217 } 218 219 static inline bool ma_is_leaf(const enum maple_type type) 220 { 221 return type < maple_range_64; 222 } 223 224 static inline bool mte_is_leaf(const struct maple_enode *entry) 225 { 226 return ma_is_leaf(mte_node_type(entry)); 227 } 228 229 /* 230 * We also reserve values with the bottom two bits set to '10' which are 231 * below 4096 232 */ 233 static inline bool mt_is_reserved(const void *entry) 234 { 235 return ((unsigned long)entry < MAPLE_RESERVED_RANGE) && 236 xa_is_internal(entry); 237 } 238 239 static inline void mas_set_err(struct ma_state *mas, long err) 240 { 241 mas->node = MA_ERROR(err); 242 } 243 244 static inline bool mas_is_ptr(const struct ma_state *mas) 245 { 246 return mas->node == MAS_ROOT; 247 } 248 249 static inline bool mas_is_start(const struct ma_state *mas) 250 { 251 return mas->node == MAS_START; 252 } 253 254 bool mas_is_err(struct ma_state *mas) 255 { 256 return xa_is_err(mas->node); 257 } 258 259 static __always_inline bool mas_is_overflow(struct ma_state *mas) 260 { 261 if (unlikely(mas->node == MAS_OVERFLOW)) 262 return true; 263 264 return false; 265 } 266 267 static __always_inline bool mas_is_underflow(struct ma_state *mas) 268 { 269 if (unlikely(mas->node == MAS_UNDERFLOW)) 270 return true; 271 272 return false; 273 } 274 275 static inline bool mas_searchable(struct ma_state *mas) 276 { 277 if (mas_is_none(mas)) 278 return false; 279 280 if (mas_is_ptr(mas)) 281 return false; 282 283 return true; 284 } 285 286 static inline struct maple_node *mte_to_node(const struct maple_enode *entry) 287 { 288 return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK); 289 } 290 291 /* 292 * mte_to_mat() - Convert a maple encoded node to a maple topiary node. 293 * @entry: The maple encoded node 294 * 295 * Return: a maple topiary pointer 296 */ 297 static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry) 298 { 299 return (struct maple_topiary *) 300 ((unsigned long)entry & ~MAPLE_NODE_MASK); 301 } 302 303 /* 304 * mas_mn() - Get the maple state node. 305 * @mas: The maple state 306 * 307 * Return: the maple node (not encoded - bare pointer). 308 */ 309 static inline struct maple_node *mas_mn(const struct ma_state *mas) 310 { 311 return mte_to_node(mas->node); 312 } 313 314 /* 315 * mte_set_node_dead() - Set a maple encoded node as dead. 316 * @mn: The maple encoded node. 317 */ 318 static inline void mte_set_node_dead(struct maple_enode *mn) 319 { 320 mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn)); 321 smp_wmb(); /* Needed for RCU */ 322 } 323 324 /* Bit 1 indicates the root is a node */ 325 #define MAPLE_ROOT_NODE 0x02 326 /* maple_type stored bit 3-6 */ 327 #define MAPLE_ENODE_TYPE_SHIFT 0x03 328 /* Bit 2 means a NULL somewhere below */ 329 #define MAPLE_ENODE_NULL 0x04 330 331 static inline struct maple_enode *mt_mk_node(const struct maple_node *node, 332 enum maple_type type) 333 { 334 return (void *)((unsigned long)node | 335 (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL); 336 } 337 338 static inline void *mte_mk_root(const struct maple_enode *node) 339 { 340 return (void *)((unsigned long)node | MAPLE_ROOT_NODE); 341 } 342 343 static inline void *mte_safe_root(const struct maple_enode *node) 344 { 345 return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE); 346 } 347 348 static inline void *mte_set_full(const struct maple_enode *node) 349 { 350 return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL); 351 } 352 353 static inline void *mte_clear_full(const struct maple_enode *node) 354 { 355 return (void *)((unsigned long)node | MAPLE_ENODE_NULL); 356 } 357 358 static inline bool mte_has_null(const struct maple_enode *node) 359 { 360 return (unsigned long)node & MAPLE_ENODE_NULL; 361 } 362 363 static inline bool ma_is_root(struct maple_node *node) 364 { 365 return ((unsigned long)node->parent & MA_ROOT_PARENT); 366 } 367 368 static inline bool mte_is_root(const struct maple_enode *node) 369 { 370 return ma_is_root(mte_to_node(node)); 371 } 372 373 static inline bool mas_is_root_limits(const struct ma_state *mas) 374 { 375 return !mas->min && mas->max == ULONG_MAX; 376 } 377 378 static inline bool mt_is_alloc(struct maple_tree *mt) 379 { 380 return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE); 381 } 382 383 /* 384 * The Parent Pointer 385 * Excluding root, the parent pointer is 256B aligned like all other tree nodes. 386 * When storing a 32 or 64 bit values, the offset can fit into 5 bits. The 16 387 * bit values need an extra bit to store the offset. This extra bit comes from 388 * a reuse of the last bit in the node type. This is possible by using bit 1 to 389 * indicate if bit 2 is part of the type or the slot. 390 * 391 * Note types: 392 * 0x??1 = Root 393 * 0x?00 = 16 bit nodes 394 * 0x010 = 32 bit nodes 395 * 0x110 = 64 bit nodes 396 * 397 * Slot size and alignment 398 * 0b??1 : Root 399 * 0b?00 : 16 bit values, type in 0-1, slot in 2-7 400 * 0b010 : 32 bit values, type in 0-2, slot in 3-7 401 * 0b110 : 64 bit values, type in 0-2, slot in 3-7 402 */ 403 404 #define MAPLE_PARENT_ROOT 0x01 405 406 #define MAPLE_PARENT_SLOT_SHIFT 0x03 407 #define MAPLE_PARENT_SLOT_MASK 0xF8 408 409 #define MAPLE_PARENT_16B_SLOT_SHIFT 0x02 410 #define MAPLE_PARENT_16B_SLOT_MASK 0xFC 411 412 #define MAPLE_PARENT_RANGE64 0x06 413 #define MAPLE_PARENT_RANGE32 0x04 414 #define MAPLE_PARENT_NOT_RANGE16 0x02 415 416 /* 417 * mte_parent_shift() - Get the parent shift for the slot storage. 418 * @parent: The parent pointer cast as an unsigned long 419 * Return: The shift into that pointer to the star to of the slot 420 */ 421 static inline unsigned long mte_parent_shift(unsigned long parent) 422 { 423 /* Note bit 1 == 0 means 16B */ 424 if (likely(parent & MAPLE_PARENT_NOT_RANGE16)) 425 return MAPLE_PARENT_SLOT_SHIFT; 426 427 return MAPLE_PARENT_16B_SLOT_SHIFT; 428 } 429 430 /* 431 * mte_parent_slot_mask() - Get the slot mask for the parent. 432 * @parent: The parent pointer cast as an unsigned long. 433 * Return: The slot mask for that parent. 434 */ 435 static inline unsigned long mte_parent_slot_mask(unsigned long parent) 436 { 437 /* Note bit 1 == 0 means 16B */ 438 if (likely(parent & MAPLE_PARENT_NOT_RANGE16)) 439 return MAPLE_PARENT_SLOT_MASK; 440 441 return MAPLE_PARENT_16B_SLOT_MASK; 442 } 443 444 /* 445 * mas_parent_type() - Return the maple_type of the parent from the stored 446 * parent type. 447 * @mas: The maple state 448 * @enode: The maple_enode to extract the parent's enum 449 * Return: The node->parent maple_type 450 */ 451 static inline 452 enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode) 453 { 454 unsigned long p_type; 455 456 p_type = (unsigned long)mte_to_node(enode)->parent; 457 if (WARN_ON(p_type & MAPLE_PARENT_ROOT)) 458 return 0; 459 460 p_type &= MAPLE_NODE_MASK; 461 p_type &= ~mte_parent_slot_mask(p_type); 462 switch (p_type) { 463 case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */ 464 if (mt_is_alloc(mas->tree)) 465 return maple_arange_64; 466 return maple_range_64; 467 } 468 469 return 0; 470 } 471 472 /* 473 * mas_set_parent() - Set the parent node and encode the slot 474 * @enode: The encoded maple node. 475 * @parent: The encoded maple node that is the parent of @enode. 476 * @slot: The slot that @enode resides in @parent. 477 * 478 * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the 479 * parent type. 480 */ 481 static inline 482 void mas_set_parent(struct ma_state *mas, struct maple_enode *enode, 483 const struct maple_enode *parent, unsigned char slot) 484 { 485 unsigned long val = (unsigned long)parent; 486 unsigned long shift; 487 unsigned long type; 488 enum maple_type p_type = mte_node_type(parent); 489 490 MAS_BUG_ON(mas, p_type == maple_dense); 491 MAS_BUG_ON(mas, p_type == maple_leaf_64); 492 493 switch (p_type) { 494 case maple_range_64: 495 case maple_arange_64: 496 shift = MAPLE_PARENT_SLOT_SHIFT; 497 type = MAPLE_PARENT_RANGE64; 498 break; 499 default: 500 case maple_dense: 501 case maple_leaf_64: 502 shift = type = 0; 503 break; 504 } 505 506 val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */ 507 val |= (slot << shift) | type; 508 mte_to_node(enode)->parent = ma_parent_ptr(val); 509 } 510 511 /* 512 * mte_parent_slot() - get the parent slot of @enode. 513 * @enode: The encoded maple node. 514 * 515 * Return: The slot in the parent node where @enode resides. 516 */ 517 static inline unsigned int mte_parent_slot(const struct maple_enode *enode) 518 { 519 unsigned long val = (unsigned long)mte_to_node(enode)->parent; 520 521 if (val & MA_ROOT_PARENT) 522 return 0; 523 524 /* 525 * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost 526 * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT 527 */ 528 return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val); 529 } 530 531 /* 532 * mte_parent() - Get the parent of @node. 533 * @node: The encoded maple node. 534 * 535 * Return: The parent maple node. 536 */ 537 static inline struct maple_node *mte_parent(const struct maple_enode *enode) 538 { 539 return (void *)((unsigned long) 540 (mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK); 541 } 542 543 /* 544 * ma_dead_node() - check if the @enode is dead. 545 * @enode: The encoded maple node 546 * 547 * Return: true if dead, false otherwise. 548 */ 549 static inline bool ma_dead_node(const struct maple_node *node) 550 { 551 struct maple_node *parent; 552 553 /* Do not reorder reads from the node prior to the parent check */ 554 smp_rmb(); 555 parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK); 556 return (parent == node); 557 } 558 559 /* 560 * mte_dead_node() - check if the @enode is dead. 561 * @enode: The encoded maple node 562 * 563 * Return: true if dead, false otherwise. 564 */ 565 static inline bool mte_dead_node(const struct maple_enode *enode) 566 { 567 struct maple_node *parent, *node; 568 569 node = mte_to_node(enode); 570 /* Do not reorder reads from the node prior to the parent check */ 571 smp_rmb(); 572 parent = mte_parent(enode); 573 return (parent == node); 574 } 575 576 /* 577 * mas_allocated() - Get the number of nodes allocated in a maple state. 578 * @mas: The maple state 579 * 580 * The ma_state alloc member is overloaded to hold a pointer to the first 581 * allocated node or to the number of requested nodes to allocate. If bit 0 is 582 * set, then the alloc contains the number of requested nodes. If there is an 583 * allocated node, then the total allocated nodes is in that node. 584 * 585 * Return: The total number of nodes allocated 586 */ 587 static inline unsigned long mas_allocated(const struct ma_state *mas) 588 { 589 if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) 590 return 0; 591 592 return mas->alloc->total; 593 } 594 595 /* 596 * mas_set_alloc_req() - Set the requested number of allocations. 597 * @mas: the maple state 598 * @count: the number of allocations. 599 * 600 * The requested number of allocations is either in the first allocated node, 601 * located in @mas->alloc->request_count, or directly in @mas->alloc if there is 602 * no allocated node. Set the request either in the node or do the necessary 603 * encoding to store in @mas->alloc directly. 604 */ 605 static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count) 606 { 607 if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) { 608 if (!count) 609 mas->alloc = NULL; 610 else 611 mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U); 612 return; 613 } 614 615 mas->alloc->request_count = count; 616 } 617 618 /* 619 * mas_alloc_req() - get the requested number of allocations. 620 * @mas: The maple state 621 * 622 * The alloc count is either stored directly in @mas, or in 623 * @mas->alloc->request_count if there is at least one node allocated. Decode 624 * the request count if it's stored directly in @mas->alloc. 625 * 626 * Return: The allocation request count. 627 */ 628 static inline unsigned int mas_alloc_req(const struct ma_state *mas) 629 { 630 if ((unsigned long)mas->alloc & 0x1) 631 return (unsigned long)(mas->alloc) >> 1; 632 else if (mas->alloc) 633 return mas->alloc->request_count; 634 return 0; 635 } 636 637 /* 638 * ma_pivots() - Get a pointer to the maple node pivots. 639 * @node - the maple node 640 * @type - the node type 641 * 642 * In the event of a dead node, this array may be %NULL 643 * 644 * Return: A pointer to the maple node pivots 645 */ 646 static inline unsigned long *ma_pivots(struct maple_node *node, 647 enum maple_type type) 648 { 649 switch (type) { 650 case maple_arange_64: 651 return node->ma64.pivot; 652 case maple_range_64: 653 case maple_leaf_64: 654 return node->mr64.pivot; 655 case maple_dense: 656 return NULL; 657 } 658 return NULL; 659 } 660 661 /* 662 * ma_gaps() - Get a pointer to the maple node gaps. 663 * @node - the maple node 664 * @type - the node type 665 * 666 * Return: A pointer to the maple node gaps 667 */ 668 static inline unsigned long *ma_gaps(struct maple_node *node, 669 enum maple_type type) 670 { 671 switch (type) { 672 case maple_arange_64: 673 return node->ma64.gap; 674 case maple_range_64: 675 case maple_leaf_64: 676 case maple_dense: 677 return NULL; 678 } 679 return NULL; 680 } 681 682 /* 683 * mas_pivot() - Get the pivot at @piv of the maple encoded node. 684 * @mas: The maple state. 685 * @piv: The pivot. 686 * 687 * Return: the pivot at @piv of @mn. 688 */ 689 static inline unsigned long mas_pivot(struct ma_state *mas, unsigned char piv) 690 { 691 struct maple_node *node = mas_mn(mas); 692 enum maple_type type = mte_node_type(mas->node); 693 694 if (MAS_WARN_ON(mas, piv >= mt_pivots[type])) { 695 mas_set_err(mas, -EIO); 696 return 0; 697 } 698 699 switch (type) { 700 case maple_arange_64: 701 return node->ma64.pivot[piv]; 702 case maple_range_64: 703 case maple_leaf_64: 704 return node->mr64.pivot[piv]; 705 case maple_dense: 706 return 0; 707 } 708 return 0; 709 } 710 711 /* 712 * mas_safe_pivot() - get the pivot at @piv or mas->max. 713 * @mas: The maple state 714 * @pivots: The pointer to the maple node pivots 715 * @piv: The pivot to fetch 716 * @type: The maple node type 717 * 718 * Return: The pivot at @piv within the limit of the @pivots array, @mas->max 719 * otherwise. 720 */ 721 static inline unsigned long 722 mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots, 723 unsigned char piv, enum maple_type type) 724 { 725 if (piv >= mt_pivots[type]) 726 return mas->max; 727 728 return pivots[piv]; 729 } 730 731 /* 732 * mas_safe_min() - Return the minimum for a given offset. 733 * @mas: The maple state 734 * @pivots: The pointer to the maple node pivots 735 * @offset: The offset into the pivot array 736 * 737 * Return: The minimum range value that is contained in @offset. 738 */ 739 static inline unsigned long 740 mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset) 741 { 742 if (likely(offset)) 743 return pivots[offset - 1] + 1; 744 745 return mas->min; 746 } 747 748 /* 749 * mte_set_pivot() - Set a pivot to a value in an encoded maple node. 750 * @mn: The encoded maple node 751 * @piv: The pivot offset 752 * @val: The value of the pivot 753 */ 754 static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv, 755 unsigned long val) 756 { 757 struct maple_node *node = mte_to_node(mn); 758 enum maple_type type = mte_node_type(mn); 759 760 BUG_ON(piv >= mt_pivots[type]); 761 switch (type) { 762 default: 763 case maple_range_64: 764 case maple_leaf_64: 765 node->mr64.pivot[piv] = val; 766 break; 767 case maple_arange_64: 768 node->ma64.pivot[piv] = val; 769 break; 770 case maple_dense: 771 break; 772 } 773 774 } 775 776 /* 777 * ma_slots() - Get a pointer to the maple node slots. 778 * @mn: The maple node 779 * @mt: The maple node type 780 * 781 * Return: A pointer to the maple node slots 782 */ 783 static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt) 784 { 785 switch (mt) { 786 default: 787 case maple_arange_64: 788 return mn->ma64.slot; 789 case maple_range_64: 790 case maple_leaf_64: 791 return mn->mr64.slot; 792 case maple_dense: 793 return mn->slot; 794 } 795 } 796 797 static inline bool mt_write_locked(const struct maple_tree *mt) 798 { 799 return mt_external_lock(mt) ? mt_write_lock_is_held(mt) : 800 lockdep_is_held(&mt->ma_lock); 801 } 802 803 static inline bool mt_locked(const struct maple_tree *mt) 804 { 805 return mt_external_lock(mt) ? mt_lock_is_held(mt) : 806 lockdep_is_held(&mt->ma_lock); 807 } 808 809 static inline void *mt_slot(const struct maple_tree *mt, 810 void __rcu **slots, unsigned char offset) 811 { 812 return rcu_dereference_check(slots[offset], mt_locked(mt)); 813 } 814 815 static inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slots, 816 unsigned char offset) 817 { 818 return rcu_dereference_protected(slots[offset], mt_write_locked(mt)); 819 } 820 /* 821 * mas_slot_locked() - Get the slot value when holding the maple tree lock. 822 * @mas: The maple state 823 * @slots: The pointer to the slots 824 * @offset: The offset into the slots array to fetch 825 * 826 * Return: The entry stored in @slots at the @offset. 827 */ 828 static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots, 829 unsigned char offset) 830 { 831 return mt_slot_locked(mas->tree, slots, offset); 832 } 833 834 /* 835 * mas_slot() - Get the slot value when not holding the maple tree lock. 836 * @mas: The maple state 837 * @slots: The pointer to the slots 838 * @offset: The offset into the slots array to fetch 839 * 840 * Return: The entry stored in @slots at the @offset 841 */ 842 static inline void *mas_slot(struct ma_state *mas, void __rcu **slots, 843 unsigned char offset) 844 { 845 return mt_slot(mas->tree, slots, offset); 846 } 847 848 /* 849 * mas_root() - Get the maple tree root. 850 * @mas: The maple state. 851 * 852 * Return: The pointer to the root of the tree 853 */ 854 static inline void *mas_root(struct ma_state *mas) 855 { 856 return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree)); 857 } 858 859 static inline void *mt_root_locked(struct maple_tree *mt) 860 { 861 return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt)); 862 } 863 864 /* 865 * mas_root_locked() - Get the maple tree root when holding the maple tree lock. 866 * @mas: The maple state. 867 * 868 * Return: The pointer to the root of the tree 869 */ 870 static inline void *mas_root_locked(struct ma_state *mas) 871 { 872 return mt_root_locked(mas->tree); 873 } 874 875 static inline struct maple_metadata *ma_meta(struct maple_node *mn, 876 enum maple_type mt) 877 { 878 switch (mt) { 879 case maple_arange_64: 880 return &mn->ma64.meta; 881 default: 882 return &mn->mr64.meta; 883 } 884 } 885 886 /* 887 * ma_set_meta() - Set the metadata information of a node. 888 * @mn: The maple node 889 * @mt: The maple node type 890 * @offset: The offset of the highest sub-gap in this node. 891 * @end: The end of the data in this node. 892 */ 893 static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt, 894 unsigned char offset, unsigned char end) 895 { 896 struct maple_metadata *meta = ma_meta(mn, mt); 897 898 meta->gap = offset; 899 meta->end = end; 900 } 901 902 /* 903 * mt_clear_meta() - clear the metadata information of a node, if it exists 904 * @mt: The maple tree 905 * @mn: The maple node 906 * @type: The maple node type 907 * @offset: The offset of the highest sub-gap in this node. 908 * @end: The end of the data in this node. 909 */ 910 static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn, 911 enum maple_type type) 912 { 913 struct maple_metadata *meta; 914 unsigned long *pivots; 915 void __rcu **slots; 916 void *next; 917 918 switch (type) { 919 case maple_range_64: 920 pivots = mn->mr64.pivot; 921 if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { 922 slots = mn->mr64.slot; 923 next = mt_slot_locked(mt, slots, 924 MAPLE_RANGE64_SLOTS - 1); 925 if (unlikely((mte_to_node(next) && 926 mte_node_type(next)))) 927 return; /* no metadata, could be node */ 928 } 929 fallthrough; 930 case maple_arange_64: 931 meta = ma_meta(mn, type); 932 break; 933 default: 934 return; 935 } 936 937 meta->gap = 0; 938 meta->end = 0; 939 } 940 941 /* 942 * ma_meta_end() - Get the data end of a node from the metadata 943 * @mn: The maple node 944 * @mt: The maple node type 945 */ 946 static inline unsigned char ma_meta_end(struct maple_node *mn, 947 enum maple_type mt) 948 { 949 struct maple_metadata *meta = ma_meta(mn, mt); 950 951 return meta->end; 952 } 953 954 /* 955 * ma_meta_gap() - Get the largest gap location of a node from the metadata 956 * @mn: The maple node 957 * @mt: The maple node type 958 */ 959 static inline unsigned char ma_meta_gap(struct maple_node *mn, 960 enum maple_type mt) 961 { 962 return mn->ma64.meta.gap; 963 } 964 965 /* 966 * ma_set_meta_gap() - Set the largest gap location in a nodes metadata 967 * @mn: The maple node 968 * @mn: The maple node type 969 * @offset: The location of the largest gap. 970 */ 971 static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt, 972 unsigned char offset) 973 { 974 975 struct maple_metadata *meta = ma_meta(mn, mt); 976 977 meta->gap = offset; 978 } 979 980 /* 981 * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes. 982 * @mat - the ma_topiary, a linked list of dead nodes. 983 * @dead_enode - the node to be marked as dead and added to the tail of the list 984 * 985 * Add the @dead_enode to the linked list in @mat. 986 */ 987 static inline void mat_add(struct ma_topiary *mat, 988 struct maple_enode *dead_enode) 989 { 990 mte_set_node_dead(dead_enode); 991 mte_to_mat(dead_enode)->next = NULL; 992 if (!mat->tail) { 993 mat->tail = mat->head = dead_enode; 994 return; 995 } 996 997 mte_to_mat(mat->tail)->next = dead_enode; 998 mat->tail = dead_enode; 999 } 1000 1001 static void mt_free_walk(struct rcu_head *head); 1002 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 1003 bool free); 1004 /* 1005 * mas_mat_destroy() - Free all nodes and subtrees in a dead list. 1006 * @mas - the maple state 1007 * @mat - the ma_topiary linked list of dead nodes to free. 1008 * 1009 * Destroy walk a dead list. 1010 */ 1011 static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat) 1012 { 1013 struct maple_enode *next; 1014 struct maple_node *node; 1015 bool in_rcu = mt_in_rcu(mas->tree); 1016 1017 while (mat->head) { 1018 next = mte_to_mat(mat->head)->next; 1019 node = mte_to_node(mat->head); 1020 mt_destroy_walk(mat->head, mas->tree, !in_rcu); 1021 if (in_rcu) 1022 call_rcu(&node->rcu, mt_free_walk); 1023 mat->head = next; 1024 } 1025 } 1026 /* 1027 * mas_descend() - Descend into the slot stored in the ma_state. 1028 * @mas - the maple state. 1029 * 1030 * Note: Not RCU safe, only use in write side or debug code. 1031 */ 1032 static inline void mas_descend(struct ma_state *mas) 1033 { 1034 enum maple_type type; 1035 unsigned long *pivots; 1036 struct maple_node *node; 1037 void __rcu **slots; 1038 1039 node = mas_mn(mas); 1040 type = mte_node_type(mas->node); 1041 pivots = ma_pivots(node, type); 1042 slots = ma_slots(node, type); 1043 1044 if (mas->offset) 1045 mas->min = pivots[mas->offset - 1] + 1; 1046 mas->max = mas_safe_pivot(mas, pivots, mas->offset, type); 1047 mas->node = mas_slot(mas, slots, mas->offset); 1048 } 1049 1050 /* 1051 * mte_set_gap() - Set a maple node gap. 1052 * @mn: The encoded maple node 1053 * @gap: The offset of the gap to set 1054 * @val: The gap value 1055 */ 1056 static inline void mte_set_gap(const struct maple_enode *mn, 1057 unsigned char gap, unsigned long val) 1058 { 1059 switch (mte_node_type(mn)) { 1060 default: 1061 break; 1062 case maple_arange_64: 1063 mte_to_node(mn)->ma64.gap[gap] = val; 1064 break; 1065 } 1066 } 1067 1068 /* 1069 * mas_ascend() - Walk up a level of the tree. 1070 * @mas: The maple state 1071 * 1072 * Sets the @mas->max and @mas->min to the correct values when walking up. This 1073 * may cause several levels of walking up to find the correct min and max. 1074 * May find a dead node which will cause a premature return. 1075 * Return: 1 on dead node, 0 otherwise 1076 */ 1077 static int mas_ascend(struct ma_state *mas) 1078 { 1079 struct maple_enode *p_enode; /* parent enode. */ 1080 struct maple_enode *a_enode; /* ancestor enode. */ 1081 struct maple_node *a_node; /* ancestor node. */ 1082 struct maple_node *p_node; /* parent node. */ 1083 unsigned char a_slot; 1084 enum maple_type a_type; 1085 unsigned long min, max; 1086 unsigned long *pivots; 1087 bool set_max = false, set_min = false; 1088 1089 a_node = mas_mn(mas); 1090 if (ma_is_root(a_node)) { 1091 mas->offset = 0; 1092 return 0; 1093 } 1094 1095 p_node = mte_parent(mas->node); 1096 if (unlikely(a_node == p_node)) 1097 return 1; 1098 1099 a_type = mas_parent_type(mas, mas->node); 1100 mas->offset = mte_parent_slot(mas->node); 1101 a_enode = mt_mk_node(p_node, a_type); 1102 1103 /* Check to make sure all parent information is still accurate */ 1104 if (p_node != mte_parent(mas->node)) 1105 return 1; 1106 1107 mas->node = a_enode; 1108 1109 if (mte_is_root(a_enode)) { 1110 mas->max = ULONG_MAX; 1111 mas->min = 0; 1112 return 0; 1113 } 1114 1115 if (!mas->min) 1116 set_min = true; 1117 1118 if (mas->max == ULONG_MAX) 1119 set_max = true; 1120 1121 min = 0; 1122 max = ULONG_MAX; 1123 do { 1124 p_enode = a_enode; 1125 a_type = mas_parent_type(mas, p_enode); 1126 a_node = mte_parent(p_enode); 1127 a_slot = mte_parent_slot(p_enode); 1128 a_enode = mt_mk_node(a_node, a_type); 1129 pivots = ma_pivots(a_node, a_type); 1130 1131 if (unlikely(ma_dead_node(a_node))) 1132 return 1; 1133 1134 if (!set_min && a_slot) { 1135 set_min = true; 1136 min = pivots[a_slot - 1] + 1; 1137 } 1138 1139 if (!set_max && a_slot < mt_pivots[a_type]) { 1140 set_max = true; 1141 max = pivots[a_slot]; 1142 } 1143 1144 if (unlikely(ma_dead_node(a_node))) 1145 return 1; 1146 1147 if (unlikely(ma_is_root(a_node))) 1148 break; 1149 1150 } while (!set_min || !set_max); 1151 1152 mas->max = max; 1153 mas->min = min; 1154 return 0; 1155 } 1156 1157 /* 1158 * mas_pop_node() - Get a previously allocated maple node from the maple state. 1159 * @mas: The maple state 1160 * 1161 * Return: A pointer to a maple node. 1162 */ 1163 static inline struct maple_node *mas_pop_node(struct ma_state *mas) 1164 { 1165 struct maple_alloc *ret, *node = mas->alloc; 1166 unsigned long total = mas_allocated(mas); 1167 unsigned int req = mas_alloc_req(mas); 1168 1169 /* nothing or a request pending. */ 1170 if (WARN_ON(!total)) 1171 return NULL; 1172 1173 if (total == 1) { 1174 /* single allocation in this ma_state */ 1175 mas->alloc = NULL; 1176 ret = node; 1177 goto single_node; 1178 } 1179 1180 if (node->node_count == 1) { 1181 /* Single allocation in this node. */ 1182 mas->alloc = node->slot[0]; 1183 mas->alloc->total = node->total - 1; 1184 ret = node; 1185 goto new_head; 1186 } 1187 node->total--; 1188 ret = node->slot[--node->node_count]; 1189 node->slot[node->node_count] = NULL; 1190 1191 single_node: 1192 new_head: 1193 if (req) { 1194 req++; 1195 mas_set_alloc_req(mas, req); 1196 } 1197 1198 memset(ret, 0, sizeof(*ret)); 1199 return (struct maple_node *)ret; 1200 } 1201 1202 /* 1203 * mas_push_node() - Push a node back on the maple state allocation. 1204 * @mas: The maple state 1205 * @used: The used maple node 1206 * 1207 * Stores the maple node back into @mas->alloc for reuse. Updates allocated and 1208 * requested node count as necessary. 1209 */ 1210 static inline void mas_push_node(struct ma_state *mas, struct maple_node *used) 1211 { 1212 struct maple_alloc *reuse = (struct maple_alloc *)used; 1213 struct maple_alloc *head = mas->alloc; 1214 unsigned long count; 1215 unsigned int requested = mas_alloc_req(mas); 1216 1217 count = mas_allocated(mas); 1218 1219 reuse->request_count = 0; 1220 reuse->node_count = 0; 1221 if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) { 1222 head->slot[head->node_count++] = reuse; 1223 head->total++; 1224 goto done; 1225 } 1226 1227 reuse->total = 1; 1228 if ((head) && !((unsigned long)head & 0x1)) { 1229 reuse->slot[0] = head; 1230 reuse->node_count = 1; 1231 reuse->total += head->total; 1232 } 1233 1234 mas->alloc = reuse; 1235 done: 1236 if (requested > 1) 1237 mas_set_alloc_req(mas, requested - 1); 1238 } 1239 1240 /* 1241 * mas_alloc_nodes() - Allocate nodes into a maple state 1242 * @mas: The maple state 1243 * @gfp: The GFP Flags 1244 */ 1245 static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) 1246 { 1247 struct maple_alloc *node; 1248 unsigned long allocated = mas_allocated(mas); 1249 unsigned int requested = mas_alloc_req(mas); 1250 unsigned int count; 1251 void **slots = NULL; 1252 unsigned int max_req = 0; 1253 1254 if (!requested) 1255 return; 1256 1257 mas_set_alloc_req(mas, 0); 1258 if (mas->mas_flags & MA_STATE_PREALLOC) { 1259 if (allocated) 1260 return; 1261 WARN_ON(!allocated); 1262 } 1263 1264 if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) { 1265 node = (struct maple_alloc *)mt_alloc_one(gfp); 1266 if (!node) 1267 goto nomem_one; 1268 1269 if (allocated) { 1270 node->slot[0] = mas->alloc; 1271 node->node_count = 1; 1272 } else { 1273 node->node_count = 0; 1274 } 1275 1276 mas->alloc = node; 1277 node->total = ++allocated; 1278 requested--; 1279 } 1280 1281 node = mas->alloc; 1282 node->request_count = 0; 1283 while (requested) { 1284 max_req = MAPLE_ALLOC_SLOTS - node->node_count; 1285 slots = (void **)&node->slot[node->node_count]; 1286 max_req = min(requested, max_req); 1287 count = mt_alloc_bulk(gfp, max_req, slots); 1288 if (!count) 1289 goto nomem_bulk; 1290 1291 if (node->node_count == 0) { 1292 node->slot[0]->node_count = 0; 1293 node->slot[0]->request_count = 0; 1294 } 1295 1296 node->node_count += count; 1297 allocated += count; 1298 node = node->slot[0]; 1299 requested -= count; 1300 } 1301 mas->alloc->total = allocated; 1302 return; 1303 1304 nomem_bulk: 1305 /* Clean up potential freed allocations on bulk failure */ 1306 memset(slots, 0, max_req * sizeof(unsigned long)); 1307 nomem_one: 1308 mas_set_alloc_req(mas, requested); 1309 if (mas->alloc && !(((unsigned long)mas->alloc & 0x1))) 1310 mas->alloc->total = allocated; 1311 mas_set_err(mas, -ENOMEM); 1312 } 1313 1314 /* 1315 * mas_free() - Free an encoded maple node 1316 * @mas: The maple state 1317 * @used: The encoded maple node to free. 1318 * 1319 * Uses rcu free if necessary, pushes @used back on the maple state allocations 1320 * otherwise. 1321 */ 1322 static inline void mas_free(struct ma_state *mas, struct maple_enode *used) 1323 { 1324 struct maple_node *tmp = mte_to_node(used); 1325 1326 if (mt_in_rcu(mas->tree)) 1327 ma_free_rcu(tmp); 1328 else 1329 mas_push_node(mas, tmp); 1330 } 1331 1332 /* 1333 * mas_node_count() - Check if enough nodes are allocated and request more if 1334 * there is not enough nodes. 1335 * @mas: The maple state 1336 * @count: The number of nodes needed 1337 * @gfp: the gfp flags 1338 */ 1339 static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp) 1340 { 1341 unsigned long allocated = mas_allocated(mas); 1342 1343 if (allocated < count) { 1344 mas_set_alloc_req(mas, count - allocated); 1345 mas_alloc_nodes(mas, gfp); 1346 } 1347 } 1348 1349 /* 1350 * mas_node_count() - Check if enough nodes are allocated and request more if 1351 * there is not enough nodes. 1352 * @mas: The maple state 1353 * @count: The number of nodes needed 1354 * 1355 * Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags. 1356 */ 1357 static void mas_node_count(struct ma_state *mas, int count) 1358 { 1359 return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN); 1360 } 1361 1362 /* 1363 * mas_start() - Sets up maple state for operations. 1364 * @mas: The maple state. 1365 * 1366 * If mas->node == MAS_START, then set the min, max and depth to 1367 * defaults. 1368 * 1369 * Return: 1370 * - If mas->node is an error or not MAS_START, return NULL. 1371 * - If it's an empty tree: NULL & mas->node == MAS_NONE 1372 * - If it's a single entry: The entry & mas->node == MAS_ROOT 1373 * - If it's a tree: NULL & mas->node == safe root node. 1374 */ 1375 static inline struct maple_enode *mas_start(struct ma_state *mas) 1376 { 1377 if (likely(mas_is_start(mas))) { 1378 struct maple_enode *root; 1379 1380 mas->min = 0; 1381 mas->max = ULONG_MAX; 1382 1383 retry: 1384 mas->depth = 0; 1385 root = mas_root(mas); 1386 /* Tree with nodes */ 1387 if (likely(xa_is_node(root))) { 1388 mas->depth = 1; 1389 mas->node = mte_safe_root(root); 1390 mas->offset = 0; 1391 if (mte_dead_node(mas->node)) 1392 goto retry; 1393 1394 return NULL; 1395 } 1396 1397 /* empty tree */ 1398 if (unlikely(!root)) { 1399 mas->node = MAS_NONE; 1400 mas->offset = MAPLE_NODE_SLOTS; 1401 return NULL; 1402 } 1403 1404 /* Single entry tree */ 1405 mas->node = MAS_ROOT; 1406 mas->offset = MAPLE_NODE_SLOTS; 1407 1408 /* Single entry tree. */ 1409 if (mas->index > 0) 1410 return NULL; 1411 1412 return root; 1413 } 1414 1415 return NULL; 1416 } 1417 1418 /* 1419 * ma_data_end() - Find the end of the data in a node. 1420 * @node: The maple node 1421 * @type: The maple node type 1422 * @pivots: The array of pivots in the node 1423 * @max: The maximum value in the node 1424 * 1425 * Uses metadata to find the end of the data when possible. 1426 * Return: The zero indexed last slot with data (may be null). 1427 */ 1428 static inline unsigned char ma_data_end(struct maple_node *node, 1429 enum maple_type type, 1430 unsigned long *pivots, 1431 unsigned long max) 1432 { 1433 unsigned char offset; 1434 1435 if (!pivots) 1436 return 0; 1437 1438 if (type == maple_arange_64) 1439 return ma_meta_end(node, type); 1440 1441 offset = mt_pivots[type] - 1; 1442 if (likely(!pivots[offset])) 1443 return ma_meta_end(node, type); 1444 1445 if (likely(pivots[offset] == max)) 1446 return offset; 1447 1448 return mt_pivots[type]; 1449 } 1450 1451 /* 1452 * mas_data_end() - Find the end of the data (slot). 1453 * @mas: the maple state 1454 * 1455 * This method is optimized to check the metadata of a node if the node type 1456 * supports data end metadata. 1457 * 1458 * Return: The zero indexed last slot with data (may be null). 1459 */ 1460 static inline unsigned char mas_data_end(struct ma_state *mas) 1461 { 1462 enum maple_type type; 1463 struct maple_node *node; 1464 unsigned char offset; 1465 unsigned long *pivots; 1466 1467 type = mte_node_type(mas->node); 1468 node = mas_mn(mas); 1469 if (type == maple_arange_64) 1470 return ma_meta_end(node, type); 1471 1472 pivots = ma_pivots(node, type); 1473 if (unlikely(ma_dead_node(node))) 1474 return 0; 1475 1476 offset = mt_pivots[type] - 1; 1477 if (likely(!pivots[offset])) 1478 return ma_meta_end(node, type); 1479 1480 if (likely(pivots[offset] == mas->max)) 1481 return offset; 1482 1483 return mt_pivots[type]; 1484 } 1485 1486 /* 1487 * mas_leaf_max_gap() - Returns the largest gap in a leaf node 1488 * @mas - the maple state 1489 * 1490 * Return: The maximum gap in the leaf. 1491 */ 1492 static unsigned long mas_leaf_max_gap(struct ma_state *mas) 1493 { 1494 enum maple_type mt; 1495 unsigned long pstart, gap, max_gap; 1496 struct maple_node *mn; 1497 unsigned long *pivots; 1498 void __rcu **slots; 1499 unsigned char i; 1500 unsigned char max_piv; 1501 1502 mt = mte_node_type(mas->node); 1503 mn = mas_mn(mas); 1504 slots = ma_slots(mn, mt); 1505 max_gap = 0; 1506 if (unlikely(ma_is_dense(mt))) { 1507 gap = 0; 1508 for (i = 0; i < mt_slots[mt]; i++) { 1509 if (slots[i]) { 1510 if (gap > max_gap) 1511 max_gap = gap; 1512 gap = 0; 1513 } else { 1514 gap++; 1515 } 1516 } 1517 if (gap > max_gap) 1518 max_gap = gap; 1519 return max_gap; 1520 } 1521 1522 /* 1523 * Check the first implied pivot optimizes the loop below and slot 1 may 1524 * be skipped if there is a gap in slot 0. 1525 */ 1526 pivots = ma_pivots(mn, mt); 1527 if (likely(!slots[0])) { 1528 max_gap = pivots[0] - mas->min + 1; 1529 i = 2; 1530 } else { 1531 i = 1; 1532 } 1533 1534 /* reduce max_piv as the special case is checked before the loop */ 1535 max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1; 1536 /* 1537 * Check end implied pivot which can only be a gap on the right most 1538 * node. 1539 */ 1540 if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) { 1541 gap = ULONG_MAX - pivots[max_piv]; 1542 if (gap > max_gap) 1543 max_gap = gap; 1544 } 1545 1546 for (; i <= max_piv; i++) { 1547 /* data == no gap. */ 1548 if (likely(slots[i])) 1549 continue; 1550 1551 pstart = pivots[i - 1]; 1552 gap = pivots[i] - pstart; 1553 if (gap > max_gap) 1554 max_gap = gap; 1555 1556 /* There cannot be two gaps in a row. */ 1557 i++; 1558 } 1559 return max_gap; 1560 } 1561 1562 /* 1563 * ma_max_gap() - Get the maximum gap in a maple node (non-leaf) 1564 * @node: The maple node 1565 * @gaps: The pointer to the gaps 1566 * @mt: The maple node type 1567 * @*off: Pointer to store the offset location of the gap. 1568 * 1569 * Uses the metadata data end to scan backwards across set gaps. 1570 * 1571 * Return: The maximum gap value 1572 */ 1573 static inline unsigned long 1574 ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt, 1575 unsigned char *off) 1576 { 1577 unsigned char offset, i; 1578 unsigned long max_gap = 0; 1579 1580 i = offset = ma_meta_end(node, mt); 1581 do { 1582 if (gaps[i] > max_gap) { 1583 max_gap = gaps[i]; 1584 offset = i; 1585 } 1586 } while (i--); 1587 1588 *off = offset; 1589 return max_gap; 1590 } 1591 1592 /* 1593 * mas_max_gap() - find the largest gap in a non-leaf node and set the slot. 1594 * @mas: The maple state. 1595 * 1596 * Return: The gap value. 1597 */ 1598 static inline unsigned long mas_max_gap(struct ma_state *mas) 1599 { 1600 unsigned long *gaps; 1601 unsigned char offset; 1602 enum maple_type mt; 1603 struct maple_node *node; 1604 1605 mt = mte_node_type(mas->node); 1606 if (ma_is_leaf(mt)) 1607 return mas_leaf_max_gap(mas); 1608 1609 node = mas_mn(mas); 1610 MAS_BUG_ON(mas, mt != maple_arange_64); 1611 offset = ma_meta_gap(node, mt); 1612 gaps = ma_gaps(node, mt); 1613 return gaps[offset]; 1614 } 1615 1616 /* 1617 * mas_parent_gap() - Set the parent gap and any gaps above, as needed 1618 * @mas: The maple state 1619 * @offset: The gap offset in the parent to set 1620 * @new: The new gap value. 1621 * 1622 * Set the parent gap then continue to set the gap upwards, using the metadata 1623 * of the parent to see if it is necessary to check the node above. 1624 */ 1625 static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset, 1626 unsigned long new) 1627 { 1628 unsigned long meta_gap = 0; 1629 struct maple_node *pnode; 1630 struct maple_enode *penode; 1631 unsigned long *pgaps; 1632 unsigned char meta_offset; 1633 enum maple_type pmt; 1634 1635 pnode = mte_parent(mas->node); 1636 pmt = mas_parent_type(mas, mas->node); 1637 penode = mt_mk_node(pnode, pmt); 1638 pgaps = ma_gaps(pnode, pmt); 1639 1640 ascend: 1641 MAS_BUG_ON(mas, pmt != maple_arange_64); 1642 meta_offset = ma_meta_gap(pnode, pmt); 1643 meta_gap = pgaps[meta_offset]; 1644 1645 pgaps[offset] = new; 1646 1647 if (meta_gap == new) 1648 return; 1649 1650 if (offset != meta_offset) { 1651 if (meta_gap > new) 1652 return; 1653 1654 ma_set_meta_gap(pnode, pmt, offset); 1655 } else if (new < meta_gap) { 1656 new = ma_max_gap(pnode, pgaps, pmt, &meta_offset); 1657 ma_set_meta_gap(pnode, pmt, meta_offset); 1658 } 1659 1660 if (ma_is_root(pnode)) 1661 return; 1662 1663 /* Go to the parent node. */ 1664 pnode = mte_parent(penode); 1665 pmt = mas_parent_type(mas, penode); 1666 pgaps = ma_gaps(pnode, pmt); 1667 offset = mte_parent_slot(penode); 1668 penode = mt_mk_node(pnode, pmt); 1669 goto ascend; 1670 } 1671 1672 /* 1673 * mas_update_gap() - Update a nodes gaps and propagate up if necessary. 1674 * @mas - the maple state. 1675 */ 1676 static inline void mas_update_gap(struct ma_state *mas) 1677 { 1678 unsigned char pslot; 1679 unsigned long p_gap; 1680 unsigned long max_gap; 1681 1682 if (!mt_is_alloc(mas->tree)) 1683 return; 1684 1685 if (mte_is_root(mas->node)) 1686 return; 1687 1688 max_gap = mas_max_gap(mas); 1689 1690 pslot = mte_parent_slot(mas->node); 1691 p_gap = ma_gaps(mte_parent(mas->node), 1692 mas_parent_type(mas, mas->node))[pslot]; 1693 1694 if (p_gap != max_gap) 1695 mas_parent_gap(mas, pslot, max_gap); 1696 } 1697 1698 /* 1699 * mas_adopt_children() - Set the parent pointer of all nodes in @parent to 1700 * @parent with the slot encoded. 1701 * @mas - the maple state (for the tree) 1702 * @parent - the maple encoded node containing the children. 1703 */ 1704 static inline void mas_adopt_children(struct ma_state *mas, 1705 struct maple_enode *parent) 1706 { 1707 enum maple_type type = mte_node_type(parent); 1708 struct maple_node *node = mte_to_node(parent); 1709 void __rcu **slots = ma_slots(node, type); 1710 unsigned long *pivots = ma_pivots(node, type); 1711 struct maple_enode *child; 1712 unsigned char offset; 1713 1714 offset = ma_data_end(node, type, pivots, mas->max); 1715 do { 1716 child = mas_slot_locked(mas, slots, offset); 1717 mas_set_parent(mas, child, parent, offset); 1718 } while (offset--); 1719 } 1720 1721 /* 1722 * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old 1723 * node as dead. 1724 * @mas - the maple state with the new node 1725 * @old_enode - The old maple encoded node to replace. 1726 */ 1727 static inline void mas_put_in_tree(struct ma_state *mas, 1728 struct maple_enode *old_enode) 1729 __must_hold(mas->tree->ma_lock) 1730 { 1731 unsigned char offset; 1732 void __rcu **slots; 1733 1734 if (mte_is_root(mas->node)) { 1735 mas_mn(mas)->parent = ma_parent_ptr(mas_tree_parent(mas)); 1736 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 1737 mas_set_height(mas); 1738 } else { 1739 1740 offset = mte_parent_slot(mas->node); 1741 slots = ma_slots(mte_parent(mas->node), 1742 mas_parent_type(mas, mas->node)); 1743 rcu_assign_pointer(slots[offset], mas->node); 1744 } 1745 1746 mte_set_node_dead(old_enode); 1747 } 1748 1749 /* 1750 * mas_replace_node() - Replace a node by putting it in the tree, marking it 1751 * dead, and freeing it. 1752 * the parent encoding to locate the maple node in the tree. 1753 * @mas - the ma_state with @mas->node pointing to the new node. 1754 * @old_enode - The old maple encoded node. 1755 */ 1756 static inline void mas_replace_node(struct ma_state *mas, 1757 struct maple_enode *old_enode) 1758 __must_hold(mas->tree->ma_lock) 1759 { 1760 mas_put_in_tree(mas, old_enode); 1761 mas_free(mas, old_enode); 1762 } 1763 1764 /* 1765 * mas_find_child() - Find a child who has the parent @mas->node. 1766 * @mas: the maple state with the parent. 1767 * @child: the maple state to store the child. 1768 */ 1769 static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child) 1770 __must_hold(mas->tree->ma_lock) 1771 { 1772 enum maple_type mt; 1773 unsigned char offset; 1774 unsigned char end; 1775 unsigned long *pivots; 1776 struct maple_enode *entry; 1777 struct maple_node *node; 1778 void __rcu **slots; 1779 1780 mt = mte_node_type(mas->node); 1781 node = mas_mn(mas); 1782 slots = ma_slots(node, mt); 1783 pivots = ma_pivots(node, mt); 1784 end = ma_data_end(node, mt, pivots, mas->max); 1785 for (offset = mas->offset; offset <= end; offset++) { 1786 entry = mas_slot_locked(mas, slots, offset); 1787 if (mte_parent(entry) == node) { 1788 *child = *mas; 1789 mas->offset = offset + 1; 1790 child->offset = offset; 1791 mas_descend(child); 1792 child->offset = 0; 1793 return true; 1794 } 1795 } 1796 return false; 1797 } 1798 1799 /* 1800 * mab_shift_right() - Shift the data in mab right. Note, does not clean out the 1801 * old data or set b_node->b_end. 1802 * @b_node: the maple_big_node 1803 * @shift: the shift count 1804 */ 1805 static inline void mab_shift_right(struct maple_big_node *b_node, 1806 unsigned char shift) 1807 { 1808 unsigned long size = b_node->b_end * sizeof(unsigned long); 1809 1810 memmove(b_node->pivot + shift, b_node->pivot, size); 1811 memmove(b_node->slot + shift, b_node->slot, size); 1812 if (b_node->type == maple_arange_64) 1813 memmove(b_node->gap + shift, b_node->gap, size); 1814 } 1815 1816 /* 1817 * mab_middle_node() - Check if a middle node is needed (unlikely) 1818 * @b_node: the maple_big_node that contains the data. 1819 * @size: the amount of data in the b_node 1820 * @split: the potential split location 1821 * @slot_count: the size that can be stored in a single node being considered. 1822 * 1823 * Return: true if a middle node is required. 1824 */ 1825 static inline bool mab_middle_node(struct maple_big_node *b_node, int split, 1826 unsigned char slot_count) 1827 { 1828 unsigned char size = b_node->b_end; 1829 1830 if (size >= 2 * slot_count) 1831 return true; 1832 1833 if (!b_node->slot[split] && (size >= 2 * slot_count - 1)) 1834 return true; 1835 1836 return false; 1837 } 1838 1839 /* 1840 * mab_no_null_split() - ensure the split doesn't fall on a NULL 1841 * @b_node: the maple_big_node with the data 1842 * @split: the suggested split location 1843 * @slot_count: the number of slots in the node being considered. 1844 * 1845 * Return: the split location. 1846 */ 1847 static inline int mab_no_null_split(struct maple_big_node *b_node, 1848 unsigned char split, unsigned char slot_count) 1849 { 1850 if (!b_node->slot[split]) { 1851 /* 1852 * If the split is less than the max slot && the right side will 1853 * still be sufficient, then increment the split on NULL. 1854 */ 1855 if ((split < slot_count - 1) && 1856 (b_node->b_end - split) > (mt_min_slots[b_node->type])) 1857 split++; 1858 else 1859 split--; 1860 } 1861 return split; 1862 } 1863 1864 /* 1865 * mab_calc_split() - Calculate the split location and if there needs to be two 1866 * splits. 1867 * @bn: The maple_big_node with the data 1868 * @mid_split: The second split, if required. 0 otherwise. 1869 * 1870 * Return: The first split location. The middle split is set in @mid_split. 1871 */ 1872 static inline int mab_calc_split(struct ma_state *mas, 1873 struct maple_big_node *bn, unsigned char *mid_split, unsigned long min) 1874 { 1875 unsigned char b_end = bn->b_end; 1876 int split = b_end / 2; /* Assume equal split. */ 1877 unsigned char slot_min, slot_count = mt_slots[bn->type]; 1878 1879 /* 1880 * To support gap tracking, all NULL entries are kept together and a node cannot 1881 * end on a NULL entry, with the exception of the left-most leaf. The 1882 * limitation means that the split of a node must be checked for this condition 1883 * and be able to put more data in one direction or the other. 1884 */ 1885 if (unlikely((mas->mas_flags & MA_STATE_BULK))) { 1886 *mid_split = 0; 1887 split = b_end - mt_min_slots[bn->type]; 1888 1889 if (!ma_is_leaf(bn->type)) 1890 return split; 1891 1892 mas->mas_flags |= MA_STATE_REBALANCE; 1893 if (!bn->slot[split]) 1894 split--; 1895 return split; 1896 } 1897 1898 /* 1899 * Although extremely rare, it is possible to enter what is known as the 3-way 1900 * split scenario. The 3-way split comes about by means of a store of a range 1901 * that overwrites the end and beginning of two full nodes. The result is a set 1902 * of entries that cannot be stored in 2 nodes. Sometimes, these two nodes can 1903 * also be located in different parent nodes which are also full. This can 1904 * carry upwards all the way to the root in the worst case. 1905 */ 1906 if (unlikely(mab_middle_node(bn, split, slot_count))) { 1907 split = b_end / 3; 1908 *mid_split = split * 2; 1909 } else { 1910 slot_min = mt_min_slots[bn->type]; 1911 1912 *mid_split = 0; 1913 /* 1914 * Avoid having a range less than the slot count unless it 1915 * causes one node to be deficient. 1916 * NOTE: mt_min_slots is 1 based, b_end and split are zero. 1917 */ 1918 while ((split < slot_count - 1) && 1919 ((bn->pivot[split] - min) < slot_count - 1) && 1920 (b_end - split > slot_min)) 1921 split++; 1922 } 1923 1924 /* Avoid ending a node on a NULL entry */ 1925 split = mab_no_null_split(bn, split, slot_count); 1926 1927 if (unlikely(*mid_split)) 1928 *mid_split = mab_no_null_split(bn, *mid_split, slot_count); 1929 1930 return split; 1931 } 1932 1933 /* 1934 * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node 1935 * and set @b_node->b_end to the next free slot. 1936 * @mas: The maple state 1937 * @mas_start: The starting slot to copy 1938 * @mas_end: The end slot to copy (inclusively) 1939 * @b_node: The maple_big_node to place the data 1940 * @mab_start: The starting location in maple_big_node to store the data. 1941 */ 1942 static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start, 1943 unsigned char mas_end, struct maple_big_node *b_node, 1944 unsigned char mab_start) 1945 { 1946 enum maple_type mt; 1947 struct maple_node *node; 1948 void __rcu **slots; 1949 unsigned long *pivots, *gaps; 1950 int i = mas_start, j = mab_start; 1951 unsigned char piv_end; 1952 1953 node = mas_mn(mas); 1954 mt = mte_node_type(mas->node); 1955 pivots = ma_pivots(node, mt); 1956 if (!i) { 1957 b_node->pivot[j] = pivots[i++]; 1958 if (unlikely(i > mas_end)) 1959 goto complete; 1960 j++; 1961 } 1962 1963 piv_end = min(mas_end, mt_pivots[mt]); 1964 for (; i < piv_end; i++, j++) { 1965 b_node->pivot[j] = pivots[i]; 1966 if (unlikely(!b_node->pivot[j])) 1967 break; 1968 1969 if (unlikely(mas->max == b_node->pivot[j])) 1970 goto complete; 1971 } 1972 1973 if (likely(i <= mas_end)) 1974 b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); 1975 1976 complete: 1977 b_node->b_end = ++j; 1978 j -= mab_start; 1979 slots = ma_slots(node, mt); 1980 memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j); 1981 if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) { 1982 gaps = ma_gaps(node, mt); 1983 memcpy(b_node->gap + mab_start, gaps + mas_start, 1984 sizeof(unsigned long) * j); 1985 } 1986 } 1987 1988 /* 1989 * mas_leaf_set_meta() - Set the metadata of a leaf if possible. 1990 * @mas: The maple state 1991 * @node: The maple node 1992 * @pivots: pointer to the maple node pivots 1993 * @mt: The maple type 1994 * @end: The assumed end 1995 * 1996 * Note, end may be incremented within this function but not modified at the 1997 * source. This is fine since the metadata is the last thing to be stored in a 1998 * node during a write. 1999 */ 2000 static inline void mas_leaf_set_meta(struct ma_state *mas, 2001 struct maple_node *node, unsigned long *pivots, 2002 enum maple_type mt, unsigned char end) 2003 { 2004 /* There is no room for metadata already */ 2005 if (mt_pivots[mt] <= end) 2006 return; 2007 2008 if (pivots[end] && pivots[end] < mas->max) 2009 end++; 2010 2011 if (end < mt_slots[mt] - 1) 2012 ma_set_meta(node, mt, 0, end); 2013 } 2014 2015 /* 2016 * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node. 2017 * @b_node: the maple_big_node that has the data 2018 * @mab_start: the start location in @b_node. 2019 * @mab_end: The end location in @b_node (inclusively) 2020 * @mas: The maple state with the maple encoded node. 2021 */ 2022 static inline void mab_mas_cp(struct maple_big_node *b_node, 2023 unsigned char mab_start, unsigned char mab_end, 2024 struct ma_state *mas, bool new_max) 2025 { 2026 int i, j = 0; 2027 enum maple_type mt = mte_node_type(mas->node); 2028 struct maple_node *node = mte_to_node(mas->node); 2029 void __rcu **slots = ma_slots(node, mt); 2030 unsigned long *pivots = ma_pivots(node, mt); 2031 unsigned long *gaps = NULL; 2032 unsigned char end; 2033 2034 if (mab_end - mab_start > mt_pivots[mt]) 2035 mab_end--; 2036 2037 if (!pivots[mt_pivots[mt] - 1]) 2038 slots[mt_pivots[mt]] = NULL; 2039 2040 i = mab_start; 2041 do { 2042 pivots[j++] = b_node->pivot[i++]; 2043 } while (i <= mab_end && likely(b_node->pivot[i])); 2044 2045 memcpy(slots, b_node->slot + mab_start, 2046 sizeof(void *) * (i - mab_start)); 2047 2048 if (new_max) 2049 mas->max = b_node->pivot[i - 1]; 2050 2051 end = j - 1; 2052 if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) { 2053 unsigned long max_gap = 0; 2054 unsigned char offset = 0; 2055 2056 gaps = ma_gaps(node, mt); 2057 do { 2058 gaps[--j] = b_node->gap[--i]; 2059 if (gaps[j] > max_gap) { 2060 offset = j; 2061 max_gap = gaps[j]; 2062 } 2063 } while (j); 2064 2065 ma_set_meta(node, mt, offset, end); 2066 } else { 2067 mas_leaf_set_meta(mas, node, pivots, mt, end); 2068 } 2069 } 2070 2071 /* 2072 * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert. 2073 * @mas: The maple state 2074 * @end: The maple node end 2075 * @mt: The maple node type 2076 */ 2077 static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end, 2078 enum maple_type mt) 2079 { 2080 if (!(mas->mas_flags & MA_STATE_BULK)) 2081 return; 2082 2083 if (mte_is_root(mas->node)) 2084 return; 2085 2086 if (end > mt_min_slots[mt]) { 2087 mas->mas_flags &= ~MA_STATE_REBALANCE; 2088 return; 2089 } 2090 } 2091 2092 /* 2093 * mas_store_b_node() - Store an @entry into the b_node while also copying the 2094 * data from a maple encoded node. 2095 * @wr_mas: the maple write state 2096 * @b_node: the maple_big_node to fill with data 2097 * @offset_end: the offset to end copying 2098 * 2099 * Return: The actual end of the data stored in @b_node 2100 */ 2101 static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas, 2102 struct maple_big_node *b_node, unsigned char offset_end) 2103 { 2104 unsigned char slot; 2105 unsigned char b_end; 2106 /* Possible underflow of piv will wrap back to 0 before use. */ 2107 unsigned long piv; 2108 struct ma_state *mas = wr_mas->mas; 2109 2110 b_node->type = wr_mas->type; 2111 b_end = 0; 2112 slot = mas->offset; 2113 if (slot) { 2114 /* Copy start data up to insert. */ 2115 mas_mab_cp(mas, 0, slot - 1, b_node, 0); 2116 b_end = b_node->b_end; 2117 piv = b_node->pivot[b_end - 1]; 2118 } else 2119 piv = mas->min - 1; 2120 2121 if (piv + 1 < mas->index) { 2122 /* Handle range starting after old range */ 2123 b_node->slot[b_end] = wr_mas->content; 2124 if (!wr_mas->content) 2125 b_node->gap[b_end] = mas->index - 1 - piv; 2126 b_node->pivot[b_end++] = mas->index - 1; 2127 } 2128 2129 /* Store the new entry. */ 2130 mas->offset = b_end; 2131 b_node->slot[b_end] = wr_mas->entry; 2132 b_node->pivot[b_end] = mas->last; 2133 2134 /* Appended. */ 2135 if (mas->last >= mas->max) 2136 goto b_end; 2137 2138 /* Handle new range ending before old range ends */ 2139 piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type); 2140 if (piv > mas->last) { 2141 if (piv == ULONG_MAX) 2142 mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type); 2143 2144 if (offset_end != slot) 2145 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 2146 offset_end); 2147 2148 b_node->slot[++b_end] = wr_mas->content; 2149 if (!wr_mas->content) 2150 b_node->gap[b_end] = piv - mas->last + 1; 2151 b_node->pivot[b_end] = piv; 2152 } 2153 2154 slot = offset_end + 1; 2155 if (slot > wr_mas->node_end) 2156 goto b_end; 2157 2158 /* Copy end data to the end of the node. */ 2159 mas_mab_cp(mas, slot, wr_mas->node_end + 1, b_node, ++b_end); 2160 b_node->b_end--; 2161 return; 2162 2163 b_end: 2164 b_node->b_end = b_end; 2165 } 2166 2167 /* 2168 * mas_prev_sibling() - Find the previous node with the same parent. 2169 * @mas: the maple state 2170 * 2171 * Return: True if there is a previous sibling, false otherwise. 2172 */ 2173 static inline bool mas_prev_sibling(struct ma_state *mas) 2174 { 2175 unsigned int p_slot = mte_parent_slot(mas->node); 2176 2177 if (mte_is_root(mas->node)) 2178 return false; 2179 2180 if (!p_slot) 2181 return false; 2182 2183 mas_ascend(mas); 2184 mas->offset = p_slot - 1; 2185 mas_descend(mas); 2186 return true; 2187 } 2188 2189 /* 2190 * mas_next_sibling() - Find the next node with the same parent. 2191 * @mas: the maple state 2192 * 2193 * Return: true if there is a next sibling, false otherwise. 2194 */ 2195 static inline bool mas_next_sibling(struct ma_state *mas) 2196 { 2197 MA_STATE(parent, mas->tree, mas->index, mas->last); 2198 2199 if (mte_is_root(mas->node)) 2200 return false; 2201 2202 parent = *mas; 2203 mas_ascend(&parent); 2204 parent.offset = mte_parent_slot(mas->node) + 1; 2205 if (parent.offset > mas_data_end(&parent)) 2206 return false; 2207 2208 *mas = parent; 2209 mas_descend(mas); 2210 return true; 2211 } 2212 2213 /* 2214 * mte_node_or_node() - Return the encoded node or MAS_NONE. 2215 * @enode: The encoded maple node. 2216 * 2217 * Shorthand to avoid setting %NULLs in the tree or maple_subtree_state. 2218 * 2219 * Return: @enode or MAS_NONE 2220 */ 2221 static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode) 2222 { 2223 if (enode) 2224 return enode; 2225 2226 return ma_enode_ptr(MAS_NONE); 2227 } 2228 2229 /* 2230 * mas_wr_node_walk() - Find the correct offset for the index in the @mas. 2231 * @wr_mas: The maple write state 2232 * 2233 * Uses mas_slot_locked() and does not need to worry about dead nodes. 2234 */ 2235 static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) 2236 { 2237 struct ma_state *mas = wr_mas->mas; 2238 unsigned char count, offset; 2239 2240 if (unlikely(ma_is_dense(wr_mas->type))) { 2241 wr_mas->r_max = wr_mas->r_min = mas->index; 2242 mas->offset = mas->index = mas->min; 2243 return; 2244 } 2245 2246 wr_mas->node = mas_mn(wr_mas->mas); 2247 wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type); 2248 count = wr_mas->node_end = ma_data_end(wr_mas->node, wr_mas->type, 2249 wr_mas->pivots, mas->max); 2250 offset = mas->offset; 2251 2252 while (offset < count && mas->index > wr_mas->pivots[offset]) 2253 offset++; 2254 2255 wr_mas->r_max = offset < count ? wr_mas->pivots[offset] : mas->max; 2256 wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset); 2257 wr_mas->offset_end = mas->offset = offset; 2258 } 2259 2260 /* 2261 * mast_rebalance_next() - Rebalance against the next node 2262 * @mast: The maple subtree state 2263 * @old_r: The encoded maple node to the right (next node). 2264 */ 2265 static inline void mast_rebalance_next(struct maple_subtree_state *mast) 2266 { 2267 unsigned char b_end = mast->bn->b_end; 2268 2269 mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node), 2270 mast->bn, b_end); 2271 mast->orig_r->last = mast->orig_r->max; 2272 } 2273 2274 /* 2275 * mast_rebalance_prev() - Rebalance against the previous node 2276 * @mast: The maple subtree state 2277 * @old_l: The encoded maple node to the left (previous node) 2278 */ 2279 static inline void mast_rebalance_prev(struct maple_subtree_state *mast) 2280 { 2281 unsigned char end = mas_data_end(mast->orig_l) + 1; 2282 unsigned char b_end = mast->bn->b_end; 2283 2284 mab_shift_right(mast->bn, end); 2285 mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0); 2286 mast->l->min = mast->orig_l->min; 2287 mast->orig_l->index = mast->orig_l->min; 2288 mast->bn->b_end = end + b_end; 2289 mast->l->offset += end; 2290 } 2291 2292 /* 2293 * mast_spanning_rebalance() - Rebalance nodes with nearest neighbour favouring 2294 * the node to the right. Checking the nodes to the right then the left at each 2295 * level upwards until root is reached. 2296 * Data is copied into the @mast->bn. 2297 * @mast: The maple_subtree_state. 2298 */ 2299 static inline 2300 bool mast_spanning_rebalance(struct maple_subtree_state *mast) 2301 { 2302 struct ma_state r_tmp = *mast->orig_r; 2303 struct ma_state l_tmp = *mast->orig_l; 2304 unsigned char depth = 0; 2305 2306 r_tmp = *mast->orig_r; 2307 l_tmp = *mast->orig_l; 2308 do { 2309 mas_ascend(mast->orig_r); 2310 mas_ascend(mast->orig_l); 2311 depth++; 2312 if (mast->orig_r->offset < mas_data_end(mast->orig_r)) { 2313 mast->orig_r->offset++; 2314 do { 2315 mas_descend(mast->orig_r); 2316 mast->orig_r->offset = 0; 2317 } while (--depth); 2318 2319 mast_rebalance_next(mast); 2320 *mast->orig_l = l_tmp; 2321 return true; 2322 } else if (mast->orig_l->offset != 0) { 2323 mast->orig_l->offset--; 2324 do { 2325 mas_descend(mast->orig_l); 2326 mast->orig_l->offset = 2327 mas_data_end(mast->orig_l); 2328 } while (--depth); 2329 2330 mast_rebalance_prev(mast); 2331 *mast->orig_r = r_tmp; 2332 return true; 2333 } 2334 } while (!mte_is_root(mast->orig_r->node)); 2335 2336 *mast->orig_r = r_tmp; 2337 *mast->orig_l = l_tmp; 2338 return false; 2339 } 2340 2341 /* 2342 * mast_ascend() - Ascend the original left and right maple states. 2343 * @mast: the maple subtree state. 2344 * 2345 * Ascend the original left and right sides. Set the offsets to point to the 2346 * data already in the new tree (@mast->l and @mast->r). 2347 */ 2348 static inline void mast_ascend(struct maple_subtree_state *mast) 2349 { 2350 MA_WR_STATE(wr_mas, mast->orig_r, NULL); 2351 mas_ascend(mast->orig_l); 2352 mas_ascend(mast->orig_r); 2353 2354 mast->orig_r->offset = 0; 2355 mast->orig_r->index = mast->r->max; 2356 /* last should be larger than or equal to index */ 2357 if (mast->orig_r->last < mast->orig_r->index) 2358 mast->orig_r->last = mast->orig_r->index; 2359 2360 wr_mas.type = mte_node_type(mast->orig_r->node); 2361 mas_wr_node_walk(&wr_mas); 2362 /* Set up the left side of things */ 2363 mast->orig_l->offset = 0; 2364 mast->orig_l->index = mast->l->min; 2365 wr_mas.mas = mast->orig_l; 2366 wr_mas.type = mte_node_type(mast->orig_l->node); 2367 mas_wr_node_walk(&wr_mas); 2368 2369 mast->bn->type = wr_mas.type; 2370 } 2371 2372 /* 2373 * mas_new_ma_node() - Create and return a new maple node. Helper function. 2374 * @mas: the maple state with the allocations. 2375 * @b_node: the maple_big_node with the type encoding. 2376 * 2377 * Use the node type from the maple_big_node to allocate a new node from the 2378 * ma_state. This function exists mainly for code readability. 2379 * 2380 * Return: A new maple encoded node 2381 */ 2382 static inline struct maple_enode 2383 *mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node) 2384 { 2385 return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type); 2386 } 2387 2388 /* 2389 * mas_mab_to_node() - Set up right and middle nodes 2390 * 2391 * @mas: the maple state that contains the allocations. 2392 * @b_node: the node which contains the data. 2393 * @left: The pointer which will have the left node 2394 * @right: The pointer which may have the right node 2395 * @middle: the pointer which may have the middle node (rare) 2396 * @mid_split: the split location for the middle node 2397 * 2398 * Return: the split of left. 2399 */ 2400 static inline unsigned char mas_mab_to_node(struct ma_state *mas, 2401 struct maple_big_node *b_node, struct maple_enode **left, 2402 struct maple_enode **right, struct maple_enode **middle, 2403 unsigned char *mid_split, unsigned long min) 2404 { 2405 unsigned char split = 0; 2406 unsigned char slot_count = mt_slots[b_node->type]; 2407 2408 *left = mas_new_ma_node(mas, b_node); 2409 *right = NULL; 2410 *middle = NULL; 2411 *mid_split = 0; 2412 2413 if (b_node->b_end < slot_count) { 2414 split = b_node->b_end; 2415 } else { 2416 split = mab_calc_split(mas, b_node, mid_split, min); 2417 *right = mas_new_ma_node(mas, b_node); 2418 } 2419 2420 if (*mid_split) 2421 *middle = mas_new_ma_node(mas, b_node); 2422 2423 return split; 2424 2425 } 2426 2427 /* 2428 * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end 2429 * pointer. 2430 * @b_node - the big node to add the entry 2431 * @mas - the maple state to get the pivot (mas->max) 2432 * @entry - the entry to add, if NULL nothing happens. 2433 */ 2434 static inline void mab_set_b_end(struct maple_big_node *b_node, 2435 struct ma_state *mas, 2436 void *entry) 2437 { 2438 if (!entry) 2439 return; 2440 2441 b_node->slot[b_node->b_end] = entry; 2442 if (mt_is_alloc(mas->tree)) 2443 b_node->gap[b_node->b_end] = mas_max_gap(mas); 2444 b_node->pivot[b_node->b_end++] = mas->max; 2445 } 2446 2447 /* 2448 * mas_set_split_parent() - combine_then_separate helper function. Sets the parent 2449 * of @mas->node to either @left or @right, depending on @slot and @split 2450 * 2451 * @mas - the maple state with the node that needs a parent 2452 * @left - possible parent 1 2453 * @right - possible parent 2 2454 * @slot - the slot the mas->node was placed 2455 * @split - the split location between @left and @right 2456 */ 2457 static inline void mas_set_split_parent(struct ma_state *mas, 2458 struct maple_enode *left, 2459 struct maple_enode *right, 2460 unsigned char *slot, unsigned char split) 2461 { 2462 if (mas_is_none(mas)) 2463 return; 2464 2465 if ((*slot) <= split) 2466 mas_set_parent(mas, mas->node, left, *slot); 2467 else if (right) 2468 mas_set_parent(mas, mas->node, right, (*slot) - split - 1); 2469 2470 (*slot)++; 2471 } 2472 2473 /* 2474 * mte_mid_split_check() - Check if the next node passes the mid-split 2475 * @**l: Pointer to left encoded maple node. 2476 * @**m: Pointer to middle encoded maple node. 2477 * @**r: Pointer to right encoded maple node. 2478 * @slot: The offset 2479 * @*split: The split location. 2480 * @mid_split: The middle split. 2481 */ 2482 static inline void mte_mid_split_check(struct maple_enode **l, 2483 struct maple_enode **r, 2484 struct maple_enode *right, 2485 unsigned char slot, 2486 unsigned char *split, 2487 unsigned char mid_split) 2488 { 2489 if (*r == right) 2490 return; 2491 2492 if (slot < mid_split) 2493 return; 2494 2495 *l = *r; 2496 *r = right; 2497 *split = mid_split; 2498 } 2499 2500 /* 2501 * mast_set_split_parents() - Helper function to set three nodes parents. Slot 2502 * is taken from @mast->l. 2503 * @mast - the maple subtree state 2504 * @left - the left node 2505 * @right - the right node 2506 * @split - the split location. 2507 */ 2508 static inline void mast_set_split_parents(struct maple_subtree_state *mast, 2509 struct maple_enode *left, 2510 struct maple_enode *middle, 2511 struct maple_enode *right, 2512 unsigned char split, 2513 unsigned char mid_split) 2514 { 2515 unsigned char slot; 2516 struct maple_enode *l = left; 2517 struct maple_enode *r = right; 2518 2519 if (mas_is_none(mast->l)) 2520 return; 2521 2522 if (middle) 2523 r = middle; 2524 2525 slot = mast->l->offset; 2526 2527 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2528 mas_set_split_parent(mast->l, l, r, &slot, split); 2529 2530 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2531 mas_set_split_parent(mast->m, l, r, &slot, split); 2532 2533 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2534 mas_set_split_parent(mast->r, l, r, &slot, split); 2535 } 2536 2537 /* 2538 * mas_topiary_node() - Dispose of a singe node 2539 * @mas: The maple state for pushing nodes 2540 * @enode: The encoded maple node 2541 * @in_rcu: If the tree is in rcu mode 2542 * 2543 * The node will either be RCU freed or pushed back on the maple state. 2544 */ 2545 static inline void mas_topiary_node(struct ma_state *mas, 2546 struct maple_enode *enode, bool in_rcu) 2547 { 2548 struct maple_node *tmp; 2549 2550 if (enode == MAS_NONE) 2551 return; 2552 2553 tmp = mte_to_node(enode); 2554 mte_set_node_dead(enode); 2555 if (in_rcu) 2556 ma_free_rcu(tmp); 2557 else 2558 mas_push_node(mas, tmp); 2559 } 2560 2561 /* 2562 * mas_topiary_replace() - Replace the data with new data, then repair the 2563 * parent links within the new tree. Iterate over the dead sub-tree and collect 2564 * the dead subtrees and topiary the nodes that are no longer of use. 2565 * 2566 * The new tree will have up to three children with the correct parent. Keep 2567 * track of the new entries as they need to be followed to find the next level 2568 * of new entries. 2569 * 2570 * The old tree will have up to three children with the old parent. Keep track 2571 * of the old entries as they may have more nodes below replaced. Nodes within 2572 * [index, last] are dead subtrees, others need to be freed and followed. 2573 * 2574 * @mas: The maple state pointing at the new data 2575 * @old_enode: The maple encoded node being replaced 2576 * 2577 */ 2578 static inline void mas_topiary_replace(struct ma_state *mas, 2579 struct maple_enode *old_enode) 2580 { 2581 struct ma_state tmp[3], tmp_next[3]; 2582 MA_TOPIARY(subtrees, mas->tree); 2583 bool in_rcu; 2584 int i, n; 2585 2586 /* Place data in tree & then mark node as old */ 2587 mas_put_in_tree(mas, old_enode); 2588 2589 /* Update the parent pointers in the tree */ 2590 tmp[0] = *mas; 2591 tmp[0].offset = 0; 2592 tmp[1].node = MAS_NONE; 2593 tmp[2].node = MAS_NONE; 2594 while (!mte_is_leaf(tmp[0].node)) { 2595 n = 0; 2596 for (i = 0; i < 3; i++) { 2597 if (mas_is_none(&tmp[i])) 2598 continue; 2599 2600 while (n < 3) { 2601 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2602 break; 2603 n++; 2604 } 2605 2606 mas_adopt_children(&tmp[i], tmp[i].node); 2607 } 2608 2609 if (MAS_WARN_ON(mas, n == 0)) 2610 break; 2611 2612 while (n < 3) 2613 tmp_next[n++].node = MAS_NONE; 2614 2615 for (i = 0; i < 3; i++) 2616 tmp[i] = tmp_next[i]; 2617 } 2618 2619 /* Collect the old nodes that need to be discarded */ 2620 if (mte_is_leaf(old_enode)) 2621 return mas_free(mas, old_enode); 2622 2623 tmp[0] = *mas; 2624 tmp[0].offset = 0; 2625 tmp[0].node = old_enode; 2626 tmp[1].node = MAS_NONE; 2627 tmp[2].node = MAS_NONE; 2628 in_rcu = mt_in_rcu(mas->tree); 2629 do { 2630 n = 0; 2631 for (i = 0; i < 3; i++) { 2632 if (mas_is_none(&tmp[i])) 2633 continue; 2634 2635 while (n < 3) { 2636 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2637 break; 2638 2639 if ((tmp_next[n].min >= tmp_next->index) && 2640 (tmp_next[n].max <= tmp_next->last)) { 2641 mat_add(&subtrees, tmp_next[n].node); 2642 tmp_next[n].node = MAS_NONE; 2643 } else { 2644 n++; 2645 } 2646 } 2647 } 2648 2649 if (MAS_WARN_ON(mas, n == 0)) 2650 break; 2651 2652 while (n < 3) 2653 tmp_next[n++].node = MAS_NONE; 2654 2655 for (i = 0; i < 3; i++) { 2656 mas_topiary_node(mas, tmp[i].node, in_rcu); 2657 tmp[i] = tmp_next[i]; 2658 } 2659 } while (!mte_is_leaf(tmp[0].node)); 2660 2661 for (i = 0; i < 3; i++) 2662 mas_topiary_node(mas, tmp[i].node, in_rcu); 2663 2664 mas_mat_destroy(mas, &subtrees); 2665 } 2666 2667 /* 2668 * mas_wmb_replace() - Write memory barrier and replace 2669 * @mas: The maple state 2670 * @old: The old maple encoded node that is being replaced. 2671 * 2672 * Updates gap as necessary. 2673 */ 2674 static inline void mas_wmb_replace(struct ma_state *mas, 2675 struct maple_enode *old_enode) 2676 { 2677 /* Insert the new data in the tree */ 2678 mas_topiary_replace(mas, old_enode); 2679 2680 if (mte_is_leaf(mas->node)) 2681 return; 2682 2683 mas_update_gap(mas); 2684 } 2685 2686 /* 2687 * mast_cp_to_nodes() - Copy data out to nodes. 2688 * @mast: The maple subtree state 2689 * @left: The left encoded maple node 2690 * @middle: The middle encoded maple node 2691 * @right: The right encoded maple node 2692 * @split: The location to split between left and (middle ? middle : right) 2693 * @mid_split: The location to split between middle and right. 2694 */ 2695 static inline void mast_cp_to_nodes(struct maple_subtree_state *mast, 2696 struct maple_enode *left, struct maple_enode *middle, 2697 struct maple_enode *right, unsigned char split, unsigned char mid_split) 2698 { 2699 bool new_lmax = true; 2700 2701 mast->l->node = mte_node_or_none(left); 2702 mast->m->node = mte_node_or_none(middle); 2703 mast->r->node = mte_node_or_none(right); 2704 2705 mast->l->min = mast->orig_l->min; 2706 if (split == mast->bn->b_end) { 2707 mast->l->max = mast->orig_r->max; 2708 new_lmax = false; 2709 } 2710 2711 mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax); 2712 2713 if (middle) { 2714 mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true); 2715 mast->m->min = mast->bn->pivot[split] + 1; 2716 split = mid_split; 2717 } 2718 2719 mast->r->max = mast->orig_r->max; 2720 if (right) { 2721 mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false); 2722 mast->r->min = mast->bn->pivot[split] + 1; 2723 } 2724 } 2725 2726 /* 2727 * mast_combine_cp_left - Copy in the original left side of the tree into the 2728 * combined data set in the maple subtree state big node. 2729 * @mast: The maple subtree state 2730 */ 2731 static inline void mast_combine_cp_left(struct maple_subtree_state *mast) 2732 { 2733 unsigned char l_slot = mast->orig_l->offset; 2734 2735 if (!l_slot) 2736 return; 2737 2738 mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0); 2739 } 2740 2741 /* 2742 * mast_combine_cp_right: Copy in the original right side of the tree into the 2743 * combined data set in the maple subtree state big node. 2744 * @mast: The maple subtree state 2745 */ 2746 static inline void mast_combine_cp_right(struct maple_subtree_state *mast) 2747 { 2748 if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max) 2749 return; 2750 2751 mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1, 2752 mt_slot_count(mast->orig_r->node), mast->bn, 2753 mast->bn->b_end); 2754 mast->orig_r->last = mast->orig_r->max; 2755 } 2756 2757 /* 2758 * mast_sufficient: Check if the maple subtree state has enough data in the big 2759 * node to create at least one sufficient node 2760 * @mast: the maple subtree state 2761 */ 2762 static inline bool mast_sufficient(struct maple_subtree_state *mast) 2763 { 2764 if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node)) 2765 return true; 2766 2767 return false; 2768 } 2769 2770 /* 2771 * mast_overflow: Check if there is too much data in the subtree state for a 2772 * single node. 2773 * @mast: The maple subtree state 2774 */ 2775 static inline bool mast_overflow(struct maple_subtree_state *mast) 2776 { 2777 if (mast->bn->b_end >= mt_slot_count(mast->orig_l->node)) 2778 return true; 2779 2780 return false; 2781 } 2782 2783 static inline void *mtree_range_walk(struct ma_state *mas) 2784 { 2785 unsigned long *pivots; 2786 unsigned char offset; 2787 struct maple_node *node; 2788 struct maple_enode *next, *last; 2789 enum maple_type type; 2790 void __rcu **slots; 2791 unsigned char end; 2792 unsigned long max, min; 2793 unsigned long prev_max, prev_min; 2794 2795 next = mas->node; 2796 min = mas->min; 2797 max = mas->max; 2798 do { 2799 offset = 0; 2800 last = next; 2801 node = mte_to_node(next); 2802 type = mte_node_type(next); 2803 pivots = ma_pivots(node, type); 2804 end = ma_data_end(node, type, pivots, max); 2805 if (unlikely(ma_dead_node(node))) 2806 goto dead_node; 2807 2808 if (pivots[offset] >= mas->index) { 2809 prev_max = max; 2810 prev_min = min; 2811 max = pivots[offset]; 2812 goto next; 2813 } 2814 2815 do { 2816 offset++; 2817 } while ((offset < end) && (pivots[offset] < mas->index)); 2818 2819 prev_min = min; 2820 min = pivots[offset - 1] + 1; 2821 prev_max = max; 2822 if (likely(offset < end && pivots[offset])) 2823 max = pivots[offset]; 2824 2825 next: 2826 slots = ma_slots(node, type); 2827 next = mt_slot(mas->tree, slots, offset); 2828 if (unlikely(ma_dead_node(node))) 2829 goto dead_node; 2830 } while (!ma_is_leaf(type)); 2831 2832 mas->offset = offset; 2833 mas->index = min; 2834 mas->last = max; 2835 mas->min = prev_min; 2836 mas->max = prev_max; 2837 mas->node = last; 2838 return (void *)next; 2839 2840 dead_node: 2841 mas_reset(mas); 2842 return NULL; 2843 } 2844 2845 /* 2846 * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers. 2847 * @mas: The starting maple state 2848 * @mast: The maple_subtree_state, keeps track of 4 maple states. 2849 * @count: The estimated count of iterations needed. 2850 * 2851 * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root 2852 * is hit. First @b_node is split into two entries which are inserted into the 2853 * next iteration of the loop. @b_node is returned populated with the final 2854 * iteration. @mas is used to obtain allocations. orig_l_mas keeps track of the 2855 * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last 2856 * to account of what has been copied into the new sub-tree. The update of 2857 * orig_l_mas->last is used in mas_consume to find the slots that will need to 2858 * be either freed or destroyed. orig_l_mas->depth keeps track of the height of 2859 * the new sub-tree in case the sub-tree becomes the full tree. 2860 * 2861 * Return: the number of elements in b_node during the last loop. 2862 */ 2863 static int mas_spanning_rebalance(struct ma_state *mas, 2864 struct maple_subtree_state *mast, unsigned char count) 2865 { 2866 unsigned char split, mid_split; 2867 unsigned char slot = 0; 2868 struct maple_enode *left = NULL, *middle = NULL, *right = NULL; 2869 struct maple_enode *old_enode; 2870 2871 MA_STATE(l_mas, mas->tree, mas->index, mas->index); 2872 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2873 MA_STATE(m_mas, mas->tree, mas->index, mas->index); 2874 2875 /* 2876 * The tree needs to be rebalanced and leaves need to be kept at the same level. 2877 * Rebalancing is done by use of the ``struct maple_topiary``. 2878 */ 2879 mast->l = &l_mas; 2880 mast->m = &m_mas; 2881 mast->r = &r_mas; 2882 l_mas.node = r_mas.node = m_mas.node = MAS_NONE; 2883 2884 /* Check if this is not root and has sufficient data. */ 2885 if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) && 2886 unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type])) 2887 mast_spanning_rebalance(mast); 2888 2889 l_mas.depth = 0; 2890 2891 /* 2892 * Each level of the tree is examined and balanced, pushing data to the left or 2893 * right, or rebalancing against left or right nodes is employed to avoid 2894 * rippling up the tree to limit the amount of churn. Once a new sub-section of 2895 * the tree is created, there may be a mix of new and old nodes. The old nodes 2896 * will have the incorrect parent pointers and currently be in two trees: the 2897 * original tree and the partially new tree. To remedy the parent pointers in 2898 * the old tree, the new data is swapped into the active tree and a walk down 2899 * the tree is performed and the parent pointers are updated. 2900 * See mas_topiary_replace() for more information. 2901 */ 2902 while (count--) { 2903 mast->bn->b_end--; 2904 mast->bn->type = mte_node_type(mast->orig_l->node); 2905 split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle, 2906 &mid_split, mast->orig_l->min); 2907 mast_set_split_parents(mast, left, middle, right, split, 2908 mid_split); 2909 mast_cp_to_nodes(mast, left, middle, right, split, mid_split); 2910 2911 /* 2912 * Copy data from next level in the tree to mast->bn from next 2913 * iteration 2914 */ 2915 memset(mast->bn, 0, sizeof(struct maple_big_node)); 2916 mast->bn->type = mte_node_type(left); 2917 l_mas.depth++; 2918 2919 /* Root already stored in l->node. */ 2920 if (mas_is_root_limits(mast->l)) 2921 goto new_root; 2922 2923 mast_ascend(mast); 2924 mast_combine_cp_left(mast); 2925 l_mas.offset = mast->bn->b_end; 2926 mab_set_b_end(mast->bn, &l_mas, left); 2927 mab_set_b_end(mast->bn, &m_mas, middle); 2928 mab_set_b_end(mast->bn, &r_mas, right); 2929 2930 /* Copy anything necessary out of the right node. */ 2931 mast_combine_cp_right(mast); 2932 mast->orig_l->last = mast->orig_l->max; 2933 2934 if (mast_sufficient(mast)) 2935 continue; 2936 2937 if (mast_overflow(mast)) 2938 continue; 2939 2940 /* May be a new root stored in mast->bn */ 2941 if (mas_is_root_limits(mast->orig_l)) 2942 break; 2943 2944 mast_spanning_rebalance(mast); 2945 2946 /* rebalancing from other nodes may require another loop. */ 2947 if (!count) 2948 count++; 2949 } 2950 2951 l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), 2952 mte_node_type(mast->orig_l->node)); 2953 l_mas.depth++; 2954 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true); 2955 mas_set_parent(mas, left, l_mas.node, slot); 2956 if (middle) 2957 mas_set_parent(mas, middle, l_mas.node, ++slot); 2958 2959 if (right) 2960 mas_set_parent(mas, right, l_mas.node, ++slot); 2961 2962 if (mas_is_root_limits(mast->l)) { 2963 new_root: 2964 mas_mn(mast->l)->parent = ma_parent_ptr(mas_tree_parent(mas)); 2965 while (!mte_is_root(mast->orig_l->node)) 2966 mast_ascend(mast); 2967 } else { 2968 mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent; 2969 } 2970 2971 old_enode = mast->orig_l->node; 2972 mas->depth = l_mas.depth; 2973 mas->node = l_mas.node; 2974 mas->min = l_mas.min; 2975 mas->max = l_mas.max; 2976 mas->offset = l_mas.offset; 2977 mas_wmb_replace(mas, old_enode); 2978 mtree_range_walk(mas); 2979 return mast->bn->b_end; 2980 } 2981 2982 /* 2983 * mas_rebalance() - Rebalance a given node. 2984 * @mas: The maple state 2985 * @b_node: The big maple node. 2986 * 2987 * Rebalance two nodes into a single node or two new nodes that are sufficient. 2988 * Continue upwards until tree is sufficient. 2989 * 2990 * Return: the number of elements in b_node during the last loop. 2991 */ 2992 static inline int mas_rebalance(struct ma_state *mas, 2993 struct maple_big_node *b_node) 2994 { 2995 char empty_count = mas_mt_height(mas); 2996 struct maple_subtree_state mast; 2997 unsigned char shift, b_end = ++b_node->b_end; 2998 2999 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 3000 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 3001 3002 trace_ma_op(__func__, mas); 3003 3004 /* 3005 * Rebalancing occurs if a node is insufficient. Data is rebalanced 3006 * against the node to the right if it exists, otherwise the node to the 3007 * left of this node is rebalanced against this node. If rebalancing 3008 * causes just one node to be produced instead of two, then the parent 3009 * is also examined and rebalanced if it is insufficient. Every level 3010 * tries to combine the data in the same way. If one node contains the 3011 * entire range of the tree, then that node is used as a new root node. 3012 */ 3013 mas_node_count(mas, empty_count * 2 - 1); 3014 if (mas_is_err(mas)) 3015 return 0; 3016 3017 mast.orig_l = &l_mas; 3018 mast.orig_r = &r_mas; 3019 mast.bn = b_node; 3020 mast.bn->type = mte_node_type(mas->node); 3021 3022 l_mas = r_mas = *mas; 3023 3024 if (mas_next_sibling(&r_mas)) { 3025 mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end); 3026 r_mas.last = r_mas.index = r_mas.max; 3027 } else { 3028 mas_prev_sibling(&l_mas); 3029 shift = mas_data_end(&l_mas) + 1; 3030 mab_shift_right(b_node, shift); 3031 mas->offset += shift; 3032 mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0); 3033 b_node->b_end = shift + b_end; 3034 l_mas.index = l_mas.last = l_mas.min; 3035 } 3036 3037 return mas_spanning_rebalance(mas, &mast, empty_count); 3038 } 3039 3040 /* 3041 * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple 3042 * state. 3043 * @mas: The maple state 3044 * @end: The end of the left-most node. 3045 * 3046 * During a mass-insert event (such as forking), it may be necessary to 3047 * rebalance the left-most node when it is not sufficient. 3048 */ 3049 static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end) 3050 { 3051 enum maple_type mt = mte_node_type(mas->node); 3052 struct maple_node reuse, *newnode, *parent, *new_left, *left, *node; 3053 struct maple_enode *eparent, *old_eparent; 3054 unsigned char offset, tmp, split = mt_slots[mt] / 2; 3055 void __rcu **l_slots, **slots; 3056 unsigned long *l_pivs, *pivs, gap; 3057 bool in_rcu = mt_in_rcu(mas->tree); 3058 3059 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 3060 3061 l_mas = *mas; 3062 mas_prev_sibling(&l_mas); 3063 3064 /* set up node. */ 3065 if (in_rcu) { 3066 /* Allocate for both left and right as well as parent. */ 3067 mas_node_count(mas, 3); 3068 if (mas_is_err(mas)) 3069 return; 3070 3071 newnode = mas_pop_node(mas); 3072 } else { 3073 newnode = &reuse; 3074 } 3075 3076 node = mas_mn(mas); 3077 newnode->parent = node->parent; 3078 slots = ma_slots(newnode, mt); 3079 pivs = ma_pivots(newnode, mt); 3080 left = mas_mn(&l_mas); 3081 l_slots = ma_slots(left, mt); 3082 l_pivs = ma_pivots(left, mt); 3083 if (!l_slots[split]) 3084 split++; 3085 tmp = mas_data_end(&l_mas) - split; 3086 3087 memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp); 3088 memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp); 3089 pivs[tmp] = l_mas.max; 3090 memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end); 3091 memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end); 3092 3093 l_mas.max = l_pivs[split]; 3094 mas->min = l_mas.max + 1; 3095 old_eparent = mt_mk_node(mte_parent(l_mas.node), 3096 mas_parent_type(&l_mas, l_mas.node)); 3097 tmp += end; 3098 if (!in_rcu) { 3099 unsigned char max_p = mt_pivots[mt]; 3100 unsigned char max_s = mt_slots[mt]; 3101 3102 if (tmp < max_p) 3103 memset(pivs + tmp, 0, 3104 sizeof(unsigned long) * (max_p - tmp)); 3105 3106 if (tmp < mt_slots[mt]) 3107 memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp)); 3108 3109 memcpy(node, newnode, sizeof(struct maple_node)); 3110 ma_set_meta(node, mt, 0, tmp - 1); 3111 mte_set_pivot(old_eparent, mte_parent_slot(l_mas.node), 3112 l_pivs[split]); 3113 3114 /* Remove data from l_pivs. */ 3115 tmp = split + 1; 3116 memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp)); 3117 memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp)); 3118 ma_set_meta(left, mt, 0, split); 3119 eparent = old_eparent; 3120 3121 goto done; 3122 } 3123 3124 /* RCU requires replacing both l_mas, mas, and parent. */ 3125 mas->node = mt_mk_node(newnode, mt); 3126 ma_set_meta(newnode, mt, 0, tmp); 3127 3128 new_left = mas_pop_node(mas); 3129 new_left->parent = left->parent; 3130 mt = mte_node_type(l_mas.node); 3131 slots = ma_slots(new_left, mt); 3132 pivs = ma_pivots(new_left, mt); 3133 memcpy(slots, l_slots, sizeof(void *) * split); 3134 memcpy(pivs, l_pivs, sizeof(unsigned long) * split); 3135 ma_set_meta(new_left, mt, 0, split); 3136 l_mas.node = mt_mk_node(new_left, mt); 3137 3138 /* replace parent. */ 3139 offset = mte_parent_slot(mas->node); 3140 mt = mas_parent_type(&l_mas, l_mas.node); 3141 parent = mas_pop_node(mas); 3142 slots = ma_slots(parent, mt); 3143 pivs = ma_pivots(parent, mt); 3144 memcpy(parent, mte_to_node(old_eparent), sizeof(struct maple_node)); 3145 rcu_assign_pointer(slots[offset], mas->node); 3146 rcu_assign_pointer(slots[offset - 1], l_mas.node); 3147 pivs[offset - 1] = l_mas.max; 3148 eparent = mt_mk_node(parent, mt); 3149 done: 3150 gap = mas_leaf_max_gap(mas); 3151 mte_set_gap(eparent, mte_parent_slot(mas->node), gap); 3152 gap = mas_leaf_max_gap(&l_mas); 3153 mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap); 3154 mas_ascend(mas); 3155 3156 if (in_rcu) { 3157 mas_replace_node(mas, old_eparent); 3158 mas_adopt_children(mas, mas->node); 3159 } 3160 3161 mas_update_gap(mas); 3162 } 3163 3164 /* 3165 * mas_split_final_node() - Split the final node in a subtree operation. 3166 * @mast: the maple subtree state 3167 * @mas: The maple state 3168 * @height: The height of the tree in case it's a new root. 3169 */ 3170 static inline bool mas_split_final_node(struct maple_subtree_state *mast, 3171 struct ma_state *mas, int height) 3172 { 3173 struct maple_enode *ancestor; 3174 3175 if (mte_is_root(mas->node)) { 3176 if (mt_is_alloc(mas->tree)) 3177 mast->bn->type = maple_arange_64; 3178 else 3179 mast->bn->type = maple_range_64; 3180 mas->depth = height; 3181 } 3182 /* 3183 * Only a single node is used here, could be root. 3184 * The Big_node data should just fit in a single node. 3185 */ 3186 ancestor = mas_new_ma_node(mas, mast->bn); 3187 mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset); 3188 mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset); 3189 mte_to_node(ancestor)->parent = mas_mn(mas)->parent; 3190 3191 mast->l->node = ancestor; 3192 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true); 3193 mas->offset = mast->bn->b_end - 1; 3194 return true; 3195 } 3196 3197 /* 3198 * mast_fill_bnode() - Copy data into the big node in the subtree state 3199 * @mast: The maple subtree state 3200 * @mas: the maple state 3201 * @skip: The number of entries to skip for new nodes insertion. 3202 */ 3203 static inline void mast_fill_bnode(struct maple_subtree_state *mast, 3204 struct ma_state *mas, 3205 unsigned char skip) 3206 { 3207 bool cp = true; 3208 unsigned char split; 3209 3210 memset(mast->bn->gap, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->gap)); 3211 memset(mast->bn->slot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->slot)); 3212 memset(mast->bn->pivot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->pivot)); 3213 mast->bn->b_end = 0; 3214 3215 if (mte_is_root(mas->node)) { 3216 cp = false; 3217 } else { 3218 mas_ascend(mas); 3219 mas->offset = mte_parent_slot(mas->node); 3220 } 3221 3222 if (cp && mast->l->offset) 3223 mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0); 3224 3225 split = mast->bn->b_end; 3226 mab_set_b_end(mast->bn, mast->l, mast->l->node); 3227 mast->r->offset = mast->bn->b_end; 3228 mab_set_b_end(mast->bn, mast->r, mast->r->node); 3229 if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max) 3230 cp = false; 3231 3232 if (cp) 3233 mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1, 3234 mast->bn, mast->bn->b_end); 3235 3236 mast->bn->b_end--; 3237 mast->bn->type = mte_node_type(mas->node); 3238 } 3239 3240 /* 3241 * mast_split_data() - Split the data in the subtree state big node into regular 3242 * nodes. 3243 * @mast: The maple subtree state 3244 * @mas: The maple state 3245 * @split: The location to split the big node 3246 */ 3247 static inline void mast_split_data(struct maple_subtree_state *mast, 3248 struct ma_state *mas, unsigned char split) 3249 { 3250 unsigned char p_slot; 3251 3252 mab_mas_cp(mast->bn, 0, split, mast->l, true); 3253 mte_set_pivot(mast->r->node, 0, mast->r->max); 3254 mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false); 3255 mast->l->offset = mte_parent_slot(mas->node); 3256 mast->l->max = mast->bn->pivot[split]; 3257 mast->r->min = mast->l->max + 1; 3258 if (mte_is_leaf(mas->node)) 3259 return; 3260 3261 p_slot = mast->orig_l->offset; 3262 mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node, 3263 &p_slot, split); 3264 mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node, 3265 &p_slot, split); 3266 } 3267 3268 /* 3269 * mas_push_data() - Instead of splitting a node, it is beneficial to push the 3270 * data to the right or left node if there is room. 3271 * @mas: The maple state 3272 * @height: The current height of the maple state 3273 * @mast: The maple subtree state 3274 * @left: Push left or not. 3275 * 3276 * Keeping the height of the tree low means faster lookups. 3277 * 3278 * Return: True if pushed, false otherwise. 3279 */ 3280 static inline bool mas_push_data(struct ma_state *mas, int height, 3281 struct maple_subtree_state *mast, bool left) 3282 { 3283 unsigned char slot_total = mast->bn->b_end; 3284 unsigned char end, space, split; 3285 3286 MA_STATE(tmp_mas, mas->tree, mas->index, mas->last); 3287 tmp_mas = *mas; 3288 tmp_mas.depth = mast->l->depth; 3289 3290 if (left && !mas_prev_sibling(&tmp_mas)) 3291 return false; 3292 else if (!left && !mas_next_sibling(&tmp_mas)) 3293 return false; 3294 3295 end = mas_data_end(&tmp_mas); 3296 slot_total += end; 3297 space = 2 * mt_slot_count(mas->node) - 2; 3298 /* -2 instead of -1 to ensure there isn't a triple split */ 3299 if (ma_is_leaf(mast->bn->type)) 3300 space--; 3301 3302 if (mas->max == ULONG_MAX) 3303 space--; 3304 3305 if (slot_total >= space) 3306 return false; 3307 3308 /* Get the data; Fill mast->bn */ 3309 mast->bn->b_end++; 3310 if (left) { 3311 mab_shift_right(mast->bn, end + 1); 3312 mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0); 3313 mast->bn->b_end = slot_total + 1; 3314 } else { 3315 mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end); 3316 } 3317 3318 /* Configure mast for splitting of mast->bn */ 3319 split = mt_slots[mast->bn->type] - 2; 3320 if (left) { 3321 /* Switch mas to prev node */ 3322 *mas = tmp_mas; 3323 /* Start using mast->l for the left side. */ 3324 tmp_mas.node = mast->l->node; 3325 *mast->l = tmp_mas; 3326 } else { 3327 tmp_mas.node = mast->r->node; 3328 *mast->r = tmp_mas; 3329 split = slot_total - split; 3330 } 3331 split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]); 3332 /* Update parent slot for split calculation. */ 3333 if (left) 3334 mast->orig_l->offset += end + 1; 3335 3336 mast_split_data(mast, mas, split); 3337 mast_fill_bnode(mast, mas, 2); 3338 mas_split_final_node(mast, mas, height + 1); 3339 return true; 3340 } 3341 3342 /* 3343 * mas_split() - Split data that is too big for one node into two. 3344 * @mas: The maple state 3345 * @b_node: The maple big node 3346 * Return: 1 on success, 0 on failure. 3347 */ 3348 static int mas_split(struct ma_state *mas, struct maple_big_node *b_node) 3349 { 3350 struct maple_subtree_state mast; 3351 int height = 0; 3352 unsigned char mid_split, split = 0; 3353 struct maple_enode *old; 3354 3355 /* 3356 * Splitting is handled differently from any other B-tree; the Maple 3357 * Tree splits upwards. Splitting up means that the split operation 3358 * occurs when the walk of the tree hits the leaves and not on the way 3359 * down. The reason for splitting up is that it is impossible to know 3360 * how much space will be needed until the leaf is (or leaves are) 3361 * reached. Since overwriting data is allowed and a range could 3362 * overwrite more than one range or result in changing one entry into 3 3363 * entries, it is impossible to know if a split is required until the 3364 * data is examined. 3365 * 3366 * Splitting is a balancing act between keeping allocations to a minimum 3367 * and avoiding a 'jitter' event where a tree is expanded to make room 3368 * for an entry followed by a contraction when the entry is removed. To 3369 * accomplish the balance, there are empty slots remaining in both left 3370 * and right nodes after a split. 3371 */ 3372 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 3373 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 3374 MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last); 3375 MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last); 3376 3377 trace_ma_op(__func__, mas); 3378 mas->depth = mas_mt_height(mas); 3379 /* Allocation failures will happen early. */ 3380 mas_node_count(mas, 1 + mas->depth * 2); 3381 if (mas_is_err(mas)) 3382 return 0; 3383 3384 mast.l = &l_mas; 3385 mast.r = &r_mas; 3386 mast.orig_l = &prev_l_mas; 3387 mast.orig_r = &prev_r_mas; 3388 mast.bn = b_node; 3389 3390 while (height++ <= mas->depth) { 3391 if (mt_slots[b_node->type] > b_node->b_end) { 3392 mas_split_final_node(&mast, mas, height); 3393 break; 3394 } 3395 3396 l_mas = r_mas = *mas; 3397 l_mas.node = mas_new_ma_node(mas, b_node); 3398 r_mas.node = mas_new_ma_node(mas, b_node); 3399 /* 3400 * Another way that 'jitter' is avoided is to terminate a split up early if the 3401 * left or right node has space to spare. This is referred to as "pushing left" 3402 * or "pushing right" and is similar to the B* tree, except the nodes left or 3403 * right can rarely be reused due to RCU, but the ripple upwards is halted which 3404 * is a significant savings. 3405 */ 3406 /* Try to push left. */ 3407 if (mas_push_data(mas, height, &mast, true)) 3408 break; 3409 3410 /* Try to push right. */ 3411 if (mas_push_data(mas, height, &mast, false)) 3412 break; 3413 3414 split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min); 3415 mast_split_data(&mast, mas, split); 3416 /* 3417 * Usually correct, mab_mas_cp in the above call overwrites 3418 * r->max. 3419 */ 3420 mast.r->max = mas->max; 3421 mast_fill_bnode(&mast, mas, 1); 3422 prev_l_mas = *mast.l; 3423 prev_r_mas = *mast.r; 3424 } 3425 3426 /* Set the original node as dead */ 3427 old = mas->node; 3428 mas->node = l_mas.node; 3429 mas_wmb_replace(mas, old); 3430 mtree_range_walk(mas); 3431 return 1; 3432 } 3433 3434 /* 3435 * mas_reuse_node() - Reuse the node to store the data. 3436 * @wr_mas: The maple write state 3437 * @bn: The maple big node 3438 * @end: The end of the data. 3439 * 3440 * Will always return false in RCU mode. 3441 * 3442 * Return: True if node was reused, false otherwise. 3443 */ 3444 static inline bool mas_reuse_node(struct ma_wr_state *wr_mas, 3445 struct maple_big_node *bn, unsigned char end) 3446 { 3447 /* Need to be rcu safe. */ 3448 if (mt_in_rcu(wr_mas->mas->tree)) 3449 return false; 3450 3451 if (end > bn->b_end) { 3452 int clear = mt_slots[wr_mas->type] - bn->b_end; 3453 3454 memset(wr_mas->slots + bn->b_end, 0, sizeof(void *) * clear--); 3455 memset(wr_mas->pivots + bn->b_end, 0, sizeof(void *) * clear); 3456 } 3457 mab_mas_cp(bn, 0, bn->b_end, wr_mas->mas, false); 3458 return true; 3459 } 3460 3461 /* 3462 * mas_commit_b_node() - Commit the big node into the tree. 3463 * @wr_mas: The maple write state 3464 * @b_node: The maple big node 3465 * @end: The end of the data. 3466 */ 3467 static noinline_for_kasan int mas_commit_b_node(struct ma_wr_state *wr_mas, 3468 struct maple_big_node *b_node, unsigned char end) 3469 { 3470 struct maple_node *node; 3471 struct maple_enode *old_enode; 3472 unsigned char b_end = b_node->b_end; 3473 enum maple_type b_type = b_node->type; 3474 3475 old_enode = wr_mas->mas->node; 3476 if ((b_end < mt_min_slots[b_type]) && 3477 (!mte_is_root(old_enode)) && 3478 (mas_mt_height(wr_mas->mas) > 1)) 3479 return mas_rebalance(wr_mas->mas, b_node); 3480 3481 if (b_end >= mt_slots[b_type]) 3482 return mas_split(wr_mas->mas, b_node); 3483 3484 if (mas_reuse_node(wr_mas, b_node, end)) 3485 goto reuse_node; 3486 3487 mas_node_count(wr_mas->mas, 1); 3488 if (mas_is_err(wr_mas->mas)) 3489 return 0; 3490 3491 node = mas_pop_node(wr_mas->mas); 3492 node->parent = mas_mn(wr_mas->mas)->parent; 3493 wr_mas->mas->node = mt_mk_node(node, b_type); 3494 mab_mas_cp(b_node, 0, b_end, wr_mas->mas, false); 3495 mas_replace_node(wr_mas->mas, old_enode); 3496 reuse_node: 3497 mas_update_gap(wr_mas->mas); 3498 return 1; 3499 } 3500 3501 /* 3502 * mas_root_expand() - Expand a root to a node 3503 * @mas: The maple state 3504 * @entry: The entry to store into the tree 3505 */ 3506 static inline int mas_root_expand(struct ma_state *mas, void *entry) 3507 { 3508 void *contents = mas_root_locked(mas); 3509 enum maple_type type = maple_leaf_64; 3510 struct maple_node *node; 3511 void __rcu **slots; 3512 unsigned long *pivots; 3513 int slot = 0; 3514 3515 mas_node_count(mas, 1); 3516 if (unlikely(mas_is_err(mas))) 3517 return 0; 3518 3519 node = mas_pop_node(mas); 3520 pivots = ma_pivots(node, type); 3521 slots = ma_slots(node, type); 3522 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3523 mas->node = mt_mk_node(node, type); 3524 3525 if (mas->index) { 3526 if (contents) { 3527 rcu_assign_pointer(slots[slot], contents); 3528 if (likely(mas->index > 1)) 3529 slot++; 3530 } 3531 pivots[slot++] = mas->index - 1; 3532 } 3533 3534 rcu_assign_pointer(slots[slot], entry); 3535 mas->offset = slot; 3536 pivots[slot] = mas->last; 3537 if (mas->last != ULONG_MAX) 3538 pivots[++slot] = ULONG_MAX; 3539 3540 mas->depth = 1; 3541 mas_set_height(mas); 3542 ma_set_meta(node, maple_leaf_64, 0, slot); 3543 /* swap the new root into the tree */ 3544 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3545 return slot; 3546 } 3547 3548 static inline void mas_store_root(struct ma_state *mas, void *entry) 3549 { 3550 if (likely((mas->last != 0) || (mas->index != 0))) 3551 mas_root_expand(mas, entry); 3552 else if (((unsigned long) (entry) & 3) == 2) 3553 mas_root_expand(mas, entry); 3554 else { 3555 rcu_assign_pointer(mas->tree->ma_root, entry); 3556 mas->node = MAS_START; 3557 } 3558 } 3559 3560 /* 3561 * mas_is_span_wr() - Check if the write needs to be treated as a write that 3562 * spans the node. 3563 * @mas: The maple state 3564 * @piv: The pivot value being written 3565 * @type: The maple node type 3566 * @entry: The data to write 3567 * 3568 * Spanning writes are writes that start in one node and end in another OR if 3569 * the write of a %NULL will cause the node to end with a %NULL. 3570 * 3571 * Return: True if this is a spanning write, false otherwise. 3572 */ 3573 static bool mas_is_span_wr(struct ma_wr_state *wr_mas) 3574 { 3575 unsigned long max = wr_mas->r_max; 3576 unsigned long last = wr_mas->mas->last; 3577 enum maple_type type = wr_mas->type; 3578 void *entry = wr_mas->entry; 3579 3580 /* Contained in this pivot, fast path */ 3581 if (last < max) 3582 return false; 3583 3584 if (ma_is_leaf(type)) { 3585 max = wr_mas->mas->max; 3586 if (last < max) 3587 return false; 3588 } 3589 3590 if (last == max) { 3591 /* 3592 * The last entry of leaf node cannot be NULL unless it is the 3593 * rightmost node (writing ULONG_MAX), otherwise it spans slots. 3594 */ 3595 if (entry || last == ULONG_MAX) 3596 return false; 3597 } 3598 3599 trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry); 3600 return true; 3601 } 3602 3603 static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas) 3604 { 3605 wr_mas->type = mte_node_type(wr_mas->mas->node); 3606 mas_wr_node_walk(wr_mas); 3607 wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type); 3608 } 3609 3610 static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas) 3611 { 3612 wr_mas->mas->max = wr_mas->r_max; 3613 wr_mas->mas->min = wr_mas->r_min; 3614 wr_mas->mas->node = wr_mas->content; 3615 wr_mas->mas->offset = 0; 3616 wr_mas->mas->depth++; 3617 } 3618 /* 3619 * mas_wr_walk() - Walk the tree for a write. 3620 * @wr_mas: The maple write state 3621 * 3622 * Uses mas_slot_locked() and does not need to worry about dead nodes. 3623 * 3624 * Return: True if it's contained in a node, false on spanning write. 3625 */ 3626 static bool mas_wr_walk(struct ma_wr_state *wr_mas) 3627 { 3628 struct ma_state *mas = wr_mas->mas; 3629 3630 while (true) { 3631 mas_wr_walk_descend(wr_mas); 3632 if (unlikely(mas_is_span_wr(wr_mas))) 3633 return false; 3634 3635 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3636 mas->offset); 3637 if (ma_is_leaf(wr_mas->type)) 3638 return true; 3639 3640 mas_wr_walk_traverse(wr_mas); 3641 } 3642 3643 return true; 3644 } 3645 3646 static bool mas_wr_walk_index(struct ma_wr_state *wr_mas) 3647 { 3648 struct ma_state *mas = wr_mas->mas; 3649 3650 while (true) { 3651 mas_wr_walk_descend(wr_mas); 3652 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3653 mas->offset); 3654 if (ma_is_leaf(wr_mas->type)) 3655 return true; 3656 mas_wr_walk_traverse(wr_mas); 3657 3658 } 3659 return true; 3660 } 3661 /* 3662 * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs. 3663 * @l_wr_mas: The left maple write state 3664 * @r_wr_mas: The right maple write state 3665 */ 3666 static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas, 3667 struct ma_wr_state *r_wr_mas) 3668 { 3669 struct ma_state *r_mas = r_wr_mas->mas; 3670 struct ma_state *l_mas = l_wr_mas->mas; 3671 unsigned char l_slot; 3672 3673 l_slot = l_mas->offset; 3674 if (!l_wr_mas->content) 3675 l_mas->index = l_wr_mas->r_min; 3676 3677 if ((l_mas->index == l_wr_mas->r_min) && 3678 (l_slot && 3679 !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) { 3680 if (l_slot > 1) 3681 l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1; 3682 else 3683 l_mas->index = l_mas->min; 3684 3685 l_mas->offset = l_slot - 1; 3686 } 3687 3688 if (!r_wr_mas->content) { 3689 if (r_mas->last < r_wr_mas->r_max) 3690 r_mas->last = r_wr_mas->r_max; 3691 r_mas->offset++; 3692 } else if ((r_mas->last == r_wr_mas->r_max) && 3693 (r_mas->last < r_mas->max) && 3694 !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) { 3695 r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots, 3696 r_wr_mas->type, r_mas->offset + 1); 3697 r_mas->offset++; 3698 } 3699 } 3700 3701 static inline void *mas_state_walk(struct ma_state *mas) 3702 { 3703 void *entry; 3704 3705 entry = mas_start(mas); 3706 if (mas_is_none(mas)) 3707 return NULL; 3708 3709 if (mas_is_ptr(mas)) 3710 return entry; 3711 3712 return mtree_range_walk(mas); 3713 } 3714 3715 /* 3716 * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up 3717 * to date. 3718 * 3719 * @mas: The maple state. 3720 * 3721 * Note: Leaves mas in undesirable state. 3722 * Return: The entry for @mas->index or %NULL on dead node. 3723 */ 3724 static inline void *mtree_lookup_walk(struct ma_state *mas) 3725 { 3726 unsigned long *pivots; 3727 unsigned char offset; 3728 struct maple_node *node; 3729 struct maple_enode *next; 3730 enum maple_type type; 3731 void __rcu **slots; 3732 unsigned char end; 3733 unsigned long max; 3734 3735 next = mas->node; 3736 max = ULONG_MAX; 3737 do { 3738 offset = 0; 3739 node = mte_to_node(next); 3740 type = mte_node_type(next); 3741 pivots = ma_pivots(node, type); 3742 end = ma_data_end(node, type, pivots, max); 3743 if (unlikely(ma_dead_node(node))) 3744 goto dead_node; 3745 do { 3746 if (pivots[offset] >= mas->index) { 3747 max = pivots[offset]; 3748 break; 3749 } 3750 } while (++offset < end); 3751 3752 slots = ma_slots(node, type); 3753 next = mt_slot(mas->tree, slots, offset); 3754 if (unlikely(ma_dead_node(node))) 3755 goto dead_node; 3756 } while (!ma_is_leaf(type)); 3757 3758 return (void *)next; 3759 3760 dead_node: 3761 mas_reset(mas); 3762 return NULL; 3763 } 3764 3765 static void mte_destroy_walk(struct maple_enode *, struct maple_tree *); 3766 /* 3767 * mas_new_root() - Create a new root node that only contains the entry passed 3768 * in. 3769 * @mas: The maple state 3770 * @entry: The entry to store. 3771 * 3772 * Only valid when the index == 0 and the last == ULONG_MAX 3773 * 3774 * Return 0 on error, 1 on success. 3775 */ 3776 static inline int mas_new_root(struct ma_state *mas, void *entry) 3777 { 3778 struct maple_enode *root = mas_root_locked(mas); 3779 enum maple_type type = maple_leaf_64; 3780 struct maple_node *node; 3781 void __rcu **slots; 3782 unsigned long *pivots; 3783 3784 if (!entry && !mas->index && mas->last == ULONG_MAX) { 3785 mas->depth = 0; 3786 mas_set_height(mas); 3787 rcu_assign_pointer(mas->tree->ma_root, entry); 3788 mas->node = MAS_START; 3789 goto done; 3790 } 3791 3792 mas_node_count(mas, 1); 3793 if (mas_is_err(mas)) 3794 return 0; 3795 3796 node = mas_pop_node(mas); 3797 pivots = ma_pivots(node, type); 3798 slots = ma_slots(node, type); 3799 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3800 mas->node = mt_mk_node(node, type); 3801 rcu_assign_pointer(slots[0], entry); 3802 pivots[0] = mas->last; 3803 mas->depth = 1; 3804 mas_set_height(mas); 3805 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3806 3807 done: 3808 if (xa_is_node(root)) 3809 mte_destroy_walk(root, mas->tree); 3810 3811 return 1; 3812 } 3813 /* 3814 * mas_wr_spanning_store() - Create a subtree with the store operation completed 3815 * and new nodes where necessary, then place the sub-tree in the actual tree. 3816 * Note that mas is expected to point to the node which caused the store to 3817 * span. 3818 * @wr_mas: The maple write state 3819 * 3820 * Return: 0 on error, positive on success. 3821 */ 3822 static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas) 3823 { 3824 struct maple_subtree_state mast; 3825 struct maple_big_node b_node; 3826 struct ma_state *mas; 3827 unsigned char height; 3828 3829 /* Left and Right side of spanning store */ 3830 MA_STATE(l_mas, NULL, 0, 0); 3831 MA_STATE(r_mas, NULL, 0, 0); 3832 MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry); 3833 MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry); 3834 3835 /* 3836 * A store operation that spans multiple nodes is called a spanning 3837 * store and is handled early in the store call stack by the function 3838 * mas_is_span_wr(). When a spanning store is identified, the maple 3839 * state is duplicated. The first maple state walks the left tree path 3840 * to ``index``, the duplicate walks the right tree path to ``last``. 3841 * The data in the two nodes are combined into a single node, two nodes, 3842 * or possibly three nodes (see the 3-way split above). A ``NULL`` 3843 * written to the last entry of a node is considered a spanning store as 3844 * a rebalance is required for the operation to complete and an overflow 3845 * of data may happen. 3846 */ 3847 mas = wr_mas->mas; 3848 trace_ma_op(__func__, mas); 3849 3850 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 3851 return mas_new_root(mas, wr_mas->entry); 3852 /* 3853 * Node rebalancing may occur due to this store, so there may be three new 3854 * entries per level plus a new root. 3855 */ 3856 height = mas_mt_height(mas); 3857 mas_node_count(mas, 1 + height * 3); 3858 if (mas_is_err(mas)) 3859 return 0; 3860 3861 /* 3862 * Set up right side. Need to get to the next offset after the spanning 3863 * store to ensure it's not NULL and to combine both the next node and 3864 * the node with the start together. 3865 */ 3866 r_mas = *mas; 3867 /* Avoid overflow, walk to next slot in the tree. */ 3868 if (r_mas.last + 1) 3869 r_mas.last++; 3870 3871 r_mas.index = r_mas.last; 3872 mas_wr_walk_index(&r_wr_mas); 3873 r_mas.last = r_mas.index = mas->last; 3874 3875 /* Set up left side. */ 3876 l_mas = *mas; 3877 mas_wr_walk_index(&l_wr_mas); 3878 3879 if (!wr_mas->entry) { 3880 mas_extend_spanning_null(&l_wr_mas, &r_wr_mas); 3881 mas->offset = l_mas.offset; 3882 mas->index = l_mas.index; 3883 mas->last = l_mas.last = r_mas.last; 3884 } 3885 3886 /* expanding NULLs may make this cover the entire range */ 3887 if (!l_mas.index && r_mas.last == ULONG_MAX) { 3888 mas_set_range(mas, 0, ULONG_MAX); 3889 return mas_new_root(mas, wr_mas->entry); 3890 } 3891 3892 memset(&b_node, 0, sizeof(struct maple_big_node)); 3893 /* Copy l_mas and store the value in b_node. */ 3894 mas_store_b_node(&l_wr_mas, &b_node, l_wr_mas.node_end); 3895 /* Copy r_mas into b_node. */ 3896 if (r_mas.offset <= r_wr_mas.node_end) 3897 mas_mab_cp(&r_mas, r_mas.offset, r_wr_mas.node_end, 3898 &b_node, b_node.b_end + 1); 3899 else 3900 b_node.b_end++; 3901 3902 /* Stop spanning searches by searching for just index. */ 3903 l_mas.index = l_mas.last = mas->index; 3904 3905 mast.bn = &b_node; 3906 mast.orig_l = &l_mas; 3907 mast.orig_r = &r_mas; 3908 /* Combine l_mas and r_mas and split them up evenly again. */ 3909 return mas_spanning_rebalance(mas, &mast, height + 1); 3910 } 3911 3912 /* 3913 * mas_wr_node_store() - Attempt to store the value in a node 3914 * @wr_mas: The maple write state 3915 * 3916 * Attempts to reuse the node, but may allocate. 3917 * 3918 * Return: True if stored, false otherwise 3919 */ 3920 static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas, 3921 unsigned char new_end) 3922 { 3923 struct ma_state *mas = wr_mas->mas; 3924 void __rcu **dst_slots; 3925 unsigned long *dst_pivots; 3926 unsigned char dst_offset, offset_end = wr_mas->offset_end; 3927 struct maple_node reuse, *newnode; 3928 unsigned char copy_size, node_pivots = mt_pivots[wr_mas->type]; 3929 bool in_rcu = mt_in_rcu(mas->tree); 3930 3931 /* Check if there is enough data. The room is enough. */ 3932 if (!mte_is_root(mas->node) && (new_end <= mt_min_slots[wr_mas->type]) && 3933 !(mas->mas_flags & MA_STATE_BULK)) 3934 return false; 3935 3936 if (mas->last == wr_mas->end_piv) 3937 offset_end++; /* don't copy this offset */ 3938 else if (unlikely(wr_mas->r_max == ULONG_MAX)) 3939 mas_bulk_rebalance(mas, wr_mas->node_end, wr_mas->type); 3940 3941 /* set up node. */ 3942 if (in_rcu) { 3943 mas_node_count(mas, 1); 3944 if (mas_is_err(mas)) 3945 return false; 3946 3947 newnode = mas_pop_node(mas); 3948 } else { 3949 memset(&reuse, 0, sizeof(struct maple_node)); 3950 newnode = &reuse; 3951 } 3952 3953 newnode->parent = mas_mn(mas)->parent; 3954 dst_pivots = ma_pivots(newnode, wr_mas->type); 3955 dst_slots = ma_slots(newnode, wr_mas->type); 3956 /* Copy from start to insert point */ 3957 memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset); 3958 memcpy(dst_slots, wr_mas->slots, sizeof(void *) * mas->offset); 3959 3960 /* Handle insert of new range starting after old range */ 3961 if (wr_mas->r_min < mas->index) { 3962 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content); 3963 dst_pivots[mas->offset++] = mas->index - 1; 3964 } 3965 3966 /* Store the new entry and range end. */ 3967 if (mas->offset < node_pivots) 3968 dst_pivots[mas->offset] = mas->last; 3969 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->entry); 3970 3971 /* 3972 * this range wrote to the end of the node or it overwrote the rest of 3973 * the data 3974 */ 3975 if (offset_end > wr_mas->node_end) 3976 goto done; 3977 3978 dst_offset = mas->offset + 1; 3979 /* Copy to the end of node if necessary. */ 3980 copy_size = wr_mas->node_end - offset_end + 1; 3981 memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end, 3982 sizeof(void *) * copy_size); 3983 memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end, 3984 sizeof(unsigned long) * (copy_size - 1)); 3985 3986 if (new_end < node_pivots) 3987 dst_pivots[new_end] = mas->max; 3988 3989 done: 3990 mas_leaf_set_meta(mas, newnode, dst_pivots, maple_leaf_64, new_end); 3991 if (in_rcu) { 3992 struct maple_enode *old_enode = mas->node; 3993 3994 mas->node = mt_mk_node(newnode, wr_mas->type); 3995 mas_replace_node(mas, old_enode); 3996 } else { 3997 memcpy(wr_mas->node, newnode, sizeof(struct maple_node)); 3998 } 3999 trace_ma_write(__func__, mas, 0, wr_mas->entry); 4000 mas_update_gap(mas); 4001 return true; 4002 } 4003 4004 /* 4005 * mas_wr_slot_store: Attempt to store a value in a slot. 4006 * @wr_mas: the maple write state 4007 * 4008 * Return: True if stored, false otherwise 4009 */ 4010 static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas) 4011 { 4012 struct ma_state *mas = wr_mas->mas; 4013 unsigned char offset = mas->offset; 4014 void __rcu **slots = wr_mas->slots; 4015 bool gap = false; 4016 4017 gap |= !mt_slot_locked(mas->tree, slots, offset); 4018 gap |= !mt_slot_locked(mas->tree, slots, offset + 1); 4019 4020 if (wr_mas->offset_end - offset == 1) { 4021 if (mas->index == wr_mas->r_min) { 4022 /* Overwriting the range and a part of the next one */ 4023 rcu_assign_pointer(slots[offset], wr_mas->entry); 4024 wr_mas->pivots[offset] = mas->last; 4025 } else { 4026 /* Overwriting a part of the range and the next one */ 4027 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 4028 wr_mas->pivots[offset] = mas->index - 1; 4029 mas->offset++; /* Keep mas accurate. */ 4030 } 4031 } else if (!mt_in_rcu(mas->tree)) { 4032 /* 4033 * Expand the range, only partially overwriting the previous and 4034 * next ranges 4035 */ 4036 gap |= !mt_slot_locked(mas->tree, slots, offset + 2); 4037 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 4038 wr_mas->pivots[offset] = mas->index - 1; 4039 wr_mas->pivots[offset + 1] = mas->last; 4040 mas->offset++; /* Keep mas accurate. */ 4041 } else { 4042 return false; 4043 } 4044 4045 trace_ma_write(__func__, mas, 0, wr_mas->entry); 4046 /* 4047 * Only update gap when the new entry is empty or there is an empty 4048 * entry in the original two ranges. 4049 */ 4050 if (!wr_mas->entry || gap) 4051 mas_update_gap(mas); 4052 4053 return true; 4054 } 4055 4056 static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas) 4057 { 4058 struct ma_state *mas = wr_mas->mas; 4059 4060 if (!wr_mas->slots[wr_mas->offset_end]) { 4061 /* If this one is null, the next and prev are not */ 4062 mas->last = wr_mas->end_piv; 4063 } else { 4064 /* Check next slot(s) if we are overwriting the end */ 4065 if ((mas->last == wr_mas->end_piv) && 4066 (wr_mas->node_end != wr_mas->offset_end) && 4067 !wr_mas->slots[wr_mas->offset_end + 1]) { 4068 wr_mas->offset_end++; 4069 if (wr_mas->offset_end == wr_mas->node_end) 4070 mas->last = mas->max; 4071 else 4072 mas->last = wr_mas->pivots[wr_mas->offset_end]; 4073 wr_mas->end_piv = mas->last; 4074 } 4075 } 4076 4077 if (!wr_mas->content) { 4078 /* If this one is null, the next and prev are not */ 4079 mas->index = wr_mas->r_min; 4080 } else { 4081 /* Check prev slot if we are overwriting the start */ 4082 if (mas->index == wr_mas->r_min && mas->offset && 4083 !wr_mas->slots[mas->offset - 1]) { 4084 mas->offset--; 4085 wr_mas->r_min = mas->index = 4086 mas_safe_min(mas, wr_mas->pivots, mas->offset); 4087 wr_mas->r_max = wr_mas->pivots[mas->offset]; 4088 } 4089 } 4090 } 4091 4092 static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas) 4093 { 4094 while ((wr_mas->offset_end < wr_mas->node_end) && 4095 (wr_mas->mas->last > wr_mas->pivots[wr_mas->offset_end])) 4096 wr_mas->offset_end++; 4097 4098 if (wr_mas->offset_end < wr_mas->node_end) 4099 wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end]; 4100 else 4101 wr_mas->end_piv = wr_mas->mas->max; 4102 4103 if (!wr_mas->entry) 4104 mas_wr_extend_null(wr_mas); 4105 } 4106 4107 static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas) 4108 { 4109 struct ma_state *mas = wr_mas->mas; 4110 unsigned char new_end = wr_mas->node_end + 2; 4111 4112 new_end -= wr_mas->offset_end - mas->offset; 4113 if (wr_mas->r_min == mas->index) 4114 new_end--; 4115 4116 if (wr_mas->end_piv == mas->last) 4117 new_end--; 4118 4119 return new_end; 4120 } 4121 4122 /* 4123 * mas_wr_append: Attempt to append 4124 * @wr_mas: the maple write state 4125 * @new_end: The end of the node after the modification 4126 * 4127 * This is currently unsafe in rcu mode since the end of the node may be cached 4128 * by readers while the node contents may be updated which could result in 4129 * inaccurate information. 4130 * 4131 * Return: True if appended, false otherwise 4132 */ 4133 static inline bool mas_wr_append(struct ma_wr_state *wr_mas, 4134 unsigned char new_end) 4135 { 4136 struct ma_state *mas; 4137 void __rcu **slots; 4138 unsigned char end; 4139 4140 mas = wr_mas->mas; 4141 if (mt_in_rcu(mas->tree)) 4142 return false; 4143 4144 if (mas->offset != wr_mas->node_end) 4145 return false; 4146 4147 end = wr_mas->node_end; 4148 if (mas->offset != end) 4149 return false; 4150 4151 if (new_end < mt_pivots[wr_mas->type]) { 4152 wr_mas->pivots[new_end] = wr_mas->pivots[end]; 4153 ma_set_meta(wr_mas->node, wr_mas->type, 0, new_end); 4154 } 4155 4156 slots = wr_mas->slots; 4157 if (new_end == end + 1) { 4158 if (mas->last == wr_mas->r_max) { 4159 /* Append to end of range */ 4160 rcu_assign_pointer(slots[new_end], wr_mas->entry); 4161 wr_mas->pivots[end] = mas->index - 1; 4162 mas->offset = new_end; 4163 } else { 4164 /* Append to start of range */ 4165 rcu_assign_pointer(slots[new_end], wr_mas->content); 4166 wr_mas->pivots[end] = mas->last; 4167 rcu_assign_pointer(slots[end], wr_mas->entry); 4168 } 4169 } else { 4170 /* Append to the range without touching any boundaries. */ 4171 rcu_assign_pointer(slots[new_end], wr_mas->content); 4172 wr_mas->pivots[end + 1] = mas->last; 4173 rcu_assign_pointer(slots[end + 1], wr_mas->entry); 4174 wr_mas->pivots[end] = mas->index - 1; 4175 mas->offset = end + 1; 4176 } 4177 4178 if (!wr_mas->content || !wr_mas->entry) 4179 mas_update_gap(mas); 4180 4181 trace_ma_write(__func__, mas, new_end, wr_mas->entry); 4182 return true; 4183 } 4184 4185 /* 4186 * mas_wr_bnode() - Slow path for a modification. 4187 * @wr_mas: The write maple state 4188 * 4189 * This is where split, rebalance end up. 4190 */ 4191 static void mas_wr_bnode(struct ma_wr_state *wr_mas) 4192 { 4193 struct maple_big_node b_node; 4194 4195 trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry); 4196 memset(&b_node, 0, sizeof(struct maple_big_node)); 4197 mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end); 4198 mas_commit_b_node(wr_mas, &b_node, wr_mas->node_end); 4199 } 4200 4201 static inline void mas_wr_modify(struct ma_wr_state *wr_mas) 4202 { 4203 struct ma_state *mas = wr_mas->mas; 4204 unsigned char new_end; 4205 4206 /* Direct replacement */ 4207 if (wr_mas->r_min == mas->index && wr_mas->r_max == mas->last) { 4208 rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry); 4209 if (!!wr_mas->entry ^ !!wr_mas->content) 4210 mas_update_gap(mas); 4211 return; 4212 } 4213 4214 /* 4215 * new_end exceeds the size of the maple node and cannot enter the fast 4216 * path. 4217 */ 4218 new_end = mas_wr_new_end(wr_mas); 4219 if (new_end >= mt_slots[wr_mas->type]) 4220 goto slow_path; 4221 4222 /* Attempt to append */ 4223 if (mas_wr_append(wr_mas, new_end)) 4224 return; 4225 4226 if (new_end == wr_mas->node_end && mas_wr_slot_store(wr_mas)) 4227 return; 4228 4229 if (mas_wr_node_store(wr_mas, new_end)) 4230 return; 4231 4232 if (mas_is_err(mas)) 4233 return; 4234 4235 slow_path: 4236 mas_wr_bnode(wr_mas); 4237 } 4238 4239 /* 4240 * mas_wr_store_entry() - Internal call to store a value 4241 * @mas: The maple state 4242 * @entry: The entry to store. 4243 * 4244 * Return: The contents that was stored at the index. 4245 */ 4246 static inline void *mas_wr_store_entry(struct ma_wr_state *wr_mas) 4247 { 4248 struct ma_state *mas = wr_mas->mas; 4249 4250 wr_mas->content = mas_start(mas); 4251 if (mas_is_none(mas) || mas_is_ptr(mas)) { 4252 mas_store_root(mas, wr_mas->entry); 4253 return wr_mas->content; 4254 } 4255 4256 if (unlikely(!mas_wr_walk(wr_mas))) { 4257 mas_wr_spanning_store(wr_mas); 4258 return wr_mas->content; 4259 } 4260 4261 /* At this point, we are at the leaf node that needs to be altered. */ 4262 mas_wr_end_piv(wr_mas); 4263 /* New root for a single pointer */ 4264 if (unlikely(!mas->index && mas->last == ULONG_MAX)) { 4265 mas_new_root(mas, wr_mas->entry); 4266 return wr_mas->content; 4267 } 4268 4269 mas_wr_modify(wr_mas); 4270 return wr_mas->content; 4271 } 4272 4273 /** 4274 * mas_insert() - Internal call to insert a value 4275 * @mas: The maple state 4276 * @entry: The entry to store 4277 * 4278 * Return: %NULL or the contents that already exists at the requested index 4279 * otherwise. The maple state needs to be checked for error conditions. 4280 */ 4281 static inline void *mas_insert(struct ma_state *mas, void *entry) 4282 { 4283 MA_WR_STATE(wr_mas, mas, entry); 4284 4285 /* 4286 * Inserting a new range inserts either 0, 1, or 2 pivots within the 4287 * tree. If the insert fits exactly into an existing gap with a value 4288 * of NULL, then the slot only needs to be written with the new value. 4289 * If the range being inserted is adjacent to another range, then only a 4290 * single pivot needs to be inserted (as well as writing the entry). If 4291 * the new range is within a gap but does not touch any other ranges, 4292 * then two pivots need to be inserted: the start - 1, and the end. As 4293 * usual, the entry must be written. Most operations require a new node 4294 * to be allocated and replace an existing node to ensure RCU safety, 4295 * when in RCU mode. The exception to requiring a newly allocated node 4296 * is when inserting at the end of a node (appending). When done 4297 * carefully, appending can reuse the node in place. 4298 */ 4299 wr_mas.content = mas_start(mas); 4300 if (wr_mas.content) 4301 goto exists; 4302 4303 if (mas_is_none(mas) || mas_is_ptr(mas)) { 4304 mas_store_root(mas, entry); 4305 return NULL; 4306 } 4307 4308 /* spanning writes always overwrite something */ 4309 if (!mas_wr_walk(&wr_mas)) 4310 goto exists; 4311 4312 /* At this point, we are at the leaf node that needs to be altered. */ 4313 wr_mas.offset_end = mas->offset; 4314 wr_mas.end_piv = wr_mas.r_max; 4315 4316 if (wr_mas.content || (mas->last > wr_mas.r_max)) 4317 goto exists; 4318 4319 if (!entry) 4320 return NULL; 4321 4322 mas_wr_modify(&wr_mas); 4323 return wr_mas.content; 4324 4325 exists: 4326 mas_set_err(mas, -EEXIST); 4327 return wr_mas.content; 4328 4329 } 4330 4331 static inline void mas_rewalk(struct ma_state *mas, unsigned long index) 4332 { 4333 retry: 4334 mas_set(mas, index); 4335 mas_state_walk(mas); 4336 if (mas_is_start(mas)) 4337 goto retry; 4338 } 4339 4340 static inline bool mas_rewalk_if_dead(struct ma_state *mas, 4341 struct maple_node *node, const unsigned long index) 4342 { 4343 if (unlikely(ma_dead_node(node))) { 4344 mas_rewalk(mas, index); 4345 return true; 4346 } 4347 return false; 4348 } 4349 4350 /* 4351 * mas_prev_node() - Find the prev non-null entry at the same level in the 4352 * tree. The prev value will be mas->node[mas->offset] or MAS_NONE. 4353 * @mas: The maple state 4354 * @min: The lower limit to search 4355 * 4356 * The prev node value will be mas->node[mas->offset] or MAS_NONE. 4357 * Return: 1 if the node is dead, 0 otherwise. 4358 */ 4359 static inline int mas_prev_node(struct ma_state *mas, unsigned long min) 4360 { 4361 enum maple_type mt; 4362 int offset, level; 4363 void __rcu **slots; 4364 struct maple_node *node; 4365 unsigned long *pivots; 4366 unsigned long max; 4367 4368 node = mas_mn(mas); 4369 if (!mas->min) 4370 goto no_entry; 4371 4372 max = mas->min - 1; 4373 if (max < min) 4374 goto no_entry; 4375 4376 level = 0; 4377 do { 4378 if (ma_is_root(node)) 4379 goto no_entry; 4380 4381 /* Walk up. */ 4382 if (unlikely(mas_ascend(mas))) 4383 return 1; 4384 offset = mas->offset; 4385 level++; 4386 node = mas_mn(mas); 4387 } while (!offset); 4388 4389 offset--; 4390 mt = mte_node_type(mas->node); 4391 while (level > 1) { 4392 level--; 4393 slots = ma_slots(node, mt); 4394 mas->node = mas_slot(mas, slots, offset); 4395 if (unlikely(ma_dead_node(node))) 4396 return 1; 4397 4398 mt = mte_node_type(mas->node); 4399 node = mas_mn(mas); 4400 pivots = ma_pivots(node, mt); 4401 offset = ma_data_end(node, mt, pivots, max); 4402 if (unlikely(ma_dead_node(node))) 4403 return 1; 4404 } 4405 4406 slots = ma_slots(node, mt); 4407 mas->node = mas_slot(mas, slots, offset); 4408 pivots = ma_pivots(node, mt); 4409 if (unlikely(ma_dead_node(node))) 4410 return 1; 4411 4412 if (likely(offset)) 4413 mas->min = pivots[offset - 1] + 1; 4414 mas->max = max; 4415 mas->offset = mas_data_end(mas); 4416 if (unlikely(mte_dead_node(mas->node))) 4417 return 1; 4418 4419 return 0; 4420 4421 no_entry: 4422 if (unlikely(ma_dead_node(node))) 4423 return 1; 4424 4425 mas->node = MAS_NONE; 4426 return 0; 4427 } 4428 4429 /* 4430 * mas_prev_slot() - Get the entry in the previous slot 4431 * 4432 * @mas: The maple state 4433 * @max: The minimum starting range 4434 * @empty: Can be empty 4435 * @set_underflow: Set the @mas->node to underflow state on limit. 4436 * 4437 * Return: The entry in the previous slot which is possibly NULL 4438 */ 4439 static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty, 4440 bool set_underflow) 4441 { 4442 void *entry; 4443 void __rcu **slots; 4444 unsigned long pivot; 4445 enum maple_type type; 4446 unsigned long *pivots; 4447 struct maple_node *node; 4448 unsigned long save_point = mas->index; 4449 4450 retry: 4451 node = mas_mn(mas); 4452 type = mte_node_type(mas->node); 4453 pivots = ma_pivots(node, type); 4454 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4455 goto retry; 4456 4457 if (mas->min <= min) { 4458 pivot = mas_safe_min(mas, pivots, mas->offset); 4459 4460 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4461 goto retry; 4462 4463 if (pivot <= min) 4464 goto underflow; 4465 } 4466 4467 again: 4468 if (likely(mas->offset)) { 4469 mas->offset--; 4470 mas->last = mas->index - 1; 4471 mas->index = mas_safe_min(mas, pivots, mas->offset); 4472 } else { 4473 if (mas_prev_node(mas, min)) { 4474 mas_rewalk(mas, save_point); 4475 goto retry; 4476 } 4477 4478 if (mas_is_none(mas)) 4479 goto underflow; 4480 4481 mas->last = mas->max; 4482 node = mas_mn(mas); 4483 type = mte_node_type(mas->node); 4484 pivots = ma_pivots(node, type); 4485 mas->index = pivots[mas->offset - 1] + 1; 4486 } 4487 4488 slots = ma_slots(node, type); 4489 entry = mas_slot(mas, slots, mas->offset); 4490 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4491 goto retry; 4492 4493 if (likely(entry)) 4494 return entry; 4495 4496 if (!empty) { 4497 if (mas->index <= min) 4498 goto underflow; 4499 4500 goto again; 4501 } 4502 4503 return entry; 4504 4505 underflow: 4506 if (set_underflow) 4507 mas->node = MAS_UNDERFLOW; 4508 return NULL; 4509 } 4510 4511 /* 4512 * mas_next_node() - Get the next node at the same level in the tree. 4513 * @mas: The maple state 4514 * @max: The maximum pivot value to check. 4515 * 4516 * The next value will be mas->node[mas->offset] or MAS_NONE. 4517 * Return: 1 on dead node, 0 otherwise. 4518 */ 4519 static inline int mas_next_node(struct ma_state *mas, struct maple_node *node, 4520 unsigned long max) 4521 { 4522 unsigned long min; 4523 unsigned long *pivots; 4524 struct maple_enode *enode; 4525 int level = 0; 4526 unsigned char node_end; 4527 enum maple_type mt; 4528 void __rcu **slots; 4529 4530 if (mas->max >= max) 4531 goto no_entry; 4532 4533 min = mas->max + 1; 4534 level = 0; 4535 do { 4536 if (ma_is_root(node)) 4537 goto no_entry; 4538 4539 /* Walk up. */ 4540 if (unlikely(mas_ascend(mas))) 4541 return 1; 4542 4543 level++; 4544 node = mas_mn(mas); 4545 mt = mte_node_type(mas->node); 4546 pivots = ma_pivots(node, mt); 4547 node_end = ma_data_end(node, mt, pivots, mas->max); 4548 if (unlikely(ma_dead_node(node))) 4549 return 1; 4550 4551 } while (unlikely(mas->offset == node_end)); 4552 4553 slots = ma_slots(node, mt); 4554 mas->offset++; 4555 enode = mas_slot(mas, slots, mas->offset); 4556 if (unlikely(ma_dead_node(node))) 4557 return 1; 4558 4559 if (level > 1) 4560 mas->offset = 0; 4561 4562 while (unlikely(level > 1)) { 4563 level--; 4564 mas->node = enode; 4565 node = mas_mn(mas); 4566 mt = mte_node_type(mas->node); 4567 slots = ma_slots(node, mt); 4568 enode = mas_slot(mas, slots, 0); 4569 if (unlikely(ma_dead_node(node))) 4570 return 1; 4571 } 4572 4573 if (!mas->offset) 4574 pivots = ma_pivots(node, mt); 4575 4576 mas->max = mas_safe_pivot(mas, pivots, mas->offset, mt); 4577 if (unlikely(ma_dead_node(node))) 4578 return 1; 4579 4580 mas->node = enode; 4581 mas->min = min; 4582 return 0; 4583 4584 no_entry: 4585 if (unlikely(ma_dead_node(node))) 4586 return 1; 4587 4588 mas->node = MAS_NONE; 4589 return 0; 4590 } 4591 4592 /* 4593 * mas_next_slot() - Get the entry in the next slot 4594 * 4595 * @mas: The maple state 4596 * @max: The maximum starting range 4597 * @empty: Can be empty 4598 * @set_overflow: Should @mas->node be set to overflow when the limit is 4599 * reached. 4600 * 4601 * Return: The entry in the next slot which is possibly NULL 4602 */ 4603 static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty, 4604 bool set_overflow) 4605 { 4606 void __rcu **slots; 4607 unsigned long *pivots; 4608 unsigned long pivot; 4609 enum maple_type type; 4610 struct maple_node *node; 4611 unsigned char data_end; 4612 unsigned long save_point = mas->last; 4613 void *entry; 4614 4615 retry: 4616 node = mas_mn(mas); 4617 type = mte_node_type(mas->node); 4618 pivots = ma_pivots(node, type); 4619 data_end = ma_data_end(node, type, pivots, mas->max); 4620 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4621 goto retry; 4622 4623 if (mas->max >= max) { 4624 if (likely(mas->offset < data_end)) 4625 pivot = pivots[mas->offset]; 4626 else 4627 goto overflow; 4628 4629 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4630 goto retry; 4631 4632 if (pivot >= max) 4633 goto overflow; 4634 } 4635 4636 if (likely(mas->offset < data_end)) { 4637 mas->index = pivots[mas->offset] + 1; 4638 again: 4639 mas->offset++; 4640 if (likely(mas->offset < data_end)) 4641 mas->last = pivots[mas->offset]; 4642 else 4643 mas->last = mas->max; 4644 } else { 4645 if (mas_next_node(mas, node, max)) { 4646 mas_rewalk(mas, save_point); 4647 goto retry; 4648 } 4649 4650 if (WARN_ON_ONCE(mas_is_none(mas))) { 4651 mas->node = MAS_OVERFLOW; 4652 return NULL; 4653 goto overflow; 4654 } 4655 4656 mas->offset = 0; 4657 mas->index = mas->min; 4658 node = mas_mn(mas); 4659 type = mte_node_type(mas->node); 4660 pivots = ma_pivots(node, type); 4661 mas->last = pivots[0]; 4662 } 4663 4664 slots = ma_slots(node, type); 4665 entry = mt_slot(mas->tree, slots, mas->offset); 4666 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4667 goto retry; 4668 4669 if (entry) 4670 return entry; 4671 4672 if (!empty) { 4673 if (mas->last >= max) 4674 goto overflow; 4675 4676 mas->index = mas->last + 1; 4677 /* Node cannot end on NULL, so it's safe to short-cut here */ 4678 goto again; 4679 } 4680 4681 return entry; 4682 4683 overflow: 4684 if (set_overflow) 4685 mas->node = MAS_OVERFLOW; 4686 return NULL; 4687 } 4688 4689 /* 4690 * mas_next_entry() - Internal function to get the next entry. 4691 * @mas: The maple state 4692 * @limit: The maximum range start. 4693 * 4694 * Set the @mas->node to the next entry and the range_start to 4695 * the beginning value for the entry. Does not check beyond @limit. 4696 * Sets @mas->index and @mas->last to the range, Does not update @mas->index and 4697 * @mas->last on overflow. 4698 * Restarts on dead nodes. 4699 * 4700 * Return: the next entry or %NULL. 4701 */ 4702 static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit) 4703 { 4704 if (mas->last >= limit) { 4705 mas->node = MAS_OVERFLOW; 4706 return NULL; 4707 } 4708 4709 return mas_next_slot(mas, limit, false, true); 4710 } 4711 4712 /* 4713 * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the 4714 * highest gap address of a given size in a given node and descend. 4715 * @mas: The maple state 4716 * @size: The needed size. 4717 * 4718 * Return: True if found in a leaf, false otherwise. 4719 * 4720 */ 4721 static bool mas_rev_awalk(struct ma_state *mas, unsigned long size, 4722 unsigned long *gap_min, unsigned long *gap_max) 4723 { 4724 enum maple_type type = mte_node_type(mas->node); 4725 struct maple_node *node = mas_mn(mas); 4726 unsigned long *pivots, *gaps; 4727 void __rcu **slots; 4728 unsigned long gap = 0; 4729 unsigned long max, min; 4730 unsigned char offset; 4731 4732 if (unlikely(mas_is_err(mas))) 4733 return true; 4734 4735 if (ma_is_dense(type)) { 4736 /* dense nodes. */ 4737 mas->offset = (unsigned char)(mas->index - mas->min); 4738 return true; 4739 } 4740 4741 pivots = ma_pivots(node, type); 4742 slots = ma_slots(node, type); 4743 gaps = ma_gaps(node, type); 4744 offset = mas->offset; 4745 min = mas_safe_min(mas, pivots, offset); 4746 /* Skip out of bounds. */ 4747 while (mas->last < min) 4748 min = mas_safe_min(mas, pivots, --offset); 4749 4750 max = mas_safe_pivot(mas, pivots, offset, type); 4751 while (mas->index <= max) { 4752 gap = 0; 4753 if (gaps) 4754 gap = gaps[offset]; 4755 else if (!mas_slot(mas, slots, offset)) 4756 gap = max - min + 1; 4757 4758 if (gap) { 4759 if ((size <= gap) && (size <= mas->last - min + 1)) 4760 break; 4761 4762 if (!gaps) { 4763 /* Skip the next slot, it cannot be a gap. */ 4764 if (offset < 2) 4765 goto ascend; 4766 4767 offset -= 2; 4768 max = pivots[offset]; 4769 min = mas_safe_min(mas, pivots, offset); 4770 continue; 4771 } 4772 } 4773 4774 if (!offset) 4775 goto ascend; 4776 4777 offset--; 4778 max = min - 1; 4779 min = mas_safe_min(mas, pivots, offset); 4780 } 4781 4782 if (unlikely((mas->index > max) || (size - 1 > max - mas->index))) 4783 goto no_space; 4784 4785 if (unlikely(ma_is_leaf(type))) { 4786 mas->offset = offset; 4787 *gap_min = min; 4788 *gap_max = min + gap - 1; 4789 return true; 4790 } 4791 4792 /* descend, only happens under lock. */ 4793 mas->node = mas_slot(mas, slots, offset); 4794 mas->min = min; 4795 mas->max = max; 4796 mas->offset = mas_data_end(mas); 4797 return false; 4798 4799 ascend: 4800 if (!mte_is_root(mas->node)) 4801 return false; 4802 4803 no_space: 4804 mas_set_err(mas, -EBUSY); 4805 return false; 4806 } 4807 4808 static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) 4809 { 4810 enum maple_type type = mte_node_type(mas->node); 4811 unsigned long pivot, min, gap = 0; 4812 unsigned char offset, data_end; 4813 unsigned long *gaps, *pivots; 4814 void __rcu **slots; 4815 struct maple_node *node; 4816 bool found = false; 4817 4818 if (ma_is_dense(type)) { 4819 mas->offset = (unsigned char)(mas->index - mas->min); 4820 return true; 4821 } 4822 4823 node = mas_mn(mas); 4824 pivots = ma_pivots(node, type); 4825 slots = ma_slots(node, type); 4826 gaps = ma_gaps(node, type); 4827 offset = mas->offset; 4828 min = mas_safe_min(mas, pivots, offset); 4829 data_end = ma_data_end(node, type, pivots, mas->max); 4830 for (; offset <= data_end; offset++) { 4831 pivot = mas_safe_pivot(mas, pivots, offset, type); 4832 4833 /* Not within lower bounds */ 4834 if (mas->index > pivot) 4835 goto next_slot; 4836 4837 if (gaps) 4838 gap = gaps[offset]; 4839 else if (!mas_slot(mas, slots, offset)) 4840 gap = min(pivot, mas->last) - max(mas->index, min) + 1; 4841 else 4842 goto next_slot; 4843 4844 if (gap >= size) { 4845 if (ma_is_leaf(type)) { 4846 found = true; 4847 goto done; 4848 } 4849 if (mas->index <= pivot) { 4850 mas->node = mas_slot(mas, slots, offset); 4851 mas->min = min; 4852 mas->max = pivot; 4853 offset = 0; 4854 break; 4855 } 4856 } 4857 next_slot: 4858 min = pivot + 1; 4859 if (mas->last <= pivot) { 4860 mas_set_err(mas, -EBUSY); 4861 return true; 4862 } 4863 } 4864 4865 if (mte_is_root(mas->node)) 4866 found = true; 4867 done: 4868 mas->offset = offset; 4869 return found; 4870 } 4871 4872 /** 4873 * mas_walk() - Search for @mas->index in the tree. 4874 * @mas: The maple state. 4875 * 4876 * mas->index and mas->last will be set to the range if there is a value. If 4877 * mas->node is MAS_NONE, reset to MAS_START. 4878 * 4879 * Return: the entry at the location or %NULL. 4880 */ 4881 void *mas_walk(struct ma_state *mas) 4882 { 4883 void *entry; 4884 4885 if (!mas_is_active(mas) || !mas_is_start(mas)) 4886 mas->node = MAS_START; 4887 retry: 4888 entry = mas_state_walk(mas); 4889 if (mas_is_start(mas)) { 4890 goto retry; 4891 } else if (mas_is_none(mas)) { 4892 mas->index = 0; 4893 mas->last = ULONG_MAX; 4894 } else if (mas_is_ptr(mas)) { 4895 if (!mas->index) { 4896 mas->last = 0; 4897 return entry; 4898 } 4899 4900 mas->index = 1; 4901 mas->last = ULONG_MAX; 4902 mas->node = MAS_NONE; 4903 return NULL; 4904 } 4905 4906 return entry; 4907 } 4908 EXPORT_SYMBOL_GPL(mas_walk); 4909 4910 static inline bool mas_rewind_node(struct ma_state *mas) 4911 { 4912 unsigned char slot; 4913 4914 do { 4915 if (mte_is_root(mas->node)) { 4916 slot = mas->offset; 4917 if (!slot) 4918 return false; 4919 } else { 4920 mas_ascend(mas); 4921 slot = mas->offset; 4922 } 4923 } while (!slot); 4924 4925 mas->offset = --slot; 4926 return true; 4927 } 4928 4929 /* 4930 * mas_skip_node() - Internal function. Skip over a node. 4931 * @mas: The maple state. 4932 * 4933 * Return: true if there is another node, false otherwise. 4934 */ 4935 static inline bool mas_skip_node(struct ma_state *mas) 4936 { 4937 if (mas_is_err(mas)) 4938 return false; 4939 4940 do { 4941 if (mte_is_root(mas->node)) { 4942 if (mas->offset >= mas_data_end(mas)) { 4943 mas_set_err(mas, -EBUSY); 4944 return false; 4945 } 4946 } else { 4947 mas_ascend(mas); 4948 } 4949 } while (mas->offset >= mas_data_end(mas)); 4950 4951 mas->offset++; 4952 return true; 4953 } 4954 4955 /* 4956 * mas_awalk() - Allocation walk. Search from low address to high, for a gap of 4957 * @size 4958 * @mas: The maple state 4959 * @size: The size of the gap required 4960 * 4961 * Search between @mas->index and @mas->last for a gap of @size. 4962 */ 4963 static inline void mas_awalk(struct ma_state *mas, unsigned long size) 4964 { 4965 struct maple_enode *last = NULL; 4966 4967 /* 4968 * There are 4 options: 4969 * go to child (descend) 4970 * go back to parent (ascend) 4971 * no gap found. (return, slot == MAPLE_NODE_SLOTS) 4972 * found the gap. (return, slot != MAPLE_NODE_SLOTS) 4973 */ 4974 while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) { 4975 if (last == mas->node) 4976 mas_skip_node(mas); 4977 else 4978 last = mas->node; 4979 } 4980 } 4981 4982 /* 4983 * mas_sparse_area() - Internal function. Return upper or lower limit when 4984 * searching for a gap in an empty tree. 4985 * @mas: The maple state 4986 * @min: the minimum range 4987 * @max: The maximum range 4988 * @size: The size of the gap 4989 * @fwd: Searching forward or back 4990 */ 4991 static inline int mas_sparse_area(struct ma_state *mas, unsigned long min, 4992 unsigned long max, unsigned long size, bool fwd) 4993 { 4994 if (!unlikely(mas_is_none(mas)) && min == 0) { 4995 min++; 4996 /* 4997 * At this time, min is increased, we need to recheck whether 4998 * the size is satisfied. 4999 */ 5000 if (min > max || max - min + 1 < size) 5001 return -EBUSY; 5002 } 5003 /* mas_is_ptr */ 5004 5005 if (fwd) { 5006 mas->index = min; 5007 mas->last = min + size - 1; 5008 } else { 5009 mas->last = max; 5010 mas->index = max - size + 1; 5011 } 5012 return 0; 5013 } 5014 5015 /* 5016 * mas_empty_area() - Get the lowest address within the range that is 5017 * sufficient for the size requested. 5018 * @mas: The maple state 5019 * @min: The lowest value of the range 5020 * @max: The highest value of the range 5021 * @size: The size needed 5022 */ 5023 int mas_empty_area(struct ma_state *mas, unsigned long min, 5024 unsigned long max, unsigned long size) 5025 { 5026 unsigned char offset; 5027 unsigned long *pivots; 5028 enum maple_type mt; 5029 5030 if (min > max) 5031 return -EINVAL; 5032 5033 if (size == 0 || max - min < size - 1) 5034 return -EINVAL; 5035 5036 if (mas_is_start(mas)) 5037 mas_start(mas); 5038 else if (mas->offset >= 2) 5039 mas->offset -= 2; 5040 else if (!mas_skip_node(mas)) 5041 return -EBUSY; 5042 5043 /* Empty set */ 5044 if (mas_is_none(mas) || mas_is_ptr(mas)) 5045 return mas_sparse_area(mas, min, max, size, true); 5046 5047 /* The start of the window can only be within these values */ 5048 mas->index = min; 5049 mas->last = max; 5050 mas_awalk(mas, size); 5051 5052 if (unlikely(mas_is_err(mas))) 5053 return xa_err(mas->node); 5054 5055 offset = mas->offset; 5056 if (unlikely(offset == MAPLE_NODE_SLOTS)) 5057 return -EBUSY; 5058 5059 mt = mte_node_type(mas->node); 5060 pivots = ma_pivots(mas_mn(mas), mt); 5061 min = mas_safe_min(mas, pivots, offset); 5062 if (mas->index < min) 5063 mas->index = min; 5064 mas->last = mas->index + size - 1; 5065 return 0; 5066 } 5067 EXPORT_SYMBOL_GPL(mas_empty_area); 5068 5069 /* 5070 * mas_empty_area_rev() - Get the highest address within the range that is 5071 * sufficient for the size requested. 5072 * @mas: The maple state 5073 * @min: The lowest value of the range 5074 * @max: The highest value of the range 5075 * @size: The size needed 5076 */ 5077 int mas_empty_area_rev(struct ma_state *mas, unsigned long min, 5078 unsigned long max, unsigned long size) 5079 { 5080 struct maple_enode *last = mas->node; 5081 5082 if (min > max) 5083 return -EINVAL; 5084 5085 if (size == 0 || max - min < size - 1) 5086 return -EINVAL; 5087 5088 if (mas_is_start(mas)) { 5089 mas_start(mas); 5090 mas->offset = mas_data_end(mas); 5091 } else if (mas->offset >= 2) { 5092 mas->offset -= 2; 5093 } else if (!mas_rewind_node(mas)) { 5094 return -EBUSY; 5095 } 5096 5097 /* Empty set. */ 5098 if (mas_is_none(mas) || mas_is_ptr(mas)) 5099 return mas_sparse_area(mas, min, max, size, false); 5100 5101 /* The start of the window can only be within these values. */ 5102 mas->index = min; 5103 mas->last = max; 5104 5105 while (!mas_rev_awalk(mas, size, &min, &max)) { 5106 if (last == mas->node) { 5107 if (!mas_rewind_node(mas)) 5108 return -EBUSY; 5109 } else { 5110 last = mas->node; 5111 } 5112 } 5113 5114 if (mas_is_err(mas)) 5115 return xa_err(mas->node); 5116 5117 if (unlikely(mas->offset == MAPLE_NODE_SLOTS)) 5118 return -EBUSY; 5119 5120 /* Trim the upper limit to the max. */ 5121 if (max < mas->last) 5122 mas->last = max; 5123 5124 mas->index = mas->last - size + 1; 5125 return 0; 5126 } 5127 EXPORT_SYMBOL_GPL(mas_empty_area_rev); 5128 5129 /* 5130 * mte_dead_leaves() - Mark all leaves of a node as dead. 5131 * @mas: The maple state 5132 * @slots: Pointer to the slot array 5133 * @type: The maple node type 5134 * 5135 * Must hold the write lock. 5136 * 5137 * Return: The number of leaves marked as dead. 5138 */ 5139 static inline 5140 unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt, 5141 void __rcu **slots) 5142 { 5143 struct maple_node *node; 5144 enum maple_type type; 5145 void *entry; 5146 int offset; 5147 5148 for (offset = 0; offset < mt_slot_count(enode); offset++) { 5149 entry = mt_slot(mt, slots, offset); 5150 type = mte_node_type(entry); 5151 node = mte_to_node(entry); 5152 /* Use both node and type to catch LE & BE metadata */ 5153 if (!node || !type) 5154 break; 5155 5156 mte_set_node_dead(entry); 5157 node->type = type; 5158 rcu_assign_pointer(slots[offset], node); 5159 } 5160 5161 return offset; 5162 } 5163 5164 /** 5165 * mte_dead_walk() - Walk down a dead tree to just before the leaves 5166 * @enode: The maple encoded node 5167 * @offset: The starting offset 5168 * 5169 * Note: This can only be used from the RCU callback context. 5170 */ 5171 static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset) 5172 { 5173 struct maple_node *node, *next; 5174 void __rcu **slots = NULL; 5175 5176 next = mte_to_node(*enode); 5177 do { 5178 *enode = ma_enode_ptr(next); 5179 node = mte_to_node(*enode); 5180 slots = ma_slots(node, node->type); 5181 next = rcu_dereference_protected(slots[offset], 5182 lock_is_held(&rcu_callback_map)); 5183 offset = 0; 5184 } while (!ma_is_leaf(next->type)); 5185 5186 return slots; 5187 } 5188 5189 /** 5190 * mt_free_walk() - Walk & free a tree in the RCU callback context 5191 * @head: The RCU head that's within the node. 5192 * 5193 * Note: This can only be used from the RCU callback context. 5194 */ 5195 static void mt_free_walk(struct rcu_head *head) 5196 { 5197 void __rcu **slots; 5198 struct maple_node *node, *start; 5199 struct maple_enode *enode; 5200 unsigned char offset; 5201 enum maple_type type; 5202 5203 node = container_of(head, struct maple_node, rcu); 5204 5205 if (ma_is_leaf(node->type)) 5206 goto free_leaf; 5207 5208 start = node; 5209 enode = mt_mk_node(node, node->type); 5210 slots = mte_dead_walk(&enode, 0); 5211 node = mte_to_node(enode); 5212 do { 5213 mt_free_bulk(node->slot_len, slots); 5214 offset = node->parent_slot + 1; 5215 enode = node->piv_parent; 5216 if (mte_to_node(enode) == node) 5217 goto free_leaf; 5218 5219 type = mte_node_type(enode); 5220 slots = ma_slots(mte_to_node(enode), type); 5221 if ((offset < mt_slots[type]) && 5222 rcu_dereference_protected(slots[offset], 5223 lock_is_held(&rcu_callback_map))) 5224 slots = mte_dead_walk(&enode, offset); 5225 node = mte_to_node(enode); 5226 } while ((node != start) || (node->slot_len < offset)); 5227 5228 slots = ma_slots(node, node->type); 5229 mt_free_bulk(node->slot_len, slots); 5230 5231 free_leaf: 5232 mt_free_rcu(&node->rcu); 5233 } 5234 5235 static inline void __rcu **mte_destroy_descend(struct maple_enode **enode, 5236 struct maple_tree *mt, struct maple_enode *prev, unsigned char offset) 5237 { 5238 struct maple_node *node; 5239 struct maple_enode *next = *enode; 5240 void __rcu **slots = NULL; 5241 enum maple_type type; 5242 unsigned char next_offset = 0; 5243 5244 do { 5245 *enode = next; 5246 node = mte_to_node(*enode); 5247 type = mte_node_type(*enode); 5248 slots = ma_slots(node, type); 5249 next = mt_slot_locked(mt, slots, next_offset); 5250 if ((mte_dead_node(next))) 5251 next = mt_slot_locked(mt, slots, ++next_offset); 5252 5253 mte_set_node_dead(*enode); 5254 node->type = type; 5255 node->piv_parent = prev; 5256 node->parent_slot = offset; 5257 offset = next_offset; 5258 next_offset = 0; 5259 prev = *enode; 5260 } while (!mte_is_leaf(next)); 5261 5262 return slots; 5263 } 5264 5265 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 5266 bool free) 5267 { 5268 void __rcu **slots; 5269 struct maple_node *node = mte_to_node(enode); 5270 struct maple_enode *start; 5271 5272 if (mte_is_leaf(enode)) { 5273 node->type = mte_node_type(enode); 5274 goto free_leaf; 5275 } 5276 5277 start = enode; 5278 slots = mte_destroy_descend(&enode, mt, start, 0); 5279 node = mte_to_node(enode); // Updated in the above call. 5280 do { 5281 enum maple_type type; 5282 unsigned char offset; 5283 struct maple_enode *parent, *tmp; 5284 5285 node->slot_len = mte_dead_leaves(enode, mt, slots); 5286 if (free) 5287 mt_free_bulk(node->slot_len, slots); 5288 offset = node->parent_slot + 1; 5289 enode = node->piv_parent; 5290 if (mte_to_node(enode) == node) 5291 goto free_leaf; 5292 5293 type = mte_node_type(enode); 5294 slots = ma_slots(mte_to_node(enode), type); 5295 if (offset >= mt_slots[type]) 5296 goto next; 5297 5298 tmp = mt_slot_locked(mt, slots, offset); 5299 if (mte_node_type(tmp) && mte_to_node(tmp)) { 5300 parent = enode; 5301 enode = tmp; 5302 slots = mte_destroy_descend(&enode, mt, parent, offset); 5303 } 5304 next: 5305 node = mte_to_node(enode); 5306 } while (start != enode); 5307 5308 node = mte_to_node(enode); 5309 node->slot_len = mte_dead_leaves(enode, mt, slots); 5310 if (free) 5311 mt_free_bulk(node->slot_len, slots); 5312 5313 free_leaf: 5314 if (free) 5315 mt_free_rcu(&node->rcu); 5316 else 5317 mt_clear_meta(mt, node, node->type); 5318 } 5319 5320 /* 5321 * mte_destroy_walk() - Free a tree or sub-tree. 5322 * @enode: the encoded maple node (maple_enode) to start 5323 * @mt: the tree to free - needed for node types. 5324 * 5325 * Must hold the write lock. 5326 */ 5327 static inline void mte_destroy_walk(struct maple_enode *enode, 5328 struct maple_tree *mt) 5329 { 5330 struct maple_node *node = mte_to_node(enode); 5331 5332 if (mt_in_rcu(mt)) { 5333 mt_destroy_walk(enode, mt, false); 5334 call_rcu(&node->rcu, mt_free_walk); 5335 } else { 5336 mt_destroy_walk(enode, mt, true); 5337 } 5338 } 5339 5340 static void mas_wr_store_setup(struct ma_wr_state *wr_mas) 5341 { 5342 if (!mas_is_active(wr_mas->mas)) { 5343 if (mas_is_start(wr_mas->mas)) 5344 return; 5345 5346 if (unlikely(mas_is_paused(wr_mas->mas))) 5347 goto reset; 5348 5349 if (unlikely(mas_is_none(wr_mas->mas))) 5350 goto reset; 5351 5352 if (unlikely(mas_is_overflow(wr_mas->mas))) 5353 goto reset; 5354 5355 if (unlikely(mas_is_underflow(wr_mas->mas))) 5356 goto reset; 5357 } 5358 5359 /* 5360 * A less strict version of mas_is_span_wr() where we allow spanning 5361 * writes within this node. This is to stop partial walks in 5362 * mas_prealloc() from being reset. 5363 */ 5364 if (wr_mas->mas->last > wr_mas->mas->max) 5365 goto reset; 5366 5367 if (wr_mas->entry) 5368 return; 5369 5370 if (mte_is_leaf(wr_mas->mas->node) && 5371 wr_mas->mas->last == wr_mas->mas->max) 5372 goto reset; 5373 5374 return; 5375 5376 reset: 5377 mas_reset(wr_mas->mas); 5378 } 5379 5380 /* Interface */ 5381 5382 /** 5383 * mas_store() - Store an @entry. 5384 * @mas: The maple state. 5385 * @entry: The entry to store. 5386 * 5387 * The @mas->index and @mas->last is used to set the range for the @entry. 5388 * Note: The @mas should have pre-allocated entries to ensure there is memory to 5389 * store the entry. Please see mas_expected_entries()/mas_destroy() for more details. 5390 * 5391 * Return: the first entry between mas->index and mas->last or %NULL. 5392 */ 5393 void *mas_store(struct ma_state *mas, void *entry) 5394 { 5395 MA_WR_STATE(wr_mas, mas, entry); 5396 5397 trace_ma_write(__func__, mas, 0, entry); 5398 #ifdef CONFIG_DEBUG_MAPLE_TREE 5399 if (MAS_WARN_ON(mas, mas->index > mas->last)) 5400 pr_err("Error %lX > %lX %p\n", mas->index, mas->last, entry); 5401 5402 if (mas->index > mas->last) { 5403 mas_set_err(mas, -EINVAL); 5404 return NULL; 5405 } 5406 5407 #endif 5408 5409 /* 5410 * Storing is the same operation as insert with the added caveat that it 5411 * can overwrite entries. Although this seems simple enough, one may 5412 * want to examine what happens if a single store operation was to 5413 * overwrite multiple entries within a self-balancing B-Tree. 5414 */ 5415 mas_wr_store_setup(&wr_mas); 5416 mas_wr_store_entry(&wr_mas); 5417 return wr_mas.content; 5418 } 5419 EXPORT_SYMBOL_GPL(mas_store); 5420 5421 /** 5422 * mas_store_gfp() - Store a value into the tree. 5423 * @mas: The maple state 5424 * @entry: The entry to store 5425 * @gfp: The GFP_FLAGS to use for allocations if necessary. 5426 * 5427 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 5428 * be allocated. 5429 */ 5430 int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp) 5431 { 5432 MA_WR_STATE(wr_mas, mas, entry); 5433 5434 mas_wr_store_setup(&wr_mas); 5435 trace_ma_write(__func__, mas, 0, entry); 5436 retry: 5437 mas_wr_store_entry(&wr_mas); 5438 if (unlikely(mas_nomem(mas, gfp))) 5439 goto retry; 5440 5441 if (unlikely(mas_is_err(mas))) 5442 return xa_err(mas->node); 5443 5444 return 0; 5445 } 5446 EXPORT_SYMBOL_GPL(mas_store_gfp); 5447 5448 /** 5449 * mas_store_prealloc() - Store a value into the tree using memory 5450 * preallocated in the maple state. 5451 * @mas: The maple state 5452 * @entry: The entry to store. 5453 */ 5454 void mas_store_prealloc(struct ma_state *mas, void *entry) 5455 { 5456 MA_WR_STATE(wr_mas, mas, entry); 5457 5458 mas_wr_store_setup(&wr_mas); 5459 trace_ma_write(__func__, mas, 0, entry); 5460 mas_wr_store_entry(&wr_mas); 5461 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); 5462 mas_destroy(mas); 5463 } 5464 EXPORT_SYMBOL_GPL(mas_store_prealloc); 5465 5466 /** 5467 * mas_preallocate() - Preallocate enough nodes for a store operation 5468 * @mas: The maple state 5469 * @entry: The entry that will be stored 5470 * @gfp: The GFP_FLAGS to use for allocations. 5471 * 5472 * Return: 0 on success, -ENOMEM if memory could not be allocated. 5473 */ 5474 int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) 5475 { 5476 MA_WR_STATE(wr_mas, mas, entry); 5477 unsigned char node_size; 5478 int request = 1; 5479 int ret; 5480 5481 5482 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 5483 goto ask_now; 5484 5485 mas_wr_store_setup(&wr_mas); 5486 wr_mas.content = mas_start(mas); 5487 /* Root expand */ 5488 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) 5489 goto ask_now; 5490 5491 if (unlikely(!mas_wr_walk(&wr_mas))) { 5492 /* Spanning store, use worst case for now */ 5493 request = 1 + mas_mt_height(mas) * 3; 5494 goto ask_now; 5495 } 5496 5497 /* At this point, we are at the leaf node that needs to be altered. */ 5498 /* Exact fit, no nodes needed. */ 5499 if (wr_mas.r_min == mas->index && wr_mas.r_max == mas->last) 5500 return 0; 5501 5502 mas_wr_end_piv(&wr_mas); 5503 node_size = mas_wr_new_end(&wr_mas); 5504 5505 /* Slot store, does not require additional nodes */ 5506 if (node_size == wr_mas.node_end) { 5507 /* reuse node */ 5508 if (!mt_in_rcu(mas->tree)) 5509 return 0; 5510 /* shifting boundary */ 5511 if (wr_mas.offset_end - mas->offset == 1) 5512 return 0; 5513 } 5514 5515 if (node_size >= mt_slots[wr_mas.type]) { 5516 /* Split, worst case for now. */ 5517 request = 1 + mas_mt_height(mas) * 2; 5518 goto ask_now; 5519 } 5520 5521 /* New root needs a singe node */ 5522 if (unlikely(mte_is_root(mas->node))) 5523 goto ask_now; 5524 5525 /* Potential spanning rebalance collapsing a node, use worst-case */ 5526 if (node_size - 1 <= mt_min_slots[wr_mas.type]) 5527 request = mas_mt_height(mas) * 2 - 1; 5528 5529 /* node store, slot store needs one node */ 5530 ask_now: 5531 mas_node_count_gfp(mas, request, gfp); 5532 mas->mas_flags |= MA_STATE_PREALLOC; 5533 if (likely(!mas_is_err(mas))) 5534 return 0; 5535 5536 mas_set_alloc_req(mas, 0); 5537 ret = xa_err(mas->node); 5538 mas_reset(mas); 5539 mas_destroy(mas); 5540 mas_reset(mas); 5541 return ret; 5542 } 5543 EXPORT_SYMBOL_GPL(mas_preallocate); 5544 5545 /* 5546 * mas_destroy() - destroy a maple state. 5547 * @mas: The maple state 5548 * 5549 * Upon completion, check the left-most node and rebalance against the node to 5550 * the right if necessary. Frees any allocated nodes associated with this maple 5551 * state. 5552 */ 5553 void mas_destroy(struct ma_state *mas) 5554 { 5555 struct maple_alloc *node; 5556 unsigned long total; 5557 5558 /* 5559 * When using mas_for_each() to insert an expected number of elements, 5560 * it is possible that the number inserted is less than the expected 5561 * number. To fix an invalid final node, a check is performed here to 5562 * rebalance the previous node with the final node. 5563 */ 5564 if (mas->mas_flags & MA_STATE_REBALANCE) { 5565 unsigned char end; 5566 5567 mas_start(mas); 5568 mtree_range_walk(mas); 5569 end = mas_data_end(mas) + 1; 5570 if (end < mt_min_slot_count(mas->node) - 1) 5571 mas_destroy_rebalance(mas, end); 5572 5573 mas->mas_flags &= ~MA_STATE_REBALANCE; 5574 } 5575 mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC); 5576 5577 total = mas_allocated(mas); 5578 while (total) { 5579 node = mas->alloc; 5580 mas->alloc = node->slot[0]; 5581 if (node->node_count > 1) { 5582 size_t count = node->node_count - 1; 5583 5584 mt_free_bulk(count, (void __rcu **)&node->slot[1]); 5585 total -= count; 5586 } 5587 kmem_cache_free(maple_node_cache, node); 5588 total--; 5589 } 5590 5591 mas->alloc = NULL; 5592 } 5593 EXPORT_SYMBOL_GPL(mas_destroy); 5594 5595 /* 5596 * mas_expected_entries() - Set the expected number of entries that will be inserted. 5597 * @mas: The maple state 5598 * @nr_entries: The number of expected entries. 5599 * 5600 * This will attempt to pre-allocate enough nodes to store the expected number 5601 * of entries. The allocations will occur using the bulk allocator interface 5602 * for speed. Please call mas_destroy() on the @mas after inserting the entries 5603 * to ensure any unused nodes are freed. 5604 * 5605 * Return: 0 on success, -ENOMEM if memory could not be allocated. 5606 */ 5607 int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries) 5608 { 5609 int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2; 5610 struct maple_enode *enode = mas->node; 5611 int nr_nodes; 5612 int ret; 5613 5614 /* 5615 * Sometimes it is necessary to duplicate a tree to a new tree, such as 5616 * forking a process and duplicating the VMAs from one tree to a new 5617 * tree. When such a situation arises, it is known that the new tree is 5618 * not going to be used until the entire tree is populated. For 5619 * performance reasons, it is best to use a bulk load with RCU disabled. 5620 * This allows for optimistic splitting that favours the left and reuse 5621 * of nodes during the operation. 5622 */ 5623 5624 /* Optimize splitting for bulk insert in-order */ 5625 mas->mas_flags |= MA_STATE_BULK; 5626 5627 /* 5628 * Avoid overflow, assume a gap between each entry and a trailing null. 5629 * If this is wrong, it just means allocation can happen during 5630 * insertion of entries. 5631 */ 5632 nr_nodes = max(nr_entries, nr_entries * 2 + 1); 5633 if (!mt_is_alloc(mas->tree)) 5634 nonleaf_cap = MAPLE_RANGE64_SLOTS - 2; 5635 5636 /* Leaves; reduce slots to keep space for expansion */ 5637 nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2); 5638 /* Internal nodes */ 5639 nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap); 5640 /* Add working room for split (2 nodes) + new parents */ 5641 mas_node_count_gfp(mas, nr_nodes + 3, GFP_KERNEL); 5642 5643 /* Detect if allocations run out */ 5644 mas->mas_flags |= MA_STATE_PREALLOC; 5645 5646 if (!mas_is_err(mas)) 5647 return 0; 5648 5649 ret = xa_err(mas->node); 5650 mas->node = enode; 5651 mas_destroy(mas); 5652 return ret; 5653 5654 } 5655 EXPORT_SYMBOL_GPL(mas_expected_entries); 5656 5657 static inline bool mas_next_setup(struct ma_state *mas, unsigned long max, 5658 void **entry) 5659 { 5660 bool was_none = mas_is_none(mas); 5661 5662 if (unlikely(mas->last >= max)) { 5663 mas->node = MAS_OVERFLOW; 5664 return true; 5665 } 5666 5667 if (mas_is_active(mas)) 5668 return false; 5669 5670 if (mas_is_none(mas) || mas_is_paused(mas)) { 5671 mas->node = MAS_START; 5672 } else if (mas_is_overflow(mas)) { 5673 /* Overflowed before, but the max changed */ 5674 mas->node = MAS_START; 5675 } else if (mas_is_underflow(mas)) { 5676 mas->node = MAS_START; 5677 *entry = mas_walk(mas); 5678 if (*entry) 5679 return true; 5680 } 5681 5682 if (mas_is_start(mas)) 5683 *entry = mas_walk(mas); /* Retries on dead nodes handled by mas_walk */ 5684 5685 if (mas_is_ptr(mas)) { 5686 *entry = NULL; 5687 if (was_none && mas->index == 0) { 5688 mas->index = mas->last = 0; 5689 return true; 5690 } 5691 mas->index = 1; 5692 mas->last = ULONG_MAX; 5693 mas->node = MAS_NONE; 5694 return true; 5695 } 5696 5697 if (mas_is_none(mas)) 5698 return true; 5699 5700 return false; 5701 } 5702 5703 /** 5704 * mas_next() - Get the next entry. 5705 * @mas: The maple state 5706 * @max: The maximum index to check. 5707 * 5708 * Returns the next entry after @mas->index. 5709 * Must hold rcu_read_lock or the write lock. 5710 * Can return the zero entry. 5711 * 5712 * Return: The next entry or %NULL 5713 */ 5714 void *mas_next(struct ma_state *mas, unsigned long max) 5715 { 5716 void *entry = NULL; 5717 5718 if (mas_next_setup(mas, max, &entry)) 5719 return entry; 5720 5721 /* Retries on dead nodes handled by mas_next_slot */ 5722 return mas_next_slot(mas, max, false, true); 5723 } 5724 EXPORT_SYMBOL_GPL(mas_next); 5725 5726 /** 5727 * mas_next_range() - Advance the maple state to the next range 5728 * @mas: The maple state 5729 * @max: The maximum index to check. 5730 * 5731 * Sets @mas->index and @mas->last to the range. 5732 * Must hold rcu_read_lock or the write lock. 5733 * Can return the zero entry. 5734 * 5735 * Return: The next entry or %NULL 5736 */ 5737 void *mas_next_range(struct ma_state *mas, unsigned long max) 5738 { 5739 void *entry = NULL; 5740 5741 if (mas_next_setup(mas, max, &entry)) 5742 return entry; 5743 5744 /* Retries on dead nodes handled by mas_next_slot */ 5745 return mas_next_slot(mas, max, true, true); 5746 } 5747 EXPORT_SYMBOL_GPL(mas_next_range); 5748 5749 /** 5750 * mt_next() - get the next value in the maple tree 5751 * @mt: The maple tree 5752 * @index: The start index 5753 * @max: The maximum index to check 5754 * 5755 * Takes RCU read lock internally to protect the search, which does not 5756 * protect the returned pointer after dropping RCU read lock. 5757 * See also: Documentation/core-api/maple_tree.rst 5758 * 5759 * Return: The entry higher than @index or %NULL if nothing is found. 5760 */ 5761 void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max) 5762 { 5763 void *entry = NULL; 5764 MA_STATE(mas, mt, index, index); 5765 5766 rcu_read_lock(); 5767 entry = mas_next(&mas, max); 5768 rcu_read_unlock(); 5769 return entry; 5770 } 5771 EXPORT_SYMBOL_GPL(mt_next); 5772 5773 static inline bool mas_prev_setup(struct ma_state *mas, unsigned long min, 5774 void **entry) 5775 { 5776 if (unlikely(mas->index <= min)) { 5777 mas->node = MAS_UNDERFLOW; 5778 return true; 5779 } 5780 5781 if (mas_is_active(mas)) 5782 return false; 5783 5784 if (mas_is_overflow(mas)) { 5785 mas->node = MAS_START; 5786 *entry = mas_walk(mas); 5787 if (*entry) 5788 return true; 5789 } 5790 5791 if (mas_is_none(mas) || mas_is_paused(mas)) { 5792 mas->node = MAS_START; 5793 } else if (mas_is_underflow(mas)) { 5794 /* underflowed before but the min changed */ 5795 mas->node = MAS_START; 5796 } 5797 5798 if (mas_is_start(mas)) 5799 mas_walk(mas); 5800 5801 if (unlikely(mas_is_ptr(mas))) { 5802 if (!mas->index) 5803 goto none; 5804 mas->index = mas->last = 0; 5805 *entry = mas_root(mas); 5806 return true; 5807 } 5808 5809 if (mas_is_none(mas)) { 5810 if (mas->index) { 5811 /* Walked to out-of-range pointer? */ 5812 mas->index = mas->last = 0; 5813 mas->node = MAS_ROOT; 5814 *entry = mas_root(mas); 5815 return true; 5816 } 5817 return true; 5818 } 5819 5820 return false; 5821 5822 none: 5823 mas->node = MAS_NONE; 5824 return true; 5825 } 5826 5827 /** 5828 * mas_prev() - Get the previous entry 5829 * @mas: The maple state 5830 * @min: The minimum value to check. 5831 * 5832 * Must hold rcu_read_lock or the write lock. 5833 * Will reset mas to MAS_START if the node is MAS_NONE. Will stop on not 5834 * searchable nodes. 5835 * 5836 * Return: the previous value or %NULL. 5837 */ 5838 void *mas_prev(struct ma_state *mas, unsigned long min) 5839 { 5840 void *entry = NULL; 5841 5842 if (mas_prev_setup(mas, min, &entry)) 5843 return entry; 5844 5845 return mas_prev_slot(mas, min, false, true); 5846 } 5847 EXPORT_SYMBOL_GPL(mas_prev); 5848 5849 /** 5850 * mas_prev_range() - Advance to the previous range 5851 * @mas: The maple state 5852 * @min: The minimum value to check. 5853 * 5854 * Sets @mas->index and @mas->last to the range. 5855 * Must hold rcu_read_lock or the write lock. 5856 * Will reset mas to MAS_START if the node is MAS_NONE. Will stop on not 5857 * searchable nodes. 5858 * 5859 * Return: the previous value or %NULL. 5860 */ 5861 void *mas_prev_range(struct ma_state *mas, unsigned long min) 5862 { 5863 void *entry = NULL; 5864 5865 if (mas_prev_setup(mas, min, &entry)) 5866 return entry; 5867 5868 return mas_prev_slot(mas, min, true, true); 5869 } 5870 EXPORT_SYMBOL_GPL(mas_prev_range); 5871 5872 /** 5873 * mt_prev() - get the previous value in the maple tree 5874 * @mt: The maple tree 5875 * @index: The start index 5876 * @min: The minimum index to check 5877 * 5878 * Takes RCU read lock internally to protect the search, which does not 5879 * protect the returned pointer after dropping RCU read lock. 5880 * See also: Documentation/core-api/maple_tree.rst 5881 * 5882 * Return: The entry before @index or %NULL if nothing is found. 5883 */ 5884 void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min) 5885 { 5886 void *entry = NULL; 5887 MA_STATE(mas, mt, index, index); 5888 5889 rcu_read_lock(); 5890 entry = mas_prev(&mas, min); 5891 rcu_read_unlock(); 5892 return entry; 5893 } 5894 EXPORT_SYMBOL_GPL(mt_prev); 5895 5896 /** 5897 * mas_pause() - Pause a mas_find/mas_for_each to drop the lock. 5898 * @mas: The maple state to pause 5899 * 5900 * Some users need to pause a walk and drop the lock they're holding in 5901 * order to yield to a higher priority thread or carry out an operation 5902 * on an entry. Those users should call this function before they drop 5903 * the lock. It resets the @mas to be suitable for the next iteration 5904 * of the loop after the user has reacquired the lock. If most entries 5905 * found during a walk require you to call mas_pause(), the mt_for_each() 5906 * iterator may be more appropriate. 5907 * 5908 */ 5909 void mas_pause(struct ma_state *mas) 5910 { 5911 mas->node = MAS_PAUSE; 5912 } 5913 EXPORT_SYMBOL_GPL(mas_pause); 5914 5915 /** 5916 * mas_find_setup() - Internal function to set up mas_find*(). 5917 * @mas: The maple state 5918 * @max: The maximum index 5919 * @entry: Pointer to the entry 5920 * 5921 * Returns: True if entry is the answer, false otherwise. 5922 */ 5923 static inline bool mas_find_setup(struct ma_state *mas, unsigned long max, 5924 void **entry) 5925 { 5926 if (mas_is_active(mas)) { 5927 if (mas->last < max) 5928 return false; 5929 5930 return true; 5931 } 5932 5933 if (mas_is_paused(mas)) { 5934 if (unlikely(mas->last >= max)) 5935 return true; 5936 5937 mas->index = ++mas->last; 5938 mas->node = MAS_START; 5939 } else if (mas_is_none(mas)) { 5940 if (unlikely(mas->last >= max)) 5941 return true; 5942 5943 mas->index = mas->last; 5944 mas->node = MAS_START; 5945 } else if (mas_is_overflow(mas) || mas_is_underflow(mas)) { 5946 if (mas->index > max) { 5947 mas->node = MAS_OVERFLOW; 5948 return true; 5949 } 5950 5951 mas->node = MAS_START; 5952 } 5953 5954 if (mas_is_start(mas)) { 5955 /* First run or continue */ 5956 if (mas->index > max) 5957 return true; 5958 5959 *entry = mas_walk(mas); 5960 if (*entry) 5961 return true; 5962 5963 } 5964 5965 if (unlikely(!mas_searchable(mas))) { 5966 if (unlikely(mas_is_ptr(mas))) 5967 goto ptr_out_of_range; 5968 5969 return true; 5970 } 5971 5972 if (mas->index == max) 5973 return true; 5974 5975 return false; 5976 5977 ptr_out_of_range: 5978 mas->node = MAS_NONE; 5979 mas->index = 1; 5980 mas->last = ULONG_MAX; 5981 return true; 5982 } 5983 5984 /** 5985 * mas_find() - On the first call, find the entry at or after mas->index up to 5986 * %max. Otherwise, find the entry after mas->index. 5987 * @mas: The maple state 5988 * @max: The maximum value to check. 5989 * 5990 * Must hold rcu_read_lock or the write lock. 5991 * If an entry exists, last and index are updated accordingly. 5992 * May set @mas->node to MAS_NONE. 5993 * 5994 * Return: The entry or %NULL. 5995 */ 5996 void *mas_find(struct ma_state *mas, unsigned long max) 5997 { 5998 void *entry = NULL; 5999 6000 if (mas_find_setup(mas, max, &entry)) 6001 return entry; 6002 6003 /* Retries on dead nodes handled by mas_next_slot */ 6004 return mas_next_slot(mas, max, false, false); 6005 } 6006 EXPORT_SYMBOL_GPL(mas_find); 6007 6008 /** 6009 * mas_find_range() - On the first call, find the entry at or after 6010 * mas->index up to %max. Otherwise, advance to the next slot mas->index. 6011 * @mas: The maple state 6012 * @max: The maximum value to check. 6013 * 6014 * Must hold rcu_read_lock or the write lock. 6015 * If an entry exists, last and index are updated accordingly. 6016 * May set @mas->node to MAS_NONE. 6017 * 6018 * Return: The entry or %NULL. 6019 */ 6020 void *mas_find_range(struct ma_state *mas, unsigned long max) 6021 { 6022 void *entry = NULL; 6023 6024 if (mas_find_setup(mas, max, &entry)) 6025 return entry; 6026 6027 /* Retries on dead nodes handled by mas_next_slot */ 6028 return mas_next_slot(mas, max, true, false); 6029 } 6030 EXPORT_SYMBOL_GPL(mas_find_range); 6031 6032 /** 6033 * mas_find_rev_setup() - Internal function to set up mas_find_*_rev() 6034 * @mas: The maple state 6035 * @min: The minimum index 6036 * @entry: Pointer to the entry 6037 * 6038 * Returns: True if entry is the answer, false otherwise. 6039 */ 6040 static inline bool mas_find_rev_setup(struct ma_state *mas, unsigned long min, 6041 void **entry) 6042 { 6043 if (mas_is_active(mas)) { 6044 if (mas->index > min) 6045 return false; 6046 6047 return true; 6048 } 6049 6050 if (mas_is_paused(mas)) { 6051 if (unlikely(mas->index <= min)) { 6052 mas->node = MAS_NONE; 6053 return true; 6054 } 6055 mas->node = MAS_START; 6056 mas->last = --mas->index; 6057 } else if (mas_is_none(mas)) { 6058 if (mas->index <= min) 6059 goto none; 6060 6061 mas->last = mas->index; 6062 mas->node = MAS_START; 6063 } else if (mas_is_underflow(mas) || mas_is_overflow(mas)) { 6064 if (mas->last <= min) { 6065 mas->node = MAS_UNDERFLOW; 6066 return true; 6067 } 6068 6069 mas->node = MAS_START; 6070 } 6071 6072 if (mas_is_start(mas)) { 6073 /* First run or continue */ 6074 if (mas->index < min) 6075 return true; 6076 6077 *entry = mas_walk(mas); 6078 if (*entry) 6079 return true; 6080 } 6081 6082 if (unlikely(!mas_searchable(mas))) { 6083 if (mas_is_ptr(mas)) 6084 goto none; 6085 6086 if (mas_is_none(mas)) { 6087 /* 6088 * Walked to the location, and there was nothing so the 6089 * previous location is 0. 6090 */ 6091 mas->last = mas->index = 0; 6092 mas->node = MAS_ROOT; 6093 *entry = mas_root(mas); 6094 return true; 6095 } 6096 } 6097 6098 if (mas->index < min) 6099 return true; 6100 6101 return false; 6102 6103 none: 6104 mas->node = MAS_NONE; 6105 return true; 6106 } 6107 6108 /** 6109 * mas_find_rev: On the first call, find the first non-null entry at or below 6110 * mas->index down to %min. Otherwise find the first non-null entry below 6111 * mas->index down to %min. 6112 * @mas: The maple state 6113 * @min: The minimum value to check. 6114 * 6115 * Must hold rcu_read_lock or the write lock. 6116 * If an entry exists, last and index are updated accordingly. 6117 * May set @mas->node to MAS_NONE. 6118 * 6119 * Return: The entry or %NULL. 6120 */ 6121 void *mas_find_rev(struct ma_state *mas, unsigned long min) 6122 { 6123 void *entry = NULL; 6124 6125 if (mas_find_rev_setup(mas, min, &entry)) 6126 return entry; 6127 6128 /* Retries on dead nodes handled by mas_prev_slot */ 6129 return mas_prev_slot(mas, min, false, false); 6130 6131 } 6132 EXPORT_SYMBOL_GPL(mas_find_rev); 6133 6134 /** 6135 * mas_find_range_rev: On the first call, find the first non-null entry at or 6136 * below mas->index down to %min. Otherwise advance to the previous slot after 6137 * mas->index down to %min. 6138 * @mas: The maple state 6139 * @min: The minimum value to check. 6140 * 6141 * Must hold rcu_read_lock or the write lock. 6142 * If an entry exists, last and index are updated accordingly. 6143 * May set @mas->node to MAS_NONE. 6144 * 6145 * Return: The entry or %NULL. 6146 */ 6147 void *mas_find_range_rev(struct ma_state *mas, unsigned long min) 6148 { 6149 void *entry = NULL; 6150 6151 if (mas_find_rev_setup(mas, min, &entry)) 6152 return entry; 6153 6154 /* Retries on dead nodes handled by mas_prev_slot */ 6155 return mas_prev_slot(mas, min, true, false); 6156 } 6157 EXPORT_SYMBOL_GPL(mas_find_range_rev); 6158 6159 /** 6160 * mas_erase() - Find the range in which index resides and erase the entire 6161 * range. 6162 * @mas: The maple state 6163 * 6164 * Must hold the write lock. 6165 * Searches for @mas->index, sets @mas->index and @mas->last to the range and 6166 * erases that range. 6167 * 6168 * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated. 6169 */ 6170 void *mas_erase(struct ma_state *mas) 6171 { 6172 void *entry; 6173 MA_WR_STATE(wr_mas, mas, NULL); 6174 6175 if (mas_is_none(mas) || mas_is_paused(mas)) 6176 mas->node = MAS_START; 6177 6178 /* Retry unnecessary when holding the write lock. */ 6179 entry = mas_state_walk(mas); 6180 if (!entry) 6181 return NULL; 6182 6183 write_retry: 6184 /* Must reset to ensure spanning writes of last slot are detected */ 6185 mas_reset(mas); 6186 mas_wr_store_setup(&wr_mas); 6187 mas_wr_store_entry(&wr_mas); 6188 if (mas_nomem(mas, GFP_KERNEL)) 6189 goto write_retry; 6190 6191 return entry; 6192 } 6193 EXPORT_SYMBOL_GPL(mas_erase); 6194 6195 /** 6196 * mas_nomem() - Check if there was an error allocating and do the allocation 6197 * if necessary If there are allocations, then free them. 6198 * @mas: The maple state 6199 * @gfp: The GFP_FLAGS to use for allocations 6200 * Return: true on allocation, false otherwise. 6201 */ 6202 bool mas_nomem(struct ma_state *mas, gfp_t gfp) 6203 __must_hold(mas->tree->ma_lock) 6204 { 6205 if (likely(mas->node != MA_ERROR(-ENOMEM))) { 6206 mas_destroy(mas); 6207 return false; 6208 } 6209 6210 if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) { 6211 mtree_unlock(mas->tree); 6212 mas_alloc_nodes(mas, gfp); 6213 mtree_lock(mas->tree); 6214 } else { 6215 mas_alloc_nodes(mas, gfp); 6216 } 6217 6218 if (!mas_allocated(mas)) 6219 return false; 6220 6221 mas->node = MAS_START; 6222 return true; 6223 } 6224 6225 void __init maple_tree_init(void) 6226 { 6227 maple_node_cache = kmem_cache_create("maple_node", 6228 sizeof(struct maple_node), sizeof(struct maple_node), 6229 SLAB_PANIC, NULL); 6230 } 6231 6232 /** 6233 * mtree_load() - Load a value stored in a maple tree 6234 * @mt: The maple tree 6235 * @index: The index to load 6236 * 6237 * Return: the entry or %NULL 6238 */ 6239 void *mtree_load(struct maple_tree *mt, unsigned long index) 6240 { 6241 MA_STATE(mas, mt, index, index); 6242 void *entry; 6243 6244 trace_ma_read(__func__, &mas); 6245 rcu_read_lock(); 6246 retry: 6247 entry = mas_start(&mas); 6248 if (unlikely(mas_is_none(&mas))) 6249 goto unlock; 6250 6251 if (unlikely(mas_is_ptr(&mas))) { 6252 if (index) 6253 entry = NULL; 6254 6255 goto unlock; 6256 } 6257 6258 entry = mtree_lookup_walk(&mas); 6259 if (!entry && unlikely(mas_is_start(&mas))) 6260 goto retry; 6261 unlock: 6262 rcu_read_unlock(); 6263 if (xa_is_zero(entry)) 6264 return NULL; 6265 6266 return entry; 6267 } 6268 EXPORT_SYMBOL(mtree_load); 6269 6270 /** 6271 * mtree_store_range() - Store an entry at a given range. 6272 * @mt: The maple tree 6273 * @index: The start of the range 6274 * @last: The end of the range 6275 * @entry: The entry to store 6276 * @gfp: The GFP_FLAGS to use for allocations 6277 * 6278 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 6279 * be allocated. 6280 */ 6281 int mtree_store_range(struct maple_tree *mt, unsigned long index, 6282 unsigned long last, void *entry, gfp_t gfp) 6283 { 6284 MA_STATE(mas, mt, index, last); 6285 MA_WR_STATE(wr_mas, &mas, entry); 6286 6287 trace_ma_write(__func__, &mas, 0, entry); 6288 if (WARN_ON_ONCE(xa_is_advanced(entry))) 6289 return -EINVAL; 6290 6291 if (index > last) 6292 return -EINVAL; 6293 6294 mtree_lock(mt); 6295 retry: 6296 mas_wr_store_entry(&wr_mas); 6297 if (mas_nomem(&mas, gfp)) 6298 goto retry; 6299 6300 mtree_unlock(mt); 6301 if (mas_is_err(&mas)) 6302 return xa_err(mas.node); 6303 6304 return 0; 6305 } 6306 EXPORT_SYMBOL(mtree_store_range); 6307 6308 /** 6309 * mtree_store() - Store an entry at a given index. 6310 * @mt: The maple tree 6311 * @index: The index to store the value 6312 * @entry: The entry to store 6313 * @gfp: The GFP_FLAGS to use for allocations 6314 * 6315 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 6316 * be allocated. 6317 */ 6318 int mtree_store(struct maple_tree *mt, unsigned long index, void *entry, 6319 gfp_t gfp) 6320 { 6321 return mtree_store_range(mt, index, index, entry, gfp); 6322 } 6323 EXPORT_SYMBOL(mtree_store); 6324 6325 /** 6326 * mtree_insert_range() - Insert an entry at a given range if there is no value. 6327 * @mt: The maple tree 6328 * @first: The start of the range 6329 * @last: The end of the range 6330 * @entry: The entry to store 6331 * @gfp: The GFP_FLAGS to use for allocations. 6332 * 6333 * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid 6334 * request, -ENOMEM if memory could not be allocated. 6335 */ 6336 int mtree_insert_range(struct maple_tree *mt, unsigned long first, 6337 unsigned long last, void *entry, gfp_t gfp) 6338 { 6339 MA_STATE(ms, mt, first, last); 6340 6341 if (WARN_ON_ONCE(xa_is_advanced(entry))) 6342 return -EINVAL; 6343 6344 if (first > last) 6345 return -EINVAL; 6346 6347 mtree_lock(mt); 6348 retry: 6349 mas_insert(&ms, entry); 6350 if (mas_nomem(&ms, gfp)) 6351 goto retry; 6352 6353 mtree_unlock(mt); 6354 if (mas_is_err(&ms)) 6355 return xa_err(ms.node); 6356 6357 return 0; 6358 } 6359 EXPORT_SYMBOL(mtree_insert_range); 6360 6361 /** 6362 * mtree_insert() - Insert an entry at a given index if there is no value. 6363 * @mt: The maple tree 6364 * @index : The index to store the value 6365 * @entry: The entry to store 6366 * @gfp: The GFP_FLAGS to use for allocations. 6367 * 6368 * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid 6369 * request, -ENOMEM if memory could not be allocated. 6370 */ 6371 int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry, 6372 gfp_t gfp) 6373 { 6374 return mtree_insert_range(mt, index, index, entry, gfp); 6375 } 6376 EXPORT_SYMBOL(mtree_insert); 6377 6378 int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, 6379 void *entry, unsigned long size, unsigned long min, 6380 unsigned long max, gfp_t gfp) 6381 { 6382 int ret = 0; 6383 6384 MA_STATE(mas, mt, 0, 0); 6385 if (!mt_is_alloc(mt)) 6386 return -EINVAL; 6387 6388 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6389 return -EINVAL; 6390 6391 mtree_lock(mt); 6392 retry: 6393 ret = mas_empty_area(&mas, min, max, size); 6394 if (ret) 6395 goto unlock; 6396 6397 mas_insert(&mas, entry); 6398 /* 6399 * mas_nomem() may release the lock, causing the allocated area 6400 * to be unavailable, so try to allocate a free area again. 6401 */ 6402 if (mas_nomem(&mas, gfp)) 6403 goto retry; 6404 6405 if (mas_is_err(&mas)) 6406 ret = xa_err(mas.node); 6407 else 6408 *startp = mas.index; 6409 6410 unlock: 6411 mtree_unlock(mt); 6412 return ret; 6413 } 6414 EXPORT_SYMBOL(mtree_alloc_range); 6415 6416 int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, 6417 void *entry, unsigned long size, unsigned long min, 6418 unsigned long max, gfp_t gfp) 6419 { 6420 int ret = 0; 6421 6422 MA_STATE(mas, mt, 0, 0); 6423 if (!mt_is_alloc(mt)) 6424 return -EINVAL; 6425 6426 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6427 return -EINVAL; 6428 6429 mtree_lock(mt); 6430 retry: 6431 ret = mas_empty_area_rev(&mas, min, max, size); 6432 if (ret) 6433 goto unlock; 6434 6435 mas_insert(&mas, entry); 6436 /* 6437 * mas_nomem() may release the lock, causing the allocated area 6438 * to be unavailable, so try to allocate a free area again. 6439 */ 6440 if (mas_nomem(&mas, gfp)) 6441 goto retry; 6442 6443 if (mas_is_err(&mas)) 6444 ret = xa_err(mas.node); 6445 else 6446 *startp = mas.index; 6447 6448 unlock: 6449 mtree_unlock(mt); 6450 return ret; 6451 } 6452 EXPORT_SYMBOL(mtree_alloc_rrange); 6453 6454 /** 6455 * mtree_erase() - Find an index and erase the entire range. 6456 * @mt: The maple tree 6457 * @index: The index to erase 6458 * 6459 * Erasing is the same as a walk to an entry then a store of a NULL to that 6460 * ENTIRE range. In fact, it is implemented as such using the advanced API. 6461 * 6462 * Return: The entry stored at the @index or %NULL 6463 */ 6464 void *mtree_erase(struct maple_tree *mt, unsigned long index) 6465 { 6466 void *entry = NULL; 6467 6468 MA_STATE(mas, mt, index, index); 6469 trace_ma_op(__func__, &mas); 6470 6471 mtree_lock(mt); 6472 entry = mas_erase(&mas); 6473 mtree_unlock(mt); 6474 6475 return entry; 6476 } 6477 EXPORT_SYMBOL(mtree_erase); 6478 6479 /** 6480 * __mt_destroy() - Walk and free all nodes of a locked maple tree. 6481 * @mt: The maple tree 6482 * 6483 * Note: Does not handle locking. 6484 */ 6485 void __mt_destroy(struct maple_tree *mt) 6486 { 6487 void *root = mt_root_locked(mt); 6488 6489 rcu_assign_pointer(mt->ma_root, NULL); 6490 if (xa_is_node(root)) 6491 mte_destroy_walk(root, mt); 6492 6493 mt->ma_flags = 0; 6494 } 6495 EXPORT_SYMBOL_GPL(__mt_destroy); 6496 6497 /** 6498 * mtree_destroy() - Destroy a maple tree 6499 * @mt: The maple tree 6500 * 6501 * Frees all resources used by the tree. Handles locking. 6502 */ 6503 void mtree_destroy(struct maple_tree *mt) 6504 { 6505 mtree_lock(mt); 6506 __mt_destroy(mt); 6507 mtree_unlock(mt); 6508 } 6509 EXPORT_SYMBOL(mtree_destroy); 6510 6511 /** 6512 * mt_find() - Search from the start up until an entry is found. 6513 * @mt: The maple tree 6514 * @index: Pointer which contains the start location of the search 6515 * @max: The maximum value of the search range 6516 * 6517 * Takes RCU read lock internally to protect the search, which does not 6518 * protect the returned pointer after dropping RCU read lock. 6519 * See also: Documentation/core-api/maple_tree.rst 6520 * 6521 * In case that an entry is found @index is updated to point to the next 6522 * possible entry independent whether the found entry is occupying a 6523 * single index or a range if indices. 6524 * 6525 * Return: The entry at or after the @index or %NULL 6526 */ 6527 void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max) 6528 { 6529 MA_STATE(mas, mt, *index, *index); 6530 void *entry; 6531 #ifdef CONFIG_DEBUG_MAPLE_TREE 6532 unsigned long copy = *index; 6533 #endif 6534 6535 trace_ma_read(__func__, &mas); 6536 6537 if ((*index) > max) 6538 return NULL; 6539 6540 rcu_read_lock(); 6541 retry: 6542 entry = mas_state_walk(&mas); 6543 if (mas_is_start(&mas)) 6544 goto retry; 6545 6546 if (unlikely(xa_is_zero(entry))) 6547 entry = NULL; 6548 6549 if (entry) 6550 goto unlock; 6551 6552 while (mas_searchable(&mas) && (mas.last < max)) { 6553 entry = mas_next_entry(&mas, max); 6554 if (likely(entry && !xa_is_zero(entry))) 6555 break; 6556 } 6557 6558 if (unlikely(xa_is_zero(entry))) 6559 entry = NULL; 6560 unlock: 6561 rcu_read_unlock(); 6562 if (likely(entry)) { 6563 *index = mas.last + 1; 6564 #ifdef CONFIG_DEBUG_MAPLE_TREE 6565 if (MT_WARN_ON(mt, (*index) && ((*index) <= copy))) 6566 pr_err("index not increased! %lx <= %lx\n", 6567 *index, copy); 6568 #endif 6569 } 6570 6571 return entry; 6572 } 6573 EXPORT_SYMBOL(mt_find); 6574 6575 /** 6576 * mt_find_after() - Search from the start up until an entry is found. 6577 * @mt: The maple tree 6578 * @index: Pointer which contains the start location of the search 6579 * @max: The maximum value to check 6580 * 6581 * Same as mt_find() except that it checks @index for 0 before 6582 * searching. If @index == 0, the search is aborted. This covers a wrap 6583 * around of @index to 0 in an iterator loop. 6584 * 6585 * Return: The entry at or after the @index or %NULL 6586 */ 6587 void *mt_find_after(struct maple_tree *mt, unsigned long *index, 6588 unsigned long max) 6589 { 6590 if (!(*index)) 6591 return NULL; 6592 6593 return mt_find(mt, index, max); 6594 } 6595 EXPORT_SYMBOL(mt_find_after); 6596 6597 #ifdef CONFIG_DEBUG_MAPLE_TREE 6598 atomic_t maple_tree_tests_run; 6599 EXPORT_SYMBOL_GPL(maple_tree_tests_run); 6600 atomic_t maple_tree_tests_passed; 6601 EXPORT_SYMBOL_GPL(maple_tree_tests_passed); 6602 6603 #ifndef __KERNEL__ 6604 extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int); 6605 void mt_set_non_kernel(unsigned int val) 6606 { 6607 kmem_cache_set_non_kernel(maple_node_cache, val); 6608 } 6609 6610 extern unsigned long kmem_cache_get_alloc(struct kmem_cache *); 6611 unsigned long mt_get_alloc_size(void) 6612 { 6613 return kmem_cache_get_alloc(maple_node_cache); 6614 } 6615 6616 extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *); 6617 void mt_zero_nr_tallocated(void) 6618 { 6619 kmem_cache_zero_nr_tallocated(maple_node_cache); 6620 } 6621 6622 extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *); 6623 unsigned int mt_nr_tallocated(void) 6624 { 6625 return kmem_cache_nr_tallocated(maple_node_cache); 6626 } 6627 6628 extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *); 6629 unsigned int mt_nr_allocated(void) 6630 { 6631 return kmem_cache_nr_allocated(maple_node_cache); 6632 } 6633 6634 /* 6635 * mas_dead_node() - Check if the maple state is pointing to a dead node. 6636 * @mas: The maple state 6637 * @index: The index to restore in @mas. 6638 * 6639 * Used in test code. 6640 * Return: 1 if @mas has been reset to MAS_START, 0 otherwise. 6641 */ 6642 static inline int mas_dead_node(struct ma_state *mas, unsigned long index) 6643 { 6644 if (unlikely(!mas_searchable(mas) || mas_is_start(mas))) 6645 return 0; 6646 6647 if (likely(!mte_dead_node(mas->node))) 6648 return 0; 6649 6650 mas_rewalk(mas, index); 6651 return 1; 6652 } 6653 6654 void mt_cache_shrink(void) 6655 { 6656 } 6657 #else 6658 /* 6659 * mt_cache_shrink() - For testing, don't use this. 6660 * 6661 * Certain testcases can trigger an OOM when combined with other memory 6662 * debugging configuration options. This function is used to reduce the 6663 * possibility of an out of memory even due to kmem_cache objects remaining 6664 * around for longer than usual. 6665 */ 6666 void mt_cache_shrink(void) 6667 { 6668 kmem_cache_shrink(maple_node_cache); 6669 6670 } 6671 EXPORT_SYMBOL_GPL(mt_cache_shrink); 6672 6673 #endif /* not defined __KERNEL__ */ 6674 /* 6675 * mas_get_slot() - Get the entry in the maple state node stored at @offset. 6676 * @mas: The maple state 6677 * @offset: The offset into the slot array to fetch. 6678 * 6679 * Return: The entry stored at @offset. 6680 */ 6681 static inline struct maple_enode *mas_get_slot(struct ma_state *mas, 6682 unsigned char offset) 6683 { 6684 return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)), 6685 offset); 6686 } 6687 6688 /* Depth first search, post-order */ 6689 static void mas_dfs_postorder(struct ma_state *mas, unsigned long max) 6690 { 6691 6692 struct maple_enode *p = MAS_NONE, *mn = mas->node; 6693 unsigned long p_min, p_max; 6694 6695 mas_next_node(mas, mas_mn(mas), max); 6696 if (!mas_is_none(mas)) 6697 return; 6698 6699 if (mte_is_root(mn)) 6700 return; 6701 6702 mas->node = mn; 6703 mas_ascend(mas); 6704 do { 6705 p = mas->node; 6706 p_min = mas->min; 6707 p_max = mas->max; 6708 mas_prev_node(mas, 0); 6709 } while (!mas_is_none(mas)); 6710 6711 mas->node = p; 6712 mas->max = p_max; 6713 mas->min = p_min; 6714 } 6715 6716 /* Tree validations */ 6717 static void mt_dump_node(const struct maple_tree *mt, void *entry, 6718 unsigned long min, unsigned long max, unsigned int depth, 6719 enum mt_dump_format format); 6720 static void mt_dump_range(unsigned long min, unsigned long max, 6721 unsigned int depth, enum mt_dump_format format) 6722 { 6723 static const char spaces[] = " "; 6724 6725 switch(format) { 6726 case mt_dump_hex: 6727 if (min == max) 6728 pr_info("%.*s%lx: ", depth * 2, spaces, min); 6729 else 6730 pr_info("%.*s%lx-%lx: ", depth * 2, spaces, min, max); 6731 break; 6732 default: 6733 case mt_dump_dec: 6734 if (min == max) 6735 pr_info("%.*s%lu: ", depth * 2, spaces, min); 6736 else 6737 pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max); 6738 } 6739 } 6740 6741 static void mt_dump_entry(void *entry, unsigned long min, unsigned long max, 6742 unsigned int depth, enum mt_dump_format format) 6743 { 6744 mt_dump_range(min, max, depth, format); 6745 6746 if (xa_is_value(entry)) 6747 pr_cont("value %ld (0x%lx) [%p]\n", xa_to_value(entry), 6748 xa_to_value(entry), entry); 6749 else if (xa_is_zero(entry)) 6750 pr_cont("zero (%ld)\n", xa_to_internal(entry)); 6751 else if (mt_is_reserved(entry)) 6752 pr_cont("UNKNOWN ENTRY (%p)\n", entry); 6753 else 6754 pr_cont("%p\n", entry); 6755 } 6756 6757 static void mt_dump_range64(const struct maple_tree *mt, void *entry, 6758 unsigned long min, unsigned long max, unsigned int depth, 6759 enum mt_dump_format format) 6760 { 6761 struct maple_range_64 *node = &mte_to_node(entry)->mr64; 6762 bool leaf = mte_is_leaf(entry); 6763 unsigned long first = min; 6764 int i; 6765 6766 pr_cont(" contents: "); 6767 for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) { 6768 switch(format) { 6769 case mt_dump_hex: 6770 pr_cont("%p %lX ", node->slot[i], node->pivot[i]); 6771 break; 6772 default: 6773 case mt_dump_dec: 6774 pr_cont("%p %lu ", node->slot[i], node->pivot[i]); 6775 } 6776 } 6777 pr_cont("%p\n", node->slot[i]); 6778 for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) { 6779 unsigned long last = max; 6780 6781 if (i < (MAPLE_RANGE64_SLOTS - 1)) 6782 last = node->pivot[i]; 6783 else if (!node->slot[i] && max != mt_node_max(entry)) 6784 break; 6785 if (last == 0 && i > 0) 6786 break; 6787 if (leaf) 6788 mt_dump_entry(mt_slot(mt, node->slot, i), 6789 first, last, depth + 1, format); 6790 else if (node->slot[i]) 6791 mt_dump_node(mt, mt_slot(mt, node->slot, i), 6792 first, last, depth + 1, format); 6793 6794 if (last == max) 6795 break; 6796 if (last > max) { 6797 switch(format) { 6798 case mt_dump_hex: 6799 pr_err("node %p last (%lx) > max (%lx) at pivot %d!\n", 6800 node, last, max, i); 6801 break; 6802 default: 6803 case mt_dump_dec: 6804 pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", 6805 node, last, max, i); 6806 } 6807 } 6808 first = last + 1; 6809 } 6810 } 6811 6812 static void mt_dump_arange64(const struct maple_tree *mt, void *entry, 6813 unsigned long min, unsigned long max, unsigned int depth, 6814 enum mt_dump_format format) 6815 { 6816 struct maple_arange_64 *node = &mte_to_node(entry)->ma64; 6817 bool leaf = mte_is_leaf(entry); 6818 unsigned long first = min; 6819 int i; 6820 6821 pr_cont(" contents: "); 6822 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 6823 switch (format) { 6824 case mt_dump_hex: 6825 pr_cont("%lx ", node->gap[i]); 6826 break; 6827 default: 6828 case mt_dump_dec: 6829 pr_cont("%lu ", node->gap[i]); 6830 } 6831 } 6832 pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap); 6833 for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) { 6834 switch (format) { 6835 case mt_dump_hex: 6836 pr_cont("%p %lX ", node->slot[i], node->pivot[i]); 6837 break; 6838 default: 6839 case mt_dump_dec: 6840 pr_cont("%p %lu ", node->slot[i], node->pivot[i]); 6841 } 6842 } 6843 pr_cont("%p\n", node->slot[i]); 6844 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 6845 unsigned long last = max; 6846 6847 if (i < (MAPLE_ARANGE64_SLOTS - 1)) 6848 last = node->pivot[i]; 6849 else if (!node->slot[i]) 6850 break; 6851 if (last == 0 && i > 0) 6852 break; 6853 if (leaf) 6854 mt_dump_entry(mt_slot(mt, node->slot, i), 6855 first, last, depth + 1, format); 6856 else if (node->slot[i]) 6857 mt_dump_node(mt, mt_slot(mt, node->slot, i), 6858 first, last, depth + 1, format); 6859 6860 if (last == max) 6861 break; 6862 if (last > max) { 6863 pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", 6864 node, last, max, i); 6865 break; 6866 } 6867 first = last + 1; 6868 } 6869 } 6870 6871 static void mt_dump_node(const struct maple_tree *mt, void *entry, 6872 unsigned long min, unsigned long max, unsigned int depth, 6873 enum mt_dump_format format) 6874 { 6875 struct maple_node *node = mte_to_node(entry); 6876 unsigned int type = mte_node_type(entry); 6877 unsigned int i; 6878 6879 mt_dump_range(min, max, depth, format); 6880 6881 pr_cont("node %p depth %d type %d parent %p", node, depth, type, 6882 node ? node->parent : NULL); 6883 switch (type) { 6884 case maple_dense: 6885 pr_cont("\n"); 6886 for (i = 0; i < MAPLE_NODE_SLOTS; i++) { 6887 if (min + i > max) 6888 pr_cont("OUT OF RANGE: "); 6889 mt_dump_entry(mt_slot(mt, node->slot, i), 6890 min + i, min + i, depth, format); 6891 } 6892 break; 6893 case maple_leaf_64: 6894 case maple_range_64: 6895 mt_dump_range64(mt, entry, min, max, depth, format); 6896 break; 6897 case maple_arange_64: 6898 mt_dump_arange64(mt, entry, min, max, depth, format); 6899 break; 6900 6901 default: 6902 pr_cont(" UNKNOWN TYPE\n"); 6903 } 6904 } 6905 6906 void mt_dump(const struct maple_tree *mt, enum mt_dump_format format) 6907 { 6908 void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt)); 6909 6910 pr_info("maple_tree(%p) flags %X, height %u root %p\n", 6911 mt, mt->ma_flags, mt_height(mt), entry); 6912 if (!xa_is_node(entry)) 6913 mt_dump_entry(entry, 0, 0, 0, format); 6914 else if (entry) 6915 mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format); 6916 } 6917 EXPORT_SYMBOL_GPL(mt_dump); 6918 6919 /* 6920 * Calculate the maximum gap in a node and check if that's what is reported in 6921 * the parent (unless root). 6922 */ 6923 static void mas_validate_gaps(struct ma_state *mas) 6924 { 6925 struct maple_enode *mte = mas->node; 6926 struct maple_node *p_mn, *node = mte_to_node(mte); 6927 enum maple_type mt = mte_node_type(mas->node); 6928 unsigned long gap = 0, max_gap = 0; 6929 unsigned long p_end, p_start = mas->min; 6930 unsigned char p_slot, offset; 6931 unsigned long *gaps = NULL; 6932 unsigned long *pivots = ma_pivots(node, mt); 6933 unsigned int i; 6934 6935 if (ma_is_dense(mt)) { 6936 for (i = 0; i < mt_slot_count(mte); i++) { 6937 if (mas_get_slot(mas, i)) { 6938 if (gap > max_gap) 6939 max_gap = gap; 6940 gap = 0; 6941 continue; 6942 } 6943 gap++; 6944 } 6945 goto counted; 6946 } 6947 6948 gaps = ma_gaps(node, mt); 6949 for (i = 0; i < mt_slot_count(mte); i++) { 6950 p_end = mas_safe_pivot(mas, pivots, i, mt); 6951 6952 if (!gaps) { 6953 if (!mas_get_slot(mas, i)) 6954 gap = p_end - p_start + 1; 6955 } else { 6956 void *entry = mas_get_slot(mas, i); 6957 6958 gap = gaps[i]; 6959 MT_BUG_ON(mas->tree, !entry); 6960 6961 if (gap > p_end - p_start + 1) { 6962 pr_err("%p[%u] %lu >= %lu - %lu + 1 (%lu)\n", 6963 mas_mn(mas), i, gap, p_end, p_start, 6964 p_end - p_start + 1); 6965 MT_BUG_ON(mas->tree, gap > p_end - p_start + 1); 6966 } 6967 } 6968 6969 if (gap > max_gap) 6970 max_gap = gap; 6971 6972 p_start = p_end + 1; 6973 if (p_end >= mas->max) 6974 break; 6975 } 6976 6977 counted: 6978 if (mt == maple_arange_64) { 6979 offset = ma_meta_gap(node, mt); 6980 if (offset > i) { 6981 pr_err("gap offset %p[%u] is invalid\n", node, offset); 6982 MT_BUG_ON(mas->tree, 1); 6983 } 6984 6985 if (gaps[offset] != max_gap) { 6986 pr_err("gap %p[%u] is not the largest gap %lu\n", 6987 node, offset, max_gap); 6988 MT_BUG_ON(mas->tree, 1); 6989 } 6990 6991 MT_BUG_ON(mas->tree, !gaps); 6992 for (i++ ; i < mt_slot_count(mte); i++) { 6993 if (gaps[i] != 0) { 6994 pr_err("gap %p[%u] beyond node limit != 0\n", 6995 node, i); 6996 MT_BUG_ON(mas->tree, 1); 6997 } 6998 } 6999 } 7000 7001 if (mte_is_root(mte)) 7002 return; 7003 7004 p_slot = mte_parent_slot(mas->node); 7005 p_mn = mte_parent(mte); 7006 MT_BUG_ON(mas->tree, max_gap > mas->max); 7007 if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) { 7008 pr_err("gap %p[%u] != %lu\n", p_mn, p_slot, max_gap); 7009 mt_dump(mas->tree, mt_dump_hex); 7010 MT_BUG_ON(mas->tree, 1); 7011 } 7012 } 7013 7014 static void mas_validate_parent_slot(struct ma_state *mas) 7015 { 7016 struct maple_node *parent; 7017 struct maple_enode *node; 7018 enum maple_type p_type; 7019 unsigned char p_slot; 7020 void __rcu **slots; 7021 int i; 7022 7023 if (mte_is_root(mas->node)) 7024 return; 7025 7026 p_slot = mte_parent_slot(mas->node); 7027 p_type = mas_parent_type(mas, mas->node); 7028 parent = mte_parent(mas->node); 7029 slots = ma_slots(parent, p_type); 7030 MT_BUG_ON(mas->tree, mas_mn(mas) == parent); 7031 7032 /* Check prev/next parent slot for duplicate node entry */ 7033 7034 for (i = 0; i < mt_slots[p_type]; i++) { 7035 node = mas_slot(mas, slots, i); 7036 if (i == p_slot) { 7037 if (node != mas->node) 7038 pr_err("parent %p[%u] does not have %p\n", 7039 parent, i, mas_mn(mas)); 7040 MT_BUG_ON(mas->tree, node != mas->node); 7041 } else if (node == mas->node) { 7042 pr_err("Invalid child %p at parent %p[%u] p_slot %u\n", 7043 mas_mn(mas), parent, i, p_slot); 7044 MT_BUG_ON(mas->tree, node == mas->node); 7045 } 7046 } 7047 } 7048 7049 static void mas_validate_child_slot(struct ma_state *mas) 7050 { 7051 enum maple_type type = mte_node_type(mas->node); 7052 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 7053 unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type); 7054 struct maple_enode *child; 7055 unsigned char i; 7056 7057 if (mte_is_leaf(mas->node)) 7058 return; 7059 7060 for (i = 0; i < mt_slots[type]; i++) { 7061 child = mas_slot(mas, slots, i); 7062 7063 if (!child) { 7064 pr_err("Non-leaf node lacks child at %p[%u]\n", 7065 mas_mn(mas), i); 7066 MT_BUG_ON(mas->tree, 1); 7067 } 7068 7069 if (mte_parent_slot(child) != i) { 7070 pr_err("Slot error at %p[%u]: child %p has pslot %u\n", 7071 mas_mn(mas), i, mte_to_node(child), 7072 mte_parent_slot(child)); 7073 MT_BUG_ON(mas->tree, 1); 7074 } 7075 7076 if (mte_parent(child) != mte_to_node(mas->node)) { 7077 pr_err("child %p has parent %p not %p\n", 7078 mte_to_node(child), mte_parent(child), 7079 mte_to_node(mas->node)); 7080 MT_BUG_ON(mas->tree, 1); 7081 } 7082 7083 if (i < mt_pivots[type] && pivots[i] == mas->max) 7084 break; 7085 } 7086 } 7087 7088 /* 7089 * Validate all pivots are within mas->min and mas->max, check metadata ends 7090 * where the maximum ends and ensure there is no slots or pivots set outside of 7091 * the end of the data. 7092 */ 7093 static void mas_validate_limits(struct ma_state *mas) 7094 { 7095 int i; 7096 unsigned long prev_piv = 0; 7097 enum maple_type type = mte_node_type(mas->node); 7098 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 7099 unsigned long *pivots = ma_pivots(mas_mn(mas), type); 7100 7101 for (i = 0; i < mt_slots[type]; i++) { 7102 unsigned long piv; 7103 7104 piv = mas_safe_pivot(mas, pivots, i, type); 7105 7106 if (!piv && (i != 0)) { 7107 pr_err("Missing node limit pivot at %p[%u]", 7108 mas_mn(mas), i); 7109 MAS_WARN_ON(mas, 1); 7110 } 7111 7112 if (prev_piv > piv) { 7113 pr_err("%p[%u] piv %lu < prev_piv %lu\n", 7114 mas_mn(mas), i, piv, prev_piv); 7115 MAS_WARN_ON(mas, piv < prev_piv); 7116 } 7117 7118 if (piv < mas->min) { 7119 pr_err("%p[%u] %lu < %lu\n", mas_mn(mas), i, 7120 piv, mas->min); 7121 MAS_WARN_ON(mas, piv < mas->min); 7122 } 7123 if (piv > mas->max) { 7124 pr_err("%p[%u] %lu > %lu\n", mas_mn(mas), i, 7125 piv, mas->max); 7126 MAS_WARN_ON(mas, piv > mas->max); 7127 } 7128 prev_piv = piv; 7129 if (piv == mas->max) 7130 break; 7131 } 7132 7133 if (mas_data_end(mas) != i) { 7134 pr_err("node%p: data_end %u != the last slot offset %u\n", 7135 mas_mn(mas), mas_data_end(mas), i); 7136 MT_BUG_ON(mas->tree, 1); 7137 } 7138 7139 for (i += 1; i < mt_slots[type]; i++) { 7140 void *entry = mas_slot(mas, slots, i); 7141 7142 if (entry && (i != mt_slots[type] - 1)) { 7143 pr_err("%p[%u] should not have entry %p\n", mas_mn(mas), 7144 i, entry); 7145 MT_BUG_ON(mas->tree, entry != NULL); 7146 } 7147 7148 if (i < mt_pivots[type]) { 7149 unsigned long piv = pivots[i]; 7150 7151 if (!piv) 7152 continue; 7153 7154 pr_err("%p[%u] should not have piv %lu\n", 7155 mas_mn(mas), i, piv); 7156 MAS_WARN_ON(mas, i < mt_pivots[type] - 1); 7157 } 7158 } 7159 } 7160 7161 static void mt_validate_nulls(struct maple_tree *mt) 7162 { 7163 void *entry, *last = (void *)1; 7164 unsigned char offset = 0; 7165 void __rcu **slots; 7166 MA_STATE(mas, mt, 0, 0); 7167 7168 mas_start(&mas); 7169 if (mas_is_none(&mas) || (mas.node == MAS_ROOT)) 7170 return; 7171 7172 while (!mte_is_leaf(mas.node)) 7173 mas_descend(&mas); 7174 7175 slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node)); 7176 do { 7177 entry = mas_slot(&mas, slots, offset); 7178 if (!last && !entry) { 7179 pr_err("Sequential nulls end at %p[%u]\n", 7180 mas_mn(&mas), offset); 7181 } 7182 MT_BUG_ON(mt, !last && !entry); 7183 last = entry; 7184 if (offset == mas_data_end(&mas)) { 7185 mas_next_node(&mas, mas_mn(&mas), ULONG_MAX); 7186 if (mas_is_none(&mas)) 7187 return; 7188 offset = 0; 7189 slots = ma_slots(mte_to_node(mas.node), 7190 mte_node_type(mas.node)); 7191 } else { 7192 offset++; 7193 } 7194 7195 } while (!mas_is_none(&mas)); 7196 } 7197 7198 /* 7199 * validate a maple tree by checking: 7200 * 1. The limits (pivots are within mas->min to mas->max) 7201 * 2. The gap is correctly set in the parents 7202 */ 7203 void mt_validate(struct maple_tree *mt) 7204 { 7205 unsigned char end; 7206 7207 MA_STATE(mas, mt, 0, 0); 7208 rcu_read_lock(); 7209 mas_start(&mas); 7210 if (!mas_searchable(&mas)) 7211 goto done; 7212 7213 while (!mte_is_leaf(mas.node)) 7214 mas_descend(&mas); 7215 7216 while (!mas_is_none(&mas)) { 7217 MAS_WARN_ON(&mas, mte_dead_node(mas.node)); 7218 end = mas_data_end(&mas); 7219 if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) && 7220 (mas.max != ULONG_MAX))) { 7221 pr_err("Invalid size %u of %p\n", end, mas_mn(&mas)); 7222 } 7223 7224 mas_validate_parent_slot(&mas); 7225 mas_validate_limits(&mas); 7226 mas_validate_child_slot(&mas); 7227 if (mt_is_alloc(mt)) 7228 mas_validate_gaps(&mas); 7229 mas_dfs_postorder(&mas, ULONG_MAX); 7230 } 7231 mt_validate_nulls(mt); 7232 done: 7233 rcu_read_unlock(); 7234 7235 } 7236 EXPORT_SYMBOL_GPL(mt_validate); 7237 7238 void mas_dump(const struct ma_state *mas) 7239 { 7240 pr_err("MAS: tree=%p enode=%p ", mas->tree, mas->node); 7241 if (mas_is_none(mas)) 7242 pr_err("(MAS_NONE) "); 7243 else if (mas_is_ptr(mas)) 7244 pr_err("(MAS_ROOT) "); 7245 else if (mas_is_start(mas)) 7246 pr_err("(MAS_START) "); 7247 else if (mas_is_paused(mas)) 7248 pr_err("(MAS_PAUSED) "); 7249 7250 pr_err("[%u] index=%lx last=%lx\n", mas->offset, mas->index, mas->last); 7251 pr_err(" min=%lx max=%lx alloc=%p, depth=%u, flags=%x\n", 7252 mas->min, mas->max, mas->alloc, mas->depth, mas->mas_flags); 7253 if (mas->index > mas->last) 7254 pr_err("Check index & last\n"); 7255 } 7256 EXPORT_SYMBOL_GPL(mas_dump); 7257 7258 void mas_wr_dump(const struct ma_wr_state *wr_mas) 7259 { 7260 pr_err("WR_MAS: node=%p r_min=%lx r_max=%lx\n", 7261 wr_mas->node, wr_mas->r_min, wr_mas->r_max); 7262 pr_err(" type=%u off_end=%u, node_end=%u, end_piv=%lx\n", 7263 wr_mas->type, wr_mas->offset_end, wr_mas->node_end, 7264 wr_mas->end_piv); 7265 } 7266 EXPORT_SYMBOL_GPL(mas_wr_dump); 7267 7268 #endif /* CONFIG_DEBUG_MAPLE_TREE */ 7269