1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Maple Tree implementation 4 * Copyright (c) 2018-2022 Oracle Corporation 5 * Authors: Liam R. Howlett <Liam.Howlett@oracle.com> 6 * Matthew Wilcox <willy@infradead.org> 7 */ 8 9 /* 10 * DOC: Interesting implementation details of the Maple Tree 11 * 12 * Each node type has a number of slots for entries and a number of slots for 13 * pivots. In the case of dense nodes, the pivots are implied by the position 14 * and are simply the slot index + the minimum of the node. 15 * 16 * In regular B-Tree terms, pivots are called keys. The term pivot is used to 17 * indicate that the tree is specifying ranges, Pivots may appear in the 18 * subtree with an entry attached to the value where as keys are unique to a 19 * specific position of a B-tree. Pivot values are inclusive of the slot with 20 * the same index. 21 * 22 * 23 * The following illustrates the layout of a range64 nodes slots and pivots. 24 * 25 * 26 * Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 | 27 * ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬ 28 * │ │ │ │ │ │ │ │ └─ Implied maximum 29 * │ │ │ │ │ │ │ └─ Pivot 14 30 * │ │ │ │ │ │ └─ Pivot 13 31 * │ │ │ │ │ └─ Pivot 12 32 * │ │ │ │ └─ Pivot 11 33 * │ │ │ └─ Pivot 2 34 * │ │ └─ Pivot 1 35 * │ └─ Pivot 0 36 * └─ Implied minimum 37 * 38 * Slot contents: 39 * Internal (non-leaf) nodes contain pointers to other nodes. 40 * Leaf nodes contain entries. 41 * 42 * The location of interest is often referred to as an offset. All offsets have 43 * a slot, but the last offset has an implied pivot from the node above (or 44 * UINT_MAX for the root node. 45 * 46 * Ranges complicate certain write activities. When modifying any of 47 * the B-tree variants, it is known that one entry will either be added or 48 * deleted. When modifying the Maple Tree, one store operation may overwrite 49 * the entire data set, or one half of the tree, or the middle half of the tree. 
50 * 51 */ 52 53 54 #include <linux/maple_tree.h> 55 #include <linux/xarray.h> 56 #include <linux/types.h> 57 #include <linux/export.h> 58 #include <linux/slab.h> 59 #include <linux/limits.h> 60 #include <asm/barrier.h> 61 62 #define CREATE_TRACE_POINTS 63 #include <trace/events/maple_tree.h> 64 65 #define MA_ROOT_PARENT 1 66 67 /* 68 * Maple state flags 69 * * MA_STATE_BULK - Bulk insert mode 70 * * MA_STATE_REBALANCE - Indicate a rebalance during bulk insert 71 * * MA_STATE_PREALLOC - Preallocated nodes, WARN_ON allocation 72 */ 73 #define MA_STATE_BULK 1 74 #define MA_STATE_REBALANCE 2 75 #define MA_STATE_PREALLOC 4 76 77 #define ma_parent_ptr(x) ((struct maple_pnode *)(x)) 78 #define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT) 79 #define ma_mnode_ptr(x) ((struct maple_node *)(x)) 80 #define ma_enode_ptr(x) ((struct maple_enode *)(x)) 81 static struct kmem_cache *maple_node_cache; 82 83 #ifdef CONFIG_DEBUG_MAPLE_TREE 84 static const unsigned long mt_max[] = { 85 [maple_dense] = MAPLE_NODE_SLOTS, 86 [maple_leaf_64] = ULONG_MAX, 87 [maple_range_64] = ULONG_MAX, 88 [maple_arange_64] = ULONG_MAX, 89 }; 90 #define mt_node_max(x) mt_max[mte_node_type(x)] 91 #endif 92 93 static const unsigned char mt_slots[] = { 94 [maple_dense] = MAPLE_NODE_SLOTS, 95 [maple_leaf_64] = MAPLE_RANGE64_SLOTS, 96 [maple_range_64] = MAPLE_RANGE64_SLOTS, 97 [maple_arange_64] = MAPLE_ARANGE64_SLOTS, 98 }; 99 #define mt_slot_count(x) mt_slots[mte_node_type(x)] 100 101 static const unsigned char mt_pivots[] = { 102 [maple_dense] = 0, 103 [maple_leaf_64] = MAPLE_RANGE64_SLOTS - 1, 104 [maple_range_64] = MAPLE_RANGE64_SLOTS - 1, 105 [maple_arange_64] = MAPLE_ARANGE64_SLOTS - 1, 106 }; 107 #define mt_pivot_count(x) mt_pivots[mte_node_type(x)] 108 109 static const unsigned char mt_min_slots[] = { 110 [maple_dense] = MAPLE_NODE_SLOTS / 2, 111 [maple_leaf_64] = (MAPLE_RANGE64_SLOTS / 2) - 2, 112 [maple_range_64] = (MAPLE_RANGE64_SLOTS / 2) - 2, 113 [maple_arange_64] = (MAPLE_ARANGE64_SLOTS / 2) - 1, 114 }; 115 #define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)] 116 117 #define MAPLE_BIG_NODE_SLOTS (MAPLE_RANGE64_SLOTS * 2 + 2) 118 #define MAPLE_BIG_NODE_GAPS (MAPLE_ARANGE64_SLOTS * 2 + 1) 119 120 struct maple_big_node { 121 struct maple_pnode *parent; 122 unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1]; 123 union { 124 struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS]; 125 struct { 126 unsigned long padding[MAPLE_BIG_NODE_GAPS]; 127 unsigned long gap[MAPLE_BIG_NODE_GAPS]; 128 }; 129 }; 130 unsigned char b_end; 131 enum maple_type type; 132 }; 133 134 /* 135 * The maple_subtree_state is used to build a tree to replace a segment of an 136 * existing tree in a more atomic way. Any walkers of the older tree will hit a 137 * dead node and restart on updates. 
138 */ 139 struct maple_subtree_state { 140 struct ma_state *orig_l; /* Original left side of subtree */ 141 struct ma_state *orig_r; /* Original right side of subtree */ 142 struct ma_state *l; /* New left side of subtree */ 143 struct ma_state *m; /* New middle of subtree (rare) */ 144 struct ma_state *r; /* New right side of subtree */ 145 struct ma_topiary *free; /* nodes to be freed */ 146 struct ma_topiary *destroy; /* Nodes to be destroyed (walked and freed) */ 147 struct maple_big_node *bn; 148 }; 149 150 #ifdef CONFIG_KASAN_STACK 151 /* Prevent mas_wr_bnode() from exceeding the stack frame limit */ 152 #define noinline_for_kasan noinline_for_stack 153 #else 154 #define noinline_for_kasan inline 155 #endif 156 157 /* Functions */ 158 static inline struct maple_node *mt_alloc_one(gfp_t gfp) 159 { 160 return kmem_cache_alloc(maple_node_cache, gfp); 161 } 162 163 static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes) 164 { 165 return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes); 166 } 167 168 static inline void mt_free_bulk(size_t size, void __rcu **nodes) 169 { 170 kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes); 171 } 172 173 static void mt_free_rcu(struct rcu_head *head) 174 { 175 struct maple_node *node = container_of(head, struct maple_node, rcu); 176 177 kmem_cache_free(maple_node_cache, node); 178 } 179 180 /* 181 * ma_free_rcu() - Use rcu callback to free a maple node 182 * @node: The node to free 183 * 184 * The maple tree uses the parent pointer to indicate this node is no longer in 185 * use and will be freed. 186 */ 187 static void ma_free_rcu(struct maple_node *node) 188 { 189 WARN_ON(node->parent != ma_parent_ptr(node)); 190 call_rcu(&node->rcu, mt_free_rcu); 191 } 192 193 static void mas_set_height(struct ma_state *mas) 194 { 195 unsigned int new_flags = mas->tree->ma_flags; 196 197 new_flags &= ~MT_FLAGS_HEIGHT_MASK; 198 MAS_BUG_ON(mas, mas->depth > MAPLE_HEIGHT_MAX); 199 new_flags |= mas->depth << MT_FLAGS_HEIGHT_OFFSET; 200 mas->tree->ma_flags = new_flags; 201 } 202 203 static unsigned int mas_mt_height(struct ma_state *mas) 204 { 205 return mt_height(mas->tree); 206 } 207 208 static inline enum maple_type mte_node_type(const struct maple_enode *entry) 209 { 210 return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) & 211 MAPLE_NODE_TYPE_MASK; 212 } 213 214 static inline bool ma_is_dense(const enum maple_type type) 215 { 216 return type < maple_leaf_64; 217 } 218 219 static inline bool ma_is_leaf(const enum maple_type type) 220 { 221 return type < maple_range_64; 222 } 223 224 static inline bool mte_is_leaf(const struct maple_enode *entry) 225 { 226 return ma_is_leaf(mte_node_type(entry)); 227 } 228 229 /* 230 * We also reserve values with the bottom two bits set to '10' which are 231 * below 4096 232 */ 233 static inline bool mt_is_reserved(const void *entry) 234 { 235 return ((unsigned long)entry < MAPLE_RESERVED_RANGE) && 236 xa_is_internal(entry); 237 } 238 239 static inline void mas_set_err(struct ma_state *mas, long err) 240 { 241 mas->node = MA_ERROR(err); 242 } 243 244 static inline bool mas_is_ptr(const struct ma_state *mas) 245 { 246 return mas->node == MAS_ROOT; 247 } 248 249 static inline bool mas_is_start(const struct ma_state *mas) 250 { 251 return mas->node == MAS_START; 252 } 253 254 bool mas_is_err(struct ma_state *mas) 255 { 256 return xa_is_err(mas->node); 257 } 258 259 static inline bool mas_searchable(struct ma_state *mas) 260 { 261 if (mas_is_none(mas)) 262 return false; 263 264 if (mas_is_ptr(mas)) 265 
return false; 266 267 return true; 268 } 269 270 static inline struct maple_node *mte_to_node(const struct maple_enode *entry) 271 { 272 return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK); 273 } 274 275 /* 276 * mte_to_mat() - Convert a maple encoded node to a maple topiary node. 277 * @entry: The maple encoded node 278 * 279 * Return: a maple topiary pointer 280 */ 281 static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry) 282 { 283 return (struct maple_topiary *) 284 ((unsigned long)entry & ~MAPLE_NODE_MASK); 285 } 286 287 /* 288 * mas_mn() - Get the maple state node. 289 * @mas: The maple state 290 * 291 * Return: the maple node (not encoded - bare pointer). 292 */ 293 static inline struct maple_node *mas_mn(const struct ma_state *mas) 294 { 295 return mte_to_node(mas->node); 296 } 297 298 /* 299 * mte_set_node_dead() - Set a maple encoded node as dead. 300 * @mn: The maple encoded node. 301 */ 302 static inline void mte_set_node_dead(struct maple_enode *mn) 303 { 304 mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn)); 305 smp_wmb(); /* Needed for RCU */ 306 } 307 308 /* Bit 1 indicates the root is a node */ 309 #define MAPLE_ROOT_NODE 0x02 310 /* maple_type stored bit 3-6 */ 311 #define MAPLE_ENODE_TYPE_SHIFT 0x03 312 /* Bit 2 means a NULL somewhere below */ 313 #define MAPLE_ENODE_NULL 0x04 314 315 static inline struct maple_enode *mt_mk_node(const struct maple_node *node, 316 enum maple_type type) 317 { 318 return (void *)((unsigned long)node | 319 (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL); 320 } 321 322 static inline void *mte_mk_root(const struct maple_enode *node) 323 { 324 return (void *)((unsigned long)node | MAPLE_ROOT_NODE); 325 } 326 327 static inline void *mte_safe_root(const struct maple_enode *node) 328 { 329 return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE); 330 } 331 332 static inline void *mte_set_full(const struct maple_enode *node) 333 { 334 return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL); 335 } 336 337 static inline void *mte_clear_full(const struct maple_enode *node) 338 { 339 return (void *)((unsigned long)node | MAPLE_ENODE_NULL); 340 } 341 342 static inline bool mte_has_null(const struct maple_enode *node) 343 { 344 return (unsigned long)node & MAPLE_ENODE_NULL; 345 } 346 347 static inline bool ma_is_root(struct maple_node *node) 348 { 349 return ((unsigned long)node->parent & MA_ROOT_PARENT); 350 } 351 352 static inline bool mte_is_root(const struct maple_enode *node) 353 { 354 return ma_is_root(mte_to_node(node)); 355 } 356 357 static inline bool mas_is_root_limits(const struct ma_state *mas) 358 { 359 return !mas->min && mas->max == ULONG_MAX; 360 } 361 362 static inline bool mt_is_alloc(struct maple_tree *mt) 363 { 364 return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE); 365 } 366 367 /* 368 * The Parent Pointer 369 * Excluding root, the parent pointer is 256B aligned like all other tree nodes. 370 * When storing a 32 or 64 bit values, the offset can fit into 5 bits. The 16 371 * bit values need an extra bit to store the offset. This extra bit comes from 372 * a reuse of the last bit in the node type. This is possible by using bit 1 to 373 * indicate if bit 2 is part of the type or the slot. 
374 * 375 * Note types: 376 * 0x??1 = Root 377 * 0x?00 = 16 bit nodes 378 * 0x010 = 32 bit nodes 379 * 0x110 = 64 bit nodes 380 * 381 * Slot size and alignment 382 * 0b??1 : Root 383 * 0b?00 : 16 bit values, type in 0-1, slot in 2-7 384 * 0b010 : 32 bit values, type in 0-2, slot in 3-7 385 * 0b110 : 64 bit values, type in 0-2, slot in 3-7 386 */ 387 388 #define MAPLE_PARENT_ROOT 0x01 389 390 #define MAPLE_PARENT_SLOT_SHIFT 0x03 391 #define MAPLE_PARENT_SLOT_MASK 0xF8 392 393 #define MAPLE_PARENT_16B_SLOT_SHIFT 0x02 394 #define MAPLE_PARENT_16B_SLOT_MASK 0xFC 395 396 #define MAPLE_PARENT_RANGE64 0x06 397 #define MAPLE_PARENT_RANGE32 0x04 398 #define MAPLE_PARENT_NOT_RANGE16 0x02 399 400 /* 401 * mte_parent_shift() - Get the parent shift for the slot storage. 402 * @parent: The parent pointer cast as an unsigned long 403 * Return: The shift into that pointer to the star to of the slot 404 */ 405 static inline unsigned long mte_parent_shift(unsigned long parent) 406 { 407 /* Note bit 1 == 0 means 16B */ 408 if (likely(parent & MAPLE_PARENT_NOT_RANGE16)) 409 return MAPLE_PARENT_SLOT_SHIFT; 410 411 return MAPLE_PARENT_16B_SLOT_SHIFT; 412 } 413 414 /* 415 * mte_parent_slot_mask() - Get the slot mask for the parent. 416 * @parent: The parent pointer cast as an unsigned long. 417 * Return: The slot mask for that parent. 418 */ 419 static inline unsigned long mte_parent_slot_mask(unsigned long parent) 420 { 421 /* Note bit 1 == 0 means 16B */ 422 if (likely(parent & MAPLE_PARENT_NOT_RANGE16)) 423 return MAPLE_PARENT_SLOT_MASK; 424 425 return MAPLE_PARENT_16B_SLOT_MASK; 426 } 427 428 /* 429 * mas_parent_type() - Return the maple_type of the parent from the stored 430 * parent type. 431 * @mas: The maple state 432 * @enode: The maple_enode to extract the parent's enum 433 * Return: The node->parent maple_type 434 */ 435 static inline 436 enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode) 437 { 438 unsigned long p_type; 439 440 p_type = (unsigned long)mte_to_node(enode)->parent; 441 if (WARN_ON(p_type & MAPLE_PARENT_ROOT)) 442 return 0; 443 444 p_type &= MAPLE_NODE_MASK; 445 p_type &= ~mte_parent_slot_mask(p_type); 446 switch (p_type) { 447 case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */ 448 if (mt_is_alloc(mas->tree)) 449 return maple_arange_64; 450 return maple_range_64; 451 } 452 453 return 0; 454 } 455 456 /* 457 * mas_set_parent() - Set the parent node and encode the slot 458 * @enode: The encoded maple node. 459 * @parent: The encoded maple node that is the parent of @enode. 460 * @slot: The slot that @enode resides in @parent. 461 * 462 * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the 463 * parent type. 
464 */ 465 static inline 466 void mas_set_parent(struct ma_state *mas, struct maple_enode *enode, 467 const struct maple_enode *parent, unsigned char slot) 468 { 469 unsigned long val = (unsigned long)parent; 470 unsigned long shift; 471 unsigned long type; 472 enum maple_type p_type = mte_node_type(parent); 473 474 MAS_BUG_ON(mas, p_type == maple_dense); 475 MAS_BUG_ON(mas, p_type == maple_leaf_64); 476 477 switch (p_type) { 478 case maple_range_64: 479 case maple_arange_64: 480 shift = MAPLE_PARENT_SLOT_SHIFT; 481 type = MAPLE_PARENT_RANGE64; 482 break; 483 default: 484 case maple_dense: 485 case maple_leaf_64: 486 shift = type = 0; 487 break; 488 } 489 490 val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */ 491 val |= (slot << shift) | type; 492 mte_to_node(enode)->parent = ma_parent_ptr(val); 493 } 494 495 /* 496 * mte_parent_slot() - get the parent slot of @enode. 497 * @enode: The encoded maple node. 498 * 499 * Return: The slot in the parent node where @enode resides. 500 */ 501 static inline unsigned int mte_parent_slot(const struct maple_enode *enode) 502 { 503 unsigned long val = (unsigned long)mte_to_node(enode)->parent; 504 505 if (val & MA_ROOT_PARENT) 506 return 0; 507 508 /* 509 * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost 510 * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT 511 */ 512 return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val); 513 } 514 515 /* 516 * mte_parent() - Get the parent of @node. 517 * @node: The encoded maple node. 518 * 519 * Return: The parent maple node. 520 */ 521 static inline struct maple_node *mte_parent(const struct maple_enode *enode) 522 { 523 return (void *)((unsigned long) 524 (mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK); 525 } 526 527 /* 528 * ma_dead_node() - check if the @enode is dead. 529 * @enode: The encoded maple node 530 * 531 * Return: true if dead, false otherwise. 532 */ 533 static inline bool ma_dead_node(const struct maple_node *node) 534 { 535 struct maple_node *parent; 536 537 /* Do not reorder reads from the node prior to the parent check */ 538 smp_rmb(); 539 parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK); 540 return (parent == node); 541 } 542 543 /* 544 * mte_dead_node() - check if the @enode is dead. 545 * @enode: The encoded maple node 546 * 547 * Return: true if dead, false otherwise. 548 */ 549 static inline bool mte_dead_node(const struct maple_enode *enode) 550 { 551 struct maple_node *parent, *node; 552 553 node = mte_to_node(enode); 554 /* Do not reorder reads from the node prior to the parent check */ 555 smp_rmb(); 556 parent = mte_parent(enode); 557 return (parent == node); 558 } 559 560 /* 561 * mas_allocated() - Get the number of nodes allocated in a maple state. 562 * @mas: The maple state 563 * 564 * The ma_state alloc member is overloaded to hold a pointer to the first 565 * allocated node or to the number of requested nodes to allocate. If bit 0 is 566 * set, then the alloc contains the number of requested nodes. If there is an 567 * allocated node, then the total allocated nodes is in that node. 568 * 569 * Return: The total number of nodes allocated 570 */ 571 static inline unsigned long mas_allocated(const struct ma_state *mas) 572 { 573 if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) 574 return 0; 575 576 return mas->alloc->total; 577 } 578 579 /* 580 * mas_set_alloc_req() - Set the requested number of allocations. 581 * @mas: the maple state 582 * @count: the number of allocations. 
583 * 584 * The requested number of allocations is either in the first allocated node, 585 * located in @mas->alloc->request_count, or directly in @mas->alloc if there is 586 * no allocated node. Set the request either in the node or do the necessary 587 * encoding to store in @mas->alloc directly. 588 */ 589 static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count) 590 { 591 if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) { 592 if (!count) 593 mas->alloc = NULL; 594 else 595 mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U); 596 return; 597 } 598 599 mas->alloc->request_count = count; 600 } 601 602 /* 603 * mas_alloc_req() - get the requested number of allocations. 604 * @mas: The maple state 605 * 606 * The alloc count is either stored directly in @mas, or in 607 * @mas->alloc->request_count if there is at least one node allocated. Decode 608 * the request count if it's stored directly in @mas->alloc. 609 * 610 * Return: The allocation request count. 611 */ 612 static inline unsigned int mas_alloc_req(const struct ma_state *mas) 613 { 614 if ((unsigned long)mas->alloc & 0x1) 615 return (unsigned long)(mas->alloc) >> 1; 616 else if (mas->alloc) 617 return mas->alloc->request_count; 618 return 0; 619 } 620 621 /* 622 * ma_pivots() - Get a pointer to the maple node pivots. 623 * @node - the maple node 624 * @type - the node type 625 * 626 * In the event of a dead node, this array may be %NULL 627 * 628 * Return: A pointer to the maple node pivots 629 */ 630 static inline unsigned long *ma_pivots(struct maple_node *node, 631 enum maple_type type) 632 { 633 switch (type) { 634 case maple_arange_64: 635 return node->ma64.pivot; 636 case maple_range_64: 637 case maple_leaf_64: 638 return node->mr64.pivot; 639 case maple_dense: 640 return NULL; 641 } 642 return NULL; 643 } 644 645 /* 646 * ma_gaps() - Get a pointer to the maple node gaps. 647 * @node - the maple node 648 * @type - the node type 649 * 650 * Return: A pointer to the maple node gaps 651 */ 652 static inline unsigned long *ma_gaps(struct maple_node *node, 653 enum maple_type type) 654 { 655 switch (type) { 656 case maple_arange_64: 657 return node->ma64.gap; 658 case maple_range_64: 659 case maple_leaf_64: 660 case maple_dense: 661 return NULL; 662 } 663 return NULL; 664 } 665 666 /* 667 * mas_pivot() - Get the pivot at @piv of the maple encoded node. 668 * @mas: The maple state. 669 * @piv: The pivot. 670 * 671 * Return: the pivot at @piv of @mn. 672 */ 673 static inline unsigned long mas_pivot(struct ma_state *mas, unsigned char piv) 674 { 675 struct maple_node *node = mas_mn(mas); 676 enum maple_type type = mte_node_type(mas->node); 677 678 if (MAS_WARN_ON(mas, piv >= mt_pivots[type])) { 679 mas_set_err(mas, -EIO); 680 return 0; 681 } 682 683 switch (type) { 684 case maple_arange_64: 685 return node->ma64.pivot[piv]; 686 case maple_range_64: 687 case maple_leaf_64: 688 return node->mr64.pivot[piv]; 689 case maple_dense: 690 return 0; 691 } 692 return 0; 693 } 694 695 /* 696 * mas_safe_pivot() - get the pivot at @piv or mas->max. 697 * @mas: The maple state 698 * @pivots: The pointer to the maple node pivots 699 * @piv: The pivot to fetch 700 * @type: The maple node type 701 * 702 * Return: The pivot at @piv within the limit of the @pivots array, @mas->max 703 * otherwise. 
704 */ 705 static inline unsigned long 706 mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots, 707 unsigned char piv, enum maple_type type) 708 { 709 if (piv >= mt_pivots[type]) 710 return mas->max; 711 712 return pivots[piv]; 713 } 714 715 /* 716 * mas_safe_min() - Return the minimum for a given offset. 717 * @mas: The maple state 718 * @pivots: The pointer to the maple node pivots 719 * @offset: The offset into the pivot array 720 * 721 * Return: The minimum range value that is contained in @offset. 722 */ 723 static inline unsigned long 724 mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset) 725 { 726 if (likely(offset)) 727 return pivots[offset - 1] + 1; 728 729 return mas->min; 730 } 731 732 /* 733 * mte_set_pivot() - Set a pivot to a value in an encoded maple node. 734 * @mn: The encoded maple node 735 * @piv: The pivot offset 736 * @val: The value of the pivot 737 */ 738 static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv, 739 unsigned long val) 740 { 741 struct maple_node *node = mte_to_node(mn); 742 enum maple_type type = mte_node_type(mn); 743 744 BUG_ON(piv >= mt_pivots[type]); 745 switch (type) { 746 default: 747 case maple_range_64: 748 case maple_leaf_64: 749 node->mr64.pivot[piv] = val; 750 break; 751 case maple_arange_64: 752 node->ma64.pivot[piv] = val; 753 break; 754 case maple_dense: 755 break; 756 } 757 758 } 759 760 /* 761 * ma_slots() - Get a pointer to the maple node slots. 762 * @mn: The maple node 763 * @mt: The maple node type 764 * 765 * Return: A pointer to the maple node slots 766 */ 767 static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt) 768 { 769 switch (mt) { 770 default: 771 case maple_arange_64: 772 return mn->ma64.slot; 773 case maple_range_64: 774 case maple_leaf_64: 775 return mn->mr64.slot; 776 case maple_dense: 777 return mn->slot; 778 } 779 } 780 781 static inline bool mt_write_locked(const struct maple_tree *mt) 782 { 783 return mt_external_lock(mt) ? mt_write_lock_is_held(mt) : 784 lockdep_is_held(&mt->ma_lock); 785 } 786 787 static inline bool mt_locked(const struct maple_tree *mt) 788 { 789 return mt_external_lock(mt) ? mt_lock_is_held(mt) : 790 lockdep_is_held(&mt->ma_lock); 791 } 792 793 static inline void *mt_slot(const struct maple_tree *mt, 794 void __rcu **slots, unsigned char offset) 795 { 796 return rcu_dereference_check(slots[offset], mt_locked(mt)); 797 } 798 799 static inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slots, 800 unsigned char offset) 801 { 802 return rcu_dereference_protected(slots[offset], mt_write_locked(mt)); 803 } 804 /* 805 * mas_slot_locked() - Get the slot value when holding the maple tree lock. 806 * @mas: The maple state 807 * @slots: The pointer to the slots 808 * @offset: The offset into the slots array to fetch 809 * 810 * Return: The entry stored in @slots at the @offset. 811 */ 812 static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots, 813 unsigned char offset) 814 { 815 return mt_slot_locked(mas->tree, slots, offset); 816 } 817 818 /* 819 * mas_slot() - Get the slot value when not holding the maple tree lock. 
820 * @mas: The maple state 821 * @slots: The pointer to the slots 822 * @offset: The offset into the slots array to fetch 823 * 824 * Return: The entry stored in @slots at the @offset 825 */ 826 static inline void *mas_slot(struct ma_state *mas, void __rcu **slots, 827 unsigned char offset) 828 { 829 return mt_slot(mas->tree, slots, offset); 830 } 831 832 /* 833 * mas_root() - Get the maple tree root. 834 * @mas: The maple state. 835 * 836 * Return: The pointer to the root of the tree 837 */ 838 static inline void *mas_root(struct ma_state *mas) 839 { 840 return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree)); 841 } 842 843 static inline void *mt_root_locked(struct maple_tree *mt) 844 { 845 return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt)); 846 } 847 848 /* 849 * mas_root_locked() - Get the maple tree root when holding the maple tree lock. 850 * @mas: The maple state. 851 * 852 * Return: The pointer to the root of the tree 853 */ 854 static inline void *mas_root_locked(struct ma_state *mas) 855 { 856 return mt_root_locked(mas->tree); 857 } 858 859 static inline struct maple_metadata *ma_meta(struct maple_node *mn, 860 enum maple_type mt) 861 { 862 switch (mt) { 863 case maple_arange_64: 864 return &mn->ma64.meta; 865 default: 866 return &mn->mr64.meta; 867 } 868 } 869 870 /* 871 * ma_set_meta() - Set the metadata information of a node. 872 * @mn: The maple node 873 * @mt: The maple node type 874 * @offset: The offset of the highest sub-gap in this node. 875 * @end: The end of the data in this node. 876 */ 877 static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt, 878 unsigned char offset, unsigned char end) 879 { 880 struct maple_metadata *meta = ma_meta(mn, mt); 881 882 meta->gap = offset; 883 meta->end = end; 884 } 885 886 /* 887 * mt_clear_meta() - clear the metadata information of a node, if it exists 888 * @mt: The maple tree 889 * @mn: The maple node 890 * @type: The maple node type 891 * @offset: The offset of the highest sub-gap in this node. 892 * @end: The end of the data in this node. 
893 */ 894 static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn, 895 enum maple_type type) 896 { 897 struct maple_metadata *meta; 898 unsigned long *pivots; 899 void __rcu **slots; 900 void *next; 901 902 switch (type) { 903 case maple_range_64: 904 pivots = mn->mr64.pivot; 905 if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { 906 slots = mn->mr64.slot; 907 next = mt_slot_locked(mt, slots, 908 MAPLE_RANGE64_SLOTS - 1); 909 if (unlikely((mte_to_node(next) && 910 mte_node_type(next)))) 911 return; /* no metadata, could be node */ 912 } 913 fallthrough; 914 case maple_arange_64: 915 meta = ma_meta(mn, type); 916 break; 917 default: 918 return; 919 } 920 921 meta->gap = 0; 922 meta->end = 0; 923 } 924 925 /* 926 * ma_meta_end() - Get the data end of a node from the metadata 927 * @mn: The maple node 928 * @mt: The maple node type 929 */ 930 static inline unsigned char ma_meta_end(struct maple_node *mn, 931 enum maple_type mt) 932 { 933 struct maple_metadata *meta = ma_meta(mn, mt); 934 935 return meta->end; 936 } 937 938 /* 939 * ma_meta_gap() - Get the largest gap location of a node from the metadata 940 * @mn: The maple node 941 * @mt: The maple node type 942 */ 943 static inline unsigned char ma_meta_gap(struct maple_node *mn, 944 enum maple_type mt) 945 { 946 return mn->ma64.meta.gap; 947 } 948 949 /* 950 * ma_set_meta_gap() - Set the largest gap location in a nodes metadata 951 * @mn: The maple node 952 * @mn: The maple node type 953 * @offset: The location of the largest gap. 954 */ 955 static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt, 956 unsigned char offset) 957 { 958 959 struct maple_metadata *meta = ma_meta(mn, mt); 960 961 meta->gap = offset; 962 } 963 964 /* 965 * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes. 966 * @mat - the ma_topiary, a linked list of dead nodes. 967 * @dead_enode - the node to be marked as dead and added to the tail of the list 968 * 969 * Add the @dead_enode to the linked list in @mat. 970 */ 971 static inline void mat_add(struct ma_topiary *mat, 972 struct maple_enode *dead_enode) 973 { 974 mte_set_node_dead(dead_enode); 975 mte_to_mat(dead_enode)->next = NULL; 976 if (!mat->tail) { 977 mat->tail = mat->head = dead_enode; 978 return; 979 } 980 981 mte_to_mat(mat->tail)->next = dead_enode; 982 mat->tail = dead_enode; 983 } 984 985 static void mt_free_walk(struct rcu_head *head); 986 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 987 bool free); 988 /* 989 * mas_mat_destroy() - Free all nodes and subtrees in a dead list. 990 * @mas - the maple state 991 * @mat - the ma_topiary linked list of dead nodes to free. 992 * 993 * Destroy walk a dead list. 994 */ 995 static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat) 996 { 997 struct maple_enode *next; 998 struct maple_node *node; 999 bool in_rcu = mt_in_rcu(mas->tree); 1000 1001 while (mat->head) { 1002 next = mte_to_mat(mat->head)->next; 1003 node = mte_to_node(mat->head); 1004 mt_destroy_walk(mat->head, mas->tree, !in_rcu); 1005 if (in_rcu) 1006 call_rcu(&node->rcu, mt_free_walk); 1007 mat->head = next; 1008 } 1009 } 1010 /* 1011 * mas_descend() - Descend into the slot stored in the ma_state. 1012 * @mas - the maple state. 1013 * 1014 * Note: Not RCU safe, only use in write side or debug code. 
1015 */ 1016 static inline void mas_descend(struct ma_state *mas) 1017 { 1018 enum maple_type type; 1019 unsigned long *pivots; 1020 struct maple_node *node; 1021 void __rcu **slots; 1022 1023 node = mas_mn(mas); 1024 type = mte_node_type(mas->node); 1025 pivots = ma_pivots(node, type); 1026 slots = ma_slots(node, type); 1027 1028 if (mas->offset) 1029 mas->min = pivots[mas->offset - 1] + 1; 1030 mas->max = mas_safe_pivot(mas, pivots, mas->offset, type); 1031 mas->node = mas_slot(mas, slots, mas->offset); 1032 } 1033 1034 /* 1035 * mte_set_gap() - Set a maple node gap. 1036 * @mn: The encoded maple node 1037 * @gap: The offset of the gap to set 1038 * @val: The gap value 1039 */ 1040 static inline void mte_set_gap(const struct maple_enode *mn, 1041 unsigned char gap, unsigned long val) 1042 { 1043 switch (mte_node_type(mn)) { 1044 default: 1045 break; 1046 case maple_arange_64: 1047 mte_to_node(mn)->ma64.gap[gap] = val; 1048 break; 1049 } 1050 } 1051 1052 /* 1053 * mas_ascend() - Walk up a level of the tree. 1054 * @mas: The maple state 1055 * 1056 * Sets the @mas->max and @mas->min to the correct values when walking up. This 1057 * may cause several levels of walking up to find the correct min and max. 1058 * May find a dead node which will cause a premature return. 1059 * Return: 1 on dead node, 0 otherwise 1060 */ 1061 static int mas_ascend(struct ma_state *mas) 1062 { 1063 struct maple_enode *p_enode; /* parent enode. */ 1064 struct maple_enode *a_enode; /* ancestor enode. */ 1065 struct maple_node *a_node; /* ancestor node. */ 1066 struct maple_node *p_node; /* parent node. */ 1067 unsigned char a_slot; 1068 enum maple_type a_type; 1069 unsigned long min, max; 1070 unsigned long *pivots; 1071 bool set_max = false, set_min = false; 1072 1073 a_node = mas_mn(mas); 1074 if (ma_is_root(a_node)) { 1075 mas->offset = 0; 1076 return 0; 1077 } 1078 1079 p_node = mte_parent(mas->node); 1080 if (unlikely(a_node == p_node)) 1081 return 1; 1082 1083 a_type = mas_parent_type(mas, mas->node); 1084 mas->offset = mte_parent_slot(mas->node); 1085 a_enode = mt_mk_node(p_node, a_type); 1086 1087 /* Check to make sure all parent information is still accurate */ 1088 if (p_node != mte_parent(mas->node)) 1089 return 1; 1090 1091 mas->node = a_enode; 1092 1093 if (mte_is_root(a_enode)) { 1094 mas->max = ULONG_MAX; 1095 mas->min = 0; 1096 return 0; 1097 } 1098 1099 if (!mas->min) 1100 set_min = true; 1101 1102 if (mas->max == ULONG_MAX) 1103 set_max = true; 1104 1105 min = 0; 1106 max = ULONG_MAX; 1107 do { 1108 p_enode = a_enode; 1109 a_type = mas_parent_type(mas, p_enode); 1110 a_node = mte_parent(p_enode); 1111 a_slot = mte_parent_slot(p_enode); 1112 a_enode = mt_mk_node(a_node, a_type); 1113 pivots = ma_pivots(a_node, a_type); 1114 1115 if (unlikely(ma_dead_node(a_node))) 1116 return 1; 1117 1118 if (!set_min && a_slot) { 1119 set_min = true; 1120 min = pivots[a_slot - 1] + 1; 1121 } 1122 1123 if (!set_max && a_slot < mt_pivots[a_type]) { 1124 set_max = true; 1125 max = pivots[a_slot]; 1126 } 1127 1128 if (unlikely(ma_dead_node(a_node))) 1129 return 1; 1130 1131 if (unlikely(ma_is_root(a_node))) 1132 break; 1133 1134 } while (!set_min || !set_max); 1135 1136 mas->max = max; 1137 mas->min = min; 1138 return 0; 1139 } 1140 1141 /* 1142 * mas_pop_node() - Get a previously allocated maple node from the maple state. 1143 * @mas: The maple state 1144 * 1145 * Return: A pointer to a maple node. 
1146 */ 1147 static inline struct maple_node *mas_pop_node(struct ma_state *mas) 1148 { 1149 struct maple_alloc *ret, *node = mas->alloc; 1150 unsigned long total = mas_allocated(mas); 1151 unsigned int req = mas_alloc_req(mas); 1152 1153 /* nothing or a request pending. */ 1154 if (WARN_ON(!total)) 1155 return NULL; 1156 1157 if (total == 1) { 1158 /* single allocation in this ma_state */ 1159 mas->alloc = NULL; 1160 ret = node; 1161 goto single_node; 1162 } 1163 1164 if (node->node_count == 1) { 1165 /* Single allocation in this node. */ 1166 mas->alloc = node->slot[0]; 1167 mas->alloc->total = node->total - 1; 1168 ret = node; 1169 goto new_head; 1170 } 1171 node->total--; 1172 ret = node->slot[--node->node_count]; 1173 node->slot[node->node_count] = NULL; 1174 1175 single_node: 1176 new_head: 1177 if (req) { 1178 req++; 1179 mas_set_alloc_req(mas, req); 1180 } 1181 1182 memset(ret, 0, sizeof(*ret)); 1183 return (struct maple_node *)ret; 1184 } 1185 1186 /* 1187 * mas_push_node() - Push a node back on the maple state allocation. 1188 * @mas: The maple state 1189 * @used: The used maple node 1190 * 1191 * Stores the maple node back into @mas->alloc for reuse. Updates allocated and 1192 * requested node count as necessary. 1193 */ 1194 static inline void mas_push_node(struct ma_state *mas, struct maple_node *used) 1195 { 1196 struct maple_alloc *reuse = (struct maple_alloc *)used; 1197 struct maple_alloc *head = mas->alloc; 1198 unsigned long count; 1199 unsigned int requested = mas_alloc_req(mas); 1200 1201 count = mas_allocated(mas); 1202 1203 reuse->request_count = 0; 1204 reuse->node_count = 0; 1205 if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) { 1206 head->slot[head->node_count++] = reuse; 1207 head->total++; 1208 goto done; 1209 } 1210 1211 reuse->total = 1; 1212 if ((head) && !((unsigned long)head & 0x1)) { 1213 reuse->slot[0] = head; 1214 reuse->node_count = 1; 1215 reuse->total += head->total; 1216 } 1217 1218 mas->alloc = reuse; 1219 done: 1220 if (requested > 1) 1221 mas_set_alloc_req(mas, requested - 1); 1222 } 1223 1224 /* 1225 * mas_alloc_nodes() - Allocate nodes into a maple state 1226 * @mas: The maple state 1227 * @gfp: The GFP Flags 1228 */ 1229 static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) 1230 { 1231 struct maple_alloc *node; 1232 unsigned long allocated = mas_allocated(mas); 1233 unsigned int requested = mas_alloc_req(mas); 1234 unsigned int count; 1235 void **slots = NULL; 1236 unsigned int max_req = 0; 1237 1238 if (!requested) 1239 return; 1240 1241 mas_set_alloc_req(mas, 0); 1242 if (mas->mas_flags & MA_STATE_PREALLOC) { 1243 if (allocated) 1244 return; 1245 WARN_ON(!allocated); 1246 } 1247 1248 if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) { 1249 node = (struct maple_alloc *)mt_alloc_one(gfp); 1250 if (!node) 1251 goto nomem_one; 1252 1253 if (allocated) { 1254 node->slot[0] = mas->alloc; 1255 node->node_count = 1; 1256 } else { 1257 node->node_count = 0; 1258 } 1259 1260 mas->alloc = node; 1261 node->total = ++allocated; 1262 requested--; 1263 } 1264 1265 node = mas->alloc; 1266 node->request_count = 0; 1267 while (requested) { 1268 max_req = MAPLE_ALLOC_SLOTS - node->node_count; 1269 slots = (void **)&node->slot[node->node_count]; 1270 max_req = min(requested, max_req); 1271 count = mt_alloc_bulk(gfp, max_req, slots); 1272 if (!count) 1273 goto nomem_bulk; 1274 1275 if (node->node_count == 0) { 1276 node->slot[0]->node_count = 0; 1277 node->slot[0]->request_count = 0; 1278 } 1279 1280 node->node_count += count; 1281 
allocated += count; 1282 node = node->slot[0]; 1283 requested -= count; 1284 } 1285 mas->alloc->total = allocated; 1286 return; 1287 1288 nomem_bulk: 1289 /* Clean up potential freed allocations on bulk failure */ 1290 memset(slots, 0, max_req * sizeof(unsigned long)); 1291 nomem_one: 1292 mas_set_alloc_req(mas, requested); 1293 if (mas->alloc && !(((unsigned long)mas->alloc & 0x1))) 1294 mas->alloc->total = allocated; 1295 mas_set_err(mas, -ENOMEM); 1296 } 1297 1298 /* 1299 * mas_free() - Free an encoded maple node 1300 * @mas: The maple state 1301 * @used: The encoded maple node to free. 1302 * 1303 * Uses rcu free if necessary, pushes @used back on the maple state allocations 1304 * otherwise. 1305 */ 1306 static inline void mas_free(struct ma_state *mas, struct maple_enode *used) 1307 { 1308 struct maple_node *tmp = mte_to_node(used); 1309 1310 if (mt_in_rcu(mas->tree)) 1311 ma_free_rcu(tmp); 1312 else 1313 mas_push_node(mas, tmp); 1314 } 1315 1316 /* 1317 * mas_node_count() - Check if enough nodes are allocated and request more if 1318 * there is not enough nodes. 1319 * @mas: The maple state 1320 * @count: The number of nodes needed 1321 * @gfp: the gfp flags 1322 */ 1323 static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp) 1324 { 1325 unsigned long allocated = mas_allocated(mas); 1326 1327 if (allocated < count) { 1328 mas_set_alloc_req(mas, count - allocated); 1329 mas_alloc_nodes(mas, gfp); 1330 } 1331 } 1332 1333 /* 1334 * mas_node_count() - Check if enough nodes are allocated and request more if 1335 * there is not enough nodes. 1336 * @mas: The maple state 1337 * @count: The number of nodes needed 1338 * 1339 * Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags. 1340 */ 1341 static void mas_node_count(struct ma_state *mas, int count) 1342 { 1343 return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN); 1344 } 1345 1346 /* 1347 * mas_start() - Sets up maple state for operations. 1348 * @mas: The maple state. 1349 * 1350 * If mas->node == MAS_START, then set the min, max and depth to 1351 * defaults. 1352 * 1353 * Return: 1354 * - If mas->node is an error or not MAS_START, return NULL. 1355 * - If it's an empty tree: NULL & mas->node == MAS_NONE 1356 * - If it's a single entry: The entry & mas->node == MAS_ROOT 1357 * - If it's a tree: NULL & mas->node == safe root node. 1358 */ 1359 static inline struct maple_enode *mas_start(struct ma_state *mas) 1360 { 1361 if (likely(mas_is_start(mas))) { 1362 struct maple_enode *root; 1363 1364 mas->min = 0; 1365 mas->max = ULONG_MAX; 1366 1367 retry: 1368 mas->depth = 0; 1369 root = mas_root(mas); 1370 /* Tree with nodes */ 1371 if (likely(xa_is_node(root))) { 1372 mas->depth = 1; 1373 mas->node = mte_safe_root(root); 1374 mas->offset = 0; 1375 if (mte_dead_node(mas->node)) 1376 goto retry; 1377 1378 return NULL; 1379 } 1380 1381 /* empty tree */ 1382 if (unlikely(!root)) { 1383 mas->node = MAS_NONE; 1384 mas->offset = MAPLE_NODE_SLOTS; 1385 return NULL; 1386 } 1387 1388 /* Single entry tree */ 1389 mas->node = MAS_ROOT; 1390 mas->offset = MAPLE_NODE_SLOTS; 1391 1392 /* Single entry tree. */ 1393 if (mas->index > 0) 1394 return NULL; 1395 1396 return root; 1397 } 1398 1399 return NULL; 1400 } 1401 1402 /* 1403 * ma_data_end() - Find the end of the data in a node. 1404 * @node: The maple node 1405 * @type: The maple node type 1406 * @pivots: The array of pivots in the node 1407 * @max: The maximum value in the node 1408 * 1409 * Uses metadata to find the end of the data when possible. 
1410 * Return: The zero indexed last slot with data (may be null). 1411 */ 1412 static inline unsigned char ma_data_end(struct maple_node *node, 1413 enum maple_type type, 1414 unsigned long *pivots, 1415 unsigned long max) 1416 { 1417 unsigned char offset; 1418 1419 if (!pivots) 1420 return 0; 1421 1422 if (type == maple_arange_64) 1423 return ma_meta_end(node, type); 1424 1425 offset = mt_pivots[type] - 1; 1426 if (likely(!pivots[offset])) 1427 return ma_meta_end(node, type); 1428 1429 if (likely(pivots[offset] == max)) 1430 return offset; 1431 1432 return mt_pivots[type]; 1433 } 1434 1435 /* 1436 * mas_data_end() - Find the end of the data (slot). 1437 * @mas: the maple state 1438 * 1439 * This method is optimized to check the metadata of a node if the node type 1440 * supports data end metadata. 1441 * 1442 * Return: The zero indexed last slot with data (may be null). 1443 */ 1444 static inline unsigned char mas_data_end(struct ma_state *mas) 1445 { 1446 enum maple_type type; 1447 struct maple_node *node; 1448 unsigned char offset; 1449 unsigned long *pivots; 1450 1451 type = mte_node_type(mas->node); 1452 node = mas_mn(mas); 1453 if (type == maple_arange_64) 1454 return ma_meta_end(node, type); 1455 1456 pivots = ma_pivots(node, type); 1457 if (unlikely(ma_dead_node(node))) 1458 return 0; 1459 1460 offset = mt_pivots[type] - 1; 1461 if (likely(!pivots[offset])) 1462 return ma_meta_end(node, type); 1463 1464 if (likely(pivots[offset] == mas->max)) 1465 return offset; 1466 1467 return mt_pivots[type]; 1468 } 1469 1470 /* 1471 * mas_leaf_max_gap() - Returns the largest gap in a leaf node 1472 * @mas - the maple state 1473 * 1474 * Return: The maximum gap in the leaf. 1475 */ 1476 static unsigned long mas_leaf_max_gap(struct ma_state *mas) 1477 { 1478 enum maple_type mt; 1479 unsigned long pstart, gap, max_gap; 1480 struct maple_node *mn; 1481 unsigned long *pivots; 1482 void __rcu **slots; 1483 unsigned char i; 1484 unsigned char max_piv; 1485 1486 mt = mte_node_type(mas->node); 1487 mn = mas_mn(mas); 1488 slots = ma_slots(mn, mt); 1489 max_gap = 0; 1490 if (unlikely(ma_is_dense(mt))) { 1491 gap = 0; 1492 for (i = 0; i < mt_slots[mt]; i++) { 1493 if (slots[i]) { 1494 if (gap > max_gap) 1495 max_gap = gap; 1496 gap = 0; 1497 } else { 1498 gap++; 1499 } 1500 } 1501 if (gap > max_gap) 1502 max_gap = gap; 1503 return max_gap; 1504 } 1505 1506 /* 1507 * Check the first implied pivot optimizes the loop below and slot 1 may 1508 * be skipped if there is a gap in slot 0. 1509 */ 1510 pivots = ma_pivots(mn, mt); 1511 if (likely(!slots[0])) { 1512 max_gap = pivots[0] - mas->min + 1; 1513 i = 2; 1514 } else { 1515 i = 1; 1516 } 1517 1518 /* reduce max_piv as the special case is checked before the loop */ 1519 max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1; 1520 /* 1521 * Check end implied pivot which can only be a gap on the right most 1522 * node. 1523 */ 1524 if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) { 1525 gap = ULONG_MAX - pivots[max_piv]; 1526 if (gap > max_gap) 1527 max_gap = gap; 1528 } 1529 1530 for (; i <= max_piv; i++) { 1531 /* data == no gap. */ 1532 if (likely(slots[i])) 1533 continue; 1534 1535 pstart = pivots[i - 1]; 1536 gap = pivots[i] - pstart; 1537 if (gap > max_gap) 1538 max_gap = gap; 1539 1540 /* There cannot be two gaps in a row. 
*/ 1541 i++; 1542 } 1543 return max_gap; 1544 } 1545 1546 /* 1547 * ma_max_gap() - Get the maximum gap in a maple node (non-leaf) 1548 * @node: The maple node 1549 * @gaps: The pointer to the gaps 1550 * @mt: The maple node type 1551 * @*off: Pointer to store the offset location of the gap. 1552 * 1553 * Uses the metadata data end to scan backwards across set gaps. 1554 * 1555 * Return: The maximum gap value 1556 */ 1557 static inline unsigned long 1558 ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt, 1559 unsigned char *off) 1560 { 1561 unsigned char offset, i; 1562 unsigned long max_gap = 0; 1563 1564 i = offset = ma_meta_end(node, mt); 1565 do { 1566 if (gaps[i] > max_gap) { 1567 max_gap = gaps[i]; 1568 offset = i; 1569 } 1570 } while (i--); 1571 1572 *off = offset; 1573 return max_gap; 1574 } 1575 1576 /* 1577 * mas_max_gap() - find the largest gap in a non-leaf node and set the slot. 1578 * @mas: The maple state. 1579 * 1580 * Return: The gap value. 1581 */ 1582 static inline unsigned long mas_max_gap(struct ma_state *mas) 1583 { 1584 unsigned long *gaps; 1585 unsigned char offset; 1586 enum maple_type mt; 1587 struct maple_node *node; 1588 1589 mt = mte_node_type(mas->node); 1590 if (ma_is_leaf(mt)) 1591 return mas_leaf_max_gap(mas); 1592 1593 node = mas_mn(mas); 1594 MAS_BUG_ON(mas, mt != maple_arange_64); 1595 offset = ma_meta_gap(node, mt); 1596 gaps = ma_gaps(node, mt); 1597 return gaps[offset]; 1598 } 1599 1600 /* 1601 * mas_parent_gap() - Set the parent gap and any gaps above, as needed 1602 * @mas: The maple state 1603 * @offset: The gap offset in the parent to set 1604 * @new: The new gap value. 1605 * 1606 * Set the parent gap then continue to set the gap upwards, using the metadata 1607 * of the parent to see if it is necessary to check the node above. 1608 */ 1609 static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset, 1610 unsigned long new) 1611 { 1612 unsigned long meta_gap = 0; 1613 struct maple_node *pnode; 1614 struct maple_enode *penode; 1615 unsigned long *pgaps; 1616 unsigned char meta_offset; 1617 enum maple_type pmt; 1618 1619 pnode = mte_parent(mas->node); 1620 pmt = mas_parent_type(mas, mas->node); 1621 penode = mt_mk_node(pnode, pmt); 1622 pgaps = ma_gaps(pnode, pmt); 1623 1624 ascend: 1625 MAS_BUG_ON(mas, pmt != maple_arange_64); 1626 meta_offset = ma_meta_gap(pnode, pmt); 1627 meta_gap = pgaps[meta_offset]; 1628 1629 pgaps[offset] = new; 1630 1631 if (meta_gap == new) 1632 return; 1633 1634 if (offset != meta_offset) { 1635 if (meta_gap > new) 1636 return; 1637 1638 ma_set_meta_gap(pnode, pmt, offset); 1639 } else if (new < meta_gap) { 1640 new = ma_max_gap(pnode, pgaps, pmt, &meta_offset); 1641 ma_set_meta_gap(pnode, pmt, meta_offset); 1642 } 1643 1644 if (ma_is_root(pnode)) 1645 return; 1646 1647 /* Go to the parent node. */ 1648 pnode = mte_parent(penode); 1649 pmt = mas_parent_type(mas, penode); 1650 pgaps = ma_gaps(pnode, pmt); 1651 offset = mte_parent_slot(penode); 1652 penode = mt_mk_node(pnode, pmt); 1653 goto ascend; 1654 } 1655 1656 /* 1657 * mas_update_gap() - Update a nodes gaps and propagate up if necessary. 1658 * @mas - the maple state. 
1659 */ 1660 static inline void mas_update_gap(struct ma_state *mas) 1661 { 1662 unsigned char pslot; 1663 unsigned long p_gap; 1664 unsigned long max_gap; 1665 1666 if (!mt_is_alloc(mas->tree)) 1667 return; 1668 1669 if (mte_is_root(mas->node)) 1670 return; 1671 1672 max_gap = mas_max_gap(mas); 1673 1674 pslot = mte_parent_slot(mas->node); 1675 p_gap = ma_gaps(mte_parent(mas->node), 1676 mas_parent_type(mas, mas->node))[pslot]; 1677 1678 if (p_gap != max_gap) 1679 mas_parent_gap(mas, pslot, max_gap); 1680 } 1681 1682 /* 1683 * mas_adopt_children() - Set the parent pointer of all nodes in @parent to 1684 * @parent with the slot encoded. 1685 * @mas - the maple state (for the tree) 1686 * @parent - the maple encoded node containing the children. 1687 */ 1688 static inline void mas_adopt_children(struct ma_state *mas, 1689 struct maple_enode *parent) 1690 { 1691 enum maple_type type = mte_node_type(parent); 1692 struct maple_node *node = mte_to_node(parent); 1693 void __rcu **slots = ma_slots(node, type); 1694 unsigned long *pivots = ma_pivots(node, type); 1695 struct maple_enode *child; 1696 unsigned char offset; 1697 1698 offset = ma_data_end(node, type, pivots, mas->max); 1699 do { 1700 child = mas_slot_locked(mas, slots, offset); 1701 mas_set_parent(mas, child, parent, offset); 1702 } while (offset--); 1703 } 1704 1705 /* 1706 * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old 1707 * node as dead. 1708 * @mas - the maple state with the new node 1709 * @old_enode - The old maple encoded node to replace. 1710 */ 1711 static inline void mas_put_in_tree(struct ma_state *mas, 1712 struct maple_enode *old_enode) 1713 __must_hold(mas->tree->ma_lock) 1714 { 1715 unsigned char offset; 1716 void __rcu **slots; 1717 1718 if (mte_is_root(mas->node)) { 1719 mas_mn(mas)->parent = ma_parent_ptr(mas_tree_parent(mas)); 1720 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 1721 mas_set_height(mas); 1722 } else { 1723 1724 offset = mte_parent_slot(mas->node); 1725 slots = ma_slots(mte_parent(mas->node), 1726 mas_parent_type(mas, mas->node)); 1727 rcu_assign_pointer(slots[offset], mas->node); 1728 } 1729 1730 mte_set_node_dead(old_enode); 1731 } 1732 1733 /* 1734 * mas_replace_node() - Replace a node by putting it in the tree, marking it 1735 * dead, and freeing it. 1736 * the parent encoding to locate the maple node in the tree. 1737 * @mas - the ma_state with @mas->node pointing to the new node. 1738 * @old_enode - The old maple encoded node. 1739 */ 1740 static inline void mas_replace_node(struct ma_state *mas, 1741 struct maple_enode *old_enode) 1742 __must_hold(mas->tree->ma_lock) 1743 { 1744 mas_put_in_tree(mas, old_enode); 1745 mas_free(mas, old_enode); 1746 } 1747 1748 /* 1749 * mas_find_child() - Find a child who has the parent @mas->node. 1750 * @mas: the maple state with the parent. 1751 * @child: the maple state to store the child. 
1752 */ 1753 static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child) 1754 __must_hold(mas->tree->ma_lock) 1755 { 1756 enum maple_type mt; 1757 unsigned char offset; 1758 unsigned char end; 1759 unsigned long *pivots; 1760 struct maple_enode *entry; 1761 struct maple_node *node; 1762 void __rcu **slots; 1763 1764 mt = mte_node_type(mas->node); 1765 node = mas_mn(mas); 1766 slots = ma_slots(node, mt); 1767 pivots = ma_pivots(node, mt); 1768 end = ma_data_end(node, mt, pivots, mas->max); 1769 for (offset = mas->offset; offset <= end; offset++) { 1770 entry = mas_slot_locked(mas, slots, offset); 1771 if (mte_parent(entry) == node) { 1772 *child = *mas; 1773 mas->offset = offset + 1; 1774 child->offset = offset; 1775 mas_descend(child); 1776 child->offset = 0; 1777 return true; 1778 } 1779 } 1780 return false; 1781 } 1782 1783 /* 1784 * mab_shift_right() - Shift the data in mab right. Note, does not clean out the 1785 * old data or set b_node->b_end. 1786 * @b_node: the maple_big_node 1787 * @shift: the shift count 1788 */ 1789 static inline void mab_shift_right(struct maple_big_node *b_node, 1790 unsigned char shift) 1791 { 1792 unsigned long size = b_node->b_end * sizeof(unsigned long); 1793 1794 memmove(b_node->pivot + shift, b_node->pivot, size); 1795 memmove(b_node->slot + shift, b_node->slot, size); 1796 if (b_node->type == maple_arange_64) 1797 memmove(b_node->gap + shift, b_node->gap, size); 1798 } 1799 1800 /* 1801 * mab_middle_node() - Check if a middle node is needed (unlikely) 1802 * @b_node: the maple_big_node that contains the data. 1803 * @size: the amount of data in the b_node 1804 * @split: the potential split location 1805 * @slot_count: the size that can be stored in a single node being considered. 1806 * 1807 * Return: true if a middle node is required. 1808 */ 1809 static inline bool mab_middle_node(struct maple_big_node *b_node, int split, 1810 unsigned char slot_count) 1811 { 1812 unsigned char size = b_node->b_end; 1813 1814 if (size >= 2 * slot_count) 1815 return true; 1816 1817 if (!b_node->slot[split] && (size >= 2 * slot_count - 1)) 1818 return true; 1819 1820 return false; 1821 } 1822 1823 /* 1824 * mab_no_null_split() - ensure the split doesn't fall on a NULL 1825 * @b_node: the maple_big_node with the data 1826 * @split: the suggested split location 1827 * @slot_count: the number of slots in the node being considered. 1828 * 1829 * Return: the split location. 1830 */ 1831 static inline int mab_no_null_split(struct maple_big_node *b_node, 1832 unsigned char split, unsigned char slot_count) 1833 { 1834 if (!b_node->slot[split]) { 1835 /* 1836 * If the split is less than the max slot && the right side will 1837 * still be sufficient, then increment the split on NULL. 1838 */ 1839 if ((split < slot_count - 1) && 1840 (b_node->b_end - split) > (mt_min_slots[b_node->type])) 1841 split++; 1842 else 1843 split--; 1844 } 1845 return split; 1846 } 1847 1848 /* 1849 * mab_calc_split() - Calculate the split location and if there needs to be two 1850 * splits. 1851 * @bn: The maple_big_node with the data 1852 * @mid_split: The second split, if required. 0 otherwise. 1853 * 1854 * Return: The first split location. The middle split is set in @mid_split. 1855 */ 1856 static inline int mab_calc_split(struct ma_state *mas, 1857 struct maple_big_node *bn, unsigned char *mid_split, unsigned long min) 1858 { 1859 unsigned char b_end = bn->b_end; 1860 int split = b_end / 2; /* Assume equal split. 
*/ 1861 unsigned char slot_min, slot_count = mt_slots[bn->type]; 1862 1863 /* 1864 * To support gap tracking, all NULL entries are kept together and a node cannot 1865 * end on a NULL entry, with the exception of the left-most leaf. The 1866 * limitation means that the split of a node must be checked for this condition 1867 * and be able to put more data in one direction or the other. 1868 */ 1869 if (unlikely((mas->mas_flags & MA_STATE_BULK))) { 1870 *mid_split = 0; 1871 split = b_end - mt_min_slots[bn->type]; 1872 1873 if (!ma_is_leaf(bn->type)) 1874 return split; 1875 1876 mas->mas_flags |= MA_STATE_REBALANCE; 1877 if (!bn->slot[split]) 1878 split--; 1879 return split; 1880 } 1881 1882 /* 1883 * Although extremely rare, it is possible to enter what is known as the 3-way 1884 * split scenario. The 3-way split comes about by means of a store of a range 1885 * that overwrites the end and beginning of two full nodes. The result is a set 1886 * of entries that cannot be stored in 2 nodes. Sometimes, these two nodes can 1887 * also be located in different parent nodes which are also full. This can 1888 * carry upwards all the way to the root in the worst case. 1889 */ 1890 if (unlikely(mab_middle_node(bn, split, slot_count))) { 1891 split = b_end / 3; 1892 *mid_split = split * 2; 1893 } else { 1894 slot_min = mt_min_slots[bn->type]; 1895 1896 *mid_split = 0; 1897 /* 1898 * Avoid having a range less than the slot count unless it 1899 * causes one node to be deficient. 1900 * NOTE: mt_min_slots is 1 based, b_end and split are zero. 1901 */ 1902 while ((split < slot_count - 1) && 1903 ((bn->pivot[split] - min) < slot_count - 1) && 1904 (b_end - split > slot_min)) 1905 split++; 1906 } 1907 1908 /* Avoid ending a node on a NULL entry */ 1909 split = mab_no_null_split(bn, split, slot_count); 1910 1911 if (unlikely(*mid_split)) 1912 *mid_split = mab_no_null_split(bn, *mid_split, slot_count); 1913 1914 return split; 1915 } 1916 1917 /* 1918 * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node 1919 * and set @b_node->b_end to the next free slot. 1920 * @mas: The maple state 1921 * @mas_start: The starting slot to copy 1922 * @mas_end: The end slot to copy (inclusively) 1923 * @b_node: The maple_big_node to place the data 1924 * @mab_start: The starting location in maple_big_node to store the data. 
1925 */ 1926 static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start, 1927 unsigned char mas_end, struct maple_big_node *b_node, 1928 unsigned char mab_start) 1929 { 1930 enum maple_type mt; 1931 struct maple_node *node; 1932 void __rcu **slots; 1933 unsigned long *pivots, *gaps; 1934 int i = mas_start, j = mab_start; 1935 unsigned char piv_end; 1936 1937 node = mas_mn(mas); 1938 mt = mte_node_type(mas->node); 1939 pivots = ma_pivots(node, mt); 1940 if (!i) { 1941 b_node->pivot[j] = pivots[i++]; 1942 if (unlikely(i > mas_end)) 1943 goto complete; 1944 j++; 1945 } 1946 1947 piv_end = min(mas_end, mt_pivots[mt]); 1948 for (; i < piv_end; i++, j++) { 1949 b_node->pivot[j] = pivots[i]; 1950 if (unlikely(!b_node->pivot[j])) 1951 break; 1952 1953 if (unlikely(mas->max == b_node->pivot[j])) 1954 goto complete; 1955 } 1956 1957 if (likely(i <= mas_end)) 1958 b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); 1959 1960 complete: 1961 b_node->b_end = ++j; 1962 j -= mab_start; 1963 slots = ma_slots(node, mt); 1964 memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j); 1965 if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) { 1966 gaps = ma_gaps(node, mt); 1967 memcpy(b_node->gap + mab_start, gaps + mas_start, 1968 sizeof(unsigned long) * j); 1969 } 1970 } 1971 1972 /* 1973 * mas_leaf_set_meta() - Set the metadata of a leaf if possible. 1974 * @mas: The maple state 1975 * @node: The maple node 1976 * @pivots: pointer to the maple node pivots 1977 * @mt: The maple type 1978 * @end: The assumed end 1979 * 1980 * Note, end may be incremented within this function but not modified at the 1981 * source. This is fine since the metadata is the last thing to be stored in a 1982 * node during a write. 1983 */ 1984 static inline void mas_leaf_set_meta(struct ma_state *mas, 1985 struct maple_node *node, unsigned long *pivots, 1986 enum maple_type mt, unsigned char end) 1987 { 1988 /* There is no room for metadata already */ 1989 if (mt_pivots[mt] <= end) 1990 return; 1991 1992 if (pivots[end] && pivots[end] < mas->max) 1993 end++; 1994 1995 if (end < mt_slots[mt] - 1) 1996 ma_set_meta(node, mt, 0, end); 1997 } 1998 1999 /* 2000 * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node. 2001 * @b_node: the maple_big_node that has the data 2002 * @mab_start: the start location in @b_node. 2003 * @mab_end: The end location in @b_node (inclusively) 2004 * @mas: The maple state with the maple encoded node. 
2005 */ 2006 static inline void mab_mas_cp(struct maple_big_node *b_node, 2007 unsigned char mab_start, unsigned char mab_end, 2008 struct ma_state *mas, bool new_max) 2009 { 2010 int i, j = 0; 2011 enum maple_type mt = mte_node_type(mas->node); 2012 struct maple_node *node = mte_to_node(mas->node); 2013 void __rcu **slots = ma_slots(node, mt); 2014 unsigned long *pivots = ma_pivots(node, mt); 2015 unsigned long *gaps = NULL; 2016 unsigned char end; 2017 2018 if (mab_end - mab_start > mt_pivots[mt]) 2019 mab_end--; 2020 2021 if (!pivots[mt_pivots[mt] - 1]) 2022 slots[mt_pivots[mt]] = NULL; 2023 2024 i = mab_start; 2025 do { 2026 pivots[j++] = b_node->pivot[i++]; 2027 } while (i <= mab_end && likely(b_node->pivot[i])); 2028 2029 memcpy(slots, b_node->slot + mab_start, 2030 sizeof(void *) * (i - mab_start)); 2031 2032 if (new_max) 2033 mas->max = b_node->pivot[i - 1]; 2034 2035 end = j - 1; 2036 if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) { 2037 unsigned long max_gap = 0; 2038 unsigned char offset = 0; 2039 2040 gaps = ma_gaps(node, mt); 2041 do { 2042 gaps[--j] = b_node->gap[--i]; 2043 if (gaps[j] > max_gap) { 2044 offset = j; 2045 max_gap = gaps[j]; 2046 } 2047 } while (j); 2048 2049 ma_set_meta(node, mt, offset, end); 2050 } else { 2051 mas_leaf_set_meta(mas, node, pivots, mt, end); 2052 } 2053 } 2054 2055 /* 2056 * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert. 2057 * @mas: The maple state 2058 * @end: The maple node end 2059 * @mt: The maple node type 2060 */ 2061 static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end, 2062 enum maple_type mt) 2063 { 2064 if (!(mas->mas_flags & MA_STATE_BULK)) 2065 return; 2066 2067 if (mte_is_root(mas->node)) 2068 return; 2069 2070 if (end > mt_min_slots[mt]) { 2071 mas->mas_flags &= ~MA_STATE_REBALANCE; 2072 return; 2073 } 2074 } 2075 2076 /* 2077 * mas_store_b_node() - Store an @entry into the b_node while also copying the 2078 * data from a maple encoded node. 2079 * @wr_mas: the maple write state 2080 * @b_node: the maple_big_node to fill with data 2081 * @offset_end: the offset to end copying 2082 * 2083 * Return: The actual end of the data stored in @b_node 2084 */ 2085 static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas, 2086 struct maple_big_node *b_node, unsigned char offset_end) 2087 { 2088 unsigned char slot; 2089 unsigned char b_end; 2090 /* Possible underflow of piv will wrap back to 0 before use. */ 2091 unsigned long piv; 2092 struct ma_state *mas = wr_mas->mas; 2093 2094 b_node->type = wr_mas->type; 2095 b_end = 0; 2096 slot = mas->offset; 2097 if (slot) { 2098 /* Copy start data up to insert. */ 2099 mas_mab_cp(mas, 0, slot - 1, b_node, 0); 2100 b_end = b_node->b_end; 2101 piv = b_node->pivot[b_end - 1]; 2102 } else 2103 piv = mas->min - 1; 2104 2105 if (piv + 1 < mas->index) { 2106 /* Handle range starting after old range */ 2107 b_node->slot[b_end] = wr_mas->content; 2108 if (!wr_mas->content) 2109 b_node->gap[b_end] = mas->index - 1 - piv; 2110 b_node->pivot[b_end++] = mas->index - 1; 2111 } 2112 2113 /* Store the new entry. */ 2114 mas->offset = b_end; 2115 b_node->slot[b_end] = wr_mas->entry; 2116 b_node->pivot[b_end] = mas->last; 2117 2118 /* Appended. 
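The new entry runs to (or past) this node's maximum, so there is nothing left to copy behind it.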
*/ 2119 if (mas->last >= mas->max) 2120 goto b_end; 2121 2122 /* Handle new range ending before old range ends */ 2123 piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type); 2124 if (piv > mas->last) { 2125 if (piv == ULONG_MAX) 2126 mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type); 2127 2128 if (offset_end != slot) 2129 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 2130 offset_end); 2131 2132 b_node->slot[++b_end] = wr_mas->content; 2133 if (!wr_mas->content) 2134 b_node->gap[b_end] = piv - mas->last + 1; 2135 b_node->pivot[b_end] = piv; 2136 } 2137 2138 slot = offset_end + 1; 2139 if (slot > wr_mas->node_end) 2140 goto b_end; 2141 2142 /* Copy end data to the end of the node. */ 2143 mas_mab_cp(mas, slot, wr_mas->node_end + 1, b_node, ++b_end); 2144 b_node->b_end--; 2145 return; 2146 2147 b_end: 2148 b_node->b_end = b_end; 2149 } 2150 2151 /* 2152 * mas_prev_sibling() - Find the previous node with the same parent. 2153 * @mas: the maple state 2154 * 2155 * Return: True if there is a previous sibling, false otherwise. 2156 */ 2157 static inline bool mas_prev_sibling(struct ma_state *mas) 2158 { 2159 unsigned int p_slot = mte_parent_slot(mas->node); 2160 2161 if (mte_is_root(mas->node)) 2162 return false; 2163 2164 if (!p_slot) 2165 return false; 2166 2167 mas_ascend(mas); 2168 mas->offset = p_slot - 1; 2169 mas_descend(mas); 2170 return true; 2171 } 2172 2173 /* 2174 * mas_next_sibling() - Find the next node with the same parent. 2175 * @mas: the maple state 2176 * 2177 * Return: true if there is a next sibling, false otherwise. 2178 */ 2179 static inline bool mas_next_sibling(struct ma_state *mas) 2180 { 2181 MA_STATE(parent, mas->tree, mas->index, mas->last); 2182 2183 if (mte_is_root(mas->node)) 2184 return false; 2185 2186 parent = *mas; 2187 mas_ascend(&parent); 2188 parent.offset = mte_parent_slot(mas->node) + 1; 2189 if (parent.offset > mas_data_end(&parent)) 2190 return false; 2191 2192 *mas = parent; 2193 mas_descend(mas); 2194 return true; 2195 } 2196 2197 /* 2198 * mte_node_or_node() - Return the encoded node or MAS_NONE. 2199 * @enode: The encoded maple node. 2200 * 2201 * Shorthand to avoid setting %NULLs in the tree or maple_subtree_state. 2202 * 2203 * Return: @enode or MAS_NONE 2204 */ 2205 static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode) 2206 { 2207 if (enode) 2208 return enode; 2209 2210 return ma_enode_ptr(MAS_NONE); 2211 } 2212 2213 /* 2214 * mas_wr_node_walk() - Find the correct offset for the index in the @mas. 2215 * @wr_mas: The maple write state 2216 * 2217 * Uses mas_slot_locked() and does not need to worry about dead nodes. 2218 */ 2219 static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) 2220 { 2221 struct ma_state *mas = wr_mas->mas; 2222 unsigned char count, offset; 2223 2224 if (unlikely(ma_is_dense(wr_mas->type))) { 2225 wr_mas->r_max = wr_mas->r_min = mas->index; 2226 mas->offset = mas->index = mas->min; 2227 return; 2228 } 2229 2230 wr_mas->node = mas_mn(wr_mas->mas); 2231 wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type); 2232 count = wr_mas->node_end = ma_data_end(wr_mas->node, wr_mas->type, 2233 wr_mas->pivots, mas->max); 2234 offset = mas->offset; 2235 2236 while (offset < count && mas->index > wr_mas->pivots[offset]) 2237 offset++; 2238 2239 wr_mas->r_max = offset < count ? 
wr_mas->pivots[offset] : mas->max; 2240 wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset); 2241 wr_mas->offset_end = mas->offset = offset; 2242 } 2243 2244 /* 2245 * mast_rebalance_next() - Rebalance against the next node 2246 * @mast: The maple subtree state 2247 * The data of the node to the right of @mast->orig_r is appended to @mast->bn. 2248 */ 2249 static inline void mast_rebalance_next(struct maple_subtree_state *mast) 2250 { 2251 unsigned char b_end = mast->bn->b_end; 2252 2253 mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node), 2254 mast->bn, b_end); 2255 mast->orig_r->last = mast->orig_r->max; 2256 } 2257 2258 /* 2259 * mast_rebalance_prev() - Rebalance against the previous node 2260 * @mast: The maple subtree state 2261 * The data of the node to the left of @mast->orig_l is prepended to @mast->bn. 2262 */ 2263 static inline void mast_rebalance_prev(struct maple_subtree_state *mast) 2264 { 2265 unsigned char end = mas_data_end(mast->orig_l) + 1; 2266 unsigned char b_end = mast->bn->b_end; 2267 2268 mab_shift_right(mast->bn, end); 2269 mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0); 2270 mast->l->min = mast->orig_l->min; 2271 mast->orig_l->index = mast->orig_l->min; 2272 mast->bn->b_end = end + b_end; 2273 mast->l->offset += end; 2274 } 2275 2276 /* 2277 * mast_spanning_rebalance() - Rebalance nodes with the nearest neighbour, favouring 2278 * the node to the right. The nodes to the right, then to the left, are checked at each 2279 * level upwards until the root is reached. 2280 * Data is copied into @mast->bn. 2281 * @mast: The maple_subtree_state. 2282 */ 2283 static inline 2284 bool mast_spanning_rebalance(struct maple_subtree_state *mast) 2285 { 2286 struct ma_state r_tmp = *mast->orig_r; 2287 struct ma_state l_tmp = *mast->orig_l; 2288 unsigned char depth = 0; 2289 2290 r_tmp = *mast->orig_r; 2291 l_tmp = *mast->orig_l; 2292 do { 2293 mas_ascend(mast->orig_r); 2294 mas_ascend(mast->orig_l); 2295 depth++; 2296 if (mast->orig_r->offset < mas_data_end(mast->orig_r)) { 2297 mast->orig_r->offset++; 2298 do { 2299 mas_descend(mast->orig_r); 2300 mast->orig_r->offset = 0; 2301 } while (--depth); 2302 2303 mast_rebalance_next(mast); 2304 *mast->orig_l = l_tmp; 2305 return true; 2306 } else if (mast->orig_l->offset != 0) { 2307 mast->orig_l->offset--; 2308 do { 2309 mas_descend(mast->orig_l); 2310 mast->orig_l->offset = 2311 mas_data_end(mast->orig_l); 2312 } while (--depth); 2313 2314 mast_rebalance_prev(mast); 2315 *mast->orig_r = r_tmp; 2316 return true; 2317 } 2318 } while (!mte_is_root(mast->orig_r->node)); 2319 2320 *mast->orig_r = r_tmp; 2321 *mast->orig_l = l_tmp; 2322 return false; 2323 } 2324 2325 /* 2326 * mast_ascend() - Ascend the original left and right maple states. 2327 * @mast: the maple subtree state. 2328 * 2329 * Ascend the original left and right sides. Set the offsets to point to the 2330 * data already in the new tree (@mast->l and @mast->r).
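 * The offsets are re-found with mas_wr_node_walk(): the right state is walked
 * to @mast->r->max and the left state to @mast->l->min, so the slots now
 * covered by the new subtree are known and later copies can take just the
 * entries on either side of them.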
2331 */ 2332 static inline void mast_ascend(struct maple_subtree_state *mast) 2333 { 2334 MA_WR_STATE(wr_mas, mast->orig_r, NULL); 2335 mas_ascend(mast->orig_l); 2336 mas_ascend(mast->orig_r); 2337 2338 mast->orig_r->offset = 0; 2339 mast->orig_r->index = mast->r->max; 2340 /* last should be larger than or equal to index */ 2341 if (mast->orig_r->last < mast->orig_r->index) 2342 mast->orig_r->last = mast->orig_r->index; 2343 2344 wr_mas.type = mte_node_type(mast->orig_r->node); 2345 mas_wr_node_walk(&wr_mas); 2346 /* Set up the left side of things */ 2347 mast->orig_l->offset = 0; 2348 mast->orig_l->index = mast->l->min; 2349 wr_mas.mas = mast->orig_l; 2350 wr_mas.type = mte_node_type(mast->orig_l->node); 2351 mas_wr_node_walk(&wr_mas); 2352 2353 mast->bn->type = wr_mas.type; 2354 } 2355 2356 /* 2357 * mas_new_ma_node() - Create and return a new maple node. Helper function. 2358 * @mas: the maple state with the allocations. 2359 * @b_node: the maple_big_node with the type encoding. 2360 * 2361 * Use the node type from the maple_big_node to allocate a new node from the 2362 * ma_state. This function exists mainly for code readability. 2363 * 2364 * Return: A new maple encoded node 2365 */ 2366 static inline struct maple_enode 2367 *mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node) 2368 { 2369 return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type); 2370 } 2371 2372 /* 2373 * mas_mab_to_node() - Set up right and middle nodes 2374 * 2375 * @mas: the maple state that contains the allocations. 2376 * @b_node: the node which contains the data. 2377 * @left: The pointer which will have the left node 2378 * @right: The pointer which may have the right node 2379 * @middle: the pointer which may have the middle node (rare) 2380 * @mid_split: the split location for the middle node 2381 * 2382 * Return: the split of left. 2383 */ 2384 static inline unsigned char mas_mab_to_node(struct ma_state *mas, 2385 struct maple_big_node *b_node, struct maple_enode **left, 2386 struct maple_enode **right, struct maple_enode **middle, 2387 unsigned char *mid_split, unsigned long min) 2388 { 2389 unsigned char split = 0; 2390 unsigned char slot_count = mt_slots[b_node->type]; 2391 2392 *left = mas_new_ma_node(mas, b_node); 2393 *right = NULL; 2394 *middle = NULL; 2395 *mid_split = 0; 2396 2397 if (b_node->b_end < slot_count) { 2398 split = b_node->b_end; 2399 } else { 2400 split = mab_calc_split(mas, b_node, mid_split, min); 2401 *right = mas_new_ma_node(mas, b_node); 2402 } 2403 2404 if (*mid_split) 2405 *middle = mas_new_ma_node(mas, b_node); 2406 2407 return split; 2408 2409 } 2410 2411 /* 2412 * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end 2413 * pointer. 2414 * @b_node - the big node to add the entry 2415 * @mas - the maple state to get the pivot (mas->max) 2416 * @entry - the entry to add, if NULL nothing happens. 2417 */ 2418 static inline void mab_set_b_end(struct maple_big_node *b_node, 2419 struct ma_state *mas, 2420 void *entry) 2421 { 2422 if (!entry) 2423 return; 2424 2425 b_node->slot[b_node->b_end] = entry; 2426 if (mt_is_alloc(mas->tree)) 2427 b_node->gap[b_node->b_end] = mas_max_gap(mas); 2428 b_node->pivot[b_node->b_end++] = mas->max; 2429 } 2430 2431 /* 2432 * mas_set_split_parent() - combine_then_separate helper function. 
Sets the parent 2433 * of @mas->node to either @left or @right, depending on @slot and @split 2434 * 2435 * @mas - the maple state with the node that needs a parent 2436 * @left - possible parent 1 2437 * @right - possible parent 2 2438 * @slot - the slot the mas->node was placed in 2439 * @split - the split location between @left and @right 2440 */ 2441 static inline void mas_set_split_parent(struct ma_state *mas, 2442 struct maple_enode *left, 2443 struct maple_enode *right, 2444 unsigned char *slot, unsigned char split) 2445 { 2446 if (mas_is_none(mas)) 2447 return; 2448 2449 if ((*slot) <= split) 2450 mas_set_parent(mas, mas->node, left, *slot); 2451 else if (right) 2452 mas_set_parent(mas, mas->node, right, (*slot) - split - 1); 2453 2454 (*slot)++; 2455 } 2456 2457 /* 2458 * mte_mid_split_check() - Check if the next node passes the mid-split 2459 * @l: Pointer to the left encoded maple node. 2460 * @r: Pointer to the right encoded maple node. 2461 * @right: The encoded maple node to switch to once past the mid-split. 2462 * @slot: The offset 2463 * @split: Pointer to the split location. 2464 * @mid_split: The middle split. 2465 */ 2466 static inline void mte_mid_split_check(struct maple_enode **l, 2467 struct maple_enode **r, 2468 struct maple_enode *right, 2469 unsigned char slot, 2470 unsigned char *split, 2471 unsigned char mid_split) 2472 { 2473 if (*r == right) 2474 return; 2475 2476 if (slot < mid_split) 2477 return; 2478 2479 *l = *r; 2480 *r = right; 2481 *split = mid_split; 2482 } 2483 2484 /* 2485 * mast_set_split_parents() - Helper function to set three nodes' parents. Slot 2486 * is taken from @mast->l. 2487 * @mast - the maple subtree state 2488 * @left - the left node 2489 * @middle - the middle node (may be NULL); @right - the right node 2490 * @split - the split location; @mid_split - the split location between @middle and @right. 2491 */ 2492 static inline void mast_set_split_parents(struct maple_subtree_state *mast, 2493 struct maple_enode *left, 2494 struct maple_enode *middle, 2495 struct maple_enode *right, 2496 unsigned char split, 2497 unsigned char mid_split) 2498 { 2499 unsigned char slot; 2500 struct maple_enode *l = left; 2501 struct maple_enode *r = right; 2502 2503 if (mas_is_none(mast->l)) 2504 return; 2505 2506 if (middle) 2507 r = middle; 2508 2509 slot = mast->l->offset; 2510 2511 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2512 mas_set_split_parent(mast->l, l, r, &slot, split); 2513 2514 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2515 mas_set_split_parent(mast->m, l, r, &slot, split); 2516 2517 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2518 mas_set_split_parent(mast->r, l, r, &slot, split); 2519 } 2520 2521 /* 2522 * mas_topiary_node() - Dispose of a single node 2523 * @mas: The maple state for pushing nodes 2524 * @enode: The encoded maple node 2525 * @in_rcu: If the tree is in rcu mode 2526 * 2527 * The node will either be RCU freed or pushed back on the maple state. 2528 */ 2529 static inline void mas_topiary_node(struct ma_state *mas, 2530 struct maple_enode *enode, bool in_rcu) 2531 { 2532 struct maple_node *tmp; 2533 2534 if (enode == MAS_NONE) 2535 return; 2536 2537 tmp = mte_to_node(enode); 2538 mte_set_node_dead(enode); 2539 if (in_rcu) 2540 ma_free_rcu(tmp); 2541 else 2542 mas_push_node(mas, tmp); 2543 } 2544 2545 /* 2546 * mas_topiary_replace() - Replace the data with new data, then repair the 2547 * parent links within the new tree. Iterate over the dead sub-tree and collect 2548 * the dead subtrees and topiary the nodes that are no longer of use.
2549 * 2550 * The new tree will have up to three children with the correct parent. Keep 2551 * track of the new entries as they need to be followed to find the next level 2552 * of new entries. 2553 * 2554 * The old tree will have up to three children with the old parent. Keep track 2555 * of the old entries as they may have more nodes below replaced. Nodes within 2556 * [index, last] are dead subtrees, others need to be freed and followed. 2557 * 2558 * @mas: The maple state pointing at the new data 2559 * @old_enode: The maple encoded node being replaced 2560 * 2561 */ 2562 static inline void mas_topiary_replace(struct ma_state *mas, 2563 struct maple_enode *old_enode) 2564 { 2565 struct ma_state tmp[3], tmp_next[3]; 2566 MA_TOPIARY(subtrees, mas->tree); 2567 bool in_rcu; 2568 int i, n; 2569 2570 /* Place data in tree & then mark node as old */ 2571 mas_put_in_tree(mas, old_enode); 2572 2573 /* Update the parent pointers in the tree */ 2574 tmp[0] = *mas; 2575 tmp[0].offset = 0; 2576 tmp[1].node = MAS_NONE; 2577 tmp[2].node = MAS_NONE; 2578 while (!mte_is_leaf(tmp[0].node)) { 2579 n = 0; 2580 for (i = 0; i < 3; i++) { 2581 if (mas_is_none(&tmp[i])) 2582 continue; 2583 2584 while (n < 3) { 2585 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2586 break; 2587 n++; 2588 } 2589 2590 mas_adopt_children(&tmp[i], tmp[i].node); 2591 } 2592 2593 if (MAS_WARN_ON(mas, n == 0)) 2594 break; 2595 2596 while (n < 3) 2597 tmp_next[n++].node = MAS_NONE; 2598 2599 for (i = 0; i < 3; i++) 2600 tmp[i] = tmp_next[i]; 2601 } 2602 2603 /* Collect the old nodes that need to be discarded */ 2604 if (mte_is_leaf(old_enode)) 2605 return mas_free(mas, old_enode); 2606 2607 tmp[0] = *mas; 2608 tmp[0].offset = 0; 2609 tmp[0].node = old_enode; 2610 tmp[1].node = MAS_NONE; 2611 tmp[2].node = MAS_NONE; 2612 in_rcu = mt_in_rcu(mas->tree); 2613 do { 2614 n = 0; 2615 for (i = 0; i < 3; i++) { 2616 if (mas_is_none(&tmp[i])) 2617 continue; 2618 2619 while (n < 3) { 2620 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2621 break; 2622 2623 if ((tmp_next[n].min >= tmp_next->index) && 2624 (tmp_next[n].max <= tmp_next->last)) { 2625 mat_add(&subtrees, tmp_next[n].node); 2626 tmp_next[n].node = MAS_NONE; 2627 } else { 2628 n++; 2629 } 2630 } 2631 } 2632 2633 if (MAS_WARN_ON(mas, n == 0)) 2634 break; 2635 2636 while (n < 3) 2637 tmp_next[n++].node = MAS_NONE; 2638 2639 for (i = 0; i < 3; i++) { 2640 mas_topiary_node(mas, tmp[i].node, in_rcu); 2641 tmp[i] = tmp_next[i]; 2642 } 2643 } while (!mte_is_leaf(tmp[0].node)); 2644 2645 for (i = 0; i < 3; i++) 2646 mas_topiary_node(mas, tmp[i].node, in_rcu); 2647 2648 mas_mat_destroy(mas, &subtrees); 2649 } 2650 2651 /* 2652 * mas_wmb_replace() - Write memory barrier and replace 2653 * @mas: The maple state 2654 * @old: The old maple encoded node that is being replaced. 2655 * 2656 * Updates gap as necessary. 2657 */ 2658 static inline void mas_wmb_replace(struct ma_state *mas, 2659 struct maple_enode *old_enode) 2660 { 2661 /* Insert the new data in the tree */ 2662 mas_topiary_replace(mas, old_enode); 2663 2664 if (mte_is_leaf(mas->node)) 2665 return; 2666 2667 mas_update_gap(mas); 2668 } 2669 2670 /* 2671 * mast_cp_to_nodes() - Copy data out to nodes. 2672 * @mast: The maple subtree state 2673 * @left: The left encoded maple node 2674 * @middle: The middle encoded maple node 2675 * @right: The right encoded maple node 2676 * @split: The location to split between left and (middle ? middle : right) 2677 * @mid_split: The location to split between middle and right. 
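 *
 * Illustrative numbers (not taken from a real tree): with b_end == 29, a
 * three-way split might use split == 9 and mid_split == 18, handing offsets
 * 0..9 to the left node, 10..18 to the middle node and 19..29 to the right
 * node.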
2678 */ 2679 static inline void mast_cp_to_nodes(struct maple_subtree_state *mast, 2680 struct maple_enode *left, struct maple_enode *middle, 2681 struct maple_enode *right, unsigned char split, unsigned char mid_split) 2682 { 2683 bool new_lmax = true; 2684 2685 mast->l->node = mte_node_or_none(left); 2686 mast->m->node = mte_node_or_none(middle); 2687 mast->r->node = mte_node_or_none(right); 2688 2689 mast->l->min = mast->orig_l->min; 2690 if (split == mast->bn->b_end) { 2691 mast->l->max = mast->orig_r->max; 2692 new_lmax = false; 2693 } 2694 2695 mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax); 2696 2697 if (middle) { 2698 mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true); 2699 mast->m->min = mast->bn->pivot[split] + 1; 2700 split = mid_split; 2701 } 2702 2703 mast->r->max = mast->orig_r->max; 2704 if (right) { 2705 mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false); 2706 mast->r->min = mast->bn->pivot[split] + 1; 2707 } 2708 } 2709 2710 /* 2711 * mast_combine_cp_left - Copy in the original left side of the tree into the 2712 * combined data set in the maple subtree state big node. 2713 * @mast: The maple subtree state 2714 */ 2715 static inline void mast_combine_cp_left(struct maple_subtree_state *mast) 2716 { 2717 unsigned char l_slot = mast->orig_l->offset; 2718 2719 if (!l_slot) 2720 return; 2721 2722 mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0); 2723 } 2724 2725 /* 2726 * mast_combine_cp_right: Copy in the original right side of the tree into the 2727 * combined data set in the maple subtree state big node. 2728 * @mast: The maple subtree state 2729 */ 2730 static inline void mast_combine_cp_right(struct maple_subtree_state *mast) 2731 { 2732 if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max) 2733 return; 2734 2735 mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1, 2736 mt_slot_count(mast->orig_r->node), mast->bn, 2737 mast->bn->b_end); 2738 mast->orig_r->last = mast->orig_r->max; 2739 } 2740 2741 /* 2742 * mast_sufficient: Check if the maple subtree state has enough data in the big 2743 * node to create at least one sufficient node 2744 * @mast: the maple subtree state 2745 */ 2746 static inline bool mast_sufficient(struct maple_subtree_state *mast) 2747 { 2748 if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node)) 2749 return true; 2750 2751 return false; 2752 } 2753 2754 /* 2755 * mast_overflow: Check if there is too much data in the subtree state for a 2756 * single node. 
2757 * @mast: The maple subtree state 2758 */ 2759 static inline bool mast_overflow(struct maple_subtree_state *mast) 2760 { 2761 if (mast->bn->b_end >= mt_slot_count(mast->orig_l->node)) 2762 return true; 2763 2764 return false; 2765 } 2766 2767 static inline void *mtree_range_walk(struct ma_state *mas) 2768 { 2769 unsigned long *pivots; 2770 unsigned char offset; 2771 struct maple_node *node; 2772 struct maple_enode *next, *last; 2773 enum maple_type type; 2774 void __rcu **slots; 2775 unsigned char end; 2776 unsigned long max, min; 2777 unsigned long prev_max, prev_min; 2778 2779 next = mas->node; 2780 min = mas->min; 2781 max = mas->max; 2782 do { 2783 offset = 0; 2784 last = next; 2785 node = mte_to_node(next); 2786 type = mte_node_type(next); 2787 pivots = ma_pivots(node, type); 2788 end = ma_data_end(node, type, pivots, max); 2789 if (unlikely(ma_dead_node(node))) 2790 goto dead_node; 2791 2792 if (pivots[offset] >= mas->index) { 2793 prev_max = max; 2794 prev_min = min; 2795 max = pivots[offset]; 2796 goto next; 2797 } 2798 2799 do { 2800 offset++; 2801 } while ((offset < end) && (pivots[offset] < mas->index)); 2802 2803 prev_min = min; 2804 min = pivots[offset - 1] + 1; 2805 prev_max = max; 2806 if (likely(offset < end && pivots[offset])) 2807 max = pivots[offset]; 2808 2809 next: 2810 slots = ma_slots(node, type); 2811 next = mt_slot(mas->tree, slots, offset); 2812 if (unlikely(ma_dead_node(node))) 2813 goto dead_node; 2814 } while (!ma_is_leaf(type)); 2815 2816 mas->offset = offset; 2817 mas->index = min; 2818 mas->last = max; 2819 mas->min = prev_min; 2820 mas->max = prev_max; 2821 mas->node = last; 2822 return (void *)next; 2823 2824 dead_node: 2825 mas_reset(mas); 2826 return NULL; 2827 } 2828 2829 /* 2830 * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers. 2831 * @mas: The starting maple state 2832 * @mast: The maple_subtree_state, keeps track of 4 maple states. 2833 * @count: The estimated count of iterations needed. 2834 * 2835 * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root 2836 * is hit. First @b_node is split into two entries which are inserted into the 2837 * next iteration of the loop. @b_node is returned populated with the final 2838 * iteration. @mas is used to obtain allocations. orig_l_mas keeps track of the 2839 * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last 2840 * to account of what has been copied into the new sub-tree. The update of 2841 * orig_l_mas->last is used in mas_consume to find the slots that will need to 2842 * be either freed or destroyed. orig_l_mas->depth keeps track of the height of 2843 * the new sub-tree in case the sub-tree becomes the full tree. 2844 * 2845 * Return: the number of elements in b_node during the last loop. 2846 */ 2847 static int mas_spanning_rebalance(struct ma_state *mas, 2848 struct maple_subtree_state *mast, unsigned char count) 2849 { 2850 unsigned char split, mid_split; 2851 unsigned char slot = 0; 2852 struct maple_enode *left = NULL, *middle = NULL, *right = NULL; 2853 struct maple_enode *old_enode; 2854 2855 MA_STATE(l_mas, mas->tree, mas->index, mas->index); 2856 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2857 MA_STATE(m_mas, mas->tree, mas->index, mas->index); 2858 2859 /* 2860 * The tree needs to be rebalanced and leaves need to be kept at the same level. 2861 * Rebalancing is done by use of the ``struct maple_topiary``. 
2862 */ 2863 mast->l = &l_mas; 2864 mast->m = &m_mas; 2865 mast->r = &r_mas; 2866 l_mas.node = r_mas.node = m_mas.node = MAS_NONE; 2867 2868 /* Check if this is not root and has sufficient data. */ 2869 if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) && 2870 unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type])) 2871 mast_spanning_rebalance(mast); 2872 2873 l_mas.depth = 0; 2874 2875 /* 2876 * Each level of the tree is examined and balanced, pushing data to the left or 2877 * right, or rebalancing against left or right nodes is employed to avoid 2878 * rippling up the tree to limit the amount of churn. Once a new sub-section of 2879 * the tree is created, there may be a mix of new and old nodes. The old nodes 2880 * will have the incorrect parent pointers and currently be in two trees: the 2881 * original tree and the partially new tree. To remedy the parent pointers in 2882 * the old tree, the new data is swapped into the active tree and a walk down 2883 * the tree is performed and the parent pointers are updated. 2884 * See mas_topiary_replace() for more information. 2885 */ 2886 while (count--) { 2887 mast->bn->b_end--; 2888 mast->bn->type = mte_node_type(mast->orig_l->node); 2889 split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle, 2890 &mid_split, mast->orig_l->min); 2891 mast_set_split_parents(mast, left, middle, right, split, 2892 mid_split); 2893 mast_cp_to_nodes(mast, left, middle, right, split, mid_split); 2894 2895 /* 2896 * Copy data from next level in the tree to mast->bn from next 2897 * iteration 2898 */ 2899 memset(mast->bn, 0, sizeof(struct maple_big_node)); 2900 mast->bn->type = mte_node_type(left); 2901 l_mas.depth++; 2902 2903 /* Root already stored in l->node. */ 2904 if (mas_is_root_limits(mast->l)) 2905 goto new_root; 2906 2907 mast_ascend(mast); 2908 mast_combine_cp_left(mast); 2909 l_mas.offset = mast->bn->b_end; 2910 mab_set_b_end(mast->bn, &l_mas, left); 2911 mab_set_b_end(mast->bn, &m_mas, middle); 2912 mab_set_b_end(mast->bn, &r_mas, right); 2913 2914 /* Copy anything necessary out of the right node. */ 2915 mast_combine_cp_right(mast); 2916 mast->orig_l->last = mast->orig_l->max; 2917 2918 if (mast_sufficient(mast)) 2919 continue; 2920 2921 if (mast_overflow(mast)) 2922 continue; 2923 2924 /* May be a new root stored in mast->bn */ 2925 if (mas_is_root_limits(mast->orig_l)) 2926 break; 2927 2928 mast_spanning_rebalance(mast); 2929 2930 /* rebalancing from other nodes may require another loop. 
*/ 2931 if (!count) 2932 count++; 2933 } 2934 2935 l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), 2936 mte_node_type(mast->orig_l->node)); 2937 l_mas.depth++; 2938 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true); 2939 mas_set_parent(mas, left, l_mas.node, slot); 2940 if (middle) 2941 mas_set_parent(mas, middle, l_mas.node, ++slot); 2942 2943 if (right) 2944 mas_set_parent(mas, right, l_mas.node, ++slot); 2945 2946 if (mas_is_root_limits(mast->l)) { 2947 new_root: 2948 mas_mn(mast->l)->parent = ma_parent_ptr(mas_tree_parent(mas)); 2949 while (!mte_is_root(mast->orig_l->node)) 2950 mast_ascend(mast); 2951 } else { 2952 mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent; 2953 } 2954 2955 old_enode = mast->orig_l->node; 2956 mas->depth = l_mas.depth; 2957 mas->node = l_mas.node; 2958 mas->min = l_mas.min; 2959 mas->max = l_mas.max; 2960 mas->offset = l_mas.offset; 2961 mas_wmb_replace(mas, old_enode); 2962 mtree_range_walk(mas); 2963 return mast->bn->b_end; 2964 } 2965 2966 /* 2967 * mas_rebalance() - Rebalance a given node. 2968 * @mas: The maple state 2969 * @b_node: The big maple node. 2970 * 2971 * Rebalance two nodes into a single node or two new nodes that are sufficient. 2972 * Continue upwards until tree is sufficient. 2973 * 2974 * Return: the number of elements in b_node during the last loop. 2975 */ 2976 static inline int mas_rebalance(struct ma_state *mas, 2977 struct maple_big_node *b_node) 2978 { 2979 char empty_count = mas_mt_height(mas); 2980 struct maple_subtree_state mast; 2981 unsigned char shift, b_end = ++b_node->b_end; 2982 2983 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 2984 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2985 2986 trace_ma_op(__func__, mas); 2987 2988 /* 2989 * Rebalancing occurs if a node is insufficient. Data is rebalanced 2990 * against the node to the right if it exists, otherwise the node to the 2991 * left of this node is rebalanced against this node. If rebalancing 2992 * causes just one node to be produced instead of two, then the parent 2993 * is also examined and rebalanced if it is insufficient. Every level 2994 * tries to combine the data in the same way. If one node contains the 2995 * entire range of the tree, then that node is used as a new root node. 2996 */ 2997 mas_node_count(mas, empty_count * 2 - 1); 2998 if (mas_is_err(mas)) 2999 return 0; 3000 3001 mast.orig_l = &l_mas; 3002 mast.orig_r = &r_mas; 3003 mast.bn = b_node; 3004 mast.bn->type = mte_node_type(mas->node); 3005 3006 l_mas = r_mas = *mas; 3007 3008 if (mas_next_sibling(&r_mas)) { 3009 mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end); 3010 r_mas.last = r_mas.index = r_mas.max; 3011 } else { 3012 mas_prev_sibling(&l_mas); 3013 shift = mas_data_end(&l_mas) + 1; 3014 mab_shift_right(b_node, shift); 3015 mas->offset += shift; 3016 mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0); 3017 b_node->b_end = shift + b_end; 3018 l_mas.index = l_mas.last = l_mas.min; 3019 } 3020 3021 return mas_spanning_rebalance(mas, &mast, empty_count); 3022 } 3023 3024 /* 3025 * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple 3026 * state. 3027 * @mas: The maple state 3028 * @end: The end of the left-most node. 3029 * 3030 * During a mass-insert event (such as forking), it may be necessary to 3031 * rebalance the left-most node when it is not sufficient. 
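 *
 * Rough sketch of the bulk pattern that can leave such a node behind (locking
 * and error handling omitted; the index, last and entry arrays are
 * hypothetical):
 *
 *     mas_expected_entries(&mas, count);
 *     for (i = 0; i < count; i++) {
 *             mas_set_range(&mas, index[i], last[i]);
 *             mas_store_gfp(&mas, entry[i], GFP_KERNEL);
 *     }
 *     mas_destroy(&mas);
 *
 * mas_destroy() is where this rebalance runs, pulling entries across from the
 * node to the left so the final node is no longer deficient.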
3032 */ 3033 static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end) 3034 { 3035 enum maple_type mt = mte_node_type(mas->node); 3036 struct maple_node reuse, *newnode, *parent, *new_left, *left, *node; 3037 struct maple_enode *eparent, *old_eparent; 3038 unsigned char offset, tmp, split = mt_slots[mt] / 2; 3039 void __rcu **l_slots, **slots; 3040 unsigned long *l_pivs, *pivs, gap; 3041 bool in_rcu = mt_in_rcu(mas->tree); 3042 3043 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 3044 3045 l_mas = *mas; 3046 mas_prev_sibling(&l_mas); 3047 3048 /* set up node. */ 3049 if (in_rcu) { 3050 /* Allocate for both left and right as well as parent. */ 3051 mas_node_count(mas, 3); 3052 if (mas_is_err(mas)) 3053 return; 3054 3055 newnode = mas_pop_node(mas); 3056 } else { 3057 newnode = &reuse; 3058 } 3059 3060 node = mas_mn(mas); 3061 newnode->parent = node->parent; 3062 slots = ma_slots(newnode, mt); 3063 pivs = ma_pivots(newnode, mt); 3064 left = mas_mn(&l_mas); 3065 l_slots = ma_slots(left, mt); 3066 l_pivs = ma_pivots(left, mt); 3067 if (!l_slots[split]) 3068 split++; 3069 tmp = mas_data_end(&l_mas) - split; 3070 3071 memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp); 3072 memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp); 3073 pivs[tmp] = l_mas.max; 3074 memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end); 3075 memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end); 3076 3077 l_mas.max = l_pivs[split]; 3078 mas->min = l_mas.max + 1; 3079 old_eparent = mt_mk_node(mte_parent(l_mas.node), 3080 mas_parent_type(&l_mas, l_mas.node)); 3081 tmp += end; 3082 if (!in_rcu) { 3083 unsigned char max_p = mt_pivots[mt]; 3084 unsigned char max_s = mt_slots[mt]; 3085 3086 if (tmp < max_p) 3087 memset(pivs + tmp, 0, 3088 sizeof(unsigned long) * (max_p - tmp)); 3089 3090 if (tmp < mt_slots[mt]) 3091 memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp)); 3092 3093 memcpy(node, newnode, sizeof(struct maple_node)); 3094 ma_set_meta(node, mt, 0, tmp - 1); 3095 mte_set_pivot(old_eparent, mte_parent_slot(l_mas.node), 3096 l_pivs[split]); 3097 3098 /* Remove data from l_pivs. */ 3099 tmp = split + 1; 3100 memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp)); 3101 memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp)); 3102 ma_set_meta(left, mt, 0, split); 3103 eparent = old_eparent; 3104 3105 goto done; 3106 } 3107 3108 /* RCU requires replacing both l_mas, mas, and parent. */ 3109 mas->node = mt_mk_node(newnode, mt); 3110 ma_set_meta(newnode, mt, 0, tmp); 3111 3112 new_left = mas_pop_node(mas); 3113 new_left->parent = left->parent; 3114 mt = mte_node_type(l_mas.node); 3115 slots = ma_slots(new_left, mt); 3116 pivs = ma_pivots(new_left, mt); 3117 memcpy(slots, l_slots, sizeof(void *) * split); 3118 memcpy(pivs, l_pivs, sizeof(unsigned long) * split); 3119 ma_set_meta(new_left, mt, 0, split); 3120 l_mas.node = mt_mk_node(new_left, mt); 3121 3122 /* replace parent. 
*/ 3123 offset = mte_parent_slot(mas->node); 3124 mt = mas_parent_type(&l_mas, l_mas.node); 3125 parent = mas_pop_node(mas); 3126 slots = ma_slots(parent, mt); 3127 pivs = ma_pivots(parent, mt); 3128 memcpy(parent, mte_to_node(old_eparent), sizeof(struct maple_node)); 3129 rcu_assign_pointer(slots[offset], mas->node); 3130 rcu_assign_pointer(slots[offset - 1], l_mas.node); 3131 pivs[offset - 1] = l_mas.max; 3132 eparent = mt_mk_node(parent, mt); 3133 done: 3134 gap = mas_leaf_max_gap(mas); 3135 mte_set_gap(eparent, mte_parent_slot(mas->node), gap); 3136 gap = mas_leaf_max_gap(&l_mas); 3137 mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap); 3138 mas_ascend(mas); 3139 3140 if (in_rcu) { 3141 mas_replace_node(mas, old_eparent); 3142 mas_adopt_children(mas, mas->node); 3143 } 3144 3145 mas_update_gap(mas); 3146 } 3147 3148 /* 3149 * mas_split_final_node() - Split the final node in a subtree operation. 3150 * @mast: the maple subtree state 3151 * @mas: The maple state 3152 * @height: The height of the tree in case it's a new root. 3153 */ 3154 static inline bool mas_split_final_node(struct maple_subtree_state *mast, 3155 struct ma_state *mas, int height) 3156 { 3157 struct maple_enode *ancestor; 3158 3159 if (mte_is_root(mas->node)) { 3160 if (mt_is_alloc(mas->tree)) 3161 mast->bn->type = maple_arange_64; 3162 else 3163 mast->bn->type = maple_range_64; 3164 mas->depth = height; 3165 } 3166 /* 3167 * Only a single node is used here, could be root. 3168 * The Big_node data should just fit in a single node. 3169 */ 3170 ancestor = mas_new_ma_node(mas, mast->bn); 3171 mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset); 3172 mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset); 3173 mte_to_node(ancestor)->parent = mas_mn(mas)->parent; 3174 3175 mast->l->node = ancestor; 3176 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true); 3177 mas->offset = mast->bn->b_end - 1; 3178 return true; 3179 } 3180 3181 /* 3182 * mast_fill_bnode() - Copy data into the big node in the subtree state 3183 * @mast: The maple subtree state 3184 * @mas: the maple state 3185 * @skip: The number of entries to skip for new nodes insertion. 3186 */ 3187 static inline void mast_fill_bnode(struct maple_subtree_state *mast, 3188 struct ma_state *mas, 3189 unsigned char skip) 3190 { 3191 bool cp = true; 3192 unsigned char split; 3193 3194 memset(mast->bn->gap, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->gap)); 3195 memset(mast->bn->slot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->slot)); 3196 memset(mast->bn->pivot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->pivot)); 3197 mast->bn->b_end = 0; 3198 3199 if (mte_is_root(mas->node)) { 3200 cp = false; 3201 } else { 3202 mas_ascend(mas); 3203 mas->offset = mte_parent_slot(mas->node); 3204 } 3205 3206 if (cp && mast->l->offset) 3207 mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0); 3208 3209 split = mast->bn->b_end; 3210 mab_set_b_end(mast->bn, mast->l, mast->l->node); 3211 mast->r->offset = mast->bn->b_end; 3212 mab_set_b_end(mast->bn, mast->r, mast->r->node); 3213 if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max) 3214 cp = false; 3215 3216 if (cp) 3217 mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1, 3218 mast->bn, mast->bn->b_end); 3219 3220 mast->bn->b_end--; 3221 mast->bn->type = mte_node_type(mas->node); 3222 } 3223 3224 /* 3225 * mast_split_data() - Split the data in the subtree state big node into regular 3226 * nodes. 
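 * Offsets 0..@split of @mast->bn are copied into the left node and the
 * remainder into the right node; the new min/max boundaries are set and, above
 * the leaves, the split parents are fixed up as well.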
3227 * @mast: The maple subtree state 3228 * @mas: The maple state 3229 * @split: The location to split the big node 3230 */ 3231 static inline void mast_split_data(struct maple_subtree_state *mast, 3232 struct ma_state *mas, unsigned char split) 3233 { 3234 unsigned char p_slot; 3235 3236 mab_mas_cp(mast->bn, 0, split, mast->l, true); 3237 mte_set_pivot(mast->r->node, 0, mast->r->max); 3238 mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false); 3239 mast->l->offset = mte_parent_slot(mas->node); 3240 mast->l->max = mast->bn->pivot[split]; 3241 mast->r->min = mast->l->max + 1; 3242 if (mte_is_leaf(mas->node)) 3243 return; 3244 3245 p_slot = mast->orig_l->offset; 3246 mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node, 3247 &p_slot, split); 3248 mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node, 3249 &p_slot, split); 3250 } 3251 3252 /* 3253 * mas_push_data() - Instead of splitting a node, it is beneficial to push the 3254 * data to the right or left node if there is room. 3255 * @mas: The maple state 3256 * @height: The current height of the maple state 3257 * @mast: The maple subtree state 3258 * @left: Push left or not. 3259 * 3260 * Keeping the height of the tree low means faster lookups. 3261 * 3262 * Return: True if pushed, false otherwise. 3263 */ 3264 static inline bool mas_push_data(struct ma_state *mas, int height, 3265 struct maple_subtree_state *mast, bool left) 3266 { 3267 unsigned char slot_total = mast->bn->b_end; 3268 unsigned char end, space, split; 3269 3270 MA_STATE(tmp_mas, mas->tree, mas->index, mas->last); 3271 tmp_mas = *mas; 3272 tmp_mas.depth = mast->l->depth; 3273 3274 if (left && !mas_prev_sibling(&tmp_mas)) 3275 return false; 3276 else if (!left && !mas_next_sibling(&tmp_mas)) 3277 return false; 3278 3279 end = mas_data_end(&tmp_mas); 3280 slot_total += end; 3281 space = 2 * mt_slot_count(mas->node) - 2; 3282 /* -2 instead of -1 to ensure there isn't a triple split */ 3283 if (ma_is_leaf(mast->bn->type)) 3284 space--; 3285 3286 if (mas->max == ULONG_MAX) 3287 space--; 3288 3289 if (slot_total >= space) 3290 return false; 3291 3292 /* Get the data; Fill mast->bn */ 3293 mast->bn->b_end++; 3294 if (left) { 3295 mab_shift_right(mast->bn, end + 1); 3296 mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0); 3297 mast->bn->b_end = slot_total + 1; 3298 } else { 3299 mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end); 3300 } 3301 3302 /* Configure mast for splitting of mast->bn */ 3303 split = mt_slots[mast->bn->type] - 2; 3304 if (left) { 3305 /* Switch mas to prev node */ 3306 *mas = tmp_mas; 3307 /* Start using mast->l for the left side. */ 3308 tmp_mas.node = mast->l->node; 3309 *mast->l = tmp_mas; 3310 } else { 3311 tmp_mas.node = mast->r->node; 3312 *mast->r = tmp_mas; 3313 split = slot_total - split; 3314 } 3315 split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]); 3316 /* Update parent slot for split calculation. */ 3317 if (left) 3318 mast->orig_l->offset += end + 1; 3319 3320 mast_split_data(mast, mas, split); 3321 mast_fill_bnode(mast, mas, 2); 3322 mas_split_final_node(mast, mas, height + 1); 3323 return true; 3324 } 3325 3326 /* 3327 * mas_split() - Split data that is too big for one node into two. 3328 * @mas: The maple state 3329 * @b_node: The maple big node 3330 * Return: 1 on success, 0 on failure. 
3331 */ 3332 static int mas_split(struct ma_state *mas, struct maple_big_node *b_node) 3333 { 3334 struct maple_subtree_state mast; 3335 int height = 0; 3336 unsigned char mid_split, split = 0; 3337 struct maple_enode *old; 3338 3339 /* 3340 * Splitting is handled differently from any other B-tree; the Maple 3341 * Tree splits upwards. Splitting up means that the split operation 3342 * occurs when the walk of the tree hits the leaves and not on the way 3343 * down. The reason for splitting up is that it is impossible to know 3344 * how much space will be needed until the leaf is (or leaves are) 3345 * reached. Since overwriting data is allowed and a range could 3346 * overwrite more than one range or result in changing one entry into 3 3347 * entries, it is impossible to know if a split is required until the 3348 * data is examined. 3349 * 3350 * Splitting is a balancing act between keeping allocations to a minimum 3351 * and avoiding a 'jitter' event where a tree is expanded to make room 3352 * for an entry followed by a contraction when the entry is removed. To 3353 * accomplish the balance, there are empty slots remaining in both left 3354 * and right nodes after a split. 3355 */ 3356 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 3357 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 3358 MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last); 3359 MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last); 3360 3361 trace_ma_op(__func__, mas); 3362 mas->depth = mas_mt_height(mas); 3363 /* Allocation failures will happen early. */ 3364 mas_node_count(mas, 1 + mas->depth * 2); 3365 if (mas_is_err(mas)) 3366 return 0; 3367 3368 mast.l = &l_mas; 3369 mast.r = &r_mas; 3370 mast.orig_l = &prev_l_mas; 3371 mast.orig_r = &prev_r_mas; 3372 mast.bn = b_node; 3373 3374 while (height++ <= mas->depth) { 3375 if (mt_slots[b_node->type] > b_node->b_end) { 3376 mas_split_final_node(&mast, mas, height); 3377 break; 3378 } 3379 3380 l_mas = r_mas = *mas; 3381 l_mas.node = mas_new_ma_node(mas, b_node); 3382 r_mas.node = mas_new_ma_node(mas, b_node); 3383 /* 3384 * Another way that 'jitter' is avoided is to terminate a split up early if the 3385 * left or right node has space to spare. This is referred to as "pushing left" 3386 * or "pushing right" and is similar to the B* tree, except the nodes left or 3387 * right can rarely be reused due to RCU, but the ripple upwards is halted which 3388 * is a significant savings. 3389 */ 3390 /* Try to push left. */ 3391 if (mas_push_data(mas, height, &mast, true)) 3392 break; 3393 3394 /* Try to push right. */ 3395 if (mas_push_data(mas, height, &mast, false)) 3396 break; 3397 3398 split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min); 3399 mast_split_data(&mast, mas, split); 3400 /* 3401 * Usually correct, mab_mas_cp in the above call overwrites 3402 * r->max. 3403 */ 3404 mast.r->max = mas->max; 3405 mast_fill_bnode(&mast, mas, 1); 3406 prev_l_mas = *mast.l; 3407 prev_r_mas = *mast.r; 3408 } 3409 3410 /* Set the original node as dead */ 3411 old = mas->node; 3412 mas->node = l_mas.node; 3413 mas_wmb_replace(mas, old); 3414 mtree_range_walk(mas); 3415 return 1; 3416 } 3417 3418 /* 3419 * mas_reuse_node() - Reuse the node to store the data. 3420 * @wr_mas: The maple write state 3421 * @bn: The maple big node 3422 * @end: The end of the data. 3423 * 3424 * Will always return false in RCU mode. 3425 * 3426 * Return: True if node was reused, false otherwise. 
3427 */ 3428 static inline bool mas_reuse_node(struct ma_wr_state *wr_mas, 3429 struct maple_big_node *bn, unsigned char end) 3430 { 3431 /* Need to be rcu safe. */ 3432 if (mt_in_rcu(wr_mas->mas->tree)) 3433 return false; 3434 3435 if (end > bn->b_end) { 3436 int clear = mt_slots[wr_mas->type] - bn->b_end; 3437 3438 memset(wr_mas->slots + bn->b_end, 0, sizeof(void *) * clear--); 3439 memset(wr_mas->pivots + bn->b_end, 0, sizeof(void *) * clear); 3440 } 3441 mab_mas_cp(bn, 0, bn->b_end, wr_mas->mas, false); 3442 return true; 3443 } 3444 3445 /* 3446 * mas_commit_b_node() - Commit the big node into the tree. 3447 * @wr_mas: The maple write state 3448 * @b_node: The maple big node 3449 * @end: The end of the data. 3450 */ 3451 static noinline_for_kasan int mas_commit_b_node(struct ma_wr_state *wr_mas, 3452 struct maple_big_node *b_node, unsigned char end) 3453 { 3454 struct maple_node *node; 3455 struct maple_enode *old_enode; 3456 unsigned char b_end = b_node->b_end; 3457 enum maple_type b_type = b_node->type; 3458 3459 old_enode = wr_mas->mas->node; 3460 if ((b_end < mt_min_slots[b_type]) && 3461 (!mte_is_root(old_enode)) && 3462 (mas_mt_height(wr_mas->mas) > 1)) 3463 return mas_rebalance(wr_mas->mas, b_node); 3464 3465 if (b_end >= mt_slots[b_type]) 3466 return mas_split(wr_mas->mas, b_node); 3467 3468 if (mas_reuse_node(wr_mas, b_node, end)) 3469 goto reuse_node; 3470 3471 mas_node_count(wr_mas->mas, 1); 3472 if (mas_is_err(wr_mas->mas)) 3473 return 0; 3474 3475 node = mas_pop_node(wr_mas->mas); 3476 node->parent = mas_mn(wr_mas->mas)->parent; 3477 wr_mas->mas->node = mt_mk_node(node, b_type); 3478 mab_mas_cp(b_node, 0, b_end, wr_mas->mas, false); 3479 mas_replace_node(wr_mas->mas, old_enode); 3480 reuse_node: 3481 mas_update_gap(wr_mas->mas); 3482 return 1; 3483 } 3484 3485 /* 3486 * mas_root_expand() - Expand a root to a node 3487 * @mas: The maple state 3488 * @entry: The entry to store into the tree 3489 */ 3490 static inline int mas_root_expand(struct ma_state *mas, void *entry) 3491 { 3492 void *contents = mas_root_locked(mas); 3493 enum maple_type type = maple_leaf_64; 3494 struct maple_node *node; 3495 void __rcu **slots; 3496 unsigned long *pivots; 3497 int slot = 0; 3498 3499 mas_node_count(mas, 1); 3500 if (unlikely(mas_is_err(mas))) 3501 return 0; 3502 3503 node = mas_pop_node(mas); 3504 pivots = ma_pivots(node, type); 3505 slots = ma_slots(node, type); 3506 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3507 mas->node = mt_mk_node(node, type); 3508 3509 if (mas->index) { 3510 if (contents) { 3511 rcu_assign_pointer(slots[slot], contents); 3512 if (likely(mas->index > 1)) 3513 slot++; 3514 } 3515 pivots[slot++] = mas->index - 1; 3516 } 3517 3518 rcu_assign_pointer(slots[slot], entry); 3519 mas->offset = slot; 3520 pivots[slot] = mas->last; 3521 if (mas->last != ULONG_MAX) 3522 pivots[++slot] = ULONG_MAX; 3523 3524 mas->depth = 1; 3525 mas_set_height(mas); 3526 ma_set_meta(node, maple_leaf_64, 0, slot); 3527 /* swap the new root into the tree */ 3528 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3529 return slot; 3530 } 3531 3532 static inline void mas_store_root(struct ma_state *mas, void *entry) 3533 { 3534 if (likely((mas->last != 0) || (mas->index != 0))) 3535 mas_root_expand(mas, entry); 3536 else if (((unsigned long) (entry) & 3) == 2) 3537 mas_root_expand(mas, entry); 3538 else { 3539 rcu_assign_pointer(mas->tree->ma_root, entry); 3540 mas->node = MAS_START; 3541 } 3542 } 3543 3544 /* 3545 * mas_is_span_wr() - Check if the write needs to be 
treated as a write that 3546 * spans the node. 3547 * @mas: The maple state 3548 * @piv: The pivot value being written 3549 * @type: The maple node type 3550 * @entry: The data to write 3551 * 3552 * Spanning writes are writes that start in one node and end in another OR if 3553 * the write of a %NULL will cause the node to end with a %NULL. 3554 * 3555 * Return: True if this is a spanning write, false otherwise. 3556 */ 3557 static bool mas_is_span_wr(struct ma_wr_state *wr_mas) 3558 { 3559 unsigned long max = wr_mas->r_max; 3560 unsigned long last = wr_mas->mas->last; 3561 enum maple_type type = wr_mas->type; 3562 void *entry = wr_mas->entry; 3563 3564 /* Contained in this pivot, fast path */ 3565 if (last < max) 3566 return false; 3567 3568 if (ma_is_leaf(type)) { 3569 max = wr_mas->mas->max; 3570 if (last < max) 3571 return false; 3572 } 3573 3574 if (last == max) { 3575 /* 3576 * The last entry of leaf node cannot be NULL unless it is the 3577 * rightmost node (writing ULONG_MAX), otherwise it spans slots. 3578 */ 3579 if (entry || last == ULONG_MAX) 3580 return false; 3581 } 3582 3583 trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry); 3584 return true; 3585 } 3586 3587 static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas) 3588 { 3589 wr_mas->type = mte_node_type(wr_mas->mas->node); 3590 mas_wr_node_walk(wr_mas); 3591 wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type); 3592 } 3593 3594 static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas) 3595 { 3596 wr_mas->mas->max = wr_mas->r_max; 3597 wr_mas->mas->min = wr_mas->r_min; 3598 wr_mas->mas->node = wr_mas->content; 3599 wr_mas->mas->offset = 0; 3600 wr_mas->mas->depth++; 3601 } 3602 /* 3603 * mas_wr_walk() - Walk the tree for a write. 3604 * @wr_mas: The maple write state 3605 * 3606 * Uses mas_slot_locked() and does not need to worry about dead nodes. 3607 * 3608 * Return: True if it's contained in a node, false on spanning write. 3609 */ 3610 static bool mas_wr_walk(struct ma_wr_state *wr_mas) 3611 { 3612 struct ma_state *mas = wr_mas->mas; 3613 3614 while (true) { 3615 mas_wr_walk_descend(wr_mas); 3616 if (unlikely(mas_is_span_wr(wr_mas))) 3617 return false; 3618 3619 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3620 mas->offset); 3621 if (ma_is_leaf(wr_mas->type)) 3622 return true; 3623 3624 mas_wr_walk_traverse(wr_mas); 3625 } 3626 3627 return true; 3628 } 3629 3630 static bool mas_wr_walk_index(struct ma_wr_state *wr_mas) 3631 { 3632 struct ma_state *mas = wr_mas->mas; 3633 3634 while (true) { 3635 mas_wr_walk_descend(wr_mas); 3636 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3637 mas->offset); 3638 if (ma_is_leaf(wr_mas->type)) 3639 return true; 3640 mas_wr_walk_traverse(wr_mas); 3641 3642 } 3643 return true; 3644 } 3645 /* 3646 * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs. 
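 * A store of NULL is widened so that it swallows any NULL slots that already
 * border it. As an illustrative example, if 5-10 is being set to NULL while
 * 11-15 already contains NULL, the store becomes 5-15 so that adjacent NULL
 * ranges never remain side by side.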
3647 * @l_wr_mas: The left maple write state 3648 * @r_wr_mas: The right maple write state 3649 */ 3650 static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas, 3651 struct ma_wr_state *r_wr_mas) 3652 { 3653 struct ma_state *r_mas = r_wr_mas->mas; 3654 struct ma_state *l_mas = l_wr_mas->mas; 3655 unsigned char l_slot; 3656 3657 l_slot = l_mas->offset; 3658 if (!l_wr_mas->content) 3659 l_mas->index = l_wr_mas->r_min; 3660 3661 if ((l_mas->index == l_wr_mas->r_min) && 3662 (l_slot && 3663 !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) { 3664 if (l_slot > 1) 3665 l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1; 3666 else 3667 l_mas->index = l_mas->min; 3668 3669 l_mas->offset = l_slot - 1; 3670 } 3671 3672 if (!r_wr_mas->content) { 3673 if (r_mas->last < r_wr_mas->r_max) 3674 r_mas->last = r_wr_mas->r_max; 3675 r_mas->offset++; 3676 } else if ((r_mas->last == r_wr_mas->r_max) && 3677 (r_mas->last < r_mas->max) && 3678 !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) { 3679 r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots, 3680 r_wr_mas->type, r_mas->offset + 1); 3681 r_mas->offset++; 3682 } 3683 } 3684 3685 static inline void *mas_state_walk(struct ma_state *mas) 3686 { 3687 void *entry; 3688 3689 entry = mas_start(mas); 3690 if (mas_is_none(mas)) 3691 return NULL; 3692 3693 if (mas_is_ptr(mas)) 3694 return entry; 3695 3696 return mtree_range_walk(mas); 3697 } 3698 3699 /* 3700 * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up 3701 * to date. 3702 * 3703 * @mas: The maple state. 3704 * 3705 * Note: Leaves mas in undesirable state. 3706 * Return: The entry for @mas->index or %NULL on dead node. 3707 */ 3708 static inline void *mtree_lookup_walk(struct ma_state *mas) 3709 { 3710 unsigned long *pivots; 3711 unsigned char offset; 3712 struct maple_node *node; 3713 struct maple_enode *next; 3714 enum maple_type type; 3715 void __rcu **slots; 3716 unsigned char end; 3717 unsigned long max; 3718 3719 next = mas->node; 3720 max = ULONG_MAX; 3721 do { 3722 offset = 0; 3723 node = mte_to_node(next); 3724 type = mte_node_type(next); 3725 pivots = ma_pivots(node, type); 3726 end = ma_data_end(node, type, pivots, max); 3727 if (unlikely(ma_dead_node(node))) 3728 goto dead_node; 3729 do { 3730 if (pivots[offset] >= mas->index) { 3731 max = pivots[offset]; 3732 break; 3733 } 3734 } while (++offset < end); 3735 3736 slots = ma_slots(node, type); 3737 next = mt_slot(mas->tree, slots, offset); 3738 if (unlikely(ma_dead_node(node))) 3739 goto dead_node; 3740 } while (!ma_is_leaf(type)); 3741 3742 return (void *)next; 3743 3744 dead_node: 3745 mas_reset(mas); 3746 return NULL; 3747 } 3748 3749 static void mte_destroy_walk(struct maple_enode *, struct maple_tree *); 3750 /* 3751 * mas_new_root() - Create a new root node that only contains the entry passed 3752 * in. 3753 * @mas: The maple state 3754 * @entry: The entry to store. 3755 * 3756 * Only valid when the index == 0 and the last == ULONG_MAX 3757 * 3758 * Return 0 on error, 1 on success. 
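 *
 * Caller-level sketch (hypothetical tree and entry): a store such as
 *
 *     mtree_store_range(mt, 0, ULONG_MAX, entry, GFP_KERNEL);
 *
 * ends up here; the old root node, if there was one, is torn down with
 * mte_destroy_walk() and either a fresh single-entry leaf or, when @entry is
 * NULL, an empty root pointer is installed.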
3759 */ 3760 static inline int mas_new_root(struct ma_state *mas, void *entry) 3761 { 3762 struct maple_enode *root = mas_root_locked(mas); 3763 enum maple_type type = maple_leaf_64; 3764 struct maple_node *node; 3765 void __rcu **slots; 3766 unsigned long *pivots; 3767 3768 if (!entry && !mas->index && mas->last == ULONG_MAX) { 3769 mas->depth = 0; 3770 mas_set_height(mas); 3771 rcu_assign_pointer(mas->tree->ma_root, entry); 3772 mas->node = MAS_START; 3773 goto done; 3774 } 3775 3776 mas_node_count(mas, 1); 3777 if (mas_is_err(mas)) 3778 return 0; 3779 3780 node = mas_pop_node(mas); 3781 pivots = ma_pivots(node, type); 3782 slots = ma_slots(node, type); 3783 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3784 mas->node = mt_mk_node(node, type); 3785 rcu_assign_pointer(slots[0], entry); 3786 pivots[0] = mas->last; 3787 mas->depth = 1; 3788 mas_set_height(mas); 3789 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3790 3791 done: 3792 if (xa_is_node(root)) 3793 mte_destroy_walk(root, mas->tree); 3794 3795 return 1; 3796 } 3797 /* 3798 * mas_wr_spanning_store() - Create a subtree with the store operation completed 3799 * and new nodes where necessary, then place the sub-tree in the actual tree. 3800 * Note that mas is expected to point to the node which caused the store to 3801 * span. 3802 * @wr_mas: The maple write state 3803 * 3804 * Return: 0 on error, positive on success. 3805 */ 3806 static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas) 3807 { 3808 struct maple_subtree_state mast; 3809 struct maple_big_node b_node; 3810 struct ma_state *mas; 3811 unsigned char height; 3812 3813 /* Left and Right side of spanning store */ 3814 MA_STATE(l_mas, NULL, 0, 0); 3815 MA_STATE(r_mas, NULL, 0, 0); 3816 MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry); 3817 MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry); 3818 3819 /* 3820 * A store operation that spans multiple nodes is called a spanning 3821 * store and is handled early in the store call stack by the function 3822 * mas_is_span_wr(). When a spanning store is identified, the maple 3823 * state is duplicated. The first maple state walks the left tree path 3824 * to ``index``, the duplicate walks the right tree path to ``last``. 3825 * The data in the two nodes are combined into a single node, two nodes, 3826 * or possibly three nodes (see the 3-way split above). A ``NULL`` 3827 * written to the last entry of a node is considered a spanning store as 3828 * a rebalance is required for the operation to complete and an overflow 3829 * of data may happen. 3830 */ 3831 mas = wr_mas->mas; 3832 trace_ma_op(__func__, mas); 3833 3834 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 3835 return mas_new_root(mas, wr_mas->entry); 3836 /* 3837 * Node rebalancing may occur due to this store, so there may be three new 3838 * entries per level plus a new root. 3839 */ 3840 height = mas_mt_height(mas); 3841 mas_node_count(mas, 1 + height * 3); 3842 if (mas_is_err(mas)) 3843 return 0; 3844 3845 /* 3846 * Set up right side. Need to get to the next offset after the spanning 3847 * store to ensure it's not NULL and to combine both the next node and 3848 * the node with the start together. 3849 */ 3850 r_mas = *mas; 3851 /* Avoid overflow, walk to next slot in the tree. */ 3852 if (r_mas.last + 1) 3853 r_mas.last++; 3854 3855 r_mas.index = r_mas.last; 3856 mas_wr_walk_index(&r_wr_mas); 3857 r_mas.last = r_mas.index = mas->last; 3858 3859 /* Set up left side. 
*/ 3860 l_mas = *mas; 3861 mas_wr_walk_index(&l_wr_mas); 3862 3863 if (!wr_mas->entry) { 3864 mas_extend_spanning_null(&l_wr_mas, &r_wr_mas); 3865 mas->offset = l_mas.offset; 3866 mas->index = l_mas.index; 3867 mas->last = l_mas.last = r_mas.last; 3868 } 3869 3870 /* expanding NULLs may make this cover the entire range */ 3871 if (!l_mas.index && r_mas.last == ULONG_MAX) { 3872 mas_set_range(mas, 0, ULONG_MAX); 3873 return mas_new_root(mas, wr_mas->entry); 3874 } 3875 3876 memset(&b_node, 0, sizeof(struct maple_big_node)); 3877 /* Copy l_mas and store the value in b_node. */ 3878 mas_store_b_node(&l_wr_mas, &b_node, l_wr_mas.node_end); 3879 /* Copy r_mas into b_node. */ 3880 if (r_mas.offset <= r_wr_mas.node_end) 3881 mas_mab_cp(&r_mas, r_mas.offset, r_wr_mas.node_end, 3882 &b_node, b_node.b_end + 1); 3883 else 3884 b_node.b_end++; 3885 3886 /* Stop spanning searches by searching for just index. */ 3887 l_mas.index = l_mas.last = mas->index; 3888 3889 mast.bn = &b_node; 3890 mast.orig_l = &l_mas; 3891 mast.orig_r = &r_mas; 3892 /* Combine l_mas and r_mas and split them up evenly again. */ 3893 return mas_spanning_rebalance(mas, &mast, height + 1); 3894 } 3895 3896 /* 3897 * mas_wr_node_store() - Attempt to store the value in a node 3898 * @wr_mas: The maple write state 3899 * 3900 * Attempts to reuse the node, but may allocate. 3901 * 3902 * Return: True if stored, false otherwise 3903 */ 3904 static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas, 3905 unsigned char new_end) 3906 { 3907 struct ma_state *mas = wr_mas->mas; 3908 void __rcu **dst_slots; 3909 unsigned long *dst_pivots; 3910 unsigned char dst_offset, offset_end = wr_mas->offset_end; 3911 struct maple_node reuse, *newnode; 3912 unsigned char copy_size, node_pivots = mt_pivots[wr_mas->type]; 3913 bool in_rcu = mt_in_rcu(mas->tree); 3914 3915 /* Check if there is enough data. The room is enough. */ 3916 if (!mte_is_root(mas->node) && (new_end <= mt_min_slots[wr_mas->type]) && 3917 !(mas->mas_flags & MA_STATE_BULK)) 3918 return false; 3919 3920 if (mas->last == wr_mas->end_piv) 3921 offset_end++; /* don't copy this offset */ 3922 else if (unlikely(wr_mas->r_max == ULONG_MAX)) 3923 mas_bulk_rebalance(mas, wr_mas->node_end, wr_mas->type); 3924 3925 /* set up node. */ 3926 if (in_rcu) { 3927 mas_node_count(mas, 1); 3928 if (mas_is_err(mas)) 3929 return false; 3930 3931 newnode = mas_pop_node(mas); 3932 } else { 3933 memset(&reuse, 0, sizeof(struct maple_node)); 3934 newnode = &reuse; 3935 } 3936 3937 newnode->parent = mas_mn(mas)->parent; 3938 dst_pivots = ma_pivots(newnode, wr_mas->type); 3939 dst_slots = ma_slots(newnode, wr_mas->type); 3940 /* Copy from start to insert point */ 3941 memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset); 3942 memcpy(dst_slots, wr_mas->slots, sizeof(void *) * mas->offset); 3943 3944 /* Handle insert of new range starting after old range */ 3945 if (wr_mas->r_min < mas->index) { 3946 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content); 3947 dst_pivots[mas->offset++] = mas->index - 1; 3948 } 3949 3950 /* Store the new entry and range end. */ 3951 if (mas->offset < node_pivots) 3952 dst_pivots[mas->offset] = mas->last; 3953 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->entry); 3954 3955 /* 3956 * this range wrote to the end of the node or it overwrote the rest of 3957 * the data 3958 */ 3959 if (offset_end > wr_mas->node_end) 3960 goto done; 3961 3962 dst_offset = mas->offset + 1; 3963 /* Copy to the end of node if necessary. 
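 * copy_size counts the source slots from offset_end through node_end, i.e.
 * whatever survives beyond the overwritten range.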
*/ 3964 copy_size = wr_mas->node_end - offset_end + 1; 3965 memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end, 3966 sizeof(void *) * copy_size); 3967 memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end, 3968 sizeof(unsigned long) * (copy_size - 1)); 3969 3970 if (new_end < node_pivots) 3971 dst_pivots[new_end] = mas->max; 3972 3973 done: 3974 mas_leaf_set_meta(mas, newnode, dst_pivots, maple_leaf_64, new_end); 3975 if (in_rcu) { 3976 struct maple_enode *old_enode = mas->node; 3977 3978 mas->node = mt_mk_node(newnode, wr_mas->type); 3979 mas_replace_node(mas, old_enode); 3980 } else { 3981 memcpy(wr_mas->node, newnode, sizeof(struct maple_node)); 3982 } 3983 trace_ma_write(__func__, mas, 0, wr_mas->entry); 3984 mas_update_gap(mas); 3985 return true; 3986 } 3987 3988 /* 3989 * mas_wr_slot_store: Attempt to store a value in a slot. 3990 * @wr_mas: the maple write state 3991 * 3992 * Return: True if stored, false otherwise 3993 */ 3994 static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas) 3995 { 3996 struct ma_state *mas = wr_mas->mas; 3997 unsigned char offset = mas->offset; 3998 void __rcu **slots = wr_mas->slots; 3999 bool gap = false; 4000 4001 gap |= !mt_slot_locked(mas->tree, slots, offset); 4002 gap |= !mt_slot_locked(mas->tree, slots, offset + 1); 4003 4004 if (wr_mas->offset_end - offset == 1) { 4005 if (mas->index == wr_mas->r_min) { 4006 /* Overwriting the range and a part of the next one */ 4007 rcu_assign_pointer(slots[offset], wr_mas->entry); 4008 wr_mas->pivots[offset] = mas->last; 4009 } else { 4010 /* Overwriting a part of the range and the next one */ 4011 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 4012 wr_mas->pivots[offset] = mas->index - 1; 4013 mas->offset++; /* Keep mas accurate. */ 4014 } 4015 } else if (!mt_in_rcu(mas->tree)) { 4016 /* 4017 * Expand the range, only partially overwriting the previous and 4018 * next ranges 4019 */ 4020 gap |= !mt_slot_locked(mas->tree, slots, offset + 2); 4021 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 4022 wr_mas->pivots[offset] = mas->index - 1; 4023 wr_mas->pivots[offset + 1] = mas->last; 4024 mas->offset++; /* Keep mas accurate. */ 4025 } else { 4026 return false; 4027 } 4028 4029 trace_ma_write(__func__, mas, 0, wr_mas->entry); 4030 /* 4031 * Only update gap when the new entry is empty or there is an empty 4032 * entry in the original two ranges. 
4033 */ 4034 if (!wr_mas->entry || gap) 4035 mas_update_gap(mas); 4036 4037 return true; 4038 } 4039 4040 static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas) 4041 { 4042 struct ma_state *mas = wr_mas->mas; 4043 4044 if (!wr_mas->slots[wr_mas->offset_end]) { 4045 /* If this one is null, the next and prev are not */ 4046 mas->last = wr_mas->end_piv; 4047 } else { 4048 /* Check next slot(s) if we are overwriting the end */ 4049 if ((mas->last == wr_mas->end_piv) && 4050 (wr_mas->node_end != wr_mas->offset_end) && 4051 !wr_mas->slots[wr_mas->offset_end + 1]) { 4052 wr_mas->offset_end++; 4053 if (wr_mas->offset_end == wr_mas->node_end) 4054 mas->last = mas->max; 4055 else 4056 mas->last = wr_mas->pivots[wr_mas->offset_end]; 4057 wr_mas->end_piv = mas->last; 4058 } 4059 } 4060 4061 if (!wr_mas->content) { 4062 /* If this one is null, the next and prev are not */ 4063 mas->index = wr_mas->r_min; 4064 } else { 4065 /* Check prev slot if we are overwriting the start */ 4066 if (mas->index == wr_mas->r_min && mas->offset && 4067 !wr_mas->slots[mas->offset - 1]) { 4068 mas->offset--; 4069 wr_mas->r_min = mas->index = 4070 mas_safe_min(mas, wr_mas->pivots, mas->offset); 4071 wr_mas->r_max = wr_mas->pivots[mas->offset]; 4072 } 4073 } 4074 } 4075 4076 static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas) 4077 { 4078 while ((wr_mas->offset_end < wr_mas->node_end) && 4079 (wr_mas->mas->last > wr_mas->pivots[wr_mas->offset_end])) 4080 wr_mas->offset_end++; 4081 4082 if (wr_mas->offset_end < wr_mas->node_end) 4083 wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end]; 4084 else 4085 wr_mas->end_piv = wr_mas->mas->max; 4086 4087 if (!wr_mas->entry) 4088 mas_wr_extend_null(wr_mas); 4089 } 4090 4091 static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas) 4092 { 4093 struct ma_state *mas = wr_mas->mas; 4094 unsigned char new_end = wr_mas->node_end + 2; 4095 4096 new_end -= wr_mas->offset_end - mas->offset; 4097 if (wr_mas->r_min == mas->index) 4098 new_end--; 4099 4100 if (wr_mas->end_piv == mas->last) 4101 new_end--; 4102 4103 return new_end; 4104 } 4105 4106 /* 4107 * mas_wr_append: Attempt to append 4108 * @wr_mas: the maple write state 4109 * @new_end: The end of the node after the modification 4110 * 4111 * This is currently unsafe in rcu mode since the end of the node may be cached 4112 * by readers while the node contents may be updated which could result in 4113 * inaccurate information. 
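 *
 * As an illustration (ranges are made up): if the node's last range is
 * [50, 100] -> old_entry, then storing [80, 100] -> new_entry appends the
 * new entry after old_entry (adding pivot 79), storing [50, 70] appends it
 * before old_entry (adding pivot 70), and storing [60, 80] keeps old_entry
 * on both sides of the new entry (adding pivots 59 and 80).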
4114 * 4115 * Return: True if appended, false otherwise 4116 */ 4117 static inline bool mas_wr_append(struct ma_wr_state *wr_mas, 4118 unsigned char new_end) 4119 { 4120 struct ma_state *mas; 4121 void __rcu **slots; 4122 unsigned char end; 4123 4124 mas = wr_mas->mas; 4125 if (mt_in_rcu(mas->tree)) 4126 return false; 4127 4128 if (mas->offset != wr_mas->node_end) 4129 return false; 4130 4131 end = wr_mas->node_end; 4132 if (mas->offset != end) 4133 return false; 4134 4135 if (new_end < mt_pivots[wr_mas->type]) { 4136 wr_mas->pivots[new_end] = wr_mas->pivots[end]; 4137 ma_set_meta(wr_mas->node, wr_mas->type, 0, new_end); 4138 } 4139 4140 slots = wr_mas->slots; 4141 if (new_end == end + 1) { 4142 if (mas->last == wr_mas->r_max) { 4143 /* Append to end of range */ 4144 rcu_assign_pointer(slots[new_end], wr_mas->entry); 4145 wr_mas->pivots[end] = mas->index - 1; 4146 mas->offset = new_end; 4147 } else { 4148 /* Append to start of range */ 4149 rcu_assign_pointer(slots[new_end], wr_mas->content); 4150 wr_mas->pivots[end] = mas->last; 4151 rcu_assign_pointer(slots[end], wr_mas->entry); 4152 } 4153 } else { 4154 /* Append to the range without touching any boundaries. */ 4155 rcu_assign_pointer(slots[new_end], wr_mas->content); 4156 wr_mas->pivots[end + 1] = mas->last; 4157 rcu_assign_pointer(slots[end + 1], wr_mas->entry); 4158 wr_mas->pivots[end] = mas->index - 1; 4159 mas->offset = end + 1; 4160 } 4161 4162 if (!wr_mas->content || !wr_mas->entry) 4163 mas_update_gap(mas); 4164 4165 trace_ma_write(__func__, mas, new_end, wr_mas->entry); 4166 return true; 4167 } 4168 4169 /* 4170 * mas_wr_bnode() - Slow path for a modification. 4171 * @wr_mas: The write maple state 4172 * 4173 * This is where split, rebalance end up. 4174 */ 4175 static void mas_wr_bnode(struct ma_wr_state *wr_mas) 4176 { 4177 struct maple_big_node b_node; 4178 4179 trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry); 4180 memset(&b_node, 0, sizeof(struct maple_big_node)); 4181 mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end); 4182 mas_commit_b_node(wr_mas, &b_node, wr_mas->node_end); 4183 } 4184 4185 static inline void mas_wr_modify(struct ma_wr_state *wr_mas) 4186 { 4187 struct ma_state *mas = wr_mas->mas; 4188 unsigned char new_end; 4189 4190 /* Direct replacement */ 4191 if (wr_mas->r_min == mas->index && wr_mas->r_max == mas->last) { 4192 rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry); 4193 if (!!wr_mas->entry ^ !!wr_mas->content) 4194 mas_update_gap(mas); 4195 return; 4196 } 4197 4198 /* 4199 * new_end exceeds the size of the maple node and cannot enter the fast 4200 * path. 4201 */ 4202 new_end = mas_wr_new_end(wr_mas); 4203 if (new_end >= mt_slots[wr_mas->type]) 4204 goto slow_path; 4205 4206 /* Attempt to append */ 4207 if (mas_wr_append(wr_mas, new_end)) 4208 return; 4209 4210 if (new_end == wr_mas->node_end && mas_wr_slot_store(wr_mas)) 4211 return; 4212 4213 if (mas_wr_node_store(wr_mas, new_end)) 4214 return; 4215 4216 if (mas_is_err(mas)) 4217 return; 4218 4219 slow_path: 4220 mas_wr_bnode(wr_mas); 4221 } 4222 4223 /* 4224 * mas_wr_store_entry() - Internal call to store a value 4225 * @mas: The maple state 4226 * @entry: The entry to store. 4227 * 4228 * Return: The contents that was stored at the index. 
4229 */ 4230 static inline void *mas_wr_store_entry(struct ma_wr_state *wr_mas) 4231 { 4232 struct ma_state *mas = wr_mas->mas; 4233 4234 wr_mas->content = mas_start(mas); 4235 if (mas_is_none(mas) || mas_is_ptr(mas)) { 4236 mas_store_root(mas, wr_mas->entry); 4237 return wr_mas->content; 4238 } 4239 4240 if (unlikely(!mas_wr_walk(wr_mas))) { 4241 mas_wr_spanning_store(wr_mas); 4242 return wr_mas->content; 4243 } 4244 4245 /* At this point, we are at the leaf node that needs to be altered. */ 4246 mas_wr_end_piv(wr_mas); 4247 /* New root for a single pointer */ 4248 if (unlikely(!mas->index && mas->last == ULONG_MAX)) { 4249 mas_new_root(mas, wr_mas->entry); 4250 return wr_mas->content; 4251 } 4252 4253 mas_wr_modify(wr_mas); 4254 return wr_mas->content; 4255 } 4256 4257 /** 4258 * mas_insert() - Internal call to insert a value 4259 * @mas: The maple state 4260 * @entry: The entry to store 4261 * 4262 * Return: %NULL or the contents that already exists at the requested index 4263 * otherwise. The maple state needs to be checked for error conditions. 4264 */ 4265 static inline void *mas_insert(struct ma_state *mas, void *entry) 4266 { 4267 MA_WR_STATE(wr_mas, mas, entry); 4268 4269 /* 4270 * Inserting a new range inserts either 0, 1, or 2 pivots within the 4271 * tree. If the insert fits exactly into an existing gap with a value 4272 * of NULL, then the slot only needs to be written with the new value. 4273 * If the range being inserted is adjacent to another range, then only a 4274 * single pivot needs to be inserted (as well as writing the entry). If 4275 * the new range is within a gap but does not touch any other ranges, 4276 * then two pivots need to be inserted: the start - 1, and the end. As 4277 * usual, the entry must be written. Most operations require a new node 4278 * to be allocated and replace an existing node to ensure RCU safety, 4279 * when in RCU mode. The exception to requiring a newly allocated node 4280 * is when inserting at the end of a node (appending). When done 4281 * carefully, appending can reuse the node in place. 4282 */ 4283 wr_mas.content = mas_start(mas); 4284 if (wr_mas.content) 4285 goto exists; 4286 4287 if (mas_is_none(mas) || mas_is_ptr(mas)) { 4288 mas_store_root(mas, entry); 4289 return NULL; 4290 } 4291 4292 /* spanning writes always overwrite something */ 4293 if (!mas_wr_walk(&wr_mas)) 4294 goto exists; 4295 4296 /* At this point, we are at the leaf node that needs to be altered. */ 4297 wr_mas.offset_end = mas->offset; 4298 wr_mas.end_piv = wr_mas.r_max; 4299 4300 if (wr_mas.content || (mas->last > wr_mas.r_max)) 4301 goto exists; 4302 4303 if (!entry) 4304 return NULL; 4305 4306 mas_wr_modify(&wr_mas); 4307 return wr_mas.content; 4308 4309 exists: 4310 mas_set_err(mas, -EEXIST); 4311 return wr_mas.content; 4312 4313 } 4314 4315 static inline void mas_rewalk(struct ma_state *mas, unsigned long index) 4316 { 4317 retry: 4318 mas_set(mas, index); 4319 mas_state_walk(mas); 4320 if (mas_is_start(mas)) 4321 goto retry; 4322 } 4323 4324 static inline bool mas_rewalk_if_dead(struct ma_state *mas, 4325 struct maple_node *node, const unsigned long index) 4326 { 4327 if (unlikely(ma_dead_node(node))) { 4328 mas_rewalk(mas, index); 4329 return true; 4330 } 4331 return false; 4332 } 4333 4334 /* 4335 * mas_prev_node() - Find the prev non-null entry at the same level in the 4336 * tree. The prev value will be mas->node[mas->offset] or MAS_NONE. 
4337 * @mas: The maple state 4338 * @min: The lower limit to search 4339 * 4340 * The prev node value will be mas->node[mas->offset] or MAS_NONE. 4341 * Return: 1 if the node is dead, 0 otherwise. 4342 */ 4343 static inline int mas_prev_node(struct ma_state *mas, unsigned long min) 4344 { 4345 enum maple_type mt; 4346 int offset, level; 4347 void __rcu **slots; 4348 struct maple_node *node; 4349 unsigned long *pivots; 4350 unsigned long max; 4351 4352 node = mas_mn(mas); 4353 if (!mas->min) 4354 goto no_entry; 4355 4356 max = mas->min - 1; 4357 if (max < min) 4358 goto no_entry; 4359 4360 level = 0; 4361 do { 4362 if (ma_is_root(node)) 4363 goto no_entry; 4364 4365 /* Walk up. */ 4366 if (unlikely(mas_ascend(mas))) 4367 return 1; 4368 offset = mas->offset; 4369 level++; 4370 node = mas_mn(mas); 4371 } while (!offset); 4372 4373 offset--; 4374 mt = mte_node_type(mas->node); 4375 while (level > 1) { 4376 level--; 4377 slots = ma_slots(node, mt); 4378 mas->node = mas_slot(mas, slots, offset); 4379 if (unlikely(ma_dead_node(node))) 4380 return 1; 4381 4382 mt = mte_node_type(mas->node); 4383 node = mas_mn(mas); 4384 pivots = ma_pivots(node, mt); 4385 offset = ma_data_end(node, mt, pivots, max); 4386 if (unlikely(ma_dead_node(node))) 4387 return 1; 4388 } 4389 4390 slots = ma_slots(node, mt); 4391 mas->node = mas_slot(mas, slots, offset); 4392 pivots = ma_pivots(node, mt); 4393 if (unlikely(ma_dead_node(node))) 4394 return 1; 4395 4396 if (likely(offset)) 4397 mas->min = pivots[offset - 1] + 1; 4398 mas->max = max; 4399 mas->offset = mas_data_end(mas); 4400 if (unlikely(mte_dead_node(mas->node))) 4401 return 1; 4402 4403 return 0; 4404 4405 no_entry: 4406 if (unlikely(ma_dead_node(node))) 4407 return 1; 4408 4409 mas->node = MAS_NONE; 4410 return 0; 4411 } 4412 4413 /* 4414 * mas_prev_slot() - Get the entry in the previous slot 4415 * 4416 * @mas: The maple state 4417 * @max: The minimum starting range 4418 * 4419 * Return: The entry in the previous slot which is possibly NULL 4420 */ 4421 static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty) 4422 { 4423 void *entry; 4424 void __rcu **slots; 4425 unsigned long pivot; 4426 enum maple_type type; 4427 unsigned long *pivots; 4428 struct maple_node *node; 4429 unsigned long save_point = mas->index; 4430 4431 retry: 4432 node = mas_mn(mas); 4433 type = mte_node_type(mas->node); 4434 pivots = ma_pivots(node, type); 4435 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4436 goto retry; 4437 4438 again: 4439 if (mas->min <= min) { 4440 pivot = mas_safe_min(mas, pivots, mas->offset); 4441 4442 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4443 goto retry; 4444 4445 if (pivot <= min) 4446 return NULL; 4447 } 4448 4449 if (likely(mas->offset)) { 4450 mas->offset--; 4451 mas->last = mas->index - 1; 4452 mas->index = mas_safe_min(mas, pivots, mas->offset); 4453 } else { 4454 if (mas_prev_node(mas, min)) { 4455 mas_rewalk(mas, save_point); 4456 goto retry; 4457 } 4458 4459 if (mas_is_none(mas)) 4460 return NULL; 4461 4462 mas->last = mas->max; 4463 node = mas_mn(mas); 4464 type = mte_node_type(mas->node); 4465 pivots = ma_pivots(node, type); 4466 mas->index = pivots[mas->offset - 1] + 1; 4467 } 4468 4469 slots = ma_slots(node, type); 4470 entry = mas_slot(mas, slots, mas->offset); 4471 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4472 goto retry; 4473 4474 if (likely(entry)) 4475 return entry; 4476 4477 if (!empty) 4478 goto again; 4479 4480 return entry; 4481 } 4482 4483 /* 4484 * mas_next_node() - Get 
the next node at the same level in the tree. 4485 * @mas: The maple state 4486 * @max: The maximum pivot value to check. 4487 * 4488 * The next value will be mas->node[mas->offset] or MAS_NONE. 4489 * Return: 1 on dead node, 0 otherwise. 4490 */ 4491 static inline int mas_next_node(struct ma_state *mas, struct maple_node *node, 4492 unsigned long max) 4493 { 4494 unsigned long min; 4495 unsigned long *pivots; 4496 struct maple_enode *enode; 4497 int level = 0; 4498 unsigned char node_end; 4499 enum maple_type mt; 4500 void __rcu **slots; 4501 4502 if (mas->max >= max) 4503 goto no_entry; 4504 4505 min = mas->max + 1; 4506 level = 0; 4507 do { 4508 if (ma_is_root(node)) 4509 goto no_entry; 4510 4511 /* Walk up. */ 4512 if (unlikely(mas_ascend(mas))) 4513 return 1; 4514 4515 level++; 4516 node = mas_mn(mas); 4517 mt = mte_node_type(mas->node); 4518 pivots = ma_pivots(node, mt); 4519 node_end = ma_data_end(node, mt, pivots, mas->max); 4520 if (unlikely(ma_dead_node(node))) 4521 return 1; 4522 4523 } while (unlikely(mas->offset == node_end)); 4524 4525 slots = ma_slots(node, mt); 4526 mas->offset++; 4527 enode = mas_slot(mas, slots, mas->offset); 4528 if (unlikely(ma_dead_node(node))) 4529 return 1; 4530 4531 if (level > 1) 4532 mas->offset = 0; 4533 4534 while (unlikely(level > 1)) { 4535 level--; 4536 mas->node = enode; 4537 node = mas_mn(mas); 4538 mt = mte_node_type(mas->node); 4539 slots = ma_slots(node, mt); 4540 enode = mas_slot(mas, slots, 0); 4541 if (unlikely(ma_dead_node(node))) 4542 return 1; 4543 } 4544 4545 if (!mas->offset) 4546 pivots = ma_pivots(node, mt); 4547 4548 mas->max = mas_safe_pivot(mas, pivots, mas->offset, mt); 4549 if (unlikely(ma_dead_node(node))) 4550 return 1; 4551 4552 mas->node = enode; 4553 mas->min = min; 4554 return 0; 4555 4556 no_entry: 4557 if (unlikely(ma_dead_node(node))) 4558 return 1; 4559 4560 mas->node = MAS_NONE; 4561 return 0; 4562 } 4563 4564 /* 4565 * mas_next_slot() - Get the entry in the next slot 4566 * 4567 * @mas: The maple state 4568 * @max: The maximum starting range 4569 * @empty: Can be empty 4570 * 4571 * Return: The entry in the next slot which is possibly NULL 4572 */ 4573 static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty) 4574 { 4575 void __rcu **slots; 4576 unsigned long *pivots; 4577 unsigned long pivot; 4578 enum maple_type type; 4579 struct maple_node *node; 4580 unsigned char data_end; 4581 unsigned long save_point = mas->last; 4582 void *entry; 4583 4584 retry: 4585 node = mas_mn(mas); 4586 type = mte_node_type(mas->node); 4587 pivots = ma_pivots(node, type); 4588 data_end = ma_data_end(node, type, pivots, mas->max); 4589 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4590 goto retry; 4591 4592 again: 4593 if (mas->max >= max) { 4594 if (likely(mas->offset < data_end)) 4595 pivot = pivots[mas->offset]; 4596 else 4597 return NULL; /* must be mas->max */ 4598 4599 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4600 goto retry; 4601 4602 if (pivot >= max) 4603 return NULL; 4604 } 4605 4606 if (likely(mas->offset < data_end)) { 4607 mas->index = pivots[mas->offset] + 1; 4608 mas->offset++; 4609 if (likely(mas->offset < data_end)) 4610 mas->last = pivots[mas->offset]; 4611 else 4612 mas->last = mas->max; 4613 } else { 4614 if (mas_next_node(mas, node, max)) { 4615 mas_rewalk(mas, save_point); 4616 goto retry; 4617 } 4618 4619 if (mas_is_none(mas)) 4620 return NULL; 4621 4622 mas->offset = 0; 4623 mas->index = mas->min; 4624 node = mas_mn(mas); 4625 type = mte_node_type(mas->node); 
4626 pivots = ma_pivots(node, type); 4627 mas->last = pivots[0]; 4628 } 4629 4630 slots = ma_slots(node, type); 4631 entry = mt_slot(mas->tree, slots, mas->offset); 4632 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4633 goto retry; 4634 4635 if (entry) 4636 return entry; 4637 4638 if (!empty) { 4639 if (!mas->offset) 4640 data_end = 2; 4641 goto again; 4642 } 4643 4644 return entry; 4645 } 4646 4647 /* 4648 * mas_next_entry() - Internal function to get the next entry. 4649 * @mas: The maple state 4650 * @limit: The maximum range start. 4651 * 4652 * Set the @mas->node to the next entry and the range_start to 4653 * the beginning value for the entry. Does not check beyond @limit. 4654 * Sets @mas->index and @mas->last to the limit if it is hit. 4655 * Restarts on dead nodes. 4656 * 4657 * Return: the next entry or %NULL. 4658 */ 4659 static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit) 4660 { 4661 if (mas->last >= limit) 4662 return NULL; 4663 4664 return mas_next_slot(mas, limit, false); 4665 } 4666 4667 /* 4668 * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the 4669 * highest gap address of a given size in a given node and descend. 4670 * @mas: The maple state 4671 * @size: The needed size. 4672 * 4673 * Return: True if found in a leaf, false otherwise. 4674 * 4675 */ 4676 static bool mas_rev_awalk(struct ma_state *mas, unsigned long size, 4677 unsigned long *gap_min, unsigned long *gap_max) 4678 { 4679 enum maple_type type = mte_node_type(mas->node); 4680 struct maple_node *node = mas_mn(mas); 4681 unsigned long *pivots, *gaps; 4682 void __rcu **slots; 4683 unsigned long gap = 0; 4684 unsigned long max, min; 4685 unsigned char offset; 4686 4687 if (unlikely(mas_is_err(mas))) 4688 return true; 4689 4690 if (ma_is_dense(type)) { 4691 /* dense nodes. */ 4692 mas->offset = (unsigned char)(mas->index - mas->min); 4693 return true; 4694 } 4695 4696 pivots = ma_pivots(node, type); 4697 slots = ma_slots(node, type); 4698 gaps = ma_gaps(node, type); 4699 offset = mas->offset; 4700 min = mas_safe_min(mas, pivots, offset); 4701 /* Skip out of bounds. */ 4702 while (mas->last < min) 4703 min = mas_safe_min(mas, pivots, --offset); 4704 4705 max = mas_safe_pivot(mas, pivots, offset, type); 4706 while (mas->index <= max) { 4707 gap = 0; 4708 if (gaps) 4709 gap = gaps[offset]; 4710 else if (!mas_slot(mas, slots, offset)) 4711 gap = max - min + 1; 4712 4713 if (gap) { 4714 if ((size <= gap) && (size <= mas->last - min + 1)) 4715 break; 4716 4717 if (!gaps) { 4718 /* Skip the next slot, it cannot be a gap. */ 4719 if (offset < 2) 4720 goto ascend; 4721 4722 offset -= 2; 4723 max = pivots[offset]; 4724 min = mas_safe_min(mas, pivots, offset); 4725 continue; 4726 } 4727 } 4728 4729 if (!offset) 4730 goto ascend; 4731 4732 offset--; 4733 max = min - 1; 4734 min = mas_safe_min(mas, pivots, offset); 4735 } 4736 4737 if (unlikely((mas->index > max) || (size - 1 > max - mas->index))) 4738 goto no_space; 4739 4740 if (unlikely(ma_is_leaf(type))) { 4741 mas->offset = offset; 4742 *gap_min = min; 4743 *gap_max = min + gap - 1; 4744 return true; 4745 } 4746 4747 /* descend, only happens under lock. 
*/ 4748 mas->node = mas_slot(mas, slots, offset); 4749 mas->min = min; 4750 mas->max = max; 4751 mas->offset = mas_data_end(mas); 4752 return false; 4753 4754 ascend: 4755 if (!mte_is_root(mas->node)) 4756 return false; 4757 4758 no_space: 4759 mas_set_err(mas, -EBUSY); 4760 return false; 4761 } 4762 4763 static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) 4764 { 4765 enum maple_type type = mte_node_type(mas->node); 4766 unsigned long pivot, min, gap = 0; 4767 unsigned char offset, data_end; 4768 unsigned long *gaps, *pivots; 4769 void __rcu **slots; 4770 struct maple_node *node; 4771 bool found = false; 4772 4773 if (ma_is_dense(type)) { 4774 mas->offset = (unsigned char)(mas->index - mas->min); 4775 return true; 4776 } 4777 4778 node = mas_mn(mas); 4779 pivots = ma_pivots(node, type); 4780 slots = ma_slots(node, type); 4781 gaps = ma_gaps(node, type); 4782 offset = mas->offset; 4783 min = mas_safe_min(mas, pivots, offset); 4784 data_end = ma_data_end(node, type, pivots, mas->max); 4785 for (; offset <= data_end; offset++) { 4786 pivot = mas_safe_pivot(mas, pivots, offset, type); 4787 4788 /* Not within lower bounds */ 4789 if (mas->index > pivot) 4790 goto next_slot; 4791 4792 if (gaps) 4793 gap = gaps[offset]; 4794 else if (!mas_slot(mas, slots, offset)) 4795 gap = min(pivot, mas->last) - max(mas->index, min) + 1; 4796 else 4797 goto next_slot; 4798 4799 if (gap >= size) { 4800 if (ma_is_leaf(type)) { 4801 found = true; 4802 goto done; 4803 } 4804 if (mas->index <= pivot) { 4805 mas->node = mas_slot(mas, slots, offset); 4806 mas->min = min; 4807 mas->max = pivot; 4808 offset = 0; 4809 break; 4810 } 4811 } 4812 next_slot: 4813 min = pivot + 1; 4814 if (mas->last <= pivot) { 4815 mas_set_err(mas, -EBUSY); 4816 return true; 4817 } 4818 } 4819 4820 if (mte_is_root(mas->node)) 4821 found = true; 4822 done: 4823 mas->offset = offset; 4824 return found; 4825 } 4826 4827 /** 4828 * mas_walk() - Search for @mas->index in the tree. 4829 * @mas: The maple state. 4830 * 4831 * mas->index and mas->last will be set to the range if there is a value. If 4832 * mas->node is MAS_NONE, reset to MAS_START. 4833 * 4834 * Return: the entry at the location or %NULL. 4835 */ 4836 void *mas_walk(struct ma_state *mas) 4837 { 4838 void *entry; 4839 4840 if (mas_is_none(mas) || mas_is_paused(mas) || mas_is_ptr(mas)) 4841 mas->node = MAS_START; 4842 retry: 4843 entry = mas_state_walk(mas); 4844 if (mas_is_start(mas)) { 4845 goto retry; 4846 } else if (mas_is_none(mas)) { 4847 mas->index = 0; 4848 mas->last = ULONG_MAX; 4849 } else if (mas_is_ptr(mas)) { 4850 if (!mas->index) { 4851 mas->last = 0; 4852 return entry; 4853 } 4854 4855 mas->index = 1; 4856 mas->last = ULONG_MAX; 4857 mas->node = MAS_NONE; 4858 return NULL; 4859 } 4860 4861 return entry; 4862 } 4863 EXPORT_SYMBOL_GPL(mas_walk); 4864 4865 static inline bool mas_rewind_node(struct ma_state *mas) 4866 { 4867 unsigned char slot; 4868 4869 do { 4870 if (mte_is_root(mas->node)) { 4871 slot = mas->offset; 4872 if (!slot) 4873 return false; 4874 } else { 4875 mas_ascend(mas); 4876 slot = mas->offset; 4877 } 4878 } while (!slot); 4879 4880 mas->offset = --slot; 4881 return true; 4882 } 4883 4884 /* 4885 * mas_skip_node() - Internal function. Skip over a node. 4886 * @mas: The maple state. 4887 * 4888 * Return: true if there is another node, false otherwise. 
4889 */ 4890 static inline bool mas_skip_node(struct ma_state *mas) 4891 { 4892 if (mas_is_err(mas)) 4893 return false; 4894 4895 do { 4896 if (mte_is_root(mas->node)) { 4897 if (mas->offset >= mas_data_end(mas)) { 4898 mas_set_err(mas, -EBUSY); 4899 return false; 4900 } 4901 } else { 4902 mas_ascend(mas); 4903 } 4904 } while (mas->offset >= mas_data_end(mas)); 4905 4906 mas->offset++; 4907 return true; 4908 } 4909 4910 /* 4911 * mas_awalk() - Allocation walk. Search from low address to high, for a gap of 4912 * @size 4913 * @mas: The maple state 4914 * @size: The size of the gap required 4915 * 4916 * Search between @mas->index and @mas->last for a gap of @size. 4917 */ 4918 static inline void mas_awalk(struct ma_state *mas, unsigned long size) 4919 { 4920 struct maple_enode *last = NULL; 4921 4922 /* 4923 * There are 4 options: 4924 * go to child (descend) 4925 * go back to parent (ascend) 4926 * no gap found. (return, slot == MAPLE_NODE_SLOTS) 4927 * found the gap. (return, slot != MAPLE_NODE_SLOTS) 4928 */ 4929 while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) { 4930 if (last == mas->node) 4931 mas_skip_node(mas); 4932 else 4933 last = mas->node; 4934 } 4935 } 4936 4937 /* 4938 * mas_sparse_area() - Internal function. Return upper or lower limit when 4939 * searching for a gap in an empty tree. 4940 * @mas: The maple state 4941 * @min: the minimum range 4942 * @max: The maximum range 4943 * @size: The size of the gap 4944 * @fwd: Searching forward or back 4945 */ 4946 static inline int mas_sparse_area(struct ma_state *mas, unsigned long min, 4947 unsigned long max, unsigned long size, bool fwd) 4948 { 4949 if (!unlikely(mas_is_none(mas)) && min == 0) { 4950 min++; 4951 /* 4952 * At this time, min is increased, we need to recheck whether 4953 * the size is satisfied. 4954 */ 4955 if (min > max || max - min + 1 < size) 4956 return -EBUSY; 4957 } 4958 /* mas_is_ptr */ 4959 4960 if (fwd) { 4961 mas->index = min; 4962 mas->last = min + size - 1; 4963 } else { 4964 mas->last = max; 4965 mas->index = max - size + 1; 4966 } 4967 return 0; 4968 } 4969 4970 /* 4971 * mas_empty_area() - Get the lowest address within the range that is 4972 * sufficient for the size requested. 
4973 * @mas: The maple state 4974 * @min: The lowest value of the range 4975 * @max: The highest value of the range 4976 * @size: The size needed 4977 */ 4978 int mas_empty_area(struct ma_state *mas, unsigned long min, 4979 unsigned long max, unsigned long size) 4980 { 4981 unsigned char offset; 4982 unsigned long *pivots; 4983 enum maple_type mt; 4984 4985 if (min > max) 4986 return -EINVAL; 4987 4988 if (size == 0 || max - min < size - 1) 4989 return -EINVAL; 4990 4991 if (mas_is_start(mas)) 4992 mas_start(mas); 4993 else if (mas->offset >= 2) 4994 mas->offset -= 2; 4995 else if (!mas_skip_node(mas)) 4996 return -EBUSY; 4997 4998 /* Empty set */ 4999 if (mas_is_none(mas) || mas_is_ptr(mas)) 5000 return mas_sparse_area(mas, min, max, size, true); 5001 5002 /* The start of the window can only be within these values */ 5003 mas->index = min; 5004 mas->last = max; 5005 mas_awalk(mas, size); 5006 5007 if (unlikely(mas_is_err(mas))) 5008 return xa_err(mas->node); 5009 5010 offset = mas->offset; 5011 if (unlikely(offset == MAPLE_NODE_SLOTS)) 5012 return -EBUSY; 5013 5014 mt = mte_node_type(mas->node); 5015 pivots = ma_pivots(mas_mn(mas), mt); 5016 min = mas_safe_min(mas, pivots, offset); 5017 if (mas->index < min) 5018 mas->index = min; 5019 mas->last = mas->index + size - 1; 5020 return 0; 5021 } 5022 EXPORT_SYMBOL_GPL(mas_empty_area); 5023 5024 /* 5025 * mas_empty_area_rev() - Get the highest address within the range that is 5026 * sufficient for the size requested. 5027 * @mas: The maple state 5028 * @min: The lowest value of the range 5029 * @max: The highest value of the range 5030 * @size: The size needed 5031 */ 5032 int mas_empty_area_rev(struct ma_state *mas, unsigned long min, 5033 unsigned long max, unsigned long size) 5034 { 5035 struct maple_enode *last = mas->node; 5036 5037 if (min > max) 5038 return -EINVAL; 5039 5040 if (size == 0 || max - min < size - 1) 5041 return -EINVAL; 5042 5043 if (mas_is_start(mas)) { 5044 mas_start(mas); 5045 mas->offset = mas_data_end(mas); 5046 } else if (mas->offset >= 2) { 5047 mas->offset -= 2; 5048 } else if (!mas_rewind_node(mas)) { 5049 return -EBUSY; 5050 } 5051 5052 /* Empty set. */ 5053 if (mas_is_none(mas) || mas_is_ptr(mas)) 5054 return mas_sparse_area(mas, min, max, size, false); 5055 5056 /* The start of the window can only be within these values. */ 5057 mas->index = min; 5058 mas->last = max; 5059 5060 while (!mas_rev_awalk(mas, size, &min, &max)) { 5061 if (last == mas->node) { 5062 if (!mas_rewind_node(mas)) 5063 return -EBUSY; 5064 } else { 5065 last = mas->node; 5066 } 5067 } 5068 5069 if (mas_is_err(mas)) 5070 return xa_err(mas->node); 5071 5072 if (unlikely(mas->offset == MAPLE_NODE_SLOTS)) 5073 return -EBUSY; 5074 5075 /* Trim the upper limit to the max. */ 5076 if (max < mas->last) 5077 mas->last = max; 5078 5079 mas->index = mas->last - size + 1; 5080 return 0; 5081 } 5082 EXPORT_SYMBOL_GPL(mas_empty_area_rev); 5083 5084 /* 5085 * mte_dead_leaves() - Mark all leaves of a node as dead. 5086 * @mas: The maple state 5087 * @slots: Pointer to the slot array 5088 * @type: The maple node type 5089 * 5090 * Must hold the write lock. 5091 * 5092 * Return: The number of leaves marked as dead. 
5093 */ 5094 static inline 5095 unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt, 5096 void __rcu **slots) 5097 { 5098 struct maple_node *node; 5099 enum maple_type type; 5100 void *entry; 5101 int offset; 5102 5103 for (offset = 0; offset < mt_slot_count(enode); offset++) { 5104 entry = mt_slot(mt, slots, offset); 5105 type = mte_node_type(entry); 5106 node = mte_to_node(entry); 5107 /* Use both node and type to catch LE & BE metadata */ 5108 if (!node || !type) 5109 break; 5110 5111 mte_set_node_dead(entry); 5112 node->type = type; 5113 rcu_assign_pointer(slots[offset], node); 5114 } 5115 5116 return offset; 5117 } 5118 5119 /** 5120 * mte_dead_walk() - Walk down a dead tree to just before the leaves 5121 * @enode: The maple encoded node 5122 * @offset: The starting offset 5123 * 5124 * Note: This can only be used from the RCU callback context. 5125 */ 5126 static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset) 5127 { 5128 struct maple_node *node, *next; 5129 void __rcu **slots = NULL; 5130 5131 next = mte_to_node(*enode); 5132 do { 5133 *enode = ma_enode_ptr(next); 5134 node = mte_to_node(*enode); 5135 slots = ma_slots(node, node->type); 5136 next = rcu_dereference_protected(slots[offset], 5137 lock_is_held(&rcu_callback_map)); 5138 offset = 0; 5139 } while (!ma_is_leaf(next->type)); 5140 5141 return slots; 5142 } 5143 5144 /** 5145 * mt_free_walk() - Walk & free a tree in the RCU callback context 5146 * @head: The RCU head that's within the node. 5147 * 5148 * Note: This can only be used from the RCU callback context. 5149 */ 5150 static void mt_free_walk(struct rcu_head *head) 5151 { 5152 void __rcu **slots; 5153 struct maple_node *node, *start; 5154 struct maple_enode *enode; 5155 unsigned char offset; 5156 enum maple_type type; 5157 5158 node = container_of(head, struct maple_node, rcu); 5159 5160 if (ma_is_leaf(node->type)) 5161 goto free_leaf; 5162 5163 start = node; 5164 enode = mt_mk_node(node, node->type); 5165 slots = mte_dead_walk(&enode, 0); 5166 node = mte_to_node(enode); 5167 do { 5168 mt_free_bulk(node->slot_len, slots); 5169 offset = node->parent_slot + 1; 5170 enode = node->piv_parent; 5171 if (mte_to_node(enode) == node) 5172 goto free_leaf; 5173 5174 type = mte_node_type(enode); 5175 slots = ma_slots(mte_to_node(enode), type); 5176 if ((offset < mt_slots[type]) && 5177 rcu_dereference_protected(slots[offset], 5178 lock_is_held(&rcu_callback_map))) 5179 slots = mte_dead_walk(&enode, offset); 5180 node = mte_to_node(enode); 5181 } while ((node != start) || (node->slot_len < offset)); 5182 5183 slots = ma_slots(node, node->type); 5184 mt_free_bulk(node->slot_len, slots); 5185 5186 free_leaf: 5187 mt_free_rcu(&node->rcu); 5188 } 5189 5190 static inline void __rcu **mte_destroy_descend(struct maple_enode **enode, 5191 struct maple_tree *mt, struct maple_enode *prev, unsigned char offset) 5192 { 5193 struct maple_node *node; 5194 struct maple_enode *next = *enode; 5195 void __rcu **slots = NULL; 5196 enum maple_type type; 5197 unsigned char next_offset = 0; 5198 5199 do { 5200 *enode = next; 5201 node = mte_to_node(*enode); 5202 type = mte_node_type(*enode); 5203 slots = ma_slots(node, type); 5204 next = mt_slot_locked(mt, slots, next_offset); 5205 if ((mte_dead_node(next))) 5206 next = mt_slot_locked(mt, slots, ++next_offset); 5207 5208 mte_set_node_dead(*enode); 5209 node->type = type; 5210 node->piv_parent = prev; 5211 node->parent_slot = offset; 5212 offset = next_offset; 5213 next_offset = 0; 5214 prev = 
*enode; 5215 } while (!mte_is_leaf(next)); 5216 5217 return slots; 5218 } 5219 5220 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 5221 bool free) 5222 { 5223 void __rcu **slots; 5224 struct maple_node *node = mte_to_node(enode); 5225 struct maple_enode *start; 5226 5227 if (mte_is_leaf(enode)) { 5228 node->type = mte_node_type(enode); 5229 goto free_leaf; 5230 } 5231 5232 start = enode; 5233 slots = mte_destroy_descend(&enode, mt, start, 0); 5234 node = mte_to_node(enode); // Updated in the above call. 5235 do { 5236 enum maple_type type; 5237 unsigned char offset; 5238 struct maple_enode *parent, *tmp; 5239 5240 node->slot_len = mte_dead_leaves(enode, mt, slots); 5241 if (free) 5242 mt_free_bulk(node->slot_len, slots); 5243 offset = node->parent_slot + 1; 5244 enode = node->piv_parent; 5245 if (mte_to_node(enode) == node) 5246 goto free_leaf; 5247 5248 type = mte_node_type(enode); 5249 slots = ma_slots(mte_to_node(enode), type); 5250 if (offset >= mt_slots[type]) 5251 goto next; 5252 5253 tmp = mt_slot_locked(mt, slots, offset); 5254 if (mte_node_type(tmp) && mte_to_node(tmp)) { 5255 parent = enode; 5256 enode = tmp; 5257 slots = mte_destroy_descend(&enode, mt, parent, offset); 5258 } 5259 next: 5260 node = mte_to_node(enode); 5261 } while (start != enode); 5262 5263 node = mte_to_node(enode); 5264 node->slot_len = mte_dead_leaves(enode, mt, slots); 5265 if (free) 5266 mt_free_bulk(node->slot_len, slots); 5267 5268 free_leaf: 5269 if (free) 5270 mt_free_rcu(&node->rcu); 5271 else 5272 mt_clear_meta(mt, node, node->type); 5273 } 5274 5275 /* 5276 * mte_destroy_walk() - Free a tree or sub-tree. 5277 * @enode: the encoded maple node (maple_enode) to start 5278 * @mt: the tree to free - needed for node types. 5279 * 5280 * Must hold the write lock. 5281 */ 5282 static inline void mte_destroy_walk(struct maple_enode *enode, 5283 struct maple_tree *mt) 5284 { 5285 struct maple_node *node = mte_to_node(enode); 5286 5287 if (mt_in_rcu(mt)) { 5288 mt_destroy_walk(enode, mt, false); 5289 call_rcu(&node->rcu, mt_free_walk); 5290 } else { 5291 mt_destroy_walk(enode, mt, true); 5292 } 5293 } 5294 5295 static void mas_wr_store_setup(struct ma_wr_state *wr_mas) 5296 { 5297 if (mas_is_start(wr_mas->mas)) 5298 return; 5299 5300 if (unlikely(mas_is_paused(wr_mas->mas))) 5301 goto reset; 5302 5303 if (unlikely(mas_is_none(wr_mas->mas))) 5304 goto reset; 5305 5306 /* 5307 * A less strict version of mas_is_span_wr() where we allow spanning 5308 * writes within this node. This is to stop partial walks in 5309 * mas_prealloc() from being reset. 5310 */ 5311 if (wr_mas->mas->last > wr_mas->mas->max) 5312 goto reset; 5313 5314 if (wr_mas->entry) 5315 return; 5316 5317 if (mte_is_leaf(wr_mas->mas->node) && 5318 wr_mas->mas->last == wr_mas->mas->max) 5319 goto reset; 5320 5321 return; 5322 5323 reset: 5324 mas_reset(wr_mas->mas); 5325 } 5326 5327 /* Interface */ 5328 5329 /** 5330 * mas_store() - Store an @entry. 5331 * @mas: The maple state. 5332 * @entry: The entry to store. 5333 * 5334 * The @mas->index and @mas->last is used to set the range for the @entry. 5335 * Note: The @mas should have pre-allocated entries to ensure there is memory to 5336 * store the entry. Please see mas_expected_entries()/mas_destroy() for more details. 5337 * 5338 * Return: the first entry between mas->index and mas->last or %NULL. 
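 *
 * A minimal usage sketch (illustrative only; "tree" and "new_entry" are
 * placeholders, not names from this file):
 *
 *	void *old;
 *	MA_STATE(mas, &tree, 10, 20);
 *
 *	mas_lock(&mas);
 *	old = mas_store(&mas, new_entry);
 *	mas_unlock(&mas);
 *
 * If the nodes needed for the write could not be allocated, the maple state
 * is left holding the error; mas_store_gfp() wraps the same call with a
 * retry using the supplied GFP flags.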
5339 */ 5340 void *mas_store(struct ma_state *mas, void *entry) 5341 { 5342 MA_WR_STATE(wr_mas, mas, entry); 5343 5344 trace_ma_write(__func__, mas, 0, entry); 5345 #ifdef CONFIG_DEBUG_MAPLE_TREE 5346 if (MAS_WARN_ON(mas, mas->index > mas->last)) 5347 pr_err("Error %lX > %lX %p\n", mas->index, mas->last, entry); 5348 5349 if (mas->index > mas->last) { 5350 mas_set_err(mas, -EINVAL); 5351 return NULL; 5352 } 5353 5354 #endif 5355 5356 /* 5357 * Storing is the same operation as insert with the added caveat that it 5358 * can overwrite entries. Although this seems simple enough, one may 5359 * want to examine what happens if a single store operation was to 5360 * overwrite multiple entries within a self-balancing B-Tree. 5361 */ 5362 mas_wr_store_setup(&wr_mas); 5363 mas_wr_store_entry(&wr_mas); 5364 return wr_mas.content; 5365 } 5366 EXPORT_SYMBOL_GPL(mas_store); 5367 5368 /** 5369 * mas_store_gfp() - Store a value into the tree. 5370 * @mas: The maple state 5371 * @entry: The entry to store 5372 * @gfp: The GFP_FLAGS to use for allocations if necessary. 5373 * 5374 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 5375 * be allocated. 5376 */ 5377 int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp) 5378 { 5379 MA_WR_STATE(wr_mas, mas, entry); 5380 5381 mas_wr_store_setup(&wr_mas); 5382 trace_ma_write(__func__, mas, 0, entry); 5383 retry: 5384 mas_wr_store_entry(&wr_mas); 5385 if (unlikely(mas_nomem(mas, gfp))) 5386 goto retry; 5387 5388 if (unlikely(mas_is_err(mas))) 5389 return xa_err(mas->node); 5390 5391 return 0; 5392 } 5393 EXPORT_SYMBOL_GPL(mas_store_gfp); 5394 5395 /** 5396 * mas_store_prealloc() - Store a value into the tree using memory 5397 * preallocated in the maple state. 5398 * @mas: The maple state 5399 * @entry: The entry to store. 5400 */ 5401 void mas_store_prealloc(struct ma_state *mas, void *entry) 5402 { 5403 MA_WR_STATE(wr_mas, mas, entry); 5404 5405 mas_wr_store_setup(&wr_mas); 5406 trace_ma_write(__func__, mas, 0, entry); 5407 mas_wr_store_entry(&wr_mas); 5408 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); 5409 mas_destroy(mas); 5410 } 5411 EXPORT_SYMBOL_GPL(mas_store_prealloc); 5412 5413 /** 5414 * mas_preallocate() - Preallocate enough nodes for a store operation 5415 * @mas: The maple state 5416 * @entry: The entry that will be stored 5417 * @gfp: The GFP_FLAGS to use for allocations. 5418 * 5419 * Return: 0 on success, -ENOMEM if memory could not be allocated. 5420 */ 5421 int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) 5422 { 5423 MA_WR_STATE(wr_mas, mas, entry); 5424 unsigned char node_size; 5425 int request = 1; 5426 int ret; 5427 5428 5429 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 5430 goto ask_now; 5431 5432 mas_wr_store_setup(&wr_mas); 5433 wr_mas.content = mas_start(mas); 5434 /* Root expand */ 5435 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) 5436 goto ask_now; 5437 5438 if (unlikely(!mas_wr_walk(&wr_mas))) { 5439 /* Spanning store, use worst case for now */ 5440 request = 1 + mas_mt_height(mas) * 3; 5441 goto ask_now; 5442 } 5443 5444 /* At this point, we are at the leaf node that needs to be altered. */ 5445 /* Exact fit, no nodes needed. */ 5446 if (wr_mas.r_min == mas->index && wr_mas.r_max == mas->last) 5447 return 0; 5448 5449 mas_wr_end_piv(&wr_mas); 5450 node_size = mas_wr_new_end(&wr_mas); 5451 if (node_size >= mt_slots[wr_mas.type]) { 5452 /* Split, worst case for now. 
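 * A split can cascade all the way up, so budget two nodes per level plus
 * one for a possible new root.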
*/ 5453 request = 1 + mas_mt_height(mas) * 2; 5454 goto ask_now; 5455 } 5456 5457 /* New root needs a singe node */ 5458 if (unlikely(mte_is_root(mas->node))) 5459 goto ask_now; 5460 5461 /* Potential spanning rebalance collapsing a node, use worst-case */ 5462 if (node_size - 1 <= mt_min_slots[wr_mas.type]) 5463 request = mas_mt_height(mas) * 2 - 1; 5464 5465 /* node store, slot store needs one node */ 5466 ask_now: 5467 mas_node_count_gfp(mas, request, gfp); 5468 mas->mas_flags |= MA_STATE_PREALLOC; 5469 if (likely(!mas_is_err(mas))) 5470 return 0; 5471 5472 mas_set_alloc_req(mas, 0); 5473 ret = xa_err(mas->node); 5474 mas_reset(mas); 5475 mas_destroy(mas); 5476 mas_reset(mas); 5477 return ret; 5478 } 5479 EXPORT_SYMBOL_GPL(mas_preallocate); 5480 5481 /* 5482 * mas_destroy() - destroy a maple state. 5483 * @mas: The maple state 5484 * 5485 * Upon completion, check the left-most node and rebalance against the node to 5486 * the right if necessary. Frees any allocated nodes associated with this maple 5487 * state. 5488 */ 5489 void mas_destroy(struct ma_state *mas) 5490 { 5491 struct maple_alloc *node; 5492 unsigned long total; 5493 5494 /* 5495 * When using mas_for_each() to insert an expected number of elements, 5496 * it is possible that the number inserted is less than the expected 5497 * number. To fix an invalid final node, a check is performed here to 5498 * rebalance the previous node with the final node. 5499 */ 5500 if (mas->mas_flags & MA_STATE_REBALANCE) { 5501 unsigned char end; 5502 5503 mas_start(mas); 5504 mtree_range_walk(mas); 5505 end = mas_data_end(mas) + 1; 5506 if (end < mt_min_slot_count(mas->node) - 1) 5507 mas_destroy_rebalance(mas, end); 5508 5509 mas->mas_flags &= ~MA_STATE_REBALANCE; 5510 } 5511 mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC); 5512 5513 total = mas_allocated(mas); 5514 while (total) { 5515 node = mas->alloc; 5516 mas->alloc = node->slot[0]; 5517 if (node->node_count > 1) { 5518 size_t count = node->node_count - 1; 5519 5520 mt_free_bulk(count, (void __rcu **)&node->slot[1]); 5521 total -= count; 5522 } 5523 kmem_cache_free(maple_node_cache, node); 5524 total--; 5525 } 5526 5527 mas->alloc = NULL; 5528 } 5529 EXPORT_SYMBOL_GPL(mas_destroy); 5530 5531 /* 5532 * mas_expected_entries() - Set the expected number of entries that will be inserted. 5533 * @mas: The maple state 5534 * @nr_entries: The number of expected entries. 5535 * 5536 * This will attempt to pre-allocate enough nodes to store the expected number 5537 * of entries. The allocations will occur using the bulk allocator interface 5538 * for speed. Please call mas_destroy() on the @mas after inserting the entries 5539 * to ensure any unused nodes are freed. 5540 * 5541 * Return: 0 on success, -ENOMEM if memory could not be allocated. 5542 */ 5543 int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries) 5544 { 5545 int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2; 5546 struct maple_enode *enode = mas->node; 5547 int nr_nodes; 5548 int ret; 5549 5550 /* 5551 * Sometimes it is necessary to duplicate a tree to a new tree, such as 5552 * forking a process and duplicating the VMAs from one tree to a new 5553 * tree. When such a situation arises, it is known that the new tree is 5554 * not going to be used until the entire tree is populated. For 5555 * performance reasons, it is best to use a bulk load with RCU disabled. 5556 * This allows for optimistic splitting that favours the left and reuse 5557 * of nodes during the operation. 
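 *
 * A bulk load then looks roughly like the sketch below ("dst", "first",
 * "last" and "entry" are placeholders):
 *
 *	MA_STATE(mas, &dst, 0, 0);
 *
 *	mas_lock(&mas);
 *	if (!mas_expected_entries(&mas, nr_entries)) {
 *		for each source range [first, last] with its entry:
 *			mas_set_range(&mas, first, last);
 *			mas_store(&mas, entry);
 *	}
 *	mas_destroy(&mas);	// free unused nodes, fix up the last leaf
 *	mas_unlock(&mas);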
5558 */ 5559 5560 /* Optimize splitting for bulk insert in-order */ 5561 mas->mas_flags |= MA_STATE_BULK; 5562 5563 /* 5564 * Avoid overflow, assume a gap between each entry and a trailing null. 5565 * If this is wrong, it just means allocation can happen during 5566 * insertion of entries. 5567 */ 5568 nr_nodes = max(nr_entries, nr_entries * 2 + 1); 5569 if (!mt_is_alloc(mas->tree)) 5570 nonleaf_cap = MAPLE_RANGE64_SLOTS - 2; 5571 5572 /* Leaves; reduce slots to keep space for expansion */ 5573 nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2); 5574 /* Internal nodes */ 5575 nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap); 5576 /* Add working room for split (2 nodes) + new parents */ 5577 mas_node_count(mas, nr_nodes + 3); 5578 5579 /* Detect if allocations run out */ 5580 mas->mas_flags |= MA_STATE_PREALLOC; 5581 5582 if (!mas_is_err(mas)) 5583 return 0; 5584 5585 ret = xa_err(mas->node); 5586 mas->node = enode; 5587 mas_destroy(mas); 5588 return ret; 5589 5590 } 5591 EXPORT_SYMBOL_GPL(mas_expected_entries); 5592 5593 static inline bool mas_next_setup(struct ma_state *mas, unsigned long max, 5594 void **entry) 5595 { 5596 bool was_none = mas_is_none(mas); 5597 5598 if (mas_is_none(mas) || mas_is_paused(mas)) 5599 mas->node = MAS_START; 5600 5601 if (mas_is_start(mas)) 5602 *entry = mas_walk(mas); /* Retries on dead nodes handled by mas_walk */ 5603 5604 if (mas_is_ptr(mas)) { 5605 *entry = NULL; 5606 if (was_none && mas->index == 0) { 5607 mas->index = mas->last = 0; 5608 return true; 5609 } 5610 mas->index = 1; 5611 mas->last = ULONG_MAX; 5612 mas->node = MAS_NONE; 5613 return true; 5614 } 5615 5616 if (mas_is_none(mas)) 5617 return true; 5618 return false; 5619 } 5620 5621 /** 5622 * mas_next() - Get the next entry. 5623 * @mas: The maple state 5624 * @max: The maximum index to check. 5625 * 5626 * Returns the next entry after @mas->index. 5627 * Must hold rcu_read_lock or the write lock. 5628 * Can return the zero entry. 5629 * 5630 * Return: The next entry or %NULL 5631 */ 5632 void *mas_next(struct ma_state *mas, unsigned long max) 5633 { 5634 void *entry = NULL; 5635 5636 if (mas_next_setup(mas, max, &entry)) 5637 return entry; 5638 5639 /* Retries on dead nodes handled by mas_next_slot */ 5640 return mas_next_slot(mas, max, false); 5641 } 5642 EXPORT_SYMBOL_GPL(mas_next); 5643 5644 /** 5645 * mas_next_range() - Advance the maple state to the next range 5646 * @mas: The maple state 5647 * @max: The maximum index to check. 5648 * 5649 * Sets @mas->index and @mas->last to the range. 5650 * Must hold rcu_read_lock or the write lock. 5651 * Can return the zero entry. 5652 * 5653 * Return: The next entry or %NULL 5654 */ 5655 void *mas_next_range(struct ma_state *mas, unsigned long max) 5656 { 5657 void *entry = NULL; 5658 5659 if (mas_next_setup(mas, max, &entry)) 5660 return entry; 5661 5662 /* Retries on dead nodes handled by mas_next_slot */ 5663 return mas_next_slot(mas, max, true); 5664 } 5665 EXPORT_SYMBOL_GPL(mas_next_range); 5666 5667 /** 5668 * mt_next() - get the next value in the maple tree 5669 * @mt: The maple tree 5670 * @index: The start index 5671 * @max: The maximum index to check 5672 * 5673 * Takes RCU read lock internally to protect the search, which does not 5674 * protect the returned pointer after dropping RCU read lock. 5675 * See also: Documentation/core-api/maple_tree.rst 5676 * 5677 * Return: The entry higher than @index or %NULL if nothing is found. 
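 *
 * A one-off successor lookup is simply ("my_tree" is a placeholder):
 *
 *	entry = mt_next(&my_tree, index, ULONG_MAX);
 *
 * Walking many entries is cheaper with a maple state and mas_find() or
 * mas_for_each() under a single RCU read lock, since each mt_next() call
 * restarts the walk from the root.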
5678 */ 5679 void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max) 5680 { 5681 void *entry = NULL; 5682 MA_STATE(mas, mt, index, index); 5683 5684 rcu_read_lock(); 5685 entry = mas_next(&mas, max); 5686 rcu_read_unlock(); 5687 return entry; 5688 } 5689 EXPORT_SYMBOL_GPL(mt_next); 5690 5691 static inline bool mas_prev_setup(struct ma_state *mas, unsigned long min, 5692 void **entry) 5693 { 5694 if (mas->index <= min) 5695 goto none; 5696 5697 if (mas_is_none(mas) || mas_is_paused(mas)) 5698 mas->node = MAS_START; 5699 5700 if (mas_is_start(mas)) { 5701 mas_walk(mas); 5702 if (!mas->index) 5703 goto none; 5704 } 5705 5706 if (unlikely(mas_is_ptr(mas))) { 5707 if (!mas->index) 5708 goto none; 5709 mas->index = mas->last = 0; 5710 *entry = mas_root(mas); 5711 return true; 5712 } 5713 5714 if (mas_is_none(mas)) { 5715 if (mas->index) { 5716 /* Walked to out-of-range pointer? */ 5717 mas->index = mas->last = 0; 5718 mas->node = MAS_ROOT; 5719 *entry = mas_root(mas); 5720 return true; 5721 } 5722 return true; 5723 } 5724 5725 return false; 5726 5727 none: 5728 mas->node = MAS_NONE; 5729 return true; 5730 } 5731 5732 /** 5733 * mas_prev() - Get the previous entry 5734 * @mas: The maple state 5735 * @min: The minimum value to check. 5736 * 5737 * Must hold rcu_read_lock or the write lock. 5738 * Will reset mas to MAS_START if the node is MAS_NONE. Will stop on not 5739 * searchable nodes. 5740 * 5741 * Return: the previous value or %NULL. 5742 */ 5743 void *mas_prev(struct ma_state *mas, unsigned long min) 5744 { 5745 void *entry = NULL; 5746 5747 if (mas_prev_setup(mas, min, &entry)) 5748 return entry; 5749 5750 return mas_prev_slot(mas, min, false); 5751 } 5752 EXPORT_SYMBOL_GPL(mas_prev); 5753 5754 /** 5755 * mas_prev_range() - Advance to the previous range 5756 * @mas: The maple state 5757 * @min: The minimum value to check. 5758 * 5759 * Sets @mas->index and @mas->last to the range. 5760 * Must hold rcu_read_lock or the write lock. 5761 * Will reset mas to MAS_START if the node is MAS_NONE. Will stop on not 5762 * searchable nodes. 5763 * 5764 * Return: the previous value or %NULL. 5765 */ 5766 void *mas_prev_range(struct ma_state *mas, unsigned long min) 5767 { 5768 void *entry = NULL; 5769 5770 if (mas_prev_setup(mas, min, &entry)) 5771 return entry; 5772 5773 return mas_prev_slot(mas, min, true); 5774 } 5775 EXPORT_SYMBOL_GPL(mas_prev_range); 5776 5777 /** 5778 * mt_prev() - get the previous value in the maple tree 5779 * @mt: The maple tree 5780 * @index: The start index 5781 * @min: The minimum index to check 5782 * 5783 * Takes RCU read lock internally to protect the search, which does not 5784 * protect the returned pointer after dropping RCU read lock. 5785 * See also: Documentation/core-api/maple_tree.rst 5786 * 5787 * Return: The entry before @index or %NULL if nothing is found. 5788 */ 5789 void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min) 5790 { 5791 void *entry = NULL; 5792 MA_STATE(mas, mt, index, index); 5793 5794 rcu_read_lock(); 5795 entry = mas_prev(&mas, min); 5796 rcu_read_unlock(); 5797 return entry; 5798 } 5799 EXPORT_SYMBOL_GPL(mt_prev); 5800 5801 /** 5802 * mas_pause() - Pause a mas_find/mas_for_each to drop the lock. 5803 * @mas: The maple state to pause 5804 * 5805 * Some users need to pause a walk and drop the lock they're holding in 5806 * order to yield to a higher priority thread or carry out an operation 5807 * on an entry. Those users should call this function before they drop 5808 * the lock. 
It resets the @mas to be suitable for the next iteration 5809 * of the loop after the user has reacquired the lock. If most entries 5810 * found during a walk require you to call mas_pause(), the mt_for_each() 5811 * iterator may be more appropriate. 5812 * 5813 */ 5814 void mas_pause(struct ma_state *mas) 5815 { 5816 mas->node = MAS_PAUSE; 5817 } 5818 EXPORT_SYMBOL_GPL(mas_pause); 5819 5820 /** 5821 * mas_find_setup() - Internal function to set up mas_find*(). 5822 * @mas: The maple state 5823 * @max: The maximum index 5824 * @entry: Pointer to the entry 5825 * 5826 * Returns: True if entry is the answer, false otherwise. 5827 */ 5828 static inline bool mas_find_setup(struct ma_state *mas, unsigned long max, 5829 void **entry) 5830 { 5831 *entry = NULL; 5832 5833 if (unlikely(mas_is_none(mas))) { 5834 if (unlikely(mas->last >= max)) 5835 return true; 5836 5837 mas->index = mas->last; 5838 mas->node = MAS_START; 5839 } else if (unlikely(mas_is_paused(mas))) { 5840 if (unlikely(mas->last >= max)) 5841 return true; 5842 5843 mas->node = MAS_START; 5844 mas->index = ++mas->last; 5845 } else if (unlikely(mas_is_ptr(mas))) 5846 goto ptr_out_of_range; 5847 5848 if (unlikely(mas_is_start(mas))) { 5849 /* First run or continue */ 5850 if (mas->index > max) 5851 return true; 5852 5853 *entry = mas_walk(mas); 5854 if (*entry) 5855 return true; 5856 5857 } 5858 5859 if (unlikely(!mas_searchable(mas))) { 5860 if (unlikely(mas_is_ptr(mas))) 5861 goto ptr_out_of_range; 5862 5863 return true; 5864 } 5865 5866 if (mas->index == max) 5867 return true; 5868 5869 return false; 5870 5871 ptr_out_of_range: 5872 mas->node = MAS_NONE; 5873 mas->index = 1; 5874 mas->last = ULONG_MAX; 5875 return true; 5876 } 5877 5878 /** 5879 * mas_find() - On the first call, find the entry at or after mas->index up to 5880 * %max. Otherwise, find the entry after mas->index. 5881 * @mas: The maple state 5882 * @max: The maximum value to check. 5883 * 5884 * Must hold rcu_read_lock or the write lock. 5885 * If an entry exists, last and index are updated accordingly. 5886 * May set @mas->node to MAS_NONE. 5887 * 5888 * Return: The entry or %NULL. 5889 */ 5890 void *mas_find(struct ma_state *mas, unsigned long max) 5891 { 5892 void *entry = NULL; 5893 5894 if (mas_find_setup(mas, max, &entry)) 5895 return entry; 5896 5897 /* Retries on dead nodes handled by mas_next_slot */ 5898 return mas_next_slot(mas, max, false); 5899 } 5900 EXPORT_SYMBOL_GPL(mas_find); 5901 5902 /** 5903 * mas_find_range() - On the first call, find the entry at or after 5904 * mas->index up to %max. Otherwise, advance to the next slot mas->index. 5905 * @mas: The maple state 5906 * @max: The maximum value to check. 5907 * 5908 * Must hold rcu_read_lock or the write lock. 5909 * If an entry exists, last and index are updated accordingly. 5910 * May set @mas->node to MAS_NONE. 5911 * 5912 * Return: The entry or %NULL. 5913 */ 5914 void *mas_find_range(struct ma_state *mas, unsigned long max) 5915 { 5916 void *entry; 5917 5918 if (mas_find_setup(mas, max, &entry)) 5919 return entry; 5920 5921 /* Retries on dead nodes handled by mas_next_slot */ 5922 return mas_next_slot(mas, max, true); 5923 } 5924 EXPORT_SYMBOL_GPL(mas_find_range); 5925 5926 /** 5927 * mas_find_rev_setup() - Internal function to set up mas_find_*_rev() 5928 * @mas: The maple state 5929 * @min: The minimum index 5930 * @entry: Pointer to the entry 5931 * 5932 * Returns: True if entry is the answer, false otherwise. 
5933 */ 5934 static inline bool mas_find_rev_setup(struct ma_state *mas, unsigned long min, 5935 void **entry) 5936 { 5937 *entry = NULL; 5938 5939 if (unlikely(mas_is_none(mas))) { 5940 if (mas->index <= min) 5941 goto none; 5942 5943 mas->last = mas->index; 5944 mas->node = MAS_START; 5945 } 5946 5947 if (unlikely(mas_is_paused(mas))) { 5948 if (unlikely(mas->index <= min)) { 5949 mas->node = MAS_NONE; 5950 return true; 5951 } 5952 mas->node = MAS_START; 5953 mas->last = --mas->index; 5954 } 5955 5956 if (unlikely(mas_is_start(mas))) { 5957 /* First run or continue */ 5958 if (mas->index < min) 5959 return true; 5960 5961 *entry = mas_walk(mas); 5962 if (*entry) 5963 return true; 5964 } 5965 5966 if (unlikely(!mas_searchable(mas))) { 5967 if (mas_is_ptr(mas)) 5968 goto none; 5969 5970 if (mas_is_none(mas)) { 5971 /* 5972 * Walked to the location, and there was nothing so the 5973 * previous location is 0. 5974 */ 5975 mas->last = mas->index = 0; 5976 mas->node = MAS_ROOT; 5977 *entry = mas_root(mas); 5978 return true; 5979 } 5980 } 5981 5982 if (mas->index < min) 5983 return true; 5984 5985 return false; 5986 5987 none: 5988 mas->node = MAS_NONE; 5989 return true; 5990 } 5991 5992 /** 5993 * mas_find_rev: On the first call, find the first non-null entry at or below 5994 * mas->index down to %min. Otherwise find the first non-null entry below 5995 * mas->index down to %min. 5996 * @mas: The maple state 5997 * @min: The minimum value to check. 5998 * 5999 * Must hold rcu_read_lock or the write lock. 6000 * If an entry exists, last and index are updated accordingly. 6001 * May set @mas->node to MAS_NONE. 6002 * 6003 * Return: The entry or %NULL. 6004 */ 6005 void *mas_find_rev(struct ma_state *mas, unsigned long min) 6006 { 6007 void *entry; 6008 6009 if (mas_find_rev_setup(mas, min, &entry)) 6010 return entry; 6011 6012 /* Retries on dead nodes handled by mas_prev_slot */ 6013 return mas_prev_slot(mas, min, false); 6014 6015 } 6016 EXPORT_SYMBOL_GPL(mas_find_rev); 6017 6018 /** 6019 * mas_find_range_rev: On the first call, find the first non-null entry at or 6020 * below mas->index down to %min. Otherwise advance to the previous slot after 6021 * mas->index down to %min. 6022 * @mas: The maple state 6023 * @min: The minimum value to check. 6024 * 6025 * Must hold rcu_read_lock or the write lock. 6026 * If an entry exists, last and index are updated accordingly. 6027 * May set @mas->node to MAS_NONE. 6028 * 6029 * Return: The entry or %NULL. 6030 */ 6031 void *mas_find_range_rev(struct ma_state *mas, unsigned long min) 6032 { 6033 void *entry; 6034 6035 if (mas_find_rev_setup(mas, min, &entry)) 6036 return entry; 6037 6038 /* Retries on dead nodes handled by mas_prev_slot */ 6039 return mas_prev_slot(mas, min, true); 6040 } 6041 EXPORT_SYMBOL_GPL(mas_find_range_rev); 6042 6043 /** 6044 * mas_erase() - Find the range in which index resides and erase the entire 6045 * range. 6046 * @mas: The maple state 6047 * 6048 * Must hold the write lock. 6049 * Searches for @mas->index, sets @mas->index and @mas->last to the range and 6050 * erases that range. 6051 * 6052 * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated. 6053 */ 6054 void *mas_erase(struct ma_state *mas) 6055 { 6056 void *entry; 6057 MA_WR_STATE(wr_mas, mas, NULL); 6058 6059 if (mas_is_none(mas) || mas_is_paused(mas)) 6060 mas->node = MAS_START; 6061 6062 /* Retry unnecessary when holding the write lock. 
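 * An RCU reader must restart when it walks onto a dead node, but the write
 * lock prevents nodes from being replaced beneath this walk.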
*/ 6063 entry = mas_state_walk(mas); 6064 if (!entry) 6065 return NULL; 6066 6067 write_retry: 6068 /* Must reset to ensure spanning writes of last slot are detected */ 6069 mas_reset(mas); 6070 mas_wr_store_setup(&wr_mas); 6071 mas_wr_store_entry(&wr_mas); 6072 if (mas_nomem(mas, GFP_KERNEL)) 6073 goto write_retry; 6074 6075 return entry; 6076 } 6077 EXPORT_SYMBOL_GPL(mas_erase); 6078 6079 /** 6080 * mas_nomem() - Check if there was an error allocating and do the allocation 6081 * if necessary. If there are allocations, then free them. 6082 * @mas: The maple state 6083 * @gfp: The GFP_FLAGS to use for allocations 6084 * Return: true on allocation, false otherwise. 6085 */ 6086 bool mas_nomem(struct ma_state *mas, gfp_t gfp) 6087 __must_hold(mas->tree->ma_lock) 6088 { 6089 if (likely(mas->node != MA_ERROR(-ENOMEM))) { 6090 mas_destroy(mas); 6091 return false; 6092 } 6093 6094 if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) { 6095 mtree_unlock(mas->tree); 6096 mas_alloc_nodes(mas, gfp); 6097 mtree_lock(mas->tree); 6098 } else { 6099 mas_alloc_nodes(mas, gfp); 6100 } 6101 6102 if (!mas_allocated(mas)) 6103 return false; 6104 6105 mas->node = MAS_START; 6106 return true; 6107 } 6108 6109 void __init maple_tree_init(void) 6110 { 6111 maple_node_cache = kmem_cache_create("maple_node", 6112 sizeof(struct maple_node), sizeof(struct maple_node), 6113 SLAB_PANIC, NULL); 6114 } 6115 6116 /** 6117 * mtree_load() - Load a value stored in a maple tree 6118 * @mt: The maple tree 6119 * @index: The index to load 6120 * 6121 * Return: the entry or %NULL 6122 */ 6123 void *mtree_load(struct maple_tree *mt, unsigned long index) 6124 { 6125 MA_STATE(mas, mt, index, index); 6126 void *entry; 6127 6128 trace_ma_read(__func__, &mas); 6129 rcu_read_lock(); 6130 retry: 6131 entry = mas_start(&mas); 6132 if (unlikely(mas_is_none(&mas))) 6133 goto unlock; 6134 6135 if (unlikely(mas_is_ptr(&mas))) { 6136 if (index) 6137 entry = NULL; 6138 6139 goto unlock; 6140 } 6141 6142 entry = mtree_lookup_walk(&mas); 6143 if (!entry && unlikely(mas_is_start(&mas))) 6144 goto retry; 6145 unlock: 6146 rcu_read_unlock(); 6147 if (xa_is_zero(entry)) 6148 return NULL; 6149 6150 return entry; 6151 } 6152 EXPORT_SYMBOL(mtree_load); 6153 6154 /** 6155 * mtree_store_range() - Store an entry at a given range. 6156 * @mt: The maple tree 6157 * @index: The start of the range 6158 * @last: The end of the range 6159 * @entry: The entry to store 6160 * @gfp: The GFP_FLAGS to use for allocations 6161 * 6162 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 6163 * be allocated. 6164 */ 6165 int mtree_store_range(struct maple_tree *mt, unsigned long index, 6166 unsigned long last, void *entry, gfp_t gfp) 6167 { 6168 MA_STATE(mas, mt, index, last); 6169 MA_WR_STATE(wr_mas, &mas, entry); 6170 6171 trace_ma_write(__func__, &mas, 0, entry); 6172 if (WARN_ON_ONCE(xa_is_advanced(entry))) 6173 return -EINVAL; 6174 6175 if (index > last) 6176 return -EINVAL; 6177 6178 mtree_lock(mt); 6179 retry: 6180 mas_wr_store_entry(&wr_mas); 6181 if (mas_nomem(&mas, gfp)) 6182 goto retry; 6183 6184 mtree_unlock(mt); 6185 if (mas_is_err(&mas)) 6186 return xa_err(mas.node); 6187 6188 return 0; 6189 } 6190 EXPORT_SYMBOL(mtree_store_range); 6191 6192 /** 6193 * mtree_store() - Store an entry at a given index.
6194 * @mt: The maple tree 6195 * @index: The index to store the value 6196 * @entry: The entry to store 6197 * @gfp: The GFP_FLAGS to use for allocations 6198 * 6199 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 6200 * be allocated. 6201 */ 6202 int mtree_store(struct maple_tree *mt, unsigned long index, void *entry, 6203 gfp_t gfp) 6204 { 6205 return mtree_store_range(mt, index, index, entry, gfp); 6206 } 6207 EXPORT_SYMBOL(mtree_store); 6208 6209 /** 6210 * mtree_insert_range() - Insert an entry at a given range if there is no value. 6211 * @mt: The maple tree 6212 * @first: The start of the range 6213 * @last: The end of the range 6214 * @entry: The entry to store 6215 * @gfp: The GFP_FLAGS to use for allocations. 6216 * 6217 * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid 6218 * request, -ENOMEM if memory could not be allocated. 6219 */ 6220 int mtree_insert_range(struct maple_tree *mt, unsigned long first, 6221 unsigned long last, void *entry, gfp_t gfp) 6222 { 6223 MA_STATE(ms, mt, first, last); 6224 6225 if (WARN_ON_ONCE(xa_is_advanced(entry))) 6226 return -EINVAL; 6227 6228 if (first > last) 6229 return -EINVAL; 6230 6231 mtree_lock(mt); 6232 retry: 6233 mas_insert(&ms, entry); 6234 if (mas_nomem(&ms, gfp)) 6235 goto retry; 6236 6237 mtree_unlock(mt); 6238 if (mas_is_err(&ms)) 6239 return xa_err(ms.node); 6240 6241 return 0; 6242 } 6243 EXPORT_SYMBOL(mtree_insert_range); 6244 6245 /** 6246 * mtree_insert() - Insert an entry at a given index if there is no value. 6247 * @mt: The maple tree 6248 * @index: The index to store the value 6249 * @entry: The entry to store 6250 * @gfp: The GFP_FLAGS to use for allocations. 6251 * 6252 * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid 6253 * request, -ENOMEM if memory could not be allocated. 6254 */ 6255 int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry, 6256 gfp_t gfp) 6257 { 6258 return mtree_insert_range(mt, index, index, entry, gfp); 6259 } 6260 EXPORT_SYMBOL(mtree_insert); 6261 6262 int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, 6263 void *entry, unsigned long size, unsigned long min, 6264 unsigned long max, gfp_t gfp) 6265 { 6266 int ret = 0; 6267 6268 MA_STATE(mas, mt, 0, 0); 6269 if (!mt_is_alloc(mt)) 6270 return -EINVAL; 6271 6272 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6273 return -EINVAL; 6274 6275 mtree_lock(mt); 6276 retry: 6277 ret = mas_empty_area(&mas, min, max, size); 6278 if (ret) 6279 goto unlock; 6280 6281 mas_insert(&mas, entry); 6282 /* 6283 * mas_nomem() may release the lock, causing the allocated area 6284 * to be unavailable, so try to allocate a free area again.
6285 */ 6286 if (mas_nomem(&mas, gfp)) 6287 goto retry; 6288 6289 if (mas_is_err(&mas)) 6290 ret = xa_err(mas.node); 6291 else 6292 *startp = mas.index; 6293 6294 unlock: 6295 mtree_unlock(mt); 6296 return ret; 6297 } 6298 EXPORT_SYMBOL(mtree_alloc_range); 6299 6300 int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, 6301 void *entry, unsigned long size, unsigned long min, 6302 unsigned long max, gfp_t gfp) 6303 { 6304 int ret = 0; 6305 6306 MA_STATE(mas, mt, 0, 0); 6307 if (!mt_is_alloc(mt)) 6308 return -EINVAL; 6309 6310 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6311 return -EINVAL; 6312 6313 mtree_lock(mt); 6314 retry: 6315 ret = mas_empty_area_rev(&mas, min, max, size); 6316 if (ret) 6317 goto unlock; 6318 6319 mas_insert(&mas, entry); 6320 /* 6321 * mas_nomem() may release the lock, causing the allocated area 6322 * to be unavailable, so try to allocate a free area again. 6323 */ 6324 if (mas_nomem(&mas, gfp)) 6325 goto retry; 6326 6327 if (mas_is_err(&mas)) 6328 ret = xa_err(mas.node); 6329 else 6330 *startp = mas.index; 6331 6332 unlock: 6333 mtree_unlock(mt); 6334 return ret; 6335 } 6336 EXPORT_SYMBOL(mtree_alloc_rrange); 6337 6338 /** 6339 * mtree_erase() - Find an index and erase the entire range. 6340 * @mt: The maple tree 6341 * @index: The index to erase 6342 * 6343 * Erasing is the same as a walk to an entry then a store of a NULL to that 6344 * ENTIRE range. In fact, it is implemented as such using the advanced API. 6345 * 6346 * Return: The entry stored at the @index or %NULL 6347 */ 6348 void *mtree_erase(struct maple_tree *mt, unsigned long index) 6349 { 6350 void *entry = NULL; 6351 6352 MA_STATE(mas, mt, index, index); 6353 trace_ma_op(__func__, &mas); 6354 6355 mtree_lock(mt); 6356 entry = mas_erase(&mas); 6357 mtree_unlock(mt); 6358 6359 return entry; 6360 } 6361 EXPORT_SYMBOL(mtree_erase); 6362 6363 /** 6364 * __mt_destroy() - Walk and free all nodes of a locked maple tree. 6365 * @mt: The maple tree 6366 * 6367 * Note: Does not handle locking. 6368 */ 6369 void __mt_destroy(struct maple_tree *mt) 6370 { 6371 void *root = mt_root_locked(mt); 6372 6373 rcu_assign_pointer(mt->ma_root, NULL); 6374 if (xa_is_node(root)) 6375 mte_destroy_walk(root, mt); 6376 6377 mt->ma_flags = 0; 6378 } 6379 EXPORT_SYMBOL_GPL(__mt_destroy); 6380 6381 /** 6382 * mtree_destroy() - Destroy a maple tree 6383 * @mt: The maple tree 6384 * 6385 * Frees all resources used by the tree. Handles locking. 6386 */ 6387 void mtree_destroy(struct maple_tree *mt) 6388 { 6389 mtree_lock(mt); 6390 __mt_destroy(mt); 6391 mtree_unlock(mt); 6392 } 6393 EXPORT_SYMBOL(mtree_destroy); 6394 6395 /** 6396 * mt_find() - Search from the start up until an entry is found. 6397 * @mt: The maple tree 6398 * @index: Pointer which contains the start location of the search 6399 * @max: The maximum value of the search range 6400 * 6401 * Takes RCU read lock internally to protect the search, which does not 6402 * protect the returned pointer after dropping RCU read lock. 6403 * See also: Documentation/core-api/maple_tree.rst 6404 * 6405 * In case that an entry is found @index is updated to point to the next 6406 * possible entry independent whether the found entry is occupying a 6407 * single index or a range if indices. 
6408 * 6409 * Return: The entry at or after the @index or %NULL 6410 */ 6411 void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max) 6412 { 6413 MA_STATE(mas, mt, *index, *index); 6414 void *entry; 6415 #ifdef CONFIG_DEBUG_MAPLE_TREE 6416 unsigned long copy = *index; 6417 #endif 6418 6419 trace_ma_read(__func__, &mas); 6420 6421 if ((*index) > max) 6422 return NULL; 6423 6424 rcu_read_lock(); 6425 retry: 6426 entry = mas_state_walk(&mas); 6427 if (mas_is_start(&mas)) 6428 goto retry; 6429 6430 if (unlikely(xa_is_zero(entry))) 6431 entry = NULL; 6432 6433 if (entry) 6434 goto unlock; 6435 6436 while (mas_searchable(&mas) && (mas.last < max)) { 6437 entry = mas_next_entry(&mas, max); 6438 if (likely(entry && !xa_is_zero(entry))) 6439 break; 6440 } 6441 6442 if (unlikely(xa_is_zero(entry))) 6443 entry = NULL; 6444 unlock: 6445 rcu_read_unlock(); 6446 if (likely(entry)) { 6447 *index = mas.last + 1; 6448 #ifdef CONFIG_DEBUG_MAPLE_TREE 6449 if (MT_WARN_ON(mt, (*index) && ((*index) <= copy))) 6450 pr_err("index not increased! %lx <= %lx\n", 6451 *index, copy); 6452 #endif 6453 } 6454 6455 return entry; 6456 } 6457 EXPORT_SYMBOL(mt_find); 6458 6459 /** 6460 * mt_find_after() - Search from the start up until an entry is found. 6461 * @mt: The maple tree 6462 * @index: Pointer which contains the start location of the search 6463 * @max: The maximum value to check 6464 * 6465 * Same as mt_find() except that it checks @index for 0 before 6466 * searching. If @index == 0, the search is aborted. This covers a wrap 6467 * around of @index to 0 in an iterator loop. 6468 * 6469 * Return: The entry at or after the @index or %NULL 6470 */ 6471 void *mt_find_after(struct maple_tree *mt, unsigned long *index, 6472 unsigned long max) 6473 { 6474 if (!(*index)) 6475 return NULL; 6476 6477 return mt_find(mt, index, max); 6478 } 6479 EXPORT_SYMBOL(mt_find_after); 6480 6481 #ifdef CONFIG_DEBUG_MAPLE_TREE 6482 atomic_t maple_tree_tests_run; 6483 EXPORT_SYMBOL_GPL(maple_tree_tests_run); 6484 atomic_t maple_tree_tests_passed; 6485 EXPORT_SYMBOL_GPL(maple_tree_tests_passed); 6486 6487 #ifndef __KERNEL__ 6488 extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int); 6489 void mt_set_non_kernel(unsigned int val) 6490 { 6491 kmem_cache_set_non_kernel(maple_node_cache, val); 6492 } 6493 6494 extern unsigned long kmem_cache_get_alloc(struct kmem_cache *); 6495 unsigned long mt_get_alloc_size(void) 6496 { 6497 return kmem_cache_get_alloc(maple_node_cache); 6498 } 6499 6500 extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *); 6501 void mt_zero_nr_tallocated(void) 6502 { 6503 kmem_cache_zero_nr_tallocated(maple_node_cache); 6504 } 6505 6506 extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *); 6507 unsigned int mt_nr_tallocated(void) 6508 { 6509 return kmem_cache_nr_tallocated(maple_node_cache); 6510 } 6511 6512 extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *); 6513 unsigned int mt_nr_allocated(void) 6514 { 6515 return kmem_cache_nr_allocated(maple_node_cache); 6516 } 6517 6518 /* 6519 * mas_dead_node() - Check if the maple state is pointing to a dead node. 6520 * @mas: The maple state 6521 * @index: The index to restore in @mas. 6522 * 6523 * Used in test code. 6524 * Return: 1 if @mas has been reset to MAS_START, 0 otherwise. 
6525 */ 6526 static inline int mas_dead_node(struct ma_state *mas, unsigned long index) 6527 { 6528 if (unlikely(!mas_searchable(mas) || mas_is_start(mas))) 6529 return 0; 6530 6531 if (likely(!mte_dead_node(mas->node))) 6532 return 0; 6533 6534 mas_rewalk(mas, index); 6535 return 1; 6536 } 6537 6538 void mt_cache_shrink(void) 6539 { 6540 } 6541 #else 6542 /* 6543 * mt_cache_shrink() - For testing, don't use this. 6544 * 6545 * Certain testcases can trigger an OOM when combined with other memory 6546 * debugging configuration options. This function is used to reduce the 6547 * possibility of an out of memory even due to kmem_cache objects remaining 6548 * around for longer than usual. 6549 */ 6550 void mt_cache_shrink(void) 6551 { 6552 kmem_cache_shrink(maple_node_cache); 6553 6554 } 6555 EXPORT_SYMBOL_GPL(mt_cache_shrink); 6556 6557 #endif /* not defined __KERNEL__ */ 6558 /* 6559 * mas_get_slot() - Get the entry in the maple state node stored at @offset. 6560 * @mas: The maple state 6561 * @offset: The offset into the slot array to fetch. 6562 * 6563 * Return: The entry stored at @offset. 6564 */ 6565 static inline struct maple_enode *mas_get_slot(struct ma_state *mas, 6566 unsigned char offset) 6567 { 6568 return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)), 6569 offset); 6570 } 6571 6572 /* Depth first search, post-order */ 6573 static void mas_dfs_postorder(struct ma_state *mas, unsigned long max) 6574 { 6575 6576 struct maple_enode *p = MAS_NONE, *mn = mas->node; 6577 unsigned long p_min, p_max; 6578 6579 mas_next_node(mas, mas_mn(mas), max); 6580 if (!mas_is_none(mas)) 6581 return; 6582 6583 if (mte_is_root(mn)) 6584 return; 6585 6586 mas->node = mn; 6587 mas_ascend(mas); 6588 do { 6589 p = mas->node; 6590 p_min = mas->min; 6591 p_max = mas->max; 6592 mas_prev_node(mas, 0); 6593 } while (!mas_is_none(mas)); 6594 6595 mas->node = p; 6596 mas->max = p_max; 6597 mas->min = p_min; 6598 } 6599 6600 /* Tree validations */ 6601 static void mt_dump_node(const struct maple_tree *mt, void *entry, 6602 unsigned long min, unsigned long max, unsigned int depth, 6603 enum mt_dump_format format); 6604 static void mt_dump_range(unsigned long min, unsigned long max, 6605 unsigned int depth, enum mt_dump_format format) 6606 { 6607 static const char spaces[] = " "; 6608 6609 switch(format) { 6610 case mt_dump_hex: 6611 if (min == max) 6612 pr_info("%.*s%lx: ", depth * 2, spaces, min); 6613 else 6614 pr_info("%.*s%lx-%lx: ", depth * 2, spaces, min, max); 6615 break; 6616 default: 6617 case mt_dump_dec: 6618 if (min == max) 6619 pr_info("%.*s%lu: ", depth * 2, spaces, min); 6620 else 6621 pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max); 6622 } 6623 } 6624 6625 static void mt_dump_entry(void *entry, unsigned long min, unsigned long max, 6626 unsigned int depth, enum mt_dump_format format) 6627 { 6628 mt_dump_range(min, max, depth, format); 6629 6630 if (xa_is_value(entry)) 6631 pr_cont("value %ld (0x%lx) [%p]\n", xa_to_value(entry), 6632 xa_to_value(entry), entry); 6633 else if (xa_is_zero(entry)) 6634 pr_cont("zero (%ld)\n", xa_to_internal(entry)); 6635 else if (mt_is_reserved(entry)) 6636 pr_cont("UNKNOWN ENTRY (%p)\n", entry); 6637 else 6638 pr_cont("%p\n", entry); 6639 } 6640 6641 static void mt_dump_range64(const struct maple_tree *mt, void *entry, 6642 unsigned long min, unsigned long max, unsigned int depth, 6643 enum mt_dump_format format) 6644 { 6645 struct maple_range_64 *node = &mte_to_node(entry)->mr64; 6646 bool leaf = mte_is_leaf(entry); 6647 unsigned long first = 
min; 6648 int i; 6649 6650 pr_cont(" contents: "); 6651 for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) { 6652 switch(format) { 6653 case mt_dump_hex: 6654 pr_cont("%p %lX ", node->slot[i], node->pivot[i]); 6655 break; 6656 default: 6657 case mt_dump_dec: 6658 pr_cont("%p %lu ", node->slot[i], node->pivot[i]); 6659 } 6660 } 6661 pr_cont("%p\n", node->slot[i]); 6662 for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) { 6663 unsigned long last = max; 6664 6665 if (i < (MAPLE_RANGE64_SLOTS - 1)) 6666 last = node->pivot[i]; 6667 else if (!node->slot[i] && max != mt_node_max(entry)) 6668 break; 6669 if (last == 0 && i > 0) 6670 break; 6671 if (leaf) 6672 mt_dump_entry(mt_slot(mt, node->slot, i), 6673 first, last, depth + 1, format); 6674 else if (node->slot[i]) 6675 mt_dump_node(mt, mt_slot(mt, node->slot, i), 6676 first, last, depth + 1, format); 6677 6678 if (last == max) 6679 break; 6680 if (last > max) { 6681 switch(format) { 6682 case mt_dump_hex: 6683 pr_err("node %p last (%lx) > max (%lx) at pivot %d!\n", 6684 node, last, max, i); 6685 break; 6686 default: 6687 case mt_dump_dec: 6688 pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", 6689 node, last, max, i); 6690 } 6691 } 6692 first = last + 1; 6693 } 6694 } 6695 6696 static void mt_dump_arange64(const struct maple_tree *mt, void *entry, 6697 unsigned long min, unsigned long max, unsigned int depth, 6698 enum mt_dump_format format) 6699 { 6700 struct maple_arange_64 *node = &mte_to_node(entry)->ma64; 6701 bool leaf = mte_is_leaf(entry); 6702 unsigned long first = min; 6703 int i; 6704 6705 pr_cont(" contents: "); 6706 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 6707 switch (format) { 6708 case mt_dump_hex: 6709 pr_cont("%lx ", node->gap[i]); 6710 break; 6711 default: 6712 case mt_dump_dec: 6713 pr_cont("%lu ", node->gap[i]); 6714 } 6715 } 6716 pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap); 6717 for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) { 6718 switch (format) { 6719 case mt_dump_hex: 6720 pr_cont("%p %lX ", node->slot[i], node->pivot[i]); 6721 break; 6722 default: 6723 case mt_dump_dec: 6724 pr_cont("%p %lu ", node->slot[i], node->pivot[i]); 6725 } 6726 } 6727 pr_cont("%p\n", node->slot[i]); 6728 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 6729 unsigned long last = max; 6730 6731 if (i < (MAPLE_ARANGE64_SLOTS - 1)) 6732 last = node->pivot[i]; 6733 else if (!node->slot[i]) 6734 break; 6735 if (last == 0 && i > 0) 6736 break; 6737 if (leaf) 6738 mt_dump_entry(mt_slot(mt, node->slot, i), 6739 first, last, depth + 1, format); 6740 else if (node->slot[i]) 6741 mt_dump_node(mt, mt_slot(mt, node->slot, i), 6742 first, last, depth + 1, format); 6743 6744 if (last == max) 6745 break; 6746 if (last > max) { 6747 pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", 6748 node, last, max, i); 6749 break; 6750 } 6751 first = last + 1; 6752 } 6753 } 6754 6755 static void mt_dump_node(const struct maple_tree *mt, void *entry, 6756 unsigned long min, unsigned long max, unsigned int depth, 6757 enum mt_dump_format format) 6758 { 6759 struct maple_node *node = mte_to_node(entry); 6760 unsigned int type = mte_node_type(entry); 6761 unsigned int i; 6762 6763 mt_dump_range(min, max, depth, format); 6764 6765 pr_cont("node %p depth %d type %d parent %p", node, depth, type, 6766 node ? 
node->parent : NULL); 6767 switch (type) { 6768 case maple_dense: 6769 pr_cont("\n"); 6770 for (i = 0; i < MAPLE_NODE_SLOTS; i++) { 6771 if (min + i > max) 6772 pr_cont("OUT OF RANGE: "); 6773 mt_dump_entry(mt_slot(mt, node->slot, i), 6774 min + i, min + i, depth, format); 6775 } 6776 break; 6777 case maple_leaf_64: 6778 case maple_range_64: 6779 mt_dump_range64(mt, entry, min, max, depth, format); 6780 break; 6781 case maple_arange_64: 6782 mt_dump_arange64(mt, entry, min, max, depth, format); 6783 break; 6784 6785 default: 6786 pr_cont(" UNKNOWN TYPE\n"); 6787 } 6788 } 6789 6790 void mt_dump(const struct maple_tree *mt, enum mt_dump_format format) 6791 { 6792 void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt)); 6793 6794 pr_info("maple_tree(%p) flags %X, height %u root %p\n", 6795 mt, mt->ma_flags, mt_height(mt), entry); 6796 if (!xa_is_node(entry)) 6797 mt_dump_entry(entry, 0, 0, 0, format); 6798 else if (entry) 6799 mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format); 6800 } 6801 EXPORT_SYMBOL_GPL(mt_dump); 6802 6803 /* 6804 * Calculate the maximum gap in a node and check if that's what is reported in 6805 * the parent (unless root). 6806 */ 6807 static void mas_validate_gaps(struct ma_state *mas) 6808 { 6809 struct maple_enode *mte = mas->node; 6810 struct maple_node *p_mn, *node = mte_to_node(mte); 6811 enum maple_type mt = mte_node_type(mas->node); 6812 unsigned long gap = 0, max_gap = 0; 6813 unsigned long p_end, p_start = mas->min; 6814 unsigned char p_slot, offset; 6815 unsigned long *gaps = NULL; 6816 unsigned long *pivots = ma_pivots(node, mt); 6817 unsigned int i; 6818 6819 if (ma_is_dense(mt)) { 6820 for (i = 0; i < mt_slot_count(mte); i++) { 6821 if (mas_get_slot(mas, i)) { 6822 if (gap > max_gap) 6823 max_gap = gap; 6824 gap = 0; 6825 continue; 6826 } 6827 gap++; 6828 } 6829 goto counted; 6830 } 6831 6832 gaps = ma_gaps(node, mt); 6833 for (i = 0; i < mt_slot_count(mte); i++) { 6834 p_end = mas_safe_pivot(mas, pivots, i, mt); 6835 6836 if (!gaps) { 6837 if (!mas_get_slot(mas, i)) 6838 gap = p_end - p_start + 1; 6839 } else { 6840 void *entry = mas_get_slot(mas, i); 6841 6842 gap = gaps[i]; 6843 MT_BUG_ON(mas->tree, !entry); 6844 6845 if (gap > p_end - p_start + 1) { 6846 pr_err("%p[%u] %lu >= %lu - %lu + 1 (%lu)\n", 6847 mas_mn(mas), i, gap, p_end, p_start, 6848 p_end - p_start + 1); 6849 MT_BUG_ON(mas->tree, gap > p_end - p_start + 1); 6850 } 6851 } 6852 6853 if (gap > max_gap) 6854 max_gap = gap; 6855 6856 p_start = p_end + 1; 6857 if (p_end >= mas->max) 6858 break; 6859 } 6860 6861 counted: 6862 if (mt == maple_arange_64) { 6863 offset = ma_meta_gap(node, mt); 6864 if (offset > i) { 6865 pr_err("gap offset %p[%u] is invalid\n", node, offset); 6866 MT_BUG_ON(mas->tree, 1); 6867 } 6868 6869 if (gaps[offset] != max_gap) { 6870 pr_err("gap %p[%u] is not the largest gap %lu\n", 6871 node, offset, max_gap); 6872 MT_BUG_ON(mas->tree, 1); 6873 } 6874 6875 MT_BUG_ON(mas->tree, !gaps); 6876 for (i++ ; i < mt_slot_count(mte); i++) { 6877 if (gaps[i] != 0) { 6878 pr_err("gap %p[%u] beyond node limit != 0\n", 6879 node, i); 6880 MT_BUG_ON(mas->tree, 1); 6881 } 6882 } 6883 } 6884 6885 if (mte_is_root(mte)) 6886 return; 6887 6888 p_slot = mte_parent_slot(mas->node); 6889 p_mn = mte_parent(mte); 6890 MT_BUG_ON(mas->tree, max_gap > mas->max); 6891 if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) { 6892 pr_err("gap %p[%u] != %lu\n", p_mn, p_slot, max_gap); 6893 mt_dump(mas->tree, mt_dump_hex); 6894 MT_BUG_ON(mas->tree, 1); 6895 } 6896 } 6897 6898 static 
void mas_validate_parent_slot(struct ma_state *mas) 6899 { 6900 struct maple_node *parent; 6901 struct maple_enode *node; 6902 enum maple_type p_type; 6903 unsigned char p_slot; 6904 void __rcu **slots; 6905 int i; 6906 6907 if (mte_is_root(mas->node)) 6908 return; 6909 6910 p_slot = mte_parent_slot(mas->node); 6911 p_type = mas_parent_type(mas, mas->node); 6912 parent = mte_parent(mas->node); 6913 slots = ma_slots(parent, p_type); 6914 MT_BUG_ON(mas->tree, mas_mn(mas) == parent); 6915 6916 /* Check prev/next parent slot for duplicate node entry */ 6917 6918 for (i = 0; i < mt_slots[p_type]; i++) { 6919 node = mas_slot(mas, slots, i); 6920 if (i == p_slot) { 6921 if (node != mas->node) 6922 pr_err("parent %p[%u] does not have %p\n", 6923 parent, i, mas_mn(mas)); 6924 MT_BUG_ON(mas->tree, node != mas->node); 6925 } else if (node == mas->node) { 6926 pr_err("Invalid child %p at parent %p[%u] p_slot %u\n", 6927 mas_mn(mas), parent, i, p_slot); 6928 MT_BUG_ON(mas->tree, node == mas->node); 6929 } 6930 } 6931 } 6932 6933 static void mas_validate_child_slot(struct ma_state *mas) 6934 { 6935 enum maple_type type = mte_node_type(mas->node); 6936 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 6937 unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type); 6938 struct maple_enode *child; 6939 unsigned char i; 6940 6941 if (mte_is_leaf(mas->node)) 6942 return; 6943 6944 for (i = 0; i < mt_slots[type]; i++) { 6945 child = mas_slot(mas, slots, i); 6946 6947 if (!child) { 6948 pr_err("Non-leaf node lacks child at %p[%u]\n", 6949 mas_mn(mas), i); 6950 MT_BUG_ON(mas->tree, 1); 6951 } 6952 6953 if (mte_parent_slot(child) != i) { 6954 pr_err("Slot error at %p[%u]: child %p has pslot %u\n", 6955 mas_mn(mas), i, mte_to_node(child), 6956 mte_parent_slot(child)); 6957 MT_BUG_ON(mas->tree, 1); 6958 } 6959 6960 if (mte_parent(child) != mte_to_node(mas->node)) { 6961 pr_err("child %p has parent %p not %p\n", 6962 mte_to_node(child), mte_parent(child), 6963 mte_to_node(mas->node)); 6964 MT_BUG_ON(mas->tree, 1); 6965 } 6966 6967 if (i < mt_pivots[type] && pivots[i] == mas->max) 6968 break; 6969 } 6970 } 6971 6972 /* 6973 * Validate all pivots are within mas->min and mas->max, check metadata ends 6974 * where the maximum ends and ensure there are no slots or pivots set outside of 6975 * the end of the data.
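 * Violations are reported with pr_err() and then flagged via MAS_WARN_ON()
 * or MT_BUG_ON(), matching the other validators in this file.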
6976 */ 6977 static void mas_validate_limits(struct ma_state *mas) 6978 { 6979 int i; 6980 unsigned long prev_piv = 0; 6981 enum maple_type type = mte_node_type(mas->node); 6982 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 6983 unsigned long *pivots = ma_pivots(mas_mn(mas), type); 6984 6985 for (i = 0; i < mt_slots[type]; i++) { 6986 unsigned long piv; 6987 6988 piv = mas_safe_pivot(mas, pivots, i, type); 6989 6990 if (!piv && (i != 0)) { 6991 pr_err("Missing node limit pivot at %p[%u]", 6992 mas_mn(mas), i); 6993 MAS_WARN_ON(mas, 1); 6994 } 6995 6996 if (prev_piv > piv) { 6997 pr_err("%p[%u] piv %lu < prev_piv %lu\n", 6998 mas_mn(mas), i, piv, prev_piv); 6999 MAS_WARN_ON(mas, piv < prev_piv); 7000 } 7001 7002 if (piv < mas->min) { 7003 pr_err("%p[%u] %lu < %lu\n", mas_mn(mas), i, 7004 piv, mas->min); 7005 MAS_WARN_ON(mas, piv < mas->min); 7006 } 7007 if (piv > mas->max) { 7008 pr_err("%p[%u] %lu > %lu\n", mas_mn(mas), i, 7009 piv, mas->max); 7010 MAS_WARN_ON(mas, piv > mas->max); 7011 } 7012 prev_piv = piv; 7013 if (piv == mas->max) 7014 break; 7015 } 7016 7017 if (mas_data_end(mas) != i) { 7018 pr_err("node%p: data_end %u != the last slot offset %u\n", 7019 mas_mn(mas), mas_data_end(mas), i); 7020 MT_BUG_ON(mas->tree, 1); 7021 } 7022 7023 for (i += 1; i < mt_slots[type]; i++) { 7024 void *entry = mas_slot(mas, slots, i); 7025 7026 if (entry && (i != mt_slots[type] - 1)) { 7027 pr_err("%p[%u] should not have entry %p\n", mas_mn(mas), 7028 i, entry); 7029 MT_BUG_ON(mas->tree, entry != NULL); 7030 } 7031 7032 if (i < mt_pivots[type]) { 7033 unsigned long piv = pivots[i]; 7034 7035 if (!piv) 7036 continue; 7037 7038 pr_err("%p[%u] should not have piv %lu\n", 7039 mas_mn(mas), i, piv); 7040 MAS_WARN_ON(mas, i < mt_pivots[type] - 1); 7041 } 7042 } 7043 } 7044 7045 static void mt_validate_nulls(struct maple_tree *mt) 7046 { 7047 void *entry, *last = (void *)1; 7048 unsigned char offset = 0; 7049 void __rcu **slots; 7050 MA_STATE(mas, mt, 0, 0); 7051 7052 mas_start(&mas); 7053 if (mas_is_none(&mas) || (mas.node == MAS_ROOT)) 7054 return; 7055 7056 while (!mte_is_leaf(mas.node)) 7057 mas_descend(&mas); 7058 7059 slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node)); 7060 do { 7061 entry = mas_slot(&mas, slots, offset); 7062 if (!last && !entry) { 7063 pr_err("Sequential nulls end at %p[%u]\n", 7064 mas_mn(&mas), offset); 7065 } 7066 MT_BUG_ON(mt, !last && !entry); 7067 last = entry; 7068 if (offset == mas_data_end(&mas)) { 7069 mas_next_node(&mas, mas_mn(&mas), ULONG_MAX); 7070 if (mas_is_none(&mas)) 7071 return; 7072 offset = 0; 7073 slots = ma_slots(mte_to_node(mas.node), 7074 mte_node_type(mas.node)); 7075 } else { 7076 offset++; 7077 } 7078 7079 } while (!mas_is_none(&mas)); 7080 } 7081 7082 /* 7083 * validate a maple tree by checking: 7084 * 1. The limits (pivots are within mas->min to mas->max) 7085 * 2. 
The gap is correctly set in the parents 7086 */ 7087 void mt_validate(struct maple_tree *mt) 7088 { 7089 unsigned char end; 7090 7091 MA_STATE(mas, mt, 0, 0); 7092 rcu_read_lock(); 7093 mas_start(&mas); 7094 if (!mas_searchable(&mas)) 7095 goto done; 7096 7097 while (!mte_is_leaf(mas.node)) 7098 mas_descend(&mas); 7099 7100 while (!mas_is_none(&mas)) { 7101 MAS_WARN_ON(&mas, mte_dead_node(mas.node)); 7102 end = mas_data_end(&mas); 7103 if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) && 7104 (mas.max != ULONG_MAX))) { 7105 pr_err("Invalid size %u of %p\n", end, mas_mn(&mas)); 7106 } 7107 7108 mas_validate_parent_slot(&mas); 7109 mas_validate_limits(&mas); 7110 mas_validate_child_slot(&mas); 7111 if (mt_is_alloc(mt)) 7112 mas_validate_gaps(&mas); 7113 mas_dfs_postorder(&mas, ULONG_MAX); 7114 } 7115 mt_validate_nulls(mt); 7116 done: 7117 rcu_read_unlock(); 7118 7119 } 7120 EXPORT_SYMBOL_GPL(mt_validate); 7121 7122 void mas_dump(const struct ma_state *mas) 7123 { 7124 pr_err("MAS: tree=%p enode=%p ", mas->tree, mas->node); 7125 if (mas_is_none(mas)) 7126 pr_err("(MAS_NONE) "); 7127 else if (mas_is_ptr(mas)) 7128 pr_err("(MAS_ROOT) "); 7129 else if (mas_is_start(mas)) 7130 pr_err("(MAS_START) "); 7131 else if (mas_is_paused(mas)) 7132 pr_err("(MAS_PAUSED) "); 7133 7134 pr_err("[%u] index=%lx last=%lx\n", mas->offset, mas->index, mas->last); 7135 pr_err(" min=%lx max=%lx alloc=%p, depth=%u, flags=%x\n", 7136 mas->min, mas->max, mas->alloc, mas->depth, mas->mas_flags); 7137 if (mas->index > mas->last) 7138 pr_err("Check index & last\n"); 7139 } 7140 EXPORT_SYMBOL_GPL(mas_dump); 7141 7142 void mas_wr_dump(const struct ma_wr_state *wr_mas) 7143 { 7144 pr_err("WR_MAS: node=%p r_min=%lx r_max=%lx\n", 7145 wr_mas->node, wr_mas->r_min, wr_mas->r_max); 7146 pr_err(" type=%u off_end=%u, node_end=%u, end_piv=%lx\n", 7147 wr_mas->type, wr_mas->offset_end, wr_mas->node_end, 7148 wr_mas->end_piv); 7149 } 7150 EXPORT_SYMBOL_GPL(mas_wr_dump); 7151 7152 #endif /* CONFIG_DEBUG_MAPLE_TREE */ 7153