// SPDX-License-Identifier: GPL-2.0+
/*
 * Maple Tree implementation
 * Copyright (c) 2018-2022 Oracle Corporation
 * Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
 *	    Matthew Wilcox <willy@infradead.org>
 */

/*
 * DOC: Interesting implementation details of the Maple Tree
 *
 * Each node type has a number of slots for entries and a number of slots for
 * pivots.  In the case of dense nodes, the pivots are implied by the position
 * and are simply the slot index + the minimum of the node.
 *
 * In regular B-Tree terms, pivots are called keys.  The term pivot is used to
 * indicate that the tree is specifying ranges.  Pivots may appear in the
 * subtree with an entry attached to the value, whereas keys are unique to a
 * specific position of a B-tree.  Pivot values are inclusive of the slot with
 * the same index.
 *
 * The following illustrates the layout of a range64 node's slots and pivots.
 *
 *  Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 |
 *           ┬   ┬   ┬   ┬     ┬    ┬    ┬    ┬    ┬
 *           │   │   │   │     │    │    │    │    └─ Implied maximum
 *           │   │   │   │     │    │    │    └─ Pivot 14
 *           │   │   │   │     │    │    └─ Pivot 13
 *           │   │   │   │     │    └─ Pivot 12
 *           │   │   │   │     └─ Pivot 11
 *           │   │   │   └─ Pivot 2
 *           │   │   └─ Pivot 1
 *           │   └─ Pivot 0
 *           └─ Implied minimum
 *
 * Slot contents:
 *  Internal (non-leaf) nodes contain pointers to other nodes.
 *  Leaf nodes contain entries.
 *
 * The location of interest is often referred to as an offset.  All offsets
 * have a slot, but the last offset has an implied pivot from the node above
 * (or ULONG_MAX for the root node).
 *
 * Ranges complicate certain write activities.  When modifying any of
 * the B-tree variants, it is known that one entry will either be added or
 * deleted.  When modifying the Maple Tree, one store operation may overwrite
 * the entire data set, or one half of the tree, or the middle half of the tree.
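 *
 * As an illustrative example (the entries and numbers are made up), a leaf
 * tracking the ranges 0-9, 10-49 and 50-99 could use three slots:
 *
 *	Slots  -> |  A  |  B  |  C  |
 *	Pivots -> |  9  |  49 |  99 |
 *
 * Because pivots are inclusive, a later store over the range 20-79 overlaps
 * both B and C, which is why a single write may replace several existing
 * entries rather than adding or deleting exactly one.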
 *
 */


#include <linux/maple_tree.h>
#include <linux/xarray.h>
#include <linux/types.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/limits.h>
#include <asm/barrier.h>

#define CREATE_TRACE_POINTS
#include <trace/events/maple_tree.h>

#define MA_ROOT_PARENT 1

/*
 * Maple state flags
 * * MA_STATE_BULK		- Bulk insert mode
 * * MA_STATE_REBALANCE		- Indicate a rebalance during bulk insert
 * * MA_STATE_PREALLOC		- Preallocated nodes, WARN_ON allocation
 */
#define MA_STATE_BULK		1
#define MA_STATE_REBALANCE	2
#define MA_STATE_PREALLOC	4

#define ma_parent_ptr(x) ((struct maple_pnode *)(x))
#define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT)
#define ma_mnode_ptr(x) ((struct maple_node *)(x))
#define ma_enode_ptr(x) ((struct maple_enode *)(x))
static struct kmem_cache *maple_node_cache;

#ifdef CONFIG_DEBUG_MAPLE_TREE
static const unsigned long mt_max[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS,
	[maple_leaf_64]		= ULONG_MAX,
	[maple_range_64]	= ULONG_MAX,
	[maple_arange_64]	= ULONG_MAX,
};
#define mt_node_max(x) mt_max[mte_node_type(x)]
#endif

static const unsigned char mt_slots[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS,
	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS,
	[maple_range_64]	= MAPLE_RANGE64_SLOTS,
	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS,
};
#define mt_slot_count(x) mt_slots[mte_node_type(x)]

static const unsigned char mt_pivots[] = {
	[maple_dense]		= 0,
	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS - 1,
	[maple_range_64]	= MAPLE_RANGE64_SLOTS - 1,
	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS - 1,
};
#define mt_pivot_count(x) mt_pivots[mte_node_type(x)]

static const unsigned char mt_min_slots[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS / 2,
	[maple_leaf_64]		= (MAPLE_RANGE64_SLOTS / 2) - 2,
	[maple_range_64]	= (MAPLE_RANGE64_SLOTS / 2) - 2,
	[maple_arange_64]	= (MAPLE_ARANGE64_SLOTS / 2) - 1,
};
#define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)]

#define MAPLE_BIG_NODE_SLOTS	(MAPLE_RANGE64_SLOTS * 2 + 2)
#define MAPLE_BIG_NODE_GAPS	(MAPLE_ARANGE64_SLOTS * 2 + 1)

struct maple_big_node {
	struct maple_pnode *parent;
	unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1];
	union {
		struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS];
		struct {
			unsigned long padding[MAPLE_BIG_NODE_GAPS];
			unsigned long gap[MAPLE_BIG_NODE_GAPS];
		};
	};
	unsigned char b_end;
	enum maple_type type;
};

/*
 * The maple_subtree_state is used to build a tree to replace a segment of an
 * existing tree in a more atomic way.  Any walkers of the older tree will hit a
 * dead node and restart on updates.
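 *
 * A minimal sketch of what hitting a dead node looks like for a lockless
 * reader (ma_dead_node() is defined further down in this file): a dead node
 * has its parent pointer aimed at itself, so
 *
 *	if (unlikely(ma_dead_node(node)))
 *		goto retry;
 *
 * after reading from a node is enough to notice a racing subtree replacement
 * and restart the walk from the root.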
138 */ 139 struct maple_subtree_state { 140 struct ma_state *orig_l; /* Original left side of subtree */ 141 struct ma_state *orig_r; /* Original right side of subtree */ 142 struct ma_state *l; /* New left side of subtree */ 143 struct ma_state *m; /* New middle of subtree (rare) */ 144 struct ma_state *r; /* New right side of subtree */ 145 struct ma_topiary *free; /* nodes to be freed */ 146 struct ma_topiary *destroy; /* Nodes to be destroyed (walked and freed) */ 147 struct maple_big_node *bn; 148 }; 149 150 #ifdef CONFIG_KASAN_STACK 151 /* Prevent mas_wr_bnode() from exceeding the stack frame limit */ 152 #define noinline_for_kasan noinline_for_stack 153 #else 154 #define noinline_for_kasan inline 155 #endif 156 157 /* Functions */ 158 static inline struct maple_node *mt_alloc_one(gfp_t gfp) 159 { 160 return kmem_cache_alloc(maple_node_cache, gfp); 161 } 162 163 static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes) 164 { 165 return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes); 166 } 167 168 static inline void mt_free_bulk(size_t size, void __rcu **nodes) 169 { 170 kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes); 171 } 172 173 static void mt_free_rcu(struct rcu_head *head) 174 { 175 struct maple_node *node = container_of(head, struct maple_node, rcu); 176 177 kmem_cache_free(maple_node_cache, node); 178 } 179 180 /* 181 * ma_free_rcu() - Use rcu callback to free a maple node 182 * @node: The node to free 183 * 184 * The maple tree uses the parent pointer to indicate this node is no longer in 185 * use and will be freed. 186 */ 187 static void ma_free_rcu(struct maple_node *node) 188 { 189 WARN_ON(node->parent != ma_parent_ptr(node)); 190 call_rcu(&node->rcu, mt_free_rcu); 191 } 192 193 static void mas_set_height(struct ma_state *mas) 194 { 195 unsigned int new_flags = mas->tree->ma_flags; 196 197 new_flags &= ~MT_FLAGS_HEIGHT_MASK; 198 MAS_BUG_ON(mas, mas->depth > MAPLE_HEIGHT_MAX); 199 new_flags |= mas->depth << MT_FLAGS_HEIGHT_OFFSET; 200 mas->tree->ma_flags = new_flags; 201 } 202 203 static unsigned int mas_mt_height(struct ma_state *mas) 204 { 205 return mt_height(mas->tree); 206 } 207 208 static inline enum maple_type mte_node_type(const struct maple_enode *entry) 209 { 210 return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) & 211 MAPLE_NODE_TYPE_MASK; 212 } 213 214 static inline bool ma_is_dense(const enum maple_type type) 215 { 216 return type < maple_leaf_64; 217 } 218 219 static inline bool ma_is_leaf(const enum maple_type type) 220 { 221 return type < maple_range_64; 222 } 223 224 static inline bool mte_is_leaf(const struct maple_enode *entry) 225 { 226 return ma_is_leaf(mte_node_type(entry)); 227 } 228 229 /* 230 * We also reserve values with the bottom two bits set to '10' which are 231 * below 4096 232 */ 233 static inline bool mt_is_reserved(const void *entry) 234 { 235 return ((unsigned long)entry < MAPLE_RESERVED_RANGE) && 236 xa_is_internal(entry); 237 } 238 239 static inline void mas_set_err(struct ma_state *mas, long err) 240 { 241 mas->node = MA_ERROR(err); 242 } 243 244 static inline bool mas_is_ptr(const struct ma_state *mas) 245 { 246 return mas->node == MAS_ROOT; 247 } 248 249 static inline bool mas_is_start(const struct ma_state *mas) 250 { 251 return mas->node == MAS_START; 252 } 253 254 bool mas_is_err(struct ma_state *mas) 255 { 256 return xa_is_err(mas->node); 257 } 258 259 static inline bool mas_searchable(struct ma_state *mas) 260 { 261 if (mas_is_none(mas)) 262 return false; 263 264 if (mas_is_ptr(mas)) 265 
return false; 266 267 return true; 268 } 269 270 static inline struct maple_node *mte_to_node(const struct maple_enode *entry) 271 { 272 return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK); 273 } 274 275 /* 276 * mte_to_mat() - Convert a maple encoded node to a maple topiary node. 277 * @entry: The maple encoded node 278 * 279 * Return: a maple topiary pointer 280 */ 281 static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry) 282 { 283 return (struct maple_topiary *) 284 ((unsigned long)entry & ~MAPLE_NODE_MASK); 285 } 286 287 /* 288 * mas_mn() - Get the maple state node. 289 * @mas: The maple state 290 * 291 * Return: the maple node (not encoded - bare pointer). 292 */ 293 static inline struct maple_node *mas_mn(const struct ma_state *mas) 294 { 295 return mte_to_node(mas->node); 296 } 297 298 /* 299 * mte_set_node_dead() - Set a maple encoded node as dead. 300 * @mn: The maple encoded node. 301 */ 302 static inline void mte_set_node_dead(struct maple_enode *mn) 303 { 304 mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn)); 305 smp_wmb(); /* Needed for RCU */ 306 } 307 308 /* Bit 1 indicates the root is a node */ 309 #define MAPLE_ROOT_NODE 0x02 310 /* maple_type stored bit 3-6 */ 311 #define MAPLE_ENODE_TYPE_SHIFT 0x03 312 /* Bit 2 means a NULL somewhere below */ 313 #define MAPLE_ENODE_NULL 0x04 314 315 static inline struct maple_enode *mt_mk_node(const struct maple_node *node, 316 enum maple_type type) 317 { 318 return (void *)((unsigned long)node | 319 (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL); 320 } 321 322 static inline void *mte_mk_root(const struct maple_enode *node) 323 { 324 return (void *)((unsigned long)node | MAPLE_ROOT_NODE); 325 } 326 327 static inline void *mte_safe_root(const struct maple_enode *node) 328 { 329 return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE); 330 } 331 332 static inline void *mte_set_full(const struct maple_enode *node) 333 { 334 return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL); 335 } 336 337 static inline void *mte_clear_full(const struct maple_enode *node) 338 { 339 return (void *)((unsigned long)node | MAPLE_ENODE_NULL); 340 } 341 342 static inline bool mte_has_null(const struct maple_enode *node) 343 { 344 return (unsigned long)node & MAPLE_ENODE_NULL; 345 } 346 347 static inline bool ma_is_root(struct maple_node *node) 348 { 349 return ((unsigned long)node->parent & MA_ROOT_PARENT); 350 } 351 352 static inline bool mte_is_root(const struct maple_enode *node) 353 { 354 return ma_is_root(mte_to_node(node)); 355 } 356 357 static inline bool mas_is_root_limits(const struct ma_state *mas) 358 { 359 return !mas->min && mas->max == ULONG_MAX; 360 } 361 362 static inline bool mt_is_alloc(struct maple_tree *mt) 363 { 364 return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE); 365 } 366 367 /* 368 * The Parent Pointer 369 * Excluding root, the parent pointer is 256B aligned like all other tree nodes. 370 * When storing a 32 or 64 bit values, the offset can fit into 5 bits. The 16 371 * bit values need an extra bit to store the offset. This extra bit comes from 372 * a reuse of the last bit in the node type. This is possible by using bit 1 to 373 * indicate if bit 2 is part of the type or the slot. 
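 *
 * As an illustrative example using the MAPLE_PARENT_* values defined below:
 * a node sitting in slot 5 of a 64 bit range parent stores
 *
 *	parent | (5 << MAPLE_PARENT_SLOT_SHIFT) | MAPLE_PARENT_RANGE64
 *
 * in its parent pointer (with the low metadata bits of the parent pointer
 * cleared first), so the low bits carry the type and the slot while the
 * upper bits still locate the parent node itself.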
 *
 * Note types:
 *  0x??1 = Root
 *  0x?00 = 16 bit nodes
 *  0x010 = 32 bit nodes
 *  0x110 = 64 bit nodes
 *
 * Slot size and alignment
 *  0b??1 : Root
 *  0b?00 : 16 bit values, type in 0-1, slot in 2-7
 *  0b010 : 32 bit values, type in 0-2, slot in 3-7
 *  0b110 : 64 bit values, type in 0-2, slot in 3-7
 */

#define MAPLE_PARENT_ROOT		0x01

#define MAPLE_PARENT_SLOT_SHIFT		0x03
#define MAPLE_PARENT_SLOT_MASK		0xF8

#define MAPLE_PARENT_16B_SLOT_SHIFT	0x02
#define MAPLE_PARENT_16B_SLOT_MASK	0xFC

#define MAPLE_PARENT_RANGE64		0x06
#define MAPLE_PARENT_RANGE32		0x04
#define MAPLE_PARENT_NOT_RANGE16	0x02

/*
 * mte_parent_shift() - Get the parent shift for the slot storage.
 * @parent: The parent pointer cast as an unsigned long
 * Return: The shift into that pointer to the start of the slot
 */
static inline unsigned long mte_parent_shift(unsigned long parent)
{
	/* Note bit 1 == 0 means 16B */
	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
		return MAPLE_PARENT_SLOT_SHIFT;

	return MAPLE_PARENT_16B_SLOT_SHIFT;
}

/*
 * mte_parent_slot_mask() - Get the slot mask for the parent.
 * @parent: The parent pointer cast as an unsigned long.
 * Return: The slot mask for that parent.
 */
static inline unsigned long mte_parent_slot_mask(unsigned long parent)
{
	/* Note bit 1 == 0 means 16B */
	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
		return MAPLE_PARENT_SLOT_MASK;

	return MAPLE_PARENT_16B_SLOT_MASK;
}

/*
 * mas_parent_type() - Return the maple_type of the parent from the stored
 * parent type.
 * @mas: The maple state
 * @enode: The maple_enode to extract the parent's enum
 * Return: The node->parent maple_type
 */
static inline
enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode)
{
	unsigned long p_type;

	p_type = (unsigned long)mte_to_node(enode)->parent;
	if (WARN_ON(p_type & MAPLE_PARENT_ROOT))
		return 0;

	p_type &= MAPLE_NODE_MASK;
	p_type &= ~mte_parent_slot_mask(p_type);
	switch (p_type) {
	case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */
		if (mt_is_alloc(mas->tree))
			return maple_arange_64;
		return maple_range_64;
	}

	return 0;
}

/*
 * mas_set_parent() - Set the parent node and encode the slot
 * @mas: The maple state
 * @enode: The encoded maple node.
 * @parent: The encoded maple node that is the parent of @enode.
 * @slot: The slot that @enode resides in @parent.
 *
 * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the
 * parent type.
464 */ 465 static inline 466 void mas_set_parent(struct ma_state *mas, struct maple_enode *enode, 467 const struct maple_enode *parent, unsigned char slot) 468 { 469 unsigned long val = (unsigned long)parent; 470 unsigned long shift; 471 unsigned long type; 472 enum maple_type p_type = mte_node_type(parent); 473 474 MAS_BUG_ON(mas, p_type == maple_dense); 475 MAS_BUG_ON(mas, p_type == maple_leaf_64); 476 477 switch (p_type) { 478 case maple_range_64: 479 case maple_arange_64: 480 shift = MAPLE_PARENT_SLOT_SHIFT; 481 type = MAPLE_PARENT_RANGE64; 482 break; 483 default: 484 case maple_dense: 485 case maple_leaf_64: 486 shift = type = 0; 487 break; 488 } 489 490 val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */ 491 val |= (slot << shift) | type; 492 mte_to_node(enode)->parent = ma_parent_ptr(val); 493 } 494 495 /* 496 * mte_parent_slot() - get the parent slot of @enode. 497 * @enode: The encoded maple node. 498 * 499 * Return: The slot in the parent node where @enode resides. 500 */ 501 static inline unsigned int mte_parent_slot(const struct maple_enode *enode) 502 { 503 unsigned long val = (unsigned long)mte_to_node(enode)->parent; 504 505 if (val & MA_ROOT_PARENT) 506 return 0; 507 508 /* 509 * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost 510 * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT 511 */ 512 return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val); 513 } 514 515 /* 516 * mte_parent() - Get the parent of @node. 517 * @node: The encoded maple node. 518 * 519 * Return: The parent maple node. 520 */ 521 static inline struct maple_node *mte_parent(const struct maple_enode *enode) 522 { 523 return (void *)((unsigned long) 524 (mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK); 525 } 526 527 /* 528 * ma_dead_node() - check if the @enode is dead. 529 * @enode: The encoded maple node 530 * 531 * Return: true if dead, false otherwise. 532 */ 533 static inline bool ma_dead_node(const struct maple_node *node) 534 { 535 struct maple_node *parent; 536 537 /* Do not reorder reads from the node prior to the parent check */ 538 smp_rmb(); 539 parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK); 540 return (parent == node); 541 } 542 543 /* 544 * mte_dead_node() - check if the @enode is dead. 545 * @enode: The encoded maple node 546 * 547 * Return: true if dead, false otherwise. 548 */ 549 static inline bool mte_dead_node(const struct maple_enode *enode) 550 { 551 struct maple_node *parent, *node; 552 553 node = mte_to_node(enode); 554 /* Do not reorder reads from the node prior to the parent check */ 555 smp_rmb(); 556 parent = mte_parent(enode); 557 return (parent == node); 558 } 559 560 /* 561 * mas_allocated() - Get the number of nodes allocated in a maple state. 562 * @mas: The maple state 563 * 564 * The ma_state alloc member is overloaded to hold a pointer to the first 565 * allocated node or to the number of requested nodes to allocate. If bit 0 is 566 * set, then the alloc contains the number of requested nodes. If there is an 567 * allocated node, then the total allocated nodes is in that node. 568 * 569 * Return: The total number of nodes allocated 570 */ 571 static inline unsigned long mas_allocated(const struct ma_state *mas) 572 { 573 if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) 574 return 0; 575 576 return mas->alloc->total; 577 } 578 579 /* 580 * mas_set_alloc_req() - Set the requested number of allocations. 581 * @mas: the maple state 582 * @count: the number of allocations. 
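 *
 * As an illustrative example, a request for three nodes while nothing has
 * been allocated yet is encoded directly in the pointer value:
 *
 *	mas->alloc = (struct maple_alloc *)((3 << 1) | 1);
 *
 * Bit 0 being set marks the value as a request count rather than a pointer
 * to an allocated node.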
583 * 584 * The requested number of allocations is either in the first allocated node, 585 * located in @mas->alloc->request_count, or directly in @mas->alloc if there is 586 * no allocated node. Set the request either in the node or do the necessary 587 * encoding to store in @mas->alloc directly. 588 */ 589 static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count) 590 { 591 if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) { 592 if (!count) 593 mas->alloc = NULL; 594 else 595 mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U); 596 return; 597 } 598 599 mas->alloc->request_count = count; 600 } 601 602 /* 603 * mas_alloc_req() - get the requested number of allocations. 604 * @mas: The maple state 605 * 606 * The alloc count is either stored directly in @mas, or in 607 * @mas->alloc->request_count if there is at least one node allocated. Decode 608 * the request count if it's stored directly in @mas->alloc. 609 * 610 * Return: The allocation request count. 611 */ 612 static inline unsigned int mas_alloc_req(const struct ma_state *mas) 613 { 614 if ((unsigned long)mas->alloc & 0x1) 615 return (unsigned long)(mas->alloc) >> 1; 616 else if (mas->alloc) 617 return mas->alloc->request_count; 618 return 0; 619 } 620 621 /* 622 * ma_pivots() - Get a pointer to the maple node pivots. 623 * @node - the maple node 624 * @type - the node type 625 * 626 * In the event of a dead node, this array may be %NULL 627 * 628 * Return: A pointer to the maple node pivots 629 */ 630 static inline unsigned long *ma_pivots(struct maple_node *node, 631 enum maple_type type) 632 { 633 switch (type) { 634 case maple_arange_64: 635 return node->ma64.pivot; 636 case maple_range_64: 637 case maple_leaf_64: 638 return node->mr64.pivot; 639 case maple_dense: 640 return NULL; 641 } 642 return NULL; 643 } 644 645 /* 646 * ma_gaps() - Get a pointer to the maple node gaps. 647 * @node - the maple node 648 * @type - the node type 649 * 650 * Return: A pointer to the maple node gaps 651 */ 652 static inline unsigned long *ma_gaps(struct maple_node *node, 653 enum maple_type type) 654 { 655 switch (type) { 656 case maple_arange_64: 657 return node->ma64.gap; 658 case maple_range_64: 659 case maple_leaf_64: 660 case maple_dense: 661 return NULL; 662 } 663 return NULL; 664 } 665 666 /* 667 * mas_pivot() - Get the pivot at @piv of the maple encoded node. 668 * @mas: The maple state. 669 * @piv: The pivot. 670 * 671 * Return: the pivot at @piv of @mn. 672 */ 673 static inline unsigned long mas_pivot(struct ma_state *mas, unsigned char piv) 674 { 675 struct maple_node *node = mas_mn(mas); 676 enum maple_type type = mte_node_type(mas->node); 677 678 if (MAS_WARN_ON(mas, piv >= mt_pivots[type])) { 679 mas_set_err(mas, -EIO); 680 return 0; 681 } 682 683 switch (type) { 684 case maple_arange_64: 685 return node->ma64.pivot[piv]; 686 case maple_range_64: 687 case maple_leaf_64: 688 return node->mr64.pivot[piv]; 689 case maple_dense: 690 return 0; 691 } 692 return 0; 693 } 694 695 /* 696 * mas_safe_pivot() - get the pivot at @piv or mas->max. 697 * @mas: The maple state 698 * @pivots: The pointer to the maple node pivots 699 * @piv: The pivot to fetch 700 * @type: The maple node type 701 * 702 * Return: The pivot at @piv within the limit of the @pivots array, @mas->max 703 * otherwise. 
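 *
 * Together with mas_safe_min() below, this gives the range covered by an
 * offset.  For example, offset 3 of a node spans
 *
 *	pivots[2] + 1  to  mas_safe_pivot(mas, pivots, 3, type)
 *
 * with mas->min and mas->max standing in at the edges of the node.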
704 */ 705 static inline unsigned long 706 mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots, 707 unsigned char piv, enum maple_type type) 708 { 709 if (piv >= mt_pivots[type]) 710 return mas->max; 711 712 return pivots[piv]; 713 } 714 715 /* 716 * mas_safe_min() - Return the minimum for a given offset. 717 * @mas: The maple state 718 * @pivots: The pointer to the maple node pivots 719 * @offset: The offset into the pivot array 720 * 721 * Return: The minimum range value that is contained in @offset. 722 */ 723 static inline unsigned long 724 mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset) 725 { 726 if (likely(offset)) 727 return pivots[offset - 1] + 1; 728 729 return mas->min; 730 } 731 732 /* 733 * mte_set_pivot() - Set a pivot to a value in an encoded maple node. 734 * @mn: The encoded maple node 735 * @piv: The pivot offset 736 * @val: The value of the pivot 737 */ 738 static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv, 739 unsigned long val) 740 { 741 struct maple_node *node = mte_to_node(mn); 742 enum maple_type type = mte_node_type(mn); 743 744 BUG_ON(piv >= mt_pivots[type]); 745 switch (type) { 746 default: 747 case maple_range_64: 748 case maple_leaf_64: 749 node->mr64.pivot[piv] = val; 750 break; 751 case maple_arange_64: 752 node->ma64.pivot[piv] = val; 753 break; 754 case maple_dense: 755 break; 756 } 757 758 } 759 760 /* 761 * ma_slots() - Get a pointer to the maple node slots. 762 * @mn: The maple node 763 * @mt: The maple node type 764 * 765 * Return: A pointer to the maple node slots 766 */ 767 static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt) 768 { 769 switch (mt) { 770 default: 771 case maple_arange_64: 772 return mn->ma64.slot; 773 case maple_range_64: 774 case maple_leaf_64: 775 return mn->mr64.slot; 776 case maple_dense: 777 return mn->slot; 778 } 779 } 780 781 static inline bool mt_write_locked(const struct maple_tree *mt) 782 { 783 return mt_external_lock(mt) ? mt_write_lock_is_held(mt) : 784 lockdep_is_held(&mt->ma_lock); 785 } 786 787 static inline bool mt_locked(const struct maple_tree *mt) 788 { 789 return mt_external_lock(mt) ? mt_lock_is_held(mt) : 790 lockdep_is_held(&mt->ma_lock); 791 } 792 793 static inline void *mt_slot(const struct maple_tree *mt, 794 void __rcu **slots, unsigned char offset) 795 { 796 return rcu_dereference_check(slots[offset], mt_locked(mt)); 797 } 798 799 static inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slots, 800 unsigned char offset) 801 { 802 return rcu_dereference_protected(slots[offset], mt_write_locked(mt)); 803 } 804 /* 805 * mas_slot_locked() - Get the slot value when holding the maple tree lock. 806 * @mas: The maple state 807 * @slots: The pointer to the slots 808 * @offset: The offset into the slots array to fetch 809 * 810 * Return: The entry stored in @slots at the @offset. 811 */ 812 static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots, 813 unsigned char offset) 814 { 815 return mt_slot_locked(mas->tree, slots, offset); 816 } 817 818 /* 819 * mas_slot() - Get the slot value when not holding the maple tree lock. 
 * @mas: The maple state
 * @slots: The pointer to the slots
 * @offset: The offset into the slots array to fetch
 *
 * Return: The entry stored in @slots at the @offset
 */
static inline void *mas_slot(struct ma_state *mas, void __rcu **slots,
			     unsigned char offset)
{
	return mt_slot(mas->tree, slots, offset);
}

/*
 * mas_root() - Get the maple tree root.
 * @mas: The maple state.
 *
 * Return: The pointer to the root of the tree
 */
static inline void *mas_root(struct ma_state *mas)
{
	return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree));
}

static inline void *mt_root_locked(struct maple_tree *mt)
{
	return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt));
}

/*
 * mas_root_locked() - Get the maple tree root when holding the maple tree lock.
 * @mas: The maple state.
 *
 * Return: The pointer to the root of the tree
 */
static inline void *mas_root_locked(struct ma_state *mas)
{
	return mt_root_locked(mas->tree);
}

static inline struct maple_metadata *ma_meta(struct maple_node *mn,
					     enum maple_type mt)
{
	switch (mt) {
	case maple_arange_64:
		return &mn->ma64.meta;
	default:
		return &mn->mr64.meta;
	}
}

/*
 * ma_set_meta() - Set the metadata information of a node.
 * @mn: The maple node
 * @mt: The maple node type
 * @offset: The offset of the highest sub-gap in this node.
 * @end: The end of the data in this node.
 */
static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt,
			       unsigned char offset, unsigned char end)
{
	struct maple_metadata *meta = ma_meta(mn, mt);

	meta->gap = offset;
	meta->end = end;
}

/*
 * mt_clear_meta() - Clear the metadata information of a node, if it exists
 * @mt: The maple tree
 * @mn: The maple node
 * @type: The maple node type
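 *
 * Note that a maple_range_64 node only has room for metadata when its last
 * slot is unused: if the last pivot is set, the last slot may instead hold a
 * child node, in which case there is nothing to clear (hence the checks
 * below).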
893 */ 894 static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn, 895 enum maple_type type) 896 { 897 struct maple_metadata *meta; 898 unsigned long *pivots; 899 void __rcu **slots; 900 void *next; 901 902 switch (type) { 903 case maple_range_64: 904 pivots = mn->mr64.pivot; 905 if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { 906 slots = mn->mr64.slot; 907 next = mt_slot_locked(mt, slots, 908 MAPLE_RANGE64_SLOTS - 1); 909 if (unlikely((mte_to_node(next) && 910 mte_node_type(next)))) 911 return; /* no metadata, could be node */ 912 } 913 fallthrough; 914 case maple_arange_64: 915 meta = ma_meta(mn, type); 916 break; 917 default: 918 return; 919 } 920 921 meta->gap = 0; 922 meta->end = 0; 923 } 924 925 /* 926 * ma_meta_end() - Get the data end of a node from the metadata 927 * @mn: The maple node 928 * @mt: The maple node type 929 */ 930 static inline unsigned char ma_meta_end(struct maple_node *mn, 931 enum maple_type mt) 932 { 933 struct maple_metadata *meta = ma_meta(mn, mt); 934 935 return meta->end; 936 } 937 938 /* 939 * ma_meta_gap() - Get the largest gap location of a node from the metadata 940 * @mn: The maple node 941 * @mt: The maple node type 942 */ 943 static inline unsigned char ma_meta_gap(struct maple_node *mn, 944 enum maple_type mt) 945 { 946 return mn->ma64.meta.gap; 947 } 948 949 /* 950 * ma_set_meta_gap() - Set the largest gap location in a nodes metadata 951 * @mn: The maple node 952 * @mn: The maple node type 953 * @offset: The location of the largest gap. 954 */ 955 static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt, 956 unsigned char offset) 957 { 958 959 struct maple_metadata *meta = ma_meta(mn, mt); 960 961 meta->gap = offset; 962 } 963 964 /* 965 * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes. 966 * @mat - the ma_topiary, a linked list of dead nodes. 967 * @dead_enode - the node to be marked as dead and added to the tail of the list 968 * 969 * Add the @dead_enode to the linked list in @mat. 970 */ 971 static inline void mat_add(struct ma_topiary *mat, 972 struct maple_enode *dead_enode) 973 { 974 mte_set_node_dead(dead_enode); 975 mte_to_mat(dead_enode)->next = NULL; 976 if (!mat->tail) { 977 mat->tail = mat->head = dead_enode; 978 return; 979 } 980 981 mte_to_mat(mat->tail)->next = dead_enode; 982 mat->tail = dead_enode; 983 } 984 985 static void mte_destroy_walk(struct maple_enode *, struct maple_tree *); 986 static inline void mas_free(struct ma_state *mas, struct maple_enode *used); 987 988 /* 989 * mas_mat_free() - Free all nodes in a dead list. 990 * @mas - the maple state 991 * @mat - the ma_topiary linked list of dead nodes to free. 992 * 993 * Free walk a dead list. 994 */ 995 static void mas_mat_free(struct ma_state *mas, struct ma_topiary *mat) 996 { 997 struct maple_enode *next; 998 999 while (mat->head) { 1000 next = mte_to_mat(mat->head)->next; 1001 mas_free(mas, mat->head); 1002 mat->head = next; 1003 } 1004 } 1005 1006 /* 1007 * mas_mat_destroy() - Free all nodes and subtrees in a dead list. 1008 * @mas - the maple state 1009 * @mat - the ma_topiary linked list of dead nodes to free. 1010 * 1011 * Destroy walk a dead list. 
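 *
 * A minimal usage sketch (mat and old_enode are placeholder names): dead
 * nodes are queued with mat_add() while the tree is being rewritten and then
 * released in one pass afterwards,
 *
 *	mat_add(&mat, old_enode);
 *	...
 *	mas_mat_free(mas, &mat);	(or mas_mat_destroy() for whole subtrees)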
1012 */ 1013 static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat) 1014 { 1015 struct maple_enode *next; 1016 1017 while (mat->head) { 1018 next = mte_to_mat(mat->head)->next; 1019 mte_destroy_walk(mat->head, mat->mtree); 1020 mat->head = next; 1021 } 1022 } 1023 /* 1024 * mas_descend() - Descend into the slot stored in the ma_state. 1025 * @mas - the maple state. 1026 * 1027 * Note: Not RCU safe, only use in write side or debug code. 1028 */ 1029 static inline void mas_descend(struct ma_state *mas) 1030 { 1031 enum maple_type type; 1032 unsigned long *pivots; 1033 struct maple_node *node; 1034 void __rcu **slots; 1035 1036 node = mas_mn(mas); 1037 type = mte_node_type(mas->node); 1038 pivots = ma_pivots(node, type); 1039 slots = ma_slots(node, type); 1040 1041 if (mas->offset) 1042 mas->min = pivots[mas->offset - 1] + 1; 1043 mas->max = mas_safe_pivot(mas, pivots, mas->offset, type); 1044 mas->node = mas_slot(mas, slots, mas->offset); 1045 } 1046 1047 /* 1048 * mte_set_gap() - Set a maple node gap. 1049 * @mn: The encoded maple node 1050 * @gap: The offset of the gap to set 1051 * @val: The gap value 1052 */ 1053 static inline void mte_set_gap(const struct maple_enode *mn, 1054 unsigned char gap, unsigned long val) 1055 { 1056 switch (mte_node_type(mn)) { 1057 default: 1058 break; 1059 case maple_arange_64: 1060 mte_to_node(mn)->ma64.gap[gap] = val; 1061 break; 1062 } 1063 } 1064 1065 /* 1066 * mas_ascend() - Walk up a level of the tree. 1067 * @mas: The maple state 1068 * 1069 * Sets the @mas->max and @mas->min to the correct values when walking up. This 1070 * may cause several levels of walking up to find the correct min and max. 1071 * May find a dead node which will cause a premature return. 1072 * Return: 1 on dead node, 0 otherwise 1073 */ 1074 static int mas_ascend(struct ma_state *mas) 1075 { 1076 struct maple_enode *p_enode; /* parent enode. */ 1077 struct maple_enode *a_enode; /* ancestor enode. */ 1078 struct maple_node *a_node; /* ancestor node. */ 1079 struct maple_node *p_node; /* parent node. 
*/ 1080 unsigned char a_slot; 1081 enum maple_type a_type; 1082 unsigned long min, max; 1083 unsigned long *pivots; 1084 bool set_max = false, set_min = false; 1085 1086 a_node = mas_mn(mas); 1087 if (ma_is_root(a_node)) { 1088 mas->offset = 0; 1089 return 0; 1090 } 1091 1092 p_node = mte_parent(mas->node); 1093 if (unlikely(a_node == p_node)) 1094 return 1; 1095 1096 a_type = mas_parent_type(mas, mas->node); 1097 mas->offset = mte_parent_slot(mas->node); 1098 a_enode = mt_mk_node(p_node, a_type); 1099 1100 /* Check to make sure all parent information is still accurate */ 1101 if (p_node != mte_parent(mas->node)) 1102 return 1; 1103 1104 mas->node = a_enode; 1105 1106 if (mte_is_root(a_enode)) { 1107 mas->max = ULONG_MAX; 1108 mas->min = 0; 1109 return 0; 1110 } 1111 1112 if (!mas->min) 1113 set_min = true; 1114 1115 if (mas->max == ULONG_MAX) 1116 set_max = true; 1117 1118 min = 0; 1119 max = ULONG_MAX; 1120 do { 1121 p_enode = a_enode; 1122 a_type = mas_parent_type(mas, p_enode); 1123 a_node = mte_parent(p_enode); 1124 a_slot = mte_parent_slot(p_enode); 1125 a_enode = mt_mk_node(a_node, a_type); 1126 pivots = ma_pivots(a_node, a_type); 1127 1128 if (unlikely(ma_dead_node(a_node))) 1129 return 1; 1130 1131 if (!set_min && a_slot) { 1132 set_min = true; 1133 min = pivots[a_slot - 1] + 1; 1134 } 1135 1136 if (!set_max && a_slot < mt_pivots[a_type]) { 1137 set_max = true; 1138 max = pivots[a_slot]; 1139 } 1140 1141 if (unlikely(ma_dead_node(a_node))) 1142 return 1; 1143 1144 if (unlikely(ma_is_root(a_node))) 1145 break; 1146 1147 } while (!set_min || !set_max); 1148 1149 mas->max = max; 1150 mas->min = min; 1151 return 0; 1152 } 1153 1154 /* 1155 * mas_pop_node() - Get a previously allocated maple node from the maple state. 1156 * @mas: The maple state 1157 * 1158 * Return: A pointer to a maple node. 1159 */ 1160 static inline struct maple_node *mas_pop_node(struct ma_state *mas) 1161 { 1162 struct maple_alloc *ret, *node = mas->alloc; 1163 unsigned long total = mas_allocated(mas); 1164 unsigned int req = mas_alloc_req(mas); 1165 1166 /* nothing or a request pending. */ 1167 if (WARN_ON(!total)) 1168 return NULL; 1169 1170 if (total == 1) { 1171 /* single allocation in this ma_state */ 1172 mas->alloc = NULL; 1173 ret = node; 1174 goto single_node; 1175 } 1176 1177 if (node->node_count == 1) { 1178 /* Single allocation in this node. */ 1179 mas->alloc = node->slot[0]; 1180 mas->alloc->total = node->total - 1; 1181 ret = node; 1182 goto new_head; 1183 } 1184 node->total--; 1185 ret = node->slot[--node->node_count]; 1186 node->slot[node->node_count] = NULL; 1187 1188 single_node: 1189 new_head: 1190 if (req) { 1191 req++; 1192 mas_set_alloc_req(mas, req); 1193 } 1194 1195 memset(ret, 0, sizeof(*ret)); 1196 return (struct maple_node *)ret; 1197 } 1198 1199 /* 1200 * mas_push_node() - Push a node back on the maple state allocation. 1201 * @mas: The maple state 1202 * @used: The used maple node 1203 * 1204 * Stores the maple node back into @mas->alloc for reuse. Updates allocated and 1205 * requested node count as necessary. 
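 *
 * This is the counterpart to mas_pop_node(): a node taken from the
 * allocation list but not consumed can simply be handed back, e.g.
 *
 *	node = mas_pop_node(mas);
 *	...
 *	mas_push_node(mas, node);	(returned unused)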
1206 */ 1207 static inline void mas_push_node(struct ma_state *mas, struct maple_node *used) 1208 { 1209 struct maple_alloc *reuse = (struct maple_alloc *)used; 1210 struct maple_alloc *head = mas->alloc; 1211 unsigned long count; 1212 unsigned int requested = mas_alloc_req(mas); 1213 1214 count = mas_allocated(mas); 1215 1216 reuse->request_count = 0; 1217 reuse->node_count = 0; 1218 if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) { 1219 head->slot[head->node_count++] = reuse; 1220 head->total++; 1221 goto done; 1222 } 1223 1224 reuse->total = 1; 1225 if ((head) && !((unsigned long)head & 0x1)) { 1226 reuse->slot[0] = head; 1227 reuse->node_count = 1; 1228 reuse->total += head->total; 1229 } 1230 1231 mas->alloc = reuse; 1232 done: 1233 if (requested > 1) 1234 mas_set_alloc_req(mas, requested - 1); 1235 } 1236 1237 /* 1238 * mas_alloc_nodes() - Allocate nodes into a maple state 1239 * @mas: The maple state 1240 * @gfp: The GFP Flags 1241 */ 1242 static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) 1243 { 1244 struct maple_alloc *node; 1245 unsigned long allocated = mas_allocated(mas); 1246 unsigned int requested = mas_alloc_req(mas); 1247 unsigned int count; 1248 void **slots = NULL; 1249 unsigned int max_req = 0; 1250 1251 if (!requested) 1252 return; 1253 1254 mas_set_alloc_req(mas, 0); 1255 if (mas->mas_flags & MA_STATE_PREALLOC) { 1256 if (allocated) 1257 return; 1258 WARN_ON(!allocated); 1259 } 1260 1261 if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) { 1262 node = (struct maple_alloc *)mt_alloc_one(gfp); 1263 if (!node) 1264 goto nomem_one; 1265 1266 if (allocated) { 1267 node->slot[0] = mas->alloc; 1268 node->node_count = 1; 1269 } else { 1270 node->node_count = 0; 1271 } 1272 1273 mas->alloc = node; 1274 node->total = ++allocated; 1275 requested--; 1276 } 1277 1278 node = mas->alloc; 1279 node->request_count = 0; 1280 while (requested) { 1281 max_req = MAPLE_ALLOC_SLOTS - node->node_count; 1282 slots = (void **)&node->slot[node->node_count]; 1283 max_req = min(requested, max_req); 1284 count = mt_alloc_bulk(gfp, max_req, slots); 1285 if (!count) 1286 goto nomem_bulk; 1287 1288 if (node->node_count == 0) { 1289 node->slot[0]->node_count = 0; 1290 node->slot[0]->request_count = 0; 1291 } 1292 1293 node->node_count += count; 1294 allocated += count; 1295 node = node->slot[0]; 1296 requested -= count; 1297 } 1298 mas->alloc->total = allocated; 1299 return; 1300 1301 nomem_bulk: 1302 /* Clean up potential freed allocations on bulk failure */ 1303 memset(slots, 0, max_req * sizeof(unsigned long)); 1304 nomem_one: 1305 mas_set_alloc_req(mas, requested); 1306 if (mas->alloc && !(((unsigned long)mas->alloc & 0x1))) 1307 mas->alloc->total = allocated; 1308 mas_set_err(mas, -ENOMEM); 1309 } 1310 1311 /* 1312 * mas_free() - Free an encoded maple node 1313 * @mas: The maple state 1314 * @used: The encoded maple node to free. 1315 * 1316 * Uses rcu free if necessary, pushes @used back on the maple state allocations 1317 * otherwise. 1318 */ 1319 static inline void mas_free(struct ma_state *mas, struct maple_enode *used) 1320 { 1321 struct maple_node *tmp = mte_to_node(used); 1322 1323 if (mt_in_rcu(mas->tree)) 1324 ma_free_rcu(tmp); 1325 else 1326 mas_push_node(mas, tmp); 1327 } 1328 1329 /* 1330 * mas_node_count() - Check if enough nodes are allocated and request more if 1331 * there is not enough nodes. 
1332 * @mas: The maple state 1333 * @count: The number of nodes needed 1334 * @gfp: the gfp flags 1335 */ 1336 static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp) 1337 { 1338 unsigned long allocated = mas_allocated(mas); 1339 1340 if (allocated < count) { 1341 mas_set_alloc_req(mas, count - allocated); 1342 mas_alloc_nodes(mas, gfp); 1343 } 1344 } 1345 1346 /* 1347 * mas_node_count() - Check if enough nodes are allocated and request more if 1348 * there is not enough nodes. 1349 * @mas: The maple state 1350 * @count: The number of nodes needed 1351 * 1352 * Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags. 1353 */ 1354 static void mas_node_count(struct ma_state *mas, int count) 1355 { 1356 return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN); 1357 } 1358 1359 /* 1360 * mas_start() - Sets up maple state for operations. 1361 * @mas: The maple state. 1362 * 1363 * If mas->node == MAS_START, then set the min, max and depth to 1364 * defaults. 1365 * 1366 * Return: 1367 * - If mas->node is an error or not MAS_START, return NULL. 1368 * - If it's an empty tree: NULL & mas->node == MAS_NONE 1369 * - If it's a single entry: The entry & mas->node == MAS_ROOT 1370 * - If it's a tree: NULL & mas->node == safe root node. 1371 */ 1372 static inline struct maple_enode *mas_start(struct ma_state *mas) 1373 { 1374 if (likely(mas_is_start(mas))) { 1375 struct maple_enode *root; 1376 1377 mas->min = 0; 1378 mas->max = ULONG_MAX; 1379 1380 retry: 1381 mas->depth = 0; 1382 root = mas_root(mas); 1383 /* Tree with nodes */ 1384 if (likely(xa_is_node(root))) { 1385 mas->depth = 1; 1386 mas->node = mte_safe_root(root); 1387 mas->offset = 0; 1388 if (mte_dead_node(mas->node)) 1389 goto retry; 1390 1391 return NULL; 1392 } 1393 1394 /* empty tree */ 1395 if (unlikely(!root)) { 1396 mas->node = MAS_NONE; 1397 mas->offset = MAPLE_NODE_SLOTS; 1398 return NULL; 1399 } 1400 1401 /* Single entry tree */ 1402 mas->node = MAS_ROOT; 1403 mas->offset = MAPLE_NODE_SLOTS; 1404 1405 /* Single entry tree. */ 1406 if (mas->index > 0) 1407 return NULL; 1408 1409 return root; 1410 } 1411 1412 return NULL; 1413 } 1414 1415 /* 1416 * ma_data_end() - Find the end of the data in a node. 1417 * @node: The maple node 1418 * @type: The maple node type 1419 * @pivots: The array of pivots in the node 1420 * @max: The maximum value in the node 1421 * 1422 * Uses metadata to find the end of the data when possible. 1423 * Return: The zero indexed last slot with data (may be null). 1424 */ 1425 static inline unsigned char ma_data_end(struct maple_node *node, 1426 enum maple_type type, 1427 unsigned long *pivots, 1428 unsigned long max) 1429 { 1430 unsigned char offset; 1431 1432 if (!pivots) 1433 return 0; 1434 1435 if (type == maple_arange_64) 1436 return ma_meta_end(node, type); 1437 1438 offset = mt_pivots[type] - 1; 1439 if (likely(!pivots[offset])) 1440 return ma_meta_end(node, type); 1441 1442 if (likely(pivots[offset] == max)) 1443 return offset; 1444 1445 return mt_pivots[type]; 1446 } 1447 1448 /* 1449 * mas_data_end() - Find the end of the data (slot). 1450 * @mas: the maple state 1451 * 1452 * This method is optimized to check the metadata of a node if the node type 1453 * supports data end metadata. 1454 * 1455 * Return: The zero indexed last slot with data (may be null). 
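 *
 * For example, in a maple_range_64 node:
 *  - a last pivot of 0 means the end is read from the metadata
 *  - a last pivot equal to mas->max means that offset is the last in use
 *  - anything else means the node is full and the final (implied pivot)
 *    slot is in use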
1456 */ 1457 static inline unsigned char mas_data_end(struct ma_state *mas) 1458 { 1459 enum maple_type type; 1460 struct maple_node *node; 1461 unsigned char offset; 1462 unsigned long *pivots; 1463 1464 type = mte_node_type(mas->node); 1465 node = mas_mn(mas); 1466 if (type == maple_arange_64) 1467 return ma_meta_end(node, type); 1468 1469 pivots = ma_pivots(node, type); 1470 if (unlikely(ma_dead_node(node))) 1471 return 0; 1472 1473 offset = mt_pivots[type] - 1; 1474 if (likely(!pivots[offset])) 1475 return ma_meta_end(node, type); 1476 1477 if (likely(pivots[offset] == mas->max)) 1478 return offset; 1479 1480 return mt_pivots[type]; 1481 } 1482 1483 /* 1484 * mas_leaf_max_gap() - Returns the largest gap in a leaf node 1485 * @mas - the maple state 1486 * 1487 * Return: The maximum gap in the leaf. 1488 */ 1489 static unsigned long mas_leaf_max_gap(struct ma_state *mas) 1490 { 1491 enum maple_type mt; 1492 unsigned long pstart, gap, max_gap; 1493 struct maple_node *mn; 1494 unsigned long *pivots; 1495 void __rcu **slots; 1496 unsigned char i; 1497 unsigned char max_piv; 1498 1499 mt = mte_node_type(mas->node); 1500 mn = mas_mn(mas); 1501 slots = ma_slots(mn, mt); 1502 max_gap = 0; 1503 if (unlikely(ma_is_dense(mt))) { 1504 gap = 0; 1505 for (i = 0; i < mt_slots[mt]; i++) { 1506 if (slots[i]) { 1507 if (gap > max_gap) 1508 max_gap = gap; 1509 gap = 0; 1510 } else { 1511 gap++; 1512 } 1513 } 1514 if (gap > max_gap) 1515 max_gap = gap; 1516 return max_gap; 1517 } 1518 1519 /* 1520 * Check the first implied pivot optimizes the loop below and slot 1 may 1521 * be skipped if there is a gap in slot 0. 1522 */ 1523 pivots = ma_pivots(mn, mt); 1524 if (likely(!slots[0])) { 1525 max_gap = pivots[0] - mas->min + 1; 1526 i = 2; 1527 } else { 1528 i = 1; 1529 } 1530 1531 /* reduce max_piv as the special case is checked before the loop */ 1532 max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1; 1533 /* 1534 * Check end implied pivot which can only be a gap on the right most 1535 * node. 1536 */ 1537 if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) { 1538 gap = ULONG_MAX - pivots[max_piv]; 1539 if (gap > max_gap) 1540 max_gap = gap; 1541 } 1542 1543 for (; i <= max_piv; i++) { 1544 /* data == no gap. */ 1545 if (likely(slots[i])) 1546 continue; 1547 1548 pstart = pivots[i - 1]; 1549 gap = pivots[i] - pstart; 1550 if (gap > max_gap) 1551 max_gap = gap; 1552 1553 /* There cannot be two gaps in a row. */ 1554 i++; 1555 } 1556 return max_gap; 1557 } 1558 1559 /* 1560 * ma_max_gap() - Get the maximum gap in a maple node (non-leaf) 1561 * @node: The maple node 1562 * @gaps: The pointer to the gaps 1563 * @mt: The maple node type 1564 * @*off: Pointer to store the offset location of the gap. 1565 * 1566 * Uses the metadata data end to scan backwards across set gaps. 1567 * 1568 * Return: The maximum gap value 1569 */ 1570 static inline unsigned long 1571 ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt, 1572 unsigned char *off) 1573 { 1574 unsigned char offset, i; 1575 unsigned long max_gap = 0; 1576 1577 i = offset = ma_meta_end(node, mt); 1578 do { 1579 if (gaps[i] > max_gap) { 1580 max_gap = gaps[i]; 1581 offset = i; 1582 } 1583 } while (i--); 1584 1585 *off = offset; 1586 return max_gap; 1587 } 1588 1589 /* 1590 * mas_max_gap() - find the largest gap in a non-leaf node and set the slot. 1591 * @mas: The maple state. 1592 * 1593 * Return: The gap value. 
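 *
 * For non-leaf nodes of an allocation tree this is a single lookup: an
 * arange_64 node keeps one gap value per slot and records the offset of the
 * largest in its metadata (see ma_meta_gap() and ma_gaps() above), so only
 * leaves need the full scan done by mas_leaf_max_gap().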
1594 */ 1595 static inline unsigned long mas_max_gap(struct ma_state *mas) 1596 { 1597 unsigned long *gaps; 1598 unsigned char offset; 1599 enum maple_type mt; 1600 struct maple_node *node; 1601 1602 mt = mte_node_type(mas->node); 1603 if (ma_is_leaf(mt)) 1604 return mas_leaf_max_gap(mas); 1605 1606 node = mas_mn(mas); 1607 MAS_BUG_ON(mas, mt != maple_arange_64); 1608 offset = ma_meta_gap(node, mt); 1609 gaps = ma_gaps(node, mt); 1610 return gaps[offset]; 1611 } 1612 1613 /* 1614 * mas_parent_gap() - Set the parent gap and any gaps above, as needed 1615 * @mas: The maple state 1616 * @offset: The gap offset in the parent to set 1617 * @new: The new gap value. 1618 * 1619 * Set the parent gap then continue to set the gap upwards, using the metadata 1620 * of the parent to see if it is necessary to check the node above. 1621 */ 1622 static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset, 1623 unsigned long new) 1624 { 1625 unsigned long meta_gap = 0; 1626 struct maple_node *pnode; 1627 struct maple_enode *penode; 1628 unsigned long *pgaps; 1629 unsigned char meta_offset; 1630 enum maple_type pmt; 1631 1632 pnode = mte_parent(mas->node); 1633 pmt = mas_parent_type(mas, mas->node); 1634 penode = mt_mk_node(pnode, pmt); 1635 pgaps = ma_gaps(pnode, pmt); 1636 1637 ascend: 1638 MAS_BUG_ON(mas, pmt != maple_arange_64); 1639 meta_offset = ma_meta_gap(pnode, pmt); 1640 meta_gap = pgaps[meta_offset]; 1641 1642 pgaps[offset] = new; 1643 1644 if (meta_gap == new) 1645 return; 1646 1647 if (offset != meta_offset) { 1648 if (meta_gap > new) 1649 return; 1650 1651 ma_set_meta_gap(pnode, pmt, offset); 1652 } else if (new < meta_gap) { 1653 new = ma_max_gap(pnode, pgaps, pmt, &meta_offset); 1654 ma_set_meta_gap(pnode, pmt, meta_offset); 1655 } 1656 1657 if (ma_is_root(pnode)) 1658 return; 1659 1660 /* Go to the parent node. */ 1661 pnode = mte_parent(penode); 1662 pmt = mas_parent_type(mas, penode); 1663 pgaps = ma_gaps(pnode, pmt); 1664 offset = mte_parent_slot(penode); 1665 penode = mt_mk_node(pnode, pmt); 1666 goto ascend; 1667 } 1668 1669 /* 1670 * mas_update_gap() - Update a nodes gaps and propagate up if necessary. 1671 * @mas - the maple state. 1672 */ 1673 static inline void mas_update_gap(struct ma_state *mas) 1674 { 1675 unsigned char pslot; 1676 unsigned long p_gap; 1677 unsigned long max_gap; 1678 1679 if (!mt_is_alloc(mas->tree)) 1680 return; 1681 1682 if (mte_is_root(mas->node)) 1683 return; 1684 1685 max_gap = mas_max_gap(mas); 1686 1687 pslot = mte_parent_slot(mas->node); 1688 p_gap = ma_gaps(mte_parent(mas->node), 1689 mas_parent_type(mas, mas->node))[pslot]; 1690 1691 if (p_gap != max_gap) 1692 mas_parent_gap(mas, pslot, max_gap); 1693 } 1694 1695 /* 1696 * mas_adopt_children() - Set the parent pointer of all nodes in @parent to 1697 * @parent with the slot encoded. 1698 * @mas - the maple state (for the tree) 1699 * @parent - the maple encoded node containing the children. 
1700 */ 1701 static inline void mas_adopt_children(struct ma_state *mas, 1702 struct maple_enode *parent) 1703 { 1704 enum maple_type type = mte_node_type(parent); 1705 struct maple_node *node = mas_mn(mas); 1706 void __rcu **slots = ma_slots(node, type); 1707 unsigned long *pivots = ma_pivots(node, type); 1708 struct maple_enode *child; 1709 unsigned char offset; 1710 1711 offset = ma_data_end(node, type, pivots, mas->max); 1712 do { 1713 child = mas_slot_locked(mas, slots, offset); 1714 mas_set_parent(mas, child, parent, offset); 1715 } while (offset--); 1716 } 1717 1718 /* 1719 * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old 1720 * node as dead. 1721 * @mas - the maple state with the new node 1722 * @old_enode - The old maple encoded node to replace. 1723 */ 1724 static inline void mas_put_in_tree(struct ma_state *mas, 1725 struct maple_enode *old_enode) 1726 __must_hold(mas->tree->ma_lock) 1727 { 1728 unsigned char offset; 1729 void __rcu **slots; 1730 1731 if (mte_is_root(mas->node)) { 1732 mas_mn(mas)->parent = ma_parent_ptr(mas_tree_parent(mas)); 1733 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 1734 mas_set_height(mas); 1735 } else { 1736 1737 offset = mte_parent_slot(mas->node); 1738 slots = ma_slots(mte_parent(mas->node), 1739 mas_parent_type(mas, mas->node)); 1740 rcu_assign_pointer(slots[offset], mas->node); 1741 } 1742 1743 mte_set_node_dead(old_enode); 1744 } 1745 1746 /* 1747 * mas_replace_node() - Replace a node by putting it in the tree, marking it 1748 * dead, and freeing it. 1749 * the parent encoding to locate the maple node in the tree. 1750 * @mas - the ma_state with @mas->node pointing to the new node. 1751 * @old_enode - The old maple encoded node. 1752 */ 1753 static inline void mas_replace_node(struct ma_state *mas, 1754 struct maple_enode *old_enode) 1755 __must_hold(mas->tree->ma_lock) 1756 { 1757 mas_put_in_tree(mas, old_enode); 1758 mas_free(mas, old_enode); 1759 } 1760 1761 /* 1762 * mas_new_child() - Find the new child of a node. 1763 * @mas: the maple state 1764 * @child: the maple state to store the child. 1765 */ 1766 static inline bool mas_new_child(struct ma_state *mas, struct ma_state *child) 1767 __must_hold(mas->tree->ma_lock) 1768 { 1769 enum maple_type mt; 1770 unsigned char offset; 1771 unsigned char end; 1772 unsigned long *pivots; 1773 struct maple_enode *entry; 1774 struct maple_node *node; 1775 void __rcu **slots; 1776 1777 mt = mte_node_type(mas->node); 1778 node = mas_mn(mas); 1779 slots = ma_slots(node, mt); 1780 pivots = ma_pivots(node, mt); 1781 end = ma_data_end(node, mt, pivots, mas->max); 1782 for (offset = mas->offset; offset <= end; offset++) { 1783 entry = mas_slot_locked(mas, slots, offset); 1784 if (mte_parent(entry) == node) { 1785 *child = *mas; 1786 mas->offset = offset + 1; 1787 child->offset = offset; 1788 mas_descend(child); 1789 child->offset = 0; 1790 return true; 1791 } 1792 } 1793 return false; 1794 } 1795 1796 /* 1797 * mab_shift_right() - Shift the data in mab right. Note, does not clean out the 1798 * old data or set b_node->b_end. 
1799 * @b_node: the maple_big_node 1800 * @shift: the shift count 1801 */ 1802 static inline void mab_shift_right(struct maple_big_node *b_node, 1803 unsigned char shift) 1804 { 1805 unsigned long size = b_node->b_end * sizeof(unsigned long); 1806 1807 memmove(b_node->pivot + shift, b_node->pivot, size); 1808 memmove(b_node->slot + shift, b_node->slot, size); 1809 if (b_node->type == maple_arange_64) 1810 memmove(b_node->gap + shift, b_node->gap, size); 1811 } 1812 1813 /* 1814 * mab_middle_node() - Check if a middle node is needed (unlikely) 1815 * @b_node: the maple_big_node that contains the data. 1816 * @size: the amount of data in the b_node 1817 * @split: the potential split location 1818 * @slot_count: the size that can be stored in a single node being considered. 1819 * 1820 * Return: true if a middle node is required. 1821 */ 1822 static inline bool mab_middle_node(struct maple_big_node *b_node, int split, 1823 unsigned char slot_count) 1824 { 1825 unsigned char size = b_node->b_end; 1826 1827 if (size >= 2 * slot_count) 1828 return true; 1829 1830 if (!b_node->slot[split] && (size >= 2 * slot_count - 1)) 1831 return true; 1832 1833 return false; 1834 } 1835 1836 /* 1837 * mab_no_null_split() - ensure the split doesn't fall on a NULL 1838 * @b_node: the maple_big_node with the data 1839 * @split: the suggested split location 1840 * @slot_count: the number of slots in the node being considered. 1841 * 1842 * Return: the split location. 1843 */ 1844 static inline int mab_no_null_split(struct maple_big_node *b_node, 1845 unsigned char split, unsigned char slot_count) 1846 { 1847 if (!b_node->slot[split]) { 1848 /* 1849 * If the split is less than the max slot && the right side will 1850 * still be sufficient, then increment the split on NULL. 1851 */ 1852 if ((split < slot_count - 1) && 1853 (b_node->b_end - split) > (mt_min_slots[b_node->type])) 1854 split++; 1855 else 1856 split--; 1857 } 1858 return split; 1859 } 1860 1861 /* 1862 * mab_calc_split() - Calculate the split location and if there needs to be two 1863 * splits. 1864 * @bn: The maple_big_node with the data 1865 * @mid_split: The second split, if required. 0 otherwise. 1866 * 1867 * Return: The first split location. The middle split is set in @mid_split. 1868 */ 1869 static inline int mab_calc_split(struct ma_state *mas, 1870 struct maple_big_node *bn, unsigned char *mid_split, unsigned long min) 1871 { 1872 unsigned char b_end = bn->b_end; 1873 int split = b_end / 2; /* Assume equal split. */ 1874 unsigned char slot_min, slot_count = mt_slots[bn->type]; 1875 1876 /* 1877 * To support gap tracking, all NULL entries are kept together and a node cannot 1878 * end on a NULL entry, with the exception of the left-most leaf. The 1879 * limitation means that the split of a node must be checked for this condition 1880 * and be able to put more data in one direction or the other. 1881 */ 1882 if (unlikely((mas->mas_flags & MA_STATE_BULK))) { 1883 *mid_split = 0; 1884 split = b_end - mt_min_slots[bn->type]; 1885 1886 if (!ma_is_leaf(bn->type)) 1887 return split; 1888 1889 mas->mas_flags |= MA_STATE_REBALANCE; 1890 if (!bn->slot[split]) 1891 split--; 1892 return split; 1893 } 1894 1895 /* 1896 * Although extremely rare, it is possible to enter what is known as the 3-way 1897 * split scenario. The 3-way split comes about by means of a store of a range 1898 * that overwrites the end and beginning of two full nodes. The result is a set 1899 * of entries that cannot be stored in 2 nodes. 
Sometimes, these two nodes can 1900 * also be located in different parent nodes which are also full. This can 1901 * carry upwards all the way to the root in the worst case. 1902 */ 1903 if (unlikely(mab_middle_node(bn, split, slot_count))) { 1904 split = b_end / 3; 1905 *mid_split = split * 2; 1906 } else { 1907 slot_min = mt_min_slots[bn->type]; 1908 1909 *mid_split = 0; 1910 /* 1911 * Avoid having a range less than the slot count unless it 1912 * causes one node to be deficient. 1913 * NOTE: mt_min_slots is 1 based, b_end and split are zero. 1914 */ 1915 while ((split < slot_count - 1) && 1916 ((bn->pivot[split] - min) < slot_count - 1) && 1917 (b_end - split > slot_min)) 1918 split++; 1919 } 1920 1921 /* Avoid ending a node on a NULL entry */ 1922 split = mab_no_null_split(bn, split, slot_count); 1923 1924 if (unlikely(*mid_split)) 1925 *mid_split = mab_no_null_split(bn, *mid_split, slot_count); 1926 1927 return split; 1928 } 1929 1930 /* 1931 * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node 1932 * and set @b_node->b_end to the next free slot. 1933 * @mas: The maple state 1934 * @mas_start: The starting slot to copy 1935 * @mas_end: The end slot to copy (inclusively) 1936 * @b_node: The maple_big_node to place the data 1937 * @mab_start: The starting location in maple_big_node to store the data. 1938 */ 1939 static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start, 1940 unsigned char mas_end, struct maple_big_node *b_node, 1941 unsigned char mab_start) 1942 { 1943 enum maple_type mt; 1944 struct maple_node *node; 1945 void __rcu **slots; 1946 unsigned long *pivots, *gaps; 1947 int i = mas_start, j = mab_start; 1948 unsigned char piv_end; 1949 1950 node = mas_mn(mas); 1951 mt = mte_node_type(mas->node); 1952 pivots = ma_pivots(node, mt); 1953 if (!i) { 1954 b_node->pivot[j] = pivots[i++]; 1955 if (unlikely(i > mas_end)) 1956 goto complete; 1957 j++; 1958 } 1959 1960 piv_end = min(mas_end, mt_pivots[mt]); 1961 for (; i < piv_end; i++, j++) { 1962 b_node->pivot[j] = pivots[i]; 1963 if (unlikely(!b_node->pivot[j])) 1964 break; 1965 1966 if (unlikely(mas->max == b_node->pivot[j])) 1967 goto complete; 1968 } 1969 1970 if (likely(i <= mas_end)) 1971 b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); 1972 1973 complete: 1974 b_node->b_end = ++j; 1975 j -= mab_start; 1976 slots = ma_slots(node, mt); 1977 memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j); 1978 if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) { 1979 gaps = ma_gaps(node, mt); 1980 memcpy(b_node->gap + mab_start, gaps + mas_start, 1981 sizeof(unsigned long) * j); 1982 } 1983 } 1984 1985 /* 1986 * mas_leaf_set_meta() - Set the metadata of a leaf if possible. 1987 * @mas: The maple state 1988 * @node: The maple node 1989 * @pivots: pointer to the maple node pivots 1990 * @mt: The maple type 1991 * @end: The assumed end 1992 * 1993 * Note, end may be incremented within this function but not modified at the 1994 * source. This is fine since the metadata is the last thing to be stored in a 1995 * node during a write. 
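 *
 * Put differently: metadata is only written while the node still has a free
 * slot to hold it.  If @end has already reached mt_pivots[mt], or the
 * adjusted end lands on the last slot, the node is full and no metadata is
 * written.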
1996 */ 1997 static inline void mas_leaf_set_meta(struct ma_state *mas, 1998 struct maple_node *node, unsigned long *pivots, 1999 enum maple_type mt, unsigned char end) 2000 { 2001 /* There is no room for metadata already */ 2002 if (mt_pivots[mt] <= end) 2003 return; 2004 2005 if (pivots[end] && pivots[end] < mas->max) 2006 end++; 2007 2008 if (end < mt_slots[mt] - 1) 2009 ma_set_meta(node, mt, 0, end); 2010 } 2011 2012 /* 2013 * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node. 2014 * @b_node: the maple_big_node that has the data 2015 * @mab_start: the start location in @b_node. 2016 * @mab_end: The end location in @b_node (inclusively) 2017 * @mas: The maple state with the maple encoded node. 2018 */ 2019 static inline void mab_mas_cp(struct maple_big_node *b_node, 2020 unsigned char mab_start, unsigned char mab_end, 2021 struct ma_state *mas, bool new_max) 2022 { 2023 int i, j = 0; 2024 enum maple_type mt = mte_node_type(mas->node); 2025 struct maple_node *node = mte_to_node(mas->node); 2026 void __rcu **slots = ma_slots(node, mt); 2027 unsigned long *pivots = ma_pivots(node, mt); 2028 unsigned long *gaps = NULL; 2029 unsigned char end; 2030 2031 if (mab_end - mab_start > mt_pivots[mt]) 2032 mab_end--; 2033 2034 if (!pivots[mt_pivots[mt] - 1]) 2035 slots[mt_pivots[mt]] = NULL; 2036 2037 i = mab_start; 2038 do { 2039 pivots[j++] = b_node->pivot[i++]; 2040 } while (i <= mab_end && likely(b_node->pivot[i])); 2041 2042 memcpy(slots, b_node->slot + mab_start, 2043 sizeof(void *) * (i - mab_start)); 2044 2045 if (new_max) 2046 mas->max = b_node->pivot[i - 1]; 2047 2048 end = j - 1; 2049 if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) { 2050 unsigned long max_gap = 0; 2051 unsigned char offset = 0; 2052 2053 gaps = ma_gaps(node, mt); 2054 do { 2055 gaps[--j] = b_node->gap[--i]; 2056 if (gaps[j] > max_gap) { 2057 offset = j; 2058 max_gap = gaps[j]; 2059 } 2060 } while (j); 2061 2062 ma_set_meta(node, mt, offset, end); 2063 } else { 2064 mas_leaf_set_meta(mas, node, pivots, mt, end); 2065 } 2066 } 2067 2068 /* 2069 * mas_descend_adopt() - Descend through a sub-tree and adopt children. 2070 * @mas: the maple state with the maple encoded node of the sub-tree. 2071 * 2072 * Descend through a sub-tree and adopt children who do not have the correct 2073 * parents set. Follow the parents which have the correct parents as they are 2074 * the new entries which need to be followed to find other incorrectly set 2075 * parents. 2076 */ 2077 static inline void mas_descend_adopt(struct ma_state *mas) 2078 { 2079 struct ma_state list[3], next[3]; 2080 int i, n; 2081 2082 /* 2083 * At each level there may be up to 3 correct parent pointers which indicates 2084 * the new nodes which need to be walked to find any new nodes at a lower level. 2085 */ 2086 2087 for (i = 0; i < 3; i++) { 2088 list[i] = *mas; 2089 list[i].offset = 0; 2090 next[i].offset = 0; 2091 } 2092 next[0] = *mas; 2093 2094 while (!mte_is_leaf(list[0].node)) { 2095 n = 0; 2096 for (i = 0; i < 3; i++) { 2097 if (mas_is_none(&list[i])) 2098 continue; 2099 2100 if (i && list[i-1].node == list[i].node) 2101 continue; 2102 2103 while ((n < 3) && (mas_new_child(&list[i], &next[n]))) 2104 n++; 2105 2106 mas_adopt_children(&list[i], list[i].node); 2107 } 2108 2109 while (n < 3) 2110 next[n++].node = MAS_NONE; 2111 2112 /* descend by setting the list to the children */ 2113 for (i = 0; i < 3; i++) 2114 list[i] = next[i]; 2115 } 2116 } 2117 2118 /* 2119 * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert. 
2120 * @mas: The maple state 2121 * @end: The maple node end 2122 * @mt: The maple node type 2123 */ 2124 static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end, 2125 enum maple_type mt) 2126 { 2127 if (!(mas->mas_flags & MA_STATE_BULK)) 2128 return; 2129 2130 if (mte_is_root(mas->node)) 2131 return; 2132 2133 if (end > mt_min_slots[mt]) { 2134 mas->mas_flags &= ~MA_STATE_REBALANCE; 2135 return; 2136 } 2137 } 2138 2139 /* 2140 * mas_store_b_node() - Store an @entry into the b_node while also copying the 2141 * data from a maple encoded node. 2142 * @wr_mas: the maple write state 2143 * @b_node: the maple_big_node to fill with data 2144 * @offset_end: the offset to end copying 2145 * 2146 * Return: The actual end of the data stored in @b_node 2147 */ 2148 static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas, 2149 struct maple_big_node *b_node, unsigned char offset_end) 2150 { 2151 unsigned char slot; 2152 unsigned char b_end; 2153 /* Possible underflow of piv will wrap back to 0 before use. */ 2154 unsigned long piv; 2155 struct ma_state *mas = wr_mas->mas; 2156 2157 b_node->type = wr_mas->type; 2158 b_end = 0; 2159 slot = mas->offset; 2160 if (slot) { 2161 /* Copy start data up to insert. */ 2162 mas_mab_cp(mas, 0, slot - 1, b_node, 0); 2163 b_end = b_node->b_end; 2164 piv = b_node->pivot[b_end - 1]; 2165 } else 2166 piv = mas->min - 1; 2167 2168 if (piv + 1 < mas->index) { 2169 /* Handle range starting after old range */ 2170 b_node->slot[b_end] = wr_mas->content; 2171 if (!wr_mas->content) 2172 b_node->gap[b_end] = mas->index - 1 - piv; 2173 b_node->pivot[b_end++] = mas->index - 1; 2174 } 2175 2176 /* Store the new entry. */ 2177 mas->offset = b_end; 2178 b_node->slot[b_end] = wr_mas->entry; 2179 b_node->pivot[b_end] = mas->last; 2180 2181 /* Appended. */ 2182 if (mas->last >= mas->max) 2183 goto b_end; 2184 2185 /* Handle new range ending before old range ends */ 2186 piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type); 2187 if (piv > mas->last) { 2188 if (piv == ULONG_MAX) 2189 mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type); 2190 2191 if (offset_end != slot) 2192 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 2193 offset_end); 2194 2195 b_node->slot[++b_end] = wr_mas->content; 2196 if (!wr_mas->content) 2197 b_node->gap[b_end] = piv - mas->last + 1; 2198 b_node->pivot[b_end] = piv; 2199 } 2200 2201 slot = offset_end + 1; 2202 if (slot > wr_mas->node_end) 2203 goto b_end; 2204 2205 /* Copy end data to the end of the node. */ 2206 mas_mab_cp(mas, slot, wr_mas->node_end + 1, b_node, ++b_end); 2207 b_node->b_end--; 2208 return; 2209 2210 b_end: 2211 b_node->b_end = b_end; 2212 } 2213 2214 /* 2215 * mas_prev_sibling() - Find the previous node with the same parent. 2216 * @mas: the maple state 2217 * 2218 * Return: True if there is a previous sibling, false otherwise. 2219 */ 2220 static inline bool mas_prev_sibling(struct ma_state *mas) 2221 { 2222 unsigned int p_slot = mte_parent_slot(mas->node); 2223 2224 if (mte_is_root(mas->node)) 2225 return false; 2226 2227 if (!p_slot) 2228 return false; 2229 2230 mas_ascend(mas); 2231 mas->offset = p_slot - 1; 2232 mas_descend(mas); 2233 return true; 2234 } 2235 2236 /* 2237 * mas_next_sibling() - Find the next node with the same parent. 2238 * @mas: the maple state 2239 * 2240 * Return: true if there is a next sibling, false otherwise. 
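 *
 * Illustrative walk (slot numbers are made up): if @mas is in the child
 * held in parent slot 2 and the parent's data ends at offset 5, the state
 * ascends, moves to offset 3 and descends into that child. If the child
 * already sat in the parent's last populated slot, false is returned and
 * @mas is left untouched.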
2241 */ 2242 static inline bool mas_next_sibling(struct ma_state *mas) 2243 { 2244 MA_STATE(parent, mas->tree, mas->index, mas->last); 2245 2246 if (mte_is_root(mas->node)) 2247 return false; 2248 2249 parent = *mas; 2250 mas_ascend(&parent); 2251 parent.offset = mte_parent_slot(mas->node) + 1; 2252 if (parent.offset > mas_data_end(&parent)) 2253 return false; 2254 2255 *mas = parent; 2256 mas_descend(mas); 2257 return true; 2258 } 2259 2260 /* 2261 * mte_node_or_node() - Return the encoded node or MAS_NONE. 2262 * @enode: The encoded maple node. 2263 * 2264 * Shorthand to avoid setting %NULLs in the tree or maple_subtree_state. 2265 * 2266 * Return: @enode or MAS_NONE 2267 */ 2268 static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode) 2269 { 2270 if (enode) 2271 return enode; 2272 2273 return ma_enode_ptr(MAS_NONE); 2274 } 2275 2276 /* 2277 * mas_wr_node_walk() - Find the correct offset for the index in the @mas. 2278 * @wr_mas: The maple write state 2279 * 2280 * Uses mas_slot_locked() and does not need to worry about dead nodes. 2281 */ 2282 static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) 2283 { 2284 struct ma_state *mas = wr_mas->mas; 2285 unsigned char count, offset; 2286 2287 if (unlikely(ma_is_dense(wr_mas->type))) { 2288 wr_mas->r_max = wr_mas->r_min = mas->index; 2289 mas->offset = mas->index = mas->min; 2290 return; 2291 } 2292 2293 wr_mas->node = mas_mn(wr_mas->mas); 2294 wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type); 2295 count = wr_mas->node_end = ma_data_end(wr_mas->node, wr_mas->type, 2296 wr_mas->pivots, mas->max); 2297 offset = mas->offset; 2298 2299 while (offset < count && mas->index > wr_mas->pivots[offset]) 2300 offset++; 2301 2302 wr_mas->r_max = offset < count ? wr_mas->pivots[offset] : mas->max; 2303 wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset); 2304 wr_mas->offset_end = mas->offset = offset; 2305 } 2306 2307 /* 2308 * mas_topiary_range() - Add a range of slots to the topiary. 2309 * @mas: The maple state 2310 * @destroy: The topiary to add the slots (usually destroy) 2311 * @start: The starting slot inclusively 2312 * @end: The end slot inclusively 2313 */ 2314 static inline void mas_topiary_range(struct ma_state *mas, 2315 struct ma_topiary *destroy, unsigned char start, unsigned char end) 2316 { 2317 void __rcu **slots; 2318 unsigned char offset; 2319 2320 MAS_BUG_ON(mas, mte_is_leaf(mas->node)); 2321 2322 slots = ma_slots(mas_mn(mas), mte_node_type(mas->node)); 2323 for (offset = start; offset <= end; offset++) { 2324 struct maple_enode *enode = mas_slot_locked(mas, slots, offset); 2325 2326 if (mte_dead_node(enode)) 2327 continue; 2328 2329 mat_add(destroy, enode); 2330 } 2331 } 2332 2333 /* 2334 * mast_topiary() - Add the portions of the tree to the removal list; either to 2335 * be freed or discarded (destroy walk). 2336 * @mast: The maple_subtree_state. 
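 *
 * Rough sketch of the ranges involved (illustrative only): the slots after
 * the left state's offset and the slots before the right state's offset
 * cover the children being replaced by the new sub-tree; any that are not
 * already dead are queued on @mast->destroy so they can be torn down once
 * the replacement is in place.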
2337 */ 2338 static inline void mast_topiary(struct maple_subtree_state *mast) 2339 { 2340 MA_WR_STATE(wr_mas, mast->orig_l, NULL); 2341 unsigned char r_start, r_end; 2342 unsigned char l_start, l_end; 2343 void __rcu **l_slots, **r_slots; 2344 2345 wr_mas.type = mte_node_type(mast->orig_l->node); 2346 mast->orig_l->index = mast->orig_l->last; 2347 mas_wr_node_walk(&wr_mas); 2348 l_start = mast->orig_l->offset + 1; 2349 l_end = mas_data_end(mast->orig_l); 2350 r_start = 0; 2351 r_end = mast->orig_r->offset; 2352 2353 if (r_end) 2354 r_end--; 2355 2356 l_slots = ma_slots(mas_mn(mast->orig_l), 2357 mte_node_type(mast->orig_l->node)); 2358 2359 r_slots = ma_slots(mas_mn(mast->orig_r), 2360 mte_node_type(mast->orig_r->node)); 2361 2362 if ((l_start < l_end) && 2363 mte_dead_node(mas_slot_locked(mast->orig_l, l_slots, l_start))) { 2364 l_start++; 2365 } 2366 2367 if (mte_dead_node(mas_slot_locked(mast->orig_r, r_slots, r_end))) { 2368 if (r_end) 2369 r_end--; 2370 } 2371 2372 if ((l_start > r_end) && (mast->orig_l->node == mast->orig_r->node)) 2373 return; 2374 2375 /* At the node where left and right sides meet, add the parts between */ 2376 if (mast->orig_l->node == mast->orig_r->node) { 2377 return mas_topiary_range(mast->orig_l, mast->destroy, 2378 l_start, r_end); 2379 } 2380 2381 /* mast->orig_r is different and consumed. */ 2382 if (mte_is_leaf(mast->orig_r->node)) 2383 return; 2384 2385 if (mte_dead_node(mas_slot_locked(mast->orig_l, l_slots, l_end))) 2386 l_end--; 2387 2388 2389 if (l_start <= l_end) 2390 mas_topiary_range(mast->orig_l, mast->destroy, l_start, l_end); 2391 2392 if (mte_dead_node(mas_slot_locked(mast->orig_r, r_slots, r_start))) 2393 r_start++; 2394 2395 if (r_start <= r_end) 2396 mas_topiary_range(mast->orig_r, mast->destroy, 0, r_end); 2397 } 2398 2399 /* 2400 * mast_rebalance_next() - Rebalance against the next node 2401 * @mast: The maple subtree state 2402 * @old_r: The encoded maple node to the right (next node). 2403 */ 2404 static inline void mast_rebalance_next(struct maple_subtree_state *mast) 2405 { 2406 unsigned char b_end = mast->bn->b_end; 2407 2408 mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node), 2409 mast->bn, b_end); 2410 mast->orig_r->last = mast->orig_r->max; 2411 } 2412 2413 /* 2414 * mast_rebalance_prev() - Rebalance against the previous node 2415 * @mast: The maple subtree state 2416 * @old_l: The encoded maple node to the left (previous node) 2417 */ 2418 static inline void mast_rebalance_prev(struct maple_subtree_state *mast) 2419 { 2420 unsigned char end = mas_data_end(mast->orig_l) + 1; 2421 unsigned char b_end = mast->bn->b_end; 2422 2423 mab_shift_right(mast->bn, end); 2424 mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0); 2425 mast->l->min = mast->orig_l->min; 2426 mast->orig_l->index = mast->orig_l->min; 2427 mast->bn->b_end = end + b_end; 2428 mast->l->offset += end; 2429 } 2430 2431 /* 2432 * mast_spanning_rebalance() - Rebalance nodes with nearest neighbour favouring 2433 * the node to the right. Checking the nodes to the right then the left at each 2434 * level upwards until root is reached. Free and destroy as needed. 2435 * Data is copied into the @mast->bn. 2436 * @mast: The maple_subtree_state. 
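 *
 * Illustrative walk: if the node to the right of @mast->orig_r has data,
 * it is pulled into @mast->bn via mast_rebalance_next() and the consumed
 * children are queued for freeing or destruction; only when no right
 * neighbour exists does the search fall back to the previous node via
 * mast_rebalance_prev(). The ascent stops as soon as one side supplies
 * data or the root is reached.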
2437 */ 2438 static inline 2439 bool mast_spanning_rebalance(struct maple_subtree_state *mast) 2440 { 2441 struct ma_state r_tmp = *mast->orig_r; 2442 struct ma_state l_tmp = *mast->orig_l; 2443 struct maple_enode *ancestor = NULL; 2444 unsigned char start, end; 2445 unsigned char depth = 0; 2446 2447 r_tmp = *mast->orig_r; 2448 l_tmp = *mast->orig_l; 2449 do { 2450 mas_ascend(mast->orig_r); 2451 mas_ascend(mast->orig_l); 2452 depth++; 2453 if (!ancestor && 2454 (mast->orig_r->node == mast->orig_l->node)) { 2455 ancestor = mast->orig_r->node; 2456 end = mast->orig_r->offset - 1; 2457 start = mast->orig_l->offset + 1; 2458 } 2459 2460 if (mast->orig_r->offset < mas_data_end(mast->orig_r)) { 2461 if (!ancestor) { 2462 ancestor = mast->orig_r->node; 2463 start = 0; 2464 } 2465 2466 mast->orig_r->offset++; 2467 do { 2468 mas_descend(mast->orig_r); 2469 mast->orig_r->offset = 0; 2470 depth--; 2471 } while (depth); 2472 2473 mast_rebalance_next(mast); 2474 do { 2475 unsigned char l_off = 0; 2476 struct maple_enode *child = r_tmp.node; 2477 2478 mas_ascend(&r_tmp); 2479 if (ancestor == r_tmp.node) 2480 l_off = start; 2481 2482 if (r_tmp.offset) 2483 r_tmp.offset--; 2484 2485 if (l_off < r_tmp.offset) 2486 mas_topiary_range(&r_tmp, mast->destroy, 2487 l_off, r_tmp.offset); 2488 2489 if (l_tmp.node != child) 2490 mat_add(mast->free, child); 2491 2492 } while (r_tmp.node != ancestor); 2493 2494 *mast->orig_l = l_tmp; 2495 return true; 2496 2497 } else if (mast->orig_l->offset != 0) { 2498 if (!ancestor) { 2499 ancestor = mast->orig_l->node; 2500 end = mas_data_end(mast->orig_l); 2501 } 2502 2503 mast->orig_l->offset--; 2504 do { 2505 mas_descend(mast->orig_l); 2506 mast->orig_l->offset = 2507 mas_data_end(mast->orig_l); 2508 depth--; 2509 } while (depth); 2510 2511 mast_rebalance_prev(mast); 2512 do { 2513 unsigned char r_off; 2514 struct maple_enode *child = l_tmp.node; 2515 2516 mas_ascend(&l_tmp); 2517 if (ancestor == l_tmp.node) 2518 r_off = end; 2519 else 2520 r_off = mas_data_end(&l_tmp); 2521 2522 if (l_tmp.offset < r_off) 2523 l_tmp.offset++; 2524 2525 if (l_tmp.offset < r_off) 2526 mas_topiary_range(&l_tmp, mast->destroy, 2527 l_tmp.offset, r_off); 2528 2529 if (r_tmp.node != child) 2530 mat_add(mast->free, child); 2531 2532 } while (l_tmp.node != ancestor); 2533 2534 *mast->orig_r = r_tmp; 2535 return true; 2536 } 2537 } while (!mte_is_root(mast->orig_r->node)); 2538 2539 *mast->orig_r = r_tmp; 2540 *mast->orig_l = l_tmp; 2541 return false; 2542 } 2543 2544 /* 2545 * mast_ascend_free() - Add current original maple state nodes to the free list 2546 * and ascend. 2547 * @mast: the maple subtree state. 2548 * 2549 * Ascend the original left and right sides and add the previous nodes to the 2550 * free list. Set the slots to point to the correct location in the new nodes. 
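 *
 * Rough sketch of its role in the rebalance loop below: once a level's
 * data has been copied out, the old left and right nodes are no longer
 * needed in the new sub-tree, so they are queued on @mast->free and both
 * write states are re-walked one level up so the next iteration sees the
 * correct offsets there.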
2551 */ 2552 static inline void 2553 mast_ascend_free(struct maple_subtree_state *mast) 2554 { 2555 MA_WR_STATE(wr_mas, mast->orig_r, NULL); 2556 struct maple_enode *left = mast->orig_l->node; 2557 struct maple_enode *right = mast->orig_r->node; 2558 2559 mas_ascend(mast->orig_l); 2560 mas_ascend(mast->orig_r); 2561 mat_add(mast->free, left); 2562 2563 if (left != right) 2564 mat_add(mast->free, right); 2565 2566 mast->orig_r->offset = 0; 2567 mast->orig_r->index = mast->r->max; 2568 /* last should be larger than or equal to index */ 2569 if (mast->orig_r->last < mast->orig_r->index) 2570 mast->orig_r->last = mast->orig_r->index; 2571 /* 2572 * The node may not contain the value so set slot to ensure all 2573 * of the nodes contents are freed or destroyed. 2574 */ 2575 wr_mas.type = mte_node_type(mast->orig_r->node); 2576 mas_wr_node_walk(&wr_mas); 2577 /* Set up the left side of things */ 2578 mast->orig_l->offset = 0; 2579 mast->orig_l->index = mast->l->min; 2580 wr_mas.mas = mast->orig_l; 2581 wr_mas.type = mte_node_type(mast->orig_l->node); 2582 mas_wr_node_walk(&wr_mas); 2583 2584 mast->bn->type = wr_mas.type; 2585 } 2586 2587 /* 2588 * mas_new_ma_node() - Create and return a new maple node. Helper function. 2589 * @mas: the maple state with the allocations. 2590 * @b_node: the maple_big_node with the type encoding. 2591 * 2592 * Use the node type from the maple_big_node to allocate a new node from the 2593 * ma_state. This function exists mainly for code readability. 2594 * 2595 * Return: A new maple encoded node 2596 */ 2597 static inline struct maple_enode 2598 *mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node) 2599 { 2600 return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type); 2601 } 2602 2603 /* 2604 * mas_mab_to_node() - Set up right and middle nodes 2605 * 2606 * @mas: the maple state that contains the allocations. 2607 * @b_node: the node which contains the data. 2608 * @left: The pointer which will have the left node 2609 * @right: The pointer which may have the right node 2610 * @middle: the pointer which may have the middle node (rare) 2611 * @mid_split: the split location for the middle node 2612 * 2613 * Return: the split of left. 2614 */ 2615 static inline unsigned char mas_mab_to_node(struct ma_state *mas, 2616 struct maple_big_node *b_node, struct maple_enode **left, 2617 struct maple_enode **right, struct maple_enode **middle, 2618 unsigned char *mid_split, unsigned long min) 2619 { 2620 unsigned char split = 0; 2621 unsigned char slot_count = mt_slots[b_node->type]; 2622 2623 *left = mas_new_ma_node(mas, b_node); 2624 *right = NULL; 2625 *middle = NULL; 2626 *mid_split = 0; 2627 2628 if (b_node->b_end < slot_count) { 2629 split = b_node->b_end; 2630 } else { 2631 split = mab_calc_split(mas, b_node, mid_split, min); 2632 *right = mas_new_ma_node(mas, b_node); 2633 } 2634 2635 if (*mid_split) 2636 *middle = mas_new_ma_node(mas, b_node); 2637 2638 return split; 2639 2640 } 2641 2642 /* 2643 * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end 2644 * pointer. 2645 * @b_node - the big node to add the entry 2646 * @mas - the maple state to get the pivot (mas->max) 2647 * @entry - the entry to add, if NULL nothing happens. 
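 *
 * Illustrative example: with b_end == 3, a non-NULL @entry lands in slot 3,
 * the gap is recorded from mas_max_gap() for allocation trees, the pivot is
 * taken from @mas->max, and b_end becomes 4. A NULL @entry leaves the big
 * node untouched.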
2648 */ 2649 static inline void mab_set_b_end(struct maple_big_node *b_node, 2650 struct ma_state *mas, 2651 void *entry) 2652 { 2653 if (!entry) 2654 return; 2655 2656 b_node->slot[b_node->b_end] = entry; 2657 if (mt_is_alloc(mas->tree)) 2658 b_node->gap[b_node->b_end] = mas_max_gap(mas); 2659 b_node->pivot[b_node->b_end++] = mas->max; 2660 } 2661 2662 /* 2663 * mas_set_split_parent() - combine_then_separate helper function. Sets the parent 2664 * of @mas->node to either @left or @right, depending on @slot and @split 2665 * 2666 * @mas - the maple state with the node that needs a parent 2667 * @left - possible parent 1 2668 * @right - possible parent 2 2669 * @slot - the slot the mas->node was placed 2670 * @split - the split location between @left and @right 2671 */ 2672 static inline void mas_set_split_parent(struct ma_state *mas, 2673 struct maple_enode *left, 2674 struct maple_enode *right, 2675 unsigned char *slot, unsigned char split) 2676 { 2677 if (mas_is_none(mas)) 2678 return; 2679 2680 if ((*slot) <= split) 2681 mas_set_parent(mas, mas->node, left, *slot); 2682 else if (right) 2683 mas_set_parent(mas, mas->node, right, (*slot) - split - 1); 2684 2685 (*slot)++; 2686 } 2687 2688 /* 2689 * mte_mid_split_check() - Check if the next node passes the mid-split 2690 * @**l: Pointer to left encoded maple node. 2691 * @**m: Pointer to middle encoded maple node. 2692 * @**r: Pointer to right encoded maple node. 2693 * @slot: The offset 2694 * @*split: The split location. 2695 * @mid_split: The middle split. 2696 */ 2697 static inline void mte_mid_split_check(struct maple_enode **l, 2698 struct maple_enode **r, 2699 struct maple_enode *right, 2700 unsigned char slot, 2701 unsigned char *split, 2702 unsigned char mid_split) 2703 { 2704 if (*r == right) 2705 return; 2706 2707 if (slot < mid_split) 2708 return; 2709 2710 *l = *r; 2711 *r = right; 2712 *split = mid_split; 2713 } 2714 2715 /* 2716 * mast_set_split_parents() - Helper function to set three nodes parents. Slot 2717 * is taken from @mast->l. 2718 * @mast - the maple subtree state 2719 * @left - the left node 2720 * @right - the right node 2721 * @split - the split location. 2722 */ 2723 static inline void mast_set_split_parents(struct maple_subtree_state *mast, 2724 struct maple_enode *left, 2725 struct maple_enode *middle, 2726 struct maple_enode *right, 2727 unsigned char split, 2728 unsigned char mid_split) 2729 { 2730 unsigned char slot; 2731 struct maple_enode *l = left; 2732 struct maple_enode *r = right; 2733 2734 if (mas_is_none(mast->l)) 2735 return; 2736 2737 if (middle) 2738 r = middle; 2739 2740 slot = mast->l->offset; 2741 2742 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2743 mas_set_split_parent(mast->l, l, r, &slot, split); 2744 2745 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2746 mas_set_split_parent(mast->m, l, r, &slot, split); 2747 2748 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2749 mas_set_split_parent(mast->r, l, r, &slot, split); 2750 } 2751 2752 /* 2753 * mas_wmb_replace() - Write memory barrier and replace 2754 * @mas: The maple state 2755 * @free: the maple topiary list of nodes to free 2756 * @destroy: The maple topiary list of nodes to destroy (walk and free) 2757 * 2758 * Updates gap as necessary. 
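 *
 * Rough ordering sketch: the new node is published over the old one with
 * mas_put_in_tree(), child parent pointers are repaired with
 * mas_descend_adopt() for non-leaf nodes, and only then are the @free and
 * @destroy lists handed back, followed by a gap update for non-leaf nodes.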
2759 */ 2760 static inline void mas_wmb_replace(struct ma_state *mas, 2761 struct ma_topiary *free, 2762 struct ma_topiary *destroy) 2763 { 2764 struct maple_enode *old_enode; 2765 2766 if (mte_is_root(mas->node)) { 2767 old_enode = mas_root_locked(mas); 2768 } else { 2769 unsigned char offset = mte_parent_slot(mas->node); 2770 void __rcu **slots = ma_slots(mte_parent(mas->node), 2771 mas_parent_type(mas, mas->node)); 2772 2773 old_enode = mas_slot_locked(mas, slots, offset); 2774 } 2775 2776 /* Insert the new data in the tree */ 2777 mas_put_in_tree(mas, old_enode); 2778 2779 if (!mte_is_leaf(mas->node)) 2780 mas_descend_adopt(mas); 2781 2782 mas_mat_free(mas, free); 2783 2784 if (destroy) 2785 mas_mat_destroy(mas, destroy); 2786 2787 if (mte_is_leaf(mas->node)) 2788 return; 2789 2790 mas_update_gap(mas); 2791 } 2792 2793 /* 2794 * mast_new_root() - Set a new tree root during subtree creation 2795 * @mast: The maple subtree state 2796 * @mas: The maple state 2797 */ 2798 static inline void mast_new_root(struct maple_subtree_state *mast, 2799 struct ma_state *mas) 2800 { 2801 mas_mn(mast->l)->parent = ma_parent_ptr(mas_tree_parent(mas)); 2802 if (!mte_dead_node(mast->orig_l->node) && 2803 !mte_is_root(mast->orig_l->node)) { 2804 do { 2805 mast_ascend_free(mast); 2806 mast_topiary(mast); 2807 } while (!mte_is_root(mast->orig_l->node)); 2808 } 2809 if ((mast->orig_l->node != mas->node) && 2810 (mast->l->depth > mas_mt_height(mas))) { 2811 mat_add(mast->free, mas->node); 2812 } 2813 } 2814 2815 /* 2816 * mast_cp_to_nodes() - Copy data out to nodes. 2817 * @mast: The maple subtree state 2818 * @left: The left encoded maple node 2819 * @middle: The middle encoded maple node 2820 * @right: The right encoded maple node 2821 * @split: The location to split between left and (middle ? middle : right) 2822 * @mid_split: The location to split between middle and right. 2823 */ 2824 static inline void mast_cp_to_nodes(struct maple_subtree_state *mast, 2825 struct maple_enode *left, struct maple_enode *middle, 2826 struct maple_enode *right, unsigned char split, unsigned char mid_split) 2827 { 2828 bool new_lmax = true; 2829 2830 mast->l->node = mte_node_or_none(left); 2831 mast->m->node = mte_node_or_none(middle); 2832 mast->r->node = mte_node_or_none(right); 2833 2834 mast->l->min = mast->orig_l->min; 2835 if (split == mast->bn->b_end) { 2836 mast->l->max = mast->orig_r->max; 2837 new_lmax = false; 2838 } 2839 2840 mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax); 2841 2842 if (middle) { 2843 mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true); 2844 mast->m->min = mast->bn->pivot[split] + 1; 2845 split = mid_split; 2846 } 2847 2848 mast->r->max = mast->orig_r->max; 2849 if (right) { 2850 mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false); 2851 mast->r->min = mast->bn->pivot[split] + 1; 2852 } 2853 } 2854 2855 /* 2856 * mast_combine_cp_left - Copy in the original left side of the tree into the 2857 * combined data set in the maple subtree state big node. 2858 * @mast: The maple subtree state 2859 */ 2860 static inline void mast_combine_cp_left(struct maple_subtree_state *mast) 2861 { 2862 unsigned char l_slot = mast->orig_l->offset; 2863 2864 if (!l_slot) 2865 return; 2866 2867 mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0); 2868 } 2869 2870 /* 2871 * mast_combine_cp_right: Copy in the original right side of the tree into the 2872 * combined data set in the maple subtree state big node. 
2873 * @mast: The maple subtree state 2874 */ 2875 static inline void mast_combine_cp_right(struct maple_subtree_state *mast) 2876 { 2877 if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max) 2878 return; 2879 2880 mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1, 2881 mt_slot_count(mast->orig_r->node), mast->bn, 2882 mast->bn->b_end); 2883 mast->orig_r->last = mast->orig_r->max; 2884 } 2885 2886 /* 2887 * mast_sufficient: Check if the maple subtree state has enough data in the big 2888 * node to create at least one sufficient node 2889 * @mast: the maple subtree state 2890 */ 2891 static inline bool mast_sufficient(struct maple_subtree_state *mast) 2892 { 2893 if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node)) 2894 return true; 2895 2896 return false; 2897 } 2898 2899 /* 2900 * mast_overflow: Check if there is too much data in the subtree state for a 2901 * single node. 2902 * @mast: The maple subtree state 2903 */ 2904 static inline bool mast_overflow(struct maple_subtree_state *mast) 2905 { 2906 if (mast->bn->b_end >= mt_slot_count(mast->orig_l->node)) 2907 return true; 2908 2909 return false; 2910 } 2911 2912 static inline void *mtree_range_walk(struct ma_state *mas) 2913 { 2914 unsigned long *pivots; 2915 unsigned char offset; 2916 struct maple_node *node; 2917 struct maple_enode *next, *last; 2918 enum maple_type type; 2919 void __rcu **slots; 2920 unsigned char end; 2921 unsigned long max, min; 2922 unsigned long prev_max, prev_min; 2923 2924 next = mas->node; 2925 min = mas->min; 2926 max = mas->max; 2927 do { 2928 offset = 0; 2929 last = next; 2930 node = mte_to_node(next); 2931 type = mte_node_type(next); 2932 pivots = ma_pivots(node, type); 2933 end = ma_data_end(node, type, pivots, max); 2934 if (unlikely(ma_dead_node(node))) 2935 goto dead_node; 2936 2937 if (pivots[offset] >= mas->index) { 2938 prev_max = max; 2939 prev_min = min; 2940 max = pivots[offset]; 2941 goto next; 2942 } 2943 2944 do { 2945 offset++; 2946 } while ((offset < end) && (pivots[offset] < mas->index)); 2947 2948 prev_min = min; 2949 min = pivots[offset - 1] + 1; 2950 prev_max = max; 2951 if (likely(offset < end && pivots[offset])) 2952 max = pivots[offset]; 2953 2954 next: 2955 slots = ma_slots(node, type); 2956 next = mt_slot(mas->tree, slots, offset); 2957 if (unlikely(ma_dead_node(node))) 2958 goto dead_node; 2959 } while (!ma_is_leaf(type)); 2960 2961 mas->offset = offset; 2962 mas->index = min; 2963 mas->last = max; 2964 mas->min = prev_min; 2965 mas->max = prev_max; 2966 mas->node = last; 2967 return (void *)next; 2968 2969 dead_node: 2970 mas_reset(mas); 2971 return NULL; 2972 } 2973 2974 /* 2975 * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers. 2976 * @mas: The starting maple state 2977 * @mast: The maple_subtree_state, keeps track of 4 maple states. 2978 * @count: The estimated count of iterations needed. 2979 * 2980 * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root 2981 * is hit. First @b_node is split into two entries which are inserted into the 2982 * next iteration of the loop. @b_node is returned populated with the final 2983 * iteration. @mas is used to obtain allocations. orig_l_mas keeps track of the 2984 * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last 2985 * to account of what has been copied into the new sub-tree. The update of 2986 * orig_l_mas->last is used in mas_consume to find the slots that will need to 2987 * be either freed or destroyed. 
orig_l_mas->depth keeps track of the height of 2988 * the new sub-tree in case the sub-tree becomes the full tree. 2989 * 2990 * Return: the number of elements in b_node during the last loop. 2991 */ 2992 static int mas_spanning_rebalance(struct ma_state *mas, 2993 struct maple_subtree_state *mast, unsigned char count) 2994 { 2995 unsigned char split, mid_split; 2996 unsigned char slot = 0; 2997 struct maple_enode *left = NULL, *middle = NULL, *right = NULL; 2998 2999 MA_STATE(l_mas, mas->tree, mas->index, mas->index); 3000 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 3001 MA_STATE(m_mas, mas->tree, mas->index, mas->index); 3002 MA_TOPIARY(free, mas->tree); 3003 MA_TOPIARY(destroy, mas->tree); 3004 3005 /* 3006 * The tree needs to be rebalanced and leaves need to be kept at the same level. 3007 * Rebalancing is done by use of the ``struct maple_topiary``. 3008 */ 3009 mast->l = &l_mas; 3010 mast->m = &m_mas; 3011 mast->r = &r_mas; 3012 mast->free = &free; 3013 mast->destroy = &destroy; 3014 l_mas.node = r_mas.node = m_mas.node = MAS_NONE; 3015 3016 /* Check if this is not root and has sufficient data. */ 3017 if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) && 3018 unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type])) 3019 mast_spanning_rebalance(mast); 3020 3021 mast->orig_l->depth = 0; 3022 3023 /* 3024 * Each level of the tree is examined and balanced, pushing data to the left or 3025 * right, or rebalancing against left or right nodes is employed to avoid 3026 * rippling up the tree to limit the amount of churn. Once a new sub-section of 3027 * the tree is created, there may be a mix of new and old nodes. The old nodes 3028 * will have the incorrect parent pointers and currently be in two trees: the 3029 * original tree and the partially new tree. To remedy the parent pointers in 3030 * the old tree, the new data is swapped into the active tree and a walk down 3031 * the tree is performed and the parent pointers are updated. 3032 * See mas_descend_adopt() for more information.. 3033 */ 3034 while (count--) { 3035 mast->bn->b_end--; 3036 mast->bn->type = mte_node_type(mast->orig_l->node); 3037 split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle, 3038 &mid_split, mast->orig_l->min); 3039 mast_set_split_parents(mast, left, middle, right, split, 3040 mid_split); 3041 mast_cp_to_nodes(mast, left, middle, right, split, mid_split); 3042 3043 /* 3044 * Copy data from next level in the tree to mast->bn from next 3045 * iteration 3046 */ 3047 memset(mast->bn, 0, sizeof(struct maple_big_node)); 3048 mast->bn->type = mte_node_type(left); 3049 mast->orig_l->depth++; 3050 3051 /* Root already stored in l->node. */ 3052 if (mas_is_root_limits(mast->l)) 3053 goto new_root; 3054 3055 mast_ascend_free(mast); 3056 mast_combine_cp_left(mast); 3057 l_mas.offset = mast->bn->b_end; 3058 mab_set_b_end(mast->bn, &l_mas, left); 3059 mab_set_b_end(mast->bn, &m_mas, middle); 3060 mab_set_b_end(mast->bn, &r_mas, right); 3061 3062 /* Copy anything necessary out of the right node. */ 3063 mast_combine_cp_right(mast); 3064 mast_topiary(mast); 3065 mast->orig_l->last = mast->orig_l->max; 3066 3067 if (mast_sufficient(mast)) 3068 continue; 3069 3070 if (mast_overflow(mast)) 3071 continue; 3072 3073 /* May be a new root stored in mast->bn */ 3074 if (mas_is_root_limits(mast->orig_l)) 3075 break; 3076 3077 mast_spanning_rebalance(mast); 3078 3079 /* rebalancing from other nodes may require another loop. 
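 * Pulling data in from a neighbouring sub-tree can exhaust the iteration
 * estimate passed in by the caller before the new nodes are linked back
 * in, so one extra pass is granted here.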
*/ 3080 if (!count) 3081 count++; 3082 } 3083 3084 l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), 3085 mte_node_type(mast->orig_l->node)); 3086 mast->orig_l->depth++; 3087 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true); 3088 mas_set_parent(mas, left, l_mas.node, slot); 3089 if (middle) 3090 mas_set_parent(mas, middle, l_mas.node, ++slot); 3091 3092 if (right) 3093 mas_set_parent(mas, right, l_mas.node, ++slot); 3094 3095 if (mas_is_root_limits(mast->l)) { 3096 new_root: 3097 mast_new_root(mast, mas); 3098 } else { 3099 mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent; 3100 } 3101 3102 if (!mte_dead_node(mast->orig_l->node)) 3103 mat_add(&free, mast->orig_l->node); 3104 3105 mas->depth = mast->orig_l->depth; 3106 *mast->orig_l = l_mas; 3107 mte_set_node_dead(mas->node); 3108 3109 /* Set up mas for insertion. */ 3110 mast->orig_l->depth = mas->depth; 3111 mast->orig_l->alloc = mas->alloc; 3112 *mas = *mast->orig_l; 3113 mas_wmb_replace(mas, &free, &destroy); 3114 mtree_range_walk(mas); 3115 return mast->bn->b_end; 3116 } 3117 3118 /* 3119 * mas_rebalance() - Rebalance a given node. 3120 * @mas: The maple state 3121 * @b_node: The big maple node. 3122 * 3123 * Rebalance two nodes into a single node or two new nodes that are sufficient. 3124 * Continue upwards until tree is sufficient. 3125 * 3126 * Return: the number of elements in b_node during the last loop. 3127 */ 3128 static inline int mas_rebalance(struct ma_state *mas, 3129 struct maple_big_node *b_node) 3130 { 3131 char empty_count = mas_mt_height(mas); 3132 struct maple_subtree_state mast; 3133 unsigned char shift, b_end = ++b_node->b_end; 3134 3135 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 3136 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 3137 3138 trace_ma_op(__func__, mas); 3139 3140 /* 3141 * Rebalancing occurs if a node is insufficient. Data is rebalanced 3142 * against the node to the right if it exists, otherwise the node to the 3143 * left of this node is rebalanced against this node. If rebalancing 3144 * causes just one node to be produced instead of two, then the parent 3145 * is also examined and rebalanced if it is insufficient. Every level 3146 * tries to combine the data in the same way. If one node contains the 3147 * entire range of the tree, then that node is used as a new root node. 3148 */ 3149 mas_node_count(mas, empty_count * 2 - 1); 3150 if (mas_is_err(mas)) 3151 return 0; 3152 3153 mast.orig_l = &l_mas; 3154 mast.orig_r = &r_mas; 3155 mast.bn = b_node; 3156 mast.bn->type = mte_node_type(mas->node); 3157 3158 l_mas = r_mas = *mas; 3159 3160 if (mas_next_sibling(&r_mas)) { 3161 mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end); 3162 r_mas.last = r_mas.index = r_mas.max; 3163 } else { 3164 mas_prev_sibling(&l_mas); 3165 shift = mas_data_end(&l_mas) + 1; 3166 mab_shift_right(b_node, shift); 3167 mas->offset += shift; 3168 mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0); 3169 b_node->b_end = shift + b_end; 3170 l_mas.index = l_mas.last = l_mas.min; 3171 } 3172 3173 return mas_spanning_rebalance(mas, &mast, empty_count); 3174 } 3175 3176 /* 3177 * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple 3178 * state. 3179 * @mas: The maple state 3180 * @end: The end of the left-most node. 3181 * 3182 * During a mass-insert event (such as forking), it may be necessary to 3183 * rebalance the left-most node when it is not sufficient. 
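 *
 * Illustrative scenario: bulk mode packs nodes as full as possible, so the
 * node left over when the insert stops may hold fewer than the minimum
 * number of entries. Data is therefore pulled across from the previous
 * sibling, and under RCU replacement nodes are allocated for both siblings
 * and their parent rather than editing them in place.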
3184 */ 3185 static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end) 3186 { 3187 enum maple_type mt = mte_node_type(mas->node); 3188 struct maple_node reuse, *newnode, *parent, *new_left, *left, *node; 3189 struct maple_enode *eparent, *old_eparent; 3190 unsigned char offset, tmp, split = mt_slots[mt] / 2; 3191 void __rcu **l_slots, **slots; 3192 unsigned long *l_pivs, *pivs, gap; 3193 bool in_rcu = mt_in_rcu(mas->tree); 3194 3195 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 3196 3197 l_mas = *mas; 3198 mas_prev_sibling(&l_mas); 3199 3200 /* set up node. */ 3201 if (in_rcu) { 3202 /* Allocate for both left and right as well as parent. */ 3203 mas_node_count(mas, 3); 3204 if (mas_is_err(mas)) 3205 return; 3206 3207 newnode = mas_pop_node(mas); 3208 } else { 3209 newnode = &reuse; 3210 } 3211 3212 node = mas_mn(mas); 3213 newnode->parent = node->parent; 3214 slots = ma_slots(newnode, mt); 3215 pivs = ma_pivots(newnode, mt); 3216 left = mas_mn(&l_mas); 3217 l_slots = ma_slots(left, mt); 3218 l_pivs = ma_pivots(left, mt); 3219 if (!l_slots[split]) 3220 split++; 3221 tmp = mas_data_end(&l_mas) - split; 3222 3223 memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp); 3224 memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp); 3225 pivs[tmp] = l_mas.max; 3226 memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end); 3227 memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end); 3228 3229 l_mas.max = l_pivs[split]; 3230 mas->min = l_mas.max + 1; 3231 old_eparent = mt_mk_node(mte_parent(l_mas.node), 3232 mas_parent_type(&l_mas, l_mas.node)); 3233 tmp += end; 3234 if (!in_rcu) { 3235 unsigned char max_p = mt_pivots[mt]; 3236 unsigned char max_s = mt_slots[mt]; 3237 3238 if (tmp < max_p) 3239 memset(pivs + tmp, 0, 3240 sizeof(unsigned long) * (max_p - tmp)); 3241 3242 if (tmp < mt_slots[mt]) 3243 memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp)); 3244 3245 memcpy(node, newnode, sizeof(struct maple_node)); 3246 ma_set_meta(node, mt, 0, tmp - 1); 3247 mte_set_pivot(old_eparent, mte_parent_slot(l_mas.node), 3248 l_pivs[split]); 3249 3250 /* Remove data from l_pivs. */ 3251 tmp = split + 1; 3252 memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp)); 3253 memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp)); 3254 ma_set_meta(left, mt, 0, split); 3255 eparent = old_eparent; 3256 3257 goto done; 3258 } 3259 3260 /* RCU requires replacing both l_mas, mas, and parent. */ 3261 mas->node = mt_mk_node(newnode, mt); 3262 ma_set_meta(newnode, mt, 0, tmp); 3263 3264 new_left = mas_pop_node(mas); 3265 new_left->parent = left->parent; 3266 mt = mte_node_type(l_mas.node); 3267 slots = ma_slots(new_left, mt); 3268 pivs = ma_pivots(new_left, mt); 3269 memcpy(slots, l_slots, sizeof(void *) * split); 3270 memcpy(pivs, l_pivs, sizeof(unsigned long) * split); 3271 ma_set_meta(new_left, mt, 0, split); 3272 l_mas.node = mt_mk_node(new_left, mt); 3273 3274 /* replace parent. 
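 * Under RCU the old parent cannot be edited in place either: a freshly
 * allocated node is populated from the old parent's contents, the two new
 * children are slotted in, and the copy is later swapped into the tree.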
*/ 3275 offset = mte_parent_slot(mas->node); 3276 mt = mas_parent_type(&l_mas, l_mas.node); 3277 parent = mas_pop_node(mas); 3278 slots = ma_slots(parent, mt); 3279 pivs = ma_pivots(parent, mt); 3280 memcpy(parent, mte_to_node(old_eparent), sizeof(struct maple_node)); 3281 rcu_assign_pointer(slots[offset], mas->node); 3282 rcu_assign_pointer(slots[offset - 1], l_mas.node); 3283 pivs[offset - 1] = l_mas.max; 3284 eparent = mt_mk_node(parent, mt); 3285 done: 3286 gap = mas_leaf_max_gap(mas); 3287 mte_set_gap(eparent, mte_parent_slot(mas->node), gap); 3288 gap = mas_leaf_max_gap(&l_mas); 3289 mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap); 3290 mas_ascend(mas); 3291 3292 if (in_rcu) { 3293 mas_replace_node(mas, old_eparent); 3294 mas_adopt_children(mas, mas->node); 3295 } 3296 3297 mas_update_gap(mas); 3298 } 3299 3300 /* 3301 * mas_split_final_node() - Split the final node in a subtree operation. 3302 * @mast: the maple subtree state 3303 * @mas: The maple state 3304 * @height: The height of the tree in case it's a new root. 3305 */ 3306 static inline bool mas_split_final_node(struct maple_subtree_state *mast, 3307 struct ma_state *mas, int height) 3308 { 3309 struct maple_enode *ancestor; 3310 3311 if (mte_is_root(mas->node)) { 3312 if (mt_is_alloc(mas->tree)) 3313 mast->bn->type = maple_arange_64; 3314 else 3315 mast->bn->type = maple_range_64; 3316 mas->depth = height; 3317 } 3318 /* 3319 * Only a single node is used here, could be root. 3320 * The Big_node data should just fit in a single node. 3321 */ 3322 ancestor = mas_new_ma_node(mas, mast->bn); 3323 mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset); 3324 mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset); 3325 mte_to_node(ancestor)->parent = mas_mn(mas)->parent; 3326 3327 mast->l->node = ancestor; 3328 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true); 3329 mas->offset = mast->bn->b_end - 1; 3330 return true; 3331 } 3332 3333 /* 3334 * mast_fill_bnode() - Copy data into the big node in the subtree state 3335 * @mast: The maple subtree state 3336 * @mas: the maple state 3337 * @skip: The number of entries to skip for new nodes insertion. 3338 */ 3339 static inline void mast_fill_bnode(struct maple_subtree_state *mast, 3340 struct ma_state *mas, 3341 unsigned char skip) 3342 { 3343 bool cp = true; 3344 struct maple_enode *old = mas->node; 3345 unsigned char split; 3346 3347 memset(mast->bn->gap, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->gap)); 3348 memset(mast->bn->slot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->slot)); 3349 memset(mast->bn->pivot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->pivot)); 3350 mast->bn->b_end = 0; 3351 3352 if (mte_is_root(mas->node)) { 3353 cp = false; 3354 } else { 3355 mas_ascend(mas); 3356 mat_add(mast->free, old); 3357 mas->offset = mte_parent_slot(mas->node); 3358 } 3359 3360 if (cp && mast->l->offset) 3361 mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0); 3362 3363 split = mast->bn->b_end; 3364 mab_set_b_end(mast->bn, mast->l, mast->l->node); 3365 mast->r->offset = mast->bn->b_end; 3366 mab_set_b_end(mast->bn, mast->r, mast->r->node); 3367 if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max) 3368 cp = false; 3369 3370 if (cp) 3371 mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1, 3372 mast->bn, mast->bn->b_end); 3373 3374 mast->bn->b_end--; 3375 mast->bn->type = mte_node_type(mas->node); 3376 } 3377 3378 /* 3379 * mast_split_data() - Split the data in the subtree state big node into regular 3380 * nodes. 
3381 * @mast: The maple subtree state 3382 * @mas: The maple state 3383 * @split: The location to split the big node 3384 */ 3385 static inline void mast_split_data(struct maple_subtree_state *mast, 3386 struct ma_state *mas, unsigned char split) 3387 { 3388 unsigned char p_slot; 3389 3390 mab_mas_cp(mast->bn, 0, split, mast->l, true); 3391 mte_set_pivot(mast->r->node, 0, mast->r->max); 3392 mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false); 3393 mast->l->offset = mte_parent_slot(mas->node); 3394 mast->l->max = mast->bn->pivot[split]; 3395 mast->r->min = mast->l->max + 1; 3396 if (mte_is_leaf(mas->node)) 3397 return; 3398 3399 p_slot = mast->orig_l->offset; 3400 mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node, 3401 &p_slot, split); 3402 mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node, 3403 &p_slot, split); 3404 } 3405 3406 /* 3407 * mas_push_data() - Instead of splitting a node, it is beneficial to push the 3408 * data to the right or left node if there is room. 3409 * @mas: The maple state 3410 * @height: The current height of the maple state 3411 * @mast: The maple subtree state 3412 * @left: Push left or not. 3413 * 3414 * Keeping the height of the tree low means faster lookups. 3415 * 3416 * Return: True if pushed, false otherwise. 3417 */ 3418 static inline bool mas_push_data(struct ma_state *mas, int height, 3419 struct maple_subtree_state *mast, bool left) 3420 { 3421 unsigned char slot_total = mast->bn->b_end; 3422 unsigned char end, space, split; 3423 3424 MA_STATE(tmp_mas, mas->tree, mas->index, mas->last); 3425 tmp_mas = *mas; 3426 tmp_mas.depth = mast->l->depth; 3427 3428 if (left && !mas_prev_sibling(&tmp_mas)) 3429 return false; 3430 else if (!left && !mas_next_sibling(&tmp_mas)) 3431 return false; 3432 3433 end = mas_data_end(&tmp_mas); 3434 slot_total += end; 3435 space = 2 * mt_slot_count(mas->node) - 2; 3436 /* -2 instead of -1 to ensure there isn't a triple split */ 3437 if (ma_is_leaf(mast->bn->type)) 3438 space--; 3439 3440 if (mas->max == ULONG_MAX) 3441 space--; 3442 3443 if (slot_total >= space) 3444 return false; 3445 3446 /* Get the data; Fill mast->bn */ 3447 mast->bn->b_end++; 3448 if (left) { 3449 mab_shift_right(mast->bn, end + 1); 3450 mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0); 3451 mast->bn->b_end = slot_total + 1; 3452 } else { 3453 mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end); 3454 } 3455 3456 /* Configure mast for splitting of mast->bn */ 3457 split = mt_slots[mast->bn->type] - 2; 3458 if (left) { 3459 /* Switch mas to prev node */ 3460 mat_add(mast->free, mas->node); 3461 *mas = tmp_mas; 3462 /* Start using mast->l for the left side. */ 3463 tmp_mas.node = mast->l->node; 3464 *mast->l = tmp_mas; 3465 } else { 3466 mat_add(mast->free, tmp_mas.node); 3467 tmp_mas.node = mast->r->node; 3468 *mast->r = tmp_mas; 3469 split = slot_total - split; 3470 } 3471 split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]); 3472 /* Update parent slot for split calculation. */ 3473 if (left) 3474 mast->orig_l->offset += end + 1; 3475 3476 mast_split_data(mast, mas, split); 3477 mast_fill_bnode(mast, mas, 2); 3478 mas_split_final_node(mast, mas, height + 1); 3479 return true; 3480 } 3481 3482 /* 3483 * mas_split() - Split data that is too big for one node into two. 3484 * @mas: The maple state 3485 * @b_node: The maple big node 3486 * Return: 1 on success, 0 on failure. 
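 *
 * Illustrative example: a store that lands inside one entry of an already
 * full leaf can turn that entry into three (the part before, the new
 * range, the part after), leaving more data than one node can hold. The
 * data is then split across two leaves and the extra child is pushed
 * upwards until a parent with spare room (or a new root) absorbs it.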
3487 */ 3488 static int mas_split(struct ma_state *mas, struct maple_big_node *b_node) 3489 { 3490 struct maple_subtree_state mast; 3491 int height = 0; 3492 unsigned char mid_split, split = 0; 3493 3494 /* 3495 * Splitting is handled differently from any other B-tree; the Maple 3496 * Tree splits upwards. Splitting up means that the split operation 3497 * occurs when the walk of the tree hits the leaves and not on the way 3498 * down. The reason for splitting up is that it is impossible to know 3499 * how much space will be needed until the leaf is (or leaves are) 3500 * reached. Since overwriting data is allowed and a range could 3501 * overwrite more than one range or result in changing one entry into 3 3502 * entries, it is impossible to know if a split is required until the 3503 * data is examined. 3504 * 3505 * Splitting is a balancing act between keeping allocations to a minimum 3506 * and avoiding a 'jitter' event where a tree is expanded to make room 3507 * for an entry followed by a contraction when the entry is removed. To 3508 * accomplish the balance, there are empty slots remaining in both left 3509 * and right nodes after a split. 3510 */ 3511 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 3512 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 3513 MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last); 3514 MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last); 3515 MA_TOPIARY(mat, mas->tree); 3516 3517 trace_ma_op(__func__, mas); 3518 mas->depth = mas_mt_height(mas); 3519 /* Allocation failures will happen early. */ 3520 mas_node_count(mas, 1 + mas->depth * 2); 3521 if (mas_is_err(mas)) 3522 return 0; 3523 3524 mast.l = &l_mas; 3525 mast.r = &r_mas; 3526 mast.orig_l = &prev_l_mas; 3527 mast.orig_r = &prev_r_mas; 3528 mast.free = &mat; 3529 mast.bn = b_node; 3530 3531 while (height++ <= mas->depth) { 3532 if (mt_slots[b_node->type] > b_node->b_end) { 3533 mas_split_final_node(&mast, mas, height); 3534 break; 3535 } 3536 3537 l_mas = r_mas = *mas; 3538 l_mas.node = mas_new_ma_node(mas, b_node); 3539 r_mas.node = mas_new_ma_node(mas, b_node); 3540 /* 3541 * Another way that 'jitter' is avoided is to terminate a split up early if the 3542 * left or right node has space to spare. This is referred to as "pushing left" 3543 * or "pushing right" and is similar to the B* tree, except the nodes left or 3544 * right can rarely be reused due to RCU, but the ripple upwards is halted which 3545 * is a significant savings. 3546 */ 3547 /* Try to push left. */ 3548 if (mas_push_data(mas, height, &mast, true)) 3549 break; 3550 3551 /* Try to push right. */ 3552 if (mas_push_data(mas, height, &mast, false)) 3553 break; 3554 3555 split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min); 3556 mast_split_data(&mast, mas, split); 3557 /* 3558 * Usually correct, mab_mas_cp in the above call overwrites 3559 * r->max. 3560 */ 3561 mast.r->max = mas->max; 3562 mast_fill_bnode(&mast, mas, 1); 3563 prev_l_mas = *mast.l; 3564 prev_r_mas = *mast.r; 3565 } 3566 3567 /* Set the original node as dead */ 3568 mat_add(mast.free, mas->node); 3569 mas->node = l_mas.node; 3570 mas_wmb_replace(mas, mast.free, NULL); 3571 mtree_range_walk(mas); 3572 return 1; 3573 } 3574 3575 /* 3576 * mas_reuse_node() - Reuse the node to store the data. 3577 * @wr_mas: The maple write state 3578 * @bn: The maple big node 3579 * @end: The end of the data. 3580 * 3581 * Will always return false in RCU mode. 3582 * 3583 * Return: True if node was reused, false otherwise. 
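 *
 * Illustrative note: when the new contents end earlier than the old data
 * did, the tail of the slot and pivot arrays is zeroed before the copy, so
 * no stale entries remain visible beyond the new end.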
3584 */ 3585 static inline bool mas_reuse_node(struct ma_wr_state *wr_mas, 3586 struct maple_big_node *bn, unsigned char end) 3587 { 3588 /* Need to be rcu safe. */ 3589 if (mt_in_rcu(wr_mas->mas->tree)) 3590 return false; 3591 3592 if (end > bn->b_end) { 3593 int clear = mt_slots[wr_mas->type] - bn->b_end; 3594 3595 memset(wr_mas->slots + bn->b_end, 0, sizeof(void *) * clear--); 3596 memset(wr_mas->pivots + bn->b_end, 0, sizeof(void *) * clear); 3597 } 3598 mab_mas_cp(bn, 0, bn->b_end, wr_mas->mas, false); 3599 return true; 3600 } 3601 3602 /* 3603 * mas_commit_b_node() - Commit the big node into the tree. 3604 * @wr_mas: The maple write state 3605 * @b_node: The maple big node 3606 * @end: The end of the data. 3607 */ 3608 static noinline_for_kasan int mas_commit_b_node(struct ma_wr_state *wr_mas, 3609 struct maple_big_node *b_node, unsigned char end) 3610 { 3611 struct maple_node *node; 3612 struct maple_enode *old_enode; 3613 unsigned char b_end = b_node->b_end; 3614 enum maple_type b_type = b_node->type; 3615 3616 old_enode = wr_mas->mas->node; 3617 if ((b_end < mt_min_slots[b_type]) && 3618 (!mte_is_root(old_enode)) && 3619 (mas_mt_height(wr_mas->mas) > 1)) 3620 return mas_rebalance(wr_mas->mas, b_node); 3621 3622 if (b_end >= mt_slots[b_type]) 3623 return mas_split(wr_mas->mas, b_node); 3624 3625 if (mas_reuse_node(wr_mas, b_node, end)) 3626 goto reuse_node; 3627 3628 mas_node_count(wr_mas->mas, 1); 3629 if (mas_is_err(wr_mas->mas)) 3630 return 0; 3631 3632 node = mas_pop_node(wr_mas->mas); 3633 node->parent = mas_mn(wr_mas->mas)->parent; 3634 wr_mas->mas->node = mt_mk_node(node, b_type); 3635 mab_mas_cp(b_node, 0, b_end, wr_mas->mas, false); 3636 mas_replace_node(wr_mas->mas, old_enode); 3637 reuse_node: 3638 mas_update_gap(wr_mas->mas); 3639 return 1; 3640 } 3641 3642 /* 3643 * mas_root_expand() - Expand a root to a node 3644 * @mas: The maple state 3645 * @entry: The entry to store into the tree 3646 */ 3647 static inline int mas_root_expand(struct ma_state *mas, void *entry) 3648 { 3649 void *contents = mas_root_locked(mas); 3650 enum maple_type type = maple_leaf_64; 3651 struct maple_node *node; 3652 void __rcu **slots; 3653 unsigned long *pivots; 3654 int slot = 0; 3655 3656 mas_node_count(mas, 1); 3657 if (unlikely(mas_is_err(mas))) 3658 return 0; 3659 3660 node = mas_pop_node(mas); 3661 pivots = ma_pivots(node, type); 3662 slots = ma_slots(node, type); 3663 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3664 mas->node = mt_mk_node(node, type); 3665 3666 if (mas->index) { 3667 if (contents) { 3668 rcu_assign_pointer(slots[slot], contents); 3669 if (likely(mas->index > 1)) 3670 slot++; 3671 } 3672 pivots[slot++] = mas->index - 1; 3673 } 3674 3675 rcu_assign_pointer(slots[slot], entry); 3676 mas->offset = slot; 3677 pivots[slot] = mas->last; 3678 if (mas->last != ULONG_MAX) 3679 pivots[++slot] = ULONG_MAX; 3680 3681 mas->depth = 1; 3682 mas_set_height(mas); 3683 ma_set_meta(node, maple_leaf_64, 0, slot); 3684 /* swap the new root into the tree */ 3685 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3686 return slot; 3687 } 3688 3689 static inline void mas_store_root(struct ma_state *mas, void *entry) 3690 { 3691 if (likely((mas->last != 0) || (mas->index != 0))) 3692 mas_root_expand(mas, entry); 3693 else if (((unsigned long) (entry) & 3) == 2) 3694 mas_root_expand(mas, entry); 3695 else { 3696 rcu_assign_pointer(mas->tree->ma_root, entry); 3697 mas->node = MAS_START; 3698 } 3699 } 3700 3701 /* 3702 * mas_is_span_wr() - Check if the write needs to be 
treated as a write that 3703 * spans the node. 3704 * @mas: The maple state 3705 * @piv: The pivot value being written 3706 * @type: The maple node type 3707 * @entry: The data to write 3708 * 3709 * Spanning writes are writes that start in one node and end in another OR if 3710 * the write of a %NULL will cause the node to end with a %NULL. 3711 * 3712 * Return: True if this is a spanning write, false otherwise. 3713 */ 3714 static bool mas_is_span_wr(struct ma_wr_state *wr_mas) 3715 { 3716 unsigned long max = wr_mas->r_max; 3717 unsigned long last = wr_mas->mas->last; 3718 enum maple_type type = wr_mas->type; 3719 void *entry = wr_mas->entry; 3720 3721 /* Contained in this pivot, fast path */ 3722 if (last < max) 3723 return false; 3724 3725 if (ma_is_leaf(type)) { 3726 max = wr_mas->mas->max; 3727 if (last < max) 3728 return false; 3729 } 3730 3731 if (last == max) { 3732 /* 3733 * The last entry of leaf node cannot be NULL unless it is the 3734 * rightmost node (writing ULONG_MAX), otherwise it spans slots. 3735 */ 3736 if (entry || last == ULONG_MAX) 3737 return false; 3738 } 3739 3740 trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry); 3741 return true; 3742 } 3743 3744 static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas) 3745 { 3746 wr_mas->type = mte_node_type(wr_mas->mas->node); 3747 mas_wr_node_walk(wr_mas); 3748 wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type); 3749 } 3750 3751 static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas) 3752 { 3753 wr_mas->mas->max = wr_mas->r_max; 3754 wr_mas->mas->min = wr_mas->r_min; 3755 wr_mas->mas->node = wr_mas->content; 3756 wr_mas->mas->offset = 0; 3757 wr_mas->mas->depth++; 3758 } 3759 /* 3760 * mas_wr_walk() - Walk the tree for a write. 3761 * @wr_mas: The maple write state 3762 * 3763 * Uses mas_slot_locked() and does not need to worry about dead nodes. 3764 * 3765 * Return: True if it's contained in a node, false on spanning write. 3766 */ 3767 static bool mas_wr_walk(struct ma_wr_state *wr_mas) 3768 { 3769 struct ma_state *mas = wr_mas->mas; 3770 3771 while (true) { 3772 mas_wr_walk_descend(wr_mas); 3773 if (unlikely(mas_is_span_wr(wr_mas))) 3774 return false; 3775 3776 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3777 mas->offset); 3778 if (ma_is_leaf(wr_mas->type)) 3779 return true; 3780 3781 mas_wr_walk_traverse(wr_mas); 3782 } 3783 3784 return true; 3785 } 3786 3787 static bool mas_wr_walk_index(struct ma_wr_state *wr_mas) 3788 { 3789 struct ma_state *mas = wr_mas->mas; 3790 3791 while (true) { 3792 mas_wr_walk_descend(wr_mas); 3793 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3794 mas->offset); 3795 if (ma_is_leaf(wr_mas->type)) 3796 return true; 3797 mas_wr_walk_traverse(wr_mas); 3798 3799 } 3800 return true; 3801 } 3802 /* 3803 * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs. 
3804 * @l_wr_mas: The left maple write state 3805 * @r_wr_mas: The right maple write state 3806 */ 3807 static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas, 3808 struct ma_wr_state *r_wr_mas) 3809 { 3810 struct ma_state *r_mas = r_wr_mas->mas; 3811 struct ma_state *l_mas = l_wr_mas->mas; 3812 unsigned char l_slot; 3813 3814 l_slot = l_mas->offset; 3815 if (!l_wr_mas->content) 3816 l_mas->index = l_wr_mas->r_min; 3817 3818 if ((l_mas->index == l_wr_mas->r_min) && 3819 (l_slot && 3820 !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) { 3821 if (l_slot > 1) 3822 l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1; 3823 else 3824 l_mas->index = l_mas->min; 3825 3826 l_mas->offset = l_slot - 1; 3827 } 3828 3829 if (!r_wr_mas->content) { 3830 if (r_mas->last < r_wr_mas->r_max) 3831 r_mas->last = r_wr_mas->r_max; 3832 r_mas->offset++; 3833 } else if ((r_mas->last == r_wr_mas->r_max) && 3834 (r_mas->last < r_mas->max) && 3835 !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) { 3836 r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots, 3837 r_wr_mas->type, r_mas->offset + 1); 3838 r_mas->offset++; 3839 } 3840 } 3841 3842 static inline void *mas_state_walk(struct ma_state *mas) 3843 { 3844 void *entry; 3845 3846 entry = mas_start(mas); 3847 if (mas_is_none(mas)) 3848 return NULL; 3849 3850 if (mas_is_ptr(mas)) 3851 return entry; 3852 3853 return mtree_range_walk(mas); 3854 } 3855 3856 /* 3857 * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up 3858 * to date. 3859 * 3860 * @mas: The maple state. 3861 * 3862 * Note: Leaves mas in undesirable state. 3863 * Return: The entry for @mas->index or %NULL on dead node. 3864 */ 3865 static inline void *mtree_lookup_walk(struct ma_state *mas) 3866 { 3867 unsigned long *pivots; 3868 unsigned char offset; 3869 struct maple_node *node; 3870 struct maple_enode *next; 3871 enum maple_type type; 3872 void __rcu **slots; 3873 unsigned char end; 3874 unsigned long max; 3875 3876 next = mas->node; 3877 max = ULONG_MAX; 3878 do { 3879 offset = 0; 3880 node = mte_to_node(next); 3881 type = mte_node_type(next); 3882 pivots = ma_pivots(node, type); 3883 end = ma_data_end(node, type, pivots, max); 3884 if (unlikely(ma_dead_node(node))) 3885 goto dead_node; 3886 do { 3887 if (pivots[offset] >= mas->index) { 3888 max = pivots[offset]; 3889 break; 3890 } 3891 } while (++offset < end); 3892 3893 slots = ma_slots(node, type); 3894 next = mt_slot(mas->tree, slots, offset); 3895 if (unlikely(ma_dead_node(node))) 3896 goto dead_node; 3897 } while (!ma_is_leaf(type)); 3898 3899 return (void *)next; 3900 3901 dead_node: 3902 mas_reset(mas); 3903 return NULL; 3904 } 3905 3906 /* 3907 * mas_new_root() - Create a new root node that only contains the entry passed 3908 * in. 3909 * @mas: The maple state 3910 * @entry: The entry to store. 3911 * 3912 * Only valid when the index == 0 and the last == ULONG_MAX 3913 * 3914 * Return 0 on error, 1 on success. 
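 *
 * Illustrative example: storing a non-NULL entry over 0-ULONG_MAX builds a
 * single leaf whose first slot holds the entry and whose first pivot is
 * ULONG_MAX; storing NULL over the same range instead points ma_root at
 * NULL directly. Either way, any previous node tree is destroyed.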
3915 */ 3916 static inline int mas_new_root(struct ma_state *mas, void *entry) 3917 { 3918 struct maple_enode *root = mas_root_locked(mas); 3919 enum maple_type type = maple_leaf_64; 3920 struct maple_node *node; 3921 void __rcu **slots; 3922 unsigned long *pivots; 3923 3924 if (!entry && !mas->index && mas->last == ULONG_MAX) { 3925 mas->depth = 0; 3926 mas_set_height(mas); 3927 rcu_assign_pointer(mas->tree->ma_root, entry); 3928 mas->node = MAS_START; 3929 goto done; 3930 } 3931 3932 mas_node_count(mas, 1); 3933 if (mas_is_err(mas)) 3934 return 0; 3935 3936 node = mas_pop_node(mas); 3937 pivots = ma_pivots(node, type); 3938 slots = ma_slots(node, type); 3939 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3940 mas->node = mt_mk_node(node, type); 3941 rcu_assign_pointer(slots[0], entry); 3942 pivots[0] = mas->last; 3943 mas->depth = 1; 3944 mas_set_height(mas); 3945 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3946 3947 done: 3948 if (xa_is_node(root)) 3949 mte_destroy_walk(root, mas->tree); 3950 3951 return 1; 3952 } 3953 /* 3954 * mas_wr_spanning_store() - Create a subtree with the store operation completed 3955 * and new nodes where necessary, then place the sub-tree in the actual tree. 3956 * Note that mas is expected to point to the node which caused the store to 3957 * span. 3958 * @wr_mas: The maple write state 3959 * 3960 * Return: 0 on error, positive on success. 3961 */ 3962 static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas) 3963 { 3964 struct maple_subtree_state mast; 3965 struct maple_big_node b_node; 3966 struct ma_state *mas; 3967 unsigned char height; 3968 3969 /* Left and Right side of spanning store */ 3970 MA_STATE(l_mas, NULL, 0, 0); 3971 MA_STATE(r_mas, NULL, 0, 0); 3972 3973 MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry); 3974 MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry); 3975 3976 /* 3977 * A store operation that spans multiple nodes is called a spanning 3978 * store and is handled early in the store call stack by the function 3979 * mas_is_span_wr(). When a spanning store is identified, the maple 3980 * state is duplicated. The first maple state walks the left tree path 3981 * to ``index``, the duplicate walks the right tree path to ``last``. 3982 * The data in the two nodes are combined into a single node, two nodes, 3983 * or possibly three nodes (see the 3-way split above). A ``NULL`` 3984 * written to the last entry of a node is considered a spanning store as 3985 * a rebalance is required for the operation to complete and an overflow 3986 * of data may happen. 3987 */ 3988 mas = wr_mas->mas; 3989 trace_ma_op(__func__, mas); 3990 3991 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 3992 return mas_new_root(mas, wr_mas->entry); 3993 /* 3994 * Node rebalancing may occur due to this store, so there may be three new 3995 * entries per level plus a new root. 3996 */ 3997 height = mas_mt_height(mas); 3998 mas_node_count(mas, 1 + height * 3); 3999 if (mas_is_err(mas)) 4000 return 0; 4001 4002 /* 4003 * Set up right side. Need to get to the next offset after the spanning 4004 * store to ensure it's not NULL and to combine both the next node and 4005 * the node with the start together. 4006 */ 4007 r_mas = *mas; 4008 /* Avoid overflow, walk to next slot in the tree. */ 4009 if (r_mas.last + 1) 4010 r_mas.last++; 4011 4012 r_mas.index = r_mas.last; 4013 mas_wr_walk_index(&r_wr_mas); 4014 r_mas.last = r_mas.index = mas->last; 4015 4016 /* Set up left side. 
*/ 4017 l_mas = *mas; 4018 mas_wr_walk_index(&l_wr_mas); 4019 4020 if (!wr_mas->entry) { 4021 mas_extend_spanning_null(&l_wr_mas, &r_wr_mas); 4022 mas->offset = l_mas.offset; 4023 mas->index = l_mas.index; 4024 mas->last = l_mas.last = r_mas.last; 4025 } 4026 4027 /* expanding NULLs may make this cover the entire range */ 4028 if (!l_mas.index && r_mas.last == ULONG_MAX) { 4029 mas_set_range(mas, 0, ULONG_MAX); 4030 return mas_new_root(mas, wr_mas->entry); 4031 } 4032 4033 memset(&b_node, 0, sizeof(struct maple_big_node)); 4034 /* Copy l_mas and store the value in b_node. */ 4035 mas_store_b_node(&l_wr_mas, &b_node, l_wr_mas.node_end); 4036 /* Copy r_mas into b_node. */ 4037 if (r_mas.offset <= r_wr_mas.node_end) 4038 mas_mab_cp(&r_mas, r_mas.offset, r_wr_mas.node_end, 4039 &b_node, b_node.b_end + 1); 4040 else 4041 b_node.b_end++; 4042 4043 /* Stop spanning searches by searching for just index. */ 4044 l_mas.index = l_mas.last = mas->index; 4045 4046 mast.bn = &b_node; 4047 mast.orig_l = &l_mas; 4048 mast.orig_r = &r_mas; 4049 /* Combine l_mas and r_mas and split them up evenly again. */ 4050 return mas_spanning_rebalance(mas, &mast, height + 1); 4051 } 4052 4053 /* 4054 * mas_wr_node_store() - Attempt to store the value in a node 4055 * @wr_mas: The maple write state 4056 * 4057 * Attempts to reuse the node, but may allocate. 4058 * 4059 * Return: True if stored, false otherwise 4060 */ 4061 static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas, 4062 unsigned char new_end) 4063 { 4064 struct ma_state *mas = wr_mas->mas; 4065 void __rcu **dst_slots; 4066 unsigned long *dst_pivots; 4067 unsigned char dst_offset, offset_end = wr_mas->offset_end; 4068 struct maple_node reuse, *newnode; 4069 unsigned char copy_size, node_pivots = mt_pivots[wr_mas->type]; 4070 bool in_rcu = mt_in_rcu(mas->tree); 4071 4072 /* Check if there is enough data. The room is enough. */ 4073 if (!mte_is_root(mas->node) && (new_end <= mt_min_slots[wr_mas->type]) && 4074 !(mas->mas_flags & MA_STATE_BULK)) 4075 return false; 4076 4077 if (mas->last == wr_mas->end_piv) 4078 offset_end++; /* don't copy this offset */ 4079 else if (unlikely(wr_mas->r_max == ULONG_MAX)) 4080 mas_bulk_rebalance(mas, wr_mas->node_end, wr_mas->type); 4081 4082 /* set up node. */ 4083 if (in_rcu) { 4084 mas_node_count(mas, 1); 4085 if (mas_is_err(mas)) 4086 return false; 4087 4088 newnode = mas_pop_node(mas); 4089 } else { 4090 memset(&reuse, 0, sizeof(struct maple_node)); 4091 newnode = &reuse; 4092 } 4093 4094 newnode->parent = mas_mn(mas)->parent; 4095 dst_pivots = ma_pivots(newnode, wr_mas->type); 4096 dst_slots = ma_slots(newnode, wr_mas->type); 4097 /* Copy from start to insert point */ 4098 memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset); 4099 memcpy(dst_slots, wr_mas->slots, sizeof(void *) * mas->offset); 4100 4101 /* Handle insert of new range starting after old range */ 4102 if (wr_mas->r_min < mas->index) { 4103 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content); 4104 dst_pivots[mas->offset++] = mas->index - 1; 4105 } 4106 4107 /* Store the new entry and range end. */ 4108 if (mas->offset < node_pivots) 4109 dst_pivots[mas->offset] = mas->last; 4110 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->entry); 4111 4112 /* 4113 * this range wrote to the end of the node or it overwrote the rest of 4114 * the data 4115 */ 4116 if (offset_end > wr_mas->node_end) 4117 goto done; 4118 4119 dst_offset = mas->offset + 1; 4120 /* Copy to the end of node if necessary. 
*/ 4121 copy_size = wr_mas->node_end - offset_end + 1; 4122 memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end, 4123 sizeof(void *) * copy_size); 4124 memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end, 4125 sizeof(unsigned long) * (copy_size - 1)); 4126 4127 if (new_end < node_pivots) 4128 dst_pivots[new_end] = mas->max; 4129 4130 done: 4131 mas_leaf_set_meta(mas, newnode, dst_pivots, maple_leaf_64, new_end); 4132 if (in_rcu) { 4133 struct maple_enode *old_enode = mas->node; 4134 4135 mas->node = mt_mk_node(newnode, wr_mas->type); 4136 mas_replace_node(mas, old_enode); 4137 } else { 4138 memcpy(wr_mas->node, newnode, sizeof(struct maple_node)); 4139 } 4140 trace_ma_write(__func__, mas, 0, wr_mas->entry); 4141 mas_update_gap(mas); 4142 return true; 4143 } 4144 4145 /* 4146 * mas_wr_slot_store: Attempt to store a value in a slot. 4147 * @wr_mas: the maple write state 4148 * 4149 * Return: True if stored, false otherwise 4150 */ 4151 static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas) 4152 { 4153 struct ma_state *mas = wr_mas->mas; 4154 unsigned char offset = mas->offset; 4155 void __rcu **slots = wr_mas->slots; 4156 bool gap = false; 4157 4158 gap |= !mt_slot_locked(mas->tree, slots, offset); 4159 gap |= !mt_slot_locked(mas->tree, slots, offset + 1); 4160 4161 if (wr_mas->offset_end - offset == 1) { 4162 if (mas->index == wr_mas->r_min) { 4163 /* Overwriting the range and a part of the next one */ 4164 rcu_assign_pointer(slots[offset], wr_mas->entry); 4165 wr_mas->pivots[offset] = mas->last; 4166 } else { 4167 /* Overwriting a part of the range and the next one */ 4168 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 4169 wr_mas->pivots[offset] = mas->index - 1; 4170 mas->offset++; /* Keep mas accurate. */ 4171 } 4172 } else if (!mt_in_rcu(mas->tree)) { 4173 /* 4174 * Expand the range, only partially overwriting the previous and 4175 * next ranges 4176 */ 4177 gap |= !mt_slot_locked(mas->tree, slots, offset + 2); 4178 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 4179 wr_mas->pivots[offset] = mas->index - 1; 4180 wr_mas->pivots[offset + 1] = mas->last; 4181 mas->offset++; /* Keep mas accurate. */ 4182 } else { 4183 return false; 4184 } 4185 4186 trace_ma_write(__func__, mas, 0, wr_mas->entry); 4187 /* 4188 * Only update gap when the new entry is empty or there is an empty 4189 * entry in the original two ranges. 
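	 * For instance (illustrative): overwriting part of a NULL slot with a
	 * non-NULL entry shrinks an existing gap, and storing a NULL entry
	 * creates one; in either case the gap tracked in the parent node must
	 * be recomputed.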
4190 */ 4191 if (!wr_mas->entry || gap) 4192 mas_update_gap(mas); 4193 4194 return true; 4195 } 4196 4197 static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas) 4198 { 4199 struct ma_state *mas = wr_mas->mas; 4200 4201 if (!wr_mas->slots[wr_mas->offset_end]) { 4202 /* If this one is null, the next and prev are not */ 4203 mas->last = wr_mas->end_piv; 4204 } else { 4205 /* Check next slot(s) if we are overwriting the end */ 4206 if ((mas->last == wr_mas->end_piv) && 4207 (wr_mas->node_end != wr_mas->offset_end) && 4208 !wr_mas->slots[wr_mas->offset_end + 1]) { 4209 wr_mas->offset_end++; 4210 if (wr_mas->offset_end == wr_mas->node_end) 4211 mas->last = mas->max; 4212 else 4213 mas->last = wr_mas->pivots[wr_mas->offset_end]; 4214 wr_mas->end_piv = mas->last; 4215 } 4216 } 4217 4218 if (!wr_mas->content) { 4219 /* If this one is null, the next and prev are not */ 4220 mas->index = wr_mas->r_min; 4221 } else { 4222 /* Check prev slot if we are overwriting the start */ 4223 if (mas->index == wr_mas->r_min && mas->offset && 4224 !wr_mas->slots[mas->offset - 1]) { 4225 mas->offset--; 4226 wr_mas->r_min = mas->index = 4227 mas_safe_min(mas, wr_mas->pivots, mas->offset); 4228 wr_mas->r_max = wr_mas->pivots[mas->offset]; 4229 } 4230 } 4231 } 4232 4233 static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas) 4234 { 4235 while ((wr_mas->offset_end < wr_mas->node_end) && 4236 (wr_mas->mas->last > wr_mas->pivots[wr_mas->offset_end])) 4237 wr_mas->offset_end++; 4238 4239 if (wr_mas->offset_end < wr_mas->node_end) 4240 wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end]; 4241 else 4242 wr_mas->end_piv = wr_mas->mas->max; 4243 4244 if (!wr_mas->entry) 4245 mas_wr_extend_null(wr_mas); 4246 } 4247 4248 static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas) 4249 { 4250 struct ma_state *mas = wr_mas->mas; 4251 unsigned char new_end = wr_mas->node_end + 2; 4252 4253 new_end -= wr_mas->offset_end - mas->offset; 4254 if (wr_mas->r_min == mas->index) 4255 new_end--; 4256 4257 if (wr_mas->end_piv == mas->last) 4258 new_end--; 4259 4260 return new_end; 4261 } 4262 4263 /* 4264 * mas_wr_append: Attempt to append 4265 * @wr_mas: the maple write state 4266 * 4267 * Return: True if appended, false otherwise 4268 */ 4269 static inline bool mas_wr_append(struct ma_wr_state *wr_mas, 4270 unsigned char new_end) 4271 { 4272 unsigned char end = wr_mas->node_end; 4273 struct ma_state *mas = wr_mas->mas; 4274 unsigned char node_pivots = mt_pivots[wr_mas->type]; 4275 4276 if (mas->offset != wr_mas->node_end) 4277 return false; 4278 4279 if (new_end < node_pivots) { 4280 wr_mas->pivots[new_end] = wr_mas->pivots[end]; 4281 ma_set_meta(wr_mas->node, maple_leaf_64, 0, new_end); 4282 } 4283 4284 if (new_end == wr_mas->node_end + 1) { 4285 if (mas->last == wr_mas->r_max) { 4286 /* Append to end of range */ 4287 rcu_assign_pointer(wr_mas->slots[new_end], 4288 wr_mas->entry); 4289 wr_mas->pivots[end] = mas->index - 1; 4290 mas->offset = new_end; 4291 } else { 4292 /* Append to start of range */ 4293 rcu_assign_pointer(wr_mas->slots[new_end], 4294 wr_mas->content); 4295 wr_mas->pivots[end] = mas->last; 4296 rcu_assign_pointer(wr_mas->slots[end], wr_mas->entry); 4297 } 4298 } else { 4299 /* Append to the range without touching any boundaries. 
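		 * For example (illustrative): if the node's final range is
		 * [50-99] and the store is [60-69], the node ends with three
		 * ranges: [50-59] keeping the old entry, [60-69] with the new
		 * entry, and [70-99] keeping the old entry again.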
*/ 4300 rcu_assign_pointer(wr_mas->slots[new_end], wr_mas->content); 4301 wr_mas->pivots[end + 1] = mas->last; 4302 rcu_assign_pointer(wr_mas->slots[end + 1], wr_mas->entry); 4303 wr_mas->pivots[end] = mas->index - 1; 4304 mas->offset = end + 1; 4305 } 4306 4307 if (!wr_mas->content || !wr_mas->entry) 4308 mas_update_gap(mas); 4309 4310 return true; 4311 } 4312 4313 /* 4314 * mas_wr_bnode() - Slow path for a modification. 4315 * @wr_mas: The write maple state 4316 * 4317 * This is where split, rebalance end up. 4318 */ 4319 static void mas_wr_bnode(struct ma_wr_state *wr_mas) 4320 { 4321 struct maple_big_node b_node; 4322 4323 trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry); 4324 memset(&b_node, 0, sizeof(struct maple_big_node)); 4325 mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end); 4326 mas_commit_b_node(wr_mas, &b_node, wr_mas->node_end); 4327 } 4328 4329 static inline void mas_wr_modify(struct ma_wr_state *wr_mas) 4330 { 4331 struct ma_state *mas = wr_mas->mas; 4332 unsigned char new_end; 4333 4334 /* Direct replacement */ 4335 if (wr_mas->r_min == mas->index && wr_mas->r_max == mas->last) { 4336 rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry); 4337 if (!!wr_mas->entry ^ !!wr_mas->content) 4338 mas_update_gap(mas); 4339 return; 4340 } 4341 4342 /* 4343 * new_end exceeds the size of the maple node and cannot enter the fast 4344 * path. 4345 */ 4346 new_end = mas_wr_new_end(wr_mas); 4347 if (new_end >= mt_slots[wr_mas->type]) 4348 goto slow_path; 4349 4350 /* Attempt to append */ 4351 if (mas_wr_append(wr_mas, new_end)) 4352 return; 4353 4354 if (new_end == wr_mas->node_end && mas_wr_slot_store(wr_mas)) 4355 return; 4356 4357 if (mas_wr_node_store(wr_mas, new_end)) 4358 return; 4359 4360 if (mas_is_err(mas)) 4361 return; 4362 4363 slow_path: 4364 mas_wr_bnode(wr_mas); 4365 } 4366 4367 /* 4368 * mas_wr_store_entry() - Internal call to store a value 4369 * @mas: The maple state 4370 * @entry: The entry to store. 4371 * 4372 * Return: The contents that was stored at the index. 4373 */ 4374 static inline void *mas_wr_store_entry(struct ma_wr_state *wr_mas) 4375 { 4376 struct ma_state *mas = wr_mas->mas; 4377 4378 wr_mas->content = mas_start(mas); 4379 if (mas_is_none(mas) || mas_is_ptr(mas)) { 4380 mas_store_root(mas, wr_mas->entry); 4381 return wr_mas->content; 4382 } 4383 4384 if (unlikely(!mas_wr_walk(wr_mas))) { 4385 mas_wr_spanning_store(wr_mas); 4386 return wr_mas->content; 4387 } 4388 4389 /* At this point, we are at the leaf node that needs to be altered. */ 4390 mas_wr_end_piv(wr_mas); 4391 /* New root for a single pointer */ 4392 if (unlikely(!mas->index && mas->last == ULONG_MAX)) { 4393 mas_new_root(mas, wr_mas->entry); 4394 return wr_mas->content; 4395 } 4396 4397 mas_wr_modify(wr_mas); 4398 return wr_mas->content; 4399 } 4400 4401 /** 4402 * mas_insert() - Internal call to insert a value 4403 * @mas: The maple state 4404 * @entry: The entry to store 4405 * 4406 * Return: %NULL or the contents that already exists at the requested index 4407 * otherwise. The maple state needs to be checked for error conditions. 4408 */ 4409 static inline void *mas_insert(struct ma_state *mas, void *entry) 4410 { 4411 MA_WR_STATE(wr_mas, mas, entry); 4412 4413 /* 4414 * Inserting a new range inserts either 0, 1, or 2 pivots within the 4415 * tree. If the insert fits exactly into an existing gap with a value 4416 * of NULL, then the slot only needs to be written with the new value. 
4417 * If the range being inserted is adjacent to another range, then only a 4418 * single pivot needs to be inserted (as well as writing the entry). If 4419 * the new range is within a gap but does not touch any other ranges, 4420 * then two pivots need to be inserted: the start - 1, and the end. As 4421 * usual, the entry must be written. Most operations require a new node 4422 * to be allocated and replace an existing node to ensure RCU safety, 4423 * when in RCU mode. The exception to requiring a newly allocated node 4424 * is when inserting at the end of a node (appending). When done 4425 * carefully, appending can reuse the node in place. 4426 */ 4427 wr_mas.content = mas_start(mas); 4428 if (wr_mas.content) 4429 goto exists; 4430 4431 if (mas_is_none(mas) || mas_is_ptr(mas)) { 4432 mas_store_root(mas, entry); 4433 return NULL; 4434 } 4435 4436 /* spanning writes always overwrite something */ 4437 if (!mas_wr_walk(&wr_mas)) 4438 goto exists; 4439 4440 /* At this point, we are at the leaf node that needs to be altered. */ 4441 wr_mas.offset_end = mas->offset; 4442 wr_mas.end_piv = wr_mas.r_max; 4443 4444 if (wr_mas.content || (mas->last > wr_mas.r_max)) 4445 goto exists; 4446 4447 if (!entry) 4448 return NULL; 4449 4450 mas_wr_modify(&wr_mas); 4451 return wr_mas.content; 4452 4453 exists: 4454 mas_set_err(mas, -EEXIST); 4455 return wr_mas.content; 4456 4457 } 4458 4459 static inline void mas_rewalk(struct ma_state *mas, unsigned long index) 4460 { 4461 retry: 4462 mas_set(mas, index); 4463 mas_state_walk(mas); 4464 if (mas_is_start(mas)) 4465 goto retry; 4466 } 4467 4468 static inline bool mas_rewalk_if_dead(struct ma_state *mas, 4469 struct maple_node *node, const unsigned long index) 4470 { 4471 if (unlikely(ma_dead_node(node))) { 4472 mas_rewalk(mas, index); 4473 return true; 4474 } 4475 return false; 4476 } 4477 4478 /* 4479 * mas_prev_node() - Find the prev non-null entry at the same level in the 4480 * tree. The prev value will be mas->node[mas->offset] or MAS_NONE. 4481 * @mas: The maple state 4482 * @min: The lower limit to search 4483 * 4484 * The prev node value will be mas->node[mas->offset] or MAS_NONE. 4485 * Return: 1 if the node is dead, 0 otherwise. 4486 */ 4487 static inline int mas_prev_node(struct ma_state *mas, unsigned long min) 4488 { 4489 enum maple_type mt; 4490 int offset, level; 4491 void __rcu **slots; 4492 struct maple_node *node; 4493 unsigned long *pivots; 4494 unsigned long max; 4495 4496 node = mas_mn(mas); 4497 if (!mas->min) 4498 goto no_entry; 4499 4500 max = mas->min - 1; 4501 if (max < min) 4502 goto no_entry; 4503 4504 level = 0; 4505 do { 4506 if (ma_is_root(node)) 4507 goto no_entry; 4508 4509 /* Walk up. 
*/ 4510 if (unlikely(mas_ascend(mas))) 4511 return 1; 4512 offset = mas->offset; 4513 level++; 4514 node = mas_mn(mas); 4515 } while (!offset); 4516 4517 offset--; 4518 mt = mte_node_type(mas->node); 4519 while (level > 1) { 4520 level--; 4521 slots = ma_slots(node, mt); 4522 mas->node = mas_slot(mas, slots, offset); 4523 if (unlikely(ma_dead_node(node))) 4524 return 1; 4525 4526 mt = mte_node_type(mas->node); 4527 node = mas_mn(mas); 4528 pivots = ma_pivots(node, mt); 4529 offset = ma_data_end(node, mt, pivots, max); 4530 if (unlikely(ma_dead_node(node))) 4531 return 1; 4532 } 4533 4534 slots = ma_slots(node, mt); 4535 mas->node = mas_slot(mas, slots, offset); 4536 pivots = ma_pivots(node, mt); 4537 if (unlikely(ma_dead_node(node))) 4538 return 1; 4539 4540 if (likely(offset)) 4541 mas->min = pivots[offset - 1] + 1; 4542 mas->max = max; 4543 mas->offset = mas_data_end(mas); 4544 if (unlikely(mte_dead_node(mas->node))) 4545 return 1; 4546 4547 return 0; 4548 4549 no_entry: 4550 if (unlikely(ma_dead_node(node))) 4551 return 1; 4552 4553 mas->node = MAS_NONE; 4554 return 0; 4555 } 4556 4557 /* 4558 * mas_prev_slot() - Get the entry in the previous slot 4559 * 4560 * @mas: The maple state 4561 * @max: The minimum starting range 4562 * 4563 * Return: The entry in the previous slot which is possibly NULL 4564 */ 4565 static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty) 4566 { 4567 void *entry; 4568 void __rcu **slots; 4569 unsigned long pivot; 4570 enum maple_type type; 4571 unsigned long *pivots; 4572 struct maple_node *node; 4573 unsigned long save_point = mas->index; 4574 4575 retry: 4576 node = mas_mn(mas); 4577 type = mte_node_type(mas->node); 4578 pivots = ma_pivots(node, type); 4579 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4580 goto retry; 4581 4582 again: 4583 if (mas->min <= min) { 4584 pivot = mas_safe_min(mas, pivots, mas->offset); 4585 4586 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4587 goto retry; 4588 4589 if (pivot <= min) 4590 return NULL; 4591 } 4592 4593 if (likely(mas->offset)) { 4594 mas->offset--; 4595 mas->last = mas->index - 1; 4596 mas->index = mas_safe_min(mas, pivots, mas->offset); 4597 } else { 4598 if (mas_prev_node(mas, min)) { 4599 mas_rewalk(mas, save_point); 4600 goto retry; 4601 } 4602 4603 if (mas_is_none(mas)) 4604 return NULL; 4605 4606 mas->last = mas->max; 4607 node = mas_mn(mas); 4608 type = mte_node_type(mas->node); 4609 pivots = ma_pivots(node, type); 4610 mas->index = pivots[mas->offset - 1] + 1; 4611 } 4612 4613 slots = ma_slots(node, type); 4614 entry = mas_slot(mas, slots, mas->offset); 4615 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4616 goto retry; 4617 4618 if (likely(entry)) 4619 return entry; 4620 4621 if (!empty) 4622 goto again; 4623 4624 return entry; 4625 } 4626 4627 /* 4628 * mas_next_node() - Get the next node at the same level in the tree. 4629 * @mas: The maple state 4630 * @max: The maximum pivot value to check. 4631 * 4632 * The next value will be mas->node[mas->offset] or MAS_NONE. 4633 * Return: 1 on dead node, 0 otherwise. 
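 *
 * For example (illustrative): starting from a leaf covering [0-99] whose
 * parent holds the node for [100-199] in the next slot, a single ascend plus
 * an offset increment leaves the state on the [100-199] node with
 * min = 100 and max = 199.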
4634 */ 4635 static inline int mas_next_node(struct ma_state *mas, struct maple_node *node, 4636 unsigned long max) 4637 { 4638 unsigned long min; 4639 unsigned long *pivots; 4640 struct maple_enode *enode; 4641 int level = 0; 4642 unsigned char node_end; 4643 enum maple_type mt; 4644 void __rcu **slots; 4645 4646 if (mas->max >= max) 4647 goto no_entry; 4648 4649 min = mas->max + 1; 4650 level = 0; 4651 do { 4652 if (ma_is_root(node)) 4653 goto no_entry; 4654 4655 /* Walk up. */ 4656 if (unlikely(mas_ascend(mas))) 4657 return 1; 4658 4659 level++; 4660 node = mas_mn(mas); 4661 mt = mte_node_type(mas->node); 4662 pivots = ma_pivots(node, mt); 4663 node_end = ma_data_end(node, mt, pivots, mas->max); 4664 if (unlikely(ma_dead_node(node))) 4665 return 1; 4666 4667 } while (unlikely(mas->offset == node_end)); 4668 4669 slots = ma_slots(node, mt); 4670 mas->offset++; 4671 enode = mas_slot(mas, slots, mas->offset); 4672 if (unlikely(ma_dead_node(node))) 4673 return 1; 4674 4675 if (level > 1) 4676 mas->offset = 0; 4677 4678 while (unlikely(level > 1)) { 4679 level--; 4680 mas->node = enode; 4681 node = mas_mn(mas); 4682 mt = mte_node_type(mas->node); 4683 slots = ma_slots(node, mt); 4684 enode = mas_slot(mas, slots, 0); 4685 if (unlikely(ma_dead_node(node))) 4686 return 1; 4687 } 4688 4689 if (!mas->offset) 4690 pivots = ma_pivots(node, mt); 4691 4692 mas->max = mas_safe_pivot(mas, pivots, mas->offset, mt); 4693 if (unlikely(ma_dead_node(node))) 4694 return 1; 4695 4696 mas->node = enode; 4697 mas->min = min; 4698 return 0; 4699 4700 no_entry: 4701 if (unlikely(ma_dead_node(node))) 4702 return 1; 4703 4704 mas->node = MAS_NONE; 4705 return 0; 4706 } 4707 4708 /* 4709 * mas_next_slot() - Get the entry in the next slot 4710 * 4711 * @mas: The maple state 4712 * @max: The maximum starting range 4713 * @empty: Can be empty 4714 * 4715 * Return: The entry in the next slot which is possibly NULL 4716 */ 4717 static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty) 4718 { 4719 void __rcu **slots; 4720 unsigned long *pivots; 4721 unsigned long pivot; 4722 enum maple_type type; 4723 struct maple_node *node; 4724 unsigned char data_end; 4725 unsigned long save_point = mas->last; 4726 void *entry; 4727 4728 retry: 4729 node = mas_mn(mas); 4730 type = mte_node_type(mas->node); 4731 pivots = ma_pivots(node, type); 4732 data_end = ma_data_end(node, type, pivots, mas->max); 4733 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4734 goto retry; 4735 4736 again: 4737 if (mas->max >= max) { 4738 if (likely(mas->offset < data_end)) 4739 pivot = pivots[mas->offset]; 4740 else 4741 return NULL; /* must be mas->max */ 4742 4743 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4744 goto retry; 4745 4746 if (pivot >= max) 4747 return NULL; 4748 } 4749 4750 if (likely(mas->offset < data_end)) { 4751 mas->index = pivots[mas->offset] + 1; 4752 mas->offset++; 4753 if (likely(mas->offset < data_end)) 4754 mas->last = pivots[mas->offset]; 4755 else 4756 mas->last = mas->max; 4757 } else { 4758 if (mas_next_node(mas, node, max)) { 4759 mas_rewalk(mas, save_point); 4760 goto retry; 4761 } 4762 4763 if (mas_is_none(mas)) 4764 return NULL; 4765 4766 mas->offset = 0; 4767 mas->index = mas->min; 4768 node = mas_mn(mas); 4769 type = mte_node_type(mas->node); 4770 pivots = ma_pivots(node, type); 4771 mas->last = pivots[0]; 4772 } 4773 4774 slots = ma_slots(node, type); 4775 entry = mt_slot(mas->tree, slots, mas->offset); 4776 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4777 goto 
retry; 4778 4779 if (entry) 4780 return entry; 4781 4782 if (!empty) { 4783 if (!mas->offset) 4784 data_end = 2; 4785 goto again; 4786 } 4787 4788 return entry; 4789 } 4790 4791 /* 4792 * mas_next_entry() - Internal function to get the next entry. 4793 * @mas: The maple state 4794 * @limit: The maximum range start. 4795 * 4796 * Set the @mas->node to the next entry and the range_start to 4797 * the beginning value for the entry. Does not check beyond @limit. 4798 * Sets @mas->index and @mas->last to the limit if it is hit. 4799 * Restarts on dead nodes. 4800 * 4801 * Return: the next entry or %NULL. 4802 */ 4803 static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit) 4804 { 4805 if (mas->last >= limit) 4806 return NULL; 4807 4808 return mas_next_slot(mas, limit, false); 4809 } 4810 4811 /* 4812 * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the 4813 * highest gap address of a given size in a given node and descend. 4814 * @mas: The maple state 4815 * @size: The needed size. 4816 * 4817 * Return: True if found in a leaf, false otherwise. 4818 * 4819 */ 4820 static bool mas_rev_awalk(struct ma_state *mas, unsigned long size, 4821 unsigned long *gap_min, unsigned long *gap_max) 4822 { 4823 enum maple_type type = mte_node_type(mas->node); 4824 struct maple_node *node = mas_mn(mas); 4825 unsigned long *pivots, *gaps; 4826 void __rcu **slots; 4827 unsigned long gap = 0; 4828 unsigned long max, min; 4829 unsigned char offset; 4830 4831 if (unlikely(mas_is_err(mas))) 4832 return true; 4833 4834 if (ma_is_dense(type)) { 4835 /* dense nodes. */ 4836 mas->offset = (unsigned char)(mas->index - mas->min); 4837 return true; 4838 } 4839 4840 pivots = ma_pivots(node, type); 4841 slots = ma_slots(node, type); 4842 gaps = ma_gaps(node, type); 4843 offset = mas->offset; 4844 min = mas_safe_min(mas, pivots, offset); 4845 /* Skip out of bounds. */ 4846 while (mas->last < min) 4847 min = mas_safe_min(mas, pivots, --offset); 4848 4849 max = mas_safe_pivot(mas, pivots, offset, type); 4850 while (mas->index <= max) { 4851 gap = 0; 4852 if (gaps) 4853 gap = gaps[offset]; 4854 else if (!mas_slot(mas, slots, offset)) 4855 gap = max - min + 1; 4856 4857 if (gap) { 4858 if ((size <= gap) && (size <= mas->last - min + 1)) 4859 break; 4860 4861 if (!gaps) { 4862 /* Skip the next slot, it cannot be a gap. */ 4863 if (offset < 2) 4864 goto ascend; 4865 4866 offset -= 2; 4867 max = pivots[offset]; 4868 min = mas_safe_min(mas, pivots, offset); 4869 continue; 4870 } 4871 } 4872 4873 if (!offset) 4874 goto ascend; 4875 4876 offset--; 4877 max = min - 1; 4878 min = mas_safe_min(mas, pivots, offset); 4879 } 4880 4881 if (unlikely((mas->index > max) || (size - 1 > max - mas->index))) 4882 goto no_space; 4883 4884 if (unlikely(ma_is_leaf(type))) { 4885 mas->offset = offset; 4886 *gap_min = min; 4887 *gap_max = min + gap - 1; 4888 return true; 4889 } 4890 4891 /* descend, only happens under lock. 
*/ 4892 mas->node = mas_slot(mas, slots, offset); 4893 mas->min = min; 4894 mas->max = max; 4895 mas->offset = mas_data_end(mas); 4896 return false; 4897 4898 ascend: 4899 if (!mte_is_root(mas->node)) 4900 return false; 4901 4902 no_space: 4903 mas_set_err(mas, -EBUSY); 4904 return false; 4905 } 4906 4907 static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) 4908 { 4909 enum maple_type type = mte_node_type(mas->node); 4910 unsigned long pivot, min, gap = 0; 4911 unsigned char offset, data_end; 4912 unsigned long *gaps, *pivots; 4913 void __rcu **slots; 4914 struct maple_node *node; 4915 bool found = false; 4916 4917 if (ma_is_dense(type)) { 4918 mas->offset = (unsigned char)(mas->index - mas->min); 4919 return true; 4920 } 4921 4922 node = mas_mn(mas); 4923 pivots = ma_pivots(node, type); 4924 slots = ma_slots(node, type); 4925 gaps = ma_gaps(node, type); 4926 offset = mas->offset; 4927 min = mas_safe_min(mas, pivots, offset); 4928 data_end = ma_data_end(node, type, pivots, mas->max); 4929 for (; offset <= data_end; offset++) { 4930 pivot = mas_safe_pivot(mas, pivots, offset, type); 4931 4932 /* Not within lower bounds */ 4933 if (mas->index > pivot) 4934 goto next_slot; 4935 4936 if (gaps) 4937 gap = gaps[offset]; 4938 else if (!mas_slot(mas, slots, offset)) 4939 gap = min(pivot, mas->last) - max(mas->index, min) + 1; 4940 else 4941 goto next_slot; 4942 4943 if (gap >= size) { 4944 if (ma_is_leaf(type)) { 4945 found = true; 4946 goto done; 4947 } 4948 if (mas->index <= pivot) { 4949 mas->node = mas_slot(mas, slots, offset); 4950 mas->min = min; 4951 mas->max = pivot; 4952 offset = 0; 4953 break; 4954 } 4955 } 4956 next_slot: 4957 min = pivot + 1; 4958 if (mas->last <= pivot) { 4959 mas_set_err(mas, -EBUSY); 4960 return true; 4961 } 4962 } 4963 4964 if (mte_is_root(mas->node)) 4965 found = true; 4966 done: 4967 mas->offset = offset; 4968 return found; 4969 } 4970 4971 /** 4972 * mas_walk() - Search for @mas->index in the tree. 4973 * @mas: The maple state. 4974 * 4975 * mas->index and mas->last will be set to the range if there is a value. If 4976 * mas->node is MAS_NONE, reset to MAS_START. 4977 * 4978 * Return: the entry at the location or %NULL. 4979 */ 4980 void *mas_walk(struct ma_state *mas) 4981 { 4982 void *entry; 4983 4984 if (mas_is_none(mas) || mas_is_paused(mas) || mas_is_ptr(mas)) 4985 mas->node = MAS_START; 4986 retry: 4987 entry = mas_state_walk(mas); 4988 if (mas_is_start(mas)) { 4989 goto retry; 4990 } else if (mas_is_none(mas)) { 4991 mas->index = 0; 4992 mas->last = ULONG_MAX; 4993 } else if (mas_is_ptr(mas)) { 4994 if (!mas->index) { 4995 mas->last = 0; 4996 return entry; 4997 } 4998 4999 mas->index = 1; 5000 mas->last = ULONG_MAX; 5001 mas->node = MAS_NONE; 5002 return NULL; 5003 } 5004 5005 return entry; 5006 } 5007 EXPORT_SYMBOL_GPL(mas_walk); 5008 5009 static inline bool mas_rewind_node(struct ma_state *mas) 5010 { 5011 unsigned char slot; 5012 5013 do { 5014 if (mte_is_root(mas->node)) { 5015 slot = mas->offset; 5016 if (!slot) 5017 return false; 5018 } else { 5019 mas_ascend(mas); 5020 slot = mas->offset; 5021 } 5022 } while (!slot); 5023 5024 mas->offset = --slot; 5025 return true; 5026 } 5027 5028 /* 5029 * mas_skip_node() - Internal function. Skip over a node. 5030 * @mas: The maple state. 5031 * 5032 * Return: true if there is another node, false otherwise. 
5033 */ 5034 static inline bool mas_skip_node(struct ma_state *mas) 5035 { 5036 if (mas_is_err(mas)) 5037 return false; 5038 5039 do { 5040 if (mte_is_root(mas->node)) { 5041 if (mas->offset >= mas_data_end(mas)) { 5042 mas_set_err(mas, -EBUSY); 5043 return false; 5044 } 5045 } else { 5046 mas_ascend(mas); 5047 } 5048 } while (mas->offset >= mas_data_end(mas)); 5049 5050 mas->offset++; 5051 return true; 5052 } 5053 5054 /* 5055 * mas_awalk() - Allocation walk. Search from low address to high, for a gap of 5056 * @size 5057 * @mas: The maple state 5058 * @size: The size of the gap required 5059 * 5060 * Search between @mas->index and @mas->last for a gap of @size. 5061 */ 5062 static inline void mas_awalk(struct ma_state *mas, unsigned long size) 5063 { 5064 struct maple_enode *last = NULL; 5065 5066 /* 5067 * There are 4 options: 5068 * go to child (descend) 5069 * go back to parent (ascend) 5070 * no gap found. (return, slot == MAPLE_NODE_SLOTS) 5071 * found the gap. (return, slot != MAPLE_NODE_SLOTS) 5072 */ 5073 while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) { 5074 if (last == mas->node) 5075 mas_skip_node(mas); 5076 else 5077 last = mas->node; 5078 } 5079 } 5080 5081 /* 5082 * mas_sparse_area() - Internal function. Return upper or lower limit when 5083 * searching for a gap in an empty tree. 5084 * @mas: The maple state 5085 * @min: the minimum range 5086 * @max: The maximum range 5087 * @size: The size of the gap 5088 * @fwd: Searching forward or back 5089 */ 5090 static inline int mas_sparse_area(struct ma_state *mas, unsigned long min, 5091 unsigned long max, unsigned long size, bool fwd) 5092 { 5093 if (!unlikely(mas_is_none(mas)) && min == 0) { 5094 min++; 5095 /* 5096 * At this time, min is increased, we need to recheck whether 5097 * the size is satisfied. 5098 */ 5099 if (min > max || max - min + 1 < size) 5100 return -EBUSY; 5101 } 5102 /* mas_is_ptr */ 5103 5104 if (fwd) { 5105 mas->index = min; 5106 mas->last = min + size - 1; 5107 } else { 5108 mas->last = max; 5109 mas->index = max - size + 1; 5110 } 5111 return 0; 5112 } 5113 5114 /* 5115 * mas_empty_area() - Get the lowest address within the range that is 5116 * sufficient for the size requested. 
5117 * @mas: The maple state 5118 * @min: The lowest value of the range 5119 * @max: The highest value of the range 5120 * @size: The size needed 5121 */ 5122 int mas_empty_area(struct ma_state *mas, unsigned long min, 5123 unsigned long max, unsigned long size) 5124 { 5125 unsigned char offset; 5126 unsigned long *pivots; 5127 enum maple_type mt; 5128 5129 if (min > max) 5130 return -EINVAL; 5131 5132 if (size == 0 || max - min < size - 1) 5133 return -EINVAL; 5134 5135 if (mas_is_start(mas)) 5136 mas_start(mas); 5137 else if (mas->offset >= 2) 5138 mas->offset -= 2; 5139 else if (!mas_skip_node(mas)) 5140 return -EBUSY; 5141 5142 /* Empty set */ 5143 if (mas_is_none(mas) || mas_is_ptr(mas)) 5144 return mas_sparse_area(mas, min, max, size, true); 5145 5146 /* The start of the window can only be within these values */ 5147 mas->index = min; 5148 mas->last = max; 5149 mas_awalk(mas, size); 5150 5151 if (unlikely(mas_is_err(mas))) 5152 return xa_err(mas->node); 5153 5154 offset = mas->offset; 5155 if (unlikely(offset == MAPLE_NODE_SLOTS)) 5156 return -EBUSY; 5157 5158 mt = mte_node_type(mas->node); 5159 pivots = ma_pivots(mas_mn(mas), mt); 5160 min = mas_safe_min(mas, pivots, offset); 5161 if (mas->index < min) 5162 mas->index = min; 5163 mas->last = mas->index + size - 1; 5164 return 0; 5165 } 5166 EXPORT_SYMBOL_GPL(mas_empty_area); 5167 5168 /* 5169 * mas_empty_area_rev() - Get the highest address within the range that is 5170 * sufficient for the size requested. 5171 * @mas: The maple state 5172 * @min: The lowest value of the range 5173 * @max: The highest value of the range 5174 * @size: The size needed 5175 */ 5176 int mas_empty_area_rev(struct ma_state *mas, unsigned long min, 5177 unsigned long max, unsigned long size) 5178 { 5179 struct maple_enode *last = mas->node; 5180 5181 if (min > max) 5182 return -EINVAL; 5183 5184 if (size == 0 || max - min < size - 1) 5185 return -EINVAL; 5186 5187 if (mas_is_start(mas)) { 5188 mas_start(mas); 5189 mas->offset = mas_data_end(mas); 5190 } else if (mas->offset >= 2) { 5191 mas->offset -= 2; 5192 } else if (!mas_rewind_node(mas)) { 5193 return -EBUSY; 5194 } 5195 5196 /* Empty set. */ 5197 if (mas_is_none(mas) || mas_is_ptr(mas)) 5198 return mas_sparse_area(mas, min, max, size, false); 5199 5200 /* The start of the window can only be within these values. */ 5201 mas->index = min; 5202 mas->last = max; 5203 5204 while (!mas_rev_awalk(mas, size, &min, &max)) { 5205 if (last == mas->node) { 5206 if (!mas_rewind_node(mas)) 5207 return -EBUSY; 5208 } else { 5209 last = mas->node; 5210 } 5211 } 5212 5213 if (mas_is_err(mas)) 5214 return xa_err(mas->node); 5215 5216 if (unlikely(mas->offset == MAPLE_NODE_SLOTS)) 5217 return -EBUSY; 5218 5219 /* Trim the upper limit to the max. */ 5220 if (max < mas->last) 5221 mas->last = max; 5222 5223 mas->index = mas->last - size + 1; 5224 return 0; 5225 } 5226 EXPORT_SYMBOL_GPL(mas_empty_area_rev); 5227 5228 /* 5229 * mte_dead_leaves() - Mark all leaves of a node as dead. 5230 * @mas: The maple state 5231 * @slots: Pointer to the slot array 5232 * @type: The maple node type 5233 * 5234 * Must hold the write lock. 5235 * 5236 * Return: The number of leaves marked as dead. 
5237 */ 5238 static inline 5239 unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt, 5240 void __rcu **slots) 5241 { 5242 struct maple_node *node; 5243 enum maple_type type; 5244 void *entry; 5245 int offset; 5246 5247 for (offset = 0; offset < mt_slot_count(enode); offset++) { 5248 entry = mt_slot(mt, slots, offset); 5249 type = mte_node_type(entry); 5250 node = mte_to_node(entry); 5251 /* Use both node and type to catch LE & BE metadata */ 5252 if (!node || !type) 5253 break; 5254 5255 mte_set_node_dead(entry); 5256 node->type = type; 5257 rcu_assign_pointer(slots[offset], node); 5258 } 5259 5260 return offset; 5261 } 5262 5263 /** 5264 * mte_dead_walk() - Walk down a dead tree to just before the leaves 5265 * @enode: The maple encoded node 5266 * @offset: The starting offset 5267 * 5268 * Note: This can only be used from the RCU callback context. 5269 */ 5270 static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset) 5271 { 5272 struct maple_node *node, *next; 5273 void __rcu **slots = NULL; 5274 5275 next = mte_to_node(*enode); 5276 do { 5277 *enode = ma_enode_ptr(next); 5278 node = mte_to_node(*enode); 5279 slots = ma_slots(node, node->type); 5280 next = rcu_dereference_protected(slots[offset], 5281 lock_is_held(&rcu_callback_map)); 5282 offset = 0; 5283 } while (!ma_is_leaf(next->type)); 5284 5285 return slots; 5286 } 5287 5288 /** 5289 * mt_free_walk() - Walk & free a tree in the RCU callback context 5290 * @head: The RCU head that's within the node. 5291 * 5292 * Note: This can only be used from the RCU callback context. 5293 */ 5294 static void mt_free_walk(struct rcu_head *head) 5295 { 5296 void __rcu **slots; 5297 struct maple_node *node, *start; 5298 struct maple_enode *enode; 5299 unsigned char offset; 5300 enum maple_type type; 5301 5302 node = container_of(head, struct maple_node, rcu); 5303 5304 if (ma_is_leaf(node->type)) 5305 goto free_leaf; 5306 5307 start = node; 5308 enode = mt_mk_node(node, node->type); 5309 slots = mte_dead_walk(&enode, 0); 5310 node = mte_to_node(enode); 5311 do { 5312 mt_free_bulk(node->slot_len, slots); 5313 offset = node->parent_slot + 1; 5314 enode = node->piv_parent; 5315 if (mte_to_node(enode) == node) 5316 goto free_leaf; 5317 5318 type = mte_node_type(enode); 5319 slots = ma_slots(mte_to_node(enode), type); 5320 if ((offset < mt_slots[type]) && 5321 rcu_dereference_protected(slots[offset], 5322 lock_is_held(&rcu_callback_map))) 5323 slots = mte_dead_walk(&enode, offset); 5324 node = mte_to_node(enode); 5325 } while ((node != start) || (node->slot_len < offset)); 5326 5327 slots = ma_slots(node, node->type); 5328 mt_free_bulk(node->slot_len, slots); 5329 5330 free_leaf: 5331 mt_free_rcu(&node->rcu); 5332 } 5333 5334 static inline void __rcu **mte_destroy_descend(struct maple_enode **enode, 5335 struct maple_tree *mt, struct maple_enode *prev, unsigned char offset) 5336 { 5337 struct maple_node *node; 5338 struct maple_enode *next = *enode; 5339 void __rcu **slots = NULL; 5340 enum maple_type type; 5341 unsigned char next_offset = 0; 5342 5343 do { 5344 *enode = next; 5345 node = mte_to_node(*enode); 5346 type = mte_node_type(*enode); 5347 slots = ma_slots(node, type); 5348 next = mt_slot_locked(mt, slots, next_offset); 5349 if ((mte_dead_node(next))) 5350 next = mt_slot_locked(mt, slots, ++next_offset); 5351 5352 mte_set_node_dead(*enode); 5353 node->type = type; 5354 node->piv_parent = prev; 5355 node->parent_slot = offset; 5356 offset = next_offset; 5357 next_offset = 0; 5358 prev = 
*enode; 5359 } while (!mte_is_leaf(next)); 5360 5361 return slots; 5362 } 5363 5364 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 5365 bool free) 5366 { 5367 void __rcu **slots; 5368 struct maple_node *node = mte_to_node(enode); 5369 struct maple_enode *start; 5370 5371 if (mte_is_leaf(enode)) { 5372 node->type = mte_node_type(enode); 5373 goto free_leaf; 5374 } 5375 5376 start = enode; 5377 slots = mte_destroy_descend(&enode, mt, start, 0); 5378 node = mte_to_node(enode); // Updated in the above call. 5379 do { 5380 enum maple_type type; 5381 unsigned char offset; 5382 struct maple_enode *parent, *tmp; 5383 5384 node->slot_len = mte_dead_leaves(enode, mt, slots); 5385 if (free) 5386 mt_free_bulk(node->slot_len, slots); 5387 offset = node->parent_slot + 1; 5388 enode = node->piv_parent; 5389 if (mte_to_node(enode) == node) 5390 goto free_leaf; 5391 5392 type = mte_node_type(enode); 5393 slots = ma_slots(mte_to_node(enode), type); 5394 if (offset >= mt_slots[type]) 5395 goto next; 5396 5397 tmp = mt_slot_locked(mt, slots, offset); 5398 if (mte_node_type(tmp) && mte_to_node(tmp)) { 5399 parent = enode; 5400 enode = tmp; 5401 slots = mte_destroy_descend(&enode, mt, parent, offset); 5402 } 5403 next: 5404 node = mte_to_node(enode); 5405 } while (start != enode); 5406 5407 node = mte_to_node(enode); 5408 node->slot_len = mte_dead_leaves(enode, mt, slots); 5409 if (free) 5410 mt_free_bulk(node->slot_len, slots); 5411 5412 free_leaf: 5413 if (free) 5414 mt_free_rcu(&node->rcu); 5415 else 5416 mt_clear_meta(mt, node, node->type); 5417 } 5418 5419 /* 5420 * mte_destroy_walk() - Free a tree or sub-tree. 5421 * @enode: the encoded maple node (maple_enode) to start 5422 * @mt: the tree to free - needed for node types. 5423 * 5424 * Must hold the write lock. 5425 */ 5426 static inline void mte_destroy_walk(struct maple_enode *enode, 5427 struct maple_tree *mt) 5428 { 5429 struct maple_node *node = mte_to_node(enode); 5430 5431 if (mt_in_rcu(mt)) { 5432 mt_destroy_walk(enode, mt, false); 5433 call_rcu(&node->rcu, mt_free_walk); 5434 } else { 5435 mt_destroy_walk(enode, mt, true); 5436 } 5437 } 5438 5439 static void mas_wr_store_setup(struct ma_wr_state *wr_mas) 5440 { 5441 if (mas_is_start(wr_mas->mas)) 5442 return; 5443 5444 if (unlikely(mas_is_paused(wr_mas->mas))) 5445 goto reset; 5446 5447 if (unlikely(mas_is_none(wr_mas->mas))) 5448 goto reset; 5449 5450 /* 5451 * A less strict version of mas_is_span_wr() where we allow spanning 5452 * writes within this node. This is to stop partial walks in 5453 * mas_prealloc() from being reset. 5454 */ 5455 if (wr_mas->mas->last > wr_mas->mas->max) 5456 goto reset; 5457 5458 if (wr_mas->entry) 5459 return; 5460 5461 if (mte_is_leaf(wr_mas->mas->node) && 5462 wr_mas->mas->last == wr_mas->mas->max) 5463 goto reset; 5464 5465 return; 5466 5467 reset: 5468 mas_reset(wr_mas->mas); 5469 } 5470 5471 /* Interface */ 5472 5473 /** 5474 * mas_store() - Store an @entry. 5475 * @mas: The maple state. 5476 * @entry: The entry to store. 5477 * 5478 * The @mas->index and @mas->last is used to set the range for the @entry. 5479 * Note: The @mas should have pre-allocated entries to ensure there is memory to 5480 * store the entry. Please see mas_expected_entries()/mas_destroy() for more details. 5481 * 5482 * Return: the first entry between mas->index and mas->last or %NULL. 
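 *
 * Illustrative sketch (not from the original documentation; the tree, entry
 * and variable names are placeholders, and node allocations are assumed to
 * have been set up as described in the note above):
 *
 *	MA_STATE(mas, &tree, 10, 19);
 *	void *old;
 *
 *	mas_lock(&mas);
 *	old = mas_store(&mas, new_entry);
 *	mas_unlock(&mas);
 *
 * Afterwards new_entry is returned for any lookup in [10, 19], and old holds
 * whatever entry previously started in that range (or %NULL).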
5483 */ 5484 void *mas_store(struct ma_state *mas, void *entry) 5485 { 5486 MA_WR_STATE(wr_mas, mas, entry); 5487 5488 trace_ma_write(__func__, mas, 0, entry); 5489 #ifdef CONFIG_DEBUG_MAPLE_TREE 5490 if (MAS_WARN_ON(mas, mas->index > mas->last)) 5491 pr_err("Error %lX > %lX %p\n", mas->index, mas->last, entry); 5492 5493 if (mas->index > mas->last) { 5494 mas_set_err(mas, -EINVAL); 5495 return NULL; 5496 } 5497 5498 #endif 5499 5500 /* 5501 * Storing is the same operation as insert with the added caveat that it 5502 * can overwrite entries. Although this seems simple enough, one may 5503 * want to examine what happens if a single store operation was to 5504 * overwrite multiple entries within a self-balancing B-Tree. 5505 */ 5506 mas_wr_store_setup(&wr_mas); 5507 mas_wr_store_entry(&wr_mas); 5508 return wr_mas.content; 5509 } 5510 EXPORT_SYMBOL_GPL(mas_store); 5511 5512 /** 5513 * mas_store_gfp() - Store a value into the tree. 5514 * @mas: The maple state 5515 * @entry: The entry to store 5516 * @gfp: The GFP_FLAGS to use for allocations if necessary. 5517 * 5518 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 5519 * be allocated. 5520 */ 5521 int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp) 5522 { 5523 MA_WR_STATE(wr_mas, mas, entry); 5524 5525 mas_wr_store_setup(&wr_mas); 5526 trace_ma_write(__func__, mas, 0, entry); 5527 retry: 5528 mas_wr_store_entry(&wr_mas); 5529 if (unlikely(mas_nomem(mas, gfp))) 5530 goto retry; 5531 5532 if (unlikely(mas_is_err(mas))) 5533 return xa_err(mas->node); 5534 5535 return 0; 5536 } 5537 EXPORT_SYMBOL_GPL(mas_store_gfp); 5538 5539 /** 5540 * mas_store_prealloc() - Store a value into the tree using memory 5541 * preallocated in the maple state. 5542 * @mas: The maple state 5543 * @entry: The entry to store. 5544 */ 5545 void mas_store_prealloc(struct ma_state *mas, void *entry) 5546 { 5547 MA_WR_STATE(wr_mas, mas, entry); 5548 5549 mas_wr_store_setup(&wr_mas); 5550 trace_ma_write(__func__, mas, 0, entry); 5551 mas_wr_store_entry(&wr_mas); 5552 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); 5553 mas_destroy(mas); 5554 } 5555 EXPORT_SYMBOL_GPL(mas_store_prealloc); 5556 5557 /** 5558 * mas_preallocate() - Preallocate enough nodes for a store operation 5559 * @mas: The maple state 5560 * @entry: The entry that will be stored 5561 * @gfp: The GFP_FLAGS to use for allocations. 5562 * 5563 * Return: 0 on success, -ENOMEM if memory could not be allocated. 5564 */ 5565 int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) 5566 { 5567 MA_WR_STATE(wr_mas, mas, entry); 5568 unsigned char node_size; 5569 int request = 1; 5570 int ret; 5571 5572 5573 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 5574 goto ask_now; 5575 5576 mas_wr_store_setup(&wr_mas); 5577 wr_mas.content = mas_start(mas); 5578 /* Root expand */ 5579 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) 5580 goto ask_now; 5581 5582 if (unlikely(!mas_wr_walk(&wr_mas))) { 5583 /* Spanning store, use worst case for now */ 5584 request = 1 + mas_mt_height(mas) * 3; 5585 goto ask_now; 5586 } 5587 5588 /* At this point, we are at the leaf node that needs to be altered. */ 5589 /* Exact fit, no nodes needed. */ 5590 if (wr_mas.r_min == mas->index && wr_mas.r_max == mas->last) 5591 return 0; 5592 5593 mas_wr_end_piv(&wr_mas); 5594 node_size = mas_wr_new_end(&wr_mas); 5595 if (node_size >= mt_slots[wr_mas.type]) { 5596 /* Split, worst case for now. 
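		 * e.g. (illustrative) a tree of height 3 requests
		 * 1 + 3 * 2 = 7 nodes: two replacement nodes per level if
		 * every level splits, plus a possible new root.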
*/ 5597 request = 1 + mas_mt_height(mas) * 2; 5598 goto ask_now; 5599 } 5600 5601 /* New root needs a singe node */ 5602 if (unlikely(mte_is_root(mas->node))) 5603 goto ask_now; 5604 5605 /* Potential spanning rebalance collapsing a node, use worst-case */ 5606 if (node_size - 1 <= mt_min_slots[wr_mas.type]) 5607 request = mas_mt_height(mas) * 2 - 1; 5608 5609 /* node store, slot store needs one node */ 5610 ask_now: 5611 mas_node_count_gfp(mas, request, gfp); 5612 mas->mas_flags |= MA_STATE_PREALLOC; 5613 if (likely(!mas_is_err(mas))) 5614 return 0; 5615 5616 mas_set_alloc_req(mas, 0); 5617 ret = xa_err(mas->node); 5618 mas_reset(mas); 5619 mas_destroy(mas); 5620 mas_reset(mas); 5621 return ret; 5622 } 5623 EXPORT_SYMBOL_GPL(mas_preallocate); 5624 5625 /* 5626 * mas_destroy() - destroy a maple state. 5627 * @mas: The maple state 5628 * 5629 * Upon completion, check the left-most node and rebalance against the node to 5630 * the right if necessary. Frees any allocated nodes associated with this maple 5631 * state. 5632 */ 5633 void mas_destroy(struct ma_state *mas) 5634 { 5635 struct maple_alloc *node; 5636 unsigned long total; 5637 5638 /* 5639 * When using mas_for_each() to insert an expected number of elements, 5640 * it is possible that the number inserted is less than the expected 5641 * number. To fix an invalid final node, a check is performed here to 5642 * rebalance the previous node with the final node. 5643 */ 5644 if (mas->mas_flags & MA_STATE_REBALANCE) { 5645 unsigned char end; 5646 5647 mas_start(mas); 5648 mtree_range_walk(mas); 5649 end = mas_data_end(mas) + 1; 5650 if (end < mt_min_slot_count(mas->node) - 1) 5651 mas_destroy_rebalance(mas, end); 5652 5653 mas->mas_flags &= ~MA_STATE_REBALANCE; 5654 } 5655 mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC); 5656 5657 total = mas_allocated(mas); 5658 while (total) { 5659 node = mas->alloc; 5660 mas->alloc = node->slot[0]; 5661 if (node->node_count > 1) { 5662 size_t count = node->node_count - 1; 5663 5664 mt_free_bulk(count, (void __rcu **)&node->slot[1]); 5665 total -= count; 5666 } 5667 kmem_cache_free(maple_node_cache, node); 5668 total--; 5669 } 5670 5671 mas->alloc = NULL; 5672 } 5673 EXPORT_SYMBOL_GPL(mas_destroy); 5674 5675 /* 5676 * mas_expected_entries() - Set the expected number of entries that will be inserted. 5677 * @mas: The maple state 5678 * @nr_entries: The number of expected entries. 5679 * 5680 * This will attempt to pre-allocate enough nodes to store the expected number 5681 * of entries. The allocations will occur using the bulk allocator interface 5682 * for speed. Please call mas_destroy() on the @mas after inserting the entries 5683 * to ensure any unused nodes are freed. 5684 * 5685 * Return: 0 on success, -ENOMEM if memory could not be allocated. 5686 */ 5687 int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries) 5688 { 5689 int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2; 5690 struct maple_enode *enode = mas->node; 5691 int nr_nodes; 5692 int ret; 5693 5694 /* 5695 * Sometimes it is necessary to duplicate a tree to a new tree, such as 5696 * forking a process and duplicating the VMAs from one tree to a new 5697 * tree. When such a situation arises, it is known that the new tree is 5698 * not going to be used until the entire tree is populated. For 5699 * performance reasons, it is best to use a bulk load with RCU disabled. 5700 * This allows for optimistic splitting that favours the left and reuse 5701 * of nodes during the operation. 
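	 *
	 * Illustrative sketch of that bulk-load pattern (nr, src and new_tree
	 * are placeholder names, not part of the original source):
	 *
	 *	MA_STATE(mas, &new_tree, 0, 0);
	 *	unsigned long i;
	 *
	 *	mas_lock(&mas);
	 *	if (!mas_expected_entries(&mas, nr)) {
	 *		for (i = 0; i < nr; i++) {
	 *			mas_set_range(&mas, src[i].start, src[i].end);
	 *			mas_store(&mas, src[i].ptr);
	 *		}
	 *	}
	 *	mas_destroy(&mas);
	 *	mas_unlock(&mas);
	 *
	 * mas_destroy() must run after the last store to release any unused
	 * pre-allocated nodes and to fix up the final node if fewer entries
	 * than expected were inserted.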
5702 */ 5703 5704 /* Optimize splitting for bulk insert in-order */ 5705 mas->mas_flags |= MA_STATE_BULK; 5706 5707 /* 5708 * Avoid overflow, assume a gap between each entry and a trailing null. 5709 * If this is wrong, it just means allocation can happen during 5710 * insertion of entries. 5711 */ 5712 nr_nodes = max(nr_entries, nr_entries * 2 + 1); 5713 if (!mt_is_alloc(mas->tree)) 5714 nonleaf_cap = MAPLE_RANGE64_SLOTS - 2; 5715 5716 /* Leaves; reduce slots to keep space for expansion */ 5717 nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2); 5718 /* Internal nodes */ 5719 nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap); 5720 /* Add working room for split (2 nodes) + new parents */ 5721 mas_node_count(mas, nr_nodes + 3); 5722 5723 /* Detect if allocations run out */ 5724 mas->mas_flags |= MA_STATE_PREALLOC; 5725 5726 if (!mas_is_err(mas)) 5727 return 0; 5728 5729 ret = xa_err(mas->node); 5730 mas->node = enode; 5731 mas_destroy(mas); 5732 return ret; 5733 5734 } 5735 EXPORT_SYMBOL_GPL(mas_expected_entries); 5736 5737 static inline bool mas_next_setup(struct ma_state *mas, unsigned long max, 5738 void **entry) 5739 { 5740 bool was_none = mas_is_none(mas); 5741 5742 if (mas_is_none(mas) || mas_is_paused(mas)) 5743 mas->node = MAS_START; 5744 5745 if (mas_is_start(mas)) 5746 *entry = mas_walk(mas); /* Retries on dead nodes handled by mas_walk */ 5747 5748 if (mas_is_ptr(mas)) { 5749 *entry = NULL; 5750 if (was_none && mas->index == 0) { 5751 mas->index = mas->last = 0; 5752 return true; 5753 } 5754 mas->index = 1; 5755 mas->last = ULONG_MAX; 5756 mas->node = MAS_NONE; 5757 return true; 5758 } 5759 5760 if (mas_is_none(mas)) 5761 return true; 5762 return false; 5763 } 5764 5765 /** 5766 * mas_next() - Get the next entry. 5767 * @mas: The maple state 5768 * @max: The maximum index to check. 5769 * 5770 * Returns the next entry after @mas->index. 5771 * Must hold rcu_read_lock or the write lock. 5772 * Can return the zero entry. 5773 * 5774 * Return: The next entry or %NULL 5775 */ 5776 void *mas_next(struct ma_state *mas, unsigned long max) 5777 { 5778 void *entry = NULL; 5779 5780 if (mas_next_setup(mas, max, &entry)) 5781 return entry; 5782 5783 /* Retries on dead nodes handled by mas_next_slot */ 5784 return mas_next_slot(mas, max, false); 5785 } 5786 EXPORT_SYMBOL_GPL(mas_next); 5787 5788 /** 5789 * mas_next_range() - Advance the maple state to the next range 5790 * @mas: The maple state 5791 * @max: The maximum index to check. 5792 * 5793 * Sets @mas->index and @mas->last to the range. 5794 * Must hold rcu_read_lock or the write lock. 5795 * Can return the zero entry. 5796 * 5797 * Return: The next entry or %NULL 5798 */ 5799 void *mas_next_range(struct ma_state *mas, unsigned long max) 5800 { 5801 void *entry = NULL; 5802 5803 if (mas_next_setup(mas, max, &entry)) 5804 return entry; 5805 5806 /* Retries on dead nodes handled by mas_next_slot */ 5807 return mas_next_slot(mas, max, true); 5808 } 5809 EXPORT_SYMBOL_GPL(mas_next_range); 5810 5811 /** 5812 * mt_next() - get the next value in the maple tree 5813 * @mt: The maple tree 5814 * @index: The start index 5815 * @max: The maximum index to check 5816 * 5817 * Takes RCU read lock internally to protect the search, which does not 5818 * protect the returned pointer after dropping RCU read lock. 5819 * See also: Documentation/core-api/maple_tree.rst 5820 * 5821 * Return: The entry higher than @index or %NULL if nothing is found. 
5822 */ 5823 void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max) 5824 { 5825 void *entry = NULL; 5826 MA_STATE(mas, mt, index, index); 5827 5828 rcu_read_lock(); 5829 entry = mas_next(&mas, max); 5830 rcu_read_unlock(); 5831 return entry; 5832 } 5833 EXPORT_SYMBOL_GPL(mt_next); 5834 5835 static inline bool mas_prev_setup(struct ma_state *mas, unsigned long min, 5836 void **entry) 5837 { 5838 if (mas->index <= min) 5839 goto none; 5840 5841 if (mas_is_none(mas) || mas_is_paused(mas)) 5842 mas->node = MAS_START; 5843 5844 if (mas_is_start(mas)) { 5845 mas_walk(mas); 5846 if (!mas->index) 5847 goto none; 5848 } 5849 5850 if (unlikely(mas_is_ptr(mas))) { 5851 if (!mas->index) 5852 goto none; 5853 mas->index = mas->last = 0; 5854 *entry = mas_root(mas); 5855 return true; 5856 } 5857 5858 if (mas_is_none(mas)) { 5859 if (mas->index) { 5860 /* Walked to out-of-range pointer? */ 5861 mas->index = mas->last = 0; 5862 mas->node = MAS_ROOT; 5863 *entry = mas_root(mas); 5864 return true; 5865 } 5866 return true; 5867 } 5868 5869 return false; 5870 5871 none: 5872 mas->node = MAS_NONE; 5873 return true; 5874 } 5875 5876 /** 5877 * mas_prev() - Get the previous entry 5878 * @mas: The maple state 5879 * @min: The minimum value to check. 5880 * 5881 * Must hold rcu_read_lock or the write lock. 5882 * Will reset mas to MAS_START if the node is MAS_NONE. Will stop on not 5883 * searchable nodes. 5884 * 5885 * Return: the previous value or %NULL. 5886 */ 5887 void *mas_prev(struct ma_state *mas, unsigned long min) 5888 { 5889 void *entry = NULL; 5890 5891 if (mas_prev_setup(mas, min, &entry)) 5892 return entry; 5893 5894 return mas_prev_slot(mas, min, false); 5895 } 5896 EXPORT_SYMBOL_GPL(mas_prev); 5897 5898 /** 5899 * mas_prev_range() - Advance to the previous range 5900 * @mas: The maple state 5901 * @min: The minimum value to check. 5902 * 5903 * Sets @mas->index and @mas->last to the range. 5904 * Must hold rcu_read_lock or the write lock. 5905 * Will reset mas to MAS_START if the node is MAS_NONE. Will stop on not 5906 * searchable nodes. 5907 * 5908 * Return: the previous value or %NULL. 5909 */ 5910 void *mas_prev_range(struct ma_state *mas, unsigned long min) 5911 { 5912 void *entry = NULL; 5913 5914 if (mas_prev_setup(mas, min, &entry)) 5915 return entry; 5916 5917 return mas_prev_slot(mas, min, true); 5918 } 5919 EXPORT_SYMBOL_GPL(mas_prev_range); 5920 5921 /** 5922 * mt_prev() - get the previous value in the maple tree 5923 * @mt: The maple tree 5924 * @index: The start index 5925 * @min: The minimum index to check 5926 * 5927 * Takes RCU read lock internally to protect the search, which does not 5928 * protect the returned pointer after dropping RCU read lock. 5929 * See also: Documentation/core-api/maple_tree.rst 5930 * 5931 * Return: The entry before @index or %NULL if nothing is found. 5932 */ 5933 void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min) 5934 { 5935 void *entry = NULL; 5936 MA_STATE(mas, mt, index, index); 5937 5938 rcu_read_lock(); 5939 entry = mas_prev(&mas, min); 5940 rcu_read_unlock(); 5941 return entry; 5942 } 5943 EXPORT_SYMBOL_GPL(mt_prev); 5944 5945 /** 5946 * mas_pause() - Pause a mas_find/mas_for_each to drop the lock. 5947 * @mas: The maple state to pause 5948 * 5949 * Some users need to pause a walk and drop the lock they're holding in 5950 * order to yield to a higher priority thread or carry out an operation 5951 * on an entry. Those users should call this function before they drop 5952 * the lock. 
mas_pause() resets the @mas to be suitable for the next iteration 5953 * of the loop after the user has reacquired the lock. If most entries 5954 * found during a walk require you to call mas_pause(), the mt_for_each() 5955 * iterator may be more appropriate. 5956 * 5957 */ 5958 void mas_pause(struct ma_state *mas) 5959 { 5960 mas->node = MAS_PAUSE; 5961 } 5962 EXPORT_SYMBOL_GPL(mas_pause); 5963 5964 /** 5965 * mas_find_setup() - Internal function to set up mas_find*(). 5966 * @mas: The maple state 5967 * @max: The maximum index 5968 * @entry: Pointer to the entry 5969 * 5970 * Return: True if entry is the answer, false otherwise. 5971 */ 5972 static inline bool mas_find_setup(struct ma_state *mas, unsigned long max, 5973 void **entry) 5974 { 5975 *entry = NULL; 5976 5977 if (unlikely(mas_is_none(mas))) { 5978 if (unlikely(mas->last >= max)) 5979 return true; 5980 5981 mas->index = mas->last; 5982 mas->node = MAS_START; 5983 } else if (unlikely(mas_is_paused(mas))) { 5984 if (unlikely(mas->last >= max)) 5985 return true; 5986 5987 mas->node = MAS_START; 5988 mas->index = ++mas->last; 5989 } else if (unlikely(mas_is_ptr(mas))) 5990 goto ptr_out_of_range; 5991 5992 if (unlikely(mas_is_start(mas))) { 5993 /* First run or continue */ 5994 if (mas->index > max) 5995 return true; 5996 5997 *entry = mas_walk(mas); 5998 if (*entry) 5999 return true; 6000 6001 } 6002 6003 if (unlikely(!mas_searchable(mas))) { 6004 if (unlikely(mas_is_ptr(mas))) 6005 goto ptr_out_of_range; 6006 6007 return true; 6008 } 6009 6010 if (mas->index == max) 6011 return true; 6012 6013 return false; 6014 6015 ptr_out_of_range: 6016 mas->node = MAS_NONE; 6017 mas->index = 1; 6018 mas->last = ULONG_MAX; 6019 return true; 6020 } 6021 6022 /** 6023 * mas_find() - On the first call, find the entry at or after mas->index up to 6024 * %max. Otherwise, find the entry after mas->index. 6025 * @mas: The maple state 6026 * @max: The maximum value to check. 6027 * 6028 * Must hold rcu_read_lock or the write lock. 6029 * If an entry exists, last and index are updated accordingly. 6030 * May set @mas->node to MAS_NONE. 6031 * 6032 * Return: The entry or %NULL. 6033 */ 6034 void *mas_find(struct ma_state *mas, unsigned long max) 6035 { 6036 void *entry = NULL; 6037 6038 if (mas_find_setup(mas, max, &entry)) 6039 return entry; 6040 6041 /* Retries on dead nodes handled by mas_next_slot */ 6042 return mas_next_slot(mas, max, false); 6043 } 6044 EXPORT_SYMBOL_GPL(mas_find); 6045 6046 /** 6047 * mas_find_range() - On the first call, find the entry at or after 6048 * mas->index up to %max. Otherwise, advance to the next slot after mas->index. 6049 * @mas: The maple state 6050 * @max: The maximum value to check. 6051 * 6052 * Must hold rcu_read_lock or the write lock. 6053 * If an entry exists, last and index are updated accordingly. 6054 * May set @mas->node to MAS_NONE. 6055 * 6056 * Return: The entry or %NULL. 6057 */ 6058 void *mas_find_range(struct ma_state *mas, unsigned long max) 6059 { 6060 void *entry; 6061 6062 if (mas_find_setup(mas, max, &entry)) 6063 return entry; 6064 6065 /* Retries on dead nodes handled by mas_next_slot */ 6066 return mas_next_slot(mas, max, true); 6067 } 6068 EXPORT_SYMBOL_GPL(mas_find_range); 6069 6070 /** 6071 * mas_find_rev_setup() - Internal function to set up mas_find_*_rev() 6072 * @mas: The maple state 6073 * @min: The minimum index 6074 * @entry: Pointer to the entry 6075 * 6076 * Return: True if entry is the answer, false otherwise.
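 *
 * The wrappers built on this helper, mas_find_rev() and mas_find_range_rev(),
 * are typically used for a reverse walk along these lines (an illustrative
 * sketch; the tree pointer mt and handle_entry() are assumptions of the
 * example):
 *
 *	MA_STATE(mas, mt, ULONG_MAX, ULONG_MAX);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	while ((entry = mas_find_rev(&mas, 0)) != NULL)
 *		handle_entry(entry);
 *	rcu_read_unlock();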
6077 */ 6078 static inline bool mas_find_rev_setup(struct ma_state *mas, unsigned long min, 6079 void **entry) 6080 { 6081 *entry = NULL; 6082 6083 if (unlikely(mas_is_none(mas))) { 6084 if (mas->index <= min) 6085 goto none; 6086 6087 mas->last = mas->index; 6088 mas->node = MAS_START; 6089 } 6090 6091 if (unlikely(mas_is_paused(mas))) { 6092 if (unlikely(mas->index <= min)) { 6093 mas->node = MAS_NONE; 6094 return true; 6095 } 6096 mas->node = MAS_START; 6097 mas->last = --mas->index; 6098 } 6099 6100 if (unlikely(mas_is_start(mas))) { 6101 /* First run or continue */ 6102 if (mas->index < min) 6103 return true; 6104 6105 *entry = mas_walk(mas); 6106 if (*entry) 6107 return true; 6108 } 6109 6110 if (unlikely(!mas_searchable(mas))) { 6111 if (mas_is_ptr(mas)) 6112 goto none; 6113 6114 if (mas_is_none(mas)) { 6115 /* 6116 * Walked to the location, and there was nothing so the 6117 * previous location is 0. 6118 */ 6119 mas->last = mas->index = 0; 6120 mas->node = MAS_ROOT; 6121 *entry = mas_root(mas); 6122 return true; 6123 } 6124 } 6125 6126 if (mas->index < min) 6127 return true; 6128 6129 return false; 6130 6131 none: 6132 mas->node = MAS_NONE; 6133 return true; 6134 } 6135 6136 /** 6137 * mas_find_rev: On the first call, find the first non-null entry at or below 6138 * mas->index down to %min. Otherwise find the first non-null entry below 6139 * mas->index down to %min. 6140 * @mas: The maple state 6141 * @min: The minimum value to check. 6142 * 6143 * Must hold rcu_read_lock or the write lock. 6144 * If an entry exists, last and index are updated accordingly. 6145 * May set @mas->node to MAS_NONE. 6146 * 6147 * Return: The entry or %NULL. 6148 */ 6149 void *mas_find_rev(struct ma_state *mas, unsigned long min) 6150 { 6151 void *entry; 6152 6153 if (mas_find_rev_setup(mas, min, &entry)) 6154 return entry; 6155 6156 /* Retries on dead nodes handled by mas_prev_slot */ 6157 return mas_prev_slot(mas, min, false); 6158 6159 } 6160 EXPORT_SYMBOL_GPL(mas_find_rev); 6161 6162 /** 6163 * mas_find_range_rev: On the first call, find the first non-null entry at or 6164 * below mas->index down to %min. Otherwise advance to the previous slot after 6165 * mas->index down to %min. 6166 * @mas: The maple state 6167 * @min: The minimum value to check. 6168 * 6169 * Must hold rcu_read_lock or the write lock. 6170 * If an entry exists, last and index are updated accordingly. 6171 * May set @mas->node to MAS_NONE. 6172 * 6173 * Return: The entry or %NULL. 6174 */ 6175 void *mas_find_range_rev(struct ma_state *mas, unsigned long min) 6176 { 6177 void *entry; 6178 6179 if (mas_find_rev_setup(mas, min, &entry)) 6180 return entry; 6181 6182 /* Retries on dead nodes handled by mas_prev_slot */ 6183 return mas_prev_slot(mas, min, true); 6184 } 6185 EXPORT_SYMBOL_GPL(mas_find_range_rev); 6186 6187 /** 6188 * mas_erase() - Find the range in which index resides and erase the entire 6189 * range. 6190 * @mas: The maple state 6191 * 6192 * Must hold the write lock. 6193 * Searches for @mas->index, sets @mas->index and @mas->last to the range and 6194 * erases that range. 6195 * 6196 * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated. 6197 */ 6198 void *mas_erase(struct ma_state *mas) 6199 { 6200 void *entry; 6201 MA_WR_STATE(wr_mas, mas, NULL); 6202 6203 if (mas_is_none(mas) || mas_is_paused(mas)) 6204 mas->node = MAS_START; 6205 6206 /* Retry unnecessary when holding the write lock. 
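The write lock excludes concurrent writers, so the walk cannot race with a node being replaced and marked dead.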
*/ 6207 entry = mas_state_walk(mas); 6208 if (!entry) 6209 return NULL; 6210 6211 write_retry: 6212 /* Must reset to ensure spanning writes of last slot are detected */ 6213 mas_reset(mas); 6214 mas_wr_store_setup(&wr_mas); 6215 mas_wr_store_entry(&wr_mas); 6216 if (mas_nomem(mas, GFP_KERNEL)) 6217 goto write_retry; 6218 6219 return entry; 6220 } 6221 EXPORT_SYMBOL_GPL(mas_erase); 6222 6223 /** 6224 * mas_nomem() - Check if there was an error allocating and do the allocation 6225 * if necessary If there are allocations, then free them. 6226 * @mas: The maple state 6227 * @gfp: The GFP_FLAGS to use for allocations 6228 * Return: true on allocation, false otherwise. 6229 */ 6230 bool mas_nomem(struct ma_state *mas, gfp_t gfp) 6231 __must_hold(mas->tree->ma_lock) 6232 { 6233 if (likely(mas->node != MA_ERROR(-ENOMEM))) { 6234 mas_destroy(mas); 6235 return false; 6236 } 6237 6238 if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) { 6239 mtree_unlock(mas->tree); 6240 mas_alloc_nodes(mas, gfp); 6241 mtree_lock(mas->tree); 6242 } else { 6243 mas_alloc_nodes(mas, gfp); 6244 } 6245 6246 if (!mas_allocated(mas)) 6247 return false; 6248 6249 mas->node = MAS_START; 6250 return true; 6251 } 6252 6253 void __init maple_tree_init(void) 6254 { 6255 maple_node_cache = kmem_cache_create("maple_node", 6256 sizeof(struct maple_node), sizeof(struct maple_node), 6257 SLAB_PANIC, NULL); 6258 } 6259 6260 /** 6261 * mtree_load() - Load a value stored in a maple tree 6262 * @mt: The maple tree 6263 * @index: The index to load 6264 * 6265 * Return: the entry or %NULL 6266 */ 6267 void *mtree_load(struct maple_tree *mt, unsigned long index) 6268 { 6269 MA_STATE(mas, mt, index, index); 6270 void *entry; 6271 6272 trace_ma_read(__func__, &mas); 6273 rcu_read_lock(); 6274 retry: 6275 entry = mas_start(&mas); 6276 if (unlikely(mas_is_none(&mas))) 6277 goto unlock; 6278 6279 if (unlikely(mas_is_ptr(&mas))) { 6280 if (index) 6281 entry = NULL; 6282 6283 goto unlock; 6284 } 6285 6286 entry = mtree_lookup_walk(&mas); 6287 if (!entry && unlikely(mas_is_start(&mas))) 6288 goto retry; 6289 unlock: 6290 rcu_read_unlock(); 6291 if (xa_is_zero(entry)) 6292 return NULL; 6293 6294 return entry; 6295 } 6296 EXPORT_SYMBOL(mtree_load); 6297 6298 /** 6299 * mtree_store_range() - Store an entry at a given range. 6300 * @mt: The maple tree 6301 * @index: The start of the range 6302 * @last: The end of the range 6303 * @entry: The entry to store 6304 * @gfp: The GFP_FLAGS to use for allocations 6305 * 6306 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 6307 * be allocated. 6308 */ 6309 int mtree_store_range(struct maple_tree *mt, unsigned long index, 6310 unsigned long last, void *entry, gfp_t gfp) 6311 { 6312 MA_STATE(mas, mt, index, last); 6313 MA_WR_STATE(wr_mas, &mas, entry); 6314 6315 trace_ma_write(__func__, &mas, 0, entry); 6316 if (WARN_ON_ONCE(xa_is_advanced(entry))) 6317 return -EINVAL; 6318 6319 if (index > last) 6320 return -EINVAL; 6321 6322 mtree_lock(mt); 6323 retry: 6324 mas_wr_store_entry(&wr_mas); 6325 if (mas_nomem(&mas, gfp)) 6326 goto retry; 6327 6328 mtree_unlock(mt); 6329 if (mas_is_err(&mas)) 6330 return xa_err(mas.node); 6331 6332 return 0; 6333 } 6334 EXPORT_SYMBOL(mtree_store_range); 6335 6336 /** 6337 * mtree_store() - Store an entry at a given index. 
6338 * @mt: The maple tree 6339 * @index: The index to store the value 6340 * @entry: The entry to store 6341 * @gfp: The GFP_FLAGS to use for allocations 6342 * 6343 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 6344 * be allocated. 6345 */ 6346 int mtree_store(struct maple_tree *mt, unsigned long index, void *entry, 6347 gfp_t gfp) 6348 { 6349 return mtree_store_range(mt, index, index, entry, gfp); 6350 } 6351 EXPORT_SYMBOL(mtree_store); 6352 6353 /** 6354 * mtree_insert_range() - Insert an entry at a given range if there is no value. 6355 * @mt: The maple tree 6356 * @first: The start of the range 6357 * @last: The end of the range 6358 * @entry: The entry to store 6359 * @gfp: The GFP_FLAGS to use for allocations. 6360 * 6361 * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid 6362 * request, -ENOMEM if memory could not be allocated. 6363 */ 6364 int mtree_insert_range(struct maple_tree *mt, unsigned long first, 6365 unsigned long last, void *entry, gfp_t gfp) 6366 { 6367 MA_STATE(ms, mt, first, last); 6368 6369 if (WARN_ON_ONCE(xa_is_advanced(entry))) 6370 return -EINVAL; 6371 6372 if (first > last) 6373 return -EINVAL; 6374 6375 mtree_lock(mt); 6376 retry: 6377 mas_insert(&ms, entry); 6378 if (mas_nomem(&ms, gfp)) 6379 goto retry; 6380 6381 mtree_unlock(mt); 6382 if (mas_is_err(&ms)) 6383 return xa_err(ms.node); 6384 6385 return 0; 6386 } 6387 EXPORT_SYMBOL(mtree_insert_range); 6388 6389 /** 6390 * mtree_insert() - Insert an entry at a given index if there is no value. 6391 * @mt: The maple tree 6392 * @index: The index to store the value 6393 * @entry: The entry to store 6394 * @gfp: The GFP_FLAGS to use for allocations. 6395 * 6396 * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid 6397 * request, -ENOMEM if memory could not be allocated. 6398 */ 6399 int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry, 6400 gfp_t gfp) 6401 { 6402 return mtree_insert_range(mt, index, index, entry, gfp); 6403 } 6404 EXPORT_SYMBOL(mtree_insert); 6405 6406 int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, 6407 void *entry, unsigned long size, unsigned long min, 6408 unsigned long max, gfp_t gfp) 6409 { 6410 int ret = 0; 6411 6412 MA_STATE(mas, mt, 0, 0); 6413 if (!mt_is_alloc(mt)) 6414 return -EINVAL; 6415 6416 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6417 return -EINVAL; 6418 6419 mtree_lock(mt); 6420 retry: 6421 ret = mas_empty_area(&mas, min, max, size); 6422 if (ret) 6423 goto unlock; 6424 6425 mas_insert(&mas, entry); 6426 /* 6427 * mas_nomem() may release the lock, causing the allocated area 6428 * to be unavailable, so try to allocate a free area again.
6429 */ 6430 if (mas_nomem(&mas, gfp)) 6431 goto retry; 6432 6433 if (mas_is_err(&mas)) 6434 ret = xa_err(mas.node); 6435 else 6436 *startp = mas.index; 6437 6438 unlock: 6439 mtree_unlock(mt); 6440 return ret; 6441 } 6442 EXPORT_SYMBOL(mtree_alloc_range); 6443 6444 int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, 6445 void *entry, unsigned long size, unsigned long min, 6446 unsigned long max, gfp_t gfp) 6447 { 6448 int ret = 0; 6449 6450 MA_STATE(mas, mt, 0, 0); 6451 if (!mt_is_alloc(mt)) 6452 return -EINVAL; 6453 6454 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6455 return -EINVAL; 6456 6457 mtree_lock(mt); 6458 retry: 6459 ret = mas_empty_area_rev(&mas, min, max, size); 6460 if (ret) 6461 goto unlock; 6462 6463 mas_insert(&mas, entry); 6464 /* 6465 * mas_nomem() may release the lock, causing the allocated area 6466 * to be unavailable, so try to allocate a free area again. 6467 */ 6468 if (mas_nomem(&mas, gfp)) 6469 goto retry; 6470 6471 if (mas_is_err(&mas)) 6472 ret = xa_err(mas.node); 6473 else 6474 *startp = mas.index; 6475 6476 unlock: 6477 mtree_unlock(mt); 6478 return ret; 6479 } 6480 EXPORT_SYMBOL(mtree_alloc_rrange); 6481 6482 /** 6483 * mtree_erase() - Find an index and erase the entire range. 6484 * @mt: The maple tree 6485 * @index: The index to erase 6486 * 6487 * Erasing is the same as a walk to an entry then a store of a NULL to that 6488 * ENTIRE range. In fact, it is implemented as such using the advanced API. 6489 * 6490 * Return: The entry stored at @index or %NULL 6491 */ 6492 void *mtree_erase(struct maple_tree *mt, unsigned long index) 6493 { 6494 void *entry = NULL; 6495 6496 MA_STATE(mas, mt, index, index); 6497 trace_ma_op(__func__, &mas); 6498 6499 mtree_lock(mt); 6500 entry = mas_erase(&mas); 6501 mtree_unlock(mt); 6502 6503 return entry; 6504 } 6505 EXPORT_SYMBOL(mtree_erase); 6506 6507 /** 6508 * __mt_destroy() - Walk and free all nodes of a locked maple tree. 6509 * @mt: The maple tree 6510 * 6511 * Note: Does not handle locking. 6512 */ 6513 void __mt_destroy(struct maple_tree *mt) 6514 { 6515 void *root = mt_root_locked(mt); 6516 6517 rcu_assign_pointer(mt->ma_root, NULL); 6518 if (xa_is_node(root)) 6519 mte_destroy_walk(root, mt); 6520 6521 mt->ma_flags = 0; 6522 } 6523 EXPORT_SYMBOL_GPL(__mt_destroy); 6524 6525 /** 6526 * mtree_destroy() - Destroy a maple tree 6527 * @mt: The maple tree 6528 * 6529 * Frees all resources used by the tree. Handles locking. 6530 */ 6531 void mtree_destroy(struct maple_tree *mt) 6532 { 6533 mtree_lock(mt); 6534 __mt_destroy(mt); 6535 mtree_unlock(mt); 6536 } 6537 EXPORT_SYMBOL(mtree_destroy); 6538 6539 /** 6540 * mt_find() - Search from the start up until an entry is found. 6541 * @mt: The maple tree 6542 * @index: Pointer which contains the start location of the search 6543 * @max: The maximum value of the search range 6544 * 6545 * Takes RCU read lock internally to protect the search, which does not 6546 * protect the returned pointer after dropping RCU read lock. 6547 * See also: Documentation/core-api/maple_tree.rst 6548 * 6549 * If an entry is found, @index is updated to point to the next 6550 * possible entry, independent of whether the found entry occupies a 6551 * single index or a range of indices.
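 *
 * An illustrative loop (roughly the pattern wrapped by mt_for_each() in the
 * header; the tree pointer mt and do_something() are assumptions of the
 * example):
 *
 *	unsigned long index = 0;
 *	void *entry;
 *
 *	while ((entry = mt_find(mt, &index, ULONG_MAX)) != NULL)
 *		do_something(entry);
 *
 * mt_for_each() additionally uses mt_find_after() so that a wrap of @index
 * to 0 terminates the loop.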
6552 * 6553 * Return: The entry at or after the @index or %NULL 6554 */ 6555 void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max) 6556 { 6557 MA_STATE(mas, mt, *index, *index); 6558 void *entry; 6559 #ifdef CONFIG_DEBUG_MAPLE_TREE 6560 unsigned long copy = *index; 6561 #endif 6562 6563 trace_ma_read(__func__, &mas); 6564 6565 if ((*index) > max) 6566 return NULL; 6567 6568 rcu_read_lock(); 6569 retry: 6570 entry = mas_state_walk(&mas); 6571 if (mas_is_start(&mas)) 6572 goto retry; 6573 6574 if (unlikely(xa_is_zero(entry))) 6575 entry = NULL; 6576 6577 if (entry) 6578 goto unlock; 6579 6580 while (mas_searchable(&mas) && (mas.last < max)) { 6581 entry = mas_next_entry(&mas, max); 6582 if (likely(entry && !xa_is_zero(entry))) 6583 break; 6584 } 6585 6586 if (unlikely(xa_is_zero(entry))) 6587 entry = NULL; 6588 unlock: 6589 rcu_read_unlock(); 6590 if (likely(entry)) { 6591 *index = mas.last + 1; 6592 #ifdef CONFIG_DEBUG_MAPLE_TREE 6593 if (MT_WARN_ON(mt, (*index) && ((*index) <= copy))) 6594 pr_err("index not increased! %lx <= %lx\n", 6595 *index, copy); 6596 #endif 6597 } 6598 6599 return entry; 6600 } 6601 EXPORT_SYMBOL(mt_find); 6602 6603 /** 6604 * mt_find_after() - Search from the start up until an entry is found. 6605 * @mt: The maple tree 6606 * @index: Pointer which contains the start location of the search 6607 * @max: The maximum value to check 6608 * 6609 * Same as mt_find() except that it checks @index for 0 before 6610 * searching. If @index == 0, the search is aborted. This covers a wrap 6611 * around of @index to 0 in an iterator loop. 6612 * 6613 * Return: The entry at or after the @index or %NULL 6614 */ 6615 void *mt_find_after(struct maple_tree *mt, unsigned long *index, 6616 unsigned long max) 6617 { 6618 if (!(*index)) 6619 return NULL; 6620 6621 return mt_find(mt, index, max); 6622 } 6623 EXPORT_SYMBOL(mt_find_after); 6624 6625 #ifdef CONFIG_DEBUG_MAPLE_TREE 6626 atomic_t maple_tree_tests_run; 6627 EXPORT_SYMBOL_GPL(maple_tree_tests_run); 6628 atomic_t maple_tree_tests_passed; 6629 EXPORT_SYMBOL_GPL(maple_tree_tests_passed); 6630 6631 #ifndef __KERNEL__ 6632 extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int); 6633 void mt_set_non_kernel(unsigned int val) 6634 { 6635 kmem_cache_set_non_kernel(maple_node_cache, val); 6636 } 6637 6638 extern unsigned long kmem_cache_get_alloc(struct kmem_cache *); 6639 unsigned long mt_get_alloc_size(void) 6640 { 6641 return kmem_cache_get_alloc(maple_node_cache); 6642 } 6643 6644 extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *); 6645 void mt_zero_nr_tallocated(void) 6646 { 6647 kmem_cache_zero_nr_tallocated(maple_node_cache); 6648 } 6649 6650 extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *); 6651 unsigned int mt_nr_tallocated(void) 6652 { 6653 return kmem_cache_nr_tallocated(maple_node_cache); 6654 } 6655 6656 extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *); 6657 unsigned int mt_nr_allocated(void) 6658 { 6659 return kmem_cache_nr_allocated(maple_node_cache); 6660 } 6661 6662 /* 6663 * mas_dead_node() - Check if the maple state is pointing to a dead node. 6664 * @mas: The maple state 6665 * @index: The index to restore in @mas. 6666 * 6667 * Used in test code. 6668 * Return: 1 if @mas has been reset to MAS_START, 0 otherwise. 
6669 */ 6670 static inline int mas_dead_node(struct ma_state *mas, unsigned long index) 6671 { 6672 if (unlikely(!mas_searchable(mas) || mas_is_start(mas))) 6673 return 0; 6674 6675 if (likely(!mte_dead_node(mas->node))) 6676 return 0; 6677 6678 mas_rewalk(mas, index); 6679 return 1; 6680 } 6681 6682 void mt_cache_shrink(void) 6683 { 6684 } 6685 #else 6686 /* 6687 * mt_cache_shrink() - For testing, don't use this. 6688 * 6689 * Certain testcases can trigger an OOM when combined with other memory 6690 * debugging configuration options. This function is used to reduce the 6691 * possibility of an out of memory even due to kmem_cache objects remaining 6692 * around for longer than usual. 6693 */ 6694 void mt_cache_shrink(void) 6695 { 6696 kmem_cache_shrink(maple_node_cache); 6697 6698 } 6699 EXPORT_SYMBOL_GPL(mt_cache_shrink); 6700 6701 #endif /* not defined __KERNEL__ */ 6702 /* 6703 * mas_get_slot() - Get the entry in the maple state node stored at @offset. 6704 * @mas: The maple state 6705 * @offset: The offset into the slot array to fetch. 6706 * 6707 * Return: The entry stored at @offset. 6708 */ 6709 static inline struct maple_enode *mas_get_slot(struct ma_state *mas, 6710 unsigned char offset) 6711 { 6712 return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)), 6713 offset); 6714 } 6715 6716 /* Depth first search, post-order */ 6717 static void mas_dfs_postorder(struct ma_state *mas, unsigned long max) 6718 { 6719 6720 struct maple_enode *p = MAS_NONE, *mn = mas->node; 6721 unsigned long p_min, p_max; 6722 6723 mas_next_node(mas, mas_mn(mas), max); 6724 if (!mas_is_none(mas)) 6725 return; 6726 6727 if (mte_is_root(mn)) 6728 return; 6729 6730 mas->node = mn; 6731 mas_ascend(mas); 6732 do { 6733 p = mas->node; 6734 p_min = mas->min; 6735 p_max = mas->max; 6736 mas_prev_node(mas, 0); 6737 } while (!mas_is_none(mas)); 6738 6739 mas->node = p; 6740 mas->max = p_max; 6741 mas->min = p_min; 6742 } 6743 6744 /* Tree validations */ 6745 static void mt_dump_node(const struct maple_tree *mt, void *entry, 6746 unsigned long min, unsigned long max, unsigned int depth, 6747 enum mt_dump_format format); 6748 static void mt_dump_range(unsigned long min, unsigned long max, 6749 unsigned int depth, enum mt_dump_format format) 6750 { 6751 static const char spaces[] = " "; 6752 6753 switch(format) { 6754 case mt_dump_hex: 6755 if (min == max) 6756 pr_info("%.*s%lx: ", depth * 2, spaces, min); 6757 else 6758 pr_info("%.*s%lx-%lx: ", depth * 2, spaces, min, max); 6759 break; 6760 default: 6761 case mt_dump_dec: 6762 if (min == max) 6763 pr_info("%.*s%lu: ", depth * 2, spaces, min); 6764 else 6765 pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max); 6766 } 6767 } 6768 6769 static void mt_dump_entry(void *entry, unsigned long min, unsigned long max, 6770 unsigned int depth, enum mt_dump_format format) 6771 { 6772 mt_dump_range(min, max, depth, format); 6773 6774 if (xa_is_value(entry)) 6775 pr_cont("value %ld (0x%lx) [%p]\n", xa_to_value(entry), 6776 xa_to_value(entry), entry); 6777 else if (xa_is_zero(entry)) 6778 pr_cont("zero (%ld)\n", xa_to_internal(entry)); 6779 else if (mt_is_reserved(entry)) 6780 pr_cont("UNKNOWN ENTRY (%p)\n", entry); 6781 else 6782 pr_cont("%p\n", entry); 6783 } 6784 6785 static void mt_dump_range64(const struct maple_tree *mt, void *entry, 6786 unsigned long min, unsigned long max, unsigned int depth, 6787 enum mt_dump_format format) 6788 { 6789 struct maple_range_64 *node = &mte_to_node(entry)->mr64; 6790 bool leaf = mte_is_leaf(entry); 6791 unsigned long first = 
min; 6792 int i; 6793 6794 pr_cont(" contents: "); 6795 for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) { 6796 switch(format) { 6797 case mt_dump_hex: 6798 pr_cont("%p %lX ", node->slot[i], node->pivot[i]); 6799 break; 6800 default: 6801 case mt_dump_dec: 6802 pr_cont("%p %lu ", node->slot[i], node->pivot[i]); 6803 } 6804 } 6805 pr_cont("%p\n", node->slot[i]); 6806 for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) { 6807 unsigned long last = max; 6808 6809 if (i < (MAPLE_RANGE64_SLOTS - 1)) 6810 last = node->pivot[i]; 6811 else if (!node->slot[i] && max != mt_node_max(entry)) 6812 break; 6813 if (last == 0 && i > 0) 6814 break; 6815 if (leaf) 6816 mt_dump_entry(mt_slot(mt, node->slot, i), 6817 first, last, depth + 1, format); 6818 else if (node->slot[i]) 6819 mt_dump_node(mt, mt_slot(mt, node->slot, i), 6820 first, last, depth + 1, format); 6821 6822 if (last == max) 6823 break; 6824 if (last > max) { 6825 switch(format) { 6826 case mt_dump_hex: 6827 pr_err("node %p last (%lx) > max (%lx) at pivot %d!\n", 6828 node, last, max, i); 6829 break; 6830 default: 6831 case mt_dump_dec: 6832 pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", 6833 node, last, max, i); 6834 } 6835 } 6836 first = last + 1; 6837 } 6838 } 6839 6840 static void mt_dump_arange64(const struct maple_tree *mt, void *entry, 6841 unsigned long min, unsigned long max, unsigned int depth, 6842 enum mt_dump_format format) 6843 { 6844 struct maple_arange_64 *node = &mte_to_node(entry)->ma64; 6845 bool leaf = mte_is_leaf(entry); 6846 unsigned long first = min; 6847 int i; 6848 6849 pr_cont(" contents: "); 6850 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 6851 switch (format) { 6852 case mt_dump_hex: 6853 pr_cont("%lx ", node->gap[i]); 6854 break; 6855 default: 6856 case mt_dump_dec: 6857 pr_cont("%lu ", node->gap[i]); 6858 } 6859 } 6860 pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap); 6861 for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) { 6862 switch (format) { 6863 case mt_dump_hex: 6864 pr_cont("%p %lX ", node->slot[i], node->pivot[i]); 6865 break; 6866 default: 6867 case mt_dump_dec: 6868 pr_cont("%p %lu ", node->slot[i], node->pivot[i]); 6869 } 6870 } 6871 pr_cont("%p\n", node->slot[i]); 6872 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 6873 unsigned long last = max; 6874 6875 if (i < (MAPLE_ARANGE64_SLOTS - 1)) 6876 last = node->pivot[i]; 6877 else if (!node->slot[i]) 6878 break; 6879 if (last == 0 && i > 0) 6880 break; 6881 if (leaf) 6882 mt_dump_entry(mt_slot(mt, node->slot, i), 6883 first, last, depth + 1, format); 6884 else if (node->slot[i]) 6885 mt_dump_node(mt, mt_slot(mt, node->slot, i), 6886 first, last, depth + 1, format); 6887 6888 if (last == max) 6889 break; 6890 if (last > max) { 6891 pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", 6892 node, last, max, i); 6893 break; 6894 } 6895 first = last + 1; 6896 } 6897 } 6898 6899 static void mt_dump_node(const struct maple_tree *mt, void *entry, 6900 unsigned long min, unsigned long max, unsigned int depth, 6901 enum mt_dump_format format) 6902 { 6903 struct maple_node *node = mte_to_node(entry); 6904 unsigned int type = mte_node_type(entry); 6905 unsigned int i; 6906 6907 mt_dump_range(min, max, depth, format); 6908 6909 pr_cont("node %p depth %d type %d parent %p", node, depth, type, 6910 node ? 
node->parent : NULL); 6911 switch (type) { 6912 case maple_dense: 6913 pr_cont("\n"); 6914 for (i = 0; i < MAPLE_NODE_SLOTS; i++) { 6915 if (min + i > max) 6916 pr_cont("OUT OF RANGE: "); 6917 mt_dump_entry(mt_slot(mt, node->slot, i), 6918 min + i, min + i, depth, format); 6919 } 6920 break; 6921 case maple_leaf_64: 6922 case maple_range_64: 6923 mt_dump_range64(mt, entry, min, max, depth, format); 6924 break; 6925 case maple_arange_64: 6926 mt_dump_arange64(mt, entry, min, max, depth, format); 6927 break; 6928 6929 default: 6930 pr_cont(" UNKNOWN TYPE\n"); 6931 } 6932 } 6933 6934 void mt_dump(const struct maple_tree *mt, enum mt_dump_format format) 6935 { 6936 void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt)); 6937 6938 pr_info("maple_tree(%p) flags %X, height %u root %p\n", 6939 mt, mt->ma_flags, mt_height(mt), entry); 6940 if (!xa_is_node(entry)) 6941 mt_dump_entry(entry, 0, 0, 0, format); 6942 else if (entry) 6943 mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format); 6944 } 6945 EXPORT_SYMBOL_GPL(mt_dump); 6946 6947 /* 6948 * Calculate the maximum gap in a node and check if that's what is reported in 6949 * the parent (unless root). 6950 */ 6951 static void mas_validate_gaps(struct ma_state *mas) 6952 { 6953 struct maple_enode *mte = mas->node; 6954 struct maple_node *p_mn, *node = mte_to_node(mte); 6955 enum maple_type mt = mte_node_type(mas->node); 6956 unsigned long gap = 0, max_gap = 0; 6957 unsigned long p_end, p_start = mas->min; 6958 unsigned char p_slot, offset; 6959 unsigned long *gaps = NULL; 6960 unsigned long *pivots = ma_pivots(node, mt); 6961 unsigned int i; 6962 6963 if (ma_is_dense(mt)) { 6964 for (i = 0; i < mt_slot_count(mte); i++) { 6965 if (mas_get_slot(mas, i)) { 6966 if (gap > max_gap) 6967 max_gap = gap; 6968 gap = 0; 6969 continue; 6970 } 6971 gap++; 6972 } 6973 goto counted; 6974 } 6975 6976 gaps = ma_gaps(node, mt); 6977 for (i = 0; i < mt_slot_count(mte); i++) { 6978 p_end = mas_safe_pivot(mas, pivots, i, mt); 6979 6980 if (!gaps) { 6981 if (!mas_get_slot(mas, i)) 6982 gap = p_end - p_start + 1; 6983 } else { 6984 void *entry = mas_get_slot(mas, i); 6985 6986 gap = gaps[i]; 6987 MT_BUG_ON(mas->tree, !entry); 6988 6989 if (gap > p_end - p_start + 1) { 6990 pr_err("%p[%u] %lu >= %lu - %lu + 1 (%lu)\n", 6991 mas_mn(mas), i, gap, p_end, p_start, 6992 p_end - p_start + 1); 6993 MT_BUG_ON(mas->tree, gap > p_end - p_start + 1); 6994 } 6995 } 6996 6997 if (gap > max_gap) 6998 max_gap = gap; 6999 7000 p_start = p_end + 1; 7001 if (p_end >= mas->max) 7002 break; 7003 } 7004 7005 counted: 7006 if (mt == maple_arange_64) { 7007 offset = ma_meta_gap(node, mt); 7008 if (offset > i) { 7009 pr_err("gap offset %p[%u] is invalid\n", node, offset); 7010 MT_BUG_ON(mas->tree, 1); 7011 } 7012 7013 if (gaps[offset] != max_gap) { 7014 pr_err("gap %p[%u] is not the largest gap %lu\n", 7015 node, offset, max_gap); 7016 MT_BUG_ON(mas->tree, 1); 7017 } 7018 7019 MT_BUG_ON(mas->tree, !gaps); 7020 for (i++ ; i < mt_slot_count(mte); i++) { 7021 if (gaps[i] != 0) { 7022 pr_err("gap %p[%u] beyond node limit != 0\n", 7023 node, i); 7024 MT_BUG_ON(mas->tree, 1); 7025 } 7026 } 7027 } 7028 7029 if (mte_is_root(mte)) 7030 return; 7031 7032 p_slot = mte_parent_slot(mas->node); 7033 p_mn = mte_parent(mte); 7034 MT_BUG_ON(mas->tree, max_gap > mas->max); 7035 if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) { 7036 pr_err("gap %p[%u] != %lu\n", p_mn, p_slot, max_gap); 7037 mt_dump(mas->tree, mt_dump_hex); 7038 MT_BUG_ON(mas->tree, 1); 7039 } 7040 } 7041 7042 static 
void mas_validate_parent_slot(struct ma_state *mas) 7043 { 7044 struct maple_node *parent; 7045 struct maple_enode *node; 7046 enum maple_type p_type; 7047 unsigned char p_slot; 7048 void __rcu **slots; 7049 int i; 7050 7051 if (mte_is_root(mas->node)) 7052 return; 7053 7054 p_slot = mte_parent_slot(mas->node); 7055 p_type = mas_parent_type(mas, mas->node); 7056 parent = mte_parent(mas->node); 7057 slots = ma_slots(parent, p_type); 7058 MT_BUG_ON(mas->tree, mas_mn(mas) == parent); 7059 7060 /* Check prev/next parent slot for duplicate node entry */ 7061 7062 for (i = 0; i < mt_slots[p_type]; i++) { 7063 node = mas_slot(mas, slots, i); 7064 if (i == p_slot) { 7065 if (node != mas->node) 7066 pr_err("parent %p[%u] does not have %p\n", 7067 parent, i, mas_mn(mas)); 7068 MT_BUG_ON(mas->tree, node != mas->node); 7069 } else if (node == mas->node) { 7070 pr_err("Invalid child %p at parent %p[%u] p_slot %u\n", 7071 mas_mn(mas), parent, i, p_slot); 7072 MT_BUG_ON(mas->tree, node == mas->node); 7073 } 7074 } 7075 } 7076 7077 static void mas_validate_child_slot(struct ma_state *mas) 7078 { 7079 enum maple_type type = mte_node_type(mas->node); 7080 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 7081 unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type); 7082 struct maple_enode *child; 7083 unsigned char i; 7084 7085 if (mte_is_leaf(mas->node)) 7086 return; 7087 7088 for (i = 0; i < mt_slots[type]; i++) { 7089 child = mas_slot(mas, slots, i); 7090 7091 if (!child) { 7092 pr_err("Non-leaf node lacks child at %p[%u]\n", 7093 mas_mn(mas), i); 7094 MT_BUG_ON(mas->tree, 1); 7095 } 7096 7097 if (mte_parent_slot(child) != i) { 7098 pr_err("Slot error at %p[%u]: child %p has pslot %u\n", 7099 mas_mn(mas), i, mte_to_node(child), 7100 mte_parent_slot(child)); 7101 MT_BUG_ON(mas->tree, 1); 7102 } 7103 7104 if (mte_parent(child) != mte_to_node(mas->node)) { 7105 pr_err("child %p has parent %p not %p\n", 7106 mte_to_node(child), mte_parent(child), 7107 mte_to_node(mas->node)); 7108 MT_BUG_ON(mas->tree, 1); 7109 } 7110 7111 if (i < mt_pivots[type] && pivots[i] == mas->max) 7112 break; 7113 } 7114 } 7115 7116 /* 7117 * Validate all pivots are within mas->min and mas->max, check metadata ends 7118 * where the maximum ends and ensure there is no slots or pivots set outside of 7119 * the end of the data. 
7120 */ 7121 static void mas_validate_limits(struct ma_state *mas) 7122 { 7123 int i; 7124 unsigned long prev_piv = 0; 7125 enum maple_type type = mte_node_type(mas->node); 7126 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 7127 unsigned long *pivots = ma_pivots(mas_mn(mas), type); 7128 7129 for (i = 0; i < mt_slots[type]; i++) { 7130 unsigned long piv; 7131 7132 piv = mas_safe_pivot(mas, pivots, i, type); 7133 7134 if (!piv && (i != 0)) { 7135 pr_err("Missing node limit pivot at %p[%u]", 7136 mas_mn(mas), i); 7137 MAS_WARN_ON(mas, 1); 7138 } 7139 7140 if (prev_piv > piv) { 7141 pr_err("%p[%u] piv %lu < prev_piv %lu\n", 7142 mas_mn(mas), i, piv, prev_piv); 7143 MAS_WARN_ON(mas, piv < prev_piv); 7144 } 7145 7146 if (piv < mas->min) { 7147 pr_err("%p[%u] %lu < %lu\n", mas_mn(mas), i, 7148 piv, mas->min); 7149 MAS_WARN_ON(mas, piv < mas->min); 7150 } 7151 if (piv > mas->max) { 7152 pr_err("%p[%u] %lu > %lu\n", mas_mn(mas), i, 7153 piv, mas->max); 7154 MAS_WARN_ON(mas, piv > mas->max); 7155 } 7156 prev_piv = piv; 7157 if (piv == mas->max) 7158 break; 7159 } 7160 7161 if (mas_data_end(mas) != i) { 7162 pr_err("node%p: data_end %u != the last slot offset %u\n", 7163 mas_mn(mas), mas_data_end(mas), i); 7164 MT_BUG_ON(mas->tree, 1); 7165 } 7166 7167 for (i += 1; i < mt_slots[type]; i++) { 7168 void *entry = mas_slot(mas, slots, i); 7169 7170 if (entry && (i != mt_slots[type] - 1)) { 7171 pr_err("%p[%u] should not have entry %p\n", mas_mn(mas), 7172 i, entry); 7173 MT_BUG_ON(mas->tree, entry != NULL); 7174 } 7175 7176 if (i < mt_pivots[type]) { 7177 unsigned long piv = pivots[i]; 7178 7179 if (!piv) 7180 continue; 7181 7182 pr_err("%p[%u] should not have piv %lu\n", 7183 mas_mn(mas), i, piv); 7184 MAS_WARN_ON(mas, i < mt_pivots[type] - 1); 7185 } 7186 } 7187 } 7188 7189 static void mt_validate_nulls(struct maple_tree *mt) 7190 { 7191 void *entry, *last = (void *)1; 7192 unsigned char offset = 0; 7193 void __rcu **slots; 7194 MA_STATE(mas, mt, 0, 0); 7195 7196 mas_start(&mas); 7197 if (mas_is_none(&mas) || (mas.node == MAS_ROOT)) 7198 return; 7199 7200 while (!mte_is_leaf(mas.node)) 7201 mas_descend(&mas); 7202 7203 slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node)); 7204 do { 7205 entry = mas_slot(&mas, slots, offset); 7206 if (!last && !entry) { 7207 pr_err("Sequential nulls end at %p[%u]\n", 7208 mas_mn(&mas), offset); 7209 } 7210 MT_BUG_ON(mt, !last && !entry); 7211 last = entry; 7212 if (offset == mas_data_end(&mas)) { 7213 mas_next_node(&mas, mas_mn(&mas), ULONG_MAX); 7214 if (mas_is_none(&mas)) 7215 return; 7216 offset = 0; 7217 slots = ma_slots(mte_to_node(mas.node), 7218 mte_node_type(mas.node)); 7219 } else { 7220 offset++; 7221 } 7222 7223 } while (!mas_is_none(&mas)); 7224 } 7225 7226 /* 7227 * validate a maple tree by checking: 7228 * 1. The limits (pivots are within mas->min to mas->max) 7229 * 2. 
The gap is correctly set in the parents 7230 */ 7231 void mt_validate(struct maple_tree *mt) 7232 { 7233 unsigned char end; 7234 7235 MA_STATE(mas, mt, 0, 0); 7236 rcu_read_lock(); 7237 mas_start(&mas); 7238 if (!mas_searchable(&mas)) 7239 goto done; 7240 7241 while (!mte_is_leaf(mas.node)) 7242 mas_descend(&mas); 7243 7244 while (!mas_is_none(&mas)) { 7245 MAS_WARN_ON(&mas, mte_dead_node(mas.node)); 7246 end = mas_data_end(&mas); 7247 if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) && 7248 (mas.max != ULONG_MAX))) { 7249 pr_err("Invalid size %u of %p\n", end, mas_mn(&mas)); 7250 } 7251 7252 mas_validate_parent_slot(&mas); 7253 mas_validate_limits(&mas); 7254 mas_validate_child_slot(&mas); 7255 if (mt_is_alloc(mt)) 7256 mas_validate_gaps(&mas); 7257 mas_dfs_postorder(&mas, ULONG_MAX); 7258 } 7259 mt_validate_nulls(mt); 7260 done: 7261 rcu_read_unlock(); 7262 7263 } 7264 EXPORT_SYMBOL_GPL(mt_validate); 7265 7266 void mas_dump(const struct ma_state *mas) 7267 { 7268 pr_err("MAS: tree=%p enode=%p ", mas->tree, mas->node); 7269 if (mas_is_none(mas)) 7270 pr_err("(MAS_NONE) "); 7271 else if (mas_is_ptr(mas)) 7272 pr_err("(MAS_ROOT) "); 7273 else if (mas_is_start(mas)) 7274 pr_err("(MAS_START) "); 7275 else if (mas_is_paused(mas)) 7276 pr_err("(MAS_PAUSED) "); 7277 7278 pr_err("[%u] index=%lx last=%lx\n", mas->offset, mas->index, mas->last); 7279 pr_err(" min=%lx max=%lx alloc=%p, depth=%u, flags=%x\n", 7280 mas->min, mas->max, mas->alloc, mas->depth, mas->mas_flags); 7281 if (mas->index > mas->last) 7282 pr_err("Check index & last\n"); 7283 } 7284 EXPORT_SYMBOL_GPL(mas_dump); 7285 7286 void mas_wr_dump(const struct ma_wr_state *wr_mas) 7287 { 7288 pr_err("WR_MAS: node=%p r_min=%lx r_max=%lx\n", 7289 wr_mas->node, wr_mas->r_min, wr_mas->r_max); 7290 pr_err(" type=%u off_end=%u, node_end=%u, end_piv=%lx\n", 7291 wr_mas->type, wr_mas->offset_end, wr_mas->node_end, 7292 wr_mas->end_piv); 7293 } 7294 EXPORT_SYMBOL_GPL(mas_wr_dump); 7295 7296 #endif /* CONFIG_DEBUG_MAPLE_TREE */ 7297