/*
 * Sparse bit array
 *
 * Copyright (C) 2018, Google LLC.
 * Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver)
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * This library provides functions to support a memory efficient bit array,
 * with an index size of 2^64.  A sparsebit array is allocated through
 * the use of sparsebit_alloc() and freed via sparsebit_free(),
 * such as in the following:
 *
 *   struct sparsebit *s;
 *   s = sparsebit_alloc();
 *   sparsebit_free(&s);
 *
 * The sparsebit array itself is an opaque struct sparsebit.  Note that
 * sparsebit_free() takes a pointer to the caller's sparsebit pointer.
 * This is so that sparsebit_free() is able to poison the pointer
 * (e.g. set it to NULL) to the struct sparsebit before returning to
 * the caller.
 *
 * Between the return of sparsebit_alloc() and the call of
 * sparsebit_free(), there are multiple query and modifying operations
 * that can be performed on the allocated sparsebit array.  All of
 * these operations take as a parameter the value returned from
 * sparsebit_alloc() and most also take a bit index.  Frequently
 * used routines include:
 *
 *  ---- Query Operations
 *  sparsebit_is_set(s, idx)
 *  sparsebit_is_clear(s, idx)
 *  sparsebit_any_set(s)
 *  sparsebit_first_set(s)
 *  sparsebit_next_set(s, prev_idx)
 *
 *  ---- Modifying Operations
 *  sparsebit_set(s, idx)
 *  sparsebit_clear(s, idx)
 *  sparsebit_set_num(s, idx, num)
 *  sparsebit_clear_num(s, idx, num)
 *
 * A common operation is to iterate over all the bits set in a
 * sparsebit array.  This can be done via code with the following structure:
 *
 *   sparsebit_idx_t idx;
 *   if (sparsebit_any_set(s)) {
 *           idx = sparsebit_first_set(s);
 *           do {
 *                   ...
 *                   idx = sparsebit_next_set(s, idx);
 *           } while (idx != 0);
 *   }
 *
 * The index of the first set bit needs to be obtained via
 * sparsebit_first_set(), because sparsebit_next_set() needs
 * the index of the previously set bit.  The sparsebit_idx_t type is
 * unsigned, so there is no previous index before 0 that is available.
 * Also, the call to sparsebit_first_set() is not made unless there
 * is at least 1 bit in the array set.  This is because sparsebit_first_set()
 * aborts if called with no bits set.  It is the caller's responsibility
 * to assure that the sparsebit array has at least a single bit set
 * before calling sparsebit_first_set().
 *
 * ==== Implementation Overview ====
 * For the most part the internal implementation of sparsebit is
 * opaque to the caller.  One important implementation detail that the
 * caller may need to be aware of is the spatial complexity of the
 * implementation.  This implementation of a sparsebit array is not
 * only sparse, in that it uses memory proportional to the number of bits
 * set; it is also efficient in memory usage when most of the bits are
 * set.
 *
 * At a high level, the state of the bit settings is maintained through
 * the use of a binary search tree, where each node contains at least
 * the following members:
 *
 *   typedef uint64_t sparsebit_idx_t;
 *   typedef uint64_t sparsebit_num_t;
 *
 *   sparsebit_idx_t idx;
 *   uint32_t mask;
 *   sparsebit_num_t num_after;
 *
 * The idx member contains the bit index of the first bit described by this
 * node, while the mask member stores the setting of the first 32 bits.
 * The setting of the bit at idx + n, where 0 <= n < 32, is located in the
 * mask member at 1 << n.
 *
 * Nodes are sorted by idx and the bits described by two nodes will never
 * overlap.  The idx member is always aligned to the mask size, i.e. a
 * multiple of 32.
 *
 * Beyond a typical implementation, the nodes in this implementation also
 * contain a member named num_after.  The num_after member holds the
 * number of bits immediately after the mask bits that are contiguously set.
 * The use of the num_after member allows this implementation to efficiently
 * represent cases where most bits are set.  For example, the case of all
 * but the last two bits set is represented by the following two nodes:
 *
 *   node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0
 *   node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0
 *
 * ==== Invariants ====
 * This implementation uses the following invariants:
 *
 *   + Nodes are only used to represent bits that are set.
 *     Nodes with a mask of 0 and num_after of 0 are not allowed.
 *
 *   + The sum of bits set in all the nodes is equal to the value of
 *     the struct sparsebit num_set member.
 *
 *   + The setting of at least one bit is always described in a node's
 *     mask (mask >= 1).
 *
 *   + A node with all mask bits set only occurs when the last bit
 *     described by the previous node is not equal to this node's
 *     starting index - 1.  All such occurrences of this condition are
 *     avoided by moving the setting of the node's mask bits into
 *     the previous node's num_after setting.
 *
 *   + A node's starting index is evenly divisible by the number of bits
 *     within the node's mask member.
 *
 *   + Nodes never represent a range of bits that wrap around the
 *     highest supported index.
 *
 *       (idx + MASK_BITS + num_after - 1) <= ((sparsebit_idx_t) 0) - 1
 *
 *     As a consequence of the above, the num_after member of a node
 *     will always be <=:
 *
 *       maximum_index - nodes_starting_index - number_of_mask_bits
 *
 *   + Nodes within the binary search tree are sorted based on each
 *     node's starting index.
 *
 *   + The ranges of bits described by any two nodes do not overlap.  The
 *     range of bits described by a single node is:
 *
 *       start:           node->idx
 *       end (inclusive): node->idx + MASK_BITS + node->num_after - 1
 *
 * Note, at times these invariants are temporarily violated for a
 * specific portion of the code.  For example, when setting a mask
 * bit, there is a small delay between when the mask bit is set and the
 * value in the struct sparsebit num_set member is updated.  Other
 * temporary violations occur when node_split() is called with a specified
 * index and assures that a node exists whose mask describes the bit
 * at the specified index.  At times to do this node_split()
 * must split an existing node into two nodes or create a node that
 * has no bits set.  Such temporary violations must be corrected before
 * returning to the caller.  These corrections are typically performed
 * by the local function node_reduce().
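 *
 * Returning to the caller-visible API, a short usage sketch may help make
 * the query and modifying operations listed above concrete.  This is an
 * illustration only; nothing beyond the functions declared in sparsebit.h
 * is assumed:
 *
 *   struct sparsebit *s = sparsebit_alloc();
 *
 *   sparsebit_set_num(s, 0x1000, 16);
 *   assert(sparsebit_is_set_num(s, 0x1000, 16));
 *
 *   sparsebit_clear_num(s, 0x1004, 4);
 *   assert(sparsebit_is_clear_num(s, 0x1004, 4));
 *   assert(sparsebit_num_set(s) == 12);
 *
 *   sparsebit_free(&s);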
 */

#include "test_util.h"
#include "sparsebit.h"
#include <limits.h>
#include <assert.h>

#define DUMP_LINE_MAX 100 /* Does not include indent amount */

typedef uint32_t mask_t;
#define MASK_BITS (sizeof(mask_t) * CHAR_BIT)

struct node {
        struct node *parent;
        struct node *left;
        struct node *right;
        sparsebit_idx_t idx; /* index of least-significant bit in mask */
        sparsebit_num_t num_after; /* num contiguously set after mask */
        mask_t mask;
};

struct sparsebit {
        /*
         * Points to root node of the binary search
         * tree.  Equal to NULL when no bits are set in
         * the entire sparsebit array.
         */
        struct node *root;

        /*
         * A redundant count of the total number of bits set.  Used for
         * diagnostic purposes and to change the time complexity of
         * sparsebit_num_set() from O(n) to O(1).
         * Note: Due to overflow, a value of 0 means none or all set.
         */
        sparsebit_num_t num_set;
};

/* Returns the number of set bits described by the settings
 * of the node pointed to by nodep.
 */
static sparsebit_num_t node_num_set(struct node *nodep)
{
        return nodep->num_after + __builtin_popcount(nodep->mask);
}

/* Returns a pointer to the node that describes the
 * lowest bit index.
 */
static struct node *node_first(struct sparsebit *s)
{
        struct node *nodep;

        for (nodep = s->root; nodep && nodep->left; nodep = nodep->left)
                ;

        return nodep;
}

/* Returns a pointer to the node that describes the
 * lowest bit index > the index of the node pointed to by np.
 * Returns NULL if no node with a higher index exists.
 */
static struct node *node_next(struct sparsebit *s, struct node *np)
{
        struct node *nodep = np;

        /*
         * If current node has a right child, next node is the left-most
         * of the right child.
         */
        if (nodep->right) {
                for (nodep = nodep->right; nodep->left; nodep = nodep->left)
                        ;
                return nodep;
        }

        /*
         * No right child.  Go up until node is left child of a parent.
         * That parent is then the next node.
         */
        while (nodep->parent && nodep == nodep->parent->right)
                nodep = nodep->parent;

        return nodep->parent;
}

/* Searches for and returns a pointer to the node that describes the
 * highest index < the index of the node pointed to by np.
 * Returns NULL if no node with a lower index exists.
 */
static struct node *node_prev(struct sparsebit *s, struct node *np)
{
        struct node *nodep = np;

        /*
         * If current node has a left child, previous node is the right-most
         * of the left child.
         */
        if (nodep->left) {
                for (nodep = nodep->left; nodep->right; nodep = nodep->right)
                        ;
                return (struct node *) nodep;
        }

        /*
         * No left child.  Go up until node is right child of a parent.
         * That parent is then the previous node.
         */
        while (nodep->parent && nodep == nodep->parent->left)
                nodep = nodep->parent;

        return (struct node *) nodep->parent;
}


/* Allocates space to hold a copy of the node sub-tree pointed to by
 * subtree and duplicates the bit settings to the newly allocated nodes.
 * Returns the newly allocated copy of subtree.
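 *
 * Note: the copy is a straightforward pre-order recursion, so it visits
 * each node exactly once and its stack depth is bounded by the height of
 * the (possibly unbalanced) source tree.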
 */
static struct node *node_copy_subtree(struct node *subtree)
{
        struct node *root;

        /* Duplicate the node at the root of the subtree */
        root = calloc(1, sizeof(*root));
        if (!root) {
                perror("calloc");
                abort();
        }

        root->idx = subtree->idx;
        root->mask = subtree->mask;
        root->num_after = subtree->num_after;

        /* As needed, recursively duplicate the left and right subtrees */
        if (subtree->left) {
                root->left = node_copy_subtree(subtree->left);
                root->left->parent = root;
        }

        if (subtree->right) {
                root->right = node_copy_subtree(subtree->right);
                root->right->parent = root;
        }

        return root;
}

/* Searches for and returns a pointer to the node that describes the setting
 * of the bit given by idx.  A node describes the setting of a bit if its
 * index is within the bits described by the mask bits or the number of
 * contiguous bits set after the mask.  Returns NULL if there is no such node.
 */
static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx)
{
        struct node *nodep;

        /* Find the node that describes the setting of the bit at idx */
        for (nodep = s->root; nodep;
             nodep = nodep->idx > idx ? nodep->left : nodep->right) {
                if (idx >= nodep->idx &&
                    idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
                        break;
        }

        return nodep;
}

/* Entry Requirements:
 *   + A node that describes the setting of idx is not already present.
 *
 * Adds a new node to describe the setting of the bit at the index given
 * by idx.  Returns a pointer to the newly added node.
 *
 * TODO(lhuemill): Degenerate cases cause the tree to get unbalanced.
 */
static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx)
{
        struct node *nodep, *parentp, *prev;

        /* Allocate and initialize the new node. */
        nodep = calloc(1, sizeof(*nodep));
        if (!nodep) {
                perror("calloc");
                abort();
        }

        nodep->idx = idx & -MASK_BITS;

        /* If no nodes, set it up as the root node. */
        if (!s->root) {
                s->root = nodep;
                return nodep;
        }

        /*
         * Find the parent where the new node should be attached
         * and add the node there.
         */
        parentp = s->root;
        while (true) {
                if (idx < parentp->idx) {
                        if (!parentp->left) {
                                parentp->left = nodep;
                                nodep->parent = parentp;
                                break;
                        }
                        parentp = parentp->left;
                } else {
                        assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1);
                        if (!parentp->right) {
                                parentp->right = nodep;
                                nodep->parent = parentp;
                                break;
                        }
                        parentp = parentp->right;
                }
        }

        /*
         * Do the num_after bits of the previous node overlap with the mask
         * of the new node?  If so, set the bits in the new node's mask
         * and reduce the previous node's num_after.
         */
        prev = node_prev(s, nodep);
        while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) {
                unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1)
                        - nodep->idx;
                assert(prev->num_after > 0);
                assert(n1 < MASK_BITS);
                assert(!(nodep->mask & (1 << n1)));
                nodep->mask |= (1 << n1);
                prev->num_after--;
        }

        return nodep;
}

/* Returns whether all the bits in the sparsebit array are set. */
bool sparsebit_all_set(struct sparsebit *s)
{
        /*
         * If there are any nodes, then there must be at least one bit set.
         * The only case where a bit is set and the total number set is 0
         * is when all bits are set.
         */
        return s->root && s->num_set == 0;
}

/* Clears all bits described by the node pointed to by nodep, then
 * removes the node.
 */
static void node_rm(struct sparsebit *s, struct node *nodep)
{
        struct node *tmp;
        sparsebit_num_t num_set;

        num_set = node_num_set(nodep);
        assert(s->num_set >= num_set || sparsebit_all_set(s));
        s->num_set -= node_num_set(nodep);

        /* Have both left and right child */
        if (nodep->left && nodep->right) {
                /*
                 * Move the left subtree to the leftmost node
                 * of the right child.
                 */
                for (tmp = nodep->right; tmp->left; tmp = tmp->left)
                        ;
                tmp->left = nodep->left;
                nodep->left = NULL;
                tmp->left->parent = tmp;
        }

        /* Left only child */
        if (nodep->left) {
                if (!nodep->parent) {
                        s->root = nodep->left;
                        nodep->left->parent = NULL;
                } else {
                        nodep->left->parent = nodep->parent;
                        if (nodep == nodep->parent->left)
                                nodep->parent->left = nodep->left;
                        else {
                                assert(nodep == nodep->parent->right);
                                nodep->parent->right = nodep->left;
                        }
                }

                nodep->parent = nodep->left = nodep->right = NULL;
                free(nodep);

                return;
        }

        /* Right only child */
        if (nodep->right) {
                if (!nodep->parent) {
                        s->root = nodep->right;
                        nodep->right->parent = NULL;
                } else {
                        nodep->right->parent = nodep->parent;
                        if (nodep == nodep->parent->left)
                                nodep->parent->left = nodep->right;
                        else {
                                assert(nodep == nodep->parent->right);
                                nodep->parent->right = nodep->right;
                        }
                }

                nodep->parent = nodep->left = nodep->right = NULL;
                free(nodep);

                return;
        }

        /* Leaf Node */
        if (!nodep->parent) {
                s->root = NULL;
        } else {
                if (nodep->parent->left == nodep)
                        nodep->parent->left = NULL;
                else {
                        assert(nodep == nodep->parent->right);
                        nodep->parent->right = NULL;
                }
        }

        nodep->parent = nodep->left = nodep->right = NULL;
        free(nodep);

        return;
}

/* Splits the node containing the bit at idx so that there is a node
 * that starts at the specified index.  If no such node exists, a new
 * node at the specified index is created.  Returns the new node.
 *
 * idx must be aligned to a mask boundary.
 */
static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx)
{
        struct node *nodep1, *nodep2;
        sparsebit_idx_t offset;
        sparsebit_num_t orig_num_after;

        assert(!(idx % MASK_BITS));

        /*
         * Is there a node that describes the setting of idx?
         * If not, add it.
         */
        nodep1 = node_find(s, idx);
        if (!nodep1)
                return node_add(s, idx);

        /*
         * All done if the starting index of the node is where the
         * split should occur.
         */
        if (nodep1->idx == idx)
                return nodep1;

        /*
         * Split point not at start of mask, so it must be part of
         * bits described by num_after.
         */

        /*
         * Calculate offset within num_after for where the split is
         * to occur.
         */
        offset = idx - (nodep1->idx + MASK_BITS);
        orig_num_after = nodep1->num_after;

        /*
         * Add a new node to describe the bits starting at
         * the split point.
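         *
         * As a worked example (illustrative values only): splitting a node
         * with idx 0x0, mask 0xffffffff, num_after 0x100 at idx 0x80 gives
         * offset 0x60, so the original node keeps num_after 0x60 and the
         * new node at idx 0x80 receives the remaining 0xa0 bits, stored
         * below as mask 0xffffffff plus num_after 0x80.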
         */
        nodep1->num_after = offset;
        nodep2 = node_add(s, idx);

        /* Move bits after the split point into the new node */
        nodep2->num_after = orig_num_after - offset;
        if (nodep2->num_after >= MASK_BITS) {
                nodep2->mask = ~(mask_t) 0;
                nodep2->num_after -= MASK_BITS;
        } else {
                nodep2->mask = (1 << nodep2->num_after) - 1;
                nodep2->num_after = 0;
        }

        return nodep2;
}

/* Iteratively reduces the node pointed to by nodep and its adjacent
 * nodes into a more compact form.  For example, a node with a mask with
 * all bits set adjacent to a previous node, will get combined into a
 * single node with an increased num_after setting.
 *
 * After each reduction, a further check is made to see if additional
 * reductions are possible with the new previous and next nodes.  Note,
 * a search for a reduction is only done across the nodes nearest nodep
 * and those that became part of a reduction.  Reductions beyond nodep
 * and the adjacent nodes that are reduced are not discovered.  It is the
 * responsibility of the caller to pass a nodep that is within one node
 * of each possible reduction.
 *
 * This function does not fix the temporary violation of all invariants.
 * For example, it does not fix the case where the bit settings described
 * by two or more nodes overlap.  Such a violation introduces the potential
 * complication of a bit setting for a specific index having different settings
 * in different nodes.  This would then introduce the further complication
 * of which node has the correct setting of the bit and thus such conditions
 * are not allowed.
 *
 * This function is designed to fix invariant violations that are introduced
 * by node_split() and by changes to a node's mask or num_after members.
 * For example, when setting a bit within a node's mask, the function that
 * sets the bit doesn't have to worry about whether the setting of that
 * bit caused the mask to have leading only or trailing only bits set.
 * Instead, the function can call node_reduce(), with nodep equal to the
 * node address that it set a mask bit in, and node_reduce() will notice
 * the cases of leading or trailing only bits and that there is an
 * adjacent node that the bit settings could be merged into.
 *
 * This implementation specifically detects and corrects violation of the
 * following invariants:
 *
 *   + Nodes are only used to represent bits that are set.
 *     Nodes with a mask of 0 and num_after of 0 are not allowed.
 *
 *   + The setting of at least one bit is always described in a node's
 *     mask (mask >= 1).
 *
 *   + A node with all mask bits set only occurs when the last bit
 *     described by the previous node is not equal to this node's
 *     starting index - 1.  All such occurrences of this condition are
 *     avoided by moving the setting of the node's mask bits into
 *     the previous node's num_after setting.
 */
static void node_reduce(struct sparsebit *s, struct node *nodep)
{
        bool reduction_performed;

        do {
                reduction_performed = false;
                struct node *prev, *next, *tmp;

                /* 1) Potential reductions within the current node. */

                /* Nodes with all bits cleared may be removed. */
                if (nodep->mask == 0 && nodep->num_after == 0) {
                        /*
                         * About to remove the node pointed to by
                         * nodep, which normally would cause a problem
                         * for the next pass through the reduction loop,
                         * because the node at the starting point no longer
                         * exists.  This potential problem is handled
                         * by first remembering the location of the next
                         * or previous nodes.  It doesn't matter which,
                         * because once the node at nodep is removed, there
                         * will be no other nodes between prev and next.
                         *
                         * Note, the checks performed on nodep against both
                         * prev and next look for an adjacent node that can
                         * be reduced into a single node.  As such, after
                         * removing the node at nodep, it doesn't matter
                         * whether the nodep for the next pass through the
                         * loop is equal to the previous pass's prev or
                         * next node.  Either way, on the next pass the one
                         * not selected will become either the prev or next
                         * node.
                         */
                        tmp = node_next(s, nodep);
                        if (!tmp)
                                tmp = node_prev(s, nodep);

                        node_rm(s, nodep);
                        nodep = NULL;

                        nodep = tmp;
                        reduction_performed = true;
                        continue;
                }

                /*
                 * When the mask is 0, we can reduce the number of num_after
                 * bits by moving the initial num_after bits into the mask.
                 */
                if (nodep->mask == 0) {
                        assert(nodep->num_after != 0);
                        assert(nodep->idx + MASK_BITS > nodep->idx);

                        nodep->idx += MASK_BITS;

                        if (nodep->num_after >= MASK_BITS) {
                                nodep->mask = ~0;
                                nodep->num_after -= MASK_BITS;
                        } else {
                                nodep->mask = (1u << nodep->num_after) - 1;
                                nodep->num_after = 0;
                        }

                        reduction_performed = true;
                        continue;
                }

                /*
                 * 2) Potential reductions between the current and
                 *    previous nodes.
                 */
                prev = node_prev(s, nodep);
                if (prev) {
                        sparsebit_idx_t prev_highest_bit;

                        /* Nodes with no bits set can be removed. */
                        if (prev->mask == 0 && prev->num_after == 0) {
                                node_rm(s, prev);

                                reduction_performed = true;
                                continue;
                        }

                        /*
                         * All mask bits set and previous node has
                         * adjacent index.
                         */
                        if (nodep->mask + 1 == 0 &&
                            prev->idx + MASK_BITS == nodep->idx) {
                                prev->num_after += MASK_BITS + nodep->num_after;
                                nodep->mask = 0;
                                nodep->num_after = 0;

                                reduction_performed = true;
                                continue;
                        }

                        /*
                         * Is node adjacent to previous node and the node
                         * contains a single contiguous range of bits
                         * starting from the beginning of the mask?
                         */
                        prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after;
                        if (prev_highest_bit + 1 == nodep->idx &&
                            (nodep->mask | (nodep->mask >> 1)) == nodep->mask) {
                                /*
                                 * How many contiguous bits are there?
                                 * Is equal to the total number of set
                                 * bits, due to an earlier check that
                                 * there is a single contiguous range of
                                 * set bits.
                                 */
                                unsigned int num_contiguous
                                        = __builtin_popcount(nodep->mask);
                                assert((num_contiguous > 0) &&
                                       ((1ULL << num_contiguous) - 1) == nodep->mask);

                                prev->num_after += num_contiguous;
                                nodep->mask = 0;

                                /*
                                 * For predictable performance, handle the
                                 * special case where all mask bits are set
                                 * and there is a non-zero num_after setting.
                                 * This code is functionally correct without
                                 * the following conditionalized statements,
                                 * but without them the value of num_after
                                 * is only reduced by the number of mask
                                 * bits per pass.
                                 * There are cases where num_after can be
                                 * close to 2^64.  Without this code it could
                                 * take nearly (2^64) / 32 passes to perform
                                 * the full reduction.
                                 */
                                if (num_contiguous == MASK_BITS) {
                                        prev->num_after += nodep->num_after;
                                        nodep->num_after = 0;
                                }

                                reduction_performed = true;
                                continue;
                        }
                }

                /*
                 * 3) Potential reductions between the current and
                 *    next nodes.
                 */
                next = node_next(s, nodep);
                if (next) {
                        /* Nodes with no bits set can be removed. */
                        if (next->mask == 0 && next->num_after == 0) {
                                node_rm(s, next);
                                reduction_performed = true;
                                continue;
                        }

                        /*
                         * Is next node index adjacent to current node
                         * and does it have a mask with all bits set?
                         */
                        if (next->idx == nodep->idx + MASK_BITS + nodep->num_after &&
                            next->mask == ~(mask_t) 0) {
                                nodep->num_after += MASK_BITS;
                                next->mask = 0;
                                nodep->num_after += next->num_after;
                                next->num_after = 0;

                                node_rm(s, next);
                                next = NULL;

                                reduction_performed = true;
                                continue;
                        }
                }
        } while (nodep && reduction_performed);
}

/* Returns whether the bit at the index given by idx, within the
 * sparsebit array, is set or not.
 */
bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx)
{
        struct node *nodep;

        /* Find the node that describes the setting of the bit at idx */
        for (nodep = s->root; nodep;
             nodep = nodep->idx > idx ? nodep->left : nodep->right)
                if (idx >= nodep->idx &&
                    idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
                        goto have_node;

        return false;

have_node:
        /* Bit is set if it is any of the bits described by num_after */
        if (nodep->num_after && idx >= nodep->idx + MASK_BITS)
                return true;

        /* Is the corresponding mask bit set */
        assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS);
        return !!(nodep->mask & (1 << (idx - nodep->idx)));
}

/* Within the sparsebit array pointed to by s, sets the bit
 * at the index given by idx.
 */
static void bit_set(struct sparsebit *s, sparsebit_idx_t idx)
{
        struct node *nodep;

        /* Skip bits that are already set */
        if (sparsebit_is_set(s, idx))
                return;

        /*
         * Get a node where the bit at idx is described by the mask.
         * The node_split will also create a node, if there isn't
         * already a node that describes the setting of the bit.
         */
        nodep = node_split(s, idx & -MASK_BITS);

        /* Set the bit within the node's mask */
        assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
        assert(!(nodep->mask & (1 << (idx - nodep->idx))));
        nodep->mask |= 1 << (idx - nodep->idx);
        s->num_set++;

        node_reduce(s, nodep);
}

/* Within the sparsebit array pointed to by s, clears the bit
 * at the index given by idx.
 */
static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx)
{
        struct node *nodep;

        /* Skip bits that are already cleared */
        if (!sparsebit_is_set(s, idx))
                return;

        /* Is there a node that describes the setting of this bit? */
        nodep = node_find(s, idx);
        if (!nodep)
                return;

        /*
         * If a num_after bit, split the node, so that the bit is
         * part of a node mask.
         */
        if (idx >= nodep->idx + MASK_BITS)
                nodep = node_split(s, idx & -MASK_BITS);

        /*
         * After node_split above, bit at idx should be within the mask.
         * Clear that bit.
         */
        assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
        assert(nodep->mask & (1 << (idx - nodep->idx)));
        nodep->mask &= ~(1 << (idx - nodep->idx));
        assert(s->num_set > 0 || sparsebit_all_set(s));
        s->num_set--;

        node_reduce(s, nodep);
}

/* Recursively dumps to the FILE stream given by stream the contents
 * of the sub-tree of nodes pointed to by nodep.  Each line of output
 * is prefixed by the number of spaces given by indent.  On each
 * recursion, the indent amount is increased by 2.  This causes nodes
 * at each level deeper into the binary search tree to be displayed
 * with a greater indent.
 */
static void dump_nodes(FILE *stream, struct node *nodep,
        unsigned int indent)
{
        char *node_type;

        /* Dump contents of node */
        if (!nodep->parent)
                node_type = "root";
        else if (nodep == nodep->parent->left)
                node_type = "left";
        else {
                assert(nodep == nodep->parent->right);
                node_type = "right";
        }
        fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type, nodep);
        fprintf(stream, "%*s  parent: %p left: %p right: %p\n", indent, "",
                nodep->parent, nodep->left, nodep->right);
        fprintf(stream, "%*s  idx: 0x%lx mask: 0x%x num_after: 0x%lx\n",
                indent, "", nodep->idx, nodep->mask, nodep->num_after);

        /* If present, dump contents of left child nodes */
        if (nodep->left)
                dump_nodes(stream, nodep->left, indent + 2);

        /* If present, dump contents of right child nodes */
        if (nodep->right)
                dump_nodes(stream, nodep->right, indent + 2);
}

static inline sparsebit_idx_t node_first_set(struct node *nodep, int start)
{
        mask_t leading = (mask_t)1 << start;
        int n1 = __builtin_ctz(nodep->mask & -leading);

        return nodep->idx + n1;
}

static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start)
{
        mask_t leading = (mask_t)1 << start;
        int n1 = __builtin_ctz(~nodep->mask & -leading);

        return nodep->idx + n1;
}

/* Dumps to the FILE stream specified by stream, the implementation dependent
 * internal state of s.  Each line of output is prefixed with the number
 * of spaces given by indent.  The output is completely implementation
 * dependent and subject to change.  Output from this function should only
 * be used for diagnostic purposes.  For example, this function can be
 * used by test cases after they detect an unexpected condition, as a means
 * to capture diagnostic information.
 */
static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s,
        unsigned int indent)
{
        /* Dump the contents of s */
        fprintf(stream, "%*sroot: %p\n", indent, "", s->root);
        fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set);

        if (s->root)
                dump_nodes(stream, s->root, indent);
}

/* Allocates and returns a new sparsebit array.  The initial state
 * of the newly allocated sparsebit array has all bits cleared.
 */
struct sparsebit *sparsebit_alloc(void)
{
        struct sparsebit *s;

        /* Allocate top level structure. */
        s = calloc(1, sizeof(*s));
        if (!s) {
                perror("calloc");
                abort();
        }

        return s;
}

/* Frees the implementation dependent data for the sparsebit array
 * pointed to by s and poisons the pointer to that data.
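 *
 * A minimal sketch of the intended call pattern (the double indirection
 * is what makes the poisoning possible):
 *
 *   struct sparsebit *s = sparsebit_alloc();
 *   ...
 *   sparsebit_free(&s);
 *   assert(s == NULL);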
 */
void sparsebit_free(struct sparsebit **sbitp)
{
        struct sparsebit *s = *sbitp;

        if (!s)
                return;

        sparsebit_clear_all(s);
        free(s);
        *sbitp = NULL;
}

/* Makes a copy of the sparsebit array given by s, to the sparsebit
 * array given by d.  Note, d must have already been allocated via
 * sparsebit_alloc().  It may though already have bits set, which,
 * if different from those in s, will be cleared.
 */
void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
{
        /* First clear any bits already set in the destination */
        sparsebit_clear_all(d);

        if (s->root) {
                d->root = node_copy_subtree(s->root);
                d->num_set = s->num_set;
        }
}

/* Returns whether num consecutive bits starting at idx are all set. */
bool sparsebit_is_set_num(struct sparsebit *s,
        sparsebit_idx_t idx, sparsebit_num_t num)
{
        sparsebit_idx_t next_cleared;

        assert(num > 0);
        assert(idx + num - 1 >= idx);

        /* With num > 0, the first bit must be set. */
        if (!sparsebit_is_set(s, idx))
                return false;

        /* Find the next cleared bit */
        next_cleared = sparsebit_next_clear(s, idx);

        /*
         * If no cleared bits beyond idx, then there are at least num
         * set bits.  idx + num doesn't wrap.  Otherwise check if
         * there are enough set bits between idx and the next cleared bit.
         */
        return next_cleared == 0 || next_cleared - idx >= num;
}

/* Returns whether the bit at the index given by idx is cleared. */
bool sparsebit_is_clear(struct sparsebit *s,
        sparsebit_idx_t idx)
{
        return !sparsebit_is_set(s, idx);
}

/* Returns whether num consecutive bits starting at idx are all cleared. */
bool sparsebit_is_clear_num(struct sparsebit *s,
        sparsebit_idx_t idx, sparsebit_num_t num)
{
        sparsebit_idx_t next_set;

        assert(num > 0);
        assert(idx + num - 1 >= idx);

        /* With num > 0, the first bit must be cleared. */
        if (!sparsebit_is_clear(s, idx))
                return false;

        /* Find the next set bit */
        next_set = sparsebit_next_set(s, idx);

        /*
         * If no set bits beyond idx, then there are at least num
         * cleared bits.  idx + num doesn't wrap.  Otherwise check if
         * there are enough cleared bits between idx and the next set bit.
         */
        return next_set == 0 || next_set - idx >= num;
}

/* Returns the total number of bits set.  Note: 0 is also returned for
 * the case of all bits set.  This is because with all bits set, there
 * is 1 additional bit set beyond what can be represented in the return
 * value.  Use sparsebit_any_set(), instead of sparsebit_num_set() > 0,
 * to determine if the sparsebit array has any bits set.
 */
sparsebit_num_t sparsebit_num_set(struct sparsebit *s)
{
        return s->num_set;
}

/* Returns whether any bit is set in the sparsebit array. */
bool sparsebit_any_set(struct sparsebit *s)
{
        /*
         * Nodes only describe set bits.  If there are any nodes, then
         * there is at least 1 bit set.
         */
        if (!s->root)
                return false;

        /*
         * Every node should have a non-zero mask.  For now will
         * just assure that the root node has a non-zero mask,
         * which is a quick check that at least 1 bit is set.
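         *
         * (In the all-bits-set case num_set has wrapped to 0; the tree is
         * then a single root node whose mask is all ones and whose
         * num_after covers the remaining 2^64 - MASK_BITS bits, which is
         * what the second assert below allows for.)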
         */
        assert(s->root->mask != 0);
        assert(s->num_set > 0 ||
               (s->root->num_after == ((sparsebit_num_t) 0) - MASK_BITS &&
                s->root->mask == ~(mask_t) 0));

        return true;
}

/* Returns whether all the bits in the sparsebit array are cleared. */
bool sparsebit_all_clear(struct sparsebit *s)
{
        return !sparsebit_any_set(s);
}

/* Returns whether any bit in the sparsebit array is cleared. */
bool sparsebit_any_clear(struct sparsebit *s)
{
        return !sparsebit_all_set(s);
}

/* Returns the index of the first set bit.  Aborts if no bits are set.
 */
sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
{
        struct node *nodep;

        /* Validate at least 1 bit is set */
        assert(sparsebit_any_set(s));

        nodep = node_first(s);
        return node_first_set(nodep, 0);
}

/* Returns the index of the first cleared bit.  Aborts if
 * no bits are cleared.
 */
sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
{
        struct node *nodep1, *nodep2;

        /* Validate at least 1 bit is cleared. */
        assert(sparsebit_any_clear(s));

        /* If no nodes or first node index > 0 then lowest cleared is 0 */
        nodep1 = node_first(s);
        if (!nodep1 || nodep1->idx > 0)
                return 0;

        /* Does the mask in the first node contain any cleared bits? */
        if (nodep1->mask != ~(mask_t) 0)
                return node_first_clear(nodep1, 0);

        /*
         * All mask bits set in first node.  If there isn't a second node
         * then the first cleared bit is the first bit after the bits
         * described by the first node.
         */
        nodep2 = node_next(s, nodep1);
        if (!nodep2) {
                /*
                 * No second node.  First cleared bit is first bit beyond
                 * bits described by first node.
                 */
                assert(nodep1->mask == ~(mask_t) 0);
                assert(nodep1->idx + MASK_BITS + nodep1->num_after != (sparsebit_idx_t) 0);
                return nodep1->idx + MASK_BITS + nodep1->num_after;
        }

        /*
         * There is a second node.
         * If it is not adjacent to the first node, then there is a gap
         * of cleared bits between the nodes, and the first cleared bit
         * is the first bit within the gap.
         */
        if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
                return nodep1->idx + MASK_BITS + nodep1->num_after;

        /*
         * Second node is adjacent to the first node.
         * Because it is adjacent, its mask should be non-zero.  If all
         * its mask bits are set, then with it being adjacent, it should
         * have had the mask bits moved into the num_after setting of the
         * previous node.
         */
        return node_first_clear(nodep2, 0);
}

/* Returns index of next bit set within s after the index given by prev.
 * Returns 0 if there are no bits after prev that are set.
 */
sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
        sparsebit_idx_t prev)
{
        sparsebit_idx_t lowest_possible = prev + 1;
        sparsebit_idx_t start;
        struct node *nodep;

        /* A bit after the highest index can't be set. */
        if (lowest_possible == 0)
                return 0;

        /*
         * Find the leftmost 'candidate' overlapping or to the right
         * of lowest_possible.
         */
        struct node *candidate = NULL;

        /* True iff lowest_possible is within candidate */
        bool contains = false;

        /*
         * Find node that describes setting of bit at lowest_possible.
         * If such a node doesn't exist, find the node with the lowest
         * starting index that is > lowest_possible.
         */
        for (nodep = s->root; nodep;) {
                if ((nodep->idx + MASK_BITS + nodep->num_after - 1)
                    >= lowest_possible) {
                        candidate = nodep;
                        if (candidate->idx <= lowest_possible) {
                                contains = true;
                                break;
                        }
                        nodep = nodep->left;
                } else {
                        nodep = nodep->right;
                }
        }
        if (!candidate)
                return 0;

        assert(candidate->mask != 0);

        /* Does the candidate node describe the setting of lowest_possible? */
        if (!contains) {
                /*
                 * Candidate doesn't describe setting of bit at lowest_possible.
                 * Candidate points to the first node with a starting index
                 * > lowest_possible.
                 */
                assert(candidate->idx > lowest_possible);

                return node_first_set(candidate, 0);
        }

        /*
         * Candidate describes setting of bit at lowest_possible.
         * Note: although the node describes the setting of the bit
         * at lowest_possible, it's possible that its setting and the
         * settings of all later bits described by this node are 0.
         * For now, just handle the cases where this node describes
         * a bit at or after an index of lowest_possible that is set.
         */
        start = lowest_possible - candidate->idx;

        if (start < MASK_BITS && candidate->mask >= (1 << start))
                return node_first_set(candidate, start);

        if (candidate->num_after) {
                sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS;

                return lowest_possible < first_num_after_idx
                        ? first_num_after_idx : lowest_possible;
        }

        /*
         * Although candidate node describes setting of bit at
         * the index of lowest_possible, all bits at that index and
         * later that are described by candidate are cleared.  With
         * this, the next bit is the first bit in the next node, if
         * such a node exists.  If a next node doesn't exist, then
         * there is no next set bit.
         */
        candidate = node_next(s, candidate);
        if (!candidate)
                return 0;

        return node_first_set(candidate, 0);
}

/* Returns index of next bit cleared within s after the index given by prev.
 * Returns 0 if there are no bits after prev that are cleared.
 */
sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
        sparsebit_idx_t prev)
{
        sparsebit_idx_t lowest_possible = prev + 1;
        sparsebit_idx_t idx;
        struct node *nodep1, *nodep2;

        /* There is no bit after the highest index. */
        if (lowest_possible == 0)
                return 0;

        /*
         * Does a node describing the setting of lowest_possible exist?
         * If not, the bit at lowest_possible is cleared.
         */
        nodep1 = node_find(s, lowest_possible);
        if (!nodep1)
                return lowest_possible;

        /* Does a mask bit in node 1 describe the next cleared bit? */
        for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++)
                if (!(nodep1->mask & (1 << idx)))
                        return nodep1->idx + idx;

        /*
         * Next cleared bit is not described by node 1.  If there
         * isn't a next node, then the next cleared bit is described
         * by the bit after the bits described by the first node.
         */
        nodep2 = node_next(s, nodep1);
        if (!nodep2)
                return nodep1->idx + MASK_BITS + nodep1->num_after;

        /*
         * There is a second node.
         * If it is not adjacent to the first node, then there is a gap
         * of cleared bits between the nodes, and the next cleared bit
         * is the first bit within the gap.
         */
        if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
                return nodep1->idx + MASK_BITS + nodep1->num_after;

        /*
         * Second node is adjacent to the first node.
         * Because it is adjacent, its mask should be non-zero.  If all
         * its mask bits are set, then with it being adjacent, it should
         * have had the mask bits moved into the num_after setting of the
         * previous node.
         */
        return node_first_clear(nodep2, 0);
}

/* Starting with the index 1 greater than the index given by start, finds
 * and returns the index of the first sequence of num consecutively set
 * bits.  Returns a value of 0 if no such sequence exists.
 */
sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
        sparsebit_idx_t start, sparsebit_num_t num)
{
        sparsebit_idx_t idx;

        assert(num >= 1);

        for (idx = sparsebit_next_set(s, start);
             idx != 0 && idx + num - 1 >= idx;
             idx = sparsebit_next_set(s, idx)) {
                assert(sparsebit_is_set(s, idx));

                /*
                 * Does the sequence of bits starting at idx consist of
                 * num set bits?
                 */
                if (sparsebit_is_set_num(s, idx, num))
                        return idx;

                /*
                 * Sequence of set bits at idx isn't large enough.
                 * Skip this entire sequence of set bits.
                 */
                idx = sparsebit_next_clear(s, idx);
                if (idx == 0)
                        return 0;
        }

        return 0;
}

/* Starting with the index 1 greater than the index given by start, finds
 * and returns the index of the first sequence of num consecutively cleared
 * bits.  Returns a value of 0 if no such sequence exists.
 */
sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s,
        sparsebit_idx_t start, sparsebit_num_t num)
{
        sparsebit_idx_t idx;

        assert(num >= 1);

        for (idx = sparsebit_next_clear(s, start);
             idx != 0 && idx + num - 1 >= idx;
             idx = sparsebit_next_clear(s, idx)) {
                assert(sparsebit_is_clear(s, idx));

                /*
                 * Does the sequence of bits starting at idx consist of
                 * num cleared bits?
                 */
                if (sparsebit_is_clear_num(s, idx, num))
                        return idx;

                /*
                 * Sequence of cleared bits at idx isn't large enough.
                 * Skip this entire sequence of cleared bits.
                 */
                idx = sparsebit_next_set(s, idx);
                if (idx == 0)
                        return 0;
        }

        return 0;
}

/* Sets the bits in the inclusive range start through start + num - 1. */
void sparsebit_set_num(struct sparsebit *s,
        sparsebit_idx_t start, sparsebit_num_t num)
{
        struct node *nodep, *next;
        unsigned int n1;
        sparsebit_idx_t idx;
        sparsebit_num_t n;
        sparsebit_idx_t middle_start, middle_end;

        assert(num > 0);
        assert(start + num - 1 >= start);

        /*
         * Leading - bits before first mask boundary.
         *
         * TODO(lhuemill): With some effort it may be possible to
         *   replace the following loop with a sequential sequence
         *   of statements.  High level sequence would be:
         *
         *     1. Use node_split() to force the node that describes the
         *        setting of idx to be within the mask portion of a node.
         *     2. Form mask of bits to be set.
         *     3. Determine number of mask bits already set in the node
         *        and store in a local variable named num_already_set.
         *     4. Set the appropriate mask bits within the node.
         *     5. Increment the struct sparsebit num_set member
         *        by the number of bits that were actually set.
         *        Exclude from the counts bits that were already set.
         *     6. Before returning to the caller, use node_reduce() to
         *        handle the multiple corner cases that this method
         *        introduces.
         */
        for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
                bit_set(s, idx);

        /* Middle - bits spanning one or more entire masks */
        middle_start = idx;
        middle_end = middle_start + (n & -MASK_BITS) - 1;
        if (n >= MASK_BITS) {
                nodep = node_split(s, middle_start);

                /*
                 * As needed, split just after end of middle bits.
                 * No split needed if end of middle bits is at highest
                 * supported bit index.
                 */
                if (middle_end + 1 > middle_end)
                        (void) node_split(s, middle_end + 1);

                /* Delete nodes that only describe bits within the middle. */
                for (next = node_next(s, nodep);
                     next && (next->idx < middle_end);
                     next = node_next(s, nodep)) {
                        assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
                        node_rm(s, next);
                        next = NULL;
                }

                /* As needed set each of the mask bits */
                for (n1 = 0; n1 < MASK_BITS; n1++) {
                        if (!(nodep->mask & (1 << n1))) {
                                nodep->mask |= 1 << n1;
                                s->num_set++;
                        }
                }

                s->num_set -= nodep->num_after;
                nodep->num_after = middle_end - middle_start + 1 - MASK_BITS;
                s->num_set += nodep->num_after;

                node_reduce(s, nodep);
        }
        idx = middle_end + 1;
        n -= middle_end - middle_start + 1;

        /* Trailing - bits at and beyond last mask boundary */
        assert(n < MASK_BITS);
        for (; n > 0; idx++, n--)
                bit_set(s, idx);
}

/* Clears the bits in the inclusive range start through start + num - 1. */
void sparsebit_clear_num(struct sparsebit *s,
        sparsebit_idx_t start, sparsebit_num_t num)
{
        struct node *nodep, *next;
        unsigned int n1;
        sparsebit_idx_t idx;
        sparsebit_num_t n;
        sparsebit_idx_t middle_start, middle_end;

        assert(num > 0);
        assert(start + num - 1 >= start);

        /* Leading - bits before first mask boundary */
        for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
                bit_clear(s, idx);

        /* Middle - bits spanning one or more entire masks */
        middle_start = idx;
        middle_end = middle_start + (n & -MASK_BITS) - 1;
        if (n >= MASK_BITS) {
                nodep = node_split(s, middle_start);

                /*
                 * As needed, split just after end of middle bits.
                 * No split needed if end of middle bits is at highest
                 * supported bit index.
                 */
                if (middle_end + 1 > middle_end)
                        (void) node_split(s, middle_end + 1);

                /* Delete nodes that only describe bits within the middle. */
                for (next = node_next(s, nodep);
                     next && (next->idx < middle_end);
                     next = node_next(s, nodep)) {
                        assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
                        node_rm(s, next);
                        next = NULL;
                }

                /* As needed clear each of the mask bits */
                for (n1 = 0; n1 < MASK_BITS; n1++) {
                        if (nodep->mask & (1 << n1)) {
                                nodep->mask &= ~(1 << n1);
                                s->num_set--;
                        }
                }

                /* Clear any bits described by num_after */
                s->num_set -= nodep->num_after;
                nodep->num_after = 0;

                /*
                 * Delete the node that describes the beginning of
                 * the middle bits and perform any allowed reductions
                 * with the nodes prev or next of nodep.
                 */
                node_reduce(s, nodep);
                nodep = NULL;
        }
        idx = middle_end + 1;
        n -= middle_end - middle_start + 1;

        /* Trailing - bits at and beyond last mask boundary */
        assert(n < MASK_BITS);
        for (; n > 0; idx++, n--)
                bit_clear(s, idx);
}

/* Sets the bit at the index given by idx. */
void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx)
{
        sparsebit_set_num(s, idx, 1);
}

/* Clears the bit at the index given by idx. */
void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx)
{
        sparsebit_clear_num(s, idx, 1);
}

/* Sets the bits in the entire addressable range of the sparsebit array. */
void sparsebit_set_all(struct sparsebit *s)
{
        sparsebit_set(s, 0);
        sparsebit_set_num(s, 1, ~(sparsebit_idx_t) 0);
        assert(sparsebit_all_set(s));
}

/* Clears the bits in the entire addressable range of the sparsebit array. */
void sparsebit_clear_all(struct sparsebit *s)
{
        sparsebit_clear(s, 0);
        sparsebit_clear_num(s, 1, ~(sparsebit_idx_t) 0);
        assert(!sparsebit_any_set(s));
}

static size_t display_range(FILE *stream, sparsebit_idx_t low,
        sparsebit_idx_t high, bool prepend_comma_space)
{
        char *fmt_str;
        size_t sz;

        /* Determine the printf format string */
        if (low == high)
                fmt_str = prepend_comma_space ? ", 0x%lx" : "0x%lx";
        else
                fmt_str = prepend_comma_space ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx";

        /*
         * When stream is NULL, just determine the size of what would
         * have been printed, else print the range.
         */
        if (!stream)
                sz = snprintf(NULL, 0, fmt_str, low, high);
        else
                sz = fprintf(stream, fmt_str, low, high);

        return sz;
}


/* Dumps to the FILE stream given by stream, the bit settings
 * of s.  Each line of output is prefixed with the number of
 * spaces given by indent.  The length of each line is implementation
 * dependent and does not depend on the indent amount.  The following
 * is an example output of a sparsebit array that has bits:
 *
 *   0x5, 0x8, 0xa:0xe, 0x12
 *
 * This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 18
 * are set.  Note that a ':', instead of a '-' is used to specify a range of
 * contiguous bits.  This is done because '-' is used to specify command-line
 * options, and sometimes ranges are specified as command-line arguments.
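 *
 * A typical diagnostic call, assuming a sparsebit named "vcpu_pages"
 * (the name is purely illustrative), might look like:
 *
 *   fputs("vcpu_pages:\n", stderr);
 *   sparsebit_dump(stderr, vcpu_pages, 4);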
 */
void sparsebit_dump(FILE *stream, struct sparsebit *s,
        unsigned int indent)
{
        size_t current_line_len = 0;
        size_t sz;
        struct node *nodep;

        if (!sparsebit_any_set(s))
                return;

        /* Display initial indent */
        fprintf(stream, "%*s", indent, "");

        /* For each node */
        for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) {
                unsigned int n1;
                sparsebit_idx_t low, high;

                /* For each group of bits in the mask */
                for (n1 = 0; n1 < MASK_BITS; n1++) {
                        if (nodep->mask & (1 << n1)) {
                                low = high = nodep->idx + n1;

                                for (; n1 < MASK_BITS; n1++) {
                                        if (nodep->mask & (1 << n1))
                                                high = nodep->idx + n1;
                                        else
                                                break;
                                }

                                if ((n1 == MASK_BITS) && nodep->num_after)
                                        high += nodep->num_after;

                                /*
                                 * How much room will it take to display
                                 * this range?
                                 */
                                sz = display_range(NULL, low, high,
                                        current_line_len != 0);

                                /*
                                 * If there is not enough room, display
                                 * a newline plus the indent of the next
                                 * line.
                                 */
                                if (current_line_len + sz > DUMP_LINE_MAX) {
                                        fputs("\n", stream);
                                        fprintf(stream, "%*s", indent, "");
                                        current_line_len = 0;
                                }

                                /* Display the range */
                                sz = display_range(stream, low, high,
                                        current_line_len != 0);
                                current_line_len += sz;
                        }
                }

                /*
                 * If num_after is non-zero and the most-significant bit of
                 * the mask is not set, then we still need to display a
                 * range for the bits described by num_after.
                 */
                if (!(nodep->mask & (1 << (MASK_BITS - 1))) && nodep->num_after) {
                        low = nodep->idx + MASK_BITS;
                        high = nodep->idx + MASK_BITS + nodep->num_after - 1;

                        /*
                         * How much room will it take to display
                         * this range?
                         */
                        sz = display_range(NULL, low, high,
                                current_line_len != 0);

                        /*
                         * If there is not enough room, display
                         * a newline plus the indent of the next
                         * line.
                         */
                        if (current_line_len + sz > DUMP_LINE_MAX) {
                                fputs("\n", stream);
                                fprintf(stream, "%*s", indent, "");
                                current_line_len = 0;
                        }

                        /* Display the range */
                        sz = display_range(stream, low, high,
                                current_line_len != 0);
                        current_line_len += sz;
                }
        }
        fputs("\n", stream);
}

/* Validates the internal state of the sparsebit array given by
 * s.  On error, diagnostic information is printed to stderr and
 * abort is called.
 */
void sparsebit_validate_internal(struct sparsebit *s)
{
        bool error_detected = false;
        struct node *nodep, *prev = NULL;
        sparsebit_num_t total_bits_set = 0;
        unsigned int n1;

        /* For each node */
        for (nodep = node_first(s); nodep;
             prev = nodep, nodep = node_next(s, nodep)) {

                /*
                 * Increase total bits set by the number of bits set
                 * in this node.
                 */
                for (n1 = 0; n1 < MASK_BITS; n1++)
                        if (nodep->mask & (1 << n1))
                                total_bits_set++;

                total_bits_set += nodep->num_after;

                /*
                 * Arbitrary choice as to whether a mask of 0 is allowed
                 * or not.  For diagnostic purposes it is beneficial to
                 * have only one valid means to represent a set of bits.
                 * To support this an arbitrary choice has been made
                 * to not allow a mask of zero.
                 */
                if (nodep->mask == 0) {
                        fprintf(stderr, "Node mask of zero, "
                                "nodep: %p nodep->mask: 0x%x",
                                nodep, nodep->mask);
                        error_detected = true;
                        break;
                }

                /*
                 * Validate num_after is not greater than the max index
                 * - the number of mask bits.  The num_after member
                 * uses 0-based indexing and thus has no value that
                 * represents all bits set.  This limitation is handled
                 * by requiring a non-zero mask.  With a non-zero mask,
                 * MASK_BITS worth of bits are described by the mask,
                 * which makes the largest needed num_after equal to:
                 *
                 *   (~(sparsebit_num_t) 0) - MASK_BITS + 1
                 */
                if (nodep->num_after
                    > (~(sparsebit_num_t) 0) - MASK_BITS + 1) {
                        fprintf(stderr, "num_after too large, "
                                "nodep: %p nodep->num_after: 0x%lx",
                                nodep, nodep->num_after);
                        error_detected = true;
                        break;
                }

                /* Validate node index is divisible by the mask size */
                if (nodep->idx % MASK_BITS) {
                        fprintf(stderr, "Node index not divisible by "
                                "mask size,\n"
                                "  nodep: %p nodep->idx: 0x%lx "
                                "MASK_BITS: %lu\n",
                                nodep, nodep->idx, MASK_BITS);
                        error_detected = true;
                        break;
                }

                /*
                 * Validate bits described by node don't wrap beyond the
                 * highest supported index.
                 */
                if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) {
                        fprintf(stderr, "Bits described by node wrap "
                                "beyond highest supported index,\n"
                                "  nodep: %p nodep->idx: 0x%lx\n"
                                "  MASK_BITS: %lu nodep->num_after: 0x%lx",
                                nodep, nodep->idx, MASK_BITS, nodep->num_after);
                        error_detected = true;
                        break;
                }

                /* Check parent pointers. */
                if (nodep->left) {
                        if (nodep->left->parent != nodep) {
                                fprintf(stderr, "Left child parent pointer "
                                        "doesn't point to this node,\n"
                                        "  nodep: %p nodep->left: %p "
                                        "nodep->left->parent: %p",
                                        nodep, nodep->left,
                                        nodep->left->parent);
                                error_detected = true;
                                break;
                        }
                }

                if (nodep->right) {
                        if (nodep->right->parent != nodep) {
                                fprintf(stderr, "Right child parent pointer "
                                        "doesn't point to this node,\n"
                                        "  nodep: %p nodep->right: %p "
                                        "nodep->right->parent: %p",
                                        nodep, nodep->right,
                                        nodep->right->parent);
                                error_detected = true;
                                break;
                        }
                }

                if (!nodep->parent) {
                        if (s->root != nodep) {
                                fprintf(stderr, "Unexpected root node, "
                                        "s->root: %p nodep: %p",
                                        s->root, nodep);
                                error_detected = true;
                                break;
                        }
                }

                if (prev) {
                        /*
                         * Is index of previous node before index of
                         * current node?
                         */
                        if (prev->idx >= nodep->idx) {
                                fprintf(stderr, "Previous node index "
                                        ">= current node index,\n"
                                        "  prev: %p prev->idx: 0x%lx\n"
                                        "  nodep: %p nodep->idx: 0x%lx",
                                        prev, prev->idx, nodep, nodep->idx);
                                error_detected = true;
                                break;
                        }

                        /*
                         * Nodes occur in ascending order, based on each
                         * node's starting index.
			 */
			if ((prev->idx + MASK_BITS + prev->num_after - 1)
				>= nodep->idx) {
				fprintf(stderr, "Previous node bit range "
					"overlaps with current node bit range,\n"
					"  prev: %p prev->idx: 0x%lx "
					"prev->num_after: 0x%lx\n"
					"  nodep: %p nodep->idx: 0x%lx "
					"nodep->num_after: 0x%lx\n"
					"  MASK_BITS: %lu",
					prev, prev->idx, prev->num_after,
					nodep, nodep->idx, nodep->num_after,
					MASK_BITS);
				error_detected = true;
				break;
			}

			/*
			 * When the node has all mask bits set, it shouldn't
			 * be adjacent to the last bit described by the
			 * previous node.
			 */
			if (nodep->mask == ~(mask_t) 0 &&
			    prev->idx + MASK_BITS + prev->num_after == nodep->idx) {
				fprintf(stderr, "Current node has mask with "
					"all bits set and is adjacent to the "
					"previous node,\n"
					"  prev: %p prev->idx: 0x%lx "
					"prev->num_after: 0x%lx\n"
					"  nodep: %p nodep->idx: 0x%lx "
					"nodep->num_after: 0x%lx\n"
					"  MASK_BITS: %lu",
					prev, prev->idx, prev->num_after,
					nodep, nodep->idx, nodep->num_after,
					MASK_BITS);

				error_detected = true;
				break;
			}
		}
	}

	if (!error_detected) {
		/*
		 * Is the sum of bits set in each node equal to the count
		 * of total bits set?
		 */
		if (s->num_set != total_bits_set) {
			fprintf(stderr, "Number of bits set mismatch,\n"
				"  s->num_set: 0x%lx total_bits_set: 0x%lx",
				s->num_set, total_bits_set);

			error_detected = true;
		}
	}

	if (error_detected) {
		fputs("  dump_internal:\n", stderr);
		sparsebit_dump_internal(stderr, s, 4);
		abort();
	}
}


#ifdef FUZZ
/* A simple but effective fuzzing driver.  Look for bugs with the help
 * of some invariants and of a trivial representation of sparsebit.
 * Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let
 * afl-fuzz do the magic. :)
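 *
 * One possible way to drive it is sketched below; the build command,
 * corpus paths, and AFL front end are illustrative assumptions and not
 * something this file provides:
 *
 *   afl-cc -DFUZZ -I. -o sparsebit-fuzz sparsebit.c
 *   mkdir in
 *   head -c 512 /dev/zero > in/zero
 *   head -c 512 /dev/urandom > in/rand
 *   afl-fuzz -i in -o out -- ./sparsebit-fuzz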
 */

#include <stdlib.h>
#include <assert.h>

struct range {
	sparsebit_idx_t first, last;
	bool set;
};

struct sparsebit *s;
struct range ranges[1000];
int num_ranges;

static bool get_value(sparsebit_idx_t idx)
{
	int i;

	for (i = num_ranges; --i >= 0; )
		if (ranges[i].first <= idx && idx <= ranges[i].last)
			return ranges[i].set;

	return false;
}

static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last)
{
	sparsebit_num_t num;
	sparsebit_idx_t next;

	if (first < last) {
		num = last - first + 1;
	} else {
		num = first - last + 1;
		first = last;
		last = first + num - 1;
	}

	switch (code) {
	case 0:
		sparsebit_set(s, first);
		assert(sparsebit_is_set(s, first));
		assert(!sparsebit_is_clear(s, first));
		assert(sparsebit_any_set(s));
		assert(!sparsebit_all_clear(s));
		if (get_value(first))
			return;
		if (num_ranges == 1000)
			exit(0);
		ranges[num_ranges++] = (struct range)
			{ .first = first, .last = first, .set = true };
		break;
	case 1:
		sparsebit_clear(s, first);
		assert(!sparsebit_is_set(s, first));
		assert(sparsebit_is_clear(s, first));
		assert(sparsebit_any_clear(s));
		assert(!sparsebit_all_set(s));
		if (!get_value(first))
			return;
		if (num_ranges == 1000)
			exit(0);
		ranges[num_ranges++] = (struct range)
			{ .first = first, .last = first, .set = false };
		break;
	case 2:
		assert(sparsebit_is_set(s, first) == get_value(first));
		assert(sparsebit_is_clear(s, first) == !get_value(first));
		break;
	case 3:
		if (sparsebit_any_set(s))
			assert(get_value(sparsebit_first_set(s)));
		if (sparsebit_any_clear(s))
			assert(!get_value(sparsebit_first_clear(s)));
		sparsebit_set_all(s);
		assert(!sparsebit_any_clear(s));
		assert(sparsebit_all_set(s));
		num_ranges = 0;
		ranges[num_ranges++] = (struct range)
			{ .first = 0, .last = ~(sparsebit_idx_t)0, .set = true };
		break;
	case 4:
		if (sparsebit_any_set(s))
			assert(get_value(sparsebit_first_set(s)));
		if (sparsebit_any_clear(s))
			assert(!get_value(sparsebit_first_clear(s)));
		sparsebit_clear_all(s);
		assert(!sparsebit_any_set(s));
		assert(sparsebit_all_clear(s));
		num_ranges = 0;
		break;
	case 5:
		next = sparsebit_next_set(s, first);
		assert(next == 0 || next > first);
		assert(next == 0 || get_value(next));
		break;
	case 6:
		next = sparsebit_next_clear(s, first);
		assert(next == 0 || next > first);
		assert(next == 0 || !get_value(next));
		break;
	case 7:
		next = sparsebit_next_clear(s, first);
		if (sparsebit_is_set_num(s, first, num)) {
			assert(next == 0 || next > last);
			if (first)
				next = sparsebit_next_set(s, first - 1);
			else if (sparsebit_any_set(s))
				next = sparsebit_first_set(s);
			else
				return;
			assert(next == first);
		} else {
			assert(sparsebit_is_clear(s, first) || next <= last);
		}
		break;
	case 8:
		next = sparsebit_next_set(s, first);
		if (sparsebit_is_clear_num(s, first, num)) {
			assert(next == 0 || next > last);
			if (first)
				next = sparsebit_next_clear(s, first - 1);
			else if (sparsebit_any_clear(s))
				next = sparsebit_first_clear(s);
			else
				return;
			assert(next == first);
		} else {
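			/*
			 * Range is not entirely clear: either bit 'first'
			 * itself is set, or the next set bit after 'first'
			 * must fall within the queried range.
			 */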
			assert(sparsebit_is_set(s, first) || next <= last);
		}
		break;
	case 9:
		sparsebit_set_num(s, first, num);
		assert(sparsebit_is_set_num(s, first, num));
		assert(!sparsebit_is_clear_num(s, first, num));
		assert(sparsebit_any_set(s));
		assert(!sparsebit_all_clear(s));
		if (num_ranges == 1000)
			exit(0);
		ranges[num_ranges++] = (struct range)
			{ .first = first, .last = last, .set = true };
		break;
	case 10:
		sparsebit_clear_num(s, first, num);
		assert(!sparsebit_is_set_num(s, first, num));
		assert(sparsebit_is_clear_num(s, first, num));
		assert(sparsebit_any_clear(s));
		assert(!sparsebit_all_set(s));
		if (num_ranges == 1000)
			exit(0);
		ranges[num_ranges++] = (struct range)
			{ .first = first, .last = last, .set = false };
		break;
	case 11:
		sparsebit_validate_internal(s);
		break;
	default:
		break;
	}
}

unsigned char get8(void)
{
	int ch;

	ch = getchar();
	if (ch == EOF)
		exit(0);
	return ch;
}

uint64_t get64(void)
{
	uint64_t x;

	x = get8();
	x = (x << 8) | get8();
	x = (x << 8) | get8();
	x = (x << 8) | get8();
	x = (x << 8) | get8();
	x = (x << 8) | get8();
	x = (x << 8) | get8();
	return (x << 8) | get8();
}

int main(void)
{
	s = sparsebit_alloc();
	for (;;) {
		uint8_t op = get8() & 0xf;
		uint64_t first = get64();
		uint64_t last = get64();

		operate(op, first, last);
	}
}
#endif