1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Sparse bit array
4 *
5 * Copyright (C) 2018, Google LLC.
6 * Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver)
7 *
8 * This library provides functions to support a memory efficient bit array,
9 * with an index size of 2^64. A sparsebit array is allocated through
10 * the use sparsebit_alloc() and free'd via sparsebit_free(),
11 * such as in the following:
12 *
13 * struct sparsebit *s;
14 * s = sparsebit_alloc();
15 * sparsebit_free(&s);
16 *
17 * The struct sparsebit type resolves down to a struct sparsebit.
18 * Note that, sparsebit_free() takes a pointer to the sparsebit
19 * structure. This is so that sparsebit_free() is able to poison
20 * the pointer (e.g. set it to NULL) to the struct sparsebit before
21 * returning to the caller.
22 *
23 * Between the return of sparsebit_alloc() and the call of
24 * sparsebit_free(), there are multiple query and modifying operations
25 * that can be performed on the allocated sparsebit array. All of
26 * these operations take as a parameter the value returned from
27 * sparsebit_alloc() and most also take a bit index. Frequently
28 * used routines include:
29 *
30 * ---- Query Operations
31 * sparsebit_is_set(s, idx)
32 * sparsebit_is_clear(s, idx)
33 * sparsebit_any_set(s)
34 * sparsebit_first_set(s)
35 * sparsebit_next_set(s, prev_idx)
36 *
37 * ---- Modifying Operations
38 * sparsebit_set(s, idx)
39 * sparsebit_clear(s, idx)
40 * sparsebit_set_num(s, idx, num);
41 * sparsebit_clear_num(s, idx, num);
42 *
43 * A common operation, is to itterate over all the bits set in a test
44 * sparsebit array. This can be done via code with the following structure:
45 *
46 * sparsebit_idx_t idx;
47 * if (sparsebit_any_set(s)) {
48 * idx = sparsebit_first_set(s);
49 * do {
50 * ...
51 * idx = sparsebit_next_set(s, idx);
52 * } while (idx != 0);
53 * }
54 *
55 * The index of the first bit set needs to be obtained via
56 * sparsebit_first_set(), because sparsebit_next_set(), needs
57 * the index of the previously set. The sparsebit_idx_t type is
58 * unsigned, so there is no previous index before 0 that is available.
59 * Also, the call to sparsebit_first_set() is not made unless there
60 * is at least 1 bit in the array set. This is because sparsebit_first_set()
61 * aborts if sparsebit_first_set() is called with no bits set.
62 * It is the callers responsibility to assure that the
63 * sparsebit array has at least a single bit set before calling
64 * sparsebit_first_set().
65 *
66 * ==== Implementation Overview ====
67 * For the most part the internal implementation of sparsebit is
68 * opaque to the caller. One important implementation detail that the
69 * caller may need to be aware of is the spatial complexity of the
70 * implementation. This implementation of a sparsebit array is not
71 * only sparse, in that it uses memory proportional to the number of bits
72 * set. It is also efficient in memory usage when most of the bits are
73 * set.
74 *
75 * At a high-level the state of the bit settings are maintained through
76 * the use of a binary-search tree, where each node contains at least
77 * the following members:
78 *
79 * typedef uint64_t sparsebit_idx_t;
80 * typedef uint64_t sparsebit_num_t;
81 *
82 * sparsebit_idx_t idx;
83 * uint32_t mask;
84 * sparsebit_num_t num_after;
85 *
86 * The idx member contains the bit index of the first bit described by this
87 * node, while the mask member stores the setting of the first 32-bits.
88 * The setting of the bit at idx + n, where 0 <= n < 32, is located in the
89 * mask member at 1 << n.
90 *
91 * Nodes are sorted by idx and the bits described by two nodes will never
92 * overlap. The idx member is always aligned to the mask size, i.e. a
93 * multiple of 32.
94 *
95 * Beyond a typical implementation, the nodes in this implementation also
96 * contains a member named num_after. The num_after member holds the
97 * number of bits immediately after the mask bits that are contiguously set.
98 * The use of the num_after member allows this implementation to efficiently
99 * represent cases where most bits are set. For example, the case of all
100 * but the last two bits set, is represented by the following two nodes:
101 *
102 * node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0
103 * node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0
104 *
105 * ==== Invariants ====
106 * This implementation usses the following invariants:
107 *
108 * + Node are only used to represent bits that are set.
109 * Nodes with a mask of 0 and num_after of 0 are not allowed.
110 *
111 * + Sum of bits set in all the nodes is equal to the value of
112 * the struct sparsebit_pvt num_set member.
113 *
114 * + The setting of at least one bit is always described in a nodes
115 * mask (mask >= 1).
116 *
117 * + A node with all mask bits set only occurs when the last bit
118 * described by the previous node is not equal to this nodes
119 * starting index - 1. All such occurences of this condition are
120 * avoided by moving the setting of the nodes mask bits into
121 * the previous nodes num_after setting.
122 *
123 * + Node starting index is evenly divisible by the number of bits
124 * within a nodes mask member.
125 *
126 * + Nodes never represent a range of bits that wrap around the
127 * highest supported index.
128 *
129 * (idx + MASK_BITS + num_after - 1) <= ((sparsebit_idx_t) 0) - 1)
130 *
131 * As a consequence of the above, the num_after member of a node
132 * will always be <=:
133 *
134 * maximum_index - nodes_starting_index - number_of_mask_bits
135 *
136 * + Nodes within the binary search tree are sorted based on each
137 * nodes starting index.
138 *
139 * + The range of bits described by any two nodes do not overlap. The
140 * range of bits described by a single node is:
141 *
142 * start: node->idx
143 * end (inclusive): node->idx + MASK_BITS + node->num_after - 1;
144 *
145 * Note, at times these invariants are temporarily violated for a
146 * specific portion of the code. For example, when setting a mask
147 * bit, there is a small delay between when the mask bit is set and the
148 * value in the struct sparsebit_pvt num_set member is updated. Other
149 * temporary violations occur when node_split() is called with a specified
150 * index and assures that a node where its mask represents the bit
151 * at the specified index exists. At times to do this node_split()
152 * must split an existing node into two nodes or create a node that
153 * has no bits set. Such temporary violations must be corrected before
154 * returning to the caller. These corrections are typically performed
155 * by the local function node_reduce().
156 */
157
158 #include "test_util.h"
159 #include "sparsebit.h"
160 #include <limits.h>
161 #include <assert.h>
162
163 #define DUMP_LINE_MAX 100 /* Does not include indent amount */
164
165 typedef uint32_t mask_t;
166 #define MASK_BITS (sizeof(mask_t) * CHAR_BIT)
167
168 struct node {
169 struct node *parent;
170 struct node *left;
171 struct node *right;
172 sparsebit_idx_t idx; /* index of least-significant bit in mask */
173 sparsebit_num_t num_after; /* num contiguously set after mask */
174 mask_t mask;
175 };
176
177 struct sparsebit {
178 /*
179 * Points to root node of the binary search
180 * tree. Equal to NULL when no bits are set in
181 * the entire sparsebit array.
182 */
183 struct node *root;
184
185 /*
186 * A redundant count of the total number of bits set. Used for
187 * diagnostic purposes and to change the time complexity of
188 * sparsebit_num_set() from O(n) to O(1).
189 * Note: Due to overflow, a value of 0 means none or all set.
190 */
191 sparsebit_num_t num_set;
192 };
193
194 /* Returns the number of set bits described by the settings
195 * of the node pointed to by nodep.
196 */
node_num_set(struct node * nodep)197 static sparsebit_num_t node_num_set(struct node *nodep)
198 {
199 return nodep->num_after + __builtin_popcount(nodep->mask);
200 }
201
202 /* Returns a pointer to the node that describes the
203 * lowest bit index.
204 */
node_first(struct sparsebit * s)205 static struct node *node_first(struct sparsebit *s)
206 {
207 struct node *nodep;
208
209 for (nodep = s->root; nodep && nodep->left; nodep = nodep->left)
210 ;
211
212 return nodep;
213 }
214
215 /* Returns a pointer to the node that describes the
216 * lowest bit index > the index of the node pointed to by np.
217 * Returns NULL if no node with a higher index exists.
218 */
node_next(struct sparsebit * s,struct node * np)219 static struct node *node_next(struct sparsebit *s, struct node *np)
220 {
221 struct node *nodep = np;
222
223 /*
224 * If current node has a right child, next node is the left-most
225 * of the right child.
226 */
227 if (nodep->right) {
228 for (nodep = nodep->right; nodep->left; nodep = nodep->left)
229 ;
230 return nodep;
231 }
232
233 /*
234 * No right child. Go up until node is left child of a parent.
235 * That parent is then the next node.
236 */
237 while (nodep->parent && nodep == nodep->parent->right)
238 nodep = nodep->parent;
239
240 return nodep->parent;
241 }
242
243 /* Searches for and returns a pointer to the node that describes the
244 * highest index < the index of the node pointed to by np.
245 * Returns NULL if no node with a lower index exists.
246 */
node_prev(struct sparsebit * s,struct node * np)247 static struct node *node_prev(struct sparsebit *s, struct node *np)
248 {
249 struct node *nodep = np;
250
251 /*
252 * If current node has a left child, next node is the right-most
253 * of the left child.
254 */
255 if (nodep->left) {
256 for (nodep = nodep->left; nodep->right; nodep = nodep->right)
257 ;
258 return (struct node *) nodep;
259 }
260
261 /*
262 * No left child. Go up until node is right child of a parent.
263 * That parent is then the next node.
264 */
265 while (nodep->parent && nodep == nodep->parent->left)
266 nodep = nodep->parent;
267
268 return (struct node *) nodep->parent;
269 }
270
271
272 /* Allocates space to hold a copy of the node sub-tree pointed to by
273 * subtree and duplicates the bit settings to the newly allocated nodes.
274 * Returns the newly allocated copy of subtree.
275 */
node_copy_subtree(struct node * subtree)276 static struct node *node_copy_subtree(struct node *subtree)
277 {
278 struct node *root;
279
280 /* Duplicate the node at the root of the subtree */
281 root = calloc(1, sizeof(*root));
282 if (!root) {
283 perror("calloc");
284 abort();
285 }
286
287 root->idx = subtree->idx;
288 root->mask = subtree->mask;
289 root->num_after = subtree->num_after;
290
291 /* As needed, recursively duplicate the left and right subtrees */
292 if (subtree->left) {
293 root->left = node_copy_subtree(subtree->left);
294 root->left->parent = root;
295 }
296
297 if (subtree->right) {
298 root->right = node_copy_subtree(subtree->right);
299 root->right->parent = root;
300 }
301
302 return root;
303 }
304
305 /* Searches for and returns a pointer to the node that describes the setting
306 * of the bit given by idx. A node describes the setting of a bit if its
307 * index is within the bits described by the mask bits or the number of
308 * contiguous bits set after the mask. Returns NULL if there is no such node.
309 */
node_find(struct sparsebit * s,sparsebit_idx_t idx)310 static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx)
311 {
312 struct node *nodep;
313
314 /* Find the node that describes the setting of the bit at idx */
315 for (nodep = s->root; nodep;
316 nodep = nodep->idx > idx ? nodep->left : nodep->right) {
317 if (idx >= nodep->idx &&
318 idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
319 break;
320 }
321
322 return nodep;
323 }
324
325 /* Entry Requirements:
326 * + A node that describes the setting of idx is not already present.
327 *
328 * Adds a new node to describe the setting of the bit at the index given
329 * by idx. Returns a pointer to the newly added node.
330 *
331 * TODO(lhuemill): Degenerate cases causes the tree to get unbalanced.
332 */
node_add(struct sparsebit * s,sparsebit_idx_t idx)333 static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx)
334 {
335 struct node *nodep, *parentp, *prev;
336
337 /* Allocate and initialize the new node. */
338 nodep = calloc(1, sizeof(*nodep));
339 if (!nodep) {
340 perror("calloc");
341 abort();
342 }
343
344 nodep->idx = idx & -MASK_BITS;
345
346 /* If no nodes, set it up as the root node. */
347 if (!s->root) {
348 s->root = nodep;
349 return nodep;
350 }
351
352 /*
353 * Find the parent where the new node should be attached
354 * and add the node there.
355 */
356 parentp = s->root;
357 while (true) {
358 if (idx < parentp->idx) {
359 if (!parentp->left) {
360 parentp->left = nodep;
361 nodep->parent = parentp;
362 break;
363 }
364 parentp = parentp->left;
365 } else {
366 assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1);
367 if (!parentp->right) {
368 parentp->right = nodep;
369 nodep->parent = parentp;
370 break;
371 }
372 parentp = parentp->right;
373 }
374 }
375
376 /*
377 * Does num_after bits of previous node overlap with the mask
378 * of the new node? If so set the bits in the new nodes mask
379 * and reduce the previous nodes num_after.
380 */
381 prev = node_prev(s, nodep);
382 while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) {
383 unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1)
384 - nodep->idx;
385 assert(prev->num_after > 0);
386 assert(n1 < MASK_BITS);
387 assert(!(nodep->mask & (1 << n1)));
388 nodep->mask |= (1 << n1);
389 prev->num_after--;
390 }
391
392 return nodep;
393 }
394
395 /* Returns whether all the bits in the sparsebit array are set. */
sparsebit_all_set(struct sparsebit * s)396 bool sparsebit_all_set(struct sparsebit *s)
397 {
398 /*
399 * If any nodes there must be at least one bit set. Only case
400 * where a bit is set and total num set is 0, is when all bits
401 * are set.
402 */
403 return s->root && s->num_set == 0;
404 }
405
406 /* Clears all bits described by the node pointed to by nodep, then
407 * removes the node.
408 */
node_rm(struct sparsebit * s,struct node * nodep)409 static void node_rm(struct sparsebit *s, struct node *nodep)
410 {
411 struct node *tmp;
412 sparsebit_num_t num_set;
413
414 num_set = node_num_set(nodep);
415 assert(s->num_set >= num_set || sparsebit_all_set(s));
416 s->num_set -= node_num_set(nodep);
417
418 /* Have both left and right child */
419 if (nodep->left && nodep->right) {
420 /*
421 * Move left children to the leftmost leaf node
422 * of the right child.
423 */
424 for (tmp = nodep->right; tmp->left; tmp = tmp->left)
425 ;
426 tmp->left = nodep->left;
427 nodep->left = NULL;
428 tmp->left->parent = tmp;
429 }
430
431 /* Left only child */
432 if (nodep->left) {
433 if (!nodep->parent) {
434 s->root = nodep->left;
435 nodep->left->parent = NULL;
436 } else {
437 nodep->left->parent = nodep->parent;
438 if (nodep == nodep->parent->left)
439 nodep->parent->left = nodep->left;
440 else {
441 assert(nodep == nodep->parent->right);
442 nodep->parent->right = nodep->left;
443 }
444 }
445
446 nodep->parent = nodep->left = nodep->right = NULL;
447 free(nodep);
448
449 return;
450 }
451
452
453 /* Right only child */
454 if (nodep->right) {
455 if (!nodep->parent) {
456 s->root = nodep->right;
457 nodep->right->parent = NULL;
458 } else {
459 nodep->right->parent = nodep->parent;
460 if (nodep == nodep->parent->left)
461 nodep->parent->left = nodep->right;
462 else {
463 assert(nodep == nodep->parent->right);
464 nodep->parent->right = nodep->right;
465 }
466 }
467
468 nodep->parent = nodep->left = nodep->right = NULL;
469 free(nodep);
470
471 return;
472 }
473
474 /* Leaf Node */
475 if (!nodep->parent) {
476 s->root = NULL;
477 } else {
478 if (nodep->parent->left == nodep)
479 nodep->parent->left = NULL;
480 else {
481 assert(nodep == nodep->parent->right);
482 nodep->parent->right = NULL;
483 }
484 }
485
486 nodep->parent = nodep->left = nodep->right = NULL;
487 free(nodep);
488
489 return;
490 }
491
492 /* Splits the node containing the bit at idx so that there is a node
493 * that starts at the specified index. If no such node exists, a new
494 * node at the specified index is created. Returns the new node.
495 *
496 * idx must start of a mask boundary.
497 */
node_split(struct sparsebit * s,sparsebit_idx_t idx)498 static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx)
499 {
500 struct node *nodep1, *nodep2;
501 sparsebit_idx_t offset;
502 sparsebit_num_t orig_num_after;
503
504 assert(!(idx % MASK_BITS));
505
506 /*
507 * Is there a node that describes the setting of idx?
508 * If not, add it.
509 */
510 nodep1 = node_find(s, idx);
511 if (!nodep1)
512 return node_add(s, idx);
513
514 /*
515 * All done if the starting index of the node is where the
516 * split should occur.
517 */
518 if (nodep1->idx == idx)
519 return nodep1;
520
521 /*
522 * Split point not at start of mask, so it must be part of
523 * bits described by num_after.
524 */
525
526 /*
527 * Calculate offset within num_after for where the split is
528 * to occur.
529 */
530 offset = idx - (nodep1->idx + MASK_BITS);
531 orig_num_after = nodep1->num_after;
532
533 /*
534 * Add a new node to describe the bits starting at
535 * the split point.
536 */
537 nodep1->num_after = offset;
538 nodep2 = node_add(s, idx);
539
540 /* Move bits after the split point into the new node */
541 nodep2->num_after = orig_num_after - offset;
542 if (nodep2->num_after >= MASK_BITS) {
543 nodep2->mask = ~(mask_t) 0;
544 nodep2->num_after -= MASK_BITS;
545 } else {
546 nodep2->mask = (1 << nodep2->num_after) - 1;
547 nodep2->num_after = 0;
548 }
549
550 return nodep2;
551 }
552
553 /* Iteratively reduces the node pointed to by nodep and its adjacent
554 * nodes into a more compact form. For example, a node with a mask with
555 * all bits set adjacent to a previous node, will get combined into a
556 * single node with an increased num_after setting.
557 *
558 * After each reduction, a further check is made to see if additional
559 * reductions are possible with the new previous and next nodes. Note,
560 * a search for a reduction is only done across the nodes nearest nodep
561 * and those that became part of a reduction. Reductions beyond nodep
562 * and the adjacent nodes that are reduced are not discovered. It is the
563 * responsibility of the caller to pass a nodep that is within one node
564 * of each possible reduction.
565 *
566 * This function does not fix the temporary violation of all invariants.
567 * For example it does not fix the case where the bit settings described
568 * by two or more nodes overlap. Such a violation introduces the potential
569 * complication of a bit setting for a specific index having different settings
570 * in different nodes. This would then introduce the further complication
571 * of which node has the correct setting of the bit and thus such conditions
572 * are not allowed.
573 *
574 * This function is designed to fix invariant violations that are introduced
575 * by node_split() and by changes to the nodes mask or num_after members.
576 * For example, when setting a bit within a nodes mask, the function that
577 * sets the bit doesn't have to worry about whether the setting of that
578 * bit caused the mask to have leading only or trailing only bits set.
579 * Instead, the function can call node_reduce(), with nodep equal to the
580 * node address that it set a mask bit in, and node_reduce() will notice
581 * the cases of leading or trailing only bits and that there is an
582 * adjacent node that the bit settings could be merged into.
583 *
584 * This implementation specifically detects and corrects violation of the
585 * following invariants:
586 *
587 * + Node are only used to represent bits that are set.
588 * Nodes with a mask of 0 and num_after of 0 are not allowed.
589 *
590 * + The setting of at least one bit is always described in a nodes
591 * mask (mask >= 1).
592 *
593 * + A node with all mask bits set only occurs when the last bit
594 * described by the previous node is not equal to this nodes
595 * starting index - 1. All such occurences of this condition are
596 * avoided by moving the setting of the nodes mask bits into
597 * the previous nodes num_after setting.
598 */
node_reduce(struct sparsebit * s,struct node * nodep)599 static void node_reduce(struct sparsebit *s, struct node *nodep)
600 {
601 bool reduction_performed;
602
603 do {
604 reduction_performed = false;
605 struct node *prev, *next, *tmp;
606
607 /* 1) Potential reductions within the current node. */
608
609 /* Nodes with all bits cleared may be removed. */
610 if (nodep->mask == 0 && nodep->num_after == 0) {
611 /*
612 * About to remove the node pointed to by
613 * nodep, which normally would cause a problem
614 * for the next pass through the reduction loop,
615 * because the node at the starting point no longer
616 * exists. This potential problem is handled
617 * by first remembering the location of the next
618 * or previous nodes. Doesn't matter which, because
619 * once the node at nodep is removed, there will be
620 * no other nodes between prev and next.
621 *
622 * Note, the checks performed on nodep against both
623 * both prev and next both check for an adjacent
624 * node that can be reduced into a single node. As
625 * such, after removing the node at nodep, doesn't
626 * matter whether the nodep for the next pass
627 * through the loop is equal to the previous pass
628 * prev or next node. Either way, on the next pass
629 * the one not selected will become either the
630 * prev or next node.
631 */
632 tmp = node_next(s, nodep);
633 if (!tmp)
634 tmp = node_prev(s, nodep);
635
636 node_rm(s, nodep);
637
638 nodep = tmp;
639 reduction_performed = true;
640 continue;
641 }
642
643 /*
644 * When the mask is 0, can reduce the amount of num_after
645 * bits by moving the initial num_after bits into the mask.
646 */
647 if (nodep->mask == 0) {
648 assert(nodep->num_after != 0);
649 assert(nodep->idx + MASK_BITS > nodep->idx);
650
651 nodep->idx += MASK_BITS;
652
653 if (nodep->num_after >= MASK_BITS) {
654 nodep->mask = ~0;
655 nodep->num_after -= MASK_BITS;
656 } else {
657 nodep->mask = (1u << nodep->num_after) - 1;
658 nodep->num_after = 0;
659 }
660
661 reduction_performed = true;
662 continue;
663 }
664
665 /*
666 * 2) Potential reductions between the current and
667 * previous nodes.
668 */
669 prev = node_prev(s, nodep);
670 if (prev) {
671 sparsebit_idx_t prev_highest_bit;
672
673 /* Nodes with no bits set can be removed. */
674 if (prev->mask == 0 && prev->num_after == 0) {
675 node_rm(s, prev);
676
677 reduction_performed = true;
678 continue;
679 }
680
681 /*
682 * All mask bits set and previous node has
683 * adjacent index.
684 */
685 if (nodep->mask + 1 == 0 &&
686 prev->idx + MASK_BITS == nodep->idx) {
687 prev->num_after += MASK_BITS + nodep->num_after;
688 nodep->mask = 0;
689 nodep->num_after = 0;
690
691 reduction_performed = true;
692 continue;
693 }
694
695 /*
696 * Is node adjacent to previous node and the node
697 * contains a single contiguous range of bits
698 * starting from the beginning of the mask?
699 */
700 prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after;
701 if (prev_highest_bit + 1 == nodep->idx &&
702 (nodep->mask | (nodep->mask >> 1)) == nodep->mask) {
703 /*
704 * How many contiguous bits are there?
705 * Is equal to the total number of set
706 * bits, due to an earlier check that
707 * there is a single contiguous range of
708 * set bits.
709 */
710 unsigned int num_contiguous
711 = __builtin_popcount(nodep->mask);
712 assert((num_contiguous > 0) &&
713 ((1ULL << num_contiguous) - 1) == nodep->mask);
714
715 prev->num_after += num_contiguous;
716 nodep->mask = 0;
717
718 /*
719 * For predictable performance, handle special
720 * case where all mask bits are set and there
721 * is a non-zero num_after setting. This code
722 * is functionally correct without the following
723 * conditionalized statements, but without them
724 * the value of num_after is only reduced by
725 * the number of mask bits per pass. There are
726 * cases where num_after can be close to 2^64.
727 * Without this code it could take nearly
728 * (2^64) / 32 passes to perform the full
729 * reduction.
730 */
731 if (num_contiguous == MASK_BITS) {
732 prev->num_after += nodep->num_after;
733 nodep->num_after = 0;
734 }
735
736 reduction_performed = true;
737 continue;
738 }
739 }
740
741 /*
742 * 3) Potential reductions between the current and
743 * next nodes.
744 */
745 next = node_next(s, nodep);
746 if (next) {
747 /* Nodes with no bits set can be removed. */
748 if (next->mask == 0 && next->num_after == 0) {
749 node_rm(s, next);
750 reduction_performed = true;
751 continue;
752 }
753
754 /*
755 * Is next node index adjacent to current node
756 * and has a mask with all bits set?
757 */
758 if (next->idx == nodep->idx + MASK_BITS + nodep->num_after &&
759 next->mask == ~(mask_t) 0) {
760 nodep->num_after += MASK_BITS;
761 next->mask = 0;
762 nodep->num_after += next->num_after;
763 next->num_after = 0;
764
765 node_rm(s, next);
766 next = NULL;
767
768 reduction_performed = true;
769 continue;
770 }
771 }
772 } while (nodep && reduction_performed);
773 }
774
775 /* Returns whether the bit at the index given by idx, within the
776 * sparsebit array is set or not.
777 */
sparsebit_is_set(struct sparsebit * s,sparsebit_idx_t idx)778 bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx)
779 {
780 struct node *nodep;
781
782 /* Find the node that describes the setting of the bit at idx */
783 for (nodep = s->root; nodep;
784 nodep = nodep->idx > idx ? nodep->left : nodep->right)
785 if (idx >= nodep->idx &&
786 idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
787 goto have_node;
788
789 return false;
790
791 have_node:
792 /* Bit is set if it is any of the bits described by num_after */
793 if (nodep->num_after && idx >= nodep->idx + MASK_BITS)
794 return true;
795
796 /* Is the corresponding mask bit set */
797 assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS);
798 return !!(nodep->mask & (1 << (idx - nodep->idx)));
799 }
800
801 /* Within the sparsebit array pointed to by s, sets the bit
802 * at the index given by idx.
803 */
bit_set(struct sparsebit * s,sparsebit_idx_t idx)804 static void bit_set(struct sparsebit *s, sparsebit_idx_t idx)
805 {
806 struct node *nodep;
807
808 /* Skip bits that are already set */
809 if (sparsebit_is_set(s, idx))
810 return;
811
812 /*
813 * Get a node where the bit at idx is described by the mask.
814 * The node_split will also create a node, if there isn't
815 * already a node that describes the setting of bit.
816 */
817 nodep = node_split(s, idx & -MASK_BITS);
818
819 /* Set the bit within the nodes mask */
820 assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
821 assert(!(nodep->mask & (1 << (idx - nodep->idx))));
822 nodep->mask |= 1 << (idx - nodep->idx);
823 s->num_set++;
824
825 node_reduce(s, nodep);
826 }
827
828 /* Within the sparsebit array pointed to by s, clears the bit
829 * at the index given by idx.
830 */
bit_clear(struct sparsebit * s,sparsebit_idx_t idx)831 static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx)
832 {
833 struct node *nodep;
834
835 /* Skip bits that are already cleared */
836 if (!sparsebit_is_set(s, idx))
837 return;
838
839 /* Is there a node that describes the setting of this bit? */
840 nodep = node_find(s, idx);
841 if (!nodep)
842 return;
843
844 /*
845 * If a num_after bit, split the node, so that the bit is
846 * part of a node mask.
847 */
848 if (idx >= nodep->idx + MASK_BITS)
849 nodep = node_split(s, idx & -MASK_BITS);
850
851 /*
852 * After node_split above, bit at idx should be within the mask.
853 * Clear that bit.
854 */
855 assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
856 assert(nodep->mask & (1 << (idx - nodep->idx)));
857 nodep->mask &= ~(1 << (idx - nodep->idx));
858 assert(s->num_set > 0 || sparsebit_all_set(s));
859 s->num_set--;
860
861 node_reduce(s, nodep);
862 }
863
864 /* Recursively dumps to the FILE stream given by stream the contents
865 * of the sub-tree of nodes pointed to by nodep. Each line of output
866 * is prefixed by the number of spaces given by indent. On each
867 * recursion, the indent amount is increased by 2. This causes nodes
868 * at each level deeper into the binary search tree to be displayed
869 * with a greater indent.
870 */
dump_nodes(FILE * stream,struct node * nodep,unsigned int indent)871 static void dump_nodes(FILE *stream, struct node *nodep,
872 unsigned int indent)
873 {
874 char *node_type;
875
876 /* Dump contents of node */
877 if (!nodep->parent)
878 node_type = "root";
879 else if (nodep == nodep->parent->left)
880 node_type = "left";
881 else {
882 assert(nodep == nodep->parent->right);
883 node_type = "right";
884 }
885 fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type, nodep);
886 fprintf(stream, "%*s parent: %p left: %p right: %p\n", indent, "",
887 nodep->parent, nodep->left, nodep->right);
888 fprintf(stream, "%*s idx: 0x%lx mask: 0x%x num_after: 0x%lx\n",
889 indent, "", nodep->idx, nodep->mask, nodep->num_after);
890
891 /* If present, dump contents of left child nodes */
892 if (nodep->left)
893 dump_nodes(stream, nodep->left, indent + 2);
894
895 /* If present, dump contents of right child nodes */
896 if (nodep->right)
897 dump_nodes(stream, nodep->right, indent + 2);
898 }
899
node_first_set(struct node * nodep,int start)900 static inline sparsebit_idx_t node_first_set(struct node *nodep, int start)
901 {
902 mask_t leading = (mask_t)1 << start;
903 int n1 = __builtin_ctz(nodep->mask & -leading);
904
905 return nodep->idx + n1;
906 }
907
node_first_clear(struct node * nodep,int start)908 static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start)
909 {
910 mask_t leading = (mask_t)1 << start;
911 int n1 = __builtin_ctz(~nodep->mask & -leading);
912
913 return nodep->idx + n1;
914 }
915
916 /* Dumps to the FILE stream specified by stream, the implementation dependent
917 * internal state of s. Each line of output is prefixed with the number
918 * of spaces given by indent. The output is completely implementation
919 * dependent and subject to change. Output from this function should only
920 * be used for diagnostic purposes. For example, this function can be
921 * used by test cases after they detect an unexpected condition, as a means
922 * to capture diagnostic information.
923 */
sparsebit_dump_internal(FILE * stream,struct sparsebit * s,unsigned int indent)924 static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s,
925 unsigned int indent)
926 {
927 /* Dump the contents of s */
928 fprintf(stream, "%*sroot: %p\n", indent, "", s->root);
929 fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set);
930
931 if (s->root)
932 dump_nodes(stream, s->root, indent);
933 }
934
935 /* Allocates and returns a new sparsebit array. The initial state
936 * of the newly allocated sparsebit array has all bits cleared.
937 */
sparsebit_alloc(void)938 struct sparsebit *sparsebit_alloc(void)
939 {
940 struct sparsebit *s;
941
942 /* Allocate top level structure. */
943 s = calloc(1, sizeof(*s));
944 if (!s) {
945 perror("calloc");
946 abort();
947 }
948
949 return s;
950 }
951
952 /* Frees the implementation dependent data for the sparsebit array
953 * pointed to by s and poisons the pointer to that data.
954 */
sparsebit_free(struct sparsebit ** sbitp)955 void sparsebit_free(struct sparsebit **sbitp)
956 {
957 struct sparsebit *s = *sbitp;
958
959 if (!s)
960 return;
961
962 sparsebit_clear_all(s);
963 free(s);
964 *sbitp = NULL;
965 }
966
967 /* Makes a copy of the sparsebit array given by s, to the sparsebit
968 * array given by d. Note, d must have already been allocated via
969 * sparsebit_alloc(). It can though already have bits set, which
970 * if different from src will be cleared.
971 */
sparsebit_copy(struct sparsebit * d,struct sparsebit * s)972 void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
973 {
974 /* First clear any bits already set in the destination */
975 sparsebit_clear_all(d);
976
977 if (s->root) {
978 d->root = node_copy_subtree(s->root);
979 d->num_set = s->num_set;
980 }
981 }
982
983 /* Returns whether num consecutive bits starting at idx are all set. */
sparsebit_is_set_num(struct sparsebit * s,sparsebit_idx_t idx,sparsebit_num_t num)984 bool sparsebit_is_set_num(struct sparsebit *s,
985 sparsebit_idx_t idx, sparsebit_num_t num)
986 {
987 sparsebit_idx_t next_cleared;
988
989 assert(num > 0);
990 assert(idx + num - 1 >= idx);
991
992 /* With num > 0, the first bit must be set. */
993 if (!sparsebit_is_set(s, idx))
994 return false;
995
996 /* Find the next cleared bit */
997 next_cleared = sparsebit_next_clear(s, idx);
998
999 /*
1000 * If no cleared bits beyond idx, then there are at least num
1001 * set bits. idx + num doesn't wrap. Otherwise check if
1002 * there are enough set bits between idx and the next cleared bit.
1003 */
1004 return next_cleared == 0 || next_cleared - idx >= num;
1005 }
1006
1007 /* Returns whether the bit at the index given by idx. */
sparsebit_is_clear(struct sparsebit * s,sparsebit_idx_t idx)1008 bool sparsebit_is_clear(struct sparsebit *s,
1009 sparsebit_idx_t idx)
1010 {
1011 return !sparsebit_is_set(s, idx);
1012 }
1013
1014 /* Returns whether num consecutive bits starting at idx are all cleared. */
sparsebit_is_clear_num(struct sparsebit * s,sparsebit_idx_t idx,sparsebit_num_t num)1015 bool sparsebit_is_clear_num(struct sparsebit *s,
1016 sparsebit_idx_t idx, sparsebit_num_t num)
1017 {
1018 sparsebit_idx_t next_set;
1019
1020 assert(num > 0);
1021 assert(idx + num - 1 >= idx);
1022
1023 /* With num > 0, the first bit must be cleared. */
1024 if (!sparsebit_is_clear(s, idx))
1025 return false;
1026
1027 /* Find the next set bit */
1028 next_set = sparsebit_next_set(s, idx);
1029
1030 /*
1031 * If no set bits beyond idx, then there are at least num
1032 * cleared bits. idx + num doesn't wrap. Otherwise check if
1033 * there are enough cleared bits between idx and the next set bit.
1034 */
1035 return next_set == 0 || next_set - idx >= num;
1036 }
1037
1038 /* Returns the total number of bits set. Note: 0 is also returned for
1039 * the case of all bits set. This is because with all bits set, there
1040 * is 1 additional bit set beyond what can be represented in the return
1041 * value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0,
1042 * to determine if the sparsebit array has any bits set.
1043 */
sparsebit_num_set(struct sparsebit * s)1044 sparsebit_num_t sparsebit_num_set(struct sparsebit *s)
1045 {
1046 return s->num_set;
1047 }
1048
1049 /* Returns whether any bit is set in the sparsebit array. */
sparsebit_any_set(struct sparsebit * s)1050 bool sparsebit_any_set(struct sparsebit *s)
1051 {
1052 /*
1053 * Nodes only describe set bits. If any nodes then there
1054 * is at least 1 bit set.
1055 */
1056 if (!s->root)
1057 return false;
1058
1059 /*
1060 * Every node should have a non-zero mask. For now will
1061 * just assure that the root node has a non-zero mask,
1062 * which is a quick check that at least 1 bit is set.
1063 */
1064 assert(s->root->mask != 0);
1065 assert(s->num_set > 0 ||
1066 (s->root->num_after == ((sparsebit_num_t) 0) - MASK_BITS &&
1067 s->root->mask == ~(mask_t) 0));
1068
1069 return true;
1070 }
1071
1072 /* Returns whether all the bits in the sparsebit array are cleared. */
sparsebit_all_clear(struct sparsebit * s)1073 bool sparsebit_all_clear(struct sparsebit *s)
1074 {
1075 return !sparsebit_any_set(s);
1076 }
1077
1078 /* Returns whether all the bits in the sparsebit array are set. */
sparsebit_any_clear(struct sparsebit * s)1079 bool sparsebit_any_clear(struct sparsebit *s)
1080 {
1081 return !sparsebit_all_set(s);
1082 }
1083
1084 /* Returns the index of the first set bit. Abort if no bits are set.
1085 */
sparsebit_first_set(struct sparsebit * s)1086 sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
1087 {
1088 struct node *nodep;
1089
1090 /* Validate at least 1 bit is set */
1091 assert(sparsebit_any_set(s));
1092
1093 nodep = node_first(s);
1094 return node_first_set(nodep, 0);
1095 }
1096
1097 /* Returns the index of the first cleared bit. Abort if
1098 * no bits are cleared.
1099 */
sparsebit_first_clear(struct sparsebit * s)1100 sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
1101 {
1102 struct node *nodep1, *nodep2;
1103
1104 /* Validate at least 1 bit is cleared. */
1105 assert(sparsebit_any_clear(s));
1106
1107 /* If no nodes or first node index > 0 then lowest cleared is 0 */
1108 nodep1 = node_first(s);
1109 if (!nodep1 || nodep1->idx > 0)
1110 return 0;
1111
1112 /* Does the mask in the first node contain any cleared bits. */
1113 if (nodep1->mask != ~(mask_t) 0)
1114 return node_first_clear(nodep1, 0);
1115
1116 /*
1117 * All mask bits set in first node. If there isn't a second node
1118 * then the first cleared bit is the first bit after the bits
1119 * described by the first node.
1120 */
1121 nodep2 = node_next(s, nodep1);
1122 if (!nodep2) {
1123 /*
1124 * No second node. First cleared bit is first bit beyond
1125 * bits described by first node.
1126 */
1127 assert(nodep1->mask == ~(mask_t) 0);
1128 assert(nodep1->idx + MASK_BITS + nodep1->num_after != (sparsebit_idx_t) 0);
1129 return nodep1->idx + MASK_BITS + nodep1->num_after;
1130 }
1131
1132 /*
1133 * There is a second node.
1134 * If it is not adjacent to the first node, then there is a gap
1135 * of cleared bits between the nodes, and the first cleared bit
1136 * is the first bit within the gap.
1137 */
1138 if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
1139 return nodep1->idx + MASK_BITS + nodep1->num_after;
1140
1141 /*
1142 * Second node is adjacent to the first node.
1143 * Because it is adjacent, its mask should be non-zero. If all
1144 * its mask bits are set, then with it being adjacent, it should
1145 * have had the mask bits moved into the num_after setting of the
1146 * previous node.
1147 */
1148 return node_first_clear(nodep2, 0);
1149 }
1150
1151 /* Returns index of next bit set within s after the index given by prev.
1152 * Returns 0 if there are no bits after prev that are set.
1153 */
sparsebit_next_set(struct sparsebit * s,sparsebit_idx_t prev)1154 sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
1155 sparsebit_idx_t prev)
1156 {
1157 sparsebit_idx_t lowest_possible = prev + 1;
1158 sparsebit_idx_t start;
1159 struct node *nodep;
1160
1161 /* A bit after the highest index can't be set. */
1162 if (lowest_possible == 0)
1163 return 0;
1164
1165 /*
1166 * Find the leftmost 'candidate' overlapping or to the right
1167 * of lowest_possible.
1168 */
1169 struct node *candidate = NULL;
1170
1171 /* True iff lowest_possible is within candidate */
1172 bool contains = false;
1173
1174 /*
1175 * Find node that describes setting of bit at lowest_possible.
1176 * If such a node doesn't exist, find the node with the lowest
1177 * starting index that is > lowest_possible.
1178 */
1179 for (nodep = s->root; nodep;) {
1180 if ((nodep->idx + MASK_BITS + nodep->num_after - 1)
1181 >= lowest_possible) {
1182 candidate = nodep;
1183 if (candidate->idx <= lowest_possible) {
1184 contains = true;
1185 break;
1186 }
1187 nodep = nodep->left;
1188 } else {
1189 nodep = nodep->right;
1190 }
1191 }
1192 if (!candidate)
1193 return 0;
1194
1195 assert(candidate->mask != 0);
1196
1197 /* Does the candidate node describe the setting of lowest_possible? */
1198 if (!contains) {
1199 /*
1200 * Candidate doesn't describe setting of bit at lowest_possible.
1201 * Candidate points to the first node with a starting index
1202 * > lowest_possible.
1203 */
1204 assert(candidate->idx > lowest_possible);
1205
1206 return node_first_set(candidate, 0);
1207 }
1208
1209 /*
1210 * Candidate describes setting of bit at lowest_possible.
1211 * Note: although the node describes the setting of the bit
1212 * at lowest_possible, its possible that its setting and the
1213 * setting of all latter bits described by this node are 0.
1214 * For now, just handle the cases where this node describes
1215 * a bit at or after an index of lowest_possible that is set.
1216 */
1217 start = lowest_possible - candidate->idx;
1218
1219 if (start < MASK_BITS && candidate->mask >= (1 << start))
1220 return node_first_set(candidate, start);
1221
1222 if (candidate->num_after) {
1223 sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS;
1224
1225 return lowest_possible < first_num_after_idx
1226 ? first_num_after_idx : lowest_possible;
1227 }
1228
1229 /*
1230 * Although candidate node describes setting of bit at
1231 * the index of lowest_possible, all bits at that index and
1232 * latter that are described by candidate are cleared. With
1233 * this, the next bit is the first bit in the next node, if
1234 * such a node exists. If a next node doesn't exist, then
1235 * there is no next set bit.
1236 */
1237 candidate = node_next(s, candidate);
1238 if (!candidate)
1239 return 0;
1240
1241 return node_first_set(candidate, 0);
1242 }
1243
1244 /* Returns index of next bit cleared within s after the index given by prev.
1245 * Returns 0 if there are no bits after prev that are cleared.
1246 */
sparsebit_next_clear(struct sparsebit * s,sparsebit_idx_t prev)1247 sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
1248 sparsebit_idx_t prev)
1249 {
1250 sparsebit_idx_t lowest_possible = prev + 1;
1251 sparsebit_idx_t idx;
1252 struct node *nodep1, *nodep2;
1253
1254 /* A bit after the highest index can't be set. */
1255 if (lowest_possible == 0)
1256 return 0;
1257
1258 /*
1259 * Does a node describing the setting of lowest_possible exist?
1260 * If not, the bit at lowest_possible is cleared.
1261 */
1262 nodep1 = node_find(s, lowest_possible);
1263 if (!nodep1)
1264 return lowest_possible;
1265
1266 /* Does a mask bit in node 1 describe the next cleared bit. */
1267 for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++)
1268 if (!(nodep1->mask & (1 << idx)))
1269 return nodep1->idx + idx;
1270
1271 /*
1272 * Next cleared bit is not described by node 1. If there
1273 * isn't a next node, then next cleared bit is described
1274 * by bit after the bits described by the first node.
1275 */
1276 nodep2 = node_next(s, nodep1);
1277 if (!nodep2)
1278 return nodep1->idx + MASK_BITS + nodep1->num_after;
1279
1280 /*
1281 * There is a second node.
1282 * If it is not adjacent to the first node, then there is a gap
1283 * of cleared bits between the nodes, and the next cleared bit
1284 * is the first bit within the gap.
1285 */
1286 if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
1287 return nodep1->idx + MASK_BITS + nodep1->num_after;
1288
1289 /*
1290 * Second node is adjacent to the first node.
1291 * Because it is adjacent, its mask should be non-zero. If all
1292 * its mask bits are set, then with it being adjacent, it should
1293 * have had the mask bits moved into the num_after setting of the
1294 * previous node.
1295 */
1296 return node_first_clear(nodep2, 0);
1297 }
1298
1299 /* Starting with the index 1 greater than the index given by start, finds
1300 * and returns the index of the first sequence of num consecutively set
1301 * bits. Returns a value of 0 of no such sequence exists.
1302 */
sparsebit_next_set_num(struct sparsebit * s,sparsebit_idx_t start,sparsebit_num_t num)1303 sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
1304 sparsebit_idx_t start, sparsebit_num_t num)
1305 {
1306 sparsebit_idx_t idx;
1307
1308 assert(num >= 1);
1309
1310 for (idx = sparsebit_next_set(s, start);
1311 idx != 0 && idx + num - 1 >= idx;
1312 idx = sparsebit_next_set(s, idx)) {
1313 assert(sparsebit_is_set(s, idx));
1314
1315 /*
1316 * Does the sequence of bits starting at idx consist of
1317 * num set bits?
1318 */
1319 if (sparsebit_is_set_num(s, idx, num))
1320 return idx;
1321
1322 /*
1323 * Sequence of set bits at idx isn't large enough.
1324 * Skip this entire sequence of set bits.
1325 */
1326 idx = sparsebit_next_clear(s, idx);
1327 if (idx == 0)
1328 return 0;
1329 }
1330
1331 return 0;
1332 }
1333
1334 /* Starting with the index 1 greater than the index given by start, finds
1335 * and returns the index of the first sequence of num consecutively cleared
1336 * bits. Returns a value of 0 of no such sequence exists.
1337 */
sparsebit_next_clear_num(struct sparsebit * s,sparsebit_idx_t start,sparsebit_num_t num)1338 sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s,
1339 sparsebit_idx_t start, sparsebit_num_t num)
1340 {
1341 sparsebit_idx_t idx;
1342
1343 assert(num >= 1);
1344
1345 for (idx = sparsebit_next_clear(s, start);
1346 idx != 0 && idx + num - 1 >= idx;
1347 idx = sparsebit_next_clear(s, idx)) {
1348 assert(sparsebit_is_clear(s, idx));
1349
1350 /*
1351 * Does the sequence of bits starting at idx consist of
1352 * num cleared bits?
1353 */
1354 if (sparsebit_is_clear_num(s, idx, num))
1355 return idx;
1356
1357 /*
1358 * Sequence of cleared bits at idx isn't large enough.
1359 * Skip this entire sequence of cleared bits.
1360 */
1361 idx = sparsebit_next_set(s, idx);
1362 if (idx == 0)
1363 return 0;
1364 }
1365
1366 return 0;
1367 }
1368
1369 /* Sets the bits * in the inclusive range idx through idx + num - 1. */
sparsebit_set_num(struct sparsebit * s,sparsebit_idx_t start,sparsebit_num_t num)1370 void sparsebit_set_num(struct sparsebit *s,
1371 sparsebit_idx_t start, sparsebit_num_t num)
1372 {
1373 struct node *nodep, *next;
1374 unsigned int n1;
1375 sparsebit_idx_t idx;
1376 sparsebit_num_t n;
1377 sparsebit_idx_t middle_start, middle_end;
1378
1379 assert(num > 0);
1380 assert(start + num - 1 >= start);
1381
1382 /*
1383 * Leading - bits before first mask boundary.
1384 *
1385 * TODO(lhuemill): With some effort it may be possible to
1386 * replace the following loop with a sequential sequence
1387 * of statements. High level sequence would be:
1388 *
1389 * 1. Use node_split() to force node that describes setting
1390 * of idx to be within the mask portion of a node.
1391 * 2. Form mask of bits to be set.
1392 * 3. Determine number of mask bits already set in the node
1393 * and store in a local variable named num_already_set.
1394 * 4. Set the appropriate mask bits within the node.
1395 * 5. Increment struct sparsebit_pvt num_set member
1396 * by the number of bits that were actually set.
1397 * Exclude from the counts bits that were already set.
1398 * 6. Before returning to the caller, use node_reduce() to
1399 * handle the multiple corner cases that this method
1400 * introduces.
1401 */
1402 for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
1403 bit_set(s, idx);
1404
1405 /* Middle - bits spanning one or more entire mask */
1406 middle_start = idx;
1407 middle_end = middle_start + (n & -MASK_BITS) - 1;
1408 if (n >= MASK_BITS) {
1409 nodep = node_split(s, middle_start);
1410
1411 /*
1412 * As needed, split just after end of middle bits.
1413 * No split needed if end of middle bits is at highest
1414 * supported bit index.
1415 */
1416 if (middle_end + 1 > middle_end)
1417 (void) node_split(s, middle_end + 1);
1418
1419 /* Delete nodes that only describe bits within the middle. */
1420 for (next = node_next(s, nodep);
1421 next && (next->idx < middle_end);
1422 next = node_next(s, nodep)) {
1423 assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
1424 node_rm(s, next);
1425 next = NULL;
1426 }
1427
1428 /* As needed set each of the mask bits */
1429 for (n1 = 0; n1 < MASK_BITS; n1++) {
1430 if (!(nodep->mask & (1 << n1))) {
1431 nodep->mask |= 1 << n1;
1432 s->num_set++;
1433 }
1434 }
1435
1436 s->num_set -= nodep->num_after;
1437 nodep->num_after = middle_end - middle_start + 1 - MASK_BITS;
1438 s->num_set += nodep->num_after;
1439
1440 node_reduce(s, nodep);
1441 }
1442 idx = middle_end + 1;
1443 n -= middle_end - middle_start + 1;
1444
1445 /* Trailing - bits at and beyond last mask boundary */
1446 assert(n < MASK_BITS);
1447 for (; n > 0; idx++, n--)
1448 bit_set(s, idx);
1449 }
1450
1451 /* Clears the bits * in the inclusive range idx through idx + num - 1. */
sparsebit_clear_num(struct sparsebit * s,sparsebit_idx_t start,sparsebit_num_t num)1452 void sparsebit_clear_num(struct sparsebit *s,
1453 sparsebit_idx_t start, sparsebit_num_t num)
1454 {
1455 struct node *nodep, *next;
1456 unsigned int n1;
1457 sparsebit_idx_t idx;
1458 sparsebit_num_t n;
1459 sparsebit_idx_t middle_start, middle_end;
1460
1461 assert(num > 0);
1462 assert(start + num - 1 >= start);
1463
1464 /* Leading - bits before first mask boundary */
1465 for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
1466 bit_clear(s, idx);
1467
1468 /* Middle - bits spanning one or more entire mask */
1469 middle_start = idx;
1470 middle_end = middle_start + (n & -MASK_BITS) - 1;
1471 if (n >= MASK_BITS) {
1472 nodep = node_split(s, middle_start);
1473
1474 /*
1475 * As needed, split just after end of middle bits.
1476 * No split needed if end of middle bits is at highest
1477 * supported bit index.
1478 */
1479 if (middle_end + 1 > middle_end)
1480 (void) node_split(s, middle_end + 1);
1481
1482 /* Delete nodes that only describe bits within the middle. */
1483 for (next = node_next(s, nodep);
1484 next && (next->idx < middle_end);
1485 next = node_next(s, nodep)) {
1486 assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
1487 node_rm(s, next);
1488 next = NULL;
1489 }
1490
1491 /* As needed clear each of the mask bits */
1492 for (n1 = 0; n1 < MASK_BITS; n1++) {
1493 if (nodep->mask & (1 << n1)) {
1494 nodep->mask &= ~(1 << n1);
1495 s->num_set--;
1496 }
1497 }
1498
1499 /* Clear any bits described by num_after */
1500 s->num_set -= nodep->num_after;
1501 nodep->num_after = 0;
1502
1503 /*
1504 * Delete the node that describes the beginning of
1505 * the middle bits and perform any allowed reductions
1506 * with the nodes prev or next of nodep.
1507 */
1508 node_reduce(s, nodep);
1509 nodep = NULL;
1510 }
1511 idx = middle_end + 1;
1512 n -= middle_end - middle_start + 1;
1513
1514 /* Trailing - bits at and beyond last mask boundary */
1515 assert(n < MASK_BITS);
1516 for (; n > 0; idx++, n--)
1517 bit_clear(s, idx);
1518 }
1519
1520 /* Sets the bit at the index given by idx. */
sparsebit_set(struct sparsebit * s,sparsebit_idx_t idx)1521 void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx)
1522 {
1523 sparsebit_set_num(s, idx, 1);
1524 }
1525
1526 /* Clears the bit at the index given by idx. */
sparsebit_clear(struct sparsebit * s,sparsebit_idx_t idx)1527 void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx)
1528 {
1529 sparsebit_clear_num(s, idx, 1);
1530 }
1531
1532 /* Sets the bits in the entire addressable range of the sparsebit array. */
sparsebit_set_all(struct sparsebit * s)1533 void sparsebit_set_all(struct sparsebit *s)
1534 {
1535 sparsebit_set(s, 0);
1536 sparsebit_set_num(s, 1, ~(sparsebit_idx_t) 0);
1537 assert(sparsebit_all_set(s));
1538 }
1539
1540 /* Clears the bits in the entire addressable range of the sparsebit array. */
sparsebit_clear_all(struct sparsebit * s)1541 void sparsebit_clear_all(struct sparsebit *s)
1542 {
1543 sparsebit_clear(s, 0);
1544 sparsebit_clear_num(s, 1, ~(sparsebit_idx_t) 0);
1545 assert(!sparsebit_any_set(s));
1546 }
1547
display_range(FILE * stream,sparsebit_idx_t low,sparsebit_idx_t high,bool prepend_comma_space)1548 static size_t display_range(FILE *stream, sparsebit_idx_t low,
1549 sparsebit_idx_t high, bool prepend_comma_space)
1550 {
1551 char *fmt_str;
1552 size_t sz;
1553
1554 /* Determine the printf format string */
1555 if (low == high)
1556 fmt_str = prepend_comma_space ? ", 0x%lx" : "0x%lx";
1557 else
1558 fmt_str = prepend_comma_space ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx";
1559
1560 /*
1561 * When stream is NULL, just determine the size of what would
1562 * have been printed, else print the range.
1563 */
1564 if (!stream)
1565 sz = snprintf(NULL, 0, fmt_str, low, high);
1566 else
1567 sz = fprintf(stream, fmt_str, low, high);
1568
1569 return sz;
1570 }
1571
1572
1573 /* Dumps to the FILE stream given by stream, the bit settings
1574 * of s. Each line of output is prefixed with the number of
1575 * spaces given by indent. The length of each line is implementation
1576 * dependent and does not depend on the indent amount. The following
1577 * is an example output of a sparsebit array that has bits:
1578 *
1579 * 0x5, 0x8, 0xa:0xe, 0x12
1580 *
1581 * This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 18
1582 * are set. Note that a ':', instead of a '-' is used to specify a range of
1583 * contiguous bits. This is done because '-' is used to specify command-line
1584 * options, and sometimes ranges are specified as command-line arguments.
1585 */
sparsebit_dump(FILE * stream,struct sparsebit * s,unsigned int indent)1586 void sparsebit_dump(FILE *stream, struct sparsebit *s,
1587 unsigned int indent)
1588 {
1589 size_t current_line_len = 0;
1590 size_t sz;
1591 struct node *nodep;
1592
1593 if (!sparsebit_any_set(s))
1594 return;
1595
1596 /* Display initial indent */
1597 fprintf(stream, "%*s", indent, "");
1598
1599 /* For each node */
1600 for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) {
1601 unsigned int n1;
1602 sparsebit_idx_t low, high;
1603
1604 /* For each group of bits in the mask */
1605 for (n1 = 0; n1 < MASK_BITS; n1++) {
1606 if (nodep->mask & (1 << n1)) {
1607 low = high = nodep->idx + n1;
1608
1609 for (; n1 < MASK_BITS; n1++) {
1610 if (nodep->mask & (1 << n1))
1611 high = nodep->idx + n1;
1612 else
1613 break;
1614 }
1615
1616 if ((n1 == MASK_BITS) && nodep->num_after)
1617 high += nodep->num_after;
1618
1619 /*
1620 * How much room will it take to display
1621 * this range.
1622 */
1623 sz = display_range(NULL, low, high,
1624 current_line_len != 0);
1625
1626 /*
1627 * If there is not enough room, display
1628 * a newline plus the indent of the next
1629 * line.
1630 */
1631 if (current_line_len + sz > DUMP_LINE_MAX) {
1632 fputs("\n", stream);
1633 fprintf(stream, "%*s", indent, "");
1634 current_line_len = 0;
1635 }
1636
1637 /* Display the range */
1638 sz = display_range(stream, low, high,
1639 current_line_len != 0);
1640 current_line_len += sz;
1641 }
1642 }
1643
1644 /*
1645 * If num_after and most significant-bit of mask is not
1646 * set, then still need to display a range for the bits
1647 * described by num_after.
1648 */
1649 if (!(nodep->mask & (1 << (MASK_BITS - 1))) && nodep->num_after) {
1650 low = nodep->idx + MASK_BITS;
1651 high = nodep->idx + MASK_BITS + nodep->num_after - 1;
1652
1653 /*
1654 * How much room will it take to display
1655 * this range.
1656 */
1657 sz = display_range(NULL, low, high,
1658 current_line_len != 0);
1659
1660 /*
1661 * If there is not enough room, display
1662 * a newline plus the indent of the next
1663 * line.
1664 */
1665 if (current_line_len + sz > DUMP_LINE_MAX) {
1666 fputs("\n", stream);
1667 fprintf(stream, "%*s", indent, "");
1668 current_line_len = 0;
1669 }
1670
1671 /* Display the range */
1672 sz = display_range(stream, low, high,
1673 current_line_len != 0);
1674 current_line_len += sz;
1675 }
1676 }
1677 fputs("\n", stream);
1678 }
1679
1680 /* Validates the internal state of the sparsebit array given by
1681 * s. On error, diagnostic information is printed to stderr and
1682 * abort is called.
1683 */
sparsebit_validate_internal(struct sparsebit * s)1684 void sparsebit_validate_internal(struct sparsebit *s)
1685 {
1686 bool error_detected = false;
1687 struct node *nodep, *prev = NULL;
1688 sparsebit_num_t total_bits_set = 0;
1689 unsigned int n1;
1690
1691 /* For each node */
1692 for (nodep = node_first(s); nodep;
1693 prev = nodep, nodep = node_next(s, nodep)) {
1694
1695 /*
1696 * Increase total bits set by the number of bits set
1697 * in this node.
1698 */
1699 for (n1 = 0; n1 < MASK_BITS; n1++)
1700 if (nodep->mask & (1 << n1))
1701 total_bits_set++;
1702
1703 total_bits_set += nodep->num_after;
1704
1705 /*
1706 * Arbitrary choice as to whether a mask of 0 is allowed
1707 * or not. For diagnostic purposes it is beneficial to
1708 * have only one valid means to represent a set of bits.
1709 * To support this an arbitrary choice has been made
1710 * to not allow a mask of zero.
1711 */
1712 if (nodep->mask == 0) {
1713 fprintf(stderr, "Node mask of zero, "
1714 "nodep: %p nodep->mask: 0x%x",
1715 nodep, nodep->mask);
1716 error_detected = true;
1717 break;
1718 }
1719
1720 /*
1721 * Validate num_after is not greater than the max index
1722 * - the number of mask bits. The num_after member
1723 * uses 0-based indexing and thus has no value that
1724 * represents all bits set. This limitation is handled
1725 * by requiring a non-zero mask. With a non-zero mask,
1726 * MASK_BITS worth of bits are described by the mask,
1727 * which makes the largest needed num_after equal to:
1728 *
1729 * (~(sparsebit_num_t) 0) - MASK_BITS + 1
1730 */
1731 if (nodep->num_after
1732 > (~(sparsebit_num_t) 0) - MASK_BITS + 1) {
1733 fprintf(stderr, "num_after too large, "
1734 "nodep: %p nodep->num_after: 0x%lx",
1735 nodep, nodep->num_after);
1736 error_detected = true;
1737 break;
1738 }
1739
1740 /* Validate node index is divisible by the mask size */
1741 if (nodep->idx % MASK_BITS) {
1742 fprintf(stderr, "Node index not divisible by "
1743 "mask size,\n"
1744 " nodep: %p nodep->idx: 0x%lx "
1745 "MASK_BITS: %lu\n",
1746 nodep, nodep->idx, MASK_BITS);
1747 error_detected = true;
1748 break;
1749 }
1750
1751 /*
1752 * Validate bits described by node don't wrap beyond the
1753 * highest supported index.
1754 */
1755 if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) {
1756 fprintf(stderr, "Bits described by node wrap "
1757 "beyond highest supported index,\n"
1758 " nodep: %p nodep->idx: 0x%lx\n"
1759 " MASK_BITS: %lu nodep->num_after: 0x%lx",
1760 nodep, nodep->idx, MASK_BITS, nodep->num_after);
1761 error_detected = true;
1762 break;
1763 }
1764
1765 /* Check parent pointers. */
1766 if (nodep->left) {
1767 if (nodep->left->parent != nodep) {
1768 fprintf(stderr, "Left child parent pointer "
1769 "doesn't point to this node,\n"
1770 " nodep: %p nodep->left: %p "
1771 "nodep->left->parent: %p",
1772 nodep, nodep->left,
1773 nodep->left->parent);
1774 error_detected = true;
1775 break;
1776 }
1777 }
1778
1779 if (nodep->right) {
1780 if (nodep->right->parent != nodep) {
1781 fprintf(stderr, "Right child parent pointer "
1782 "doesn't point to this node,\n"
1783 " nodep: %p nodep->right: %p "
1784 "nodep->right->parent: %p",
1785 nodep, nodep->right,
1786 nodep->right->parent);
1787 error_detected = true;
1788 break;
1789 }
1790 }
1791
1792 if (!nodep->parent) {
1793 if (s->root != nodep) {
1794 fprintf(stderr, "Unexpected root node, "
1795 "s->root: %p nodep: %p",
1796 s->root, nodep);
1797 error_detected = true;
1798 break;
1799 }
1800 }
1801
1802 if (prev) {
1803 /*
1804 * Is index of previous node before index of
1805 * current node?
1806 */
1807 if (prev->idx >= nodep->idx) {
1808 fprintf(stderr, "Previous node index "
1809 ">= current node index,\n"
1810 " prev: %p prev->idx: 0x%lx\n"
1811 " nodep: %p nodep->idx: 0x%lx",
1812 prev, prev->idx, nodep, nodep->idx);
1813 error_detected = true;
1814 break;
1815 }
1816
1817 /*
1818 * Nodes occur in asscending order, based on each
1819 * nodes starting index.
1820 */
1821 if ((prev->idx + MASK_BITS + prev->num_after - 1)
1822 >= nodep->idx) {
1823 fprintf(stderr, "Previous node bit range "
1824 "overlap with current node bit range,\n"
1825 " prev: %p prev->idx: 0x%lx "
1826 "prev->num_after: 0x%lx\n"
1827 " nodep: %p nodep->idx: 0x%lx "
1828 "nodep->num_after: 0x%lx\n"
1829 " MASK_BITS: %lu",
1830 prev, prev->idx, prev->num_after,
1831 nodep, nodep->idx, nodep->num_after,
1832 MASK_BITS);
1833 error_detected = true;
1834 break;
1835 }
1836
1837 /*
1838 * When the node has all mask bits set, it shouldn't
1839 * be adjacent to the last bit described by the
1840 * previous node.
1841 */
1842 if (nodep->mask == ~(mask_t) 0 &&
1843 prev->idx + MASK_BITS + prev->num_after == nodep->idx) {
1844 fprintf(stderr, "Current node has mask with "
1845 "all bits set and is adjacent to the "
1846 "previous node,\n"
1847 " prev: %p prev->idx: 0x%lx "
1848 "prev->num_after: 0x%lx\n"
1849 " nodep: %p nodep->idx: 0x%lx "
1850 "nodep->num_after: 0x%lx\n"
1851 " MASK_BITS: %lu",
1852 prev, prev->idx, prev->num_after,
1853 nodep, nodep->idx, nodep->num_after,
1854 MASK_BITS);
1855
1856 error_detected = true;
1857 break;
1858 }
1859 }
1860 }
1861
1862 if (!error_detected) {
1863 /*
1864 * Is sum of bits set in each node equal to the count
1865 * of total bits set.
1866 */
1867 if (s->num_set != total_bits_set) {
1868 fprintf(stderr, "Number of bits set mismatch,\n"
1869 " s->num_set: 0x%lx total_bits_set: 0x%lx",
1870 s->num_set, total_bits_set);
1871
1872 error_detected = true;
1873 }
1874 }
1875
1876 if (error_detected) {
1877 fputs(" dump_internal:\n", stderr);
1878 sparsebit_dump_internal(stderr, s, 4);
1879 abort();
1880 }
1881 }
1882
1883
1884 #ifdef FUZZ
1885 /* A simple but effective fuzzing driver. Look for bugs with the help
1886 * of some invariants and of a trivial representation of sparsebit.
1887 * Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let
1888 * afl-fuzz do the magic. :)
1889 */
1890
1891 #include <stdlib.h>
1892
1893 struct range {
1894 sparsebit_idx_t first, last;
1895 bool set;
1896 };
1897
1898 struct sparsebit *s;
1899 struct range ranges[1000];
1900 int num_ranges;
1901
get_value(sparsebit_idx_t idx)1902 static bool get_value(sparsebit_idx_t idx)
1903 {
1904 int i;
1905
1906 for (i = num_ranges; --i >= 0; )
1907 if (ranges[i].first <= idx && idx <= ranges[i].last)
1908 return ranges[i].set;
1909
1910 return false;
1911 }
1912
operate(int code,sparsebit_idx_t first,sparsebit_idx_t last)1913 static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last)
1914 {
1915 sparsebit_num_t num;
1916 sparsebit_idx_t next;
1917
1918 if (first < last) {
1919 num = last - first + 1;
1920 } else {
1921 num = first - last + 1;
1922 first = last;
1923 last = first + num - 1;
1924 }
1925
1926 switch (code) {
1927 case 0:
1928 sparsebit_set(s, first);
1929 assert(sparsebit_is_set(s, first));
1930 assert(!sparsebit_is_clear(s, first));
1931 assert(sparsebit_any_set(s));
1932 assert(!sparsebit_all_clear(s));
1933 if (get_value(first))
1934 return;
1935 if (num_ranges == 1000)
1936 exit(0);
1937 ranges[num_ranges++] = (struct range)
1938 { .first = first, .last = first, .set = true };
1939 break;
1940 case 1:
1941 sparsebit_clear(s, first);
1942 assert(!sparsebit_is_set(s, first));
1943 assert(sparsebit_is_clear(s, first));
1944 assert(sparsebit_any_clear(s));
1945 assert(!sparsebit_all_set(s));
1946 if (!get_value(first))
1947 return;
1948 if (num_ranges == 1000)
1949 exit(0);
1950 ranges[num_ranges++] = (struct range)
1951 { .first = first, .last = first, .set = false };
1952 break;
1953 case 2:
1954 assert(sparsebit_is_set(s, first) == get_value(first));
1955 assert(sparsebit_is_clear(s, first) == !get_value(first));
1956 break;
1957 case 3:
1958 if (sparsebit_any_set(s))
1959 assert(get_value(sparsebit_first_set(s)));
1960 if (sparsebit_any_clear(s))
1961 assert(!get_value(sparsebit_first_clear(s)));
1962 sparsebit_set_all(s);
1963 assert(!sparsebit_any_clear(s));
1964 assert(sparsebit_all_set(s));
1965 num_ranges = 0;
1966 ranges[num_ranges++] = (struct range)
1967 { .first = 0, .last = ~(sparsebit_idx_t)0, .set = true };
1968 break;
1969 case 4:
1970 if (sparsebit_any_set(s))
1971 assert(get_value(sparsebit_first_set(s)));
1972 if (sparsebit_any_clear(s))
1973 assert(!get_value(sparsebit_first_clear(s)));
1974 sparsebit_clear_all(s);
1975 assert(!sparsebit_any_set(s));
1976 assert(sparsebit_all_clear(s));
1977 num_ranges = 0;
1978 break;
1979 case 5:
1980 next = sparsebit_next_set(s, first);
1981 assert(next == 0 || next > first);
1982 assert(next == 0 || get_value(next));
1983 break;
1984 case 6:
1985 next = sparsebit_next_clear(s, first);
1986 assert(next == 0 || next > first);
1987 assert(next == 0 || !get_value(next));
1988 break;
1989 case 7:
1990 next = sparsebit_next_clear(s, first);
1991 if (sparsebit_is_set_num(s, first, num)) {
1992 assert(next == 0 || next > last);
1993 if (first)
1994 next = sparsebit_next_set(s, first - 1);
1995 else if (sparsebit_any_set(s))
1996 next = sparsebit_first_set(s);
1997 else
1998 return;
1999 assert(next == first);
2000 } else {
2001 assert(sparsebit_is_clear(s, first) || next <= last);
2002 }
2003 break;
2004 case 8:
2005 next = sparsebit_next_set(s, first);
2006 if (sparsebit_is_clear_num(s, first, num)) {
2007 assert(next == 0 || next > last);
2008 if (first)
2009 next = sparsebit_next_clear(s, first - 1);
2010 else if (sparsebit_any_clear(s))
2011 next = sparsebit_first_clear(s);
2012 else
2013 return;
2014 assert(next == first);
2015 } else {
2016 assert(sparsebit_is_set(s, first) || next <= last);
2017 }
2018 break;
2019 case 9:
2020 sparsebit_set_num(s, first, num);
2021 assert(sparsebit_is_set_num(s, first, num));
2022 assert(!sparsebit_is_clear_num(s, first, num));
2023 assert(sparsebit_any_set(s));
2024 assert(!sparsebit_all_clear(s));
2025 if (num_ranges == 1000)
2026 exit(0);
2027 ranges[num_ranges++] = (struct range)
2028 { .first = first, .last = last, .set = true };
2029 break;
2030 case 10:
2031 sparsebit_clear_num(s, first, num);
2032 assert(!sparsebit_is_set_num(s, first, num));
2033 assert(sparsebit_is_clear_num(s, first, num));
2034 assert(sparsebit_any_clear(s));
2035 assert(!sparsebit_all_set(s));
2036 if (num_ranges == 1000)
2037 exit(0);
2038 ranges[num_ranges++] = (struct range)
2039 { .first = first, .last = last, .set = false };
2040 break;
2041 case 11:
2042 sparsebit_validate_internal(s);
2043 break;
2044 default:
2045 break;
2046 }
2047 }
2048
get8(void)2049 unsigned char get8(void)
2050 {
2051 int ch;
2052
2053 ch = getchar();
2054 if (ch == EOF)
2055 exit(0);
2056 return ch;
2057 }
2058
get64(void)2059 uint64_t get64(void)
2060 {
2061 uint64_t x;
2062
2063 x = get8();
2064 x = (x << 8) | get8();
2065 x = (x << 8) | get8();
2066 x = (x << 8) | get8();
2067 x = (x << 8) | get8();
2068 x = (x << 8) | get8();
2069 x = (x << 8) | get8();
2070 return (x << 8) | get8();
2071 }
2072
main(void)2073 int main(void)
2074 {
2075 s = sparsebit_alloc();
2076 for (;;) {
2077 uint8_t op = get8() & 0xf;
2078 uint64_t first = get64();
2079 uint64_t last = get64();
2080
2081 operate(op, first, last);
2082 }
2083 }
2084 #endif
2085