xref: /openbmc/linux/drivers/md/persistent-data/dm-space-map-common.c (revision 8ef9ea1503d0a129cc6f5cf48fb63633efa5d766)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2011 Red Hat, Inc.
4  *
5  * This file is released under the GPL.
6  */
7 
8 #include "dm-space-map-common.h"
9 #include "dm-transaction-manager.h"
10 #include "dm-btree-internal.h"
11 #include "dm-persistent-data-internal.h"
12 
13 #include <linux/bitops.h>
14 #include <linux/device-mapper.h>
15 
16 #define DM_MSG_PREFIX "space map common"
17 
18 /*----------------------------------------------------------------*/
19 
20 /*
21  * Index validator.
22  */
23 #define INDEX_CSUM_XOR 160478
24 
25 static void index_prepare_for_write(struct dm_block_validator *v,
26 				    struct dm_block *b,
27 				    size_t block_size)
28 {
29 	struct disk_metadata_index *mi_le = dm_block_data(b);
30 
31 	mi_le->blocknr = cpu_to_le64(dm_block_location(b));
32 	mi_le->csum = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
33 						 block_size - sizeof(__le32),
34 						 INDEX_CSUM_XOR));
35 }
36 
37 static int index_check(struct dm_block_validator *v,
38 		       struct dm_block *b,
39 		       size_t block_size)
40 {
41 	struct disk_metadata_index *mi_le = dm_block_data(b);
42 	__le32 csum_disk;
43 
44 	if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) {
45 		DMERR_LIMIT("%s failed: blocknr %llu != wanted %llu", __func__,
46 			    le64_to_cpu(mi_le->blocknr), dm_block_location(b));
47 		return -ENOTBLK;
48 	}
49 
50 	csum_disk = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
51 					       block_size - sizeof(__le32),
52 					       INDEX_CSUM_XOR));
53 	if (csum_disk != mi_le->csum) {
54 		DMERR_LIMIT("%s failed: csum %u != wanted %u", __func__,
55 			    le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum));
56 		return -EILSEQ;
57 	}
58 
59 	return 0;
60 }
61 
62 static struct dm_block_validator index_validator = {
63 	.name = "index",
64 	.prepare_for_write = index_prepare_for_write,
65 	.check = index_check
66 };
67 
68 /*----------------------------------------------------------------*/
69 
70 /*
71  * Bitmap validator
72  */
73 #define BITMAP_CSUM_XOR 240779
74 
75 static void dm_bitmap_prepare_for_write(struct dm_block_validator *v,
76 					struct dm_block *b,
77 					size_t block_size)
78 {
79 	struct disk_bitmap_header *disk_header = dm_block_data(b);
80 
81 	disk_header->blocknr = cpu_to_le64(dm_block_location(b));
82 	disk_header->csum = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
83 						       block_size - sizeof(__le32),
84 						       BITMAP_CSUM_XOR));
85 }
86 
87 static int dm_bitmap_check(struct dm_block_validator *v,
88 			   struct dm_block *b,
89 			   size_t block_size)
90 {
91 	struct disk_bitmap_header *disk_header = dm_block_data(b);
92 	__le32 csum_disk;
93 
94 	if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) {
95 		DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu",
96 			    le64_to_cpu(disk_header->blocknr), dm_block_location(b));
97 		return -ENOTBLK;
98 	}
99 
100 	csum_disk = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
101 					       block_size - sizeof(__le32),
102 					       BITMAP_CSUM_XOR));
103 	if (csum_disk != disk_header->csum) {
104 		DMERR_LIMIT("bitmap check failed: csum %u != wanted %u",
105 			    le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum));
106 		return -EILSEQ;
107 	}
108 
109 	return 0;
110 }
111 
112 static struct dm_block_validator dm_sm_bitmap_validator = {
113 	.name = "sm_bitmap",
114 	.prepare_for_write = dm_bitmap_prepare_for_write,
115 	.check = dm_bitmap_check,
116 };
117 
118 /*----------------------------------------------------------------*/
119 
120 #define ENTRIES_PER_WORD 32
121 #define ENTRIES_SHIFT	5
122 
123 static void *dm_bitmap_data(struct dm_block *b)
124 {
125 	return dm_block_data(b) + sizeof(struct disk_bitmap_header);
126 }
127 
128 #define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL
129 
130 static unsigned int dm_bitmap_word_used(void *addr, unsigned int b)
131 {
132 	__le64 *words_le = addr;
133 	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
134 
135 	uint64_t bits = le64_to_cpu(*w_le);
136 	uint64_t mask = (bits + WORD_MASK_HIGH + 1) & WORD_MASK_HIGH;
137 
138 	return !(~bits & mask);
139 }
140 
141 static unsigned int sm_lookup_bitmap(void *addr, unsigned int b)
142 {
143 	__le64 *words_le = addr;
144 	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
145 	unsigned int hi, lo;
146 
147 	b = (b & (ENTRIES_PER_WORD - 1)) << 1;
148 	hi = !!test_bit_le(b, (void *) w_le);
149 	lo = !!test_bit_le(b + 1, (void *) w_le);
150 	return (hi << 1) | lo;
151 }
152 
153 static void sm_set_bitmap(void *addr, unsigned int b, unsigned int val)
154 {
155 	__le64 *words_le = addr;
156 	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
157 
158 	b = (b & (ENTRIES_PER_WORD - 1)) << 1;
159 
160 	if (val & 2)
161 		__set_bit_le(b, (void *) w_le);
162 	else
163 		__clear_bit_le(b, (void *) w_le);
164 
165 	if (val & 1)
166 		__set_bit_le(b + 1, (void *) w_le);
167 	else
168 		__clear_bit_le(b + 1, (void *) w_le);
169 }
170 
171 static int sm_find_free(void *addr, unsigned int begin, unsigned int end,
172 			unsigned int *result)
173 {
174 	while (begin < end) {
175 		if (!(begin & (ENTRIES_PER_WORD - 1)) &&
176 		    dm_bitmap_word_used(addr, begin)) {
177 			begin += ENTRIES_PER_WORD;
178 			continue;
179 		}
180 
181 		if (!sm_lookup_bitmap(addr, begin)) {
182 			*result = begin;
183 			return 0;
184 		}
185 
186 		begin++;
187 	}
188 
189 	return -ENOSPC;
190 }
191 
192 /*----------------------------------------------------------------*/
193 
194 static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm)
195 {
196 	memset(ll, 0, sizeof(struct ll_disk));
197 
198 	ll->tm = tm;
199 
200 	ll->bitmap_info.tm = tm;
201 	ll->bitmap_info.levels = 1;
202 
203 	/*
204 	 * Because the new bitmap blocks are created via a shadow
205 	 * operation, the old entry has already had its reference count
206 	 * decremented and we don't need the btree to do any bookkeeping.
207 	 */
208 	ll->bitmap_info.value_type.size = sizeof(struct disk_index_entry);
209 	ll->bitmap_info.value_type.inc = NULL;
210 	ll->bitmap_info.value_type.dec = NULL;
211 	ll->bitmap_info.value_type.equal = NULL;
212 
213 	ll->ref_count_info.tm = tm;
214 	ll->ref_count_info.levels = 1;
215 	ll->ref_count_info.value_type.size = sizeof(uint32_t);
216 	ll->ref_count_info.value_type.inc = NULL;
217 	ll->ref_count_info.value_type.dec = NULL;
218 	ll->ref_count_info.value_type.equal = NULL;
219 
220 	ll->block_size = dm_bm_block_size(dm_tm_get_bm(tm));
221 
222 	if (ll->block_size > (1 << 30)) {
223 		DMERR("block size too big to hold bitmaps");
224 		return -EINVAL;
225 	}
226 
227 	ll->entries_per_block = (ll->block_size - sizeof(struct disk_bitmap_header)) *
228 		ENTRIES_PER_BYTE;
229 	ll->nr_blocks = 0;
230 	ll->bitmap_root = 0;
231 	ll->ref_count_root = 0;
232 	ll->bitmap_index_changed = false;
233 
234 	return 0;
235 }
236 
237 int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
238 {
239 	int r;
240 	dm_block_t i, nr_blocks, nr_indexes;
241 	unsigned int old_blocks, blocks;
242 
243 	nr_blocks = ll->nr_blocks + extra_blocks;
244 	old_blocks = dm_sector_div_up(ll->nr_blocks, ll->entries_per_block);
245 	blocks = dm_sector_div_up(nr_blocks, ll->entries_per_block);
246 
247 	nr_indexes = dm_sector_div_up(nr_blocks, ll->entries_per_block);
248 	if (nr_indexes > ll->max_entries(ll)) {
249 		DMERR("space map too large");
250 		return -EINVAL;
251 	}
252 
253 	/*
254 	 * We need to set this before the dm_tm_new_block() call below.
255 	 */
256 	ll->nr_blocks = nr_blocks;
257 	for (i = old_blocks; i < blocks; i++) {
258 		struct dm_block *b;
259 		struct disk_index_entry idx;
260 
261 		r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b);
262 		if (r < 0)
263 			return r;
264 
265 		idx.blocknr = cpu_to_le64(dm_block_location(b));
266 
267 		dm_tm_unlock(ll->tm, b);
268 
269 		idx.nr_free = cpu_to_le32(ll->entries_per_block);
270 		idx.none_free_before = 0;
271 
272 		r = ll->save_ie(ll, i, &idx);
273 		if (r < 0)
274 			return r;
275 	}
276 
277 	return 0;
278 }
279 
280 int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result)
281 {
282 	int r;
283 	dm_block_t index = b;
284 	struct disk_index_entry ie_disk;
285 	struct dm_block *blk;
286 
287 	if (b >= ll->nr_blocks) {
288 		DMERR_LIMIT("metadata block out of bounds");
289 		return -EINVAL;
290 	}
291 
292 	b = do_div(index, ll->entries_per_block);
293 	r = ll->load_ie(ll, index, &ie_disk);
294 	if (r < 0)
295 		return r;
296 
297 	r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
298 			    &dm_sm_bitmap_validator, &blk);
299 	if (r < 0)
300 		return r;
301 
302 	*result = sm_lookup_bitmap(dm_bitmap_data(blk), b);
303 
304 	dm_tm_unlock(ll->tm, blk);
305 
306 	return 0;
307 }
308 
309 static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b,
310 				      uint32_t *result)
311 {
312 	__le32 le_rc;
313 	int r;
314 
315 	r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc);
316 	if (r < 0)
317 		return r;
318 
319 	*result = le32_to_cpu(le_rc);
320 
321 	return r;
322 }
323 
324 int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
325 {
326 	int r = sm_ll_lookup_bitmap(ll, b, result);
327 
328 	if (r)
329 		return r;
330 
331 	if (*result != 3)
332 		return r;
333 
334 	return sm_ll_lookup_big_ref_count(ll, b, result);
335 }
336 
337 int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
338 			  dm_block_t end, dm_block_t *result)
339 {
340 	int r;
341 	struct disk_index_entry ie_disk;
342 	dm_block_t i, index_begin = begin;
343 	dm_block_t index_end = dm_sector_div_up(end, ll->entries_per_block);
344 
345 	/*
346 	 * FIXME: Use shifts
347 	 */
348 	begin = do_div(index_begin, ll->entries_per_block);
349 	end = do_div(end, ll->entries_per_block);
350 	if (end == 0)
351 		end = ll->entries_per_block;
352 
353 	for (i = index_begin; i < index_end; i++, begin = 0) {
354 		struct dm_block *blk;
355 		unsigned int position;
356 		uint32_t bit_end;
357 
358 		r = ll->load_ie(ll, i, &ie_disk);
359 		if (r < 0)
360 			return r;
361 
362 		if (le32_to_cpu(ie_disk.nr_free) == 0)
363 			continue;
364 
365 		r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
366 				    &dm_sm_bitmap_validator, &blk);
367 		if (r < 0)
368 			return r;
369 
370 		bit_end = (i == index_end - 1) ?  end : ll->entries_per_block;
371 
372 		r = sm_find_free(dm_bitmap_data(blk),
373 				 max_t(unsigned int, begin, le32_to_cpu(ie_disk.none_free_before)),
374 				 bit_end, &position);
375 		if (r == -ENOSPC) {
376 			/*
377 			 * This might happen because we started searching
378 			 * part way through the bitmap.
379 			 */
380 			dm_tm_unlock(ll->tm, blk);
381 			continue;
382 		}
383 
384 		dm_tm_unlock(ll->tm, blk);
385 
386 		*result = i * ll->entries_per_block + (dm_block_t) position;
387 		return 0;
388 	}
389 
390 	return -ENOSPC;
391 }
392 
393 int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
394 				 dm_block_t begin, dm_block_t end, dm_block_t *b)
395 {
396 	int r;
397 	uint32_t count;
398 
399 	do {
400 		r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b);
401 		if (r)
402 			break;
403 
404 		/* double check this block wasn't used in the old transaction */
405 		if (*b >= old_ll->nr_blocks)
406 			count = 0;
407 		else {
408 			r = sm_ll_lookup(old_ll, *b, &count);
409 			if (r)
410 				break;
411 
412 			if (count)
413 				begin = *b + 1;
414 		}
415 	} while (count);
416 
417 	return r;
418 }
419 
420 /*----------------------------------------------------------------*/
421 
422 int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
423 		 uint32_t ref_count, int32_t *nr_allocations)
424 {
425 	int r;
426 	uint32_t bit, old;
427 	struct dm_block *nb;
428 	dm_block_t index = b;
429 	struct disk_index_entry ie_disk;
430 	void *bm_le;
431 	int inc;
432 
433 	bit = do_div(index, ll->entries_per_block);
434 	r = ll->load_ie(ll, index, &ie_disk);
435 	if (r < 0)
436 		return r;
437 
438 	r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ie_disk.blocknr),
439 			       &dm_sm_bitmap_validator, &nb, &inc);
440 	if (r < 0) {
441 		DMERR("dm_tm_shadow_block() failed");
442 		return r;
443 	}
444 	ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
445 	bm_le = dm_bitmap_data(nb);
446 
447 	old = sm_lookup_bitmap(bm_le, bit);
448 	if (old > 2) {
449 		r = sm_ll_lookup_big_ref_count(ll, b, &old);
450 		if (r < 0) {
451 			dm_tm_unlock(ll->tm, nb);
452 			return r;
453 		}
454 	}
455 
456 	if (r) {
457 		dm_tm_unlock(ll->tm, nb);
458 		return r;
459 	}
460 
461 	if (ref_count <= 2) {
462 		sm_set_bitmap(bm_le, bit, ref_count);
463 		dm_tm_unlock(ll->tm, nb);
464 
465 		if (old > 2) {
466 			r = dm_btree_remove(&ll->ref_count_info,
467 					    ll->ref_count_root,
468 					    &b, &ll->ref_count_root);
469 			if (r)
470 				return r;
471 		}
472 
473 	} else {
474 		__le32 le_rc = cpu_to_le32(ref_count);
475 
476 		sm_set_bitmap(bm_le, bit, 3);
477 		dm_tm_unlock(ll->tm, nb);
478 
479 		__dm_bless_for_disk(&le_rc);
480 		r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
481 				    &b, &le_rc, &ll->ref_count_root);
482 		if (r < 0) {
483 			DMERR("ref count insert failed");
484 			return r;
485 		}
486 	}
487 
488 	if (ref_count && !old) {
489 		*nr_allocations = 1;
490 		ll->nr_allocated++;
491 		le32_add_cpu(&ie_disk.nr_free, -1);
492 		if (le32_to_cpu(ie_disk.none_free_before) == bit)
493 			ie_disk.none_free_before = cpu_to_le32(bit + 1);
494 
495 	} else if (old && !ref_count) {
496 		*nr_allocations = -1;
497 		ll->nr_allocated--;
498 		le32_add_cpu(&ie_disk.nr_free, 1);
499 		ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
500 	} else
501 		*nr_allocations = 0;
502 
503 	return ll->save_ie(ll, index, &ie_disk);
504 }
505 
506 /*----------------------------------------------------------------*/
507 
508 /*
509  * Holds useful intermediate results for the range based inc and dec
510  * operations.
511  */
512 struct inc_context {
513 	struct disk_index_entry ie_disk;
514 	struct dm_block *bitmap_block;
515 	void *bitmap;
516 
517 	struct dm_block *overflow_leaf;
518 };
519 
520 static inline void init_inc_context(struct inc_context *ic)
521 {
522 	ic->bitmap_block = NULL;
523 	ic->bitmap = NULL;
524 	ic->overflow_leaf = NULL;
525 }
526 
527 static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic)
528 {
529 	if (ic->bitmap_block)
530 		dm_tm_unlock(ll->tm, ic->bitmap_block);
531 	if (ic->overflow_leaf)
532 		dm_tm_unlock(ll->tm, ic->overflow_leaf);
533 }
534 
535 static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic)
536 {
537 	exit_inc_context(ll, ic);
538 	init_inc_context(ic);
539 }
540 
541 /*
542  * Confirms a btree node contains a particular key at an index.
543  */
544 static bool contains_key(struct btree_node *n, uint64_t key, int index)
545 {
546 	return index >= 0 &&
547 		index < le32_to_cpu(n->header.nr_entries) &&
548 		le64_to_cpu(n->keys[index]) == key;
549 }
550 
551 static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
552 {
553 	int r;
554 	int index;
555 	struct btree_node *n;
556 	__le32 *v_ptr;
557 	uint32_t rc;
558 
559 	/*
560 	 * bitmap_block needs to be unlocked because getting the
561 	 * overflow_leaf may need to allocate, and thus use the space map.
562 	 */
563 	reset_inc_context(ll, ic);
564 
565 	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
566 				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
567 	if (r < 0)
568 		return r;
569 
570 	n = dm_block_data(ic->overflow_leaf);
571 
572 	if (!contains_key(n, b, index)) {
573 		DMERR("overflow btree is missing an entry");
574 		return -EINVAL;
575 	}
576 
577 	v_ptr = value_ptr(n, index);
578 	rc = le32_to_cpu(*v_ptr) + 1;
579 	*v_ptr = cpu_to_le32(rc);
580 
581 	return 0;
582 }
583 
584 static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
585 {
586 	int index;
587 	struct btree_node *n;
588 	__le32 *v_ptr;
589 	uint32_t rc;
590 
591 	/*
592 	 * Do we already have the correct overflow leaf?
593 	 */
594 	if (ic->overflow_leaf) {
595 		n = dm_block_data(ic->overflow_leaf);
596 		index = lower_bound(n, b);
597 		if (contains_key(n, b, index)) {
598 			v_ptr = value_ptr(n, index);
599 			rc = le32_to_cpu(*v_ptr) + 1;
600 			*v_ptr = cpu_to_le32(rc);
601 
602 			return 0;
603 		}
604 	}
605 
606 	return __sm_ll_inc_overflow(ll, b, ic);
607 }
608 
609 static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic)
610 {
611 	int r, inc;
612 
613 	r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr),
614 			       &dm_sm_bitmap_validator, &ic->bitmap_block, &inc);
615 	if (r < 0) {
616 		DMERR("dm_tm_shadow_block() failed");
617 		return r;
618 	}
619 	ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block));
620 	ic->bitmap = dm_bitmap_data(ic->bitmap_block);
621 	return 0;
622 }
623 
624 /*
625  * Once shadow_bitmap has been called, which always happens at the start of inc/dec,
626  * we can reopen the bitmap with a simple write lock, rather than re calling
627  * dm_tm_shadow_block().
628  */
629 static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic)
630 {
631 	if (!ic->bitmap_block) {
632 		int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr),
633 					 &dm_sm_bitmap_validator, &ic->bitmap_block);
634 		if (r) {
635 			DMERR("unable to re-get write lock for bitmap");
636 			return r;
637 		}
638 		ic->bitmap = dm_bitmap_data(ic->bitmap_block);
639 	}
640 
641 	return 0;
642 }
643 
644 /*
645  * Loops round incrementing entries in a single bitmap.
646  */
647 static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b,
648 				   uint32_t bit, uint32_t bit_end,
649 				   int32_t *nr_allocations, dm_block_t *new_b,
650 				   struct inc_context *ic)
651 {
652 	int r;
653 	__le32 le_rc;
654 	uint32_t old;
655 
656 	for (; bit != bit_end; bit++, b++) {
657 		/*
658 		 * We only need to drop the bitmap if we need to find a new btree
659 		 * leaf for the overflow.  So if it was dropped last iteration,
660 		 * we now re-get it.
661 		 */
662 		r = ensure_bitmap(ll, ic);
663 		if (r)
664 			return r;
665 
666 		old = sm_lookup_bitmap(ic->bitmap, bit);
667 		switch (old) {
668 		case 0:
669 			/* inc bitmap, adjust nr_allocated */
670 			sm_set_bitmap(ic->bitmap, bit, 1);
671 			(*nr_allocations)++;
672 			ll->nr_allocated++;
673 			le32_add_cpu(&ic->ie_disk.nr_free, -1);
674 			if (le32_to_cpu(ic->ie_disk.none_free_before) == bit)
675 				ic->ie_disk.none_free_before = cpu_to_le32(bit + 1);
676 			break;
677 
678 		case 1:
679 			/* inc bitmap */
680 			sm_set_bitmap(ic->bitmap, bit, 2);
681 			break;
682 
683 		case 2:
684 			/* inc bitmap and insert into overflow */
685 			sm_set_bitmap(ic->bitmap, bit, 3);
686 			reset_inc_context(ll, ic);
687 
688 			le_rc = cpu_to_le32(3);
689 			__dm_bless_for_disk(&le_rc);
690 			r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
691 					    &b, &le_rc, &ll->ref_count_root);
692 			if (r < 0) {
693 				DMERR("ref count insert failed");
694 				return r;
695 			}
696 			break;
697 
698 		default:
699 			/*
700 			 * inc within the overflow tree only.
701 			 */
702 			r = sm_ll_inc_overflow(ll, b, ic);
703 			if (r < 0)
704 				return r;
705 		}
706 	}
707 
708 	*new_b = b;
709 	return 0;
710 }
711 
712 /*
713  * Finds a bitmap that contains entries in the block range, and increments
714  * them.
715  */
716 static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
717 		       int32_t *nr_allocations, dm_block_t *new_b)
718 {
719 	int r;
720 	struct inc_context ic;
721 	uint32_t bit, bit_end;
722 	dm_block_t index = b;
723 
724 	init_inc_context(&ic);
725 
726 	bit = do_div(index, ll->entries_per_block);
727 	r = ll->load_ie(ll, index, &ic.ie_disk);
728 	if (r < 0)
729 		return r;
730 
731 	r = shadow_bitmap(ll, &ic);
732 	if (r)
733 		return r;
734 
735 	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
736 	r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic);
737 
738 	exit_inc_context(ll, &ic);
739 
740 	if (r)
741 		return r;
742 
743 	return ll->save_ie(ll, index, &ic.ie_disk);
744 }
745 
746 int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
747 	      int32_t *nr_allocations)
748 {
749 	*nr_allocations = 0;
750 	while (b != e) {
751 		int r = __sm_ll_inc(ll, b, e, nr_allocations, &b);
752 
753 		if (r)
754 			return r;
755 	}
756 
757 	return 0;
758 }
759 
760 /*----------------------------------------------------------------*/
761 
762 static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b,
763 				struct inc_context *ic)
764 {
765 	reset_inc_context(ll, ic);
766 	return dm_btree_remove(&ll->ref_count_info, ll->ref_count_root,
767 			       &b, &ll->ref_count_root);
768 }
769 
770 static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
771 				struct inc_context *ic, uint32_t *old_rc)
772 {
773 	int r;
774 	int index = -1;
775 	struct btree_node *n;
776 	__le32 *v_ptr;
777 	uint32_t rc;
778 
779 	reset_inc_context(ll, ic);
780 	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
781 				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
782 	if (r < 0)
783 		return r;
784 
785 	n = dm_block_data(ic->overflow_leaf);
786 
787 	if (!contains_key(n, b, index)) {
788 		DMERR("overflow btree is missing an entry");
789 		return -EINVAL;
790 	}
791 
792 	v_ptr = value_ptr(n, index);
793 	rc = le32_to_cpu(*v_ptr);
794 	*old_rc = rc;
795 
796 	if (rc == 3)
797 		return __sm_ll_del_overflow(ll, b, ic);
798 
799 	rc--;
800 	*v_ptr = cpu_to_le32(rc);
801 	return 0;
802 }
803 
804 static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
805 			      struct inc_context *ic, uint32_t *old_rc)
806 {
807 	/*
808 	 * Do we already have the correct overflow leaf?
809 	 */
810 	if (ic->overflow_leaf) {
811 		int index;
812 		struct btree_node *n;
813 		__le32 *v_ptr;
814 		uint32_t rc;
815 
816 		n = dm_block_data(ic->overflow_leaf);
817 		index = lower_bound(n, b);
818 		if (contains_key(n, b, index)) {
819 			v_ptr = value_ptr(n, index);
820 			rc = le32_to_cpu(*v_ptr);
821 			*old_rc = rc;
822 
823 			if (rc > 3) {
824 				rc--;
825 				*v_ptr = cpu_to_le32(rc);
826 				return 0;
827 			} else {
828 				return __sm_ll_del_overflow(ll, b, ic);
829 			}
830 
831 		}
832 	}
833 
834 	return __sm_ll_dec_overflow(ll, b, ic, old_rc);
835 }
836 
837 /*
838  * Loops round incrementing entries in a single bitmap.
839  */
840 static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b,
841 				   uint32_t bit, uint32_t bit_end,
842 				   struct inc_context *ic,
843 				   int32_t *nr_allocations, dm_block_t *new_b)
844 {
845 	int r;
846 	uint32_t old;
847 
848 	for (; bit != bit_end; bit++, b++) {
849 		/*
850 		 * We only need to drop the bitmap if we need to find a new btree
851 		 * leaf for the overflow.  So if it was dropped last iteration,
852 		 * we now re-get it.
853 		 */
854 		r = ensure_bitmap(ll, ic);
855 		if (r)
856 			return r;
857 
858 		old = sm_lookup_bitmap(ic->bitmap, bit);
859 		switch (old) {
860 		case 0:
861 			DMERR("unable to decrement block");
862 			return -EINVAL;
863 
864 		case 1:
865 			/* dec bitmap */
866 			sm_set_bitmap(ic->bitmap, bit, 0);
867 			(*nr_allocations)--;
868 			ll->nr_allocated--;
869 			le32_add_cpu(&ic->ie_disk.nr_free, 1);
870 			ic->ie_disk.none_free_before =
871 				cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit));
872 			break;
873 
874 		case 2:
875 			/* dec bitmap and insert into overflow */
876 			sm_set_bitmap(ic->bitmap, bit, 1);
877 			break;
878 
879 		case 3:
880 			r = sm_ll_dec_overflow(ll, b, ic, &old);
881 			if (r < 0)
882 				return r;
883 
884 			if (old == 3) {
885 				r = ensure_bitmap(ll, ic);
886 				if (r)
887 					return r;
888 
889 				sm_set_bitmap(ic->bitmap, bit, 2);
890 			}
891 			break;
892 		}
893 	}
894 
895 	*new_b = b;
896 	return 0;
897 }
898 
899 static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
900 		       int32_t *nr_allocations, dm_block_t *new_b)
901 {
902 	int r;
903 	uint32_t bit, bit_end;
904 	struct inc_context ic;
905 	dm_block_t index = b;
906 
907 	init_inc_context(&ic);
908 
909 	bit = do_div(index, ll->entries_per_block);
910 	r = ll->load_ie(ll, index, &ic.ie_disk);
911 	if (r < 0)
912 		return r;
913 
914 	r = shadow_bitmap(ll, &ic);
915 	if (r)
916 		return r;
917 
918 	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
919 	r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b);
920 	exit_inc_context(ll, &ic);
921 
922 	if (r)
923 		return r;
924 
925 	return ll->save_ie(ll, index, &ic.ie_disk);
926 }
927 
928 int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
929 	      int32_t *nr_allocations)
930 {
931 	*nr_allocations = 0;
932 	while (b != e) {
933 		int r = __sm_ll_dec(ll, b, e, nr_allocations, &b);
934 
935 		if (r)
936 			return r;
937 	}
938 
939 	return 0;
940 }
941 
942 /*----------------------------------------------------------------*/
943 
944 int sm_ll_commit(struct ll_disk *ll)
945 {
946 	int r = 0;
947 
948 	if (ll->bitmap_index_changed) {
949 		r = ll->commit(ll);
950 		if (!r)
951 			ll->bitmap_index_changed = false;
952 	}
953 
954 	return r;
955 }
956 
957 /*----------------------------------------------------------------*/
958 
959 static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index,
960 			       struct disk_index_entry *ie)
961 {
962 	memcpy(ie, ll->mi_le.index + index, sizeof(*ie));
963 	return 0;
964 }
965 
966 static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index,
967 			       struct disk_index_entry *ie)
968 {
969 	ll->bitmap_index_changed = true;
970 	memcpy(ll->mi_le.index + index, ie, sizeof(*ie));
971 	return 0;
972 }
973 
974 static int metadata_ll_init_index(struct ll_disk *ll)
975 {
976 	int r;
977 	struct dm_block *b;
978 
979 	r = dm_tm_new_block(ll->tm, &index_validator, &b);
980 	if (r < 0)
981 		return r;
982 
983 	ll->bitmap_root = dm_block_location(b);
984 
985 	dm_tm_unlock(ll->tm, b);
986 
987 	return 0;
988 }
989 
990 static int metadata_ll_open(struct ll_disk *ll)
991 {
992 	int r;
993 	struct dm_block *block;
994 
995 	r = dm_tm_read_lock(ll->tm, ll->bitmap_root,
996 			    &index_validator, &block);
997 	if (r)
998 		return r;
999 
1000 	memcpy(&ll->mi_le, dm_block_data(block), sizeof(ll->mi_le));
1001 	dm_tm_unlock(ll->tm, block);
1002 
1003 	return 0;
1004 }
1005 
1006 static dm_block_t metadata_ll_max_entries(struct ll_disk *ll)
1007 {
1008 	return MAX_METADATA_BITMAPS;
1009 }
1010 
1011 static int metadata_ll_commit(struct ll_disk *ll)
1012 {
1013 	int r, inc;
1014 	struct dm_block *b;
1015 
1016 	r = dm_tm_shadow_block(ll->tm, ll->bitmap_root, &index_validator, &b, &inc);
1017 	if (r)
1018 		return r;
1019 
1020 	memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le));
1021 	ll->bitmap_root = dm_block_location(b);
1022 
1023 	dm_tm_unlock(ll->tm, b);
1024 
1025 	return 0;
1026 }
1027 
1028 int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm)
1029 {
1030 	int r;
1031 
1032 	r = sm_ll_init(ll, tm);
1033 	if (r < 0)
1034 		return r;
1035 
1036 	ll->load_ie = metadata_ll_load_ie;
1037 	ll->save_ie = metadata_ll_save_ie;
1038 	ll->init_index = metadata_ll_init_index;
1039 	ll->open_index = metadata_ll_open;
1040 	ll->max_entries = metadata_ll_max_entries;
1041 	ll->commit = metadata_ll_commit;
1042 
1043 	ll->nr_blocks = 0;
1044 	ll->nr_allocated = 0;
1045 
1046 	r = ll->init_index(ll);
1047 	if (r < 0)
1048 		return r;
1049 
1050 	r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
1051 	if (r < 0)
1052 		return r;
1053 
1054 	return 0;
1055 }
1056 
1057 int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm,
1058 			void *root_le, size_t len)
1059 {
1060 	int r;
1061 	struct disk_sm_root smr;
1062 
1063 	if (len < sizeof(struct disk_sm_root)) {
1064 		DMERR("sm_metadata root too small");
1065 		return -ENOMEM;
1066 	}
1067 
1068 	/*
1069 	 * We don't know the alignment of the root_le buffer, so need to
1070 	 * copy into a new structure.
1071 	 */
1072 	memcpy(&smr, root_le, sizeof(smr));
1073 
1074 	r = sm_ll_init(ll, tm);
1075 	if (r < 0)
1076 		return r;
1077 
1078 	ll->load_ie = metadata_ll_load_ie;
1079 	ll->save_ie = metadata_ll_save_ie;
1080 	ll->init_index = metadata_ll_init_index;
1081 	ll->open_index = metadata_ll_open;
1082 	ll->max_entries = metadata_ll_max_entries;
1083 	ll->commit = metadata_ll_commit;
1084 
1085 	ll->nr_blocks = le64_to_cpu(smr.nr_blocks);
1086 	ll->nr_allocated = le64_to_cpu(smr.nr_allocated);
1087 	ll->bitmap_root = le64_to_cpu(smr.bitmap_root);
1088 	ll->ref_count_root = le64_to_cpu(smr.ref_count_root);
1089 
1090 	return ll->open_index(ll);
1091 }
1092 
1093 /*----------------------------------------------------------------*/
1094 
1095 static inline int ie_cache_writeback(struct ll_disk *ll, struct ie_cache *iec)
1096 {
1097 	iec->dirty = false;
1098 	__dm_bless_for_disk(iec->ie);
1099 	return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root,
1100 			       &iec->index, &iec->ie, &ll->bitmap_root);
1101 }
1102 
1103 static inline unsigned int hash_index(dm_block_t index)
1104 {
1105 	return dm_hash_block(index, IE_CACHE_MASK);
1106 }
1107 
1108 static int disk_ll_load_ie(struct ll_disk *ll, dm_block_t index,
1109 			   struct disk_index_entry *ie)
1110 {
1111 	int r;
1112 	unsigned int h = hash_index(index);
1113 	struct ie_cache *iec = ll->ie_cache + h;
1114 
1115 	if (iec->valid) {
1116 		if (iec->index == index) {
1117 			memcpy(ie, &iec->ie, sizeof(*ie));
1118 			return 0;
1119 		}
1120 
1121 		if (iec->dirty) {
1122 			r = ie_cache_writeback(ll, iec);
1123 			if (r)
1124 				return r;
1125 		}
1126 	}
1127 
1128 	r = dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie);
1129 	if (!r) {
1130 		iec->valid = true;
1131 		iec->dirty = false;
1132 		iec->index = index;
1133 		memcpy(&iec->ie, ie, sizeof(*ie));
1134 	}
1135 
1136 	return r;
1137 }
1138 
1139 static int disk_ll_save_ie(struct ll_disk *ll, dm_block_t index,
1140 			   struct disk_index_entry *ie)
1141 {
1142 	int r;
1143 	unsigned int h = hash_index(index);
1144 	struct ie_cache *iec = ll->ie_cache + h;
1145 
1146 	ll->bitmap_index_changed = true;
1147 	if (iec->valid) {
1148 		if (iec->index == index) {
1149 			memcpy(&iec->ie, ie, sizeof(*ie));
1150 			iec->dirty = true;
1151 			return 0;
1152 		}
1153 
1154 		if (iec->dirty) {
1155 			r = ie_cache_writeback(ll, iec);
1156 			if (r)
1157 				return r;
1158 		}
1159 	}
1160 
1161 	iec->valid = true;
1162 	iec->dirty = true;
1163 	iec->index = index;
1164 	memcpy(&iec->ie, ie, sizeof(*ie));
1165 	return 0;
1166 }
1167 
1168 static int disk_ll_init_index(struct ll_disk *ll)
1169 {
1170 	unsigned int i;
1171 
1172 	for (i = 0; i < IE_CACHE_SIZE; i++) {
1173 		struct ie_cache *iec = ll->ie_cache + i;
1174 
1175 		iec->valid = false;
1176 		iec->dirty = false;
1177 	}
1178 	return dm_btree_empty(&ll->bitmap_info, &ll->bitmap_root);
1179 }
1180 
1181 static int disk_ll_open(struct ll_disk *ll)
1182 {
1183 	return 0;
1184 }
1185 
1186 static dm_block_t disk_ll_max_entries(struct ll_disk *ll)
1187 {
1188 	return -1ULL;
1189 }
1190 
1191 static int disk_ll_commit(struct ll_disk *ll)
1192 {
1193 	int r = 0;
1194 	unsigned int i;
1195 
1196 	for (i = 0; i < IE_CACHE_SIZE; i++) {
1197 		struct ie_cache *iec = ll->ie_cache + i;
1198 
1199 		if (iec->valid && iec->dirty)
1200 			r = ie_cache_writeback(ll, iec);
1201 	}
1202 
1203 	return r;
1204 }
1205 
1206 int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm)
1207 {
1208 	int r;
1209 
1210 	r = sm_ll_init(ll, tm);
1211 	if (r < 0)
1212 		return r;
1213 
1214 	ll->load_ie = disk_ll_load_ie;
1215 	ll->save_ie = disk_ll_save_ie;
1216 	ll->init_index = disk_ll_init_index;
1217 	ll->open_index = disk_ll_open;
1218 	ll->max_entries = disk_ll_max_entries;
1219 	ll->commit = disk_ll_commit;
1220 
1221 	ll->nr_blocks = 0;
1222 	ll->nr_allocated = 0;
1223 
1224 	r = ll->init_index(ll);
1225 	if (r < 0)
1226 		return r;
1227 
1228 	r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
1229 	if (r < 0)
1230 		return r;
1231 
1232 	return 0;
1233 }
1234 
1235 int sm_ll_open_disk(struct ll_disk *ll, struct dm_transaction_manager *tm,
1236 		    void *root_le, size_t len)
1237 {
1238 	int r;
1239 	struct disk_sm_root *smr = root_le;
1240 
1241 	if (len < sizeof(struct disk_sm_root)) {
1242 		DMERR("sm_metadata root too small");
1243 		return -ENOMEM;
1244 	}
1245 
1246 	r = sm_ll_init(ll, tm);
1247 	if (r < 0)
1248 		return r;
1249 
1250 	ll->load_ie = disk_ll_load_ie;
1251 	ll->save_ie = disk_ll_save_ie;
1252 	ll->init_index = disk_ll_init_index;
1253 	ll->open_index = disk_ll_open;
1254 	ll->max_entries = disk_ll_max_entries;
1255 	ll->commit = disk_ll_commit;
1256 
1257 	ll->nr_blocks = le64_to_cpu(smr->nr_blocks);
1258 	ll->nr_allocated = le64_to_cpu(smr->nr_allocated);
1259 	ll->bitmap_root = le64_to_cpu(smr->bitmap_root);
1260 	ll->ref_count_root = le64_to_cpu(smr->ref_count_root);
1261 
1262 	return ll->open_index(ll);
1263 }
1264 
1265 /*----------------------------------------------------------------*/
1266