xref: /openbmc/linux/drivers/md/persistent-data/dm-space-map-common.c (revision c83eeec79ff64f777cbd59a8bd15d0a3fe1f92c0)
1 /*
2  * Copyright (C) 2011 Red Hat, Inc.
3  *
4  * This file is released under the GPL.
5  */
6 
7 #include "dm-space-map-common.h"
8 #include "dm-transaction-manager.h"
9 #include "dm-btree-internal.h"
10 #include "dm-persistent-data-internal.h"
11 
12 #include <linux/bitops.h>
13 #include <linux/device-mapper.h>
14 
15 #define DM_MSG_PREFIX "space map common"
16 
17 /*----------------------------------------------------------------*/
18 
19 /*
20  * Index validator.
21  */
22 #define INDEX_CSUM_XOR 160478
23 
24 static void index_prepare_for_write(struct dm_block_validator *v,
25 				    struct dm_block *b,
26 				    size_t block_size)
27 {
28 	struct disk_metadata_index *mi_le = dm_block_data(b);
29 
30 	mi_le->blocknr = cpu_to_le64(dm_block_location(b));
31 	mi_le->csum = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
32 						 block_size - sizeof(__le32),
33 						 INDEX_CSUM_XOR));
34 }
35 
36 static int index_check(struct dm_block_validator *v,
37 		       struct dm_block *b,
38 		       size_t block_size)
39 {
40 	struct disk_metadata_index *mi_le = dm_block_data(b);
41 	__le32 csum_disk;
42 
43 	if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) {
44 		DMERR_LIMIT("index_check failed: blocknr %llu != wanted %llu",
45 			    le64_to_cpu(mi_le->blocknr), dm_block_location(b));
46 		return -ENOTBLK;
47 	}
48 
49 	csum_disk = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
50 					       block_size - sizeof(__le32),
51 					       INDEX_CSUM_XOR));
52 	if (csum_disk != mi_le->csum) {
53 		DMERR_LIMIT("index_check failed: csum %u != wanted %u",
54 			    le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum));
55 		return -EILSEQ;
56 	}
57 
58 	return 0;
59 }
60 
61 static struct dm_block_validator index_validator = {
62 	.name = "index",
63 	.prepare_for_write = index_prepare_for_write,
64 	.check = index_check
65 };
66 
67 /*----------------------------------------------------------------*/
68 
69 /*
70  * Bitmap validator
71  */
72 #define BITMAP_CSUM_XOR 240779
73 
74 static void dm_bitmap_prepare_for_write(struct dm_block_validator *v,
75 					struct dm_block *b,
76 					size_t block_size)
77 {
78 	struct disk_bitmap_header *disk_header = dm_block_data(b);
79 
80 	disk_header->blocknr = cpu_to_le64(dm_block_location(b));
81 	disk_header->csum = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
82 						       block_size - sizeof(__le32),
83 						       BITMAP_CSUM_XOR));
84 }
85 
86 static int dm_bitmap_check(struct dm_block_validator *v,
87 			   struct dm_block *b,
88 			   size_t block_size)
89 {
90 	struct disk_bitmap_header *disk_header = dm_block_data(b);
91 	__le32 csum_disk;
92 
93 	if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) {
94 		DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu",
95 			    le64_to_cpu(disk_header->blocknr), dm_block_location(b));
96 		return -ENOTBLK;
97 	}
98 
99 	csum_disk = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
100 					       block_size - sizeof(__le32),
101 					       BITMAP_CSUM_XOR));
102 	if (csum_disk != disk_header->csum) {
103 		DMERR_LIMIT("bitmap check failed: csum %u != wanted %u",
104 			    le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum));
105 		return -EILSEQ;
106 	}
107 
108 	return 0;
109 }
110 
111 static struct dm_block_validator dm_sm_bitmap_validator = {
112 	.name = "sm_bitmap",
113 	.prepare_for_write = dm_bitmap_prepare_for_write,
114 	.check = dm_bitmap_check,
115 };
116 
117 /*----------------------------------------------------------------*/
118 
119 #define ENTRIES_PER_WORD 32
120 #define ENTRIES_SHIFT	5
121 
122 static void *dm_bitmap_data(struct dm_block *b)
123 {
124 	return dm_block_data(b) + sizeof(struct disk_bitmap_header);
125 }
126 
127 #define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL
128 
129 static unsigned dm_bitmap_word_used(void *addr, unsigned b)
130 {
131 	__le64 *words_le = addr;
132 	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
133 
134 	uint64_t bits = le64_to_cpu(*w_le);
135 	uint64_t mask = (bits + WORD_MASK_HIGH + 1) & WORD_MASK_HIGH;
136 
137 	return !(~bits & mask);
138 }
139 
140 static unsigned sm_lookup_bitmap(void *addr, unsigned b)
141 {
142 	__le64 *words_le = addr;
143 	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
144 	unsigned hi, lo;
145 
146 	b = (b & (ENTRIES_PER_WORD - 1)) << 1;
147 	hi = !!test_bit_le(b, (void *) w_le);
148 	lo = !!test_bit_le(b + 1, (void *) w_le);
149 	return (hi << 1) | lo;
150 }
151 
152 static void sm_set_bitmap(void *addr, unsigned b, unsigned val)
153 {
154 	__le64 *words_le = addr;
155 	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
156 
157 	b = (b & (ENTRIES_PER_WORD - 1)) << 1;
158 
159 	if (val & 2)
160 		__set_bit_le(b, (void *) w_le);
161 	else
162 		__clear_bit_le(b, (void *) w_le);
163 
164 	if (val & 1)
165 		__set_bit_le(b + 1, (void *) w_le);
166 	else
167 		__clear_bit_le(b + 1, (void *) w_le);
168 }
169 
170 static int sm_find_free(void *addr, unsigned begin, unsigned end,
171 			unsigned *result)
172 {
173 	while (begin < end) {
174 		if (!(begin & (ENTRIES_PER_WORD - 1)) &&
175 		    dm_bitmap_word_used(addr, begin)) {
176 			begin += ENTRIES_PER_WORD;
177 			continue;
178 		}
179 
180 		if (!sm_lookup_bitmap(addr, begin)) {
181 			*result = begin;
182 			return 0;
183 		}
184 
185 		begin++;
186 	}
187 
188 	return -ENOSPC;
189 }
190 
191 /*----------------------------------------------------------------*/
192 
193 static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm)
194 {
195 	memset(ll, 0, sizeof(struct ll_disk));
196 
197 	ll->tm = tm;
198 
199 	ll->bitmap_info.tm = tm;
200 	ll->bitmap_info.levels = 1;
201 
202 	/*
203 	 * Because the new bitmap blocks are created via a shadow
204 	 * operation, the old entry has already had its reference count
205 	 * decremented and we don't need the btree to do any bookkeeping.
206 	 */
207 	ll->bitmap_info.value_type.size = sizeof(struct disk_index_entry);
208 	ll->bitmap_info.value_type.inc = NULL;
209 	ll->bitmap_info.value_type.dec = NULL;
210 	ll->bitmap_info.value_type.equal = NULL;
211 
212 	ll->ref_count_info.tm = tm;
213 	ll->ref_count_info.levels = 1;
214 	ll->ref_count_info.value_type.size = sizeof(uint32_t);
215 	ll->ref_count_info.value_type.inc = NULL;
216 	ll->ref_count_info.value_type.dec = NULL;
217 	ll->ref_count_info.value_type.equal = NULL;
218 
219 	ll->block_size = dm_bm_block_size(dm_tm_get_bm(tm));
220 
221 	if (ll->block_size > (1 << 30)) {
222 		DMERR("block size too big to hold bitmaps");
223 		return -EINVAL;
224 	}
225 
226 	ll->entries_per_block = (ll->block_size - sizeof(struct disk_bitmap_header)) *
227 		ENTRIES_PER_BYTE;
228 	ll->nr_blocks = 0;
229 	ll->bitmap_root = 0;
230 	ll->ref_count_root = 0;
231 	ll->bitmap_index_changed = false;
232 
233 	return 0;
234 }
235 
236 int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
237 {
238 	int r;
239 	dm_block_t i, nr_blocks, nr_indexes;
240 	unsigned old_blocks, blocks;
241 
242 	nr_blocks = ll->nr_blocks + extra_blocks;
243 	old_blocks = dm_sector_div_up(ll->nr_blocks, ll->entries_per_block);
244 	blocks = dm_sector_div_up(nr_blocks, ll->entries_per_block);
245 
246 	nr_indexes = dm_sector_div_up(nr_blocks, ll->entries_per_block);
247 	if (nr_indexes > ll->max_entries(ll)) {
248 		DMERR("space map too large");
249 		return -EINVAL;
250 	}
251 
252 	/*
253 	 * We need to set this before the dm_tm_new_block() call below.
254 	 */
255 	ll->nr_blocks = nr_blocks;
256 	for (i = old_blocks; i < blocks; i++) {
257 		struct dm_block *b;
258 		struct disk_index_entry idx;
259 
260 		r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b);
261 		if (r < 0)
262 			return r;
263 
264 		idx.blocknr = cpu_to_le64(dm_block_location(b));
265 
266 		dm_tm_unlock(ll->tm, b);
267 
268 		idx.nr_free = cpu_to_le32(ll->entries_per_block);
269 		idx.none_free_before = 0;
270 
271 		r = ll->save_ie(ll, i, &idx);
272 		if (r < 0)
273 			return r;
274 	}
275 
276 	return 0;
277 }
278 
279 int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result)
280 {
281 	int r;
282 	dm_block_t index = b;
283 	struct disk_index_entry ie_disk;
284 	struct dm_block *blk;
285 
286 	b = do_div(index, ll->entries_per_block);
287 	r = ll->load_ie(ll, index, &ie_disk);
288 	if (r < 0)
289 		return r;
290 
291 	r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
292 			    &dm_sm_bitmap_validator, &blk);
293 	if (r < 0)
294 		return r;
295 
296 	*result = sm_lookup_bitmap(dm_bitmap_data(blk), b);
297 
298 	dm_tm_unlock(ll->tm, blk);
299 
300 	return 0;
301 }
302 
303 static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b,
304 				      uint32_t *result)
305 {
306 	__le32 le_rc;
307 	int r;
308 
309 	r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc);
310 	if (r < 0)
311 		return r;
312 
313 	*result = le32_to_cpu(le_rc);
314 
315 	return r;
316 }
317 
318 int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
319 {
320 	int r = sm_ll_lookup_bitmap(ll, b, result);
321 
322 	if (r)
323 		return r;
324 
325 	if (*result != 3)
326 		return r;
327 
328 	return sm_ll_lookup_big_ref_count(ll, b, result);
329 }
330 
331 int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
332 			  dm_block_t end, dm_block_t *result)
333 {
334 	int r;
335 	struct disk_index_entry ie_disk;
336 	dm_block_t i, index_begin = begin;
337 	dm_block_t index_end = dm_sector_div_up(end, ll->entries_per_block);
338 
339 	/*
340 	 * FIXME: Use shifts
341 	 */
342 	begin = do_div(index_begin, ll->entries_per_block);
343 	end = do_div(end, ll->entries_per_block);
344 	if (end == 0)
345 		end = ll->entries_per_block;
346 
347 	for (i = index_begin; i < index_end; i++, begin = 0) {
348 		struct dm_block *blk;
349 		unsigned position;
350 		uint32_t bit_end;
351 
352 		r = ll->load_ie(ll, i, &ie_disk);
353 		if (r < 0)
354 			return r;
355 
356 		if (le32_to_cpu(ie_disk.nr_free) == 0)
357 			continue;
358 
359 		r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
360 				    &dm_sm_bitmap_validator, &blk);
361 		if (r < 0)
362 			return r;
363 
364 		bit_end = (i == index_end - 1) ?  end : ll->entries_per_block;
365 
366 		r = sm_find_free(dm_bitmap_data(blk),
367 				 max_t(unsigned, begin, le32_to_cpu(ie_disk.none_free_before)),
368 				 bit_end, &position);
369 		if (r == -ENOSPC) {
370 			/*
371 			 * This might happen because we started searching
372 			 * part way through the bitmap.
373 			 */
374 			dm_tm_unlock(ll->tm, blk);
375 			continue;
376 		}
377 
378 		dm_tm_unlock(ll->tm, blk);
379 
380 		*result = i * ll->entries_per_block + (dm_block_t) position;
381 		return 0;
382 	}
383 
384 	return -ENOSPC;
385 }
386 
387 int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
388 	                         dm_block_t begin, dm_block_t end, dm_block_t *b)
389 {
390 	int r;
391 	uint32_t count;
392 
393 	do {
394 		r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b);
395 		if (r)
396 			break;
397 
398 		/* double check this block wasn't used in the old transaction */
399 		if (*b >= old_ll->nr_blocks)
400 			count = 0;
401 		else {
402 			r = sm_ll_lookup(old_ll, *b, &count);
403 			if (r)
404 				break;
405 
406 			if (count)
407 				begin = *b + 1;
408 		}
409 	} while (count);
410 
411 	return r;
412 }
413 
414 /*----------------------------------------------------------------*/
415 
416 int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
417 		 uint32_t ref_count, int32_t *nr_allocations)
418 {
419 	int r;
420 	uint32_t bit, old;
421 	struct dm_block *nb;
422 	dm_block_t index = b;
423 	struct disk_index_entry ie_disk;
424 	void *bm_le;
425 	int inc;
426 
427 	bit = do_div(index, ll->entries_per_block);
428 	r = ll->load_ie(ll, index, &ie_disk);
429 	if (r < 0)
430 		return r;
431 
432 	r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ie_disk.blocknr),
433 			       &dm_sm_bitmap_validator, &nb, &inc);
434 	if (r < 0) {
435 		DMERR("dm_tm_shadow_block() failed");
436 		return r;
437 	}
438 	ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
439 	bm_le = dm_bitmap_data(nb);
440 
441 	old = sm_lookup_bitmap(bm_le, bit);
442 	if (old > 2) {
443 		r = sm_ll_lookup_big_ref_count(ll, b, &old);
444 		if (r < 0) {
445 			dm_tm_unlock(ll->tm, nb);
446 			return r;
447 		}
448 	}
449 
450 	if (r) {
451 		dm_tm_unlock(ll->tm, nb);
452 		return r;
453 	}
454 
455 	if (ref_count <= 2) {
456 		sm_set_bitmap(bm_le, bit, ref_count);
457 		dm_tm_unlock(ll->tm, nb);
458 
459 		if (old > 2) {
460 			r = dm_btree_remove(&ll->ref_count_info,
461 					    ll->ref_count_root,
462 					    &b, &ll->ref_count_root);
463 			if (r)
464 				return r;
465 		}
466 
467 	} else {
468 		__le32 le_rc = cpu_to_le32(ref_count);
469 
470 		sm_set_bitmap(bm_le, bit, 3);
471 		dm_tm_unlock(ll->tm, nb);
472 
473 		__dm_bless_for_disk(&le_rc);
474 		r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
475 				    &b, &le_rc, &ll->ref_count_root);
476 		if (r < 0) {
477 			DMERR("ref count insert failed");
478 			return r;
479 		}
480 	}
481 
482 	if (ref_count && !old) {
483 		*nr_allocations = 1;
484 		ll->nr_allocated++;
485 		le32_add_cpu(&ie_disk.nr_free, -1);
486 		if (le32_to_cpu(ie_disk.none_free_before) == bit)
487 			ie_disk.none_free_before = cpu_to_le32(bit + 1);
488 
489 	} else if (old && !ref_count) {
490 		*nr_allocations = -1;
491 		ll->nr_allocated--;
492 		le32_add_cpu(&ie_disk.nr_free, 1);
493 		ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
494 	} else
495 		*nr_allocations = 0;
496 
497 	return ll->save_ie(ll, index, &ie_disk);
498 }
499 
500 /*----------------------------------------------------------------*/
501 
502 /*
503  * Holds useful intermediate results for the range based inc and dec
504  * operations.
505  */
506 struct inc_context {
507 	struct disk_index_entry ie_disk;
508 	struct dm_block *bitmap_block;
509 	void *bitmap;
510 
511 	struct dm_block *overflow_leaf;
512 };
513 
514 static inline void init_inc_context(struct inc_context *ic)
515 {
516 	ic->bitmap_block = NULL;
517 	ic->bitmap = NULL;
518 	ic->overflow_leaf = NULL;
519 }
520 
521 static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic)
522 {
523 	if (ic->bitmap_block)
524 		dm_tm_unlock(ll->tm, ic->bitmap_block);
525 	if (ic->overflow_leaf)
526 		dm_tm_unlock(ll->tm, ic->overflow_leaf);
527 }
528 
/* Releases every block held by @ic and returns it to the empty state. */
static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic)
{
	/* Unlock first: init_inc_context() forgets the block pointers. */
	exit_inc_context(ll, ic);
	init_inc_context(ic);
}
534 
535 /*
536  * Confirms a btree node contains a particular key at an index.
537  */
538 static bool contains_key(struct btree_node *n, uint64_t key, int index)
539 {
540 	return index >= 0 &&
541 		index < le32_to_cpu(n->header.nr_entries) &&
542 		le64_to_cpu(n->keys[index]) == key;
543 }
544 
545 static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
546 {
547 	int r;
548 	int index;
549 	struct btree_node *n;
550 	__le32 *v_ptr;
551 	uint32_t rc;
552 
553 	/*
554 	 * bitmap_block needs to be unlocked because getting the
555 	 * overflow_leaf may need to allocate, and thus use the space map.
556 	 */
557 	reset_inc_context(ll, ic);
558 
559 	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
560 				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
561 	if (r < 0)
562 		return r;
563 
564 	n = dm_block_data(ic->overflow_leaf);
565 
566 	if (!contains_key(n, b, index)) {
567 		DMERR("overflow btree is missing an entry");
568 		return -EINVAL;
569 	}
570 
571 	v_ptr = value_ptr(n, index);
572 	rc = le32_to_cpu(*v_ptr) + 1;
573 	*v_ptr = cpu_to_le32(rc);
574 
575 	return 0;
576 }
577 
578 static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
579 {
580 	int index;
581 	struct btree_node *n;
582 	__le32 *v_ptr;
583 	uint32_t rc;
584 
585 	/*
586 	 * Do we already have the correct overflow leaf?
587 	 */
588 	if (ic->overflow_leaf) {
589 		n = dm_block_data(ic->overflow_leaf);
590 		index = lower_bound(n, b);
591 		if (contains_key(n, b, index)) {
592 			v_ptr = value_ptr(n, index);
593 			rc = le32_to_cpu(*v_ptr) + 1;
594 			*v_ptr = cpu_to_le32(rc);
595 
596 			return 0;
597 		}
598 	}
599 
600 	return __sm_ll_inc_overflow(ll, b, ic);
601 }
602 
603 static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic)
604 {
605 	int r, inc;
606 	r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr),
607 			       &dm_sm_bitmap_validator, &ic->bitmap_block, &inc);
608 	if (r < 0) {
609 		DMERR("dm_tm_shadow_block() failed");
610 		return r;
611 	}
612 	ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block));
613 	ic->bitmap = dm_bitmap_data(ic->bitmap_block);
614 	return 0;
615 }
616 
617 /*
618  * Once shadow_bitmap has been called, which always happens at the start of inc/dec,
619  * we can reopen the bitmap with a simple write lock, rather than re calling
620  * dm_tm_shadow_block().
621  */
622 static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic)
623 {
624 	if (!ic->bitmap_block) {
625 		int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr),
626 					 &dm_sm_bitmap_validator, &ic->bitmap_block);
627 		if (r) {
628 			DMERR("unable to re-get write lock for bitmap");
629 			return r;
630 		}
631 		ic->bitmap = dm_bitmap_data(ic->bitmap_block);
632 	}
633 
634 	return 0;
635 }
636 
637 /*
638  * Loops round incrementing entries in a single bitmap.
639  */
640 static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b,
641 				   uint32_t bit, uint32_t bit_end,
642 				   int32_t *nr_allocations, dm_block_t *new_b,
643 				   struct inc_context *ic)
644 {
645 	int r;
646 	__le32 le_rc;
647 	uint32_t old;
648 
649 	for (; bit != bit_end; bit++, b++) {
650 		/*
651 		 * We only need to drop the bitmap if we need to find a new btree
652 		 * leaf for the overflow.  So if it was dropped last iteration,
653 		 * we now re-get it.
654 		 */
655 		r = ensure_bitmap(ll, ic);
656 		if (r)
657 			return r;
658 
659 		old = sm_lookup_bitmap(ic->bitmap, bit);
660 		switch (old) {
661 		case 0:
662 			/* inc bitmap, adjust nr_allocated */
663 			sm_set_bitmap(ic->bitmap, bit, 1);
664 			(*nr_allocations)++;
665 			ll->nr_allocated++;
666 			le32_add_cpu(&ic->ie_disk.nr_free, -1);
667 			if (le32_to_cpu(ic->ie_disk.none_free_before) == bit)
668 				ic->ie_disk.none_free_before = cpu_to_le32(bit + 1);
669 			break;
670 
671 		case 1:
672 			/* inc bitmap */
673 			sm_set_bitmap(ic->bitmap, bit, 2);
674 			break;
675 
676 		case 2:
677 			/* inc bitmap and insert into overflow */
678 			sm_set_bitmap(ic->bitmap, bit, 3);
679 			reset_inc_context(ll, ic);
680 
681 			le_rc = cpu_to_le32(3);
682 			__dm_bless_for_disk(&le_rc);
683 			r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
684 					    &b, &le_rc, &ll->ref_count_root);
685 			if (r < 0) {
686 				DMERR("ref count insert failed");
687 				return r;
688 			}
689 			break;
690 
691 		default:
692 			/*
693 			 * inc within the overflow tree only.
694 			 */
695 			r = sm_ll_inc_overflow(ll, b, ic);
696 			if (r < 0)
697 				return r;
698 		}
699 	}
700 
701 	*new_b = b;
702 	return 0;
703 }
704 
705 /*
706  * Finds a bitmap that contains entries in the block range, and increments
707  * them.
708  */
709 static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
710 		       int32_t *nr_allocations, dm_block_t *new_b)
711 {
712 	int r;
713 	struct inc_context ic;
714 	uint32_t bit, bit_end;
715 	dm_block_t index = b;
716 
717 	init_inc_context(&ic);
718 
719 	bit = do_div(index, ll->entries_per_block);
720 	r = ll->load_ie(ll, index, &ic.ie_disk);
721 	if (r < 0)
722 		return r;
723 
724 	r = shadow_bitmap(ll, &ic);
725 	if (r)
726 		return r;
727 
728 	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
729 	r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic);
730 
731 	exit_inc_context(ll, &ic);
732 
733 	if (r)
734 		return r;
735 
736 	return ll->save_ie(ll, index, &ic.ie_disk);
737 }
738 
739 int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
740 	      int32_t *nr_allocations)
741 {
742 	*nr_allocations = 0;
743 	while (b != e) {
744 		int r = __sm_ll_inc(ll, b, e, nr_allocations, &b);
745 		if (r)
746 			return r;
747 	}
748 
749 	return 0;
750 }
751 
752 /*----------------------------------------------------------------*/
753 
754 static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b,
755 				struct inc_context *ic)
756 {
757 	reset_inc_context(ll, ic);
758 	return dm_btree_remove(&ll->ref_count_info, ll->ref_count_root,
759 			       &b, &ll->ref_count_root);
760 }
761 
762 static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
763 				struct inc_context *ic, uint32_t *old_rc)
764 {
765 	int r;
766 	int index = -1;
767 	struct btree_node *n;
768 	__le32 *v_ptr;
769 	uint32_t rc;
770 
771 	reset_inc_context(ll, ic);
772 	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
773 				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
774 	if (r < 0)
775 		return r;
776 
777 	n = dm_block_data(ic->overflow_leaf);
778 
779 	if (!contains_key(n, b, index)) {
780 		DMERR("overflow btree is missing an entry");
781 		return -EINVAL;
782 	}
783 
784 	v_ptr = value_ptr(n, index);
785 	rc = le32_to_cpu(*v_ptr);
786 	*old_rc = rc;
787 
788 	if (rc == 3) {
789 		return __sm_ll_del_overflow(ll, b, ic);
790 	} else {
791 		rc--;
792 		*v_ptr = cpu_to_le32(rc);
793 		return 0;
794 	}
795 }
796 
797 static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
798 			      struct inc_context *ic, uint32_t *old_rc)
799 {
800 	/*
801 	 * Do we already have the correct overflow leaf?
802 	 */
803 	if (ic->overflow_leaf) {
804 		int index;
805 		struct btree_node *n;
806 		__le32 *v_ptr;
807 		uint32_t rc;
808 
809 		n = dm_block_data(ic->overflow_leaf);
810 		index = lower_bound(n, b);
811 		if (contains_key(n, b, index)) {
812 			v_ptr = value_ptr(n, index);
813 			rc = le32_to_cpu(*v_ptr);
814 			*old_rc = rc;
815 
816 			if (rc > 3) {
817 				rc--;
818 				*v_ptr = cpu_to_le32(rc);
819 				return 0;
820 			} else {
821 				return __sm_ll_del_overflow(ll, b, ic);
822 			}
823 
824 		}
825 	}
826 
827 	return __sm_ll_dec_overflow(ll, b, ic, old_rc);
828 }
829 
830 /*
831  * Loops round incrementing entries in a single bitmap.
832  */
833 static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b,
834 				   uint32_t bit, uint32_t bit_end,
835 				   struct inc_context *ic,
836 				   int32_t *nr_allocations, dm_block_t *new_b)
837 {
838 	int r;
839 	uint32_t old;
840 
841 	for (; bit != bit_end; bit++, b++) {
842 		/*
843 		 * We only need to drop the bitmap if we need to find a new btree
844 		 * leaf for the overflow.  So if it was dropped last iteration,
845 		 * we now re-get it.
846 		 */
847 		r = ensure_bitmap(ll, ic);
848 		if (r)
849 			return r;
850 
851 		old = sm_lookup_bitmap(ic->bitmap, bit);
852 		switch (old) {
853 		case 0:
854 			DMERR("unable to decrement block");
855 			return -EINVAL;
856 
857 		case 1:
858 			/* dec bitmap */
859 			sm_set_bitmap(ic->bitmap, bit, 0);
860 			(*nr_allocations)--;
861 			ll->nr_allocated--;
862 			le32_add_cpu(&ic->ie_disk.nr_free, 1);
863 			ic->ie_disk.none_free_before =
864 				cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit));
865 			break;
866 
867 		case 2:
868 			/* dec bitmap and insert into overflow */
869 			sm_set_bitmap(ic->bitmap, bit, 1);
870 			break;
871 
872 		case 3:
873 			r = sm_ll_dec_overflow(ll, b, ic, &old);
874 			if (r < 0)
875 				return r;
876 
877 			if (old == 3) {
878 				r = ensure_bitmap(ll, ic);
879 				if (r)
880 					return r;
881 
882 				sm_set_bitmap(ic->bitmap, bit, 2);
883 			}
884 			break;
885 		}
886 	}
887 
888 	*new_b = b;
889 	return 0;
890 }
891 
892 static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
893 		       int32_t *nr_allocations, dm_block_t *new_b)
894 {
895 	int r;
896 	uint32_t bit, bit_end;
897 	struct inc_context ic;
898 	dm_block_t index = b;
899 
900 	init_inc_context(&ic);
901 
902 	bit = do_div(index, ll->entries_per_block);
903 	r = ll->load_ie(ll, index, &ic.ie_disk);
904 	if (r < 0)
905 		return r;
906 
907 	r = shadow_bitmap(ll, &ic);
908 	if (r)
909 		return r;
910 
911 	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
912 	r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b);
913 	exit_inc_context(ll, &ic);
914 
915 	if (r)
916 		return r;
917 
918 	return ll->save_ie(ll, index, &ic.ie_disk);
919 }
920 
921 int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
922 	      int32_t *nr_allocations)
923 {
924 	*nr_allocations = 0;
925 	while (b != e) {
926 		int r = __sm_ll_dec(ll, b, e, nr_allocations, &b);
927 		if (r)
928 			return r;
929 	}
930 
931 	return 0;
932 }
933 
934 /*----------------------------------------------------------------*/
935 
936 int sm_ll_commit(struct ll_disk *ll)
937 {
938 	int r = 0;
939 
940 	if (ll->bitmap_index_changed) {
941 		r = ll->commit(ll);
942 		if (!r)
943 			ll->bitmap_index_changed = false;
944 	}
945 
946 	return r;
947 }
948 
949 /*----------------------------------------------------------------*/
950 
951 static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index,
952 			       struct disk_index_entry *ie)
953 {
954 	memcpy(ie, ll->mi_le.index + index, sizeof(*ie));
955 	return 0;
956 }
957 
958 static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index,
959 			       struct disk_index_entry *ie)
960 {
961 	ll->bitmap_index_changed = true;
962 	memcpy(ll->mi_le.index + index, ie, sizeof(*ie));
963 	return 0;
964 }
965 
966 static int metadata_ll_init_index(struct ll_disk *ll)
967 {
968 	int r;
969 	struct dm_block *b;
970 
971 	r = dm_tm_new_block(ll->tm, &index_validator, &b);
972 	if (r < 0)
973 		return r;
974 
975 	ll->bitmap_root = dm_block_location(b);
976 
977 	dm_tm_unlock(ll->tm, b);
978 
979 	return 0;
980 }
981 
982 static int metadata_ll_open(struct ll_disk *ll)
983 {
984 	int r;
985 	struct dm_block *block;
986 
987 	r = dm_tm_read_lock(ll->tm, ll->bitmap_root,
988 			    &index_validator, &block);
989 	if (r)
990 		return r;
991 
992 	memcpy(&ll->mi_le, dm_block_data(block), sizeof(ll->mi_le));
993 	dm_tm_unlock(ll->tm, block);
994 
995 	return 0;
996 }
997 
998 static dm_block_t metadata_ll_max_entries(struct ll_disk *ll)
999 {
1000 	return MAX_METADATA_BITMAPS;
1001 }
1002 
1003 static int metadata_ll_commit(struct ll_disk *ll)
1004 {
1005 	int r, inc;
1006 	struct dm_block *b;
1007 
1008 	r = dm_tm_shadow_block(ll->tm, ll->bitmap_root, &index_validator, &b, &inc);
1009 	if (r)
1010 		return r;
1011 
1012 	memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le));
1013 	ll->bitmap_root = dm_block_location(b);
1014 
1015 	dm_tm_unlock(ll->tm, b);
1016 
1017 	return 0;
1018 }
1019 
1020 int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm)
1021 {
1022 	int r;
1023 
1024 	r = sm_ll_init(ll, tm);
1025 	if (r < 0)
1026 		return r;
1027 
1028 	ll->load_ie = metadata_ll_load_ie;
1029 	ll->save_ie = metadata_ll_save_ie;
1030 	ll->init_index = metadata_ll_init_index;
1031 	ll->open_index = metadata_ll_open;
1032 	ll->max_entries = metadata_ll_max_entries;
1033 	ll->commit = metadata_ll_commit;
1034 
1035 	ll->nr_blocks = 0;
1036 	ll->nr_allocated = 0;
1037 
1038 	r = ll->init_index(ll);
1039 	if (r < 0)
1040 		return r;
1041 
1042 	r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
1043 	if (r < 0)
1044 		return r;
1045 
1046 	return 0;
1047 }
1048 
1049 int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm,
1050 			void *root_le, size_t len)
1051 {
1052 	int r;
1053 	struct disk_sm_root smr;
1054 
1055 	if (len < sizeof(struct disk_sm_root)) {
1056 		DMERR("sm_metadata root too small");
1057 		return -ENOMEM;
1058 	}
1059 
1060 	/*
1061 	 * We don't know the alignment of the root_le buffer, so need to
1062 	 * copy into a new structure.
1063 	 */
1064 	memcpy(&smr, root_le, sizeof(smr));
1065 
1066 	r = sm_ll_init(ll, tm);
1067 	if (r < 0)
1068 		return r;
1069 
1070 	ll->load_ie = metadata_ll_load_ie;
1071 	ll->save_ie = metadata_ll_save_ie;
1072 	ll->init_index = metadata_ll_init_index;
1073 	ll->open_index = metadata_ll_open;
1074 	ll->max_entries = metadata_ll_max_entries;
1075 	ll->commit = metadata_ll_commit;
1076 
1077 	ll->nr_blocks = le64_to_cpu(smr.nr_blocks);
1078 	ll->nr_allocated = le64_to_cpu(smr.nr_allocated);
1079 	ll->bitmap_root = le64_to_cpu(smr.bitmap_root);
1080 	ll->ref_count_root = le64_to_cpu(smr.ref_count_root);
1081 
1082 	return ll->open_index(ll);
1083 }
1084 
1085 /*----------------------------------------------------------------*/
1086 
1087 static inline int ie_cache_writeback(struct ll_disk *ll, struct ie_cache *iec)
1088 {
1089 	iec->dirty = false;
1090 	__dm_bless_for_disk(iec->ie);
1091 	return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root,
1092 			       &iec->index, &iec->ie, &ll->bitmap_root);
1093 }
1094 
1095 static inline unsigned hash_index(dm_block_t index)
1096 {
1097 	return dm_hash_block(index, IE_CACHE_MASK);
1098 }
1099 
1100 static int disk_ll_load_ie(struct ll_disk *ll, dm_block_t index,
1101 			   struct disk_index_entry *ie)
1102 {
1103 	int r;
1104 	unsigned h = hash_index(index);
1105 	struct ie_cache *iec = ll->ie_cache + h;
1106 
1107 	if (iec->valid) {
1108 		if (iec->index == index) {
1109 			memcpy(ie, &iec->ie, sizeof(*ie));
1110 			return 0;
1111 		}
1112 
1113 		if (iec->dirty) {
1114 			r = ie_cache_writeback(ll, iec);
1115 			if (r)
1116 				return r;
1117 		}
1118 	}
1119 
1120 	r = dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie);
1121 	if (!r) {
1122 		iec->valid = true;
1123 		iec->dirty = false;
1124 		iec->index = index;
1125 		memcpy(&iec->ie, ie, sizeof(*ie));
1126 	}
1127 
1128 	return r;
1129 }
1130 
1131 static int disk_ll_save_ie(struct ll_disk *ll, dm_block_t index,
1132 			   struct disk_index_entry *ie)
1133 {
1134 	int r;
1135 	unsigned h = hash_index(index);
1136 	struct ie_cache *iec = ll->ie_cache + h;
1137 
1138 	ll->bitmap_index_changed = true;
1139 	if (iec->valid) {
1140 		if (iec->index == index) {
1141 			memcpy(&iec->ie, ie, sizeof(*ie));
1142 			iec->dirty = true;
1143 			return 0;
1144 		}
1145 
1146 		if (iec->dirty) {
1147 			r = ie_cache_writeback(ll, iec);
1148 			if (r)
1149 				return r;
1150 		}
1151 	}
1152 
1153 	iec->valid = true;
1154 	iec->dirty = true;
1155 	iec->index = index;
1156 	memcpy(&iec->ie, ie, sizeof(*ie));
1157 	return 0;
1158 }
1159 
1160 static int disk_ll_init_index(struct ll_disk *ll)
1161 {
1162 	unsigned i;
1163 	for (i = 0; i < IE_CACHE_SIZE; i++) {
1164 		struct ie_cache *iec = ll->ie_cache + i;
1165 		iec->valid = false;
1166 		iec->dirty = false;
1167 	}
1168 	return dm_btree_empty(&ll->bitmap_info, &ll->bitmap_root);
1169 }
1170 
/*
 * open_index hook for the disk space map: does no work and always
 * reports success.
 */
static int disk_ll_open(struct ll_disk *ll)
{
	int r = 0;

	return r;
}
1175 
1176 static dm_block_t disk_ll_max_entries(struct ll_disk *ll)
1177 {
1178 	return -1ULL;
1179 }
1180 
1181 static int disk_ll_commit(struct ll_disk *ll)
1182 {
1183 	int r = 0;
1184 	unsigned i;
1185 
1186 	for (i = 0; i < IE_CACHE_SIZE; i++) {
1187 		struct ie_cache *iec = ll->ie_cache + i;
1188 		if (iec->valid && iec->dirty)
1189 			r = ie_cache_writeback(ll, iec);
1190 	}
1191 
1192 	return r;
1193 }
1194 
1195 int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm)
1196 {
1197 	int r;
1198 
1199 	r = sm_ll_init(ll, tm);
1200 	if (r < 0)
1201 		return r;
1202 
1203 	ll->load_ie = disk_ll_load_ie;
1204 	ll->save_ie = disk_ll_save_ie;
1205 	ll->init_index = disk_ll_init_index;
1206 	ll->open_index = disk_ll_open;
1207 	ll->max_entries = disk_ll_max_entries;
1208 	ll->commit = disk_ll_commit;
1209 
1210 	ll->nr_blocks = 0;
1211 	ll->nr_allocated = 0;
1212 
1213 	r = ll->init_index(ll);
1214 	if (r < 0)
1215 		return r;
1216 
1217 	r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
1218 	if (r < 0)
1219 		return r;
1220 
1221 	return 0;
1222 }
1223 
1224 int sm_ll_open_disk(struct ll_disk *ll, struct dm_transaction_manager *tm,
1225 		    void *root_le, size_t len)
1226 {
1227 	int r;
1228 	struct disk_sm_root *smr = root_le;
1229 
1230 	if (len < sizeof(struct disk_sm_root)) {
1231 		DMERR("sm_metadata root too small");
1232 		return -ENOMEM;
1233 	}
1234 
1235 	r = sm_ll_init(ll, tm);
1236 	if (r < 0)
1237 		return r;
1238 
1239 	ll->load_ie = disk_ll_load_ie;
1240 	ll->save_ie = disk_ll_save_ie;
1241 	ll->init_index = disk_ll_init_index;
1242 	ll->open_index = disk_ll_open;
1243 	ll->max_entries = disk_ll_max_entries;
1244 	ll->commit = disk_ll_commit;
1245 
1246 	ll->nr_blocks = le64_to_cpu(smr->nr_blocks);
1247 	ll->nr_allocated = le64_to_cpu(smr->nr_allocated);
1248 	ll->bitmap_root = le64_to_cpu(smr->bitmap_root);
1249 	ll->ref_count_root = le64_to_cpu(smr->ref_count_root);
1250 
1251 	return ll->open_index(ll);
1252 }
1253 
1254 /*----------------------------------------------------------------*/
1255