1 /*
2  * Copyright (C) 2012 Red Hat, Inc.
3  *
4  * This file is released under the GPL.
5  */
6 
7 #include "dm-cache-metadata.h"
8 
9 #include "persistent-data/dm-array.h"
10 #include "persistent-data/dm-bitset.h"
11 #include "persistent-data/dm-space-map.h"
12 #include "persistent-data/dm-space-map-disk.h"
13 #include "persistent-data/dm-transaction-manager.h"
14 
15 #include <linux/device-mapper.h>
16 
17 /*----------------------------------------------------------------*/
18 
/* Message prefix; presumably consumed by the DMERR() logging macro used below. */
#define DM_MSG_PREFIX   "cache metadata"

/*
 * Value stored in, and checked against, the superblock's magic field.
 * NOTE: the leading 0 makes this an octal literal in C.
 */
#define CACHE_SUPERBLOCK_MAGIC 06142003
/* Metadata block number at which the superblock is locked/read/written. */
#define CACHE_SUPERBLOCK_LOCATION 0

/*
 * Defines the (inclusive) range of on-disk metadata versions that this
 * module can handle; see check_metadata_version().
 */
#define MIN_CACHE_VERSION 1
#define MAX_CACHE_VERSION 1

/* Passed to dm_block_manager_create() as its cache-size argument. */
#define CACHE_METADATA_CACHE_SIZE 64

/*
 *  3 for btree insert +
 *  2 for btree lookup used within space map
 */
#define CACHE_MAX_CONCURRENT_LOCKS 5
/* Size in bytes of the buffers that hold the metadata space map root. */
#define SPACE_MAP_ROOT_SIZE 128
38 
/*
 * Bit numbers (not masks) within the superblock 'flags' word; they are
 * used with test_bit()/set_bit()/clear_bit() in this file.
 */
enum superblock_flag_bits {
	/* for spotting crashes that would invalidate the dirty bitset */
	CLEAN_SHUTDOWN,
	/* metadata must be checked using the tools */
	NEEDS_CHECK,
};
45 
46 /*
47  * Each mapping from cache block -> origin block carries a set of flags.
48  */
/*
 * Flag masks stored in the low 16 bits of each packed mapping value
 * (see pack_value()/unpack_value()).
 */
enum mapping_bits {
	/*
	 * A valid mapping.  Because we're using an array we clear this
	 * flag for a non-existent mapping.
	 */
	M_VALID = 1,

	/*
	 * The data on the cache is different from that on the origin.
	 */
	M_DIRTY = 2
};
61 
/*
 * On-disk layout of the cache metadata superblock.  Multi-byte fields are
 * little-endian and the structure is packed.  __commit_transaction()
 * asserts at build time that it does not exceed 512 bytes.
 */
struct cache_disk_superblock {
	/*
	 * Checksum over the rest of the block, starting at 'flags'
	 * (see sb_prepare_for_write() and sb_check()).
	 */
	__le32 csum;
	/* Bits from enum superblock_flag_bits. */
	__le32 flags;
	/* Block location of the superblock; verified by sb_check(). */
	__le64 blocknr;

	__u8 uuid[16];
	/* Must equal CACHE_SUPERBLOCK_MAGIC. */
	__le64 magic;
	/* Must lie within [MIN_CACHE_VERSION, MAX_CACHE_VERSION]. */
	__le32 version;

	__u8 policy_name[CACHE_POLICY_NAME_SIZE];
	__le32 policy_hint_size;

	/* Copy of the metadata space map root (see __copy_sm_root()). */
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
	/* Root of the cblock -> packed mapping array. */
	__le64 mapping_root;
	__le64 hint_root;

	/* Root of the discard bitset and its geometry. */
	__le64 discard_root;
	__le64 discard_block_size;
	__le64 discard_nr_blocks;

	__le32 data_block_size;
	__le32 metadata_block_size;
	__le32 cache_blocks;

	/* Feature flags; see __check_incompat_features(). */
	__le32 compat_flags;
	__le32 compat_ro_flags;
	__le32 incompat_flags;

	/* Persisted copy of struct dm_cache_statistics. */
	__le32 read_hits;
	__le32 read_misses;
	__le32 write_hits;
	__le32 write_misses;

	__le32 policy_version[CACHE_POLICY_VERSION_SIZE];
} __packed;
97 
/*
 * In-core state for one cache metadata device.  Instances are reference
 * counted and kept on the file-level 'table' list so that the same block
 * device is only opened once (see lookup_or_open()).
 */
struct dm_cache_metadata {
	/* Starts at 1 in metadata_open(); bumped by lookup(). */
	atomic_t ref_count;
	/* Link in the file-level 'table' list, protected by table_lock. */
	struct list_head list;

	struct block_device *bdev;
	struct dm_block_manager *bm;
	struct dm_space_map *metadata_sm;
	struct dm_transaction_manager *tm;

	/* Array descriptors for the mapping array and the hint array. */
	struct dm_array_info info;
	struct dm_array_info hint_info;
	struct dm_disk_bitset discard_info;

	/* Taken via cmd_read_lock()/cmd_write_lock(). */
	struct rw_semaphore root_lock;
	/* In-core copy of the superblock flags word. */
	unsigned long flags;
	dm_block_t root;
	dm_block_t hint_root;
	dm_block_t discard_root;

	sector_t discard_block_size;
	dm_dblock_t discard_nr_blocks;

	sector_t data_block_size;
	dm_cblock_t cache_blocks;
	/*
	 * Set by the mutating operations in this file; cleared by
	 * read_superblock_fields().
	 */
	bool changed:1;
	/* True if CLEAN_SHUTDOWN was set when the metadata was opened. */
	bool clean_when_opened:1;

	char policy_name[CACHE_POLICY_NAME_SIZE];
	unsigned policy_version[CACHE_POLICY_VERSION_SIZE];
	size_t policy_hint_size;
	struct dm_cache_statistics stats;

	/*
	 * Reading the space map root can fail, so we read it into this
	 * buffer before the superblock is locked and updated.
	 */
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	/*
	 * Set if a transaction has to be aborted but the attempt to roll
	 * back to the previous (good) transaction failed.  The only
	 * metadata operation permissible in this state is the closing of
	 * the device.
	 */
	bool fail_io:1;

	/*
	 * These structures are used when loading metadata.  They're too
	 * big to put on the stack.
	 */
	struct dm_array_cursor mapping_cursor;
	struct dm_array_cursor hint_cursor;
};
151 
152 /*-------------------------------------------------------------------
153  * superblock validator
154  *-----------------------------------------------------------------*/
155 
156 #define SUPERBLOCK_CSUM_XOR 9031977
157 
158 static void sb_prepare_for_write(struct dm_block_validator *v,
159 				 struct dm_block *b,
160 				 size_t sb_block_size)
161 {
162 	struct cache_disk_superblock *disk_super = dm_block_data(b);
163 
164 	disk_super->blocknr = cpu_to_le64(dm_block_location(b));
165 	disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
166 						      sb_block_size - sizeof(__le32),
167 						      SUPERBLOCK_CSUM_XOR));
168 }
169 
170 static int check_metadata_version(struct cache_disk_superblock *disk_super)
171 {
172 	uint32_t metadata_version = le32_to_cpu(disk_super->version);
173 	if (metadata_version < MIN_CACHE_VERSION || metadata_version > MAX_CACHE_VERSION) {
174 		DMERR("Cache metadata version %u found, but only versions between %u and %u supported.",
175 		      metadata_version, MIN_CACHE_VERSION, MAX_CACHE_VERSION);
176 		return -EINVAL;
177 	}
178 
179 	return 0;
180 }
181 
182 static int sb_check(struct dm_block_validator *v,
183 		    struct dm_block *b,
184 		    size_t sb_block_size)
185 {
186 	struct cache_disk_superblock *disk_super = dm_block_data(b);
187 	__le32 csum_le;
188 
189 	if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) {
190 		DMERR("sb_check failed: blocknr %llu: wanted %llu",
191 		      le64_to_cpu(disk_super->blocknr),
192 		      (unsigned long long)dm_block_location(b));
193 		return -ENOTBLK;
194 	}
195 
196 	if (le64_to_cpu(disk_super->magic) != CACHE_SUPERBLOCK_MAGIC) {
197 		DMERR("sb_check failed: magic %llu: wanted %llu",
198 		      le64_to_cpu(disk_super->magic),
199 		      (unsigned long long)CACHE_SUPERBLOCK_MAGIC);
200 		return -EILSEQ;
201 	}
202 
203 	csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
204 					     sb_block_size - sizeof(__le32),
205 					     SUPERBLOCK_CSUM_XOR));
206 	if (csum_le != disk_super->csum) {
207 		DMERR("sb_check failed: csum %u: wanted %u",
208 		      le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum));
209 		return -EILSEQ;
210 	}
211 
212 	return check_metadata_version(disk_super);
213 }
214 
/*
 * Validator handed to the block manager by superblock_read_lock(),
 * superblock_lock_zero() and superblock_lock().
 */
static struct dm_block_validator sb_validator = {
	.name = "superblock",
	.prepare_for_write = sb_prepare_for_write,
	.check = sb_check
};
220 
221 /*----------------------------------------------------------------*/
222 
/*
 * Take a read lock on the superblock block using sb_validator.
 * On success *sblock refers to the locked block; returns the result of
 * dm_bm_read_lock().
 */
static int superblock_read_lock(struct dm_cache_metadata *cmd,
				struct dm_block **sblock)
{
	return dm_bm_read_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
			       &sb_validator, sblock);
}
229 
/*
 * Take a write lock on the superblock block via dm_bm_write_lock_zero()
 * (presumably handing back zeroed contents - confirm against the block
 * manager), using sb_validator.  Returns that call's result.
 */
static int superblock_lock_zero(struct dm_cache_metadata *cmd,
				struct dm_block **sblock)
{
	return dm_bm_write_lock_zero(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
				     &sb_validator, sblock);
}
236 
/*
 * Take a write lock on the superblock block using sb_validator.
 * Returns the result of dm_bm_write_lock().
 */
static int superblock_lock(struct dm_cache_metadata *cmd,
			   struct dm_block **sblock)
{
	return dm_bm_write_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
				&sb_validator, sblock);
}
243 
244 /*----------------------------------------------------------------*/
245 
246 static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *result)
247 {
248 	int r;
249 	unsigned i;
250 	struct dm_block *b;
251 	__le64 *data_le, zero = cpu_to_le64(0);
252 	unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64);
253 
254 	/*
255 	 * We can't use a validator here - it may be all zeroes.
256 	 */
257 	r = dm_bm_read_lock(bm, CACHE_SUPERBLOCK_LOCATION, NULL, &b);
258 	if (r)
259 		return r;
260 
261 	data_le = dm_block_data(b);
262 	*result = true;
263 	for (i = 0; i < sb_block_size; i++) {
264 		if (data_le[i] != zero) {
265 			*result = false;
266 			break;
267 		}
268 	}
269 
270 	dm_bm_unlock(b);
271 
272 	return 0;
273 }
274 
275 static void __setup_mapping_info(struct dm_cache_metadata *cmd)
276 {
277 	struct dm_btree_value_type vt;
278 
279 	vt.context = NULL;
280 	vt.size = sizeof(__le64);
281 	vt.inc = NULL;
282 	vt.dec = NULL;
283 	vt.equal = NULL;
284 	dm_array_info_init(&cmd->info, cmd->tm, &vt);
285 
286 	if (cmd->policy_hint_size) {
287 		vt.size = sizeof(__le32);
288 		dm_array_info_init(&cmd->hint_info, cmd->tm, &vt);
289 	}
290 }
291 
292 static int __save_sm_root(struct dm_cache_metadata *cmd)
293 {
294 	int r;
295 	size_t metadata_len;
296 
297 	r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
298 	if (r < 0)
299 		return r;
300 
301 	return dm_sm_copy_root(cmd->metadata_sm, &cmd->metadata_space_map_root,
302 			       metadata_len);
303 }
304 
305 static void __copy_sm_root(struct dm_cache_metadata *cmd,
306 			   struct cache_disk_superblock *disk_super)
307 {
308 	memcpy(&disk_super->metadata_space_map_root,
309 	       &cmd->metadata_space_map_root,
310 	       sizeof(cmd->metadata_space_map_root));
311 }
312 
313 static int __write_initial_superblock(struct dm_cache_metadata *cmd)
314 {
315 	int r;
316 	struct dm_block *sblock;
317 	struct cache_disk_superblock *disk_super;
318 	sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT;
319 
320 	/* FIXME: see if we can lose the max sectors limit */
321 	if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS)
322 		bdev_size = DM_CACHE_METADATA_MAX_SECTORS;
323 
324 	r = dm_tm_pre_commit(cmd->tm);
325 	if (r < 0)
326 		return r;
327 
328 	/*
329 	 * dm_sm_copy_root() can fail.  So we need to do it before we start
330 	 * updating the superblock.
331 	 */
332 	r = __save_sm_root(cmd);
333 	if (r)
334 		return r;
335 
336 	r = superblock_lock_zero(cmd, &sblock);
337 	if (r)
338 		return r;
339 
340 	disk_super = dm_block_data(sblock);
341 	disk_super->flags = 0;
342 	memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
343 	disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC);
344 	disk_super->version = cpu_to_le32(MAX_CACHE_VERSION);
345 	memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
346 	memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
347 	disk_super->policy_hint_size = 0;
348 
349 	__copy_sm_root(cmd, disk_super);
350 
351 	disk_super->mapping_root = cpu_to_le64(cmd->root);
352 	disk_super->hint_root = cpu_to_le64(cmd->hint_root);
353 	disk_super->discard_root = cpu_to_le64(cmd->discard_root);
354 	disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
355 	disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
356 	disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE);
357 	disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
358 	disk_super->cache_blocks = cpu_to_le32(0);
359 
360 	disk_super->read_hits = cpu_to_le32(0);
361 	disk_super->read_misses = cpu_to_le32(0);
362 	disk_super->write_hits = cpu_to_le32(0);
363 	disk_super->write_misses = cpu_to_le32(0);
364 
365 	return dm_tm_commit(cmd->tm, sblock);
366 }
367 
368 static int __format_metadata(struct dm_cache_metadata *cmd)
369 {
370 	int r;
371 
372 	r = dm_tm_create_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
373 				 &cmd->tm, &cmd->metadata_sm);
374 	if (r < 0) {
375 		DMERR("tm_create_with_sm failed");
376 		return r;
377 	}
378 
379 	__setup_mapping_info(cmd);
380 
381 	r = dm_array_empty(&cmd->info, &cmd->root);
382 	if (r < 0)
383 		goto bad;
384 
385 	dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
386 
387 	r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root);
388 	if (r < 0)
389 		goto bad;
390 
391 	cmd->discard_block_size = 0;
392 	cmd->discard_nr_blocks = 0;
393 
394 	r = __write_initial_superblock(cmd);
395 	if (r)
396 		goto bad;
397 
398 	cmd->clean_when_opened = true;
399 	return 0;
400 
401 bad:
402 	dm_tm_destroy(cmd->tm);
403 	dm_sm_destroy(cmd->metadata_sm);
404 
405 	return r;
406 }
407 
408 static int __check_incompat_features(struct cache_disk_superblock *disk_super,
409 				     struct dm_cache_metadata *cmd)
410 {
411 	uint32_t features;
412 
413 	features = le32_to_cpu(disk_super->incompat_flags) & ~DM_CACHE_FEATURE_INCOMPAT_SUPP;
414 	if (features) {
415 		DMERR("could not access metadata due to unsupported optional features (%lx).",
416 		      (unsigned long)features);
417 		return -EINVAL;
418 	}
419 
420 	/*
421 	 * Check for read-only metadata to skip the following RDWR checks.
422 	 */
423 	if (get_disk_ro(cmd->bdev->bd_disk))
424 		return 0;
425 
426 	features = le32_to_cpu(disk_super->compat_ro_flags) & ~DM_CACHE_FEATURE_COMPAT_RO_SUPP;
427 	if (features) {
428 		DMERR("could not access metadata RDWR due to unsupported optional features (%lx).",
429 		      (unsigned long)features);
430 		return -EINVAL;
431 	}
432 
433 	return 0;
434 }
435 
436 static int __open_metadata(struct dm_cache_metadata *cmd)
437 {
438 	int r;
439 	struct dm_block *sblock;
440 	struct cache_disk_superblock *disk_super;
441 	unsigned long sb_flags;
442 
443 	r = superblock_read_lock(cmd, &sblock);
444 	if (r < 0) {
445 		DMERR("couldn't read lock superblock");
446 		return r;
447 	}
448 
449 	disk_super = dm_block_data(sblock);
450 
451 	/* Verify the data block size hasn't changed */
452 	if (le32_to_cpu(disk_super->data_block_size) != cmd->data_block_size) {
453 		DMERR("changing the data block size (from %u to %llu) is not supported",
454 		      le32_to_cpu(disk_super->data_block_size),
455 		      (unsigned long long)cmd->data_block_size);
456 		r = -EINVAL;
457 		goto bad;
458 	}
459 
460 	r = __check_incompat_features(disk_super, cmd);
461 	if (r < 0)
462 		goto bad;
463 
464 	r = dm_tm_open_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
465 			       disk_super->metadata_space_map_root,
466 			       sizeof(disk_super->metadata_space_map_root),
467 			       &cmd->tm, &cmd->metadata_sm);
468 	if (r < 0) {
469 		DMERR("tm_open_with_sm failed");
470 		goto bad;
471 	}
472 
473 	__setup_mapping_info(cmd);
474 	dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
475 	sb_flags = le32_to_cpu(disk_super->flags);
476 	cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags);
477 	dm_bm_unlock(sblock);
478 
479 	return 0;
480 
481 bad:
482 	dm_bm_unlock(sblock);
483 	return r;
484 }
485 
486 static int __open_or_format_metadata(struct dm_cache_metadata *cmd,
487 				     bool format_device)
488 {
489 	int r;
490 	bool unformatted = false;
491 
492 	r = __superblock_all_zeroes(cmd->bm, &unformatted);
493 	if (r)
494 		return r;
495 
496 	if (unformatted)
497 		return format_device ? __format_metadata(cmd) : -EPERM;
498 
499 	return __open_metadata(cmd);
500 }
501 
502 static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
503 					    bool may_format_device)
504 {
505 	int r;
506 	cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
507 					  CACHE_METADATA_CACHE_SIZE,
508 					  CACHE_MAX_CONCURRENT_LOCKS);
509 	if (IS_ERR(cmd->bm)) {
510 		DMERR("could not create block manager");
511 		return PTR_ERR(cmd->bm);
512 	}
513 
514 	r = __open_or_format_metadata(cmd, may_format_device);
515 	if (r)
516 		dm_block_manager_destroy(cmd->bm);
517 
518 	return r;
519 }
520 
/*
 * Tear down the persistent-data objects owned by @cmd: the metadata space
 * map, the transaction manager and finally the block manager.  Does not
 * free @cmd itself.
 */
static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd)
{
	dm_sm_destroy(cmd->metadata_sm);
	dm_tm_destroy(cmd->tm);
	dm_block_manager_destroy(cmd->bm);
}
527 
528 typedef unsigned long (*flags_mutator)(unsigned long);
529 
530 static void update_flags(struct cache_disk_superblock *disk_super,
531 			 flags_mutator mutator)
532 {
533 	uint32_t sb_flags = mutator(le32_to_cpu(disk_super->flags));
534 	disk_super->flags = cpu_to_le32(sb_flags);
535 }
536 
537 static unsigned long set_clean_shutdown(unsigned long flags)
538 {
539 	set_bit(CLEAN_SHUTDOWN, &flags);
540 	return flags;
541 }
542 
543 static unsigned long clear_clean_shutdown(unsigned long flags)
544 {
545 	clear_bit(CLEAN_SHUTDOWN, &flags);
546 	return flags;
547 }
548 
549 static void read_superblock_fields(struct dm_cache_metadata *cmd,
550 				   struct cache_disk_superblock *disk_super)
551 {
552 	cmd->flags = le32_to_cpu(disk_super->flags);
553 	cmd->root = le64_to_cpu(disk_super->mapping_root);
554 	cmd->hint_root = le64_to_cpu(disk_super->hint_root);
555 	cmd->discard_root = le64_to_cpu(disk_super->discard_root);
556 	cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size);
557 	cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks));
558 	cmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
559 	cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks));
560 	strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
561 	cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]);
562 	cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]);
563 	cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]);
564 	cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size);
565 
566 	cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits);
567 	cmd->stats.read_misses = le32_to_cpu(disk_super->read_misses);
568 	cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits);
569 	cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses);
570 
571 	cmd->changed = false;
572 }
573 
574 /*
575  * The mutator updates the superblock flags.
576  */
577 static int __begin_transaction_flags(struct dm_cache_metadata *cmd,
578 				     flags_mutator mutator)
579 {
580 	int r;
581 	struct cache_disk_superblock *disk_super;
582 	struct dm_block *sblock;
583 
584 	r = superblock_lock(cmd, &sblock);
585 	if (r)
586 		return r;
587 
588 	disk_super = dm_block_data(sblock);
589 	update_flags(disk_super, mutator);
590 	read_superblock_fields(cmd, disk_super);
591 	dm_bm_unlock(sblock);
592 
593 	return dm_bm_flush(cmd->bm);
594 }
595 
/*
 * Begin a transaction by reloading the in-core fields from the
 * superblock under a read lock.
 */
static int __begin_transaction(struct dm_cache_metadata *cmd)
{
	struct dm_block *sblock;
	int r;

	/*
	 * The superblock is re-read every time, although that shouldn't
	 * really be necessary.
	 */
	r = superblock_read_lock(cmd, &sblock);
	if (r)
		return r;

	read_superblock_fields(cmd, dm_block_data(sblock));
	dm_bm_unlock(sblock);

	return 0;
}
616 
617 static int __commit_transaction(struct dm_cache_metadata *cmd,
618 				flags_mutator mutator)
619 {
620 	int r;
621 	struct cache_disk_superblock *disk_super;
622 	struct dm_block *sblock;
623 
624 	/*
625 	 * We need to know if the cache_disk_superblock exceeds a 512-byte sector.
626 	 */
627 	BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512);
628 
629 	r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root,
630 			    &cmd->discard_root);
631 	if (r)
632 		return r;
633 
634 	r = dm_tm_pre_commit(cmd->tm);
635 	if (r < 0)
636 		return r;
637 
638 	r = __save_sm_root(cmd);
639 	if (r)
640 		return r;
641 
642 	r = superblock_lock(cmd, &sblock);
643 	if (r)
644 		return r;
645 
646 	disk_super = dm_block_data(sblock);
647 
648 	disk_super->flags = cpu_to_le32(cmd->flags);
649 	if (mutator)
650 		update_flags(disk_super, mutator);
651 
652 	disk_super->mapping_root = cpu_to_le64(cmd->root);
653 	disk_super->hint_root = cpu_to_le64(cmd->hint_root);
654 	disk_super->discard_root = cpu_to_le64(cmd->discard_root);
655 	disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
656 	disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
657 	disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks));
658 	strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
659 	disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
660 	disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]);
661 	disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]);
662 
663 	disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits);
664 	disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses);
665 	disk_super->write_hits = cpu_to_le32(cmd->stats.write_hits);
666 	disk_super->write_misses = cpu_to_le32(cmd->stats.write_misses);
667 	__copy_sm_root(cmd, disk_super);
668 
669 	return dm_tm_commit(cmd->tm, sblock);
670 }
671 
672 /*----------------------------------------------------------------*/
673 
/*
 * The mappings are held in a dm-array that has 64-bit values stored in
 * little-endian format.  The index is the cblock, the high 48 bits of the
 * value are the oblock and the low 16 bits are the flags.
 */
679 #define FLAGS_MASK ((1 << 16) - 1)
680 
681 static __le64 pack_value(dm_oblock_t block, unsigned flags)
682 {
683 	uint64_t value = from_oblock(block);
684 	value <<= 16;
685 	value = value | (flags & FLAGS_MASK);
686 	return cpu_to_le64(value);
687 }
688 
689 static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags)
690 {
691 	uint64_t value = le64_to_cpu(value_le);
692 	uint64_t b = value >> 16;
693 	*block = to_oblock(b);
694 	*flags = value & FLAGS_MASK;
695 }
696 
697 /*----------------------------------------------------------------*/
698 
699 static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
700 					       sector_t data_block_size,
701 					       bool may_format_device,
702 					       size_t policy_hint_size)
703 {
704 	int r;
705 	struct dm_cache_metadata *cmd;
706 
707 	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
708 	if (!cmd) {
709 		DMERR("could not allocate metadata struct");
710 		return ERR_PTR(-ENOMEM);
711 	}
712 
713 	atomic_set(&cmd->ref_count, 1);
714 	init_rwsem(&cmd->root_lock);
715 	cmd->bdev = bdev;
716 	cmd->data_block_size = data_block_size;
717 	cmd->cache_blocks = 0;
718 	cmd->policy_hint_size = policy_hint_size;
719 	cmd->changed = true;
720 	cmd->fail_io = false;
721 
722 	r = __create_persistent_data_objects(cmd, may_format_device);
723 	if (r) {
724 		kfree(cmd);
725 		return ERR_PTR(r);
726 	}
727 
728 	r = __begin_transaction_flags(cmd, clear_clean_shutdown);
729 	if (r < 0) {
730 		dm_cache_metadata_close(cmd);
731 		return ERR_PTR(r);
732 	}
733 
734 	return cmd;
735 }
736 
/*
 * We keep a little list of ref counted metadata objects to prevent two
 * different target instances creating separate bufio instances.  This is
 * an issue if a table is reloaded before the suspend.
 *
 * 'table' links dm_cache_metadata objects through their 'list' member;
 * list lookups, insertions and removals are done under 'table_lock'.
 */
static DEFINE_MUTEX(table_lock);
static LIST_HEAD(table);
744 
745 static struct dm_cache_metadata *lookup(struct block_device *bdev)
746 {
747 	struct dm_cache_metadata *cmd;
748 
749 	list_for_each_entry(cmd, &table, list)
750 		if (cmd->bdev == bdev) {
751 			atomic_inc(&cmd->ref_count);
752 			return cmd;
753 		}
754 
755 	return NULL;
756 }
757 
758 static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
759 						sector_t data_block_size,
760 						bool may_format_device,
761 						size_t policy_hint_size)
762 {
763 	struct dm_cache_metadata *cmd, *cmd2;
764 
765 	mutex_lock(&table_lock);
766 	cmd = lookup(bdev);
767 	mutex_unlock(&table_lock);
768 
769 	if (cmd)
770 		return cmd;
771 
772 	cmd = metadata_open(bdev, data_block_size, may_format_device, policy_hint_size);
773 	if (!IS_ERR(cmd)) {
774 		mutex_lock(&table_lock);
775 		cmd2 = lookup(bdev);
776 		if (cmd2) {
777 			mutex_unlock(&table_lock);
778 			__destroy_persistent_data_objects(cmd);
779 			kfree(cmd);
780 			return cmd2;
781 		}
782 		list_add(&cmd->list, &table);
783 		mutex_unlock(&table_lock);
784 	}
785 
786 	return cmd;
787 }
788 
789 static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size)
790 {
791 	if (cmd->data_block_size != data_block_size) {
792 		DMERR("data_block_size (%llu) different from that in metadata (%llu)\n",
793 		      (unsigned long long) data_block_size,
794 		      (unsigned long long) cmd->data_block_size);
795 		return false;
796 	}
797 
798 	return true;
799 }
800 
801 struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
802 						 sector_t data_block_size,
803 						 bool may_format_device,
804 						 size_t policy_hint_size)
805 {
806 	struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size,
807 						       may_format_device, policy_hint_size);
808 
809 	if (!IS_ERR(cmd) && !same_params(cmd, data_block_size)) {
810 		dm_cache_metadata_close(cmd);
811 		return ERR_PTR(-EINVAL);
812 	}
813 
814 	return cmd;
815 }
816 
817 void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
818 {
819 	if (atomic_dec_and_test(&cmd->ref_count)) {
820 		mutex_lock(&table_lock);
821 		list_del(&cmd->list);
822 		mutex_unlock(&table_lock);
823 
824 		if (!cmd->fail_io)
825 			__destroy_persistent_data_objects(cmd);
826 		kfree(cmd);
827 	}
828 }
829 
830 /*
831  * Checks that the given cache block is either unmapped or clean.
832  */
833 static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b,
834 				   bool *result)
835 {
836 	int r;
837 	__le64 value;
838 	dm_oblock_t ob;
839 	unsigned flags;
840 
841 	r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(b), &value);
842 	if (r) {
843 		DMERR("block_unmapped_or_clean failed");
844 		return r;
845 	}
846 
847 	unpack_value(value, &ob, &flags);
848 	*result = !((flags & M_VALID) && (flags & M_DIRTY));
849 
850 	return 0;
851 }
852 
853 static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
854 					dm_cblock_t begin, dm_cblock_t end,
855 					bool *result)
856 {
857 	int r;
858 	*result = true;
859 
860 	while (begin != end) {
861 		r = block_unmapped_or_clean(cmd, begin, result);
862 		if (r)
863 			return r;
864 
865 		if (!*result) {
866 			DMERR("cache block %llu is dirty",
867 			      (unsigned long long) from_cblock(begin));
868 			return 0;
869 		}
870 
871 		begin = to_cblock(from_cblock(begin) + 1);
872 	}
873 
874 	return 0;
875 }
876 
877 static bool cmd_write_lock(struct dm_cache_metadata *cmd)
878 {
879 	down_write(&cmd->root_lock);
880 	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
881 		up_write(&cmd->root_lock);
882 		return false;
883 	}
884 	return true;
885 }
886 
/*
 * Take the write lock or make the *enclosing function* return -EINVAL.
 * Note the hidden return: only usable in functions returning int.
 */
#define WRITE_LOCK(cmd)				\
	do {					\
		if (!cmd_write_lock((cmd)))	\
			return -EINVAL;		\
	} while(0)

/* As WRITE_LOCK(), for enclosing functions that return void. */
#define WRITE_LOCK_VOID(cmd)			\
	do {					\
		if (!cmd_write_lock((cmd)))	\
			return;			\
	} while(0)

/* Release the lock taken by WRITE_LOCK()/WRITE_LOCK_VOID(). */
#define WRITE_UNLOCK(cmd) \
	up_write(&(cmd)->root_lock)
901 
902 static bool cmd_read_lock(struct dm_cache_metadata *cmd)
903 {
904 	down_read(&cmd->root_lock);
905 	if (cmd->fail_io) {
906 		up_read(&cmd->root_lock);
907 		return false;
908 	}
909 	return true;
910 }
911 
/*
 * Take the read lock or make the *enclosing function* return -EINVAL.
 * Note the hidden return: only usable in functions returning int.
 */
#define READ_LOCK(cmd)				\
	do {					\
		if (!cmd_read_lock((cmd)))	\
			return -EINVAL;		\
	} while(0)

/* As READ_LOCK(), for enclosing functions that return void. */
#define READ_LOCK_VOID(cmd)			\
	do {					\
		if (!cmd_read_lock((cmd)))	\
			return;			\
	} while(0)

/* Release the lock taken by READ_LOCK()/READ_LOCK_VOID(). */
#define READ_UNLOCK(cmd) \
	up_read(&(cmd)->root_lock)
926 
927 int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
928 {
929 	int r;
930 	bool clean;
931 	__le64 null_mapping = pack_value(0, 0);
932 
933 	WRITE_LOCK(cmd);
934 	__dm_bless_for_disk(&null_mapping);
935 
936 	if (from_cblock(new_cache_size) < from_cblock(cmd->cache_blocks)) {
937 		r = blocks_are_unmapped_or_clean(cmd, new_cache_size, cmd->cache_blocks, &clean);
938 		if (r) {
939 			__dm_unbless_for_disk(&null_mapping);
940 			goto out;
941 		}
942 
943 		if (!clean) {
944 			DMERR("unable to shrink cache due to dirty blocks");
945 			r = -EINVAL;
946 			__dm_unbless_for_disk(&null_mapping);
947 			goto out;
948 		}
949 	}
950 
951 	r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks),
952 			    from_cblock(new_cache_size),
953 			    &null_mapping, &cmd->root);
954 	if (!r)
955 		cmd->cache_blocks = new_cache_size;
956 	cmd->changed = true;
957 
958 out:
959 	WRITE_UNLOCK(cmd);
960 
961 	return r;
962 }
963 
964 int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
965 				   sector_t discard_block_size,
966 				   dm_dblock_t new_nr_entries)
967 {
968 	int r;
969 
970 	WRITE_LOCK(cmd);
971 	r = dm_bitset_resize(&cmd->discard_info,
972 			     cmd->discard_root,
973 			     from_dblock(cmd->discard_nr_blocks),
974 			     from_dblock(new_nr_entries),
975 			     false, &cmd->discard_root);
976 	if (!r) {
977 		cmd->discard_block_size = discard_block_size;
978 		cmd->discard_nr_blocks = new_nr_entries;
979 	}
980 
981 	cmd->changed = true;
982 	WRITE_UNLOCK(cmd);
983 
984 	return r;
985 }
986 
/*
 * Set bit @b in the discard bitset; cmd->discard_root is passed as both
 * the current root and the location for the updated root.
 */
static int __set_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
{
	return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root,
				 from_dblock(b), &cmd->discard_root);
}
992 
/*
 * Clear bit @b in the discard bitset; cmd->discard_root is passed as both
 * the current root and the location for the updated root.
 */
static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
{
	return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root,
				   from_dblock(b), &cmd->discard_root);
}
998 
/*
 * Test bit @b in the discard bitset, storing the answer in *is_discarded.
 * Note that &cmd->discard_root is handed to dm_bitset_test_bit() as an
 * output root, so this call may update cmd->discard_root.
 */
static int __is_discarded(struct dm_cache_metadata *cmd, dm_dblock_t b,
			  bool *is_discarded)
{
	return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root,
				  from_dblock(b), &cmd->discard_root,
				  is_discarded);
}
1006 
1007 static int __discard(struct dm_cache_metadata *cmd,
1008 		     dm_dblock_t dblock, bool discard)
1009 {
1010 	int r;
1011 
1012 	r = (discard ? __set_discard : __clear_discard)(cmd, dblock);
1013 	if (r)
1014 		return r;
1015 
1016 	cmd->changed = true;
1017 	return 0;
1018 }
1019 
/*
 * Set or clear the discard bit for @dblock under the metadata write lock.
 * Returns -EINVAL if the write lock cannot be taken (see
 * cmd_write_lock()), otherwise the result of __discard().
 */
int dm_cache_set_discard(struct dm_cache_metadata *cmd,
			 dm_dblock_t dblock, bool discard)
{
	int r;

	WRITE_LOCK(cmd);
	r = __discard(cmd, dblock, discard);
	WRITE_UNLOCK(cmd);

	return r;
}
1031 
1032 static int __load_discards(struct dm_cache_metadata *cmd,
1033 			   load_discard_fn fn, void *context)
1034 {
1035 	int r = 0;
1036 	dm_block_t b;
1037 	bool discard;
1038 
1039 	for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
1040 		dm_dblock_t dblock = to_dblock(b);
1041 
1042 		if (cmd->clean_when_opened) {
1043 			r = __is_discarded(cmd, dblock, &discard);
1044 			if (r)
1045 				return r;
1046 		} else
1047 			discard = false;
1048 
1049 		r = fn(context, cmd->discard_block_size, dblock, discard);
1050 		if (r)
1051 			break;
1052 	}
1053 
1054 	return r;
1055 }
1056 
/*
 * Report every discard block to @fn under the metadata read lock.
 * Returns -EINVAL if the read lock cannot be taken (see cmd_read_lock()),
 * otherwise the result of __load_discards().
 */
int dm_cache_load_discards(struct dm_cache_metadata *cmd,
			   load_discard_fn fn, void *context)
{
	int r;

	READ_LOCK(cmd);
	r = __load_discards(cmd, fn, context);
	READ_UNLOCK(cmd);

	return r;
}
1068 
/*
 * Store the current number of cache blocks in *result.
 * Returns -EINVAL (leaving *result untouched) if the read lock cannot be
 * taken, otherwise 0.
 */
int dm_cache_size(struct dm_cache_metadata *cmd, dm_cblock_t *result)
{
	READ_LOCK(cmd);
	*result = cmd->cache_blocks;
	READ_UNLOCK(cmd);

	return 0;
}
1077 
1078 static int __remove(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
1079 {
1080 	int r;
1081 	__le64 value = pack_value(0, 0);
1082 
1083 	__dm_bless_for_disk(&value);
1084 	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
1085 			       &value, &cmd->root);
1086 	if (r)
1087 		return r;
1088 
1089 	cmd->changed = true;
1090 	return 0;
1091 }
1092 
/*
 * Remove the mapping for @cblock under the metadata write lock.
 * Returns -EINVAL if the write lock cannot be taken (see
 * cmd_write_lock()), otherwise the result of __remove().
 */
int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
{
	int r;

	WRITE_LOCK(cmd);
	r = __remove(cmd, cblock);
	WRITE_UNLOCK(cmd);

	return r;
}
1103 
1104 static int __insert(struct dm_cache_metadata *cmd,
1105 		    dm_cblock_t cblock, dm_oblock_t oblock)
1106 {
1107 	int r;
1108 	__le64 value = pack_value(oblock, M_VALID);
1109 	__dm_bless_for_disk(&value);
1110 
1111 	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
1112 			       &value, &cmd->root);
1113 	if (r)
1114 		return r;
1115 
1116 	cmd->changed = true;
1117 	return 0;
1118 }
1119 
/*
 * Insert the mapping @cblock -> @oblock under the metadata write lock.
 * Returns -EINVAL if the write lock cannot be taken (see
 * cmd_write_lock()), otherwise the result of __insert().
 */
int dm_cache_insert_mapping(struct dm_cache_metadata *cmd,
			    dm_cblock_t cblock, dm_oblock_t oblock)
{
	int r;

	WRITE_LOCK(cmd);
	r = __insert(cmd, cblock, oblock);
	WRITE_UNLOCK(cmd);

	return r;
}
1131 
/*
 * Bundles a load_mapping_fn callback and its opaque context with the
 * metadata object and two loading options.
 *
 * NOTE(review): none of the nearby loading code references this struct; it
 * may be a leftover from an earlier callback-based loader - confirm before
 * relying on it.
 */
struct thunk {
	load_mapping_fn fn;
	void *context;

	struct dm_cache_metadata *cmd;
	/* presumably: whether on-disk dirty flags may be trusted - TODO confirm */
	bool respect_dirty_flags;
	/* presumably: whether the hint array may be used - TODO confirm */
	bool hints_valid;
};
1140 
1141 static bool policy_unchanged(struct dm_cache_metadata *cmd,
1142 			     struct dm_cache_policy *policy)
1143 {
1144 	const char *policy_name = dm_cache_policy_get_name(policy);
1145 	const unsigned *policy_version = dm_cache_policy_get_version(policy);
1146 	size_t policy_hint_size = dm_cache_policy_get_hint_size(policy);
1147 
1148 	/*
1149 	 * Ensure policy names match.
1150 	 */
1151 	if (strncmp(cmd->policy_name, policy_name, sizeof(cmd->policy_name)))
1152 		return false;
1153 
1154 	/*
1155 	 * Ensure policy major versions match.
1156 	 */
1157 	if (cmd->policy_version[0] != policy_version[0])
1158 		return false;
1159 
1160 	/*
1161 	 * Ensure policy hint sizes match.
1162 	 */
1163 	if (cmd->policy_hint_size != policy_hint_size)
1164 		return false;
1165 
1166 	return true;
1167 }
1168 
1169 static bool hints_array_initialized(struct dm_cache_metadata *cmd)
1170 {
1171 	return cmd->hint_root && cmd->policy_hint_size;
1172 }
1173 
1174 static bool hints_array_available(struct dm_cache_metadata *cmd,
1175 				  struct dm_cache_policy *policy)
1176 {
1177 	return cmd->clean_when_opened && policy_unchanged(cmd, policy) &&
1178 		hints_array_initialized(cmd);
1179 }
1180 
1181 static int __load_mapping(struct dm_cache_metadata *cmd,
1182 			  uint64_t cb, bool hints_valid,
1183 			  struct dm_array_cursor *mapping_cursor,
1184 			  struct dm_array_cursor *hint_cursor,
1185 			  load_mapping_fn fn, void *context)
1186 {
1187 	int r = 0;
1188 
1189 	__le64 mapping;
1190 	__le32 hint = 0;
1191 
1192 	__le64 *mapping_value_le;
1193 	__le32 *hint_value_le;
1194 
1195 	dm_oblock_t oblock;
1196 	unsigned flags;
1197 
1198 	dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le);
1199 	memcpy(&mapping, mapping_value_le, sizeof(mapping));
1200 	unpack_value(mapping, &oblock, &flags);
1201 
1202 	if (flags & M_VALID) {
1203 		if (hints_valid) {
1204 			dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le);
1205 			memcpy(&hint, hint_value_le, sizeof(hint));
1206 		}
1207 
1208 		r = fn(context, oblock, to_cblock(cb), flags & M_DIRTY,
1209 		       le32_to_cpu(hint), hints_valid);
1210 		if (r)
1211 			DMERR("policy couldn't load cblock");
1212 	}
1213 
1214 	return r;
1215 }
1216 
1217 static int __load_mappings(struct dm_cache_metadata *cmd,
1218 			   struct dm_cache_policy *policy,
1219 			   load_mapping_fn fn, void *context)
1220 {
1221 	int r;
1222 	uint64_t cb;
1223 
1224 	bool hints_valid = hints_array_available(cmd, policy);
1225 
1226 	if (from_cblock(cmd->cache_blocks) == 0)
1227 		/* Nothing to do */
1228 		return 0;
1229 
1230 	r = dm_array_cursor_begin(&cmd->info, cmd->root, &cmd->mapping_cursor);
1231 	if (r)
1232 		return r;
1233 
1234 	if (hints_valid) {
1235 		r = dm_array_cursor_begin(&cmd->hint_info, cmd->hint_root, &cmd->hint_cursor);
1236 		if (r) {
1237 			dm_array_cursor_end(&cmd->mapping_cursor);
1238 			return r;
1239 		}
1240 	}
1241 
1242 	for (cb = 0; ; cb++) {
1243 		r = __load_mapping(cmd, cb, hints_valid,
1244 				   &cmd->mapping_cursor, &cmd->hint_cursor,
1245 				   fn, context);
1246 		if (r)
1247 			goto out;
1248 
1249 		/*
1250 		 * We need to break out before we move the cursors.
1251 		 */
1252 		if (cb >= (from_cblock(cmd->cache_blocks) - 1))
1253 			break;
1254 
1255 		r = dm_array_cursor_next(&cmd->mapping_cursor);
1256 		if (r) {
1257 			DMERR("dm_array_cursor_next for mapping failed");
1258 			goto out;
1259 		}
1260 
1261 		if (hints_valid) {
1262 			r = dm_array_cursor_next(&cmd->hint_cursor);
1263 			if (r) {
1264 				DMERR("dm_array_cursor_next for hint failed");
1265 				goto out;
1266 			}
1267 		}
1268 	}
1269 out:
1270 	dm_array_cursor_end(&cmd->mapping_cursor);
1271 	if (hints_valid)
1272 		dm_array_cursor_end(&cmd->hint_cursor);
1273 
1274 	return r;
1275 }
1276 
/*
 * Locked entry point for loading mappings: runs __load_mappings() between
 * READ_LOCK() and READ_UNLOCK() and returns its result.
 *
 * NOTE(review): READ_LOCK() is a macro defined elsewhere in this file; it
 * presumably can return an error before __load_mappings() is called -
 * confirm.
 */
int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
			   struct dm_cache_policy *policy,
			   load_mapping_fn fn, void *context)
{
	int r;

	READ_LOCK(cmd);
	r = __load_mappings(cmd, policy, fn, context);
	READ_UNLOCK(cmd);

	return r;
}
1289 
1290 static int __dump_mapping(void *context, uint64_t cblock, void *leaf)
1291 {
1292 	int r = 0;
1293 	__le64 value;
1294 	dm_oblock_t oblock;
1295 	unsigned flags;
1296 
1297 	memcpy(&value, leaf, sizeof(value));
1298 	unpack_value(value, &oblock, &flags);
1299 
1300 	return r;
1301 }
1302 
/*
 * Walk the mapping array rooted at cmd->root with dm_array_walk(), using
 * __dump_mapping() as the callback and a NULL context.  Returns whatever
 * dm_array_walk() returns.
 */
static int __dump_mappings(struct dm_cache_metadata *cmd)
{
	return dm_array_walk(&cmd->info, cmd->root, __dump_mapping, NULL);
}
1307 
/*
 * Run __dump_mappings() between READ_LOCK_VOID() and READ_UNLOCK().  The
 * return value of __dump_mappings() is discarded.
 *
 * NOTE(review): READ_LOCK_VOID() is a macro defined elsewhere in this file;
 * it presumably can return early before the walk runs - confirm.
 */
void dm_cache_dump(struct dm_cache_metadata *cmd)
{
	READ_LOCK_VOID(cmd);
	__dump_mappings(cmd);
	READ_UNLOCK(cmd);
}
1314 
/*
 * Return the current value of cmd->changed, read under the read lock.
 * Elsewhere in this file that flag is set to true after a successful
 * mapping array update.
 *
 * NOTE(review): READ_LOCK() is a macro defined elsewhere in this file; it
 * presumably can return an error code instead of the flag - confirm how
 * callers distinguish the two.
 */
int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd)
{
	int r;

	READ_LOCK(cmd);
	r = cmd->changed;
	READ_UNLOCK(cmd);

	return r;
}
1325 
1326 static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty)
1327 {
1328 	int r;
1329 	unsigned flags;
1330 	dm_oblock_t oblock;
1331 	__le64 value;
1332 
1333 	r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(cblock), &value);
1334 	if (r)
1335 		return r;
1336 
1337 	unpack_value(value, &oblock, &flags);
1338 
1339 	if (((flags & M_DIRTY) && dirty) || (!(flags & M_DIRTY) && !dirty))
1340 		/* nothing to be done */
1341 		return 0;
1342 
1343 	value = pack_value(oblock, (flags & ~M_DIRTY) | (dirty ? M_DIRTY : 0));
1344 	__dm_bless_for_disk(&value);
1345 
1346 	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
1347 			       &value, &cmd->root);
1348 	if (r)
1349 		return r;
1350 
1351 	cmd->changed = true;
1352 	return 0;
1353 
1354 }
1355 
/*
 * Locked entry point for updating a block's dirty flag: runs __dirty()
 * between WRITE_LOCK() and WRITE_UNLOCK() and returns its result.
 *
 * NOTE(review): WRITE_LOCK() is a macro defined elsewhere in this file; it
 * presumably can return an error before __dirty() is called - confirm.
 */
int dm_cache_set_dirty(struct dm_cache_metadata *cmd,
		       dm_cblock_t cblock, bool dirty)
{
	int r;

	WRITE_LOCK(cmd);
	r = __dirty(cmd, cblock, dirty);
	WRITE_UNLOCK(cmd);

	return r;
}
1367 
/*
 * Copy cmd->stats into *stats while holding the read lock.
 *
 * NOTE(review): READ_LOCK_VOID() is a macro defined elsewhere in this file;
 * it presumably can return early, leaving *stats untouched - confirm.
 */
void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd,
				 struct dm_cache_statistics *stats)
{
	READ_LOCK_VOID(cmd);
	*stats = cmd->stats;
	READ_UNLOCK(cmd);
}
1375 
/*
 * Copy *stats into cmd->stats while holding the write lock.
 *
 * NOTE(review): WRITE_LOCK_VOID() is a macro defined elsewhere in this
 * file; it presumably can return early, leaving cmd->stats untouched -
 * confirm.
 */
void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd,
				 struct dm_cache_statistics *stats)
{
	WRITE_LOCK_VOID(cmd);
	cmd->stats = *stats;
	WRITE_UNLOCK(cmd);
}
1383 
1384 int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown)
1385 {
1386 	int r;
1387 	flags_mutator mutator = (clean_shutdown ? set_clean_shutdown :
1388 				 clear_clean_shutdown);
1389 
1390 	WRITE_LOCK(cmd);
1391 	r = __commit_transaction(cmd, mutator);
1392 	if (r)
1393 		goto out;
1394 
1395 	r = __begin_transaction(cmd);
1396 
1397 out:
1398 	WRITE_UNLOCK(cmd);
1399 	return r;
1400 }
1401 
1402 int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd,
1403 					   dm_block_t *result)
1404 {
1405 	int r = -EINVAL;
1406 
1407 	READ_LOCK(cmd);
1408 	r = dm_sm_get_nr_free(cmd->metadata_sm, result);
1409 	READ_UNLOCK(cmd);
1410 
1411 	return r;
1412 }
1413 
1414 int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd,
1415 				   dm_block_t *result)
1416 {
1417 	int r = -EINVAL;
1418 
1419 	READ_LOCK(cmd);
1420 	r = dm_sm_get_nr_blocks(cmd->metadata_sm, result);
1421 	READ_UNLOCK(cmd);
1422 
1423 	return r;
1424 }
1425 
1426 /*----------------------------------------------------------------*/
1427 
1428 static int get_hint(uint32_t index, void *value_le, void *context)
1429 {
1430 	uint32_t value;
1431 	struct dm_cache_policy *policy = context;
1432 
1433 	value = policy_get_hint(policy, to_cblock(index));
1434 	*((__le32 *) value_le) = cpu_to_le32(value);
1435 
1436 	return 0;
1437 }
1438 
1439 /*
1440  * It's quicker to always delete the hint array, and recreate with
1441  * dm_array_new().
1442  */
1443 static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
1444 {
1445 	int r;
1446 	size_t hint_size;
1447 	const char *policy_name = dm_cache_policy_get_name(policy);
1448 	const unsigned *policy_version = dm_cache_policy_get_version(policy);
1449 
1450 	if (!policy_name[0] ||
1451 	    (strlen(policy_name) > sizeof(cmd->policy_name) - 1))
1452 		return -EINVAL;
1453 
1454 	strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
1455 	memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));
1456 
1457 	hint_size = dm_cache_policy_get_hint_size(policy);
1458 	if (!hint_size)
1459 		return 0; /* short-circuit hints initialization */
1460 	cmd->policy_hint_size = hint_size;
1461 
1462 	if (cmd->hint_root) {
1463 		r = dm_array_del(&cmd->hint_info, cmd->hint_root);
1464 		if (r)
1465 			return r;
1466 	}
1467 
1468 	return dm_array_new(&cmd->hint_info, &cmd->hint_root,
1469 			    from_cblock(cmd->cache_blocks),
1470 			    get_hint, policy);
1471 }
1472 
/*
 * Locked entry point for saving policy hints: runs write_hints() between
 * WRITE_LOCK() and WRITE_UNLOCK() and returns its result.
 *
 * NOTE(review): WRITE_LOCK() is a macro defined elsewhere in this file; it
 * presumably can return an error before write_hints() is called - confirm.
 */
int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
{
	int r;

	WRITE_LOCK(cmd);
	r = write_hints(cmd, policy);
	WRITE_UNLOCK(cmd);

	return r;
}
1483 
/*
 * Call blocks_are_unmapped_or_clean() over the range starting at 0 with
 * cmd->cache_blocks as the bound, under the read lock, and return its
 * result; the answer is delivered through *result.
 *
 * NOTE(review): judging by the helper's name, *result is true when no block
 * in the range is both mapped and dirty - confirm against its definition.
 */
int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result)
{
	int r;

	READ_LOCK(cmd);
	r = blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result);
	READ_UNLOCK(cmd);

	return r;
}
1494 
/*
 * Call dm_bm_set_read_only() on the metadata's block manager (cmd->bm)
 * while holding the write lock.
 *
 * NOTE(review): WRITE_LOCK_VOID() is a macro defined elsewhere in this
 * file; it presumably can return early without changing the mode - confirm.
 */
void dm_cache_metadata_set_read_only(struct dm_cache_metadata *cmd)
{
	WRITE_LOCK_VOID(cmd);
	dm_bm_set_read_only(cmd->bm);
	WRITE_UNLOCK(cmd);
}
1501 
/*
 * Call dm_bm_set_read_write() on the metadata's block manager (cmd->bm)
 * while holding the write lock.
 *
 * NOTE(review): WRITE_LOCK_VOID() is a macro defined elsewhere in this
 * file; it presumably can return early without changing the mode - confirm.
 */
void dm_cache_metadata_set_read_write(struct dm_cache_metadata *cmd)
{
	WRITE_LOCK_VOID(cmd);
	dm_bm_set_read_write(cmd->bm);
	WRITE_UNLOCK(cmd);
}
1508 
1509 int dm_cache_metadata_set_needs_check(struct dm_cache_metadata *cmd)
1510 {
1511 	int r;
1512 	struct dm_block *sblock;
1513 	struct cache_disk_superblock *disk_super;
1514 
1515 	WRITE_LOCK(cmd);
1516 	set_bit(NEEDS_CHECK, &cmd->flags);
1517 
1518 	r = superblock_lock(cmd, &sblock);
1519 	if (r) {
1520 		DMERR("couldn't read superblock");
1521 		goto out;
1522 	}
1523 
1524 	disk_super = dm_block_data(sblock);
1525 	disk_super->flags = cpu_to_le32(cmd->flags);
1526 
1527 	dm_bm_unlock(sblock);
1528 
1529 out:
1530 	WRITE_UNLOCK(cmd);
1531 	return r;
1532 }
1533 
/*
 * Store in *result whether the NEEDS_CHECK bit is set in cmd->flags, read
 * under the read lock.  Returns 0 once *result has been written.
 *
 * NOTE(review): READ_LOCK() is a macro defined elsewhere in this file; it
 * presumably can return an error before *result is written - confirm.
 */
int dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd, bool *result)
{
	READ_LOCK(cmd);
	*result = !!test_bit(NEEDS_CHECK, &cmd->flags);
	READ_UNLOCK(cmd);

	return 0;
}
1542 
/*
 * Tear down the persistent-data objects and create them again, under the
 * write lock.  If re-creation fails, cmd->fail_io is set to true and the
 * error is returned; otherwise 0 is returned.
 *
 * NOTE(review): presumably this discards the changes of the current,
 * uncommitted transaction, and the 'false' argument asks for existing
 * metadata to be opened rather than formatted - confirm against
 * __create_persistent_data_objects().
 */
int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
{
	int r;

	WRITE_LOCK(cmd);
	__destroy_persistent_data_objects(cmd);
	r = __create_persistent_data_objects(cmd, false);
	if (r)
		cmd->fail_io = true;
	WRITE_UNLOCK(cmd);

	return r;
}
1556