1 /*
2  * Copyright (C) 2012 Red Hat, Inc.
3  *
4  * This file is released under the GPL.
5  */
6 
7 #include "dm-cache-metadata.h"
8 
9 #include "persistent-data/dm-array.h"
10 #include "persistent-data/dm-bitset.h"
11 #include "persistent-data/dm-space-map.h"
12 #include "persistent-data/dm-space-map-disk.h"
13 #include "persistent-data/dm-transaction-manager.h"
14 
15 #include <linux/device-mapper.h>
16 
17 /*----------------------------------------------------------------*/
18 
/* Prefix for this file's log messages (presumably consumed by DMERR — confirm). */
#define DM_MSG_PREFIX   "cache metadata"

/*
 * Value written to, and checked against, the superblock's magic field.
 * NOTE: the leading zero makes this an octal constant in C.
 */
#define CACHE_SUPERBLOCK_MAGIC 06142003
/* Block number at which the superblock is locked for reading and writing. */
#define CACHE_SUPERBLOCK_LOCATION 0

/*
 * Defines the range of metadata versions that this module can handle.
 * New superblocks are written with MAX_CACHE_VERSION.
 */
#define MIN_CACHE_VERSION 1
#define MAX_CACHE_VERSION 1

/* Passed to dm_block_manager_create() when the block manager is set up. */
#define CACHE_METADATA_CACHE_SIZE 64

/*
 *  3 for btree insert +
 *  2 for btree lookup used within space map
 */
#define CACHE_MAX_CONCURRENT_LOCKS 5
/* Size in bytes of the buffers that hold a copy of the space map root. */
#define SPACE_MAP_ROOT_SIZE 128
38 
/*
 * Bit numbers within the superblock's flags word.
 */
enum superblock_flag_bits {
	/* for spotting crashes that would invalidate the dirty bitset */
	CLEAN_SHUTDOWN,
	/* metadata must be checked using the tools */
	NEEDS_CHECK,
};
45 
/*
 * Each mapping from cache block -> origin block carries a set of flags.
 * The values are bit masks that are tested against the unpacked flags.
 */
enum mapping_bits {
	/*
	 * A valid mapping.  Because we're using an array we clear this
	 * flag for a non-existent mapping.
	 */
	M_VALID = 1,

	/*
	 * The data on the cache is different from that on the origin.
	 */
	M_DIRTY = 2
};
61 
/*
 * Layout of the superblock as stored in the metadata block.  Multi-byte
 * integers are little-endian.  __commit_transaction() asserts at build
 * time that this structure is no larger than 512 bytes.
 */
struct cache_disk_superblock {
	/* Checksum of the block from 'flags' to the end of the block. */
	__le32 csum;
	/* Bits are numbered by enum superblock_flag_bits. */
	__le32 flags;
	/* Location of this block; filled in just before write, checked on read. */
	__le64 blocknr;

	/* Zeroed when the initial superblock is written. */
	__u8 uuid[16];
	/* Must equal CACHE_SUPERBLOCK_MAGIC. */
	__le64 magic;
	/* Must lie in [MIN_CACHE_VERSION, MAX_CACHE_VERSION]. */
	__le32 version;

	/* Name and per-block hint size of the policy that wrote the metadata. */
	__u8 policy_name[CACHE_POLICY_NAME_SIZE];
	__le32 policy_hint_size;

	/* Copy of the metadata space map root, plus the roots of the arrays. */
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
	__le64 mapping_root;
	__le64 hint_root;

	/* Root and dimensions of the discard bitset. */
	__le64 discard_root;
	__le64 discard_block_size;
	__le64 discard_nr_blocks;

	__le32 data_block_size;
	__le32 metadata_block_size;
	__le32 cache_blocks;

	/*
	 * Feature flags.  incompat_flags and compat_ro_flags are checked
	 * against the supported masks when the metadata is opened.
	 */
	__le32 compat_flags;
	__le32 compat_ro_flags;
	__le32 incompat_flags;

	/* Persisted copy of the hit/miss statistics. */
	__le32 read_hits;
	__le32 read_misses;
	__le32 write_hits;
	__le32 write_misses;

	__le32 policy_version[CACHE_POLICY_VERSION_SIZE];
} __packed;
97 
/*
 * In-core state for one open cache metadata device.
 */
struct dm_cache_metadata {
	/* Reference count and linkage for the file-level table of open objects. */
	atomic_t ref_count;
	struct list_head list;

	/* The metadata device and the persistent-data objects built on it. */
	struct block_device *bdev;
	struct dm_block_manager *bm;
	struct dm_space_map *metadata_sm;
	struct dm_transaction_manager *tm;

	/* Descriptors for the mapping array, the hint array and the discard bitset. */
	struct dm_array_info info;
	struct dm_array_info hint_info;
	struct dm_disk_bitset discard_info;

	/* Taken by cmd_read_lock()/cmd_write_lock(). */
	struct rw_semaphore root_lock;
	/* In-core copy of the superblock flags. */
	unsigned long flags;
	/* Current roots of the mapping array, hint array and discard bitset. */
	dm_block_t root;
	dm_block_t hint_root;
	dm_block_t discard_root;

	sector_t discard_block_size;
	dm_dblock_t discard_nr_blocks;

	sector_t data_block_size;
	dm_cblock_t cache_blocks;
	/* Set when the in-core metadata is modified; cleared when the superblock is re-read. */
	bool changed:1;
	/* Whether CLEAN_SHUTDOWN was set in the superblock when it was opened. */
	bool clean_when_opened:1;

	char policy_name[CACHE_POLICY_NAME_SIZE];
	unsigned policy_version[CACHE_POLICY_VERSION_SIZE];
	size_t policy_hint_size;
	struct dm_cache_statistics stats;

	/*
	 * Reading the space map root can fail, so we read it into this
	 * buffer before the superblock is locked and updated.
	 */
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	/*
	 * Set if a transaction has to be aborted but the attempt to roll
	 * back to the previous (good) transaction failed.  The only
	 * metadata operation permissible in this state is the closing of
	 * the device.
	 */
	bool fail_io:1;
};
144 
145 /*-------------------------------------------------------------------
146  * superblock validator
147  *-----------------------------------------------------------------*/
148 
149 #define SUPERBLOCK_CSUM_XOR 9031977
150 
151 static void sb_prepare_for_write(struct dm_block_validator *v,
152 				 struct dm_block *b,
153 				 size_t sb_block_size)
154 {
155 	struct cache_disk_superblock *disk_super = dm_block_data(b);
156 
157 	disk_super->blocknr = cpu_to_le64(dm_block_location(b));
158 	disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
159 						      sb_block_size - sizeof(__le32),
160 						      SUPERBLOCK_CSUM_XOR));
161 }
162 
163 static int check_metadata_version(struct cache_disk_superblock *disk_super)
164 {
165 	uint32_t metadata_version = le32_to_cpu(disk_super->version);
166 	if (metadata_version < MIN_CACHE_VERSION || metadata_version > MAX_CACHE_VERSION) {
167 		DMERR("Cache metadata version %u found, but only versions between %u and %u supported.",
168 		      metadata_version, MIN_CACHE_VERSION, MAX_CACHE_VERSION);
169 		return -EINVAL;
170 	}
171 
172 	return 0;
173 }
174 
175 static int sb_check(struct dm_block_validator *v,
176 		    struct dm_block *b,
177 		    size_t sb_block_size)
178 {
179 	struct cache_disk_superblock *disk_super = dm_block_data(b);
180 	__le32 csum_le;
181 
182 	if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) {
183 		DMERR("sb_check failed: blocknr %llu: wanted %llu",
184 		      le64_to_cpu(disk_super->blocknr),
185 		      (unsigned long long)dm_block_location(b));
186 		return -ENOTBLK;
187 	}
188 
189 	if (le64_to_cpu(disk_super->magic) != CACHE_SUPERBLOCK_MAGIC) {
190 		DMERR("sb_check failed: magic %llu: wanted %llu",
191 		      le64_to_cpu(disk_super->magic),
192 		      (unsigned long long)CACHE_SUPERBLOCK_MAGIC);
193 		return -EILSEQ;
194 	}
195 
196 	csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
197 					     sb_block_size - sizeof(__le32),
198 					     SUPERBLOCK_CSUM_XOR));
199 	if (csum_le != disk_super->csum) {
200 		DMERR("sb_check failed: csum %u: wanted %u",
201 		      le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum));
202 		return -EILSEQ;
203 	}
204 
205 	return check_metadata_version(disk_super);
206 }
207 
/*
 * Validator handed to the block manager whenever the superblock is
 * locked through superblock_read_lock(), superblock_lock_zero() or
 * superblock_lock().
 */
static struct dm_block_validator sb_validator = {
	.name = "superblock",
	.prepare_for_write = sb_prepare_for_write,
	.check = sb_check
};
213 
214 /*----------------------------------------------------------------*/
215 
216 static int superblock_read_lock(struct dm_cache_metadata *cmd,
217 				struct dm_block **sblock)
218 {
219 	return dm_bm_read_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
220 			       &sb_validator, sblock);
221 }
222 
223 static int superblock_lock_zero(struct dm_cache_metadata *cmd,
224 				struct dm_block **sblock)
225 {
226 	return dm_bm_write_lock_zero(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
227 				     &sb_validator, sblock);
228 }
229 
230 static int superblock_lock(struct dm_cache_metadata *cmd,
231 			   struct dm_block **sblock)
232 {
233 	return dm_bm_write_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
234 				&sb_validator, sblock);
235 }
236 
237 /*----------------------------------------------------------------*/
238 
239 static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *result)
240 {
241 	int r;
242 	unsigned i;
243 	struct dm_block *b;
244 	__le64 *data_le, zero = cpu_to_le64(0);
245 	unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64);
246 
247 	/*
248 	 * We can't use a validator here - it may be all zeroes.
249 	 */
250 	r = dm_bm_read_lock(bm, CACHE_SUPERBLOCK_LOCATION, NULL, &b);
251 	if (r)
252 		return r;
253 
254 	data_le = dm_block_data(b);
255 	*result = true;
256 	for (i = 0; i < sb_block_size; i++) {
257 		if (data_le[i] != zero) {
258 			*result = false;
259 			break;
260 		}
261 	}
262 
263 	dm_bm_unlock(b);
264 
265 	return 0;
266 }
267 
268 static void __setup_mapping_info(struct dm_cache_metadata *cmd)
269 {
270 	struct dm_btree_value_type vt;
271 
272 	vt.context = NULL;
273 	vt.size = sizeof(__le64);
274 	vt.inc = NULL;
275 	vt.dec = NULL;
276 	vt.equal = NULL;
277 	dm_array_info_init(&cmd->info, cmd->tm, &vt);
278 
279 	if (cmd->policy_hint_size) {
280 		vt.size = sizeof(__le32);
281 		dm_array_info_init(&cmd->hint_info, cmd->tm, &vt);
282 	}
283 }
284 
285 static int __save_sm_root(struct dm_cache_metadata *cmd)
286 {
287 	int r;
288 	size_t metadata_len;
289 
290 	r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
291 	if (r < 0)
292 		return r;
293 
294 	return dm_sm_copy_root(cmd->metadata_sm, &cmd->metadata_space_map_root,
295 			       metadata_len);
296 }
297 
298 static void __copy_sm_root(struct dm_cache_metadata *cmd,
299 			   struct cache_disk_superblock *disk_super)
300 {
301 	memcpy(&disk_super->metadata_space_map_root,
302 	       &cmd->metadata_space_map_root,
303 	       sizeof(cmd->metadata_space_map_root));
304 }
305 
306 static int __write_initial_superblock(struct dm_cache_metadata *cmd)
307 {
308 	int r;
309 	struct dm_block *sblock;
310 	struct cache_disk_superblock *disk_super;
311 	sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT;
312 
313 	/* FIXME: see if we can lose the max sectors limit */
314 	if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS)
315 		bdev_size = DM_CACHE_METADATA_MAX_SECTORS;
316 
317 	r = dm_tm_pre_commit(cmd->tm);
318 	if (r < 0)
319 		return r;
320 
321 	/*
322 	 * dm_sm_copy_root() can fail.  So we need to do it before we start
323 	 * updating the superblock.
324 	 */
325 	r = __save_sm_root(cmd);
326 	if (r)
327 		return r;
328 
329 	r = superblock_lock_zero(cmd, &sblock);
330 	if (r)
331 		return r;
332 
333 	disk_super = dm_block_data(sblock);
334 	disk_super->flags = 0;
335 	memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
336 	disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC);
337 	disk_super->version = cpu_to_le32(MAX_CACHE_VERSION);
338 	memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
339 	memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
340 	disk_super->policy_hint_size = 0;
341 
342 	__copy_sm_root(cmd, disk_super);
343 
344 	disk_super->mapping_root = cpu_to_le64(cmd->root);
345 	disk_super->hint_root = cpu_to_le64(cmd->hint_root);
346 	disk_super->discard_root = cpu_to_le64(cmd->discard_root);
347 	disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
348 	disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
349 	disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE);
350 	disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
351 	disk_super->cache_blocks = cpu_to_le32(0);
352 
353 	disk_super->read_hits = cpu_to_le32(0);
354 	disk_super->read_misses = cpu_to_le32(0);
355 	disk_super->write_hits = cpu_to_le32(0);
356 	disk_super->write_misses = cpu_to_le32(0);
357 
358 	return dm_tm_commit(cmd->tm, sblock);
359 }
360 
361 static int __format_metadata(struct dm_cache_metadata *cmd)
362 {
363 	int r;
364 
365 	r = dm_tm_create_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
366 				 &cmd->tm, &cmd->metadata_sm);
367 	if (r < 0) {
368 		DMERR("tm_create_with_sm failed");
369 		return r;
370 	}
371 
372 	__setup_mapping_info(cmd);
373 
374 	r = dm_array_empty(&cmd->info, &cmd->root);
375 	if (r < 0)
376 		goto bad;
377 
378 	dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
379 
380 	r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root);
381 	if (r < 0)
382 		goto bad;
383 
384 	cmd->discard_block_size = 0;
385 	cmd->discard_nr_blocks = 0;
386 
387 	r = __write_initial_superblock(cmd);
388 	if (r)
389 		goto bad;
390 
391 	cmd->clean_when_opened = true;
392 	return 0;
393 
394 bad:
395 	dm_tm_destroy(cmd->tm);
396 	dm_sm_destroy(cmd->metadata_sm);
397 
398 	return r;
399 }
400 
401 static int __check_incompat_features(struct cache_disk_superblock *disk_super,
402 				     struct dm_cache_metadata *cmd)
403 {
404 	uint32_t features;
405 
406 	features = le32_to_cpu(disk_super->incompat_flags) & ~DM_CACHE_FEATURE_INCOMPAT_SUPP;
407 	if (features) {
408 		DMERR("could not access metadata due to unsupported optional features (%lx).",
409 		      (unsigned long)features);
410 		return -EINVAL;
411 	}
412 
413 	/*
414 	 * Check for read-only metadata to skip the following RDWR checks.
415 	 */
416 	if (get_disk_ro(cmd->bdev->bd_disk))
417 		return 0;
418 
419 	features = le32_to_cpu(disk_super->compat_ro_flags) & ~DM_CACHE_FEATURE_COMPAT_RO_SUPP;
420 	if (features) {
421 		DMERR("could not access metadata RDWR due to unsupported optional features (%lx).",
422 		      (unsigned long)features);
423 		return -EINVAL;
424 	}
425 
426 	return 0;
427 }
428 
429 static int __open_metadata(struct dm_cache_metadata *cmd)
430 {
431 	int r;
432 	struct dm_block *sblock;
433 	struct cache_disk_superblock *disk_super;
434 	unsigned long sb_flags;
435 
436 	r = superblock_read_lock(cmd, &sblock);
437 	if (r < 0) {
438 		DMERR("couldn't read lock superblock");
439 		return r;
440 	}
441 
442 	disk_super = dm_block_data(sblock);
443 
444 	/* Verify the data block size hasn't changed */
445 	if (le32_to_cpu(disk_super->data_block_size) != cmd->data_block_size) {
446 		DMERR("changing the data block size (from %u to %llu) is not supported",
447 		      le32_to_cpu(disk_super->data_block_size),
448 		      (unsigned long long)cmd->data_block_size);
449 		r = -EINVAL;
450 		goto bad;
451 	}
452 
453 	r = __check_incompat_features(disk_super, cmd);
454 	if (r < 0)
455 		goto bad;
456 
457 	r = dm_tm_open_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
458 			       disk_super->metadata_space_map_root,
459 			       sizeof(disk_super->metadata_space_map_root),
460 			       &cmd->tm, &cmd->metadata_sm);
461 	if (r < 0) {
462 		DMERR("tm_open_with_sm failed");
463 		goto bad;
464 	}
465 
466 	__setup_mapping_info(cmd);
467 	dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
468 	sb_flags = le32_to_cpu(disk_super->flags);
469 	cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags);
470 	dm_bm_unlock(sblock);
471 
472 	return 0;
473 
474 bad:
475 	dm_bm_unlock(sblock);
476 	return r;
477 }
478 
479 static int __open_or_format_metadata(struct dm_cache_metadata *cmd,
480 				     bool format_device)
481 {
482 	int r;
483 	bool unformatted = false;
484 
485 	r = __superblock_all_zeroes(cmd->bm, &unformatted);
486 	if (r)
487 		return r;
488 
489 	if (unformatted)
490 		return format_device ? __format_metadata(cmd) : -EPERM;
491 
492 	return __open_metadata(cmd);
493 }
494 
495 static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
496 					    bool may_format_device)
497 {
498 	int r;
499 	cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
500 					  CACHE_METADATA_CACHE_SIZE,
501 					  CACHE_MAX_CONCURRENT_LOCKS);
502 	if (IS_ERR(cmd->bm)) {
503 		DMERR("could not create block manager");
504 		return PTR_ERR(cmd->bm);
505 	}
506 
507 	r = __open_or_format_metadata(cmd, may_format_device);
508 	if (r)
509 		dm_block_manager_destroy(cmd->bm);
510 
511 	return r;
512 }
513 
/*
 * Tear down the space map, the transaction manager and the block
 * manager, in that order.
 */
static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd)
{
	dm_sm_destroy(cmd->metadata_sm);
	dm_tm_destroy(cmd->tm);
	dm_block_manager_destroy(cmd->bm);
}
520 
521 typedef unsigned long (*flags_mutator)(unsigned long);
522 
523 static void update_flags(struct cache_disk_superblock *disk_super,
524 			 flags_mutator mutator)
525 {
526 	uint32_t sb_flags = mutator(le32_to_cpu(disk_super->flags));
527 	disk_super->flags = cpu_to_le32(sb_flags);
528 }
529 
530 static unsigned long set_clean_shutdown(unsigned long flags)
531 {
532 	set_bit(CLEAN_SHUTDOWN, &flags);
533 	return flags;
534 }
535 
536 static unsigned long clear_clean_shutdown(unsigned long flags)
537 {
538 	clear_bit(CLEAN_SHUTDOWN, &flags);
539 	return flags;
540 }
541 
542 static void read_superblock_fields(struct dm_cache_metadata *cmd,
543 				   struct cache_disk_superblock *disk_super)
544 {
545 	cmd->flags = le32_to_cpu(disk_super->flags);
546 	cmd->root = le64_to_cpu(disk_super->mapping_root);
547 	cmd->hint_root = le64_to_cpu(disk_super->hint_root);
548 	cmd->discard_root = le64_to_cpu(disk_super->discard_root);
549 	cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size);
550 	cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks));
551 	cmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
552 	cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks));
553 	strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
554 	cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]);
555 	cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]);
556 	cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]);
557 	cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size);
558 
559 	cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits);
560 	cmd->stats.read_misses = le32_to_cpu(disk_super->read_misses);
561 	cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits);
562 	cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses);
563 
564 	cmd->changed = false;
565 }
566 
567 /*
568  * The mutator updates the superblock flags.
569  */
570 static int __begin_transaction_flags(struct dm_cache_metadata *cmd,
571 				     flags_mutator mutator)
572 {
573 	int r;
574 	struct cache_disk_superblock *disk_super;
575 	struct dm_block *sblock;
576 
577 	r = superblock_lock(cmd, &sblock);
578 	if (r)
579 		return r;
580 
581 	disk_super = dm_block_data(sblock);
582 	update_flags(disk_super, mutator);
583 	read_superblock_fields(cmd, disk_super);
584 	dm_bm_unlock(sblock);
585 
586 	return dm_bm_flush(cmd->bm);
587 }
588 
/*
 * Start a transaction by reloading the in-core fields from the
 * superblock.  Returns 0 on success or the error from taking the
 * superblock read lock.
 */
static int __begin_transaction(struct dm_cache_metadata *cmd)
{
	struct dm_block *sblock;
	int r;

	/*
	 * We re-read the superblock every time.  Shouldn't need to do this
	 * really.
	 */
	r = superblock_read_lock(cmd, &sblock);
	if (r)
		return r;

	read_superblock_fields(cmd, dm_block_data(sblock));
	dm_bm_unlock(sblock);

	return 0;
}
609 
610 static int __commit_transaction(struct dm_cache_metadata *cmd,
611 				flags_mutator mutator)
612 {
613 	int r;
614 	struct cache_disk_superblock *disk_super;
615 	struct dm_block *sblock;
616 
617 	/*
618 	 * We need to know if the cache_disk_superblock exceeds a 512-byte sector.
619 	 */
620 	BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512);
621 
622 	r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root,
623 			    &cmd->discard_root);
624 	if (r)
625 		return r;
626 
627 	r = dm_tm_pre_commit(cmd->tm);
628 	if (r < 0)
629 		return r;
630 
631 	r = __save_sm_root(cmd);
632 	if (r)
633 		return r;
634 
635 	r = superblock_lock(cmd, &sblock);
636 	if (r)
637 		return r;
638 
639 	disk_super = dm_block_data(sblock);
640 
641 	disk_super->flags = cpu_to_le32(cmd->flags);
642 	if (mutator)
643 		update_flags(disk_super, mutator);
644 
645 	disk_super->mapping_root = cpu_to_le64(cmd->root);
646 	disk_super->hint_root = cpu_to_le64(cmd->hint_root);
647 	disk_super->discard_root = cpu_to_le64(cmd->discard_root);
648 	disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
649 	disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
650 	disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks));
651 	strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
652 	disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
653 	disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]);
654 	disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]);
655 
656 	disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits);
657 	disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses);
658 	disk_super->write_hits = cpu_to_le32(cmd->stats.write_hits);
659 	disk_super->write_misses = cpu_to_le32(cmd->stats.write_misses);
660 	__copy_sm_root(cmd, disk_super);
661 
662 	return dm_tm_commit(cmd->tm, sblock);
663 }
664 
665 /*----------------------------------------------------------------*/
666 
/*
 * The mappings are held in a dm-array that has 64-bit values stored in
 * little-endian format.  The index is the cblock, the high 48 bits of the
 * value are the oblock and the low 16 bits are the flags.
 */
672 #define FLAGS_MASK ((1 << 16) - 1)
673 
674 static __le64 pack_value(dm_oblock_t block, unsigned flags)
675 {
676 	uint64_t value = from_oblock(block);
677 	value <<= 16;
678 	value = value | (flags & FLAGS_MASK);
679 	return cpu_to_le64(value);
680 }
681 
682 static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags)
683 {
684 	uint64_t value = le64_to_cpu(value_le);
685 	uint64_t b = value >> 16;
686 	*block = to_oblock(b);
687 	*flags = value & FLAGS_MASK;
688 }
689 
690 /*----------------------------------------------------------------*/
691 
692 static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
693 					       sector_t data_block_size,
694 					       bool may_format_device,
695 					       size_t policy_hint_size)
696 {
697 	int r;
698 	struct dm_cache_metadata *cmd;
699 
700 	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
701 	if (!cmd) {
702 		DMERR("could not allocate metadata struct");
703 		return ERR_PTR(-ENOMEM);
704 	}
705 
706 	atomic_set(&cmd->ref_count, 1);
707 	init_rwsem(&cmd->root_lock);
708 	cmd->bdev = bdev;
709 	cmd->data_block_size = data_block_size;
710 	cmd->cache_blocks = 0;
711 	cmd->policy_hint_size = policy_hint_size;
712 	cmd->changed = true;
713 	cmd->fail_io = false;
714 
715 	r = __create_persistent_data_objects(cmd, may_format_device);
716 	if (r) {
717 		kfree(cmd);
718 		return ERR_PTR(r);
719 	}
720 
721 	r = __begin_transaction_flags(cmd, clear_clean_shutdown);
722 	if (r < 0) {
723 		dm_cache_metadata_close(cmd);
724 		return ERR_PTR(r);
725 	}
726 
727 	return cmd;
728 }
729 
730 /*
731  * We keep a little list of ref counted metadata objects to prevent two
732  * different target instances creating separate bufio instances.  This is
733  * an issue if a table is reloaded before the suspend.
734  */
735 static DEFINE_MUTEX(table_lock);
736 static LIST_HEAD(table);
737 
738 static struct dm_cache_metadata *lookup(struct block_device *bdev)
739 {
740 	struct dm_cache_metadata *cmd;
741 
742 	list_for_each_entry(cmd, &table, list)
743 		if (cmd->bdev == bdev) {
744 			atomic_inc(&cmd->ref_count);
745 			return cmd;
746 		}
747 
748 	return NULL;
749 }
750 
751 static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
752 						sector_t data_block_size,
753 						bool may_format_device,
754 						size_t policy_hint_size)
755 {
756 	struct dm_cache_metadata *cmd, *cmd2;
757 
758 	mutex_lock(&table_lock);
759 	cmd = lookup(bdev);
760 	mutex_unlock(&table_lock);
761 
762 	if (cmd)
763 		return cmd;
764 
765 	cmd = metadata_open(bdev, data_block_size, may_format_device, policy_hint_size);
766 	if (!IS_ERR(cmd)) {
767 		mutex_lock(&table_lock);
768 		cmd2 = lookup(bdev);
769 		if (cmd2) {
770 			mutex_unlock(&table_lock);
771 			__destroy_persistent_data_objects(cmd);
772 			kfree(cmd);
773 			return cmd2;
774 		}
775 		list_add(&cmd->list, &table);
776 		mutex_unlock(&table_lock);
777 	}
778 
779 	return cmd;
780 }
781 
782 static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size)
783 {
784 	if (cmd->data_block_size != data_block_size) {
785 		DMERR("data_block_size (%llu) different from that in metadata (%llu)\n",
786 		      (unsigned long long) data_block_size,
787 		      (unsigned long long) cmd->data_block_size);
788 		return false;
789 	}
790 
791 	return true;
792 }
793 
794 struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
795 						 sector_t data_block_size,
796 						 bool may_format_device,
797 						 size_t policy_hint_size)
798 {
799 	struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size,
800 						       may_format_device, policy_hint_size);
801 
802 	if (!IS_ERR(cmd) && !same_params(cmd, data_block_size)) {
803 		dm_cache_metadata_close(cmd);
804 		return ERR_PTR(-EINVAL);
805 	}
806 
807 	return cmd;
808 }
809 
810 void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
811 {
812 	if (atomic_dec_and_test(&cmd->ref_count)) {
813 		mutex_lock(&table_lock);
814 		list_del(&cmd->list);
815 		mutex_unlock(&table_lock);
816 
817 		if (!cmd->fail_io)
818 			__destroy_persistent_data_objects(cmd);
819 		kfree(cmd);
820 	}
821 }
822 
823 /*
824  * Checks that the given cache block is either unmapped or clean.
825  */
826 static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b,
827 				   bool *result)
828 {
829 	int r;
830 	__le64 value;
831 	dm_oblock_t ob;
832 	unsigned flags;
833 
834 	r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(b), &value);
835 	if (r) {
836 		DMERR("block_unmapped_or_clean failed");
837 		return r;
838 	}
839 
840 	unpack_value(value, &ob, &flags);
841 	*result = !((flags & M_VALID) && (flags & M_DIRTY));
842 
843 	return 0;
844 }
845 
846 static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
847 					dm_cblock_t begin, dm_cblock_t end,
848 					bool *result)
849 {
850 	int r;
851 	*result = true;
852 
853 	while (begin != end) {
854 		r = block_unmapped_or_clean(cmd, begin, result);
855 		if (r)
856 			return r;
857 
858 		if (!*result) {
859 			DMERR("cache block %llu is dirty",
860 			      (unsigned long long) from_cblock(begin));
861 			return 0;
862 		}
863 
864 		begin = to_cblock(from_cblock(begin) + 1);
865 	}
866 
867 	return 0;
868 }
869 
870 static bool cmd_write_lock(struct dm_cache_metadata *cmd)
871 {
872 	down_write(&cmd->root_lock);
873 	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
874 		up_write(&cmd->root_lock);
875 		return false;
876 	}
877 	return true;
878 }
879 
/*
 * Take the write lock, or return -EINVAL from the *enclosing function*
 * if cmd_write_lock() refuses it.
 */
#define WRITE_LOCK(cmd)				\
	do {					\
		if (!cmd_write_lock((cmd)))	\
			return -EINVAL;		\
	} while(0)

/* As WRITE_LOCK(), for enclosing functions that return void. */
#define WRITE_LOCK_VOID(cmd)			\
	do {					\
		if (!cmd_write_lock((cmd)))	\
			return;			\
	} while(0)

/* Release the lock taken by WRITE_LOCK()/WRITE_LOCK_VOID(). */
#define WRITE_UNLOCK(cmd) \
	up_write(&(cmd)->root_lock)
894 
895 static bool cmd_read_lock(struct dm_cache_metadata *cmd)
896 {
897 	down_read(&cmd->root_lock);
898 	if (cmd->fail_io) {
899 		up_read(&cmd->root_lock);
900 		return false;
901 	}
902 	return true;
903 }
904 
/*
 * Take the read lock, or return -EINVAL from the *enclosing function*
 * if cmd_read_lock() refuses it.
 */
#define READ_LOCK(cmd)				\
	do {					\
		if (!cmd_read_lock((cmd)))	\
			return -EINVAL;		\
	} while(0)

/* As READ_LOCK(), for enclosing functions that return void. */
#define READ_LOCK_VOID(cmd)			\
	do {					\
		if (!cmd_read_lock((cmd)))	\
			return;			\
	} while(0)

/* Release the lock taken by READ_LOCK()/READ_LOCK_VOID(). */
#define READ_UNLOCK(cmd) \
	up_read(&(cmd)->root_lock)
919 
/*
 * Resize the mapping array to @new_cache_size cache blocks.
 *
 * Shrinking is refused with -EINVAL if any block in the range being
 * dropped is mapped and dirty.  New entries are filled with a null
 * mapping (no flags set).
 *
 * Returns 0 on success, -EINVAL if the write lock cannot be taken or
 * dirty blocks prevent a shrink, or an error from the array code.
 */
int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
{
	int r;
	bool clean;
	__le64 null_mapping = pack_value(0, 0);

	/* May return -EINVAL from here, before the value is blessed. */
	WRITE_LOCK(cmd);
	__dm_bless_for_disk(&null_mapping);

	if (from_cblock(new_cache_size) < from_cblock(cmd->cache_blocks)) {
		r = blocks_are_unmapped_or_clean(cmd, new_cache_size, cmd->cache_blocks, &clean);
		if (r) {
			/* The value never reaches dm_array_resize(), so unbless it here. */
			__dm_unbless_for_disk(&null_mapping);
			goto out;
		}

		if (!clean) {
			DMERR("unable to shrink cache due to dirty blocks");
			r = -EINVAL;
			__dm_unbless_for_disk(&null_mapping);
			goto out;
		}
	}

	r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks),
			    from_cblock(new_cache_size),
			    &null_mapping, &cmd->root);
	if (!r)
		cmd->cache_blocks = new_cache_size;
	/* Marked changed whether or not the resize succeeded. */
	cmd->changed = true;

out:
	WRITE_UNLOCK(cmd);

	return r;
}
956 
/*
 * Resize the discard bitset to @new_nr_entries bits, with new bits
 * initialised to false.  On success the new discard block size and
 * entry count are recorded in @cmd.
 *
 * Returns 0 on success, -EINVAL if the write lock cannot be taken, or
 * the error from dm_bitset_resize().
 */
int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
				   sector_t discard_block_size,
				   dm_dblock_t new_nr_entries)
{
	int r;

	WRITE_LOCK(cmd);
	r = dm_bitset_resize(&cmd->discard_info,
			     cmd->discard_root,
			     from_dblock(cmd->discard_nr_blocks),
			     from_dblock(new_nr_entries),
			     false, &cmd->discard_root);
	if (!r) {
		cmd->discard_block_size = discard_block_size;
		cmd->discard_nr_blocks = new_nr_entries;
	}

	/* Marked changed whether or not the resize succeeded. */
	cmd->changed = true;
	WRITE_UNLOCK(cmd);

	return r;
}
979 
980 static int __set_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
981 {
982 	return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root,
983 				 from_dblock(b), &cmd->discard_root);
984 }
985 
986 static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
987 {
988 	return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root,
989 				   from_dblock(b), &cmd->discard_root);
990 }
991 
992 static int __is_discarded(struct dm_cache_metadata *cmd, dm_dblock_t b,
993 			  bool *is_discarded)
994 {
995 	return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root,
996 				  from_dblock(b), &cmd->discard_root,
997 				  is_discarded);
998 }
999 
1000 static int __discard(struct dm_cache_metadata *cmd,
1001 		     dm_dblock_t dblock, bool discard)
1002 {
1003 	int r;
1004 
1005 	r = (discard ? __set_discard : __clear_discard)(cmd, dblock);
1006 	if (r)
1007 		return r;
1008 
1009 	cmd->changed = true;
1010 	return 0;
1011 }
1012 
/*
 * Set or clear the discard bit for @dblock under the write lock.
 *
 * Returns 0 on success, -EINVAL if the write lock cannot be taken, or
 * the error from __discard().
 */
int dm_cache_set_discard(struct dm_cache_metadata *cmd,
			 dm_dblock_t dblock, bool discard)
{
	int r;

	WRITE_LOCK(cmd);
	r = __discard(cmd, dblock, discard);
	WRITE_UNLOCK(cmd);

	return r;
}
1024 
1025 static int __load_discards(struct dm_cache_metadata *cmd,
1026 			   load_discard_fn fn, void *context)
1027 {
1028 	int r = 0;
1029 	dm_block_t b;
1030 	bool discard;
1031 
1032 	for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
1033 		dm_dblock_t dblock = to_dblock(b);
1034 
1035 		if (cmd->clean_when_opened) {
1036 			r = __is_discarded(cmd, dblock, &discard);
1037 			if (r)
1038 				return r;
1039 		} else
1040 			discard = false;
1041 
1042 		r = fn(context, cmd->discard_block_size, dblock, discard);
1043 		if (r)
1044 			break;
1045 	}
1046 
1047 	return r;
1048 }
1049 
/*
 * Report the discard state of every discard block to @fn, under the
 * read lock.
 *
 * Returns -EINVAL if the read lock cannot be taken, otherwise the result
 * of __load_discards().
 */
int dm_cache_load_discards(struct dm_cache_metadata *cmd,
			   load_discard_fn fn, void *context)
{
	int r;

	READ_LOCK(cmd);
	r = __load_discards(cmd, fn, context);
	READ_UNLOCK(cmd);

	return r;
}
1061 
/*
 * Store the current number of cache blocks in *result.
 *
 * Returns 0 on success, or -EINVAL (leaving *result untouched) if the
 * read lock cannot be taken.
 */
int dm_cache_size(struct dm_cache_metadata *cmd, dm_cblock_t *result)
{
	READ_LOCK(cmd);
	*result = cmd->cache_blocks;
	READ_UNLOCK(cmd);

	return 0;
}
1070 
1071 static int __remove(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
1072 {
1073 	int r;
1074 	__le64 value = pack_value(0, 0);
1075 
1076 	__dm_bless_for_disk(&value);
1077 	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
1078 			       &value, &cmd->root);
1079 	if (r)
1080 		return r;
1081 
1082 	cmd->changed = true;
1083 	return 0;
1084 }
1085 
/*
 * Remove the mapping for @cblock under the write lock.
 *
 * Returns 0 on success, -EINVAL if the write lock cannot be taken, or
 * the error from __remove().
 */
int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
{
	int r;

	WRITE_LOCK(cmd);
	r = __remove(cmd, cblock);
	WRITE_UNLOCK(cmd);

	return r;
}
1096 
1097 static int __insert(struct dm_cache_metadata *cmd,
1098 		    dm_cblock_t cblock, dm_oblock_t oblock)
1099 {
1100 	int r;
1101 	__le64 value = pack_value(oblock, M_VALID);
1102 	__dm_bless_for_disk(&value);
1103 
1104 	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
1105 			       &value, &cmd->root);
1106 	if (r)
1107 		return r;
1108 
1109 	cmd->changed = true;
1110 	return 0;
1111 }
1112 
/*
 * Insert a cblock -> oblock mapping under the write lock.
 *
 * Returns 0 on success, -EINVAL if the write lock cannot be taken, or
 * the error from __insert().
 */
int dm_cache_insert_mapping(struct dm_cache_metadata *cmd,
			    dm_cblock_t cblock, dm_oblock_t oblock)
{
	int r;

	WRITE_LOCK(cmd);
	r = __insert(cmd, cblock, oblock);
	WRITE_UNLOCK(cmd);

	return r;
}
1124 
/*
 * Context handed to __load_mapping() for each mapping array entry.
 */
struct thunk {
	/* Callback invoked for each valid mapping, and the context passed to it. */
	load_mapping_fn fn;
	void *context;

	struct dm_cache_metadata *cmd;
	/* If false, every valid mapping is reported to fn as dirty. */
	bool respect_dirty_flags;
	/* If true, the hint for each valid mapping is read from the hint array. */
	bool hints_valid;
};
1133 
1134 static bool policy_unchanged(struct dm_cache_metadata *cmd,
1135 			     struct dm_cache_policy *policy)
1136 {
1137 	const char *policy_name = dm_cache_policy_get_name(policy);
1138 	const unsigned *policy_version = dm_cache_policy_get_version(policy);
1139 	size_t policy_hint_size = dm_cache_policy_get_hint_size(policy);
1140 
1141 	/*
1142 	 * Ensure policy names match.
1143 	 */
1144 	if (strncmp(cmd->policy_name, policy_name, sizeof(cmd->policy_name)))
1145 		return false;
1146 
1147 	/*
1148 	 * Ensure policy major versions match.
1149 	 */
1150 	if (cmd->policy_version[0] != policy_version[0])
1151 		return false;
1152 
1153 	/*
1154 	 * Ensure policy hint sizes match.
1155 	 */
1156 	if (cmd->policy_hint_size != policy_hint_size)
1157 		return false;
1158 
1159 	return true;
1160 }
1161 
1162 static bool hints_array_initialized(struct dm_cache_metadata *cmd)
1163 {
1164 	return cmd->hint_root && cmd->policy_hint_size;
1165 }
1166 
1167 static bool hints_array_available(struct dm_cache_metadata *cmd,
1168 				  struct dm_cache_policy *policy)
1169 {
1170 	return cmd->clean_when_opened && policy_unchanged(cmd, policy) &&
1171 		hints_array_initialized(cmd);
1172 }
1173 
1174 static int __load_mapping(void *context, uint64_t cblock, void *leaf)
1175 {
1176 	int r = 0;
1177 	bool dirty;
1178 	__le64 value;
1179 	__le32 hint_value = 0;
1180 	dm_oblock_t oblock;
1181 	unsigned flags;
1182 	struct thunk *thunk = context;
1183 	struct dm_cache_metadata *cmd = thunk->cmd;
1184 
1185 	memcpy(&value, leaf, sizeof(value));
1186 	unpack_value(value, &oblock, &flags);
1187 
1188 	if (flags & M_VALID) {
1189 		if (thunk->hints_valid) {
1190 			r = dm_array_get_value(&cmd->hint_info, cmd->hint_root,
1191 					       cblock, &hint_value);
1192 			if (r && r != -ENODATA)
1193 				return r;
1194 		}
1195 
1196 		dirty = thunk->respect_dirty_flags ? (flags & M_DIRTY) : true;
1197 		r = thunk->fn(thunk->context, oblock, to_cblock(cblock),
1198 			      dirty, le32_to_cpu(hint_value), thunk->hints_valid);
1199 	}
1200 
1201 	return r;
1202 }
1203 
1204 static int __load_mappings(struct dm_cache_metadata *cmd,
1205 			   struct dm_cache_policy *policy,
1206 			   load_mapping_fn fn, void *context)
1207 {
1208 	struct thunk thunk;
1209 
1210 	thunk.fn = fn;
1211 	thunk.context = context;
1212 
1213 	thunk.cmd = cmd;
1214 	thunk.respect_dirty_flags = cmd->clean_when_opened;
1215 	thunk.hints_valid = hints_array_available(cmd, policy);
1216 
1217 	return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk);
1218 }
1219 
1220 int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
1221 			   struct dm_cache_policy *policy,
1222 			   load_mapping_fn fn, void *context)
1223 {
1224 	int r;
1225 
1226 	READ_LOCK(cmd);
1227 	r = __load_mappings(cmd, policy, fn, context);
1228 	READ_UNLOCK(cmd);
1229 
1230 	return r;
1231 }
1232 
1233 static int __dump_mapping(void *context, uint64_t cblock, void *leaf)
1234 {
1235 	int r = 0;
1236 	__le64 value;
1237 	dm_oblock_t oblock;
1238 	unsigned flags;
1239 
1240 	memcpy(&value, leaf, sizeof(value));
1241 	unpack_value(value, &oblock, &flags);
1242 
1243 	return r;
1244 }
1245 
1246 static int __dump_mappings(struct dm_cache_metadata *cmd)
1247 {
1248 	return dm_array_walk(&cmd->info, cmd->root, __dump_mapping, NULL);
1249 }
1250 
/*
 * Runs __dump_mappings() between READ_LOCK_VOID(cmd) and
 * READ_UNLOCK(cmd).  The walk's return value is discarded.
 */
void dm_cache_dump(struct dm_cache_metadata *cmd)
{
	READ_LOCK_VOID(cmd);
	__dump_mappings(cmd);
	READ_UNLOCK(cmd);
}
1257 
1258 int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd)
1259 {
1260 	int r;
1261 
1262 	READ_LOCK(cmd);
1263 	r = cmd->changed;
1264 	READ_UNLOCK(cmd);
1265 
1266 	return r;
1267 }
1268 
1269 static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty)
1270 {
1271 	int r;
1272 	unsigned flags;
1273 	dm_oblock_t oblock;
1274 	__le64 value;
1275 
1276 	r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(cblock), &value);
1277 	if (r)
1278 		return r;
1279 
1280 	unpack_value(value, &oblock, &flags);
1281 
1282 	if (((flags & M_DIRTY) && dirty) || (!(flags & M_DIRTY) && !dirty))
1283 		/* nothing to be done */
1284 		return 0;
1285 
1286 	value = pack_value(oblock, (flags & ~M_DIRTY) | (dirty ? M_DIRTY : 0));
1287 	__dm_bless_for_disk(&value);
1288 
1289 	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
1290 			       &value, &cmd->root);
1291 	if (r)
1292 		return r;
1293 
1294 	cmd->changed = true;
1295 	return 0;
1296 
1297 }
1298 
1299 int dm_cache_set_dirty(struct dm_cache_metadata *cmd,
1300 		       dm_cblock_t cblock, bool dirty)
1301 {
1302 	int r;
1303 
1304 	WRITE_LOCK(cmd);
1305 	r = __dirty(cmd, cblock, dirty);
1306 	WRITE_UNLOCK(cmd);
1307 
1308 	return r;
1309 }
1310 
/*
 * Copies cmd->stats into *stats (struct assignment) between
 * READ_LOCK_VOID(cmd) and READ_UNLOCK(cmd).
 */
void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd,
				 struct dm_cache_statistics *stats)
{
	READ_LOCK_VOID(cmd);
	*stats = cmd->stats;
	READ_UNLOCK(cmd);
}
1318 
/*
 * Replaces cmd->stats with a copy of *stats (struct assignment) between
 * WRITE_LOCK_VOID(cmd) and WRITE_UNLOCK(cmd).  cmd->changed is not
 * touched here.
 */
void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd,
				 struct dm_cache_statistics *stats)
{
	WRITE_LOCK_VOID(cmd);
	cmd->stats = *stats;
	WRITE_UNLOCK(cmd);
}
1326 
1327 int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown)
1328 {
1329 	int r;
1330 	flags_mutator mutator = (clean_shutdown ? set_clean_shutdown :
1331 				 clear_clean_shutdown);
1332 
1333 	WRITE_LOCK(cmd);
1334 	r = __commit_transaction(cmd, mutator);
1335 	if (r)
1336 		goto out;
1337 
1338 	r = __begin_transaction(cmd);
1339 
1340 out:
1341 	WRITE_UNLOCK(cmd);
1342 	return r;
1343 }
1344 
1345 int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd,
1346 					   dm_block_t *result)
1347 {
1348 	int r = -EINVAL;
1349 
1350 	READ_LOCK(cmd);
1351 	r = dm_sm_get_nr_free(cmd->metadata_sm, result);
1352 	READ_UNLOCK(cmd);
1353 
1354 	return r;
1355 }
1356 
1357 int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd,
1358 				   dm_block_t *result)
1359 {
1360 	int r = -EINVAL;
1361 
1362 	READ_LOCK(cmd);
1363 	r = dm_sm_get_nr_blocks(cmd->metadata_sm, result);
1364 	READ_UNLOCK(cmd);
1365 
1366 	return r;
1367 }
1368 
1369 /*----------------------------------------------------------------*/
1370 
1371 static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
1372 {
1373 	int r;
1374 	__le32 value;
1375 	size_t hint_size;
1376 	const char *policy_name = dm_cache_policy_get_name(policy);
1377 	const unsigned *policy_version = dm_cache_policy_get_version(policy);
1378 
1379 	if (!policy_name[0] ||
1380 	    (strlen(policy_name) > sizeof(cmd->policy_name) - 1))
1381 		return -EINVAL;
1382 
1383 	if (!policy_unchanged(cmd, policy)) {
1384 		strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
1385 		memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));
1386 
1387 		hint_size = dm_cache_policy_get_hint_size(policy);
1388 		if (!hint_size)
1389 			return 0; /* short-circuit hints initialization */
1390 		cmd->policy_hint_size = hint_size;
1391 
1392 		if (cmd->hint_root) {
1393 			r = dm_array_del(&cmd->hint_info, cmd->hint_root);
1394 			if (r)
1395 				return r;
1396 		}
1397 
1398 		r = dm_array_empty(&cmd->hint_info, &cmd->hint_root);
1399 		if (r)
1400 			return r;
1401 
1402 		value = cpu_to_le32(0);
1403 		__dm_bless_for_disk(&value);
1404 		r = dm_array_resize(&cmd->hint_info, cmd->hint_root, 0,
1405 				    from_cblock(cmd->cache_blocks),
1406 				    &value, &cmd->hint_root);
1407 		if (r)
1408 			return r;
1409 	}
1410 
1411 	return 0;
1412 }
1413 
1414 static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock, uint32_t hint)
1415 {
1416 	struct dm_cache_metadata *cmd = context;
1417 	__le32 value = cpu_to_le32(hint);
1418 	int r;
1419 
1420 	__dm_bless_for_disk(&value);
1421 
1422 	r = dm_array_set_value(&cmd->hint_info, cmd->hint_root,
1423 			       from_cblock(cblock), &value, &cmd->hint_root);
1424 	cmd->changed = true;
1425 
1426 	return r;
1427 }
1428 
1429 static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
1430 {
1431 	int r;
1432 
1433 	r = begin_hints(cmd, policy);
1434 	if (r) {
1435 		DMERR("begin_hints failed");
1436 		return r;
1437 	}
1438 
1439 	return policy_walk_mappings(policy, save_hint, cmd);
1440 }
1441 
/*
 * Runs write_hints() for @policy between WRITE_LOCK(cmd) and
 * WRITE_UNLOCK(cmd) and returns its result.
 */
int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
{
	int ret;

	WRITE_LOCK(cmd);
	ret = write_hints(cmd, policy);
	WRITE_UNLOCK(cmd);

	return ret;
}
1452 
1453 int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result)
1454 {
1455 	int r;
1456 
1457 	READ_LOCK(cmd);
1458 	r = blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result);
1459 	READ_UNLOCK(cmd);
1460 
1461 	return r;
1462 }
1463 
/*
 * Calls dm_bm_set_read_only() on cmd->bm between WRITE_LOCK_VOID(cmd)
 * and WRITE_UNLOCK(cmd).
 */
void dm_cache_metadata_set_read_only(struct dm_cache_metadata *cmd)
{
	WRITE_LOCK_VOID(cmd);
	dm_bm_set_read_only(cmd->bm);
	WRITE_UNLOCK(cmd);
}
1470 
/*
 * Calls dm_bm_set_read_write() on cmd->bm between WRITE_LOCK_VOID(cmd)
 * and WRITE_UNLOCK(cmd).
 * NOTE(review): WRITE_LOCK_VOID is defined outside this view; confirm it
 * does not bail out when the block manager is currently read-only, or
 * this call could never take effect.
 */
void dm_cache_metadata_set_read_write(struct dm_cache_metadata *cmd)
{
	WRITE_LOCK_VOID(cmd);
	dm_bm_set_read_write(cmd->bm);
	WRITE_UNLOCK(cmd);
}
1477 
1478 int dm_cache_metadata_set_needs_check(struct dm_cache_metadata *cmd)
1479 {
1480 	int r;
1481 	struct dm_block *sblock;
1482 	struct cache_disk_superblock *disk_super;
1483 
1484 	WRITE_LOCK(cmd);
1485 	set_bit(NEEDS_CHECK, &cmd->flags);
1486 
1487 	r = superblock_lock(cmd, &sblock);
1488 	if (r) {
1489 		DMERR("couldn't read superblock");
1490 		goto out;
1491 	}
1492 
1493 	disk_super = dm_block_data(sblock);
1494 	disk_super->flags = cpu_to_le32(cmd->flags);
1495 
1496 	dm_bm_unlock(sblock);
1497 
1498 out:
1499 	WRITE_UNLOCK(cmd);
1500 	return r;
1501 }
1502 
1503 int dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd, bool *result)
1504 {
1505 	READ_LOCK(cmd);
1506 	*result = !!test_bit(NEEDS_CHECK, &cmd->flags);
1507 	READ_UNLOCK(cmd);
1508 
1509 	return 0;
1510 }
1511 
1512 int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
1513 {
1514 	int r;
1515 
1516 	WRITE_LOCK(cmd);
1517 	__destroy_persistent_data_objects(cmd);
1518 	r = __create_persistent_data_objects(cmd, false);
1519 	if (r)
1520 		cmd->fail_io = true;
1521 	WRITE_UNLOCK(cmd);
1522 
1523 	return r;
1524 }
1525