xref: /openbmc/linux/drivers/md/dm-region-hash.c (revision a4a82ce3)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2003 Sistina Software Limited.
4  * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
5  *
6  * This file is released under the GPL.
7  */
8 
9 #include <linux/dm-dirty-log.h>
10 #include <linux/dm-region-hash.h>
11 
12 #include <linux/ctype.h>
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/slab.h>
16 #include <linux/vmalloc.h>
17 
18 #include "dm.h"
19 
20 #define	DM_MSG_PREFIX	"region hash"
21 
22 /*
23  *------------------------------------------------------------------
24  * Region hash
25  *
26  * The mirror splits itself up into discrete regions.  Each
27  * region can be in one of three states: clean, dirty,
28  * nosync.  There is no need to put clean regions in the hash.
29  *
30  * In addition to being present in the hash table a region _may_
31  * be present on one of three lists.
32  *
33  *   clean_regions: Regions on this list have no io pending to
34  *   them, they are in sync, we are no longer interested in them,
35  *   they are dull.  dm_rh_update_states() will remove them from the
36  *   hash table.
37  *
38  *   quiesced_regions: These regions have been spun down, ready
39  *   for recovery.  dm_rh_recovery_start() will remove regions from
40  *   this list and hand them to kmirrord, which will schedule the
41  *   recovery io with kcopyd.
42  *
43  *   recovered_regions: Regions that kcopyd has successfully
44  *   recovered.  dm_rh_update_states() will now schedule any delayed
45  *   io, up the recovery_count, and remove the region from the
46  *   hash.
47  *
48  * There are 2 locks:
49  *   A rw spin lock 'hash_lock' protects just the hash table,
50  *   this is never held in write mode from interrupt context,
51  *   which I believe means that we only have to disable irqs when
52  *   doing a write lock.
53  *
54  *   An ordinary spin lock 'region_lock' that protects the three
55  *   lists in the region_hash, with the 'state', 'list' and
56  *   'delayed_bios' fields of the regions.  This is used from irq
57  *   context, so all other uses will have to suspend local irqs.
58  *------------------------------------------------------------------
59  */
60 struct dm_region_hash {
61 	uint32_t region_size;
62 	unsigned int region_shift;
63 
64 	/* holds persistent region state */
65 	struct dm_dirty_log *log;
66 
67 	/* hash table */
68 	rwlock_t hash_lock;
69 	unsigned int mask;
70 	unsigned int nr_buckets;
71 	unsigned int prime;
72 	unsigned int shift;
73 	struct list_head *buckets;
74 
75 	/*
76 	 * If there was a flush failure no regions can be marked clean.
77 	 */
78 	int flush_failure;
79 
80 	unsigned int max_recovery; /* Max # of regions to recover in parallel */
81 
82 	spinlock_t region_lock;
83 	atomic_t recovery_in_flight;
84 	struct list_head clean_regions;
85 	struct list_head quiesced_regions;
86 	struct list_head recovered_regions;
87 	struct list_head failed_recovered_regions;
88 	struct semaphore recovery_count;
89 
90 	mempool_t region_pool;
91 
92 	void *context;
93 	sector_t target_begin;
94 
95 	/* Callback function to schedule bios writes */
96 	void (*dispatch_bios)(void *context, struct bio_list *bios);
97 
98 	/* Callback function to wakeup callers worker thread. */
99 	void (*wakeup_workers)(void *context);
100 
101 	/* Callback function to wakeup callers recovery waiters. */
102 	void (*wakeup_all_recovery_waiters)(void *context);
103 };
104 
105 struct dm_region {
106 	struct dm_region_hash *rh;	/* FIXME: can we get rid of this ? */
107 	region_t key;
108 	int state;
109 
110 	struct list_head hash_list;
111 	struct list_head list;
112 
113 	atomic_t pending;
114 	struct bio_list delayed_bios;
115 };
116 
117 /*
118  * Conversion fns
119  */
dm_rh_sector_to_region(struct dm_region_hash * rh,sector_t sector)120 static region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector)
121 {
122 	return sector >> rh->region_shift;
123 }
124 
dm_rh_region_to_sector(struct dm_region_hash * rh,region_t region)125 sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region)
126 {
127 	return region << rh->region_shift;
128 }
129 EXPORT_SYMBOL_GPL(dm_rh_region_to_sector);
130 
dm_rh_bio_to_region(struct dm_region_hash * rh,struct bio * bio)131 region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio)
132 {
133 	return dm_rh_sector_to_region(rh, bio->bi_iter.bi_sector -
134 				      rh->target_begin);
135 }
136 EXPORT_SYMBOL_GPL(dm_rh_bio_to_region);
137 
dm_rh_region_context(struct dm_region * reg)138 void *dm_rh_region_context(struct dm_region *reg)
139 {
140 	return reg->rh->context;
141 }
142 EXPORT_SYMBOL_GPL(dm_rh_region_context);
143 
dm_rh_get_region_key(struct dm_region * reg)144 region_t dm_rh_get_region_key(struct dm_region *reg)
145 {
146 	return reg->key;
147 }
148 EXPORT_SYMBOL_GPL(dm_rh_get_region_key);
149 
dm_rh_get_region_size(struct dm_region_hash * rh)150 sector_t dm_rh_get_region_size(struct dm_region_hash *rh)
151 {
152 	return rh->region_size;
153 }
154 EXPORT_SYMBOL_GPL(dm_rh_get_region_size);
155 
156 /*
157  * FIXME: shall we pass in a structure instead of all these args to
158  * dm_region_hash_create()????
159  */
160 #define RH_HASH_MULT 2654435387U
161 #define RH_HASH_SHIFT 12
162 
163 #define MIN_REGIONS 64
dm_region_hash_create(void * context,void (* dispatch_bios)(void * context,struct bio_list * bios),void (* wakeup_workers)(void * context),void (* wakeup_all_recovery_waiters)(void * context),sector_t target_begin,unsigned int max_recovery,struct dm_dirty_log * log,uint32_t region_size,region_t nr_regions)164 struct dm_region_hash *dm_region_hash_create(
165 		void *context, void (*dispatch_bios)(void *context,
166 						     struct bio_list *bios),
167 		void (*wakeup_workers)(void *context),
168 		void (*wakeup_all_recovery_waiters)(void *context),
169 		sector_t target_begin, unsigned int max_recovery,
170 		struct dm_dirty_log *log, uint32_t region_size,
171 		region_t nr_regions)
172 {
173 	struct dm_region_hash *rh;
174 	unsigned int nr_buckets, max_buckets;
175 	size_t i;
176 	int ret;
177 
178 	/*
179 	 * Calculate a suitable number of buckets for our hash
180 	 * table.
181 	 */
182 	max_buckets = nr_regions >> 6;
183 	for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1)
184 		;
185 	nr_buckets >>= 1;
186 
187 	rh = kzalloc(sizeof(*rh), GFP_KERNEL);
188 	if (!rh) {
189 		DMERR("unable to allocate region hash memory");
190 		return ERR_PTR(-ENOMEM);
191 	}
192 
193 	rh->context = context;
194 	rh->dispatch_bios = dispatch_bios;
195 	rh->wakeup_workers = wakeup_workers;
196 	rh->wakeup_all_recovery_waiters = wakeup_all_recovery_waiters;
197 	rh->target_begin = target_begin;
198 	rh->max_recovery = max_recovery;
199 	rh->log = log;
200 	rh->region_size = region_size;
201 	rh->region_shift = __ffs(region_size);
202 	rwlock_init(&rh->hash_lock);
203 	rh->mask = nr_buckets - 1;
204 	rh->nr_buckets = nr_buckets;
205 
206 	rh->shift = RH_HASH_SHIFT;
207 	rh->prime = RH_HASH_MULT;
208 
209 	rh->buckets = vmalloc(array_size(nr_buckets, sizeof(*rh->buckets)));
210 	if (!rh->buckets) {
211 		DMERR("unable to allocate region hash bucket memory");
212 		kfree(rh);
213 		return ERR_PTR(-ENOMEM);
214 	}
215 
216 	for (i = 0; i < nr_buckets; i++)
217 		INIT_LIST_HEAD(rh->buckets + i);
218 
219 	spin_lock_init(&rh->region_lock);
220 	sema_init(&rh->recovery_count, 0);
221 	atomic_set(&rh->recovery_in_flight, 0);
222 	INIT_LIST_HEAD(&rh->clean_regions);
223 	INIT_LIST_HEAD(&rh->quiesced_regions);
224 	INIT_LIST_HEAD(&rh->recovered_regions);
225 	INIT_LIST_HEAD(&rh->failed_recovered_regions);
226 	rh->flush_failure = 0;
227 
228 	ret = mempool_init_kmalloc_pool(&rh->region_pool, MIN_REGIONS,
229 					sizeof(struct dm_region));
230 	if (ret) {
231 		vfree(rh->buckets);
232 		kfree(rh);
233 		rh = ERR_PTR(-ENOMEM);
234 	}
235 
236 	return rh;
237 }
238 EXPORT_SYMBOL_GPL(dm_region_hash_create);
239 
dm_region_hash_destroy(struct dm_region_hash * rh)240 void dm_region_hash_destroy(struct dm_region_hash *rh)
241 {
242 	unsigned int h;
243 	struct dm_region *reg, *nreg;
244 
245 	BUG_ON(!list_empty(&rh->quiesced_regions));
246 	for (h = 0; h < rh->nr_buckets; h++) {
247 		list_for_each_entry_safe(reg, nreg, rh->buckets + h,
248 					 hash_list) {
249 			BUG_ON(atomic_read(&reg->pending));
250 			mempool_free(reg, &rh->region_pool);
251 		}
252 	}
253 
254 	if (rh->log)
255 		dm_dirty_log_destroy(rh->log);
256 
257 	mempool_exit(&rh->region_pool);
258 	vfree(rh->buckets);
259 	kfree(rh);
260 }
261 EXPORT_SYMBOL_GPL(dm_region_hash_destroy);
262 
dm_rh_dirty_log(struct dm_region_hash * rh)263 struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh)
264 {
265 	return rh->log;
266 }
267 EXPORT_SYMBOL_GPL(dm_rh_dirty_log);
268 
/*
 * Map a region number to a bucket index: multiply by rh->prime, shift
 * right by rh->shift, truncate to unsigned int and mask with rh->mask.
 */
static unsigned int rh_hash(struct dm_region_hash *rh, region_t region)
{
	unsigned int mixed = (unsigned int) ((region * rh->prime) >> rh->shift);

	return mixed & rh->mask;
}
273 
/*
 * Search the bucket that @region hashes to.  Returns the matching
 * region, or NULL if it is not in the hash.  The callers in this file
 * hold rh->hash_lock around the call.
 */
static struct dm_region *__rh_lookup(struct dm_region_hash *rh, region_t region)
{
	struct list_head *head = rh->buckets + rh_hash(rh, region);
	struct dm_region *cur;

	list_for_each_entry(cur, head, hash_list) {
		if (cur->key == region)
			return cur;
	}

	return NULL;
}
285 
__rh_insert(struct dm_region_hash * rh,struct dm_region * reg)286 static void __rh_insert(struct dm_region_hash *rh, struct dm_region *reg)
287 {
288 	list_add(&reg->hash_list, rh->buckets + rh_hash(rh, reg->key));
289 }
290 
/*
 * Allocate a region for @region and insert it into the hash.
 *
 * The new region starts as DM_RH_CLEAN or DM_RH_NOSYNC depending on what
 * the dirty log's in_sync() reports; clean regions are also put on the
 * clean list.  The hash is re-checked under the write lock: if another
 * context inserted the same region first, the new object is freed and
 * the existing one is returned.
 *
 * Takes hash_lock for writing itself, so the caller must not hold it
 * (__rh_find() drops its read lock before calling).
 */
static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *fresh, *found;
	int in_sync;

	fresh = mempool_alloc(&rh->region_pool, GFP_ATOMIC);
	if (unlikely(!fresh))
		fresh = kmalloc(sizeof(*fresh), GFP_NOIO | __GFP_NOFAIL);

	in_sync = rh->log->type->in_sync(rh->log, region, 1);
	fresh->state = in_sync ? DM_RH_CLEAN : DM_RH_NOSYNC;
	fresh->rh = rh;
	fresh->key = region;
	INIT_LIST_HEAD(&fresh->list);
	atomic_set(&fresh->pending, 0);
	bio_list_init(&fresh->delayed_bios);

	write_lock_irq(&rh->hash_lock);
	found = __rh_lookup(rh, region);
	if (!found) {
		__rh_insert(rh, fresh);
		if (fresh->state == DM_RH_CLEAN) {
			spin_lock(&rh->region_lock);
			list_add(&fresh->list, &rh->clean_regions);
			spin_unlock(&rh->region_lock);
		}

		found = fresh;
	} else {
		/* We lost the race. */
		mempool_free(fresh, &rh->region_pool);
	}
	write_unlock_irq(&rh->hash_lock);

	return found;
}
326 
/*
 * Look @region up, allocating and hashing it if it is missing.
 *
 * Called with hash_lock held for reading.  On a miss the read lock is
 * dropped around __rh_alloc() and retaken before returning.
 */
static struct dm_region *__rh_find(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *found = __rh_lookup(rh, region);

	if (found)
		return found;

	read_unlock(&rh->hash_lock);
	found = __rh_alloc(rh, region);
	read_lock(&rh->hash_lock);

	return found;
}
340 
/*
 * Return the state of @region.
 *
 * A region that is in the hash reports its in-core state.  Otherwise
 * the dirty log's in_sync() is asked, with @may_block passed through:
 * a result of exactly 1 means DM_RH_CLEAN, anything else (including
 * errors such as -EWOULDBLOCK) is taken as DM_RH_NOSYNC.
 */
int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block)
{
	struct dm_region *reg;
	int in_sync;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	if (!reg) {
		/* Not in the hash, so fall back to the dirty log. */
		in_sync = rh->log->type->in_sync(rh->log, region, may_block);

		return in_sync == 1 ? DM_RH_CLEAN : DM_RH_NOSYNC;
	}

	return reg->state;
}
EXPORT_SYMBOL_GPL(dm_rh_get_state);
366 
complete_resync_work(struct dm_region * reg,int success)367 static void complete_resync_work(struct dm_region *reg, int success)
368 {
369 	struct dm_region_hash *rh = reg->rh;
370 
371 	rh->log->type->set_region_sync(rh->log, reg->key, success);
372 
373 	/*
374 	 * Dispatch the bios before we call 'wake_up_all'.
375 	 * This is important because if we are suspending,
376 	 * we want to know that recovery is complete and
377 	 * the work queue is flushed.  If we wake_up_all
378 	 * before we dispatch_bios (queue bios and call wake()),
379 	 * then we risk suspending before the work queue
380 	 * has been properly flushed.
381 	 */
382 	rh->dispatch_bios(rh->context, &reg->delayed_bios);
383 	if (atomic_dec_and_test(&rh->recovery_in_flight))
384 		rh->wakeup_all_recovery_waiters(rh->context);
385 	up(&rh->recovery_count);
386 }
387 
388 /* dm_rh_mark_nosync
389  * @ms
390  * @bio
391  *
392  * The bio was written on some mirror(s) but failed on other mirror(s).
393  * We can successfully endio the bio but should avoid the region being
394  * marked clean by setting the state DM_RH_NOSYNC.
395  *
396  * This function is _not_ safe in interrupt context!
397  */
dm_rh_mark_nosync(struct dm_region_hash * rh,struct bio * bio)398 void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
399 {
400 	unsigned long flags;
401 	struct dm_dirty_log *log = rh->log;
402 	struct dm_region *reg;
403 	region_t region = dm_rh_bio_to_region(rh, bio);
404 	int recovering = 0;
405 
406 	if (bio->bi_opf & REQ_PREFLUSH) {
407 		rh->flush_failure = 1;
408 		return;
409 	}
410 
411 	if (bio_op(bio) == REQ_OP_DISCARD)
412 		return;
413 
414 	/* We must inform the log that the sync count has changed. */
415 	log->type->set_region_sync(log, region, 0);
416 
417 	read_lock(&rh->hash_lock);
418 	reg = __rh_find(rh, region);
419 	read_unlock(&rh->hash_lock);
420 
421 	/* region hash entry should exist because write was in-flight */
422 	BUG_ON(!reg);
423 	BUG_ON(!list_empty(&reg->list));
424 
425 	spin_lock_irqsave(&rh->region_lock, flags);
426 	/*
427 	 * Possible cases:
428 	 *   1) DM_RH_DIRTY
429 	 *   2) DM_RH_NOSYNC: was dirty, other preceding writes failed
430 	 *   3) DM_RH_RECOVERING: flushing pending writes
431 	 * Either case, the region should have not been connected to list.
432 	 */
433 	recovering = (reg->state == DM_RH_RECOVERING);
434 	reg->state = DM_RH_NOSYNC;
435 	BUG_ON(!list_empty(&reg->list));
436 	spin_unlock_irqrestore(&rh->region_lock, flags);
437 
438 	if (recovering)
439 		complete_resync_work(reg, 0);
440 }
441 EXPORT_SYMBOL_GPL(dm_rh_mark_nosync);
442 
dm_rh_update_states(struct dm_region_hash * rh,int errors_handled)443 void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled)
444 {
445 	struct dm_region *reg, *next;
446 
447 	LIST_HEAD(clean);
448 	LIST_HEAD(recovered);
449 	LIST_HEAD(failed_recovered);
450 
451 	/*
452 	 * Quickly grab the lists.
453 	 */
454 	write_lock_irq(&rh->hash_lock);
455 	spin_lock(&rh->region_lock);
456 	if (!list_empty(&rh->clean_regions)) {
457 		list_splice_init(&rh->clean_regions, &clean);
458 
459 		list_for_each_entry(reg, &clean, list)
460 			list_del(&reg->hash_list);
461 	}
462 
463 	if (!list_empty(&rh->recovered_regions)) {
464 		list_splice_init(&rh->recovered_regions, &recovered);
465 
466 		list_for_each_entry(reg, &recovered, list)
467 			list_del(&reg->hash_list);
468 	}
469 
470 	if (!list_empty(&rh->failed_recovered_regions)) {
471 		list_splice_init(&rh->failed_recovered_regions,
472 				 &failed_recovered);
473 
474 		list_for_each_entry(reg, &failed_recovered, list)
475 			list_del(&reg->hash_list);
476 	}
477 
478 	spin_unlock(&rh->region_lock);
479 	write_unlock_irq(&rh->hash_lock);
480 
481 	/*
482 	 * All the regions on the recovered and clean lists have
483 	 * now been pulled out of the system, so no need to do
484 	 * any more locking.
485 	 */
486 	list_for_each_entry_safe(reg, next, &recovered, list) {
487 		rh->log->type->clear_region(rh->log, reg->key);
488 		complete_resync_work(reg, 1);
489 		mempool_free(reg, &rh->region_pool);
490 	}
491 
492 	list_for_each_entry_safe(reg, next, &failed_recovered, list) {
493 		complete_resync_work(reg, errors_handled ? 0 : 1);
494 		mempool_free(reg, &rh->region_pool);
495 	}
496 
497 	list_for_each_entry_safe(reg, next, &clean, list) {
498 		rh->log->type->clear_region(rh->log, reg->key);
499 		mempool_free(reg, &rh->region_pool);
500 	}
501 
502 	rh->log->type->flush(rh->log);
503 }
504 EXPORT_SYMBOL_GPL(dm_rh_update_states);
505 
/*
 * Account for one more pending I/O on @region.
 *
 * The region is found (or created) and its pending count raised.  A
 * region that was DM_RH_CLEAN becomes DM_RH_DIRTY, leaves the clean
 * list and is marked in the dirty log.  mark_region() is called after
 * region_lock has been released but while hash_lock is still held for
 * reading.
 */
static void rh_inc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;
	int was_clean;

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);

	spin_lock_irq(&rh->region_lock);
	atomic_inc(&reg->pending);

	was_clean = (reg->state == DM_RH_CLEAN);
	if (was_clean) {
		reg->state = DM_RH_DIRTY;
		list_del_init(&reg->list);	/* take off the clean list */
	}
	spin_unlock_irq(&rh->region_lock);

	if (was_clean)
		rh->log->type->mark_region(rh->log, reg->key);

	read_unlock(&rh->hash_lock);
}
528 
dm_rh_inc_pending(struct dm_region_hash * rh,struct bio_list * bios)529 void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
530 {
531 	struct bio *bio;
532 
533 	for (bio = bios->head; bio; bio = bio->bi_next) {
534 		if (bio->bi_opf & REQ_PREFLUSH || bio_op(bio) == REQ_OP_DISCARD)
535 			continue;
536 		rh_inc(rh, dm_rh_bio_to_region(rh, bio));
537 	}
538 }
539 EXPORT_SYMBOL_GPL(dm_rh_inc_pending);
540 
/*
 * Drop one pending I/O from @region.
 *
 * When the count reaches zero the region is moved on according to its
 * state and the workers are woken:
 *   - after a flush failure it becomes DM_RH_NOSYNC;
 *   - DM_RH_RECOVERING goes to the tail of the quiesced list;
 *   - DM_RH_DIRTY becomes DM_RH_CLEAN and goes onto the clean list;
 *   - DM_RH_NOSYNC is left alone and stays off every list.
 *
 * NOTE(review): the lookup result is used without a NULL check, so the
 * region is presumably guaranteed to be hashed by an earlier
 * dm_rh_inc_pending() -- confirm against callers.
 */
void dm_rh_dec(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;
	unsigned long flags;
	int idle;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irqsave(&rh->region_lock, flags);
	idle = atomic_dec_and_test(&reg->pending);
	if (idle) {
		/*
		 * No I/O is pending on this region any more and it is
		 * not yet connected to any list, so it can be moved to
		 * the list for its next action.
		 *
		 * A DM_RH_NOSYNC region is kept off the clean list; its
		 * hash entry remains in memory until the region is
		 * recovered or the map is reloaded.
		 */
		if (unlikely(rh->flush_failure)) {
			/*
			 * A write flush failed some time ago, so we
			 * don't know whether or not this write made it
			 * to the disk and must resync the device.
			 */
			reg->state = DM_RH_NOSYNC;
		} else if (reg->state == DM_RH_RECOVERING) {
			list_add_tail(&reg->list, &rh->quiesced_regions);
		} else if (reg->state == DM_RH_DIRTY) {
			reg->state = DM_RH_CLEAN;
			list_add(&reg->list, &rh->clean_regions);
		}
	}
	spin_unlock_irqrestore(&rh->region_lock, flags);

	if (idle)
		rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_dec);
586 
587 /*
588  * Starts quiescing a region in preparation for recovery.
589  */
__rh_recovery_prepare(struct dm_region_hash * rh)590 static int __rh_recovery_prepare(struct dm_region_hash *rh)
591 {
592 	int r;
593 	region_t region;
594 	struct dm_region *reg;
595 
596 	/*
597 	 * Ask the dirty log what's next.
598 	 */
599 	r = rh->log->type->get_resync_work(rh->log, &region);
600 	if (r <= 0)
601 		return r;
602 
603 	/*
604 	 * Get this region, and start it quiescing by setting the
605 	 * recovering flag.
606 	 */
607 	read_lock(&rh->hash_lock);
608 	reg = __rh_find(rh, region);
609 	read_unlock(&rh->hash_lock);
610 
611 	spin_lock_irq(&rh->region_lock);
612 	reg->state = DM_RH_RECOVERING;
613 
614 	/* Already quiesced ? */
615 	if (atomic_read(&reg->pending))
616 		list_del_init(&reg->list);
617 	else
618 		list_move(&reg->list, &rh->quiesced_regions);
619 
620 	spin_unlock_irq(&rh->region_lock);
621 
622 	return 1;
623 }
624 
dm_rh_recovery_prepare(struct dm_region_hash * rh)625 void dm_rh_recovery_prepare(struct dm_region_hash *rh)
626 {
627 	/* Extra reference to avoid race with dm_rh_stop_recovery */
628 	atomic_inc(&rh->recovery_in_flight);
629 
630 	while (!down_trylock(&rh->recovery_count)) {
631 		atomic_inc(&rh->recovery_in_flight);
632 		if (__rh_recovery_prepare(rh) <= 0) {
633 			atomic_dec(&rh->recovery_in_flight);
634 			up(&rh->recovery_count);
635 			break;
636 		}
637 	}
638 
639 	/* Drop the extra reference */
640 	if (atomic_dec_and_test(&rh->recovery_in_flight))
641 		rh->wakeup_all_recovery_waiters(rh->context);
642 }
643 EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare);
644 
645 /*
646  * Returns any quiesced regions.
647  */
dm_rh_recovery_start(struct dm_region_hash * rh)648 struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh)
649 {
650 	struct dm_region *reg = NULL;
651 
652 	spin_lock_irq(&rh->region_lock);
653 	if (!list_empty(&rh->quiesced_regions)) {
654 		reg = list_entry(rh->quiesced_regions.next,
655 				 struct dm_region, list);
656 		list_del_init(&reg->list);  /* remove from the quiesced list */
657 	}
658 	spin_unlock_irq(&rh->region_lock);
659 
660 	return reg;
661 }
662 EXPORT_SYMBOL_GPL(dm_rh_recovery_start);
663 
dm_rh_recovery_end(struct dm_region * reg,int success)664 void dm_rh_recovery_end(struct dm_region *reg, int success)
665 {
666 	struct dm_region_hash *rh = reg->rh;
667 
668 	spin_lock_irq(&rh->region_lock);
669 	if (success)
670 		list_add(&reg->list, &reg->rh->recovered_regions);
671 	else
672 		list_add(&reg->list, &reg->rh->failed_recovered_regions);
673 
674 	spin_unlock_irq(&rh->region_lock);
675 
676 	rh->wakeup_workers(rh->context);
677 }
678 EXPORT_SYMBOL_GPL(dm_rh_recovery_end);
679 
680 /* Return recovery in flight count. */
dm_rh_recovery_in_flight(struct dm_region_hash * rh)681 int dm_rh_recovery_in_flight(struct dm_region_hash *rh)
682 {
683 	return atomic_read(&rh->recovery_in_flight);
684 }
685 EXPORT_SYMBOL_GPL(dm_rh_recovery_in_flight);
686 
dm_rh_flush(struct dm_region_hash * rh)687 int dm_rh_flush(struct dm_region_hash *rh)
688 {
689 	return rh->log->type->flush(rh->log);
690 }
691 EXPORT_SYMBOL_GPL(dm_rh_flush);
692 
dm_rh_delay(struct dm_region_hash * rh,struct bio * bio)693 void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio)
694 {
695 	struct dm_region *reg;
696 
697 	read_lock(&rh->hash_lock);
698 	reg = __rh_find(rh, dm_rh_bio_to_region(rh, bio));
699 	bio_list_add(&reg->delayed_bios, bio);
700 	read_unlock(&rh->hash_lock);
701 }
702 EXPORT_SYMBOL_GPL(dm_rh_delay);
703 
dm_rh_stop_recovery(struct dm_region_hash * rh)704 void dm_rh_stop_recovery(struct dm_region_hash *rh)
705 {
706 	int i;
707 
708 	/* wait for any recovering regions */
709 	for (i = 0; i < rh->max_recovery; i++)
710 		down(&rh->recovery_count);
711 }
712 EXPORT_SYMBOL_GPL(dm_rh_stop_recovery);
713 
dm_rh_start_recovery(struct dm_region_hash * rh)714 void dm_rh_start_recovery(struct dm_region_hash *rh)
715 {
716 	int i;
717 
718 	for (i = 0; i < rh->max_recovery; i++)
719 		up(&rh->recovery_count);
720 
721 	rh->wakeup_workers(rh->context);
722 }
723 EXPORT_SYMBOL_GPL(dm_rh_start_recovery);
724 
725 MODULE_DESCRIPTION(DM_NAME " region hash");
726 MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@redhat.com>");
727 MODULE_LICENSE("GPL");
728