xref: /openbmc/linux/drivers/md/dm-raid1.c (revision 5ee9cd065836e5934710ca35653bce7905add20b)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * Copyright (C) 2003 Sistina Software Limited.
4   * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
5   *
6   * This file is released under the GPL.
7   */
8  
9  #include "dm-bio-record.h"
10  
11  #include <linux/init.h>
12  #include <linux/mempool.h>
13  #include <linux/module.h>
14  #include <linux/pagemap.h>
15  #include <linux/slab.h>
16  #include <linux/workqueue.h>
17  #include <linux/device-mapper.h>
18  #include <linux/dm-io.h>
19  #include <linux/dm-dirty-log.h>
20  #include <linux/dm-kcopyd.h>
21  #include <linux/dm-region-hash.h>
22  
23  static struct workqueue_struct *dm_raid1_wq;
24  
25  #define DM_MSG_PREFIX "raid1"
26  
27  #define MAX_RECOVERY 1	/* Maximum number of regions recovered in parallel. */
28  
29  #define MAX_NR_MIRRORS	(DM_KCOPYD_MAX_REGIONS + 1)
30  
31  #define DM_RAID1_HANDLE_ERRORS	0x01
32  #define DM_RAID1_KEEP_LOG	0x02
33  #define errors_handled(p)	((p)->features & DM_RAID1_HANDLE_ERRORS)
34  #define keep_log(p)		((p)->features & DM_RAID1_KEEP_LOG)
35  
36  static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped);
37  
38  /*
39   *---------------------------------------------------------------
40   * Mirror set structures.
41   *---------------------------------------------------------------
42   */
43  enum dm_raid1_error {
44  	DM_RAID1_WRITE_ERROR,
45  	DM_RAID1_FLUSH_ERROR,
46  	DM_RAID1_SYNC_ERROR,
47  	DM_RAID1_READ_ERROR
48  };
49  
50  struct mirror {
51  	struct mirror_set *ms;
52  	atomic_t error_count;
53  	unsigned long error_type;
54  	struct dm_dev *dev;
55  	sector_t offset;
56  };
57  
58  struct mirror_set {
59  	struct dm_target *ti;
60  	struct list_head list;
61  
62  	uint64_t features;
63  
64  	spinlock_t lock;	/* protects the lists */
65  	struct bio_list reads;
66  	struct bio_list writes;
67  	struct bio_list failures;
68  	struct bio_list holds;	/* bios are waiting until suspend */
69  
70  	struct dm_region_hash *rh;
71  	struct dm_kcopyd_client *kcopyd_client;
72  	struct dm_io_client *io_client;
73  
74  	/* recovery */
75  	region_t nr_regions;
76  	int in_sync;
77  	int log_failure;
78  	int leg_failure;
79  	atomic_t suspend;
80  
81  	atomic_t default_mirror;	/* Default mirror */
82  
83  	struct workqueue_struct *kmirrord_wq;
84  	struct work_struct kmirrord_work;
85  	struct timer_list timer;
86  	unsigned long timer_pending;
87  
88  	struct work_struct trigger_event;
89  
90  	unsigned int nr_mirrors;
91  	struct mirror mirror[];
92  };
93  
94  DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(raid1_resync_throttle,
95  		"A percentage of time allocated for raid resynchronization");
96  
97  static void wakeup_mirrord(void *context)
98  {
99  	struct mirror_set *ms = context;
100  
101  	queue_work(ms->kmirrord_wq, &ms->kmirrord_work);
102  }
103  
104  static void delayed_wake_fn(struct timer_list *t)
105  {
106  	struct mirror_set *ms = from_timer(ms, t, timer);
107  
108  	clear_bit(0, &ms->timer_pending);
109  	wakeup_mirrord(ms);
110  }
111  
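/*
 * Arm a single delayed wakeup (~200ms, HZ/5) of kmirrord; the
 * timer_pending bit ensures only one such wakeup is queued at a time.
 */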
112  static void delayed_wake(struct mirror_set *ms)
113  {
114  	if (test_and_set_bit(0, &ms->timer_pending))
115  		return;
116  
117  	ms->timer.expires = jiffies + HZ / 5;
118  	add_timer(&ms->timer);
119  }
120  
121  static void wakeup_all_recovery_waiters(void *context)
122  {
123  	wake_up_all(&_kmirrord_recovery_stopped);
124  }
125  
126  static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw)
127  {
128  	unsigned long flags;
129  	int should_wake = 0;
130  	struct bio_list *bl;
131  
132  	bl = (rw == WRITE) ? &ms->writes : &ms->reads;
133  	spin_lock_irqsave(&ms->lock, flags);
134  	should_wake = !(bl->head);
135  	bio_list_add(bl, bio);
136  	spin_unlock_irqrestore(&ms->lock, flags);
137  
138  	if (should_wake)
139  		wakeup_mirrord(ms);
140  }
141  
142  static void dispatch_bios(void *context, struct bio_list *bio_list)
143  {
144  	struct mirror_set *ms = context;
145  	struct bio *bio;
146  
147  	while ((bio = bio_list_pop(bio_list)))
148  		queue_bio(ms, bio, WRITE);
149  }
150  
151  struct dm_raid1_bio_record {
152  	struct mirror *m;
153  	/* if details->bi_bdev == NULL, details were not saved */
154  	struct dm_bio_details details;
155  	region_t write_region;
156  };
157  
158  /*
159   * Every mirror should look like this one.
160   */
161  #define DEFAULT_MIRROR 0
162  
163  /*
164   * This is yucky.  We squirrel the mirror struct away inside
165   * bi_next for read/write buffers.  This is safe since the bio
166   * doesn't get submitted to the lower levels of the block layer.
167   */
168  static struct mirror *bio_get_m(struct bio *bio)
169  {
170  	return (struct mirror *) bio->bi_next;
171  }
172  
173  static void bio_set_m(struct bio *bio, struct mirror *m)
174  {
175  	bio->bi_next = (struct bio *) m;
176  }
177  
178  static struct mirror *get_default_mirror(struct mirror_set *ms)
179  {
180  	return &ms->mirror[atomic_read(&ms->default_mirror)];
181  }
182  
183  static void set_default_mirror(struct mirror *m)
184  {
185  	struct mirror_set *ms = m->ms;
186  	struct mirror *m0 = &(ms->mirror[0]);
187  
188  	atomic_set(&ms->default_mirror, m - m0);
189  }
190  
191  static struct mirror *get_valid_mirror(struct mirror_set *ms)
192  {
193  	struct mirror *m;
194  
195  	for (m = ms->mirror; m < ms->mirror + ms->nr_mirrors; m++)
196  		if (!atomic_read(&m->error_count))
197  			return m;
198  
199  	return NULL;
200  }
201  
202  /* fail_mirror
203   * @m: mirror device to fail
204   * @error_type: one of the enum dm_raid1_error values, DM_RAID1_*_ERROR
205   *
206   * If errors are being handled, record the type of
207   * error encountered for this device.  If this type
208   * of error has already been recorded, we can return;
209   * otherwise, we must signal userspace by triggering
210   * an event.  Additionally, if the device is the
211   * primary device, we must choose a new primary, but
212   * only if the mirror is in-sync.
213   *
214   * This function must not block.
215   */
216  static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
217  {
218  	struct mirror_set *ms = m->ms;
219  	struct mirror *new;
220  
221  	ms->leg_failure = 1;
222  
223  	/*
224  	 * error_count is used for nothing more than a
225  	 * simple way to tell if a device has encountered
226  	 * errors.
227  	 */
228  	atomic_inc(&m->error_count);
229  
230  	if (test_and_set_bit(error_type, &m->error_type))
231  		return;
232  
233  	if (!errors_handled(ms))
234  		return;
235  
236  	if (m != get_default_mirror(ms))
237  		goto out;
238  
239  	if (!ms->in_sync && !keep_log(ms)) {
240  		/*
241  		 * Better to issue requests to same failing device
242  		 * than to risk returning corrupt data.
243  		 */
244  		DMERR("Primary mirror (%s) failed while out-of-sync: Reads may fail.",
245  		      m->dev->name);
246  		goto out;
247  	}
248  
249  	new = get_valid_mirror(ms);
250  	if (new)
251  		set_default_mirror(new);
252  	else
253  		DMWARN("All sides of mirror have failed.");
254  
255  out:
256  	queue_work(dm_raid1_wq, &ms->trigger_event);
257  }
258  
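/*
 * Issue an empty REQ_PREFLUSH to every leg via dm_io; any leg whose bit
 * is set in the returned error mask is failed with DM_RAID1_FLUSH_ERROR.
 */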
259  static int mirror_flush(struct dm_target *ti)
260  {
261  	struct mirror_set *ms = ti->private;
262  	unsigned long error_bits;
263  
264  	unsigned int i;
265  	struct dm_io_region io[MAX_NR_MIRRORS];
266  	struct mirror *m;
267  	struct dm_io_request io_req = {
268  		.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC,
269  		.mem.type = DM_IO_KMEM,
270  		.mem.ptr.addr = NULL,
271  		.client = ms->io_client,
272  	};
273  
274  	for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++) {
275  		io[i].bdev = m->dev->bdev;
276  		io[i].sector = 0;
277  		io[i].count = 0;
278  	}
279  
280  	error_bits = -1;
281  	dm_io(&io_req, ms->nr_mirrors, io, &error_bits, IOPRIO_DEFAULT);
282  	if (unlikely(error_bits != 0)) {
283  		for (i = 0; i < ms->nr_mirrors; i++)
284  			if (test_bit(i, &error_bits))
285  				fail_mirror(ms->mirror + i,
286  					    DM_RAID1_FLUSH_ERROR);
287  		return -EIO;
288  	}
289  
290  	return 0;
291  }
292  
293  /*
294   *---------------------------------------------------------------
295   * Recovery.
296   *
297   * When a mirror is first activated we may find that some regions
298   * are in the no-sync state.  We have to recover these by
299   * recopying from the default mirror to all the others.
300   *---------------------------------------------------------------
301   */
302  static void recovery_complete(int read_err, unsigned long write_err,
303  			      void *context)
304  {
305  	struct dm_region *reg = context;
306  	struct mirror_set *ms = dm_rh_region_context(reg);
307  	int m, bit = 0;
308  
309  	if (read_err) {
310  		/* Read error means the failure of default mirror. */
311  		DMERR_LIMIT("Unable to read primary mirror during recovery");
312  		fail_mirror(get_default_mirror(ms), DM_RAID1_SYNC_ERROR);
313  	}
314  
315  	if (write_err) {
316  		DMERR_LIMIT("Write error during recovery (error = 0x%lx)",
317  			    write_err);
318  		/*
319  		 * Bits correspond to devices (excluding default mirror).
320  		 * The default mirror cannot change during recovery.
321  		 */
322  		for (m = 0; m < ms->nr_mirrors; m++) {
323  			if (&ms->mirror[m] == get_default_mirror(ms))
324  				continue;
325  			if (test_bit(bit, &write_err))
326  				fail_mirror(ms->mirror + m,
327  					    DM_RAID1_SYNC_ERROR);
328  			bit++;
329  		}
330  	}
331  
332  	dm_rh_recovery_end(reg, !(read_err || write_err));
333  }
334  
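/*
 * Kick off a kcopyd copy of one region from the default mirror to every
 * other leg; recovery_complete() records leg failures and ends recovery.
 */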
335  static void recover(struct mirror_set *ms, struct dm_region *reg)
336  {
337  	unsigned int i;
338  	struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest;
339  	struct mirror *m;
340  	unsigned long flags = 0;
341  	region_t key = dm_rh_get_region_key(reg);
342  	sector_t region_size = dm_rh_get_region_size(ms->rh);
343  
344  	/* fill in the source */
345  	m = get_default_mirror(ms);
346  	from.bdev = m->dev->bdev;
347  	from.sector = m->offset + dm_rh_region_to_sector(ms->rh, key);
348  	if (key == (ms->nr_regions - 1)) {
349  		/*
350  		 * The final region may be smaller than region_size
351  		 * (region_size is a power of two, hence the mask below).
352  		 */
353  		from.count = ms->ti->len & (region_size - 1);
354  		if (!from.count)
355  			from.count = region_size;
356  	} else
357  		from.count = region_size;
358  
359  	/* fill in the destinations */
360  	for (i = 0, dest = to; i < ms->nr_mirrors; i++) {
361  		if (&ms->mirror[i] == get_default_mirror(ms))
362  			continue;
363  
364  		m = ms->mirror + i;
365  		dest->bdev = m->dev->bdev;
366  		dest->sector = m->offset + dm_rh_region_to_sector(ms->rh, key);
367  		dest->count = from.count;
368  		dest++;
369  	}
370  
371  	/* hand to kcopyd */
372  	if (!errors_handled(ms))
373  		flags |= BIT(DM_KCOPYD_IGNORE_ERROR);
374  
375  	dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to,
376  		       flags, recovery_complete, reg);
377  }
378  
379  static void reset_ms_flags(struct mirror_set *ms)
380  {
381  	unsigned int m;
382  
383  	ms->leg_failure = 0;
384  	for (m = 0; m < ms->nr_mirrors; m++) {
385  		atomic_set(&(ms->mirror[m].error_count), 0);
386  		ms->mirror[m].error_type = 0;
387  	}
388  }
389  
390  static void do_recovery(struct mirror_set *ms)
391  {
392  	struct dm_region *reg;
393  	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
394  
395  	/*
396  	 * Start quiescing some regions.
397  	 */
398  	dm_rh_recovery_prepare(ms->rh);
399  
400  	/*
401  	 * Copy any already quiesced regions.
402  	 */
403  	while ((reg = dm_rh_recovery_start(ms->rh)))
404  		recover(ms, reg);
405  
406  	/*
407  	 * Update the in sync flag.
408  	 */
409  	if (!ms->in_sync &&
410  	    (log->type->get_sync_count(log) == ms->nr_regions)) {
411  		/* the sync is complete */
412  		dm_table_event(ms->ti->table);
413  		ms->in_sync = 1;
414  		reset_ms_flags(ms);
415  	}
416  }
417  
418  /*
419   *---------------------------------------------------------------
420   * Reads
421   *---------------------------------------------------------------
422   */
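/*
 * Walk backwards (wrapping around) from the default mirror until a leg
 * with no recorded errors is found; returns NULL if every leg has failed.
 */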
423  static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector)
424  {
425  	struct mirror *m = get_default_mirror(ms);
426  
427  	do {
428  		if (likely(!atomic_read(&m->error_count)))
429  			return m;
430  
431  		if (m-- == ms->mirror)
432  			m += ms->nr_mirrors;
433  	} while (m != get_default_mirror(ms));
434  
435  	return NULL;
436  }
437  
438  static int default_ok(struct mirror *m)
439  {
440  	struct mirror *default_mirror = get_default_mirror(m->ms);
441  
442  	return !atomic_read(&default_mirror->error_count);
443  }
444  
445  static int mirror_available(struct mirror_set *ms, struct bio *bio)
446  {
447  	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
448  	region_t region = dm_rh_bio_to_region(ms->rh, bio);
449  
450  	if (log->type->in_sync(log, region, 0))
451  		return choose_mirror(ms,  bio->bi_iter.bi_sector) ? 1 : 0;
452  
453  	return 0;
454  }
455  
456  /*
457   * remap a buffer to a particular mirror.
458   */
459  static sector_t map_sector(struct mirror *m, struct bio *bio)
460  {
461  	if (unlikely(!bio->bi_iter.bi_size))
462  		return 0;
463  	return m->offset + dm_target_offset(m->ms->ti, bio->bi_iter.bi_sector);
464  }
465  
466  static void map_bio(struct mirror *m, struct bio *bio)
467  {
468  	bio_set_dev(bio, m->dev->bdev);
469  	bio->bi_iter.bi_sector = map_sector(m, bio);
470  }
471  
472  static void map_region(struct dm_io_region *io, struct mirror *m,
473  		       struct bio *bio)
474  {
475  	io->bdev = m->dev->bdev;
476  	io->sector = map_sector(m, bio);
477  	io->count = bio_sectors(bio);
478  }
479  
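/*
 * Park a bio on ms->holds until the mirror is resumed.  If the set is
 * already suspended, complete the bio here instead: requeue it for a
 * 'noflush' suspend, otherwise fail it.
 */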
480  static void hold_bio(struct mirror_set *ms, struct bio *bio)
481  {
482  	/*
483  	 * The lock is required to avoid racing with the suspend
484  	 * process.
485  	 */
486  	spin_lock_irq(&ms->lock);
487  
488  	if (atomic_read(&ms->suspend)) {
489  		spin_unlock_irq(&ms->lock);
490  
491  		/*
492  		 * If device is suspended, complete the bio.
493  		 */
494  		if (dm_noflush_suspending(ms->ti))
495  			bio->bi_status = BLK_STS_DM_REQUEUE;
496  		else
497  			bio->bi_status = BLK_STS_IOERR;
498  
499  		bio_endio(bio);
500  		return;
501  	}
502  
503  	/*
504  	 * Hold bio until the suspend is complete.
505  	 */
506  	bio_list_add(&ms->holds, bio);
507  	spin_unlock_irq(&ms->lock);
508  }
509  
510  /*
511   *---------------------------------------------------------------
512   * Reads
513   *---------------------------------------------------------------
514   */
515  static void read_callback(unsigned long error, void *context)
516  {
517  	struct bio *bio = context;
518  	struct mirror *m;
519  
520  	m = bio_get_m(bio);
521  	bio_set_m(bio, NULL);
522  
523  	if (likely(!error)) {
524  		bio_endio(bio);
525  		return;
526  	}
527  
528  	fail_mirror(m, DM_RAID1_READ_ERROR);
529  
530  	if (likely(default_ok(m)) || mirror_available(m->ms, bio)) {
531  		DMWARN_LIMIT("Read failure on mirror device %s. Trying alternative device.",
532  			     m->dev->name);
533  		queue_bio(m->ms, bio, bio_data_dir(bio));
534  		return;
535  	}
536  
537  	DMERR_LIMIT("Read failure on mirror device %s.  Failing I/O.",
538  		    m->dev->name);
539  	bio_io_error(bio);
540  }
541  
542  /* Asynchronous read. */
543  static void read_async_bio(struct mirror *m, struct bio *bio)
544  {
545  	struct dm_io_region io;
546  	struct dm_io_request io_req = {
547  		.bi_opf = REQ_OP_READ,
548  		.mem.type = DM_IO_BIO,
549  		.mem.ptr.bio = bio,
550  		.notify.fn = read_callback,
551  		.notify.context = bio,
552  		.client = m->ms->io_client,
553  	};
554  
555  	map_region(&io, m, bio);
556  	bio_set_m(bio, m);
557  	BUG_ON(dm_io(&io_req, 1, &io, NULL, IOPRIO_DEFAULT));
558  }
559  
560  static inline int region_in_sync(struct mirror_set *ms, region_t region,
561  				 int may_block)
562  {
563  	int state = dm_rh_get_state(ms->rh, region, may_block);
564  	return state == DM_RH_CLEAN || state == DM_RH_DIRTY;
565  }
566  
567  static void do_reads(struct mirror_set *ms, struct bio_list *reads)
568  {
569  	region_t region;
570  	struct bio *bio;
571  	struct mirror *m;
572  
573  	while ((bio = bio_list_pop(reads))) {
574  		region = dm_rh_bio_to_region(ms->rh, bio);
575  		m = get_default_mirror(ms);
576  
577  		/*
578  		 * We can only read balance if the region is in sync.
579  		 */
580  		if (likely(region_in_sync(ms, region, 1)))
581  			m = choose_mirror(ms, bio->bi_iter.bi_sector);
582  		else if (m && atomic_read(&m->error_count))
583  			m = NULL;
584  
585  		if (likely(m))
586  			read_async_bio(m, bio);
587  		else
588  			bio_io_error(bio);
589  	}
590  }
591  
592  /*
593   *---------------------------------------------------------------------
594   * Writes.
595   *
596   * We do different things with the write io depending on the
597   * state of the region that it's in:
598   *
599   * SYNC:	increment pending, use kcopyd to write to *all* mirrors
600   * RECOVERING:	delay the io until recovery completes
601   * NOSYNC:	increment pending, just write to the default mirror
602   *---------------------------------------------------------------------
603   */
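/*
 * dm_io completion for a mirrored write; 'error' carries one bit per
 * failed leg, in the same order as ms->mirror[].
 */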
604  static void write_callback(unsigned long error, void *context)
605  {
606  	unsigned int i;
607  	struct bio *bio = context;
608  	struct mirror_set *ms;
609  	int should_wake = 0;
610  	unsigned long flags;
611  
612  	ms = bio_get_m(bio)->ms;
613  	bio_set_m(bio, NULL);
614  
615  	/*
616  	 * NOTE: We don't decrement the pending count here,
617  	 * instead it is done by the target's end_io function.
618  	 * This way we handle both writes to SYNC and NOSYNC
619  	 * regions with the same code.
620  	 */
621  	if (likely(!error)) {
622  		bio_endio(bio);
623  		return;
624  	}
625  
626  	/*
627  	 * If the bio is discard, return an error, but do not
628  	 * degrade the array.
629  	 */
630  	if (bio_op(bio) == REQ_OP_DISCARD) {
631  		bio->bi_status = BLK_STS_NOTSUPP;
632  		bio_endio(bio);
633  		return;
634  	}
635  
636  	for (i = 0; i < ms->nr_mirrors; i++)
637  		if (test_bit(i, &error))
638  			fail_mirror(ms->mirror + i, DM_RAID1_WRITE_ERROR);
639  
640  	/*
641  	 * Need to raise event.  Since raising
642  	 * events can block, we need to do it in
643  	 * the main thread.
644  	 */
645  	spin_lock_irqsave(&ms->lock, flags);
646  	if (!ms->failures.head)
647  		should_wake = 1;
648  	bio_list_add(&ms->failures, bio);
649  	spin_unlock_irqrestore(&ms->lock, flags);
650  	if (should_wake)
651  		wakeup_mirrord(ms);
652  }
653  
654  static void do_write(struct mirror_set *ms, struct bio *bio)
655  {
656  	unsigned int i;
657  	struct dm_io_region io[MAX_NR_MIRRORS], *dest = io;
658  	struct mirror *m;
659  	blk_opf_t op_flags = bio->bi_opf & (REQ_FUA | REQ_PREFLUSH);
660  	struct dm_io_request io_req = {
661  		.bi_opf = REQ_OP_WRITE | op_flags,
662  		.mem.type = DM_IO_BIO,
663  		.mem.ptr.bio = bio,
664  		.notify.fn = write_callback,
665  		.notify.context = bio,
666  		.client = ms->io_client,
667  	};
668  
669  	if (bio_op(bio) == REQ_OP_DISCARD) {
670  		io_req.bi_opf = REQ_OP_DISCARD | op_flags;
671  		io_req.mem.type = DM_IO_KMEM;
672  		io_req.mem.ptr.addr = NULL;
673  	}
674  
675  	for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++)
676  		map_region(dest++, m, bio);
677  
678  	/*
679  	 * Use default mirror because we only need it to retrieve the reference
680  	 * to the mirror set in write_callback().
681  	 */
682  	bio_set_m(bio, get_default_mirror(ms));
683  
684  	BUG_ON(dm_io(&io_req, ms->nr_mirrors, io, NULL, IOPRIO_DEFAULT));
685  }
686  
687  static void do_writes(struct mirror_set *ms, struct bio_list *writes)
688  {
689  	int state;
690  	struct bio *bio;
691  	struct bio_list sync, nosync, recover, *this_list = NULL;
692  	struct bio_list requeue;
693  	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
694  	region_t region;
695  
696  	if (!writes->head)
697  		return;
698  
699  	/*
700  	 * Classify each write.
701  	 */
702  	bio_list_init(&sync);
703  	bio_list_init(&nosync);
704  	bio_list_init(&recover);
705  	bio_list_init(&requeue);
706  
707  	while ((bio = bio_list_pop(writes))) {
708  		if ((bio->bi_opf & REQ_PREFLUSH) ||
709  		    (bio_op(bio) == REQ_OP_DISCARD)) {
710  			bio_list_add(&sync, bio);
711  			continue;
712  		}
713  
714  		region = dm_rh_bio_to_region(ms->rh, bio);
715  
716  		if (log->type->is_remote_recovering &&
717  		    log->type->is_remote_recovering(log, region)) {
718  			bio_list_add(&requeue, bio);
719  			continue;
720  		}
721  
722  		state = dm_rh_get_state(ms->rh, region, 1);
723  		switch (state) {
724  		case DM_RH_CLEAN:
725  		case DM_RH_DIRTY:
726  			this_list = &sync;
727  			break;
728  
729  		case DM_RH_NOSYNC:
730  			this_list = &nosync;
731  			break;
732  
733  		case DM_RH_RECOVERING:
734  			this_list = &recover;
735  			break;
736  		}
737  
738  		bio_list_add(this_list, bio);
739  	}
740  
741  	/*
742  	 * Add bios that are delayed due to remote recovery
743  	 * back onto the write queue.
744  	 */
745  	if (unlikely(requeue.head)) {
746  		spin_lock_irq(&ms->lock);
747  		bio_list_merge(&ms->writes, &requeue);
748  		spin_unlock_irq(&ms->lock);
749  		delayed_wake(ms);
750  	}
751  
752  	/*
753  	 * Increment the pending counts for any regions that will
754  	 * be written to (writes to RECOVERING regions are going to
755  	 * be delayed).
756  	 */
757  	dm_rh_inc_pending(ms->rh, &sync);
758  	dm_rh_inc_pending(ms->rh, &nosync);
759  
760  	/*
761  	 * If the flush fails on a previous call and succeeds here,
762  	 * we must not reset the log_failure variable.  We need
763  	 * userspace interaction to do that.
764  	 */
765  	ms->log_failure = dm_rh_flush(ms->rh) ? 1 : ms->log_failure;
766  
767  	/*
768  	 * Dispatch io.
769  	 */
770  	if (unlikely(ms->log_failure) && errors_handled(ms)) {
771  		spin_lock_irq(&ms->lock);
772  		bio_list_merge(&ms->failures, &sync);
773  		spin_unlock_irq(&ms->lock);
774  		wakeup_mirrord(ms);
775  	} else
776  		while ((bio = bio_list_pop(&sync)))
777  			do_write(ms, bio);
778  
779  	while ((bio = bio_list_pop(&recover)))
780  		dm_rh_delay(ms->rh, bio);
781  
782  	while ((bio = bio_list_pop(&nosync))) {
783  		if (unlikely(ms->leg_failure) && errors_handled(ms) && !keep_log(ms)) {
784  			spin_lock_irq(&ms->lock);
785  			bio_list_add(&ms->failures, bio);
786  			spin_unlock_irq(&ms->lock);
787  			wakeup_mirrord(ms);
788  		} else {
789  			map_bio(get_default_mirror(ms), bio);
790  			submit_bio_noacct(bio);
791  		}
792  	}
793  }
794  
795  static void do_failures(struct mirror_set *ms, struct bio_list *failures)
796  {
797  	struct bio *bio;
798  
799  	if (likely(!failures->head))
800  		return;
801  
802  	/*
803  	 * If the log has failed, unattempted writes are being
804  	 * put on the holds list.  We can't issue those writes
805  	 * until a log has been marked, so we must store them.
806  	 *
807  	 * If a 'noflush' suspend is in progress, we can requeue
808   * the I/Os to the core.  This gives userspace a chance
809  	 * to reconfigure the mirror, at which point the core
810  	 * will reissue the writes.  If the 'noflush' flag is
811  	 * not set, we have no choice but to return errors.
812  	 *
813  	 * Some writes on the failures list may have been
814  	 * submitted before the log failure and represent a
815  	 * failure to write to one of the devices.  It is ok
816  	 * for us to treat them the same and requeue them
817  	 * as well.
818  	 */
819  	while ((bio = bio_list_pop(failures))) {
820  		if (!ms->log_failure) {
821  			ms->in_sync = 0;
822  			dm_rh_mark_nosync(ms->rh, bio);
823  		}
824  
825  		/*
826  		 * If all the legs are dead, fail the I/O.
827  		 * If the device has failed and keep_log is enabled,
828  		 * fail the I/O.
829  		 *
830  		 * If we have been told to handle errors, and keep_log
831  		 * isn't enabled, hold the bio and wait for userspace to
832  		 * deal with the problem.
833  		 *
834  		 * Otherwise pretend that the I/O succeeded. (This would
835  		 * be wrong if the failed leg returned after reboot and
836  		 * got replicated back to the good legs.)
837  		 */
838  		if (unlikely(!get_valid_mirror(ms) || (keep_log(ms) && ms->log_failure)))
839  			bio_io_error(bio);
840  		else if (errors_handled(ms) && !keep_log(ms))
841  			hold_bio(ms, bio);
842  		else
843  			bio_endio(bio);
844  	}
845  }
846  
847  static void trigger_event(struct work_struct *work)
848  {
849  	struct mirror_set *ms =
850  		container_of(work, struct mirror_set, trigger_event);
851  
852  	dm_table_event(ms->ti->table);
853  }
854  
855  /*
856   *---------------------------------------------------------------
857   * kmirrord
858   *---------------------------------------------------------------
859   */
860  static void do_mirror(struct work_struct *work)
861  {
862  	struct mirror_set *ms = container_of(work, struct mirror_set,
863  					     kmirrord_work);
864  	struct bio_list reads, writes, failures;
865  	unsigned long flags;
866  
867  	spin_lock_irqsave(&ms->lock, flags);
868  	reads = ms->reads;
869  	writes = ms->writes;
870  	failures = ms->failures;
871  	bio_list_init(&ms->reads);
872  	bio_list_init(&ms->writes);
873  	bio_list_init(&ms->failures);
874  	spin_unlock_irqrestore(&ms->lock, flags);
875  
876  	dm_rh_update_states(ms->rh, errors_handled(ms));
877  	do_recovery(ms);
878  	do_reads(ms, &reads);
879  	do_writes(ms, &writes);
880  	do_failures(ms, &failures);
881  }
882  
883  /*
884   *---------------------------------------------------------------
885   * Target functions
886   *---------------------------------------------------------------
887   */
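/*
 * Allocate a mirror_set sized for nr_mirrors legs and set up its dm_io
 * client and dirty region hash.
 */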
888  static struct mirror_set *alloc_context(unsigned int nr_mirrors,
889  					uint32_t region_size,
890  					struct dm_target *ti,
891  					struct dm_dirty_log *dl)
892  {
893  	struct mirror_set *ms =
894  		kzalloc(struct_size(ms, mirror, nr_mirrors), GFP_KERNEL);
895  
896  	if (!ms) {
897  		ti->error = "Cannot allocate mirror context";
898  		return NULL;
899  	}
900  
901  	spin_lock_init(&ms->lock);
902  	bio_list_init(&ms->reads);
903  	bio_list_init(&ms->writes);
904  	bio_list_init(&ms->failures);
905  	bio_list_init(&ms->holds);
906  
907  	ms->ti = ti;
908  	ms->nr_mirrors = nr_mirrors;
909  	ms->nr_regions = dm_sector_div_up(ti->len, region_size);
910  	ms->in_sync = 0;
911  	ms->log_failure = 0;
912  	ms->leg_failure = 0;
913  	atomic_set(&ms->suspend, 0);
914  	atomic_set(&ms->default_mirror, DEFAULT_MIRROR);
915  
916  	ms->io_client = dm_io_client_create();
917  	if (IS_ERR(ms->io_client)) {
918  		ti->error = "Error creating dm_io client";
919  		kfree(ms);
920  		return NULL;
921  	}
922  
923  	ms->rh = dm_region_hash_create(ms, dispatch_bios, wakeup_mirrord,
924  				       wakeup_all_recovery_waiters,
925  				       ms->ti->begin, MAX_RECOVERY,
926  				       dl, region_size, ms->nr_regions);
927  	if (IS_ERR(ms->rh)) {
928  		ti->error = "Error creating dirty region hash";
929  		dm_io_client_destroy(ms->io_client);
930  		kfree(ms);
931  		return NULL;
932  	}
933  
934  	return ms;
935  }
936  
937  static void free_context(struct mirror_set *ms, struct dm_target *ti,
938  			 unsigned int m)
939  {
940  	while (m--)
941  		dm_put_device(ti, ms->mirror[m].dev);
942  
943  	dm_io_client_destroy(ms->io_client);
944  	dm_region_hash_destroy(ms->rh);
945  	kfree(ms);
946  }
947  
948  static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
949  		      unsigned int mirror, char **argv)
950  {
951  	unsigned long long offset;
952  	char dummy;
953  	int ret;
954  
955  	if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1 ||
956  	    offset != (sector_t)offset) {
957  		ti->error = "Invalid offset";
958  		return -EINVAL;
959  	}
960  
961  	ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
962  			    &ms->mirror[mirror].dev);
963  	if (ret) {
964  		ti->error = "Device lookup failure";
965  		return ret;
966  	}
967  
968  	ms->mirror[mirror].ms = ms;
969  	atomic_set(&(ms->mirror[mirror].error_count), 0);
970  	ms->mirror[mirror].error_type = 0;
971  	ms->mirror[mirror].offset = offset;
972  
973  	return 0;
974  }
975  
976  /*
977   * Create dirty log: log_type #log_params <log_params>
978   */
979  static struct dm_dirty_log *create_dirty_log(struct dm_target *ti,
980  					     unsigned int argc, char **argv,
981  					     unsigned int *args_used)
982  {
983  	unsigned int param_count;
984  	struct dm_dirty_log *dl;
985  	char dummy;
986  
987  	if (argc < 2) {
988  		ti->error = "Insufficient mirror log arguments";
989  		return NULL;
990  	}
991  
992  	if (sscanf(argv[1], "%u%c", &param_count, &dummy) != 1) {
993  		ti->error = "Invalid mirror log argument count";
994  		return NULL;
995  	}
996  
997  	*args_used = 2 + param_count;
998  
999  	if (argc < *args_used) {
1000  		ti->error = "Insufficient mirror log arguments";
1001  		return NULL;
1002  	}
1003  
1004  	dl = dm_dirty_log_create(argv[0], ti, mirror_flush, param_count,
1005  				 argv + 2);
1006  	if (!dl) {
1007  		ti->error = "Error creating mirror dirty log";
1008  		return NULL;
1009  	}
1010  
1011  	return dl;
1012  }
1013  
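/*
 * Parse the optional feature list: <#features> [handle_errors] [keep_log].
 * keep_log is only valid together with handle_errors.
 */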
1014  static int parse_features(struct mirror_set *ms, unsigned int argc, char **argv,
1015  			  unsigned int *args_used)
1016  {
1017  	unsigned int num_features;
1018  	struct dm_target *ti = ms->ti;
1019  	char dummy;
1020  	int i;
1021  
1022  	*args_used = 0;
1023  
1024  	if (!argc)
1025  		return 0;
1026  
1027  	if (sscanf(argv[0], "%u%c", &num_features, &dummy) != 1) {
1028  		ti->error = "Invalid number of features";
1029  		return -EINVAL;
1030  	}
1031  
1032  	argc--;
1033  	argv++;
1034  	(*args_used)++;
1035  
1036  	if (num_features > argc) {
1037  		ti->error = "Not enough arguments to support feature count";
1038  		return -EINVAL;
1039  	}
1040  
1041  	for (i = 0; i < num_features; i++) {
1042  		if (!strcmp("handle_errors", argv[0]))
1043  			ms->features |= DM_RAID1_HANDLE_ERRORS;
1044  		else if (!strcmp("keep_log", argv[0]))
1045  			ms->features |= DM_RAID1_KEEP_LOG;
1046  		else {
1047  			ti->error = "Unrecognised feature requested";
1048  			return -EINVAL;
1049  		}
1050  
1051  		argc--;
1052  		argv++;
1053  		(*args_used)++;
1054  	}
1055  	if (!errors_handled(ms) && keep_log(ms)) {
1056  		ti->error = "keep_log feature requires the handle_errors feature";
1057  		return -EINVAL;
1058  	}
1059  
1060  	return 0;
1061  }
1062  
1063  /*
1064   * Construct a mirror mapping:
1065   *
1066   * log_type #log_params <log_params>
1067   * #mirrors [mirror_path offset]{2,}
1068   * [#features <features>]
1069   *
1070   * log_type is "core" or "disk"
1071   * #log_params is between 1 and 3
1072   *
1073   * If present, supported features are "handle_errors" and "keep_log".
1074   */
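/*
 * Example table line (a sketch only, with hypothetical devices and sizes):
 * a 1GiB two-leg mirror using a core log with a 1024-sector region size and
 * error handling enabled might be built as
 *
 *   0 2097152 mirror core 2 1024 nosync 2 /dev/sda1 0 /dev/sdb1 0 1 handle_errors
 */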
1075  static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1076  {
1077  	int r;
1078  	unsigned int nr_mirrors, m, args_used;
1079  	struct mirror_set *ms;
1080  	struct dm_dirty_log *dl;
1081  	char dummy;
1082  
1083  	dl = create_dirty_log(ti, argc, argv, &args_used);
1084  	if (!dl)
1085  		return -EINVAL;
1086  
1087  	argv += args_used;
1088  	argc -= args_used;
1089  
1090  	if (!argc || sscanf(argv[0], "%u%c", &nr_mirrors, &dummy) != 1 ||
1091  	    nr_mirrors < 2 || nr_mirrors > MAX_NR_MIRRORS) {
1092  		ti->error = "Invalid number of mirrors";
1093  		dm_dirty_log_destroy(dl);
1094  		return -EINVAL;
1095  	}
1096  
1097  	argv++, argc--;
1098  
1099  	if (argc < nr_mirrors * 2) {
1100  		ti->error = "Too few mirror arguments";
1101  		dm_dirty_log_destroy(dl);
1102  		return -EINVAL;
1103  	}
1104  
1105  	ms = alloc_context(nr_mirrors, dl->type->get_region_size(dl), ti, dl);
1106  	if (!ms) {
1107  		dm_dirty_log_destroy(dl);
1108  		return -ENOMEM;
1109  	}
1110  
1111  	/* Get the mirror parameter sets */
1112  	for (m = 0; m < nr_mirrors; m++) {
1113  		r = get_mirror(ms, ti, m, argv);
1114  		if (r) {
1115  			free_context(ms, ti, m);
1116  			return r;
1117  		}
1118  		argv += 2;
1119  		argc -= 2;
1120  	}
1121  
1122  	ti->private = ms;
1123  
1124  	r = dm_set_target_max_io_len(ti, dm_rh_get_region_size(ms->rh));
1125  	if (r)
1126  		goto err_free_context;
1127  
1128  	ti->num_flush_bios = 1;
1129  	ti->num_discard_bios = 1;
1130  	ti->per_io_data_size = sizeof(struct dm_raid1_bio_record);
1131  
1132  	ms->kmirrord_wq = alloc_workqueue("kmirrord", WQ_MEM_RECLAIM, 0);
1133  	if (!ms->kmirrord_wq) {
1134  		DMERR("couldn't start kmirrord");
1135  		r = -ENOMEM;
1136  		goto err_free_context;
1137  	}
1138  	INIT_WORK(&ms->kmirrord_work, do_mirror);
1139  	timer_setup(&ms->timer, delayed_wake_fn, 0);
1140  	ms->timer_pending = 0;
1141  	INIT_WORK(&ms->trigger_event, trigger_event);
1142  
1143  	r = parse_features(ms, argc, argv, &args_used);
1144  	if (r)
1145  		goto err_destroy_wq;
1146  
1147  	argv += args_used;
1148  	argc -= args_used;
1149  
1150  	/*
1151  	 * Any read-balancing addition depends on the
1152  	 * DM_RAID1_HANDLE_ERRORS flag being present.
1153  	 * This is because the decision to balance depends
1154  	 * on the sync state of a region.  If the above
1155   * flag is not present, we ignore errors and the
1156   * sync state may be inaccurate.
1157  	 */
1158  
1159  	if (argc) {
1160  		ti->error = "Too many mirror arguments";
1161  		r = -EINVAL;
1162  		goto err_destroy_wq;
1163  	}
1164  
1165  	ms->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
1166  	if (IS_ERR(ms->kcopyd_client)) {
1167  		r = PTR_ERR(ms->kcopyd_client);
1168  		goto err_destroy_wq;
1169  	}
1170  
1171  	wakeup_mirrord(ms);
1172  	return 0;
1173  
1174  err_destroy_wq:
1175  	destroy_workqueue(ms->kmirrord_wq);
1176  err_free_context:
1177  	free_context(ms, ti, ms->nr_mirrors);
1178  	return r;
1179  }
1180  
1181  static void mirror_dtr(struct dm_target *ti)
1182  {
1183  	struct mirror_set *ms = ti->private;
1184  
1185  	del_timer_sync(&ms->timer);
1186  	flush_workqueue(ms->kmirrord_wq);
1187  	flush_work(&ms->trigger_event);
1188  	dm_kcopyd_client_destroy(ms->kcopyd_client);
1189  	destroy_workqueue(ms->kmirrord_wq);
1190  	free_context(ms, ti, ms->nr_mirrors);
1191  }
1192  
1193  /*
1194   * Mirror mapping function
1195   */
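/*
 * Writes are always queued to kmirrord.  Reads of in-sync regions are
 * remapped directly to a healthy leg (saving enough state to retry via
 * mirror_end_io()); reads of out-of-sync regions are queued as well,
 * except readahead, which is failed.
 */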
1196  static int mirror_map(struct dm_target *ti, struct bio *bio)
1197  {
1198  	int r, rw = bio_data_dir(bio);
1199  	struct mirror *m;
1200  	struct mirror_set *ms = ti->private;
1201  	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
1202  	struct dm_raid1_bio_record *bio_record =
1203  	  dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
1204  
1205  	bio_record->details.bi_bdev = NULL;
1206  
1207  	if (rw == WRITE) {
1208  		/* Save region for mirror_end_io() handler */
1209  		bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio);
1210  		queue_bio(ms, bio, rw);
1211  		return DM_MAPIO_SUBMITTED;
1212  	}
1213  
1214  	r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0);
1215  	if (r < 0 && r != -EWOULDBLOCK)
1216  		return DM_MAPIO_KILL;
1217  
1218  	/*
1219  	 * If region is not in-sync queue the bio.
1220  	 */
1221  	if (!r || (r == -EWOULDBLOCK)) {
1222  		if (bio->bi_opf & REQ_RAHEAD)
1223  			return DM_MAPIO_KILL;
1224  
1225  		queue_bio(ms, bio, rw);
1226  		return DM_MAPIO_SUBMITTED;
1227  	}
1228  
1229  	/*
1230  	 * The region is in-sync and we can perform reads directly.
1231  	 * Store enough information so we can retry if it fails.
1232  	 */
1233  	m = choose_mirror(ms, bio->bi_iter.bi_sector);
1234  	if (unlikely(!m))
1235  		return DM_MAPIO_KILL;
1236  
1237  	dm_bio_record(&bio_record->details, bio);
1238  	bio_record->m = m;
1239  
1240  	map_bio(m, bio);
1241  
1242  	return DM_MAPIO_REMAPPED;
1243  }
1244  
1245  static int mirror_end_io(struct dm_target *ti, struct bio *bio,
1246  		blk_status_t *error)
1247  {
1248  	int rw = bio_data_dir(bio);
1249  	struct mirror_set *ms = ti->private;
1250  	struct mirror *m = NULL;
1251  	struct dm_bio_details *bd = NULL;
1252  	struct dm_raid1_bio_record *bio_record =
1253  	  dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
1254  
1255  	/*
1256  	 * We need to dec pending if this was a write.
1257  	 */
1258  	if (rw == WRITE) {
1259  		if (!(bio->bi_opf & REQ_PREFLUSH) &&
1260  		    bio_op(bio) != REQ_OP_DISCARD)
1261  			dm_rh_dec(ms->rh, bio_record->write_region);
1262  		return DM_ENDIO_DONE;
1263  	}
1264  
1265  	if (*error == BLK_STS_NOTSUPP)
1266  		goto out;
1267  
1268  	if (bio->bi_opf & REQ_RAHEAD)
1269  		goto out;
1270  
1271  	if (unlikely(*error)) {
1272  		if (!bio_record->details.bi_bdev) {
1273  			/*
1274  			 * There wasn't enough memory to record necessary
1275  			 * information for a retry or there was no other
1276  			 * mirror in-sync.
1277  			 */
1278  			DMERR_LIMIT("Mirror read failed.");
1279  			return DM_ENDIO_DONE;
1280  		}
1281  
1282  		m = bio_record->m;
1283  
1284  		DMERR("Mirror read failed from %s. Trying alternative device.",
1285  		      m->dev->name);
1286  
1287  		fail_mirror(m, DM_RAID1_READ_ERROR);
1288  
1289  		/*
1290  		 * A failed read is requeued for another attempt using an intact
1291  		 * mirror.
1292  		 */
1293  		if (default_ok(m) || mirror_available(ms, bio)) {
1294  			bd = &bio_record->details;
1295  
1296  			dm_bio_restore(bd, bio);
1297  			bio_record->details.bi_bdev = NULL;
1298  			bio->bi_status = 0;
1299  
1300  			queue_bio(ms, bio, rw);
1301  			return DM_ENDIO_INCOMPLETE;
1302  		}
1303  		DMERR("All replicated volumes dead, failing I/O");
1304  	}
1305  
1306  out:
1307  	bio_record->details.bi_bdev = NULL;
1308  
1309  	return DM_ENDIO_DONE;
1310  }
1311  
1312  static void mirror_presuspend(struct dm_target *ti)
1313  {
1314  	struct mirror_set *ms = ti->private;
1315  	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
1316  
1317  	struct bio_list holds;
1318  	struct bio *bio;
1319  
1320  	atomic_set(&ms->suspend, 1);
1321  
1322  	/*
1323  	 * Complete or requeue the bios parked on the hold list now.  Since
1324  	 * ms->suspend is already set, hold_bio() will finish each of them
1325  	 * immediately, and no new bio can be added to the hold list from
1326  	 * this point on.
1327  	 */
1328  	spin_lock_irq(&ms->lock);
1329  	holds = ms->holds;
1330  	bio_list_init(&ms->holds);
1331  	spin_unlock_irq(&ms->lock);
1332  
1333  	while ((bio = bio_list_pop(&holds)))
1334  		hold_bio(ms, bio);
1335  
1336  	/*
1337  	 * We must finish up all the work that we've
1338  	 * generated (i.e. recovery work).
1339  	 */
1340  	dm_rh_stop_recovery(ms->rh);
1341  
1342  	wait_event(_kmirrord_recovery_stopped,
1343  		   !dm_rh_recovery_in_flight(ms->rh));
1344  
1345  	if (log->type->presuspend && log->type->presuspend(log))
1346  		/* FIXME: need better error handling */
1347  		DMWARN("log presuspend failed");
1348  
1349  	/*
1350  	 * Now that recovery is complete/stopped and the
1351  	 * delayed bios are queued, we need to wait for
1352  	 * the worker thread to complete.  This way,
1353  	 * we know that all of our I/O has been pushed.
1354  	 */
1355  	flush_workqueue(ms->kmirrord_wq);
1356  }
1357  
1358  static void mirror_postsuspend(struct dm_target *ti)
1359  {
1360  	struct mirror_set *ms = ti->private;
1361  	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
1362  
1363  	if (log->type->postsuspend && log->type->postsuspend(log))
1364  		/* FIXME: need better error handling */
1365  		DMWARN("log postsuspend failed");
1366  }
1367  
1368  static void mirror_resume(struct dm_target *ti)
1369  {
1370  	struct mirror_set *ms = ti->private;
1371  	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
1372  
1373  	atomic_set(&ms->suspend, 0);
1374  	if (log->type->resume && log->type->resume(log))
1375  		/* FIXME: need better error handling */
1376  		DMWARN("log resume failed");
1377  	dm_rh_start_recovery(ms->rh);
1378  }
1379  
1380  /*
1381   * device_status_char
1382   * @m: mirror device/leg we want the status of
1383   *
1384   * We return one character representing the most severe error
1385   * we have encountered.
1386   *    A => Alive - No failures
1387   *    D => Dead - A write failure occurred leaving mirror out-of-sync
1388   *    S => Sync - A synchronization failure occurred, mirror out-of-sync
1389   *    R => Read - A read failure occurred, mirror data unaffected
1390   *
1391   * Returns: <char>
1392   */
1393  static char device_status_char(struct mirror *m)
1394  {
1395  	if (!atomic_read(&(m->error_count)))
1396  		return 'A';
1397  
1398  	return (test_bit(DM_RAID1_FLUSH_ERROR, &(m->error_type))) ? 'F' :
1399  		(test_bit(DM_RAID1_WRITE_ERROR, &(m->error_type))) ? 'D' :
1400  		(test_bit(DM_RAID1_SYNC_ERROR, &(m->error_type))) ? 'S' :
1401  		(test_bit(DM_RAID1_READ_ERROR, &(m->error_type))) ? 'R' : 'U';
1402  }
1403  
1404  
1405  static void mirror_status(struct dm_target *ti, status_type_t type,
1406  			  unsigned int status_flags, char *result, unsigned int maxlen)
1407  {
1408  	unsigned int m, sz = 0;
1409  	int num_feature_args = 0;
1410  	struct mirror_set *ms = ti->private;
1411  	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
1412  	char buffer[MAX_NR_MIRRORS + 1];
1413  
1414  	switch (type) {
1415  	case STATUSTYPE_INFO:
1416  		DMEMIT("%d ", ms->nr_mirrors);
1417  		for (m = 0; m < ms->nr_mirrors; m++) {
1418  			DMEMIT("%s ", ms->mirror[m].dev->name);
1419  			buffer[m] = device_status_char(&(ms->mirror[m]));
1420  		}
1421  		buffer[m] = '\0';
1422  
1423  		DMEMIT("%llu/%llu 1 %s ",
1424  		      (unsigned long long)log->type->get_sync_count(log),
1425  		      (unsigned long long)ms->nr_regions, buffer);
1426  
1427  		sz += log->type->status(log, type, result+sz, maxlen-sz);
1428  
1429  		break;
1430  
1431  	case STATUSTYPE_TABLE:
1432  		sz = log->type->status(log, type, result, maxlen);
1433  
1434  		DMEMIT("%d", ms->nr_mirrors);
1435  		for (m = 0; m < ms->nr_mirrors; m++)
1436  			DMEMIT(" %s %llu", ms->mirror[m].dev->name,
1437  			       (unsigned long long)ms->mirror[m].offset);
1438  
1439  		num_feature_args += !!errors_handled(ms);
1440  		num_feature_args += !!keep_log(ms);
1441  		if (num_feature_args) {
1442  			DMEMIT(" %d", num_feature_args);
1443  			if (errors_handled(ms))
1444  				DMEMIT(" handle_errors");
1445  			if (keep_log(ms))
1446  				DMEMIT(" keep_log");
1447  		}
1448  
1449  		break;
1450  
1451  	case STATUSTYPE_IMA:
1452  		DMEMIT_TARGET_NAME_VERSION(ti->type);
1453  		DMEMIT(",nr_mirrors=%d", ms->nr_mirrors);
1454  		for (m = 0; m < ms->nr_mirrors; m++) {
1455  			DMEMIT(",mirror_device_%d=%s", m, ms->mirror[m].dev->name);
1456  			DMEMIT(",mirror_device_%d_status=%c",
1457  			       m, device_status_char(&(ms->mirror[m])));
1458  		}
1459  
1460  		DMEMIT(",handle_errors=%c", errors_handled(ms) ? 'y' : 'n');
1461  		DMEMIT(",keep_log=%c", keep_log(ms) ? 'y' : 'n');
1462  
1463  		DMEMIT(",log_type_status=");
1464  		sz += log->type->status(log, type, result+sz, maxlen-sz);
1465  		DMEMIT(";");
1466  		break;
1467  	}
1468  }
1469  
1470  static int mirror_iterate_devices(struct dm_target *ti,
1471  				  iterate_devices_callout_fn fn, void *data)
1472  {
1473  	struct mirror_set *ms = ti->private;
1474  	int ret = 0;
1475  	unsigned int i;
1476  
1477  	for (i = 0; !ret && i < ms->nr_mirrors; i++)
1478  		ret = fn(ti, ms->mirror[i].dev,
1479  			 ms->mirror[i].offset, ti->len, data);
1480  
1481  	return ret;
1482  }
1483  
1484  static struct target_type mirror_target = {
1485  	.name	 = "mirror",
1486  	.version = {1, 14, 0},
1487  	.module	 = THIS_MODULE,
1488  	.ctr	 = mirror_ctr,
1489  	.dtr	 = mirror_dtr,
1490  	.map	 = mirror_map,
1491  	.end_io	 = mirror_end_io,
1492  	.presuspend = mirror_presuspend,
1493  	.postsuspend = mirror_postsuspend,
1494  	.resume	 = mirror_resume,
1495  	.status	 = mirror_status,
1496  	.iterate_devices = mirror_iterate_devices,
1497  };
1498  
1499  static int __init dm_mirror_init(void)
1500  {
1501  	int r;
1502  
1503  	dm_raid1_wq = alloc_workqueue("dm_raid1_wq", 0, 0);
1504  	if (!dm_raid1_wq) {
1505  		DMERR("Failed to alloc workqueue");
1506  		return -ENOMEM;
1507  	}
1508  
1509  	r = dm_register_target(&mirror_target);
1510  	if (r < 0) {
1511  		destroy_workqueue(dm_raid1_wq);
1512  		return r;
1513  	}
1514  
1515  	return 0;
1516  }
1517  
1518  static void __exit dm_mirror_exit(void)
1519  {
1520  	destroy_workqueue(dm_raid1_wq);
1521  	dm_unregister_target(&mirror_target);
1522  }
1523  
1524  /* Module hooks */
1525  module_init(dm_mirror_init);
1526  module_exit(dm_mirror_exit);
1527  
1528  MODULE_DESCRIPTION(DM_NAME " mirror target");
1529  MODULE_AUTHOR("Joe Thornber");
1530  MODULE_LICENSE("GPL");
1531