xref: /openbmc/linux/drivers/md/raid0.c (revision 7b73a9c8)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3    raid0.c : Multiple Devices driver for Linux
4 	     Copyright (C) 1994-96 Marc ZYNGIER
5 	     <zyngier@ufr-info-p7.ibp.fr> or
6 	     <maz@gloups.fdn.fr>
7 	     Copyright (C) 1999, 2000 Ingo Molnar, Red Hat
8 
9    RAID-0 management functions.
10 
11 */
12 
13 #include <linux/blkdev.h>
14 #include <linux/seq_file.h>
15 #include <linux/module.h>
16 #include <linux/slab.h>
17 #include <trace/events/block.h>
18 #include "md.h"
19 #include "raid0.h"
20 #include "raid5.h"
21 
22 static int default_layout = 0;
23 module_param(default_layout, int, 0644);
24 
/*
 * mddev flag bits handed to mddev_clear_unsupported_flags() by the
 * takeover helpers in this file.
 */
#define UNSUPPORTED_MDDEV_FLAGS			\
	((1L << MD_HAS_JOURNAL)		|	\
	 (1L << MD_JOURNAL_CLEAN)	|	\
	 (1L << MD_FAILFAST_SUPPORTED)	|	\
	 (1L << MD_HAS_PPL)		|	\
	 (1L << MD_HAS_MULTIPLE_PPLS))
31 
32 static int raid0_congested(struct mddev *mddev, int bits)
33 {
34 	struct r0conf *conf = mddev->private;
35 	struct md_rdev **devlist = conf->devlist;
36 	int raid_disks = conf->strip_zone[0].nb_dev;
37 	int i, ret = 0;
38 
39 	for (i = 0; i < raid_disks && !ret ; i++) {
40 		struct request_queue *q = bdev_get_queue(devlist[i]->bdev);
41 
42 		ret |= bdi_congested(q->backing_dev_info, bits);
43 	}
44 	return ret;
45 }
46 
47 /*
48  * inform the user of the raid configuration
49 */
50 static void dump_zones(struct mddev *mddev)
51 {
52 	int j, k;
53 	sector_t zone_size = 0;
54 	sector_t zone_start = 0;
55 	char b[BDEVNAME_SIZE];
56 	struct r0conf *conf = mddev->private;
57 	int raid_disks = conf->strip_zone[0].nb_dev;
58 	pr_debug("md: RAID0 configuration for %s - %d zone%s\n",
59 		 mdname(mddev),
60 		 conf->nr_strip_zones, conf->nr_strip_zones==1?"":"s");
61 	for (j = 0; j < conf->nr_strip_zones; j++) {
62 		char line[200];
63 		int len = 0;
64 
65 		for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
66 			len += snprintf(line+len, 200-len, "%s%s", k?"/":"",
67 					bdevname(conf->devlist[j*raid_disks
68 							       + k]->bdev, b));
69 		pr_debug("md: zone%d=[%s]\n", j, line);
70 
71 		zone_size  = conf->strip_zone[j].zone_end - zone_start;
72 		pr_debug("      zone-offset=%10lluKB, device-offset=%10lluKB, size=%10lluKB\n",
73 			(unsigned long long)zone_start>>1,
74 			(unsigned long long)conf->strip_zone[j].dev_start>>1,
75 			(unsigned long long)zone_size>>1);
76 		zone_start = conf->strip_zone[j].zone_end;
77 	}
78 }
79 
80 static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
81 {
82 	int i, c, err;
83 	sector_t curr_zone_end, sectors;
84 	struct md_rdev *smallest, *rdev1, *rdev2, *rdev, **dev;
85 	struct strip_zone *zone;
86 	int cnt;
87 	char b[BDEVNAME_SIZE];
88 	char b2[BDEVNAME_SIZE];
89 	struct r0conf *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
90 	unsigned short blksize = 512;
91 
92 	*private_conf = ERR_PTR(-ENOMEM);
93 	if (!conf)
94 		return -ENOMEM;
95 	rdev_for_each(rdev1, mddev) {
96 		pr_debug("md/raid0:%s: looking at %s\n",
97 			 mdname(mddev),
98 			 bdevname(rdev1->bdev, b));
99 		c = 0;
100 
101 		/* round size to chunk_size */
102 		sectors = rdev1->sectors;
103 		sector_div(sectors, mddev->chunk_sectors);
104 		rdev1->sectors = sectors * mddev->chunk_sectors;
105 
106 		blksize = max(blksize, queue_logical_block_size(
107 				      rdev1->bdev->bd_disk->queue));
108 
109 		rdev_for_each(rdev2, mddev) {
110 			pr_debug("md/raid0:%s:   comparing %s(%llu)"
111 				 " with %s(%llu)\n",
112 				 mdname(mddev),
113 				 bdevname(rdev1->bdev,b),
114 				 (unsigned long long)rdev1->sectors,
115 				 bdevname(rdev2->bdev,b2),
116 				 (unsigned long long)rdev2->sectors);
117 			if (rdev2 == rdev1) {
118 				pr_debug("md/raid0:%s:   END\n",
119 					 mdname(mddev));
120 				break;
121 			}
122 			if (rdev2->sectors == rdev1->sectors) {
123 				/*
124 				 * Not unique, don't count it as a new
125 				 * group
126 				 */
127 				pr_debug("md/raid0:%s:   EQUAL\n",
128 					 mdname(mddev));
129 				c = 1;
130 				break;
131 			}
132 			pr_debug("md/raid0:%s:   NOT EQUAL\n",
133 				 mdname(mddev));
134 		}
135 		if (!c) {
136 			pr_debug("md/raid0:%s:   ==> UNIQUE\n",
137 				 mdname(mddev));
138 			conf->nr_strip_zones++;
139 			pr_debug("md/raid0:%s: %d zones\n",
140 				 mdname(mddev), conf->nr_strip_zones);
141 		}
142 	}
143 	pr_debug("md/raid0:%s: FINAL %d zones\n",
144 		 mdname(mddev), conf->nr_strip_zones);
145 
146 	if (conf->nr_strip_zones == 1) {
147 		conf->layout = RAID0_ORIG_LAYOUT;
148 	} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
149 		   mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
150 		conf->layout = mddev->layout;
151 	} else if (default_layout == RAID0_ORIG_LAYOUT ||
152 		   default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
153 		conf->layout = default_layout;
154 	} else {
155 		pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
156 		       mdname(mddev));
157 		pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
158 		err = -ENOTSUPP;
159 		goto abort;
160 	}
161 	/*
162 	 * now since we have the hard sector sizes, we can make sure
163 	 * chunk size is a multiple of that sector size
164 	 */
165 	if ((mddev->chunk_sectors << 9) % blksize) {
166 		pr_warn("md/raid0:%s: chunk_size of %d not multiple of block size %d\n",
167 			mdname(mddev),
168 			mddev->chunk_sectors << 9, blksize);
169 		err = -EINVAL;
170 		goto abort;
171 	}
172 
173 	err = -ENOMEM;
174 	conf->strip_zone = kcalloc(conf->nr_strip_zones,
175 				   sizeof(struct strip_zone),
176 				   GFP_KERNEL);
177 	if (!conf->strip_zone)
178 		goto abort;
179 	conf->devlist = kzalloc(array3_size(sizeof(struct md_rdev *),
180 					    conf->nr_strip_zones,
181 					    mddev->raid_disks),
182 				GFP_KERNEL);
183 	if (!conf->devlist)
184 		goto abort;
185 
186 	/* The first zone must contain all devices, so here we check that
187 	 * there is a proper alignment of slots to devices and find them all
188 	 */
189 	zone = &conf->strip_zone[0];
190 	cnt = 0;
191 	smallest = NULL;
192 	dev = conf->devlist;
193 	err = -EINVAL;
194 	rdev_for_each(rdev1, mddev) {
195 		int j = rdev1->raid_disk;
196 
197 		if (mddev->level == 10) {
198 			/* taking over a raid10-n2 array */
199 			j /= 2;
200 			rdev1->new_raid_disk = j;
201 		}
202 
203 		if (mddev->level == 1) {
204 			/* taiking over a raid1 array-
205 			 * we have only one active disk
206 			 */
207 			j = 0;
208 			rdev1->new_raid_disk = j;
209 		}
210 
211 		if (j < 0) {
212 			pr_warn("md/raid0:%s: remove inactive devices before converting to RAID0\n",
213 				mdname(mddev));
214 			goto abort;
215 		}
216 		if (j >= mddev->raid_disks) {
217 			pr_warn("md/raid0:%s: bad disk number %d - aborting!\n",
218 				mdname(mddev), j);
219 			goto abort;
220 		}
221 		if (dev[j]) {
222 			pr_warn("md/raid0:%s: multiple devices for %d - aborting!\n",
223 				mdname(mddev), j);
224 			goto abort;
225 		}
226 		dev[j] = rdev1;
227 
228 		if (!smallest || (rdev1->sectors < smallest->sectors))
229 			smallest = rdev1;
230 		cnt++;
231 	}
232 	if (cnt != mddev->raid_disks) {
233 		pr_warn("md/raid0:%s: too few disks (%d of %d) - aborting!\n",
234 			mdname(mddev), cnt, mddev->raid_disks);
235 		goto abort;
236 	}
237 	zone->nb_dev = cnt;
238 	zone->zone_end = smallest->sectors * cnt;
239 
240 	curr_zone_end = zone->zone_end;
241 
242 	/* now do the other zones */
243 	for (i = 1; i < conf->nr_strip_zones; i++)
244 	{
245 		int j;
246 
247 		zone = conf->strip_zone + i;
248 		dev = conf->devlist + i * mddev->raid_disks;
249 
250 		pr_debug("md/raid0:%s: zone %d\n", mdname(mddev), i);
251 		zone->dev_start = smallest->sectors;
252 		smallest = NULL;
253 		c = 0;
254 
255 		for (j=0; j<cnt; j++) {
256 			rdev = conf->devlist[j];
257 			if (rdev->sectors <= zone->dev_start) {
258 				pr_debug("md/raid0:%s: checking %s ... nope\n",
259 					 mdname(mddev),
260 					 bdevname(rdev->bdev, b));
261 				continue;
262 			}
263 			pr_debug("md/raid0:%s: checking %s ..."
264 				 " contained as device %d\n",
265 				 mdname(mddev),
266 				 bdevname(rdev->bdev, b), c);
267 			dev[c] = rdev;
268 			c++;
269 			if (!smallest || rdev->sectors < smallest->sectors) {
270 				smallest = rdev;
271 				pr_debug("md/raid0:%s:  (%llu) is smallest!.\n",
272 					 mdname(mddev),
273 					 (unsigned long long)rdev->sectors);
274 			}
275 		}
276 
277 		zone->nb_dev = c;
278 		sectors = (smallest->sectors - zone->dev_start) * c;
279 		pr_debug("md/raid0:%s: zone->nb_dev: %d, sectors: %llu\n",
280 			 mdname(mddev),
281 			 zone->nb_dev, (unsigned long long)sectors);
282 
283 		curr_zone_end += sectors;
284 		zone->zone_end = curr_zone_end;
285 
286 		pr_debug("md/raid0:%s: current zone start: %llu\n",
287 			 mdname(mddev),
288 			 (unsigned long long)smallest->sectors);
289 	}
290 
291 	pr_debug("md/raid0:%s: done.\n", mdname(mddev));
292 	*private_conf = conf;
293 
294 	return 0;
295 abort:
296 	kfree(conf->strip_zone);
297 	kfree(conf->devlist);
298 	kfree(conf);
299 	*private_conf = ERR_PTR(err);
300 	return err;
301 }
302 
303 /* Find the zone which holds a particular offset
304  * Update *sectorp to be an offset in that zone
305  */
306 static struct strip_zone *find_zone(struct r0conf *conf,
307 				    sector_t *sectorp)
308 {
309 	int i;
310 	struct strip_zone *z = conf->strip_zone;
311 	sector_t sector = *sectorp;
312 
313 	for (i = 0; i < conf->nr_strip_zones; i++)
314 		if (sector < z[i].zone_end) {
315 			if (i)
316 				*sectorp = sector - z[i-1].zone_end;
317 			return z + i;
318 		}
319 	BUG();
320 }
321 
322 /*
323  * remaps the bio to the target device. we separate two flows.
324  * power 2 flow and a general flow for the sake of performance
325 */
326 static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone,
327 				sector_t sector, sector_t *sector_offset)
328 {
329 	unsigned int sect_in_chunk;
330 	sector_t chunk;
331 	struct r0conf *conf = mddev->private;
332 	int raid_disks = conf->strip_zone[0].nb_dev;
333 	unsigned int chunk_sects = mddev->chunk_sectors;
334 
335 	if (is_power_of_2(chunk_sects)) {
336 		int chunksect_bits = ffz(~chunk_sects);
337 		/* find the sector offset inside the chunk */
338 		sect_in_chunk  = sector & (chunk_sects - 1);
339 		sector >>= chunksect_bits;
340 		/* chunk in zone */
341 		chunk = *sector_offset;
342 		/* quotient is the chunk in real device*/
343 		sector_div(chunk, zone->nb_dev << chunksect_bits);
344 	} else{
345 		sect_in_chunk = sector_div(sector, chunk_sects);
346 		chunk = *sector_offset;
347 		sector_div(chunk, chunk_sects * zone->nb_dev);
348 	}
349 	/*
350 	*  position the bio over the real device
351 	*  real sector = chunk in device + starting of zone
352 	*	+ the position in the chunk
353 	*/
354 	*sector_offset = (chunk * chunk_sects) + sect_in_chunk;
355 	return conf->devlist[(zone - conf->strip_zone)*raid_disks
356 			     + sector_div(sector, zone->nb_dev)];
357 }
358 
359 static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks)
360 {
361 	sector_t array_sectors = 0;
362 	struct md_rdev *rdev;
363 
364 	WARN_ONCE(sectors || raid_disks,
365 		  "%s does not support generic reshape\n", __func__);
366 
367 	rdev_for_each(rdev, mddev)
368 		array_sectors += (rdev->sectors &
369 				  ~(sector_t)(mddev->chunk_sectors-1));
370 
371 	return array_sectors;
372 }
373 
374 static void raid0_free(struct mddev *mddev, void *priv);
375 
376 static int raid0_run(struct mddev *mddev)
377 {
378 	struct r0conf *conf;
379 	int ret;
380 
381 	if (mddev->chunk_sectors == 0) {
382 		pr_warn("md/raid0:%s: chunk size must be set.\n", mdname(mddev));
383 		return -EINVAL;
384 	}
385 	if (md_check_no_bitmap(mddev))
386 		return -EINVAL;
387 
388 	/* if private is not null, we are here after takeover */
389 	if (mddev->private == NULL) {
390 		ret = create_strip_zones(mddev, &conf);
391 		if (ret < 0)
392 			return ret;
393 		mddev->private = conf;
394 	}
395 	conf = mddev->private;
396 	if (mddev->queue) {
397 		struct md_rdev *rdev;
398 		bool discard_supported = false;
399 
400 		blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
401 		blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
402 		blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
403 		blk_queue_max_discard_sectors(mddev->queue, UINT_MAX);
404 
405 		blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
406 		blk_queue_io_opt(mddev->queue,
407 				 (mddev->chunk_sectors << 9) * mddev->raid_disks);
408 
409 		rdev_for_each(rdev, mddev) {
410 			disk_stack_limits(mddev->gendisk, rdev->bdev,
411 					  rdev->data_offset << 9);
412 			if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
413 				discard_supported = true;
414 		}
415 		if (!discard_supported)
416 			blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
417 		else
418 			blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
419 	}
420 
421 	/* calculate array device size */
422 	md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
423 
424 	pr_debug("md/raid0:%s: md_size is %llu sectors.\n",
425 		 mdname(mddev),
426 		 (unsigned long long)mddev->array_sectors);
427 
428 	if (mddev->queue) {
429 		/* calculate the max read-ahead size.
430 		 * For read-ahead of large files to be effective, we need to
431 		 * readahead at least twice a whole stripe. i.e. number of devices
432 		 * multiplied by chunk size times 2.
433 		 * If an individual device has an ra_pages greater than the
434 		 * chunk size, then we will not drive that device as hard as it
435 		 * wants.  We consider this a configuration error: a larger
436 		 * chunksize should be used in that case.
437 		 */
438 		int stripe = mddev->raid_disks *
439 			(mddev->chunk_sectors << 9) / PAGE_SIZE;
440 		if (mddev->queue->backing_dev_info->ra_pages < 2* stripe)
441 			mddev->queue->backing_dev_info->ra_pages = 2* stripe;
442 	}
443 
444 	dump_zones(mddev);
445 
446 	ret = md_integrity_register(mddev);
447 
448 	return ret;
449 }
450 
451 static void raid0_free(struct mddev *mddev, void *priv)
452 {
453 	struct r0conf *conf = priv;
454 
455 	kfree(conf->strip_zone);
456 	kfree(conf->devlist);
457 	kfree(conf);
458 }
459 
460 /*
461  * Is io distribute over 1 or more chunks ?
462 */
463 static inline int is_io_in_chunk_boundary(struct mddev *mddev,
464 			unsigned int chunk_sects, struct bio *bio)
465 {
466 	if (likely(is_power_of_2(chunk_sects))) {
467 		return chunk_sects >=
468 			((bio->bi_iter.bi_sector & (chunk_sects-1))
469 					+ bio_sectors(bio));
470 	} else{
471 		sector_t sector = bio->bi_iter.bi_sector;
472 		return chunk_sects >= (sector_div(sector, chunk_sects)
473 						+ bio_sectors(bio));
474 	}
475 }
476 
477 static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
478 {
479 	struct r0conf *conf = mddev->private;
480 	struct strip_zone *zone;
481 	sector_t start = bio->bi_iter.bi_sector;
482 	sector_t end;
483 	unsigned int stripe_size;
484 	sector_t first_stripe_index, last_stripe_index;
485 	sector_t start_disk_offset;
486 	unsigned int start_disk_index;
487 	sector_t end_disk_offset;
488 	unsigned int end_disk_index;
489 	unsigned int disk;
490 
491 	zone = find_zone(conf, &start);
492 
493 	if (bio_end_sector(bio) > zone->zone_end) {
494 		struct bio *split = bio_split(bio,
495 			zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO,
496 			&mddev->bio_set);
497 		bio_chain(split, bio);
498 		generic_make_request(bio);
499 		bio = split;
500 		end = zone->zone_end;
501 	} else
502 		end = bio_end_sector(bio);
503 
504 	if (zone != conf->strip_zone)
505 		end = end - zone[-1].zone_end;
506 
507 	/* Now start and end is the offset in zone */
508 	stripe_size = zone->nb_dev * mddev->chunk_sectors;
509 
510 	first_stripe_index = start;
511 	sector_div(first_stripe_index, stripe_size);
512 	last_stripe_index = end;
513 	sector_div(last_stripe_index, stripe_size);
514 
515 	start_disk_index = (int)(start - first_stripe_index * stripe_size) /
516 		mddev->chunk_sectors;
517 	start_disk_offset = ((int)(start - first_stripe_index * stripe_size) %
518 		mddev->chunk_sectors) +
519 		first_stripe_index * mddev->chunk_sectors;
520 	end_disk_index = (int)(end - last_stripe_index * stripe_size) /
521 		mddev->chunk_sectors;
522 	end_disk_offset = ((int)(end - last_stripe_index * stripe_size) %
523 		mddev->chunk_sectors) +
524 		last_stripe_index * mddev->chunk_sectors;
525 
526 	for (disk = 0; disk < zone->nb_dev; disk++) {
527 		sector_t dev_start, dev_end;
528 		struct bio *discard_bio = NULL;
529 		struct md_rdev *rdev;
530 
531 		if (disk < start_disk_index)
532 			dev_start = (first_stripe_index + 1) *
533 				mddev->chunk_sectors;
534 		else if (disk > start_disk_index)
535 			dev_start = first_stripe_index * mddev->chunk_sectors;
536 		else
537 			dev_start = start_disk_offset;
538 
539 		if (disk < end_disk_index)
540 			dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
541 		else if (disk > end_disk_index)
542 			dev_end = last_stripe_index * mddev->chunk_sectors;
543 		else
544 			dev_end = end_disk_offset;
545 
546 		if (dev_end <= dev_start)
547 			continue;
548 
549 		rdev = conf->devlist[(zone - conf->strip_zone) *
550 			conf->strip_zone[0].nb_dev + disk];
551 		if (__blkdev_issue_discard(rdev->bdev,
552 			dev_start + zone->dev_start + rdev->data_offset,
553 			dev_end - dev_start, GFP_NOIO, 0, &discard_bio) ||
554 		    !discard_bio)
555 			continue;
556 		bio_chain(discard_bio, bio);
557 		bio_clone_blkg_association(discard_bio, bio);
558 		if (mddev->gendisk)
559 			trace_block_bio_remap(bdev_get_queue(rdev->bdev),
560 				discard_bio, disk_devt(mddev->gendisk),
561 				bio->bi_iter.bi_sector);
562 		generic_make_request(discard_bio);
563 	}
564 	bio_endio(bio);
565 }
566 
567 static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
568 {
569 	struct r0conf *conf = mddev->private;
570 	struct strip_zone *zone;
571 	struct md_rdev *tmp_dev;
572 	sector_t bio_sector;
573 	sector_t sector;
574 	sector_t orig_sector;
575 	unsigned chunk_sects;
576 	unsigned sectors;
577 
578 	if (unlikely(bio->bi_opf & REQ_PREFLUSH)
579 	    && md_flush_request(mddev, bio))
580 		return true;
581 
582 	if (unlikely((bio_op(bio) == REQ_OP_DISCARD))) {
583 		raid0_handle_discard(mddev, bio);
584 		return true;
585 	}
586 
587 	bio_sector = bio->bi_iter.bi_sector;
588 	sector = bio_sector;
589 	chunk_sects = mddev->chunk_sectors;
590 
591 	sectors = chunk_sects -
592 		(likely(is_power_of_2(chunk_sects))
593 		 ? (sector & (chunk_sects-1))
594 		 : sector_div(sector, chunk_sects));
595 
596 	/* Restore due to sector_div */
597 	sector = bio_sector;
598 
599 	if (sectors < bio_sectors(bio)) {
600 		struct bio *split = bio_split(bio, sectors, GFP_NOIO,
601 					      &mddev->bio_set);
602 		bio_chain(split, bio);
603 		generic_make_request(bio);
604 		bio = split;
605 	}
606 
607 	orig_sector = sector;
608 	zone = find_zone(mddev->private, &sector);
609 	switch (conf->layout) {
610 	case RAID0_ORIG_LAYOUT:
611 		tmp_dev = map_sector(mddev, zone, orig_sector, &sector);
612 		break;
613 	case RAID0_ALT_MULTIZONE_LAYOUT:
614 		tmp_dev = map_sector(mddev, zone, sector, &sector);
615 		break;
616 	default:
617 		WARN(1, "md/raid0:%s: Invalid layout\n", mdname(mddev));
618 		bio_io_error(bio);
619 		return true;
620 	}
621 
622 	if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) {
623 		bio_io_error(bio);
624 		return true;
625 	}
626 
627 	bio_set_dev(bio, tmp_dev->bdev);
628 	bio->bi_iter.bi_sector = sector + zone->dev_start +
629 		tmp_dev->data_offset;
630 
631 	if (mddev->gendisk)
632 		trace_block_bio_remap(bio->bi_disk->queue, bio,
633 				disk_devt(mddev->gendisk), bio_sector);
634 	mddev_check_writesame(mddev, bio);
635 	mddev_check_write_zeroes(mddev, bio);
636 	generic_make_request(bio);
637 	return true;
638 }
639 
640 static void raid0_status(struct seq_file *seq, struct mddev *mddev)
641 {
642 	seq_printf(seq, " %dk chunks", mddev->chunk_sectors / 2);
643 	return;
644 }
645 
646 static void *raid0_takeover_raid45(struct mddev *mddev)
647 {
648 	struct md_rdev *rdev;
649 	struct r0conf *priv_conf;
650 
651 	if (mddev->degraded != 1) {
652 		pr_warn("md/raid0:%s: raid5 must be degraded! Degraded disks: %d\n",
653 			mdname(mddev),
654 			mddev->degraded);
655 		return ERR_PTR(-EINVAL);
656 	}
657 
658 	rdev_for_each(rdev, mddev) {
659 		/* check slot number for a disk */
660 		if (rdev->raid_disk == mddev->raid_disks-1) {
661 			pr_warn("md/raid0:%s: raid5 must have missing parity disk!\n",
662 				mdname(mddev));
663 			return ERR_PTR(-EINVAL);
664 		}
665 		rdev->sectors = mddev->dev_sectors;
666 	}
667 
668 	/* Set new parameters */
669 	mddev->new_level = 0;
670 	mddev->new_layout = 0;
671 	mddev->new_chunk_sectors = mddev->chunk_sectors;
672 	mddev->raid_disks--;
673 	mddev->delta_disks = -1;
674 	/* make sure it will be not marked as dirty */
675 	mddev->recovery_cp = MaxSector;
676 	mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
677 
678 	create_strip_zones(mddev, &priv_conf);
679 
680 	return priv_conf;
681 }
682 
683 static void *raid0_takeover_raid10(struct mddev *mddev)
684 {
685 	struct r0conf *priv_conf;
686 
687 	/* Check layout:
688 	 *  - far_copies must be 1
689 	 *  - near_copies must be 2
690 	 *  - disks number must be even
691 	 *  - all mirrors must be already degraded
692 	 */
693 	if (mddev->layout != ((1 << 8) + 2)) {
694 		pr_warn("md/raid0:%s:: Raid0 cannot takeover layout: 0x%x\n",
695 			mdname(mddev),
696 			mddev->layout);
697 		return ERR_PTR(-EINVAL);
698 	}
699 	if (mddev->raid_disks & 1) {
700 		pr_warn("md/raid0:%s: Raid0 cannot takeover Raid10 with odd disk number.\n",
701 			mdname(mddev));
702 		return ERR_PTR(-EINVAL);
703 	}
704 	if (mddev->degraded != (mddev->raid_disks>>1)) {
705 		pr_warn("md/raid0:%s: All mirrors must be already degraded!\n",
706 			mdname(mddev));
707 		return ERR_PTR(-EINVAL);
708 	}
709 
710 	/* Set new parameters */
711 	mddev->new_level = 0;
712 	mddev->new_layout = 0;
713 	mddev->new_chunk_sectors = mddev->chunk_sectors;
714 	mddev->delta_disks = - mddev->raid_disks / 2;
715 	mddev->raid_disks += mddev->delta_disks;
716 	mddev->degraded = 0;
717 	/* make sure it will be not marked as dirty */
718 	mddev->recovery_cp = MaxSector;
719 	mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
720 
721 	create_strip_zones(mddev, &priv_conf);
722 	return priv_conf;
723 }
724 
725 static void *raid0_takeover_raid1(struct mddev *mddev)
726 {
727 	struct r0conf *priv_conf;
728 	int chunksect;
729 
730 	/* Check layout:
731 	 *  - (N - 1) mirror drives must be already faulty
732 	 */
733 	if ((mddev->raid_disks - 1) != mddev->degraded) {
734 		pr_err("md/raid0:%s: (N - 1) mirrors drives must be already faulty!\n",
735 		       mdname(mddev));
736 		return ERR_PTR(-EINVAL);
737 	}
738 
739 	/*
740 	 * a raid1 doesn't have the notion of chunk size, so
741 	 * figure out the largest suitable size we can use.
742 	 */
743 	chunksect = 64 * 2; /* 64K by default */
744 
745 	/* The array must be an exact multiple of chunksize */
746 	while (chunksect && (mddev->array_sectors & (chunksect - 1)))
747 		chunksect >>= 1;
748 
749 	if ((chunksect << 9) < PAGE_SIZE)
750 		/* array size does not allow a suitable chunk size */
751 		return ERR_PTR(-EINVAL);
752 
753 	/* Set new parameters */
754 	mddev->new_level = 0;
755 	mddev->new_layout = 0;
756 	mddev->new_chunk_sectors = chunksect;
757 	mddev->chunk_sectors = chunksect;
758 	mddev->delta_disks = 1 - mddev->raid_disks;
759 	mddev->raid_disks = 1;
760 	/* make sure it will be not marked as dirty */
761 	mddev->recovery_cp = MaxSector;
762 	mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
763 
764 	create_strip_zones(mddev, &priv_conf);
765 	return priv_conf;
766 }
767 
768 static void *raid0_takeover(struct mddev *mddev)
769 {
770 	/* raid0 can take over:
771 	 *  raid4 - if all data disks are active.
772 	 *  raid5 - providing it is Raid4 layout and one disk is faulty
773 	 *  raid10 - assuming we have all necessary active disks
774 	 *  raid1 - with (N -1) mirror drives faulty
775 	 */
776 
777 	if (mddev->bitmap) {
778 		pr_warn("md/raid0: %s: cannot takeover array with bitmap\n",
779 			mdname(mddev));
780 		return ERR_PTR(-EBUSY);
781 	}
782 	if (mddev->level == 4)
783 		return raid0_takeover_raid45(mddev);
784 
785 	if (mddev->level == 5) {
786 		if (mddev->layout == ALGORITHM_PARITY_N)
787 			return raid0_takeover_raid45(mddev);
788 
789 		pr_warn("md/raid0:%s: Raid can only takeover Raid5 with layout: %d\n",
790 			mdname(mddev), ALGORITHM_PARITY_N);
791 	}
792 
793 	if (mddev->level == 10)
794 		return raid0_takeover_raid10(mddev);
795 
796 	if (mddev->level == 1)
797 		return raid0_takeover_raid1(mddev);
798 
799 	pr_warn("Takeover from raid%i to raid0 not supported\n",
800 		mddev->level);
801 
802 	return ERR_PTR(-EINVAL);
803 }
804 
/* Quiesce hook of the personality; intentionally does nothing. */
static void raid0_quiesce(struct mddev *mddev, int quiesce)
{
	/* nothing to do */
}
808 
809 static struct md_personality raid0_personality=
810 {
811 	.name		= "raid0",
812 	.level		= 0,
813 	.owner		= THIS_MODULE,
814 	.make_request	= raid0_make_request,
815 	.run		= raid0_run,
816 	.free		= raid0_free,
817 	.status		= raid0_status,
818 	.size		= raid0_size,
819 	.takeover	= raid0_takeover,
820 	.quiesce	= raid0_quiesce,
821 	.congested	= raid0_congested,
822 };
823 
824 static int __init raid0_init (void)
825 {
826 	return register_md_personality (&raid0_personality);
827 }
828 
829 static void raid0_exit (void)
830 {
831 	unregister_md_personality (&raid0_personality);
832 }
833 
834 module_init(raid0_init);
835 module_exit(raid0_exit);
836 MODULE_LICENSE("GPL");
837 MODULE_DESCRIPTION("RAID0 (striping) personality for MD");
838 MODULE_ALIAS("md-personality-2"); /* RAID0 */
839 MODULE_ALIAS("md-raid0");
840 MODULE_ALIAS("md-level-0");
841