xref: /openbmc/linux/drivers/mtd/mtdswap.c (revision 3ae7c96d)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Swap block device support for MTDs
4  * Turns an MTD device into a swap device with block wear leveling
5  *
6  * Copyright © 2007,2011 Nokia Corporation. All rights reserved.
7  *
8  * Authors: Jarkko Lavinen <jarkko.lavinen@nokia.com>
9  *
10  * Based on Richard Purdie's earlier implementation in 2007. Background
11  * support and lock-less operation written by Adrian Hunter.
12  */
13 
14 #include <linux/kernel.h>
15 #include <linux/module.h>
16 #include <linux/mtd/mtd.h>
17 #include <linux/mtd/blktrans.h>
18 #include <linux/rbtree.h>
19 #include <linux/sched.h>
20 #include <linux/slab.h>
21 #include <linux/vmalloc.h>
22 #include <linux/blkdev.h>
23 #include <linux/swap.h>
24 #include <linux/debugfs.h>
25 #include <linux/seq_file.h>
26 #include <linux/device.h>
27 #include <linux/math64.h>
28 
29 #define MTDSWAP_PREFIX "mtdswap"
30 
31 /*
32  * The number of free eraseblocks when GC should stop
33  */
34 #define CLEAN_BLOCK_THRESHOLD	20
35 
36 /*
37  * Number of free eraseblocks below which GC can also collect low frag
38  * blocks.
39  */
40 #define LOW_FRAG_GC_THRESHOLD	5
41 
/*
 * Wear level cost amortization. We want to do wear leveling in the background
 * without disturbing GC too much. This is done by defining a maximum GC
 * frequency. A frequency value of 6 means that 1/6 of the GC passes will pick
 * an erase block based on the biggest wear difference rather than the biggest
 * dirtiness.
 *
 * The lower frequency value, freq2, should be chosen so that the maximum
 * erase difference will decrease even if a malicious application is
 * deliberately trying to make erase differences large.
 */
52 #define MAX_ERASE_DIFF		4000
53 #define COLLECT_NONDIRTY_BASE	MAX_ERASE_DIFF
54 #define COLLECT_NONDIRTY_FREQ1	6
55 #define COLLECT_NONDIRTY_FREQ2	4
56 
57 #define PAGE_UNDEF		UINT_MAX
58 #define BLOCK_UNDEF		UINT_MAX
59 #define BLOCK_ERROR		(UINT_MAX - 1)
60 #define BLOCK_MAX		(UINT_MAX - 2)
61 
62 #define EBLOCK_BAD		(1 << 0)
63 #define EBLOCK_NOMAGIC		(1 << 1)
64 #define EBLOCK_BITFLIP		(1 << 2)
65 #define EBLOCK_FAILED		(1 << 3)
66 #define EBLOCK_READERR		(1 << 4)
67 #define EBLOCK_IDX_SHIFT	5
68 
69 struct swap_eb {
70 	struct rb_node rb;
71 	struct rb_root *root;
72 
73 	unsigned int flags;
74 	unsigned int active_count;
75 	unsigned int erase_count;
76 	unsigned int pad;		/* speeds up pointer decrement */
77 };
78 
79 #define MTDSWAP_ECNT_MIN(rbroot) (rb_entry(rb_first(rbroot), struct swap_eb, \
80 				rb)->erase_count)
81 #define MTDSWAP_ECNT_MAX(rbroot) (rb_entry(rb_last(rbroot), struct swap_eb, \
82 				rb)->erase_count)
83 
84 struct mtdswap_tree {
85 	struct rb_root root;
86 	unsigned int count;
87 };
88 
/*
 * Indices into mtdswap_dev.trees[]. The order matters: the GC tree selection
 * walks downwards from MTDSWAP_BITFLIP and the wear-level selection walks
 * upwards from MTDSWAP_CLEAN to MTDSWAP_DIRTY.
 */
enum {
	MTDSWAP_CLEAN,		/* erased blocks ready to be written */
	MTDSWAP_USED,		/* every page holds mapped data */
	MTDSWAP_LOWFRAG,	/* more than half of the pages are mapped */
	MTDSWAP_HIFRAG,		/* at most half of the pages are mapped */
	MTDSWAP_DIRTY,		/* no mapped pages left */
	MTDSWAP_BITFLIP,	/* a read reported corrected bitflips */
	MTDSWAP_FAILING,	/* read or write errors were seen */
	MTDSWAP_TREE_CNT,	/* number of trees, not a tree itself */
};
99 
100 struct mtdswap_dev {
101 	struct mtd_blktrans_dev *mbd_dev;
102 	struct mtd_info *mtd;
103 	struct device *dev;
104 
105 	unsigned int *page_data;
106 	unsigned int *revmap;
107 
108 	unsigned int eblks;
109 	unsigned int spare_eblks;
110 	unsigned int pages_per_eblk;
111 	unsigned int max_erase_count;
112 	struct swap_eb *eb_data;
113 
114 	struct mtdswap_tree trees[MTDSWAP_TREE_CNT];
115 
116 	unsigned long long sect_read_count;
117 	unsigned long long sect_write_count;
118 	unsigned long long mtd_write_count;
119 	unsigned long long mtd_read_count;
120 	unsigned long long discard_count;
121 	unsigned long long discard_page_count;
122 
123 	unsigned int curr_write_pos;
124 	struct swap_eb *curr_write;
125 
126 	char *page_buf;
127 	char *oob_buf;
128 };
129 
130 struct mtdswap_oobdata {
131 	__le16 magic;
132 	__le32 count;
133 } __packed;
134 
135 #define MTDSWAP_MAGIC_CLEAN	0x2095
136 #define MTDSWAP_MAGIC_DIRTY	(MTDSWAP_MAGIC_CLEAN + 1)
137 #define MTDSWAP_TYPE_CLEAN	0
138 #define MTDSWAP_TYPE_DIRTY	1
139 #define MTDSWAP_OOBSIZE		sizeof(struct mtdswap_oobdata)
140 
141 #define MTDSWAP_ERASE_RETRIES	3 /* Before marking erase block bad */
142 #define MTDSWAP_IO_RETRIES	3
143 
/* Non-negative results of mtdswap_read_markers(). */
enum {
	MTDSWAP_SCANNED_CLEAN,		/* clean marker only */
	MTDSWAP_SCANNED_DIRTY,		/* dirty marker present or no magic */
	MTDSWAP_SCANNED_BITFLIP,	/* clean marker read with bitflips */
	MTDSWAP_SCANNED_BAD,		/* block is marked bad */
};
150 
151 /*
152  * In the worst case mtdswap_writesect() has allocated the last clean
153  * page from the current block and is then pre-empted by the GC
154  * thread. The thread can consume a full erase block when moving a
155  * block.
156  */
157 #define MIN_SPARE_EBLOCKS	2
158 #define MIN_ERASE_BLOCKS	(MIN_SPARE_EBLOCKS + 1)
159 
/*
 * Accessors for the per-state eraseblock trees of a struct mtdswap_dev.
 * @d is a pointer to the device, @name is the tree name without the
 * MTDSWAP_ prefix (CLEAN, DIRTY, ...). The pointer argument is parenthesised
 * so that arbitrary pointer expressions can be passed safely.
 */
#define TREE_ROOT(d, name) (&(d)->trees[MTDSWAP_ ## name].root)
#define TREE_EMPTY(d, name) (TREE_ROOT(d, name)->rb_node == NULL)
#define TREE_NONEMPTY(d, name) (!TREE_EMPTY(d, name))
#define TREE_COUNT(d, name) ((d)->trees[MTDSWAP_ ## name].count)

/* Fetch the mtdswap device stored in a blktrans device's private pointer. */
#define MTDSWAP_MBD_TO_MTDSWAP(dev) ((struct mtdswap_dev *)(dev)->priv)
166 
167 static char partitions[128] = "";
168 module_param_string(partitions, partitions, sizeof(partitions), 0444);
169 MODULE_PARM_DESC(partitions, "MTD partition numbers to use as swap "
170 		"partitions=\"1,3,5\"");
171 
172 static unsigned int spare_eblocks = 10;
173 module_param(spare_eblocks, uint, 0444);
174 MODULE_PARM_DESC(spare_eblocks, "Percentage of spare erase blocks for "
175 		"garbage collection (default 10%)");
176 
177 static bool header; /* false */
178 module_param(header, bool, 0444);
179 MODULE_PARM_DESC(header,
180 		"Include builtin swap header (default 0, without header)");
181 
182 static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background);
183 
184 static loff_t mtdswap_eb_offset(struct mtdswap_dev *d, struct swap_eb *eb)
185 {
186 	return (loff_t)(eb - d->eb_data) * d->mtd->erasesize;
187 }
188 
189 static void mtdswap_eb_detach(struct mtdswap_dev *d, struct swap_eb *eb)
190 {
191 	unsigned int oldidx;
192 	struct mtdswap_tree *tp;
193 
194 	if (eb->root) {
195 		tp = container_of(eb->root, struct mtdswap_tree, root);
196 		oldidx = tp - &d->trees[0];
197 
198 		d->trees[oldidx].count--;
199 		rb_erase(&eb->rb, eb->root);
200 	}
201 }
202 
203 static void __mtdswap_rb_add(struct rb_root *root, struct swap_eb *eb)
204 {
205 	struct rb_node **p, *parent = NULL;
206 	struct swap_eb *cur;
207 
208 	p = &root->rb_node;
209 	while (*p) {
210 		parent = *p;
211 		cur = rb_entry(parent, struct swap_eb, rb);
212 		if (eb->erase_count > cur->erase_count)
213 			p = &(*p)->rb_right;
214 		else
215 			p = &(*p)->rb_left;
216 	}
217 
218 	rb_link_node(&eb->rb, parent, p);
219 	rb_insert_color(&eb->rb, root);
220 }
221 
222 static void mtdswap_rb_add(struct mtdswap_dev *d, struct swap_eb *eb, int idx)
223 {
224 	struct rb_root *root;
225 
226 	if (eb->root == &d->trees[idx].root)
227 		return;
228 
229 	mtdswap_eb_detach(d, eb);
230 	root = &d->trees[idx].root;
231 	__mtdswap_rb_add(root, eb);
232 	eb->root = root;
233 	d->trees[idx].count++;
234 }
235 
/*
 * Return the node at in-order position @idx of @root (0 is the first node),
 * or NULL when the tree holds @idx nodes or fewer.
 */
static struct rb_node *mtdswap_rb_index(struct rb_root *root, unsigned int idx)
{
	struct rb_node *node = rb_first(root);
	unsigned int pos;

	for (pos = 0; pos < idx && node; pos++)
		node = rb_next(node);

	return node;
}
250 
251 static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb)
252 {
253 	int ret;
254 	loff_t offset;
255 
256 	d->spare_eblks--;
257 	eb->flags |= EBLOCK_BAD;
258 	mtdswap_eb_detach(d, eb);
259 	eb->root = NULL;
260 
261 	/* badblocks not supported */
262 	if (!mtd_can_have_bb(d->mtd))
263 		return 1;
264 
265 	offset = mtdswap_eb_offset(d, eb);
266 	dev_warn(d->dev, "Marking bad block at %08llx\n", offset);
267 	ret = mtd_block_markbad(d->mtd, offset);
268 
269 	if (ret) {
270 		dev_warn(d->dev, "Mark block bad failed for block at %08llx "
271 			"error %d\n", offset, ret);
272 		return ret;
273 	}
274 
275 	return 1;
276 
277 }
278 
279 static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb)
280 {
281 	unsigned int marked = eb->flags & EBLOCK_FAILED;
282 	struct swap_eb *curr_write = d->curr_write;
283 
284 	eb->flags |= EBLOCK_FAILED;
285 	if (curr_write == eb) {
286 		d->curr_write = NULL;
287 
288 		if (!marked && d->curr_write_pos != 0) {
289 			mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
290 			return 0;
291 		}
292 	}
293 
294 	return mtdswap_handle_badblock(d, eb);
295 }
296 
297 static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from,
298 			struct mtd_oob_ops *ops)
299 {
300 	int ret = mtd_read_oob(d->mtd, from, ops);
301 
302 	if (mtd_is_bitflip(ret))
303 		return ret;
304 
305 	if (ret) {
306 		dev_warn(d->dev, "Read OOB failed %d for block at %08llx\n",
307 			ret, from);
308 		return ret;
309 	}
310 
311 	if (ops->oobretlen < ops->ooblen) {
312 		dev_warn(d->dev, "Read OOB return short read (%zd bytes not "
313 			"%zd) for block at %08llx\n",
314 			ops->oobretlen, ops->ooblen, from);
315 		return -EIO;
316 	}
317 
318 	return 0;
319 }
320 
321 static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb)
322 {
323 	struct mtdswap_oobdata *data, *data2;
324 	int ret;
325 	loff_t offset;
326 	struct mtd_oob_ops ops = { };
327 
328 	offset = mtdswap_eb_offset(d, eb);
329 
330 	/* Check first if the block is bad. */
331 	if (mtd_can_have_bb(d->mtd) && mtd_block_isbad(d->mtd, offset))
332 		return MTDSWAP_SCANNED_BAD;
333 
334 	ops.ooblen = 2 * d->mtd->oobavail;
335 	ops.oobbuf = d->oob_buf;
336 	ops.ooboffs = 0;
337 	ops.datbuf = NULL;
338 	ops.mode = MTD_OPS_AUTO_OOB;
339 
340 	ret = mtdswap_read_oob(d, offset, &ops);
341 
342 	if (ret && !mtd_is_bitflip(ret))
343 		return ret;
344 
345 	data = (struct mtdswap_oobdata *)d->oob_buf;
346 	data2 = (struct mtdswap_oobdata *)
347 		(d->oob_buf + d->mtd->oobavail);
348 
349 	if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) {
350 		eb->erase_count = le32_to_cpu(data->count);
351 		if (mtd_is_bitflip(ret))
352 			ret = MTDSWAP_SCANNED_BITFLIP;
353 		else {
354 			if (le16_to_cpu(data2->magic) == MTDSWAP_MAGIC_DIRTY)
355 				ret = MTDSWAP_SCANNED_DIRTY;
356 			else
357 				ret = MTDSWAP_SCANNED_CLEAN;
358 		}
359 	} else {
360 		eb->flags |= EBLOCK_NOMAGIC;
361 		ret = MTDSWAP_SCANNED_DIRTY;
362 	}
363 
364 	return ret;
365 }
366 
367 static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb,
368 				u16 marker)
369 {
370 	struct mtdswap_oobdata n;
371 	int ret;
372 	loff_t offset;
373 	struct mtd_oob_ops ops = { };
374 
375 	ops.ooboffs = 0;
376 	ops.oobbuf = (uint8_t *)&n;
377 	ops.mode = MTD_OPS_AUTO_OOB;
378 	ops.datbuf = NULL;
379 
380 	if (marker == MTDSWAP_TYPE_CLEAN) {
381 		n.magic = cpu_to_le16(MTDSWAP_MAGIC_CLEAN);
382 		n.count = cpu_to_le32(eb->erase_count);
383 		ops.ooblen = MTDSWAP_OOBSIZE;
384 		offset = mtdswap_eb_offset(d, eb);
385 	} else {
386 		n.magic = cpu_to_le16(MTDSWAP_MAGIC_DIRTY);
387 		ops.ooblen = sizeof(n.magic);
388 		offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize;
389 	}
390 
391 	ret = mtd_write_oob(d->mtd, offset, &ops);
392 
393 	if (ret) {
394 		dev_warn(d->dev, "Write OOB failed for block at %08llx "
395 			"error %d\n", offset, ret);
396 		if (ret == -EIO || mtd_is_eccerr(ret))
397 			mtdswap_handle_write_error(d, eb);
398 		return ret;
399 	}
400 
401 	if (ops.oobretlen != ops.ooblen) {
402 		dev_warn(d->dev, "Short OOB write for block at %08llx: "
403 			"%zd not %zd\n",
404 			offset, ops.oobretlen, ops.ooblen);
405 		return ret;
406 	}
407 
408 	return 0;
409 }
410 
/*
 * Are there any erase blocks without MAGIC_CLEAN header, presumably
 * because power was cut off after erase but before header write? We
 * need to guesstimate the erase count.
 */
416 static void mtdswap_check_counts(struct mtdswap_dev *d)
417 {
418 	struct rb_root hist_root = RB_ROOT;
419 	struct rb_node *medrb;
420 	struct swap_eb *eb;
421 	unsigned int i, cnt, median;
422 
423 	cnt = 0;
424 	for (i = 0; i < d->eblks; i++) {
425 		eb = d->eb_data + i;
426 
427 		if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
428 			continue;
429 
430 		__mtdswap_rb_add(&hist_root, eb);
431 		cnt++;
432 	}
433 
434 	if (cnt == 0)
435 		return;
436 
437 	medrb = mtdswap_rb_index(&hist_root, cnt / 2);
438 	median = rb_entry(medrb, struct swap_eb, rb)->erase_count;
439 
440 	d->max_erase_count = MTDSWAP_ECNT_MAX(&hist_root);
441 
442 	for (i = 0; i < d->eblks; i++) {
443 		eb = d->eb_data + i;
444 
445 		if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_READERR))
446 			eb->erase_count = median;
447 
448 		if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
449 			continue;
450 
451 		rb_erase(&eb->rb, &hist_root);
452 	}
453 }
454 
455 static void mtdswap_scan_eblks(struct mtdswap_dev *d)
456 {
457 	int status;
458 	unsigned int i, idx;
459 	struct swap_eb *eb;
460 
461 	for (i = 0; i < d->eblks; i++) {
462 		eb = d->eb_data + i;
463 
464 		status = mtdswap_read_markers(d, eb);
465 		if (status < 0)
466 			eb->flags |= EBLOCK_READERR;
467 		else if (status == MTDSWAP_SCANNED_BAD) {
468 			eb->flags |= EBLOCK_BAD;
469 			continue;
470 		}
471 
472 		switch (status) {
473 		case MTDSWAP_SCANNED_CLEAN:
474 			idx = MTDSWAP_CLEAN;
475 			break;
476 		case MTDSWAP_SCANNED_DIRTY:
477 		case MTDSWAP_SCANNED_BITFLIP:
478 			idx = MTDSWAP_DIRTY;
479 			break;
480 		default:
481 			idx = MTDSWAP_FAILING;
482 		}
483 
484 		eb->flags |= (idx << EBLOCK_IDX_SHIFT);
485 	}
486 
487 	mtdswap_check_counts(d);
488 
489 	for (i = 0; i < d->eblks; i++) {
490 		eb = d->eb_data + i;
491 
492 		if (eb->flags & EBLOCK_BAD)
493 			continue;
494 
495 		idx = eb->flags >> EBLOCK_IDX_SHIFT;
496 		mtdswap_rb_add(d, eb, idx);
497 	}
498 }
499 
500 /*
501  * Place eblk into a tree corresponding to its number of active blocks
502  * it contains.
503  */
504 static void mtdswap_store_eb(struct mtdswap_dev *d, struct swap_eb *eb)
505 {
506 	unsigned int weight = eb->active_count;
507 	unsigned int maxweight = d->pages_per_eblk;
508 
509 	if (eb == d->curr_write)
510 		return;
511 
512 	if (eb->flags & EBLOCK_BITFLIP)
513 		mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
514 	else if (eb->flags & (EBLOCK_READERR | EBLOCK_FAILED))
515 		mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
516 	if (weight == maxweight)
517 		mtdswap_rb_add(d, eb, MTDSWAP_USED);
518 	else if (weight == 0)
519 		mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);
520 	else if (weight > (maxweight/2))
521 		mtdswap_rb_add(d, eb, MTDSWAP_LOWFRAG);
522 	else
523 		mtdswap_rb_add(d, eb, MTDSWAP_HIFRAG);
524 }
525 
526 static int mtdswap_erase_block(struct mtdswap_dev *d, struct swap_eb *eb)
527 {
528 	struct mtd_info *mtd = d->mtd;
529 	struct erase_info erase;
530 	unsigned int retries = 0;
531 	int ret;
532 
533 	eb->erase_count++;
534 	if (eb->erase_count > d->max_erase_count)
535 		d->max_erase_count = eb->erase_count;
536 
537 retry:
538 	memset(&erase, 0, sizeof(struct erase_info));
539 	erase.addr	= mtdswap_eb_offset(d, eb);
540 	erase.len	= mtd->erasesize;
541 
542 	ret = mtd_erase(mtd, &erase);
543 	if (ret) {
544 		if (retries++ < MTDSWAP_ERASE_RETRIES) {
545 			dev_warn(d->dev,
546 				"erase of erase block %#llx on %s failed",
547 				erase.addr, mtd->name);
548 			yield();
549 			goto retry;
550 		}
551 
552 		dev_err(d->dev, "Cannot erase erase block %#llx on %s\n",
553 			erase.addr, mtd->name);
554 
555 		mtdswap_handle_badblock(d, eb);
556 		return -EIO;
557 	}
558 
559 	return 0;
560 }
561 
562 static int mtdswap_map_free_block(struct mtdswap_dev *d, unsigned int page,
563 				unsigned int *block)
564 {
565 	int ret;
566 	struct swap_eb *old_eb = d->curr_write;
567 	struct rb_root *clean_root;
568 	struct swap_eb *eb;
569 
570 	if (old_eb == NULL || d->curr_write_pos >= d->pages_per_eblk) {
571 		do {
572 			if (TREE_EMPTY(d, CLEAN))
573 				return -ENOSPC;
574 
575 			clean_root = TREE_ROOT(d, CLEAN);
576 			eb = rb_entry(rb_first(clean_root), struct swap_eb, rb);
577 			rb_erase(&eb->rb, clean_root);
578 			eb->root = NULL;
579 			TREE_COUNT(d, CLEAN)--;
580 
581 			ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_DIRTY);
582 		} while (ret == -EIO || mtd_is_eccerr(ret));
583 
584 		if (ret)
585 			return ret;
586 
587 		d->curr_write_pos = 0;
588 		d->curr_write = eb;
589 		if (old_eb)
590 			mtdswap_store_eb(d, old_eb);
591 	}
592 
593 	*block = (d->curr_write - d->eb_data) * d->pages_per_eblk +
594 		d->curr_write_pos;
595 
596 	d->curr_write->active_count++;
597 	d->revmap[*block] = page;
598 	d->curr_write_pos++;
599 
600 	return 0;
601 }
602 
603 static unsigned int mtdswap_free_page_cnt(struct mtdswap_dev *d)
604 {
605 	return TREE_COUNT(d, CLEAN) * d->pages_per_eblk +
606 		d->pages_per_eblk - d->curr_write_pos;
607 }
608 
609 static unsigned int mtdswap_enough_free_pages(struct mtdswap_dev *d)
610 {
611 	return mtdswap_free_page_cnt(d) > d->pages_per_eblk;
612 }
613 
614 static int mtdswap_write_block(struct mtdswap_dev *d, char *buf,
615 			unsigned int page, unsigned int *bp, int gc_context)
616 {
617 	struct mtd_info *mtd = d->mtd;
618 	struct swap_eb *eb;
619 	size_t retlen;
620 	loff_t writepos;
621 	int ret;
622 
623 retry:
624 	if (!gc_context)
625 		while (!mtdswap_enough_free_pages(d))
626 			if (mtdswap_gc(d, 0) > 0)
627 				return -ENOSPC;
628 
629 	ret = mtdswap_map_free_block(d, page, bp);
630 	eb = d->eb_data + (*bp / d->pages_per_eblk);
631 
632 	if (ret == -EIO || mtd_is_eccerr(ret)) {
633 		d->curr_write = NULL;
634 		eb->active_count--;
635 		d->revmap[*bp] = PAGE_UNDEF;
636 		goto retry;
637 	}
638 
639 	if (ret < 0)
640 		return ret;
641 
642 	writepos = (loff_t)*bp << PAGE_SHIFT;
643 	ret =  mtd_write(mtd, writepos, PAGE_SIZE, &retlen, buf);
644 	if (ret == -EIO || mtd_is_eccerr(ret)) {
645 		d->curr_write_pos--;
646 		eb->active_count--;
647 		d->revmap[*bp] = PAGE_UNDEF;
648 		mtdswap_handle_write_error(d, eb);
649 		goto retry;
650 	}
651 
652 	if (ret < 0) {
653 		dev_err(d->dev, "Write to MTD device failed: %d (%zd written)",
654 			ret, retlen);
655 		goto err;
656 	}
657 
658 	if (retlen != PAGE_SIZE) {
659 		dev_err(d->dev, "Short write to MTD device: %zd written",
660 			retlen);
661 		ret = -EIO;
662 		goto err;
663 	}
664 
665 	return ret;
666 
667 err:
668 	d->curr_write_pos--;
669 	eb->active_count--;
670 	d->revmap[*bp] = PAGE_UNDEF;
671 
672 	return ret;
673 }
674 
675 static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock,
676 		unsigned int *newblock)
677 {
678 	struct mtd_info *mtd = d->mtd;
679 	struct swap_eb *eb, *oldeb;
680 	int ret;
681 	size_t retlen;
682 	unsigned int page, retries;
683 	loff_t readpos;
684 
685 	page = d->revmap[oldblock];
686 	readpos = (loff_t) oldblock << PAGE_SHIFT;
687 	retries = 0;
688 
689 retry:
690 	ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf);
691 
692 	if (ret < 0 && !mtd_is_bitflip(ret)) {
693 		oldeb = d->eb_data + oldblock / d->pages_per_eblk;
694 		oldeb->flags |= EBLOCK_READERR;
695 
696 		dev_err(d->dev, "Read Error: %d (block %u)\n", ret,
697 			oldblock);
698 		retries++;
699 		if (retries < MTDSWAP_IO_RETRIES)
700 			goto retry;
701 
702 		goto read_error;
703 	}
704 
705 	if (retlen != PAGE_SIZE) {
706 		dev_err(d->dev, "Short read: %zd (block %u)\n", retlen,
707 		       oldblock);
708 		ret = -EIO;
709 		goto read_error;
710 	}
711 
712 	ret = mtdswap_write_block(d, d->page_buf, page, newblock, 1);
713 	if (ret < 0) {
714 		d->page_data[page] = BLOCK_ERROR;
715 		dev_err(d->dev, "Write error: %d\n", ret);
716 		return ret;
717 	}
718 
719 	d->page_data[page] = *newblock;
720 	d->revmap[oldblock] = PAGE_UNDEF;
721 	eb = d->eb_data + oldblock / d->pages_per_eblk;
722 	eb->active_count--;
723 
724 	return 0;
725 
726 read_error:
727 	d->page_data[page] = BLOCK_ERROR;
728 	d->revmap[oldblock] = PAGE_UNDEF;
729 	return ret;
730 }
731 
732 static int mtdswap_gc_eblock(struct mtdswap_dev *d, struct swap_eb *eb)
733 {
734 	unsigned int i, block, eblk_base, newblock;
735 	int ret, errcode;
736 
737 	errcode = 0;
738 	eblk_base = (eb - d->eb_data) * d->pages_per_eblk;
739 
740 	for (i = 0; i < d->pages_per_eblk; i++) {
741 		if (d->spare_eblks < MIN_SPARE_EBLOCKS)
742 			return -ENOSPC;
743 
744 		block = eblk_base + i;
745 		if (d->revmap[block] == PAGE_UNDEF)
746 			continue;
747 
748 		ret = mtdswap_move_block(d, block, &newblock);
749 		if (ret < 0 && !errcode)
750 			errcode = ret;
751 	}
752 
753 	return errcode;
754 }
755 
756 static int __mtdswap_choose_gc_tree(struct mtdswap_dev *d)
757 {
758 	int idx, stopat;
759 
760 	if (TREE_COUNT(d, CLEAN) < LOW_FRAG_GC_THRESHOLD)
761 		stopat = MTDSWAP_LOWFRAG;
762 	else
763 		stopat = MTDSWAP_HIFRAG;
764 
765 	for (idx = MTDSWAP_BITFLIP; idx >= stopat; idx--)
766 		if (d->trees[idx].root.rb_node != NULL)
767 			return idx;
768 
769 	return -1;
770 }
771 
/*
 * Wear-leveling pick frequency for a given maximum erase count difference.
 *
 * The value ramps down linearly from COLLECT_NONDIRTY_FREQ1 to
 * COLLECT_NONDIRTY_FREQ2 while @maxdiff goes from MAX_ERASE_DIFF to
 * MAX_ERASE_DIFF + COLLECT_NONDIRTY_BASE, and stays at
 * COLLECT_NONDIRTY_FREQ2 beyond that. A smaller value means wear-leveling
 * picks happen more often.
 */
static int mtdswap_wlfreq(unsigned int maxdiff)
{
	unsigned int h, x, y, dist, base;

	base = COLLECT_NONDIRTY_BASE;

	/* Distance travelled along the ramp, clamped to [0, base]. */
	if (maxdiff <= MAX_ERASE_DIFF)
		dist = 0;
	else
		dist = maxdiff - MAX_ERASE_DIFF;
	if (dist > base)
		dist = base;

	/*
	 * Model the slope as a right-angled triangle with base
	 * COLLECT_NONDIRTY_BASE and height freq1 - freq2. x is the remaining
	 * distance to the end of the ramp; the ratio y/x equals h/base.
	 * The old code computed x as dist - base, which wraps around for
	 * unsigned values and produced a huge frequency on the whole ramp.
	 */
	h = COLLECT_NONDIRTY_FREQ1 - COLLECT_NONDIRTY_FREQ2;
	x = base - dist;
	y = (x * h + base / 2) / base;

	return COLLECT_NONDIRTY_FREQ2 + y;
}
799 
800 static int mtdswap_choose_wl_tree(struct mtdswap_dev *d)
801 {
802 	static unsigned int pick_cnt;
803 	unsigned int i, idx = -1, wear, max;
804 	struct rb_root *root;
805 
806 	max = 0;
807 	for (i = 0; i <= MTDSWAP_DIRTY; i++) {
808 		root = &d->trees[i].root;
809 		if (root->rb_node == NULL)
810 			continue;
811 
812 		wear = d->max_erase_count - MTDSWAP_ECNT_MIN(root);
813 		if (wear > max) {
814 			max = wear;
815 			idx = i;
816 		}
817 	}
818 
819 	if (max > MAX_ERASE_DIFF && pick_cnt >= mtdswap_wlfreq(max) - 1) {
820 		pick_cnt = 0;
821 		return idx;
822 	}
823 
824 	pick_cnt++;
825 	return -1;
826 }
827 
828 static int mtdswap_choose_gc_tree(struct mtdswap_dev *d,
829 				unsigned int background)
830 {
831 	int idx;
832 
833 	if (TREE_NONEMPTY(d, FAILING) &&
834 		(background || (TREE_EMPTY(d, CLEAN) && TREE_EMPTY(d, DIRTY))))
835 		return MTDSWAP_FAILING;
836 
837 	idx = mtdswap_choose_wl_tree(d);
838 	if (idx >= MTDSWAP_CLEAN)
839 		return idx;
840 
841 	return __mtdswap_choose_gc_tree(d);
842 }
843 
844 static struct swap_eb *mtdswap_pick_gc_eblk(struct mtdswap_dev *d,
845 					unsigned int background)
846 {
847 	struct rb_root *rp = NULL;
848 	struct swap_eb *eb = NULL;
849 	int idx;
850 
851 	if (background && TREE_COUNT(d, CLEAN) > CLEAN_BLOCK_THRESHOLD &&
852 		TREE_EMPTY(d, DIRTY) && TREE_EMPTY(d, FAILING))
853 		return NULL;
854 
855 	idx = mtdswap_choose_gc_tree(d, background);
856 	if (idx < 0)
857 		return NULL;
858 
859 	rp = &d->trees[idx].root;
860 	eb = rb_entry(rb_first(rp), struct swap_eb, rb);
861 
862 	rb_erase(&eb->rb, rp);
863 	eb->root = NULL;
864 	d->trees[idx].count--;
865 	return eb;
866 }
867 
/* Test pattern for step @i: 0xAAAAAAAA for even steps, 0x55555555 for odd. */
static unsigned int mtdswap_test_patt(unsigned int i)
{
	if (i & 1)
		return 0x55555555;

	return 0xAAAAAAAA;
}
872 
873 static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d,
874 					struct swap_eb *eb)
875 {
876 	struct mtd_info *mtd = d->mtd;
877 	unsigned int test, i, j, patt, mtd_pages;
878 	loff_t base, pos;
879 	unsigned int *p1 = (unsigned int *)d->page_buf;
880 	unsigned char *p2 = (unsigned char *)d->oob_buf;
881 	struct mtd_oob_ops ops = { };
882 	int ret;
883 
884 	ops.mode = MTD_OPS_AUTO_OOB;
885 	ops.len = mtd->writesize;
886 	ops.ooblen = mtd->oobavail;
887 	ops.ooboffs = 0;
888 	ops.datbuf = d->page_buf;
889 	ops.oobbuf = d->oob_buf;
890 	base = mtdswap_eb_offset(d, eb);
891 	mtd_pages = d->pages_per_eblk * PAGE_SIZE / mtd->writesize;
892 
893 	for (test = 0; test < 2; test++) {
894 		pos = base;
895 		for (i = 0; i < mtd_pages; i++) {
896 			patt = mtdswap_test_patt(test + i);
897 			memset(d->page_buf, patt, mtd->writesize);
898 			memset(d->oob_buf, patt, mtd->oobavail);
899 			ret = mtd_write_oob(mtd, pos, &ops);
900 			if (ret)
901 				goto error;
902 
903 			pos += mtd->writesize;
904 		}
905 
906 		pos = base;
907 		for (i = 0; i < mtd_pages; i++) {
908 			ret = mtd_read_oob(mtd, pos, &ops);
909 			if (ret)
910 				goto error;
911 
912 			patt = mtdswap_test_patt(test + i);
913 			for (j = 0; j < mtd->writesize/sizeof(int); j++)
914 				if (p1[j] != patt)
915 					goto error;
916 
917 			for (j = 0; j < mtd->oobavail; j++)
918 				if (p2[j] != (unsigned char)patt)
919 					goto error;
920 
921 			pos += mtd->writesize;
922 		}
923 
924 		ret = mtdswap_erase_block(d, eb);
925 		if (ret)
926 			goto error;
927 	}
928 
929 	eb->flags &= ~EBLOCK_READERR;
930 	return 1;
931 
932 error:
933 	mtdswap_handle_badblock(d, eb);
934 	return 0;
935 }
936 
937 static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background)
938 {
939 	struct swap_eb *eb;
940 	int ret;
941 
942 	if (d->spare_eblks < MIN_SPARE_EBLOCKS)
943 		return 1;
944 
945 	eb = mtdswap_pick_gc_eblk(d, background);
946 	if (!eb)
947 		return 1;
948 
949 	ret = mtdswap_gc_eblock(d, eb);
950 	if (ret == -ENOSPC)
951 		return 1;
952 
953 	if (eb->flags & EBLOCK_FAILED) {
954 		mtdswap_handle_badblock(d, eb);
955 		return 0;
956 	}
957 
958 	eb->flags &= ~EBLOCK_BITFLIP;
959 	ret = mtdswap_erase_block(d, eb);
960 	if ((eb->flags & EBLOCK_READERR) &&
961 		(ret || !mtdswap_eblk_passes(d, eb)))
962 		return 0;
963 
964 	if (ret == 0)
965 		ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_CLEAN);
966 
967 	if (ret == 0)
968 		mtdswap_rb_add(d, eb, MTDSWAP_CLEAN);
969 	else if (ret != -EIO && !mtd_is_eccerr(ret))
970 		mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);
971 
972 	return 0;
973 }
974 
/*
 * Background worker: keep running GC passes until a pass reports there is
 * nothing more to do or the blktrans layer asks background work to cease.
 */
static void mtdswap_background(struct mtd_blktrans_dev *dev)
{
	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
	int done;

	do {
		done = mtdswap_gc(d, 1);
	} while (!done && !mtd_blktrans_cease_background(dev));
}
986 
987 static void mtdswap_cleanup(struct mtdswap_dev *d)
988 {
989 	vfree(d->eb_data);
990 	vfree(d->revmap);
991 	vfree(d->page_data);
992 	kfree(d->oob_buf);
993 	kfree(d->page_buf);
994 }
995 
996 static int mtdswap_flush(struct mtd_blktrans_dev *dev)
997 {
998 	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
999 
1000 	mtd_sync(d->mtd);
1001 	return 0;
1002 }
1003 
1004 static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size)
1005 {
1006 	loff_t offset;
1007 	unsigned int badcnt;
1008 
1009 	badcnt = 0;
1010 
1011 	if (mtd_can_have_bb(mtd))
1012 		for (offset = 0; offset < size; offset += mtd->erasesize)
1013 			if (mtd_block_isbad(mtd, offset))
1014 				badcnt++;
1015 
1016 	return badcnt;
1017 }
1018 
1019 static int mtdswap_writesect(struct mtd_blktrans_dev *dev,
1020 			unsigned long page, char *buf)
1021 {
1022 	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1023 	unsigned int newblock, mapped;
1024 	struct swap_eb *eb;
1025 	int ret;
1026 
1027 	d->sect_write_count++;
1028 
1029 	if (d->spare_eblks < MIN_SPARE_EBLOCKS)
1030 		return -ENOSPC;
1031 
1032 	if (header) {
1033 		/* Ignore writes to the header page */
1034 		if (unlikely(page == 0))
1035 			return 0;
1036 
1037 		page--;
1038 	}
1039 
1040 	mapped = d->page_data[page];
1041 	if (mapped <= BLOCK_MAX) {
1042 		eb = d->eb_data + (mapped / d->pages_per_eblk);
1043 		eb->active_count--;
1044 		mtdswap_store_eb(d, eb);
1045 		d->page_data[page] = BLOCK_UNDEF;
1046 		d->revmap[mapped] = PAGE_UNDEF;
1047 	}
1048 
1049 	ret = mtdswap_write_block(d, buf, page, &newblock, 0);
1050 	d->mtd_write_count++;
1051 
1052 	if (ret < 0)
1053 		return ret;
1054 
1055 	d->page_data[page] = newblock;
1056 
1057 	return 0;
1058 }
1059 
1060 /* Provide a dummy swap header for the kernel */
1061 static int mtdswap_auto_header(struct mtdswap_dev *d, char *buf)
1062 {
1063 	union swap_header *hd = (union swap_header *)(buf);
1064 
1065 	memset(buf, 0, PAGE_SIZE - 10);
1066 
1067 	hd->info.version = 1;
1068 	hd->info.last_page = d->mbd_dev->size - 1;
1069 	hd->info.nr_badpages = 0;
1070 
1071 	memcpy(buf + PAGE_SIZE - 10, "SWAPSPACE2", 10);
1072 
1073 	return 0;
1074 }
1075 
1076 static int mtdswap_readsect(struct mtd_blktrans_dev *dev,
1077 			unsigned long page, char *buf)
1078 {
1079 	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1080 	struct mtd_info *mtd = d->mtd;
1081 	unsigned int realblock, retries;
1082 	loff_t readpos;
1083 	struct swap_eb *eb;
1084 	size_t retlen;
1085 	int ret;
1086 
1087 	d->sect_read_count++;
1088 
1089 	if (header) {
1090 		if (unlikely(page == 0))
1091 			return mtdswap_auto_header(d, buf);
1092 
1093 		page--;
1094 	}
1095 
1096 	realblock = d->page_data[page];
1097 	if (realblock > BLOCK_MAX) {
1098 		memset(buf, 0x0, PAGE_SIZE);
1099 		if (realblock == BLOCK_UNDEF)
1100 			return 0;
1101 		else
1102 			return -EIO;
1103 	}
1104 
1105 	eb = d->eb_data + (realblock / d->pages_per_eblk);
1106 	BUG_ON(d->revmap[realblock] == PAGE_UNDEF);
1107 
1108 	readpos = (loff_t)realblock << PAGE_SHIFT;
1109 	retries = 0;
1110 
1111 retry:
1112 	ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, buf);
1113 
1114 	d->mtd_read_count++;
1115 	if (mtd_is_bitflip(ret)) {
1116 		eb->flags |= EBLOCK_BITFLIP;
1117 		mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
1118 		ret = 0;
1119 	}
1120 
1121 	if (ret < 0) {
1122 		dev_err(d->dev, "Read error %d\n", ret);
1123 		eb->flags |= EBLOCK_READERR;
1124 		mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
1125 		retries++;
1126 		if (retries < MTDSWAP_IO_RETRIES)
1127 			goto retry;
1128 
1129 		return ret;
1130 	}
1131 
1132 	if (retlen != PAGE_SIZE) {
1133 		dev_err(d->dev, "Short read %zd\n", retlen);
1134 		return -EIO;
1135 	}
1136 
1137 	return 0;
1138 }
1139 
1140 static int mtdswap_discard(struct mtd_blktrans_dev *dev, unsigned long first,
1141 			unsigned nr_pages)
1142 {
1143 	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1144 	unsigned long page;
1145 	struct swap_eb *eb;
1146 	unsigned int mapped;
1147 
1148 	d->discard_count++;
1149 
1150 	for (page = first; page < first + nr_pages; page++) {
1151 		mapped = d->page_data[page];
1152 		if (mapped <= BLOCK_MAX) {
1153 			eb = d->eb_data + (mapped / d->pages_per_eblk);
1154 			eb->active_count--;
1155 			mtdswap_store_eb(d, eb);
1156 			d->page_data[page] = BLOCK_UNDEF;
1157 			d->revmap[mapped] = PAGE_UNDEF;
1158 			d->discard_page_count++;
1159 		} else if (mapped == BLOCK_ERROR) {
1160 			d->page_data[page] = BLOCK_UNDEF;
1161 			d->discard_page_count++;
1162 		}
1163 	}
1164 
1165 	return 0;
1166 }
1167 
1168 static int mtdswap_show(struct seq_file *s, void *data)
1169 {
1170 	struct mtdswap_dev *d = (struct mtdswap_dev *) s->private;
1171 	unsigned long sum;
1172 	unsigned int count[MTDSWAP_TREE_CNT];
1173 	unsigned int min[MTDSWAP_TREE_CNT];
1174 	unsigned int max[MTDSWAP_TREE_CNT];
1175 	unsigned int i, cw = 0, cwp = 0, cwecount = 0, bb_cnt, mapped, pages;
1176 	uint64_t use_size;
1177 	static const char * const name[] = {
1178 		"clean", "used", "low", "high", "dirty", "bitflip", "failing"
1179 	};
1180 
1181 	mutex_lock(&d->mbd_dev->lock);
1182 
1183 	for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
1184 		struct rb_root *root = &d->trees[i].root;
1185 
1186 		if (root->rb_node) {
1187 			count[i] = d->trees[i].count;
1188 			min[i] = MTDSWAP_ECNT_MIN(root);
1189 			max[i] = MTDSWAP_ECNT_MAX(root);
1190 		} else
1191 			count[i] = 0;
1192 	}
1193 
1194 	if (d->curr_write) {
1195 		cw = 1;
1196 		cwp = d->curr_write_pos;
1197 		cwecount = d->curr_write->erase_count;
1198 	}
1199 
1200 	sum = 0;
1201 	for (i = 0; i < d->eblks; i++)
1202 		sum += d->eb_data[i].erase_count;
1203 
1204 	use_size = (uint64_t)d->eblks * d->mtd->erasesize;
1205 	bb_cnt = mtdswap_badblocks(d->mtd, use_size);
1206 
1207 	mapped = 0;
1208 	pages = d->mbd_dev->size;
1209 	for (i = 0; i < pages; i++)
1210 		if (d->page_data[i] != BLOCK_UNDEF)
1211 			mapped++;
1212 
1213 	mutex_unlock(&d->mbd_dev->lock);
1214 
1215 	for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
1216 		if (!count[i])
1217 			continue;
1218 
1219 		if (min[i] != max[i])
1220 			seq_printf(s, "%s:\t%5d erase blocks, erased min %d, "
1221 				"max %d times\n",
1222 				name[i], count[i], min[i], max[i]);
1223 		else
1224 			seq_printf(s, "%s:\t%5d erase blocks, all erased %d "
1225 				"times\n", name[i], count[i], min[i]);
1226 	}
1227 
1228 	if (bb_cnt)
1229 		seq_printf(s, "bad:\t%5u erase blocks\n", bb_cnt);
1230 
1231 	if (cw)
1232 		seq_printf(s, "current erase block: %u pages used, %u free, "
1233 			"erased %u times\n",
1234 			cwp, d->pages_per_eblk - cwp, cwecount);
1235 
1236 	seq_printf(s, "total erasures: %lu\n", sum);
1237 
1238 	seq_puts(s, "\n");
1239 
1240 	seq_printf(s, "mtdswap_readsect count: %llu\n", d->sect_read_count);
1241 	seq_printf(s, "mtdswap_writesect count: %llu\n", d->sect_write_count);
1242 	seq_printf(s, "mtdswap_discard count: %llu\n", d->discard_count);
1243 	seq_printf(s, "mtd read count: %llu\n", d->mtd_read_count);
1244 	seq_printf(s, "mtd write count: %llu\n", d->mtd_write_count);
1245 	seq_printf(s, "discarded pages count: %llu\n", d->discard_page_count);
1246 
1247 	seq_puts(s, "\n");
1248 	seq_printf(s, "total pages: %u\n", pages);
1249 	seq_printf(s, "pages mapped: %u\n", mapped);
1250 
1251 	return 0;
1252 }
1253 DEFINE_SHOW_ATTRIBUTE(mtdswap);
1254 
1255 static int mtdswap_add_debugfs(struct mtdswap_dev *d)
1256 {
1257 	struct dentry *root = d->mtd->dbg.dfs_dir;
1258 
1259 	if (!IS_ENABLED(CONFIG_DEBUG_FS))
1260 		return 0;
1261 
1262 	if (IS_ERR_OR_NULL(root))
1263 		return -1;
1264 
1265 	debugfs_create_file("mtdswap_stats", S_IRUSR, root, d, &mtdswap_fops);
1266 
1267 	return 0;
1268 }
1269 
1270 static int mtdswap_init(struct mtdswap_dev *d, unsigned int eblocks,
1271 			unsigned int spare_cnt)
1272 {
1273 	struct mtd_info *mtd = d->mbd_dev->mtd;
1274 	unsigned int i, eblk_bytes, pages, blocks;
1275 	int ret = -ENOMEM;
1276 
1277 	d->mtd = mtd;
1278 	d->eblks = eblocks;
1279 	d->spare_eblks = spare_cnt;
1280 	d->pages_per_eblk = mtd->erasesize >> PAGE_SHIFT;
1281 
1282 	pages = d->mbd_dev->size;
1283 	blocks = eblocks * d->pages_per_eblk;
1284 
1285 	for (i = 0; i < MTDSWAP_TREE_CNT; i++)
1286 		d->trees[i].root = RB_ROOT;
1287 
1288 	d->page_data = vmalloc(array_size(pages, sizeof(int)));
1289 	if (!d->page_data)
1290 		goto page_data_fail;
1291 
1292 	d->revmap = vmalloc(array_size(blocks, sizeof(int)));
1293 	if (!d->revmap)
1294 		goto revmap_fail;
1295 
1296 	eblk_bytes = sizeof(struct swap_eb)*d->eblks;
1297 	d->eb_data = vzalloc(eblk_bytes);
1298 	if (!d->eb_data)
1299 		goto eb_data_fail;
1300 
1301 	for (i = 0; i < pages; i++)
1302 		d->page_data[i] = BLOCK_UNDEF;
1303 
1304 	for (i = 0; i < blocks; i++)
1305 		d->revmap[i] = PAGE_UNDEF;
1306 
1307 	d->page_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
1308 	if (!d->page_buf)
1309 		goto page_buf_fail;
1310 
1311 	d->oob_buf = kmalloc_array(2, mtd->oobavail, GFP_KERNEL);
1312 	if (!d->oob_buf)
1313 		goto oob_buf_fail;
1314 
1315 	mtdswap_scan_eblks(d);
1316 
1317 	return 0;
1318 
1319 oob_buf_fail:
1320 	kfree(d->page_buf);
1321 page_buf_fail:
1322 	vfree(d->eb_data);
1323 eb_data_fail:
1324 	vfree(d->revmap);
1325 revmap_fail:
1326 	vfree(d->page_data);
1327 page_data_fail:
1328 	printk(KERN_ERR "%s: init failed (%d)\n", MTDSWAP_PREFIX, ret);
1329 	return ret;
1330 }
1331 
1332 static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
1333 {
1334 	struct mtdswap_dev *d;
1335 	struct mtd_blktrans_dev *mbd_dev;
1336 	char *parts;
1337 	char *this_opt;
1338 	unsigned long part;
1339 	unsigned int eblocks, eavailable, bad_blocks, spare_cnt;
1340 	uint64_t swap_size, use_size, size_limit;
1341 	int ret;
1342 
1343 	parts = &partitions[0];
1344 	if (!*parts)
1345 		return;
1346 
1347 	while ((this_opt = strsep(&parts, ",")) != NULL) {
1348 		if (kstrtoul(this_opt, 0, &part) < 0)
1349 			return;
1350 
1351 		if (mtd->index == part)
1352 			break;
1353 	}
1354 
1355 	if (mtd->index != part)
1356 		return;
1357 
1358 	if (mtd->erasesize < PAGE_SIZE || mtd->erasesize % PAGE_SIZE) {
1359 		printk(KERN_ERR "%s: Erase size %u not multiple of PAGE_SIZE "
1360 			"%lu\n", MTDSWAP_PREFIX, mtd->erasesize, PAGE_SIZE);
1361 		return;
1362 	}
1363 
1364 	if (PAGE_SIZE % mtd->writesize || mtd->writesize > PAGE_SIZE) {
1365 		printk(KERN_ERR "%s: PAGE_SIZE %lu not multiple of write size"
1366 			" %u\n", MTDSWAP_PREFIX, PAGE_SIZE, mtd->writesize);
1367 		return;
1368 	}
1369 
1370 	if (!mtd->oobsize || mtd->oobavail < MTDSWAP_OOBSIZE) {
1371 		printk(KERN_ERR "%s: Not enough free bytes in OOB, "
1372 			"%d available, %zu needed.\n",
1373 			MTDSWAP_PREFIX, mtd->oobavail, MTDSWAP_OOBSIZE);
1374 		return;
1375 	}
1376 
1377 	if (spare_eblocks > 100)
1378 		spare_eblocks = 100;
1379 
1380 	use_size = mtd->size;
1381 	size_limit = (uint64_t) BLOCK_MAX * PAGE_SIZE;
1382 
1383 	if (mtd->size > size_limit) {
1384 		printk(KERN_WARNING "%s: Device too large. Limiting size to "
1385 			"%llu bytes\n", MTDSWAP_PREFIX, size_limit);
1386 		use_size = size_limit;
1387 	}
1388 
1389 	eblocks = mtd_div_by_eb(use_size, mtd);
1390 	use_size = (uint64_t)eblocks * mtd->erasesize;
1391 	bad_blocks = mtdswap_badblocks(mtd, use_size);
1392 	eavailable = eblocks - bad_blocks;
1393 
1394 	if (eavailable < MIN_ERASE_BLOCKS) {
1395 		printk(KERN_ERR "%s: Not enough erase blocks. %u available, "
1396 			"%d needed\n", MTDSWAP_PREFIX, eavailable,
1397 			MIN_ERASE_BLOCKS);
1398 		return;
1399 	}
1400 
1401 	spare_cnt = div_u64((uint64_t)eavailable * spare_eblocks, 100);
1402 
1403 	if (spare_cnt < MIN_SPARE_EBLOCKS)
1404 		spare_cnt = MIN_SPARE_EBLOCKS;
1405 
1406 	if (spare_cnt > eavailable - 1)
1407 		spare_cnt = eavailable - 1;
1408 
1409 	swap_size = (uint64_t)(eavailable - spare_cnt) * mtd->erasesize +
1410 		(header ? PAGE_SIZE : 0);
1411 
1412 	printk(KERN_INFO "%s: Enabling MTD swap on device %lu, size %llu KB, "
1413 		"%u spare, %u bad blocks\n",
1414 		MTDSWAP_PREFIX, part, swap_size / 1024, spare_cnt, bad_blocks);
1415 
1416 	d = kzalloc(sizeof(struct mtdswap_dev), GFP_KERNEL);
1417 	if (!d)
1418 		return;
1419 
1420 	mbd_dev = kzalloc(sizeof(struct mtd_blktrans_dev), GFP_KERNEL);
1421 	if (!mbd_dev) {
1422 		kfree(d);
1423 		return;
1424 	}
1425 
1426 	d->mbd_dev = mbd_dev;
1427 	mbd_dev->priv = d;
1428 
1429 	mbd_dev->mtd = mtd;
1430 	mbd_dev->devnum = mtd->index;
1431 	mbd_dev->size = swap_size >> PAGE_SHIFT;
1432 	mbd_dev->tr = tr;
1433 
1434 	if (!(mtd->flags & MTD_WRITEABLE))
1435 		mbd_dev->readonly = 1;
1436 
1437 	if (mtdswap_init(d, eblocks, spare_cnt) < 0)
1438 		goto init_failed;
1439 
1440 	if (add_mtd_blktrans_dev(mbd_dev) < 0)
1441 		goto cleanup;
1442 
1443 	d->dev = disk_to_dev(mbd_dev->disk);
1444 
1445 	ret = mtdswap_add_debugfs(d);
1446 	if (ret < 0)
1447 		goto debugfs_failed;
1448 
1449 	return;
1450 
1451 debugfs_failed:
1452 	del_mtd_blktrans_dev(mbd_dev);
1453 
1454 cleanup:
1455 	mtdswap_cleanup(d);
1456 
1457 init_failed:
1458 	kfree(mbd_dev);
1459 	kfree(d);
1460 }
1461 
/*
 * blktrans remove_dev callback: unregister the block translation device,
 * run mtdswap_cleanup() on the driver state attached to it and free that
 * state. @dev itself is not freed here.
 */
static void mtdswap_remove_dev(struct mtd_blktrans_dev *dev)
{
	struct mtdswap_dev *swap = MTDSWAP_MBD_TO_MTDSWAP(dev);

	del_mtd_blktrans_dev(dev);

	mtdswap_cleanup(swap);
	kfree(swap);
}
1470 
1471 static struct mtd_blktrans_ops mtdswap_ops = {
1472 	.name		= "mtdswap",
1473 	.major		= 0,
1474 	.part_bits	= 0,
1475 	.blksize	= PAGE_SIZE,
1476 	.flush		= mtdswap_flush,
1477 	.readsect	= mtdswap_readsect,
1478 	.writesect	= mtdswap_writesect,
1479 	.discard	= mtdswap_discard,
1480 	.background	= mtdswap_background,
1481 	.add_mtd	= mtdswap_add_mtd,
1482 	.remove_dev	= mtdswap_remove_dev,
1483 	.owner		= THIS_MODULE,
1484 };
1485 
1486 module_mtd_blktrans(mtdswap_ops);
1487 
1488 MODULE_LICENSE("GPL");
1489 MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
1490 MODULE_DESCRIPTION("Block device access to an MTD suitable for using as "
1491 		"swap space");
1492