xref: /openbmc/linux/drivers/mtd/mtdswap.c (revision dc6a81c3)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Swap block device support for MTDs
4  * Turns an MTD device into a swap device with block wear leveling
5  *
6  * Copyright © 2007,2011 Nokia Corporation. All rights reserved.
7  *
8  * Authors: Jarkko Lavinen <jarkko.lavinen@nokia.com>
9  *
10  * Based on Richard Purdie's earlier implementation in 2007. Background
11  * support and lock-less operation written by Adrian Hunter.
12  */
13 
14 #include <linux/kernel.h>
15 #include <linux/module.h>
16 #include <linux/mtd/mtd.h>
17 #include <linux/mtd/blktrans.h>
18 #include <linux/rbtree.h>
19 #include <linux/sched.h>
20 #include <linux/slab.h>
21 #include <linux/vmalloc.h>
22 #include <linux/genhd.h>
23 #include <linux/swap.h>
24 #include <linux/debugfs.h>
25 #include <linux/seq_file.h>
26 #include <linux/device.h>
27 #include <linux/math64.h>
28 
#define MTDSWAP_PREFIX "mtdswap"

/*
 * The number of free eraseblocks when GC should stop
 */
#define CLEAN_BLOCK_THRESHOLD	20

/*
 * Number of free eraseblocks below which GC can also collect low frag
 * blocks.
 */
#define LOW_FRAG_GC_THRESHOLD	5

/*
 * Wear level cost amortization. We want to do wear leveling on the background
 * without disturbing gc too much. This is made by defining max GC frequency.
 * Frequency value 6 means 1/6 of the GC passes will pick an erase block based
 * on the biggest wear difference rather than the biggest dirtiness.
 *
 * The lower freq2 should be chosen so that it makes sure the maximum erase
 * difference will decrease even if a malicious application is deliberately
 * trying to make erase differences large.
 */
#define MAX_ERASE_DIFF		4000
#define COLLECT_NONDIRTY_BASE	MAX_ERASE_DIFF
#define COLLECT_NONDIRTY_FREQ1	6
#define COLLECT_NONDIRTY_FREQ2	4

/*
 * Sentinel values for the two mapping tables. A d->revmap entry holds
 * PAGE_UNDEF when the flash block carries no live page. A d->page_data entry
 * holds BLOCK_UNDEF when the page is not mapped and BLOCK_ERROR after a
 * read or write failure while the page was being moved; values up to and
 * including BLOCK_MAX are treated as real block numbers.
 */
#define PAGE_UNDEF		UINT_MAX
#define BLOCK_UNDEF		UINT_MAX
#define BLOCK_ERROR		(UINT_MAX - 1)
#define BLOCK_MAX		(UINT_MAX - 2)

/* State bits kept in swap_eb.flags. */
#define EBLOCK_BAD		(1 << 0)	/* block has been retired as bad */
#define EBLOCK_NOMAGIC		(1 << 1)	/* scan found no clean marker */
#define EBLOCK_BITFLIP		(1 << 2)	/* a read reported bitflips */
#define EBLOCK_FAILED		(1 << 3)	/* a write to the block failed */
#define EBLOCK_READERR		(1 << 4)	/* a read from the block failed */
/* mtdswap_scan_eblks() stores the target tree index above the flag bits. */
#define EBLOCK_IDX_SHIFT	5
68 
/*
 * Per erase block bookkeeping. One entry exists for every erase block in the
 * d->eb_data array; the array index identifies the block on flash.
 */
struct swap_eb {
	struct rb_node rb;		/* links the block into one tree */
	struct rb_root *root;		/* tree the block is in, NULL if none */

	unsigned int flags;		/* EBLOCK_* bits */
	unsigned int active_count;	/* pages in this block still mapped */
	unsigned int erase_count;	/* erase cycles, also the tree sort key */
	unsigned int pad;		/* speeds up pointer decrement */
};
78 
/*
 * Smallest and largest erase count stored in a tree. The trees are ordered by
 * erase_count, so these are the first and last nodes. The callers in this
 * file only use them after checking that the tree is not empty.
 */
#define MTDSWAP_ECNT_MIN(rbroot) (rb_entry(rb_first(rbroot), struct swap_eb, \
				rb)->erase_count)
#define MTDSWAP_ECNT_MAX(rbroot) (rb_entry(rb_last(rbroot), struct swap_eb, \
				rb)->erase_count)
83 
/* One classification tree of erase blocks together with its population. */
struct mtdswap_tree {
	struct rb_root root;	/* blocks ordered by erase_count */
	unsigned int count;	/* number of blocks currently in the tree */
};
88 
/*
 * Indices into mtdswap_dev.trees. mtdswap_store_eb() files a block by its
 * number of active pages: USED when every page is active, DIRTY when none
 * is, LOWFRAG when more than half are and HIFRAG otherwise.
 */
enum {
	MTDSWAP_CLEAN,		/* erased, clean marker written */
	MTDSWAP_USED,
	MTDSWAP_LOWFRAG,
	MTDSWAP_HIFRAG,
	MTDSWAP_DIRTY,
	MTDSWAP_BITFLIP,	/* blocks whose reads reported bitflips */
	MTDSWAP_FAILING,	/* blocks with read or write errors */
	MTDSWAP_TREE_CNT,	/* number of trees, not a tree itself */
};
99 
/* State of one mtdswap device. */
struct mtdswap_dev {
	struct mtd_blktrans_dev *mbd_dev;	/* block translation device */
	struct mtd_info *mtd;			/* underlying MTD device */
	struct device *dev;			/* used for log messages */

	/* swap page -> flash block, or BLOCK_UNDEF / BLOCK_ERROR */
	unsigned int *page_data;
	/* flash block -> swap page, or PAGE_UNDEF */
	unsigned int *revmap;

	unsigned int eblks;		/* number of entries in eb_data */
	unsigned int spare_eblks;	/* decremented for every block gone bad */
	unsigned int pages_per_eblk;	/* PAGE_SIZE pages per erase block */
	unsigned int max_erase_count;	/* highest erase_count seen so far */
	struct swap_eb *eb_data;	/* one descriptor per erase block */

	struct mtdswap_tree trees[MTDSWAP_TREE_CNT];

	/* Statistics reported by mtdswap_show(). */
	unsigned long long sect_read_count;
	unsigned long long sect_write_count;
	unsigned long long mtd_write_count;
	unsigned long long mtd_read_count;
	unsigned long long discard_count;
	unsigned long long discard_page_count;

	/* Next page slot to hand out inside curr_write. */
	unsigned int curr_write_pos;
	/* Erase block currently being filled, NULL when there is none. */
	struct swap_eb *curr_write;

	char *page_buf;		/* scratch buffer for page data */
	char *oob_buf;		/* scratch buffer for OOB data */
};
129 
/* Marker layout stored little-endian in the OOB area. */
struct mtdswap_oobdata {
	__le16 magic;	/* MTDSWAP_MAGIC_CLEAN or MTDSWAP_MAGIC_DIRTY */
	__le32 count;	/* erase count, written with the clean marker */
} __packed;
134 
/* Magic values stored in mtdswap_oobdata.magic. */
#define MTDSWAP_MAGIC_CLEAN	0x2095
#define MTDSWAP_MAGIC_DIRTY	(MTDSWAP_MAGIC_CLEAN + 1)
/* Marker selector passed to mtdswap_write_marker(). */
#define MTDSWAP_TYPE_CLEAN	0
#define MTDSWAP_TYPE_DIRTY	1
#define MTDSWAP_OOBSIZE		sizeof(struct mtdswap_oobdata)

#define MTDSWAP_ERASE_RETRIES	3 /* Before marking erase block bad */
#define MTDSWAP_IO_RETRIES	3 /* Read attempts before giving up */
143 
/* Non-negative results of mtdswap_read_markers(). */
enum {
	MTDSWAP_SCANNED_CLEAN,		/* clean marker only */
	MTDSWAP_SCANNED_DIRTY,		/* dirty marker, or no clean marker */
	MTDSWAP_SCANNED_BITFLIP,	/* clean marker read with bitflips */
	MTDSWAP_SCANNED_BAD,		/* MTD reports the block as bad */
};
150 
/*
 * In the worst case mtdswap_writesect() has allocated the last clean
 * page from the current block and is then pre-empted by the GC
 * thread. The thread can consume a full erase block when moving a
 * block.
 *
 * Writes and garbage collection stop once d->spare_eblks drops below
 * MIN_SPARE_EBLOCKS.
 */
#define MIN_SPARE_EBLOCKS	2
#define MIN_ERASE_BLOCKS	(MIN_SPARE_EBLOCKS + 1)
159 
/*
 * Tree accessors. "name" is the tree index without its MTDSWAP_ prefix,
 * e.g. TREE_COUNT(d, CLEAN).
 */
#define TREE_ROOT(d, name) (&d->trees[MTDSWAP_ ## name].root)
#define TREE_EMPTY(d, name) (TREE_ROOT(d, name)->rb_node == NULL)
#define TREE_NONEMPTY(d, name) (!TREE_EMPTY(d, name))
#define TREE_COUNT(d, name) (d->trees[MTDSWAP_ ## name].count)

/* Fetch the mtdswap_dev stored in a blktrans device's priv pointer. */
#define MTDSWAP_MBD_TO_MTDSWAP(dev) ((struct mtdswap_dev *)dev->priv)
166 
/* Module parameters; all are read-only in sysfs (mode 0444). */
static char partitions[128] = "";
module_param_string(partitions, partitions, sizeof(partitions), 0444);
MODULE_PARM_DESC(partitions, "MTD partition numbers to use as swap "
		"partitions=\"1,3,5\"");

static unsigned int spare_eblocks = 10;
module_param(spare_eblocks, uint, 0444);
MODULE_PARM_DESC(spare_eblocks, "Percentage of spare erase blocks for "
		"garbage collection (default 10%)");

/*
 * When set, page 0 of the block device is a synthesized swap header:
 * reads of it are served by mtdswap_auto_header(), writes to it are ignored
 * and every other page is shifted down by one.
 */
static bool header; /* false */
module_param(header, bool, 0444);
MODULE_PARM_DESC(header,
		"Include builtin swap header (default 0, without header)");

/* Declared early because mtdswap_write_block() calls it before its definition. */
static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background);
183 
184 static loff_t mtdswap_eb_offset(struct mtdswap_dev *d, struct swap_eb *eb)
185 {
186 	return (loff_t)(eb - d->eb_data) * d->mtd->erasesize;
187 }
188 
189 static void mtdswap_eb_detach(struct mtdswap_dev *d, struct swap_eb *eb)
190 {
191 	unsigned int oldidx;
192 	struct mtdswap_tree *tp;
193 
194 	if (eb->root) {
195 		tp = container_of(eb->root, struct mtdswap_tree, root);
196 		oldidx = tp - &d->trees[0];
197 
198 		d->trees[oldidx].count--;
199 		rb_erase(&eb->rb, eb->root);
200 	}
201 }
202 
203 static void __mtdswap_rb_add(struct rb_root *root, struct swap_eb *eb)
204 {
205 	struct rb_node **p, *parent = NULL;
206 	struct swap_eb *cur;
207 
208 	p = &root->rb_node;
209 	while (*p) {
210 		parent = *p;
211 		cur = rb_entry(parent, struct swap_eb, rb);
212 		if (eb->erase_count > cur->erase_count)
213 			p = &(*p)->rb_right;
214 		else
215 			p = &(*p)->rb_left;
216 	}
217 
218 	rb_link_node(&eb->rb, parent, p);
219 	rb_insert_color(&eb->rb, root);
220 }
221 
222 static void mtdswap_rb_add(struct mtdswap_dev *d, struct swap_eb *eb, int idx)
223 {
224 	struct rb_root *root;
225 
226 	if (eb->root == &d->trees[idx].root)
227 		return;
228 
229 	mtdswap_eb_detach(d, eb);
230 	root = &d->trees[idx].root;
231 	__mtdswap_rb_add(root, eb);
232 	eb->root = root;
233 	d->trees[idx].count++;
234 }
235 
/*
 * Return the node at in-order position @idx (0 based) of @root, or NULL if
 * the tree has fewer nodes than that.
 */
static struct rb_node *mtdswap_rb_index(struct rb_root *root, unsigned int idx)
{
	struct rb_node *node = rb_first(root);
	unsigned int pos;

	for (pos = 0; pos < idx && node; pos++)
		node = rb_next(node);

	return node;
}
250 
251 static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb)
252 {
253 	int ret;
254 	loff_t offset;
255 
256 	d->spare_eblks--;
257 	eb->flags |= EBLOCK_BAD;
258 	mtdswap_eb_detach(d, eb);
259 	eb->root = NULL;
260 
261 	/* badblocks not supported */
262 	if (!mtd_can_have_bb(d->mtd))
263 		return 1;
264 
265 	offset = mtdswap_eb_offset(d, eb);
266 	dev_warn(d->dev, "Marking bad block at %08llx\n", offset);
267 	ret = mtd_block_markbad(d->mtd, offset);
268 
269 	if (ret) {
270 		dev_warn(d->dev, "Mark block bad failed for block at %08llx "
271 			"error %d\n", offset, ret);
272 		return ret;
273 	}
274 
275 	return 1;
276 
277 }
278 
279 static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb)
280 {
281 	unsigned int marked = eb->flags & EBLOCK_FAILED;
282 	struct swap_eb *curr_write = d->curr_write;
283 
284 	eb->flags |= EBLOCK_FAILED;
285 	if (curr_write == eb) {
286 		d->curr_write = NULL;
287 
288 		if (!marked && d->curr_write_pos != 0) {
289 			mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
290 			return 0;
291 		}
292 	}
293 
294 	return mtdswap_handle_badblock(d, eb);
295 }
296 
297 static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from,
298 			struct mtd_oob_ops *ops)
299 {
300 	int ret = mtd_read_oob(d->mtd, from, ops);
301 
302 	if (mtd_is_bitflip(ret))
303 		return ret;
304 
305 	if (ret) {
306 		dev_warn(d->dev, "Read OOB failed %d for block at %08llx\n",
307 			ret, from);
308 		return ret;
309 	}
310 
311 	if (ops->oobretlen < ops->ooblen) {
312 		dev_warn(d->dev, "Read OOB return short read (%zd bytes not "
313 			"%zd) for block at %08llx\n",
314 			ops->oobretlen, ops->ooblen, from);
315 		return -EIO;
316 	}
317 
318 	return 0;
319 }
320 
321 static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb)
322 {
323 	struct mtdswap_oobdata *data, *data2;
324 	int ret;
325 	loff_t offset;
326 	struct mtd_oob_ops ops;
327 
328 	offset = mtdswap_eb_offset(d, eb);
329 
330 	/* Check first if the block is bad. */
331 	if (mtd_can_have_bb(d->mtd) && mtd_block_isbad(d->mtd, offset))
332 		return MTDSWAP_SCANNED_BAD;
333 
334 	ops.ooblen = 2 * d->mtd->oobavail;
335 	ops.oobbuf = d->oob_buf;
336 	ops.ooboffs = 0;
337 	ops.datbuf = NULL;
338 	ops.mode = MTD_OPS_AUTO_OOB;
339 
340 	ret = mtdswap_read_oob(d, offset, &ops);
341 
342 	if (ret && !mtd_is_bitflip(ret))
343 		return ret;
344 
345 	data = (struct mtdswap_oobdata *)d->oob_buf;
346 	data2 = (struct mtdswap_oobdata *)
347 		(d->oob_buf + d->mtd->oobavail);
348 
349 	if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) {
350 		eb->erase_count = le32_to_cpu(data->count);
351 		if (mtd_is_bitflip(ret))
352 			ret = MTDSWAP_SCANNED_BITFLIP;
353 		else {
354 			if (le16_to_cpu(data2->magic) == MTDSWAP_MAGIC_DIRTY)
355 				ret = MTDSWAP_SCANNED_DIRTY;
356 			else
357 				ret = MTDSWAP_SCANNED_CLEAN;
358 		}
359 	} else {
360 		eb->flags |= EBLOCK_NOMAGIC;
361 		ret = MTDSWAP_SCANNED_DIRTY;
362 	}
363 
364 	return ret;
365 }
366 
367 static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb,
368 				u16 marker)
369 {
370 	struct mtdswap_oobdata n;
371 	int ret;
372 	loff_t offset;
373 	struct mtd_oob_ops ops;
374 
375 	ops.ooboffs = 0;
376 	ops.oobbuf = (uint8_t *)&n;
377 	ops.mode = MTD_OPS_AUTO_OOB;
378 	ops.datbuf = NULL;
379 
380 	if (marker == MTDSWAP_TYPE_CLEAN) {
381 		n.magic = cpu_to_le16(MTDSWAP_MAGIC_CLEAN);
382 		n.count = cpu_to_le32(eb->erase_count);
383 		ops.ooblen = MTDSWAP_OOBSIZE;
384 		offset = mtdswap_eb_offset(d, eb);
385 	} else {
386 		n.magic = cpu_to_le16(MTDSWAP_MAGIC_DIRTY);
387 		ops.ooblen = sizeof(n.magic);
388 		offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize;
389 	}
390 
391 	ret = mtd_write_oob(d->mtd, offset, &ops);
392 
393 	if (ret) {
394 		dev_warn(d->dev, "Write OOB failed for block at %08llx "
395 			"error %d\n", offset, ret);
396 		if (ret == -EIO || mtd_is_eccerr(ret))
397 			mtdswap_handle_write_error(d, eb);
398 		return ret;
399 	}
400 
401 	if (ops.oobretlen != ops.ooblen) {
402 		dev_warn(d->dev, "Short OOB write for block at %08llx: "
403 			"%zd not %zd\n",
404 			offset, ops.oobretlen, ops.ooblen);
405 		return ret;
406 	}
407 
408 	return 0;
409 }
410 
411 /*
412  * Are there any erase blocks without MAGIC_CLEAN header, presumably
413  * because power was cut off after erase but before header write? We
414  * need to guestimate the erase count.
415  */
416 static void mtdswap_check_counts(struct mtdswap_dev *d)
417 {
418 	struct rb_root hist_root = RB_ROOT;
419 	struct rb_node *medrb;
420 	struct swap_eb *eb;
421 	unsigned int i, cnt, median;
422 
423 	cnt = 0;
424 	for (i = 0; i < d->eblks; i++) {
425 		eb = d->eb_data + i;
426 
427 		if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
428 			continue;
429 
430 		__mtdswap_rb_add(&hist_root, eb);
431 		cnt++;
432 	}
433 
434 	if (cnt == 0)
435 		return;
436 
437 	medrb = mtdswap_rb_index(&hist_root, cnt / 2);
438 	median = rb_entry(medrb, struct swap_eb, rb)->erase_count;
439 
440 	d->max_erase_count = MTDSWAP_ECNT_MAX(&hist_root);
441 
442 	for (i = 0; i < d->eblks; i++) {
443 		eb = d->eb_data + i;
444 
445 		if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_READERR))
446 			eb->erase_count = median;
447 
448 		if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
449 			continue;
450 
451 		rb_erase(&eb->rb, &hist_root);
452 	}
453 }
454 
455 static void mtdswap_scan_eblks(struct mtdswap_dev *d)
456 {
457 	int status;
458 	unsigned int i, idx;
459 	struct swap_eb *eb;
460 
461 	for (i = 0; i < d->eblks; i++) {
462 		eb = d->eb_data + i;
463 
464 		status = mtdswap_read_markers(d, eb);
465 		if (status < 0)
466 			eb->flags |= EBLOCK_READERR;
467 		else if (status == MTDSWAP_SCANNED_BAD) {
468 			eb->flags |= EBLOCK_BAD;
469 			continue;
470 		}
471 
472 		switch (status) {
473 		case MTDSWAP_SCANNED_CLEAN:
474 			idx = MTDSWAP_CLEAN;
475 			break;
476 		case MTDSWAP_SCANNED_DIRTY:
477 		case MTDSWAP_SCANNED_BITFLIP:
478 			idx = MTDSWAP_DIRTY;
479 			break;
480 		default:
481 			idx = MTDSWAP_FAILING;
482 		}
483 
484 		eb->flags |= (idx << EBLOCK_IDX_SHIFT);
485 	}
486 
487 	mtdswap_check_counts(d);
488 
489 	for (i = 0; i < d->eblks; i++) {
490 		eb = d->eb_data + i;
491 
492 		if (eb->flags & EBLOCK_BAD)
493 			continue;
494 
495 		idx = eb->flags >> EBLOCK_IDX_SHIFT;
496 		mtdswap_rb_add(d, eb, idx);
497 	}
498 }
499 
500 /*
501  * Place eblk into a tree corresponding to its number of active blocks
502  * it contains.
503  */
504 static void mtdswap_store_eb(struct mtdswap_dev *d, struct swap_eb *eb)
505 {
506 	unsigned int weight = eb->active_count;
507 	unsigned int maxweight = d->pages_per_eblk;
508 
509 	if (eb == d->curr_write)
510 		return;
511 
512 	if (eb->flags & EBLOCK_BITFLIP)
513 		mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
514 	else if (eb->flags & (EBLOCK_READERR | EBLOCK_FAILED))
515 		mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
516 	if (weight == maxweight)
517 		mtdswap_rb_add(d, eb, MTDSWAP_USED);
518 	else if (weight == 0)
519 		mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);
520 	else if (weight > (maxweight/2))
521 		mtdswap_rb_add(d, eb, MTDSWAP_LOWFRAG);
522 	else
523 		mtdswap_rb_add(d, eb, MTDSWAP_HIFRAG);
524 }
525 
526 static int mtdswap_erase_block(struct mtdswap_dev *d, struct swap_eb *eb)
527 {
528 	struct mtd_info *mtd = d->mtd;
529 	struct erase_info erase;
530 	unsigned int retries = 0;
531 	int ret;
532 
533 	eb->erase_count++;
534 	if (eb->erase_count > d->max_erase_count)
535 		d->max_erase_count = eb->erase_count;
536 
537 retry:
538 	memset(&erase, 0, sizeof(struct erase_info));
539 	erase.addr	= mtdswap_eb_offset(d, eb);
540 	erase.len	= mtd->erasesize;
541 
542 	ret = mtd_erase(mtd, &erase);
543 	if (ret) {
544 		if (retries++ < MTDSWAP_ERASE_RETRIES) {
545 			dev_warn(d->dev,
546 				"erase of erase block %#llx on %s failed",
547 				erase.addr, mtd->name);
548 			yield();
549 			goto retry;
550 		}
551 
552 		dev_err(d->dev, "Cannot erase erase block %#llx on %s\n",
553 			erase.addr, mtd->name);
554 
555 		mtdswap_handle_badblock(d, eb);
556 		return -EIO;
557 	}
558 
559 	return 0;
560 }
561 
562 static int mtdswap_map_free_block(struct mtdswap_dev *d, unsigned int page,
563 				unsigned int *block)
564 {
565 	int ret;
566 	struct swap_eb *old_eb = d->curr_write;
567 	struct rb_root *clean_root;
568 	struct swap_eb *eb;
569 
570 	if (old_eb == NULL || d->curr_write_pos >= d->pages_per_eblk) {
571 		do {
572 			if (TREE_EMPTY(d, CLEAN))
573 				return -ENOSPC;
574 
575 			clean_root = TREE_ROOT(d, CLEAN);
576 			eb = rb_entry(rb_first(clean_root), struct swap_eb, rb);
577 			rb_erase(&eb->rb, clean_root);
578 			eb->root = NULL;
579 			TREE_COUNT(d, CLEAN)--;
580 
581 			ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_DIRTY);
582 		} while (ret == -EIO || mtd_is_eccerr(ret));
583 
584 		if (ret)
585 			return ret;
586 
587 		d->curr_write_pos = 0;
588 		d->curr_write = eb;
589 		if (old_eb)
590 			mtdswap_store_eb(d, old_eb);
591 	}
592 
593 	*block = (d->curr_write - d->eb_data) * d->pages_per_eblk +
594 		d->curr_write_pos;
595 
596 	d->curr_write->active_count++;
597 	d->revmap[*block] = page;
598 	d->curr_write_pos++;
599 
600 	return 0;
601 }
602 
603 static unsigned int mtdswap_free_page_cnt(struct mtdswap_dev *d)
604 {
605 	return TREE_COUNT(d, CLEAN) * d->pages_per_eblk +
606 		d->pages_per_eblk - d->curr_write_pos;
607 }
608 
609 static unsigned int mtdswap_enough_free_pages(struct mtdswap_dev *d)
610 {
611 	return mtdswap_free_page_cnt(d) > d->pages_per_eblk;
612 }
613 
614 static int mtdswap_write_block(struct mtdswap_dev *d, char *buf,
615 			unsigned int page, unsigned int *bp, int gc_context)
616 {
617 	struct mtd_info *mtd = d->mtd;
618 	struct swap_eb *eb;
619 	size_t retlen;
620 	loff_t writepos;
621 	int ret;
622 
623 retry:
624 	if (!gc_context)
625 		while (!mtdswap_enough_free_pages(d))
626 			if (mtdswap_gc(d, 0) > 0)
627 				return -ENOSPC;
628 
629 	ret = mtdswap_map_free_block(d, page, bp);
630 	eb = d->eb_data + (*bp / d->pages_per_eblk);
631 
632 	if (ret == -EIO || mtd_is_eccerr(ret)) {
633 		d->curr_write = NULL;
634 		eb->active_count--;
635 		d->revmap[*bp] = PAGE_UNDEF;
636 		goto retry;
637 	}
638 
639 	if (ret < 0)
640 		return ret;
641 
642 	writepos = (loff_t)*bp << PAGE_SHIFT;
643 	ret =  mtd_write(mtd, writepos, PAGE_SIZE, &retlen, buf);
644 	if (ret == -EIO || mtd_is_eccerr(ret)) {
645 		d->curr_write_pos--;
646 		eb->active_count--;
647 		d->revmap[*bp] = PAGE_UNDEF;
648 		mtdswap_handle_write_error(d, eb);
649 		goto retry;
650 	}
651 
652 	if (ret < 0) {
653 		dev_err(d->dev, "Write to MTD device failed: %d (%zd written)",
654 			ret, retlen);
655 		goto err;
656 	}
657 
658 	if (retlen != PAGE_SIZE) {
659 		dev_err(d->dev, "Short write to MTD device: %zd written",
660 			retlen);
661 		ret = -EIO;
662 		goto err;
663 	}
664 
665 	return ret;
666 
667 err:
668 	d->curr_write_pos--;
669 	eb->active_count--;
670 	d->revmap[*bp] = PAGE_UNDEF;
671 
672 	return ret;
673 }
674 
675 static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock,
676 		unsigned int *newblock)
677 {
678 	struct mtd_info *mtd = d->mtd;
679 	struct swap_eb *eb, *oldeb;
680 	int ret;
681 	size_t retlen;
682 	unsigned int page, retries;
683 	loff_t readpos;
684 
685 	page = d->revmap[oldblock];
686 	readpos = (loff_t) oldblock << PAGE_SHIFT;
687 	retries = 0;
688 
689 retry:
690 	ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf);
691 
692 	if (ret < 0 && !mtd_is_bitflip(ret)) {
693 		oldeb = d->eb_data + oldblock / d->pages_per_eblk;
694 		oldeb->flags |= EBLOCK_READERR;
695 
696 		dev_err(d->dev, "Read Error: %d (block %u)\n", ret,
697 			oldblock);
698 		retries++;
699 		if (retries < MTDSWAP_IO_RETRIES)
700 			goto retry;
701 
702 		goto read_error;
703 	}
704 
705 	if (retlen != PAGE_SIZE) {
706 		dev_err(d->dev, "Short read: %zd (block %u)\n", retlen,
707 		       oldblock);
708 		ret = -EIO;
709 		goto read_error;
710 	}
711 
712 	ret = mtdswap_write_block(d, d->page_buf, page, newblock, 1);
713 	if (ret < 0) {
714 		d->page_data[page] = BLOCK_ERROR;
715 		dev_err(d->dev, "Write error: %d\n", ret);
716 		return ret;
717 	}
718 
719 	eb = d->eb_data + *newblock / d->pages_per_eblk;
720 	d->page_data[page] = *newblock;
721 	d->revmap[oldblock] = PAGE_UNDEF;
722 	eb = d->eb_data + oldblock / d->pages_per_eblk;
723 	eb->active_count--;
724 
725 	return 0;
726 
727 read_error:
728 	d->page_data[page] = BLOCK_ERROR;
729 	d->revmap[oldblock] = PAGE_UNDEF;
730 	return ret;
731 }
732 
733 static int mtdswap_gc_eblock(struct mtdswap_dev *d, struct swap_eb *eb)
734 {
735 	unsigned int i, block, eblk_base, newblock;
736 	int ret, errcode;
737 
738 	errcode = 0;
739 	eblk_base = (eb - d->eb_data) * d->pages_per_eblk;
740 
741 	for (i = 0; i < d->pages_per_eblk; i++) {
742 		if (d->spare_eblks < MIN_SPARE_EBLOCKS)
743 			return -ENOSPC;
744 
745 		block = eblk_base + i;
746 		if (d->revmap[block] == PAGE_UNDEF)
747 			continue;
748 
749 		ret = mtdswap_move_block(d, block, &newblock);
750 		if (ret < 0 && !errcode)
751 			errcode = ret;
752 	}
753 
754 	return errcode;
755 }
756 
757 static int __mtdswap_choose_gc_tree(struct mtdswap_dev *d)
758 {
759 	int idx, stopat;
760 
761 	if (TREE_COUNT(d, CLEAN) < LOW_FRAG_GC_THRESHOLD)
762 		stopat = MTDSWAP_LOWFRAG;
763 	else
764 		stopat = MTDSWAP_HIFRAG;
765 
766 	for (idx = MTDSWAP_BITFLIP; idx >= stopat; idx--)
767 		if (d->trees[idx].root.rb_node != NULL)
768 			return idx;
769 
770 	return -1;
771 }
772 
773 static int mtdswap_wlfreq(unsigned int maxdiff)
774 {
775 	unsigned int h, x, y, dist, base;
776 
777 	/*
778 	 * Calculate linear ramp down from f1 to f2 when maxdiff goes from
779 	 * MAX_ERASE_DIFF to MAX_ERASE_DIFF + COLLECT_NONDIRTY_BASE.  Similar
780 	 * to triangle with height f1 - f1 and width COLLECT_NONDIRTY_BASE.
781 	 */
782 
783 	dist = maxdiff - MAX_ERASE_DIFF;
784 	if (dist > COLLECT_NONDIRTY_BASE)
785 		dist = COLLECT_NONDIRTY_BASE;
786 
787 	/*
788 	 * Modelling the slop as right angular triangle with base
789 	 * COLLECT_NONDIRTY_BASE and height freq1 - freq2. The ratio y/x is
790 	 * equal to the ratio h/base.
791 	 */
792 	h = COLLECT_NONDIRTY_FREQ1 - COLLECT_NONDIRTY_FREQ2;
793 	base = COLLECT_NONDIRTY_BASE;
794 
795 	x = dist - base;
796 	y = (x * h + base / 2) / base;
797 
798 	return COLLECT_NONDIRTY_FREQ2 + y;
799 }
800 
801 static int mtdswap_choose_wl_tree(struct mtdswap_dev *d)
802 {
803 	static unsigned int pick_cnt;
804 	unsigned int i, idx = -1, wear, max;
805 	struct rb_root *root;
806 
807 	max = 0;
808 	for (i = 0; i <= MTDSWAP_DIRTY; i++) {
809 		root = &d->trees[i].root;
810 		if (root->rb_node == NULL)
811 			continue;
812 
813 		wear = d->max_erase_count - MTDSWAP_ECNT_MIN(root);
814 		if (wear > max) {
815 			max = wear;
816 			idx = i;
817 		}
818 	}
819 
820 	if (max > MAX_ERASE_DIFF && pick_cnt >= mtdswap_wlfreq(max) - 1) {
821 		pick_cnt = 0;
822 		return idx;
823 	}
824 
825 	pick_cnt++;
826 	return -1;
827 }
828 
829 static int mtdswap_choose_gc_tree(struct mtdswap_dev *d,
830 				unsigned int background)
831 {
832 	int idx;
833 
834 	if (TREE_NONEMPTY(d, FAILING) &&
835 		(background || (TREE_EMPTY(d, CLEAN) && TREE_EMPTY(d, DIRTY))))
836 		return MTDSWAP_FAILING;
837 
838 	idx = mtdswap_choose_wl_tree(d);
839 	if (idx >= MTDSWAP_CLEAN)
840 		return idx;
841 
842 	return __mtdswap_choose_gc_tree(d);
843 }
844 
845 static struct swap_eb *mtdswap_pick_gc_eblk(struct mtdswap_dev *d,
846 					unsigned int background)
847 {
848 	struct rb_root *rp = NULL;
849 	struct swap_eb *eb = NULL;
850 	int idx;
851 
852 	if (background && TREE_COUNT(d, CLEAN) > CLEAN_BLOCK_THRESHOLD &&
853 		TREE_EMPTY(d, DIRTY) && TREE_EMPTY(d, FAILING))
854 		return NULL;
855 
856 	idx = mtdswap_choose_gc_tree(d, background);
857 	if (idx < 0)
858 		return NULL;
859 
860 	rp = &d->trees[idx].root;
861 	eb = rb_entry(rb_first(rp), struct swap_eb, rb);
862 
863 	rb_erase(&eb->rb, rp);
864 	eb->root = NULL;
865 	d->trees[idx].count--;
866 	return eb;
867 }
868 
/* Alternating test pattern: 0xAAAAAAAA for even @i, 0x55555555 for odd @i. */
static unsigned int mtdswap_test_patt(unsigned int i)
{
	if (i & 1)
		return 0x55555555;

	return 0xAAAAAAAA;
}
873 
874 static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d,
875 					struct swap_eb *eb)
876 {
877 	struct mtd_info *mtd = d->mtd;
878 	unsigned int test, i, j, patt, mtd_pages;
879 	loff_t base, pos;
880 	unsigned int *p1 = (unsigned int *)d->page_buf;
881 	unsigned char *p2 = (unsigned char *)d->oob_buf;
882 	struct mtd_oob_ops ops;
883 	int ret;
884 
885 	ops.mode = MTD_OPS_AUTO_OOB;
886 	ops.len = mtd->writesize;
887 	ops.ooblen = mtd->oobavail;
888 	ops.ooboffs = 0;
889 	ops.datbuf = d->page_buf;
890 	ops.oobbuf = d->oob_buf;
891 	base = mtdswap_eb_offset(d, eb);
892 	mtd_pages = d->pages_per_eblk * PAGE_SIZE / mtd->writesize;
893 
894 	for (test = 0; test < 2; test++) {
895 		pos = base;
896 		for (i = 0; i < mtd_pages; i++) {
897 			patt = mtdswap_test_patt(test + i);
898 			memset(d->page_buf, patt, mtd->writesize);
899 			memset(d->oob_buf, patt, mtd->oobavail);
900 			ret = mtd_write_oob(mtd, pos, &ops);
901 			if (ret)
902 				goto error;
903 
904 			pos += mtd->writesize;
905 		}
906 
907 		pos = base;
908 		for (i = 0; i < mtd_pages; i++) {
909 			ret = mtd_read_oob(mtd, pos, &ops);
910 			if (ret)
911 				goto error;
912 
913 			patt = mtdswap_test_patt(test + i);
914 			for (j = 0; j < mtd->writesize/sizeof(int); j++)
915 				if (p1[j] != patt)
916 					goto error;
917 
918 			for (j = 0; j < mtd->oobavail; j++)
919 				if (p2[j] != (unsigned char)patt)
920 					goto error;
921 
922 			pos += mtd->writesize;
923 		}
924 
925 		ret = mtdswap_erase_block(d, eb);
926 		if (ret)
927 			goto error;
928 	}
929 
930 	eb->flags &= ~EBLOCK_READERR;
931 	return 1;
932 
933 error:
934 	mtdswap_handle_badblock(d, eb);
935 	return 0;
936 }
937 
938 static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background)
939 {
940 	struct swap_eb *eb;
941 	int ret;
942 
943 	if (d->spare_eblks < MIN_SPARE_EBLOCKS)
944 		return 1;
945 
946 	eb = mtdswap_pick_gc_eblk(d, background);
947 	if (!eb)
948 		return 1;
949 
950 	ret = mtdswap_gc_eblock(d, eb);
951 	if (ret == -ENOSPC)
952 		return 1;
953 
954 	if (eb->flags & EBLOCK_FAILED) {
955 		mtdswap_handle_badblock(d, eb);
956 		return 0;
957 	}
958 
959 	eb->flags &= ~EBLOCK_BITFLIP;
960 	ret = mtdswap_erase_block(d, eb);
961 	if ((eb->flags & EBLOCK_READERR) &&
962 		(ret || !mtdswap_eblk_passes(d, eb)))
963 		return 0;
964 
965 	if (ret == 0)
966 		ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_CLEAN);
967 
968 	if (ret == 0)
969 		mtdswap_rb_add(d, eb, MTDSWAP_CLEAN);
970 	else if (ret != -EIO && !mtd_is_eccerr(ret))
971 		mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);
972 
973 	return 0;
974 }
975 
/*
 * Background worker: keep running background GC passes until a pass reports
 * that nothing could be collected or the block layer asks background work to
 * cease.
 */
static void mtdswap_background(struct mtd_blktrans_dev *dev)
{
	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);

	for (;;) {
		if (mtdswap_gc(d, 1))
			return;

		if (mtd_blktrans_cease_background(dev))
			return;
	}
}
987 
988 static void mtdswap_cleanup(struct mtdswap_dev *d)
989 {
990 	vfree(d->eb_data);
991 	vfree(d->revmap);
992 	vfree(d->page_data);
993 	kfree(d->oob_buf);
994 	kfree(d->page_buf);
995 }
996 
997 static int mtdswap_flush(struct mtd_blktrans_dev *dev)
998 {
999 	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1000 
1001 	mtd_sync(d->mtd);
1002 	return 0;
1003 }
1004 
1005 static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size)
1006 {
1007 	loff_t offset;
1008 	unsigned int badcnt;
1009 
1010 	badcnt = 0;
1011 
1012 	if (mtd_can_have_bb(mtd))
1013 		for (offset = 0; offset < size; offset += mtd->erasesize)
1014 			if (mtd_block_isbad(mtd, offset))
1015 				badcnt++;
1016 
1017 	return badcnt;
1018 }
1019 
1020 static int mtdswap_writesect(struct mtd_blktrans_dev *dev,
1021 			unsigned long page, char *buf)
1022 {
1023 	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1024 	unsigned int newblock, mapped;
1025 	struct swap_eb *eb;
1026 	int ret;
1027 
1028 	d->sect_write_count++;
1029 
1030 	if (d->spare_eblks < MIN_SPARE_EBLOCKS)
1031 		return -ENOSPC;
1032 
1033 	if (header) {
1034 		/* Ignore writes to the header page */
1035 		if (unlikely(page == 0))
1036 			return 0;
1037 
1038 		page--;
1039 	}
1040 
1041 	mapped = d->page_data[page];
1042 	if (mapped <= BLOCK_MAX) {
1043 		eb = d->eb_data + (mapped / d->pages_per_eblk);
1044 		eb->active_count--;
1045 		mtdswap_store_eb(d, eb);
1046 		d->page_data[page] = BLOCK_UNDEF;
1047 		d->revmap[mapped] = PAGE_UNDEF;
1048 	}
1049 
1050 	ret = mtdswap_write_block(d, buf, page, &newblock, 0);
1051 	d->mtd_write_count++;
1052 
1053 	if (ret < 0)
1054 		return ret;
1055 
1056 	eb = d->eb_data + (newblock / d->pages_per_eblk);
1057 	d->page_data[page] = newblock;
1058 
1059 	return 0;
1060 }
1061 
1062 /* Provide a dummy swap header for the kernel */
1063 static int mtdswap_auto_header(struct mtdswap_dev *d, char *buf)
1064 {
1065 	union swap_header *hd = (union swap_header *)(buf);
1066 
1067 	memset(buf, 0, PAGE_SIZE - 10);
1068 
1069 	hd->info.version = 1;
1070 	hd->info.last_page = d->mbd_dev->size - 1;
1071 	hd->info.nr_badpages = 0;
1072 
1073 	memcpy(buf + PAGE_SIZE - 10, "SWAPSPACE2", 10);
1074 
1075 	return 0;
1076 }
1077 
1078 static int mtdswap_readsect(struct mtd_blktrans_dev *dev,
1079 			unsigned long page, char *buf)
1080 {
1081 	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1082 	struct mtd_info *mtd = d->mtd;
1083 	unsigned int realblock, retries;
1084 	loff_t readpos;
1085 	struct swap_eb *eb;
1086 	size_t retlen;
1087 	int ret;
1088 
1089 	d->sect_read_count++;
1090 
1091 	if (header) {
1092 		if (unlikely(page == 0))
1093 			return mtdswap_auto_header(d, buf);
1094 
1095 		page--;
1096 	}
1097 
1098 	realblock = d->page_data[page];
1099 	if (realblock > BLOCK_MAX) {
1100 		memset(buf, 0x0, PAGE_SIZE);
1101 		if (realblock == BLOCK_UNDEF)
1102 			return 0;
1103 		else
1104 			return -EIO;
1105 	}
1106 
1107 	eb = d->eb_data + (realblock / d->pages_per_eblk);
1108 	BUG_ON(d->revmap[realblock] == PAGE_UNDEF);
1109 
1110 	readpos = (loff_t)realblock << PAGE_SHIFT;
1111 	retries = 0;
1112 
1113 retry:
1114 	ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, buf);
1115 
1116 	d->mtd_read_count++;
1117 	if (mtd_is_bitflip(ret)) {
1118 		eb->flags |= EBLOCK_BITFLIP;
1119 		mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
1120 		ret = 0;
1121 	}
1122 
1123 	if (ret < 0) {
1124 		dev_err(d->dev, "Read error %d\n", ret);
1125 		eb->flags |= EBLOCK_READERR;
1126 		mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
1127 		retries++;
1128 		if (retries < MTDSWAP_IO_RETRIES)
1129 			goto retry;
1130 
1131 		return ret;
1132 	}
1133 
1134 	if (retlen != PAGE_SIZE) {
1135 		dev_err(d->dev, "Short read %zd\n", retlen);
1136 		return -EIO;
1137 	}
1138 
1139 	return 0;
1140 }
1141 
1142 static int mtdswap_discard(struct mtd_blktrans_dev *dev, unsigned long first,
1143 			unsigned nr_pages)
1144 {
1145 	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1146 	unsigned long page;
1147 	struct swap_eb *eb;
1148 	unsigned int mapped;
1149 
1150 	d->discard_count++;
1151 
1152 	for (page = first; page < first + nr_pages; page++) {
1153 		mapped = d->page_data[page];
1154 		if (mapped <= BLOCK_MAX) {
1155 			eb = d->eb_data + (mapped / d->pages_per_eblk);
1156 			eb->active_count--;
1157 			mtdswap_store_eb(d, eb);
1158 			d->page_data[page] = BLOCK_UNDEF;
1159 			d->revmap[mapped] = PAGE_UNDEF;
1160 			d->discard_page_count++;
1161 		} else if (mapped == BLOCK_ERROR) {
1162 			d->page_data[page] = BLOCK_UNDEF;
1163 			d->discard_page_count++;
1164 		}
1165 	}
1166 
1167 	return 0;
1168 }
1169 
/*
 * seq_file show callback that prints the device statistics: per tree block
 * counts with minimum and maximum erase counts, the number of bad blocks, the
 * fill state of the current write block, the sum of all erase counts, the
 * request counters and the number of mapped pages.
 *
 * The tree and mapping data is gathered into locals while holding the
 * blktrans device lock and printed after the lock is dropped. Always returns
 * 0.
 */
static int mtdswap_show(struct seq_file *s, void *data)
{
	struct mtdswap_dev *d = (struct mtdswap_dev *) s->private;
	unsigned long sum;
	unsigned int count[MTDSWAP_TREE_CNT];
	unsigned int min[MTDSWAP_TREE_CNT];
	unsigned int max[MTDSWAP_TREE_CNT];
	unsigned int i, cw = 0, cwp = 0, cwecount = 0, bb_cnt, mapped, pages;
	uint64_t use_size;
	/* Printed names, in the order of the tree index enum. */
	static const char * const name[] = {
		"clean", "used", "low", "high", "dirty", "bitflip", "failing"
	};

	mutex_lock(&d->mbd_dev->lock);

	for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
		struct rb_root *root = &d->trees[i].root;

		/*
		 * min[] and max[] are only filled in for non-empty trees;
		 * the print loop below skips trees whose count is 0.
		 */
		if (root->rb_node) {
			count[i] = d->trees[i].count;
			min[i] = MTDSWAP_ECNT_MIN(root);
			max[i] = MTDSWAP_ECNT_MAX(root);
		} else
			count[i] = 0;
	}

	if (d->curr_write) {
		cw = 1;
		cwp = d->curr_write_pos;
		cwecount = d->curr_write->erase_count;
	}

	sum = 0;
	for (i = 0; i < d->eblks; i++)
		sum += d->eb_data[i].erase_count;

	use_size = (uint64_t)d->eblks * d->mtd->erasesize;
	bb_cnt = mtdswap_badblocks(d->mtd, use_size);

	/* Every page_data entry other than BLOCK_UNDEF counts as mapped. */
	mapped = 0;
	pages = d->mbd_dev->size;
	for (i = 0; i < pages; i++)
		if (d->page_data[i] != BLOCK_UNDEF)
			mapped++;

	mutex_unlock(&d->mbd_dev->lock);

	for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
		if (!count[i])
			continue;

		if (min[i] != max[i])
			seq_printf(s, "%s:\t%5d erase blocks, erased min %d, "
				"max %d times\n",
				name[i], count[i], min[i], max[i]);
		else
			seq_printf(s, "%s:\t%5d erase blocks, all erased %d "
				"times\n", name[i], count[i], min[i]);
	}

	if (bb_cnt)
		seq_printf(s, "bad:\t%5u erase blocks\n", bb_cnt);

	if (cw)
		seq_printf(s, "current erase block: %u pages used, %u free, "
			"erased %u times\n",
			cwp, d->pages_per_eblk - cwp, cwecount);

	seq_printf(s, "total erasures: %lu\n", sum);

	seq_puts(s, "\n");

	/* The counters below are read from d directly, without the lock. */
	seq_printf(s, "mtdswap_readsect count: %llu\n", d->sect_read_count);
	seq_printf(s, "mtdswap_writesect count: %llu\n", d->sect_write_count);
	seq_printf(s, "mtdswap_discard count: %llu\n", d->discard_count);
	seq_printf(s, "mtd read count: %llu\n", d->mtd_read_count);
	seq_printf(s, "mtd write count: %llu\n", d->mtd_write_count);
	seq_printf(s, "discarded pages count: %llu\n", d->discard_page_count);

	seq_puts(s, "\n");
	seq_printf(s, "total pages: %u\n", pages);
	seq_printf(s, "pages mapped: %u\n", mapped);

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(mtdswap);
1256 
1257 static int mtdswap_add_debugfs(struct mtdswap_dev *d)
1258 {
1259 	struct dentry *root = d->mtd->dbg.dfs_dir;
1260 
1261 	if (!IS_ENABLED(CONFIG_DEBUG_FS))
1262 		return 0;
1263 
1264 	if (IS_ERR_OR_NULL(root))
1265 		return -1;
1266 
1267 	debugfs_create_file("mtdswap_stats", S_IRUSR, root, d, &mtdswap_fops);
1268 
1269 	return 0;
1270 }
1271 
1272 static int mtdswap_init(struct mtdswap_dev *d, unsigned int eblocks,
1273 			unsigned int spare_cnt)
1274 {
1275 	struct mtd_info *mtd = d->mbd_dev->mtd;
1276 	unsigned int i, eblk_bytes, pages, blocks;
1277 	int ret = -ENOMEM;
1278 
1279 	d->mtd = mtd;
1280 	d->eblks = eblocks;
1281 	d->spare_eblks = spare_cnt;
1282 	d->pages_per_eblk = mtd->erasesize >> PAGE_SHIFT;
1283 
1284 	pages = d->mbd_dev->size;
1285 	blocks = eblocks * d->pages_per_eblk;
1286 
1287 	for (i = 0; i < MTDSWAP_TREE_CNT; i++)
1288 		d->trees[i].root = RB_ROOT;
1289 
1290 	d->page_data = vmalloc(array_size(pages, sizeof(int)));
1291 	if (!d->page_data)
1292 		goto page_data_fail;
1293 
1294 	d->revmap = vmalloc(array_size(blocks, sizeof(int)));
1295 	if (!d->revmap)
1296 		goto revmap_fail;
1297 
1298 	eblk_bytes = sizeof(struct swap_eb)*d->eblks;
1299 	d->eb_data = vzalloc(eblk_bytes);
1300 	if (!d->eb_data)
1301 		goto eb_data_fail;
1302 
1303 	for (i = 0; i < pages; i++)
1304 		d->page_data[i] = BLOCK_UNDEF;
1305 
1306 	for (i = 0; i < blocks; i++)
1307 		d->revmap[i] = PAGE_UNDEF;
1308 
1309 	d->page_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
1310 	if (!d->page_buf)
1311 		goto page_buf_fail;
1312 
1313 	d->oob_buf = kmalloc_array(2, mtd->oobavail, GFP_KERNEL);
1314 	if (!d->oob_buf)
1315 		goto oob_buf_fail;
1316 
1317 	mtdswap_scan_eblks(d);
1318 
1319 	return 0;
1320 
1321 oob_buf_fail:
1322 	kfree(d->page_buf);
1323 page_buf_fail:
1324 	vfree(d->eb_data);
1325 eb_data_fail:
1326 	vfree(d->revmap);
1327 revmap_fail:
1328 	vfree(d->page_data);
1329 page_data_fail:
1330 	printk(KERN_ERR "%s: init failed (%d)\n", MTDSWAP_PREFIX, ret);
1331 	return ret;
1332 }
1333 
1334 static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
1335 {
1336 	struct mtdswap_dev *d;
1337 	struct mtd_blktrans_dev *mbd_dev;
1338 	char *parts;
1339 	char *this_opt;
1340 	unsigned long part;
1341 	unsigned int eblocks, eavailable, bad_blocks, spare_cnt;
1342 	uint64_t swap_size, use_size, size_limit;
1343 	int ret;
1344 
1345 	parts = &partitions[0];
1346 	if (!*parts)
1347 		return;
1348 
1349 	while ((this_opt = strsep(&parts, ",")) != NULL) {
1350 		if (kstrtoul(this_opt, 0, &part) < 0)
1351 			return;
1352 
1353 		if (mtd->index == part)
1354 			break;
1355 	}
1356 
1357 	if (mtd->index != part)
1358 		return;
1359 
1360 	if (mtd->erasesize < PAGE_SIZE || mtd->erasesize % PAGE_SIZE) {
1361 		printk(KERN_ERR "%s: Erase size %u not multiple of PAGE_SIZE "
1362 			"%lu\n", MTDSWAP_PREFIX, mtd->erasesize, PAGE_SIZE);
1363 		return;
1364 	}
1365 
1366 	if (PAGE_SIZE % mtd->writesize || mtd->writesize > PAGE_SIZE) {
1367 		printk(KERN_ERR "%s: PAGE_SIZE %lu not multiple of write size"
1368 			" %u\n", MTDSWAP_PREFIX, PAGE_SIZE, mtd->writesize);
1369 		return;
1370 	}
1371 
1372 	if (!mtd->oobsize || mtd->oobavail < MTDSWAP_OOBSIZE) {
1373 		printk(KERN_ERR "%s: Not enough free bytes in OOB, "
1374 			"%d available, %zu needed.\n",
1375 			MTDSWAP_PREFIX, mtd->oobavail, MTDSWAP_OOBSIZE);
1376 		return;
1377 	}
1378 
1379 	if (spare_eblocks > 100)
1380 		spare_eblocks = 100;
1381 
1382 	use_size = mtd->size;
1383 	size_limit = (uint64_t) BLOCK_MAX * PAGE_SIZE;
1384 
1385 	if (mtd->size > size_limit) {
1386 		printk(KERN_WARNING "%s: Device too large. Limiting size to "
1387 			"%llu bytes\n", MTDSWAP_PREFIX, size_limit);
1388 		use_size = size_limit;
1389 	}
1390 
1391 	eblocks = mtd_div_by_eb(use_size, mtd);
1392 	use_size = (uint64_t)eblocks * mtd->erasesize;
1393 	bad_blocks = mtdswap_badblocks(mtd, use_size);
1394 	eavailable = eblocks - bad_blocks;
1395 
1396 	if (eavailable < MIN_ERASE_BLOCKS) {
1397 		printk(KERN_ERR "%s: Not enough erase blocks. %u available, "
1398 			"%d needed\n", MTDSWAP_PREFIX, eavailable,
1399 			MIN_ERASE_BLOCKS);
1400 		return;
1401 	}
1402 
1403 	spare_cnt = div_u64((uint64_t)eavailable * spare_eblocks, 100);
1404 
1405 	if (spare_cnt < MIN_SPARE_EBLOCKS)
1406 		spare_cnt = MIN_SPARE_EBLOCKS;
1407 
1408 	if (spare_cnt > eavailable - 1)
1409 		spare_cnt = eavailable - 1;
1410 
1411 	swap_size = (uint64_t)(eavailable - spare_cnt) * mtd->erasesize +
1412 		(header ? PAGE_SIZE : 0);
1413 
1414 	printk(KERN_INFO "%s: Enabling MTD swap on device %lu, size %llu KB, "
1415 		"%u spare, %u bad blocks\n",
1416 		MTDSWAP_PREFIX, part, swap_size / 1024, spare_cnt, bad_blocks);
1417 
1418 	d = kzalloc(sizeof(struct mtdswap_dev), GFP_KERNEL);
1419 	if (!d)
1420 		return;
1421 
1422 	mbd_dev = kzalloc(sizeof(struct mtd_blktrans_dev), GFP_KERNEL);
1423 	if (!mbd_dev) {
1424 		kfree(d);
1425 		return;
1426 	}
1427 
1428 	d->mbd_dev = mbd_dev;
1429 	mbd_dev->priv = d;
1430 
1431 	mbd_dev->mtd = mtd;
1432 	mbd_dev->devnum = mtd->index;
1433 	mbd_dev->size = swap_size >> PAGE_SHIFT;
1434 	mbd_dev->tr = tr;
1435 
1436 	if (!(mtd->flags & MTD_WRITEABLE))
1437 		mbd_dev->readonly = 1;
1438 
1439 	if (mtdswap_init(d, eblocks, spare_cnt) < 0)
1440 		goto init_failed;
1441 
1442 	if (add_mtd_blktrans_dev(mbd_dev) < 0)
1443 		goto cleanup;
1444 
1445 	d->dev = disk_to_dev(mbd_dev->disk);
1446 
1447 	ret = mtdswap_add_debugfs(d);
1448 	if (ret < 0)
1449 		goto debugfs_failed;
1450 
1451 	return;
1452 
1453 debugfs_failed:
1454 	del_mtd_blktrans_dev(mbd_dev);
1455 
1456 cleanup:
1457 	mtdswap_cleanup(d);
1458 
1459 init_failed:
1460 	kfree(mbd_dev);
1461 	kfree(d);
1462 }
1463 
/*
 * blktrans remove_dev callback: unregister the block device, then release
 * the swap state attached to it.
 */
static void mtdswap_remove_dev(struct mtd_blktrans_dev *dev)
{
	/* Look up the private state before the blktrans device goes away */
	struct mtdswap_dev *swap = MTDSWAP_MBD_TO_MTDSWAP(dev);

	del_mtd_blktrans_dev(dev);

	mtdswap_cleanup(swap);
	kfree(swap);
}
1472 
1473 static struct mtd_blktrans_ops mtdswap_ops = {
1474 	.name		= "mtdswap",
1475 	.major		= 0,
1476 	.part_bits	= 0,
1477 	.blksize	= PAGE_SIZE,
1478 	.flush		= mtdswap_flush,
1479 	.readsect	= mtdswap_readsect,
1480 	.writesect	= mtdswap_writesect,
1481 	.discard	= mtdswap_discard,
1482 	.background	= mtdswap_background,
1483 	.add_mtd	= mtdswap_add_mtd,
1484 	.remove_dev	= mtdswap_remove_dev,
1485 	.owner		= THIS_MODULE,
1486 };
1487 
1488 static int __init mtdswap_modinit(void)
1489 {
1490 	return register_mtd_blktrans(&mtdswap_ops);
1491 }
1492 
1493 static void __exit mtdswap_modexit(void)
1494 {
1495 	deregister_mtd_blktrans(&mtdswap_ops);
1496 }
1497 
1498 module_init(mtdswap_modinit);
1499 module_exit(mtdswap_modexit);
1500 
1501 
1502 MODULE_LICENSE("GPL");
1503 MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
1504 MODULE_DESCRIPTION("Block device access to an MTD suitable for using as "
1505 		"swap space");
1506