xref: /openbmc/linux/drivers/block/zram/zram_drv.c (revision 36926a7d)
1 /*
2  * Compressed RAM block device
3  *
4  * Copyright (C) 2008, 2009, 2010  Nitin Gupta
5  *               2012, 2013 Minchan Kim
6  *
7  * This code is released using a dual license strategy: BSD/GPL
8  * You can choose the license that better fits your requirements.
9  *
10  * Released under the terms of 3-clause BSD License
11  * Released under the terms of GNU General Public License Version 2.0
12  *
13  */
14 
15 #define KMSG_COMPONENT "zram"
16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17 
18 #include <linux/module.h>
19 #include <linux/kernel.h>
20 #include <linux/bio.h>
21 #include <linux/bitops.h>
22 #include <linux/blkdev.h>
23 #include <linux/buffer_head.h>
24 #include <linux/device.h>
25 #include <linux/highmem.h>
26 #include <linux/slab.h>
27 #include <linux/backing-dev.h>
28 #include <linux/string.h>
29 #include <linux/vmalloc.h>
30 #include <linux/err.h>
31 #include <linux/idr.h>
32 #include <linux/sysfs.h>
33 #include <linux/debugfs.h>
34 #include <linux/cpuhotplug.h>
35 #include <linux/part_stat.h>
36 
37 #include "zram_drv.h"
38 
39 static DEFINE_IDR(zram_index_idr);
40 /* idr index must be protected */
41 static DEFINE_MUTEX(zram_index_mutex);
42 
43 static int zram_major;
44 static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;
45 
46 /* Module params (documentation at end) */
47 static unsigned int num_devices = 1;
48 /*
49  * Pages that compress to a size equal to or greater than this are stored
50  * uncompressed in memory.
51  */
52 static size_t huge_class_size;
53 
54 static const struct block_device_operations zram_devops;
55 #ifdef CONFIG_ZRAM_WRITEBACK
56 static const struct block_device_operations zram_wb_devops;
57 #endif
58 
59 static void zram_free_page(struct zram *zram, size_t index);
60 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
61 				u32 index, int offset, struct bio *bio);
62 
63 
64 static int zram_slot_trylock(struct zram *zram, u32 index)
65 {
66 	return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
67 }
68 
69 static void zram_slot_lock(struct zram *zram, u32 index)
70 {
71 	bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
72 }
73 
74 static void zram_slot_unlock(struct zram *zram, u32 index)
75 {
76 	bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
77 }
78 
79 static inline bool init_done(struct zram *zram)
80 {
81 	return zram->disksize;
82 }
83 
84 static inline struct zram *dev_to_zram(struct device *dev)
85 {
86 	return (struct zram *)dev_to_disk(dev)->private_data;
87 }
88 
89 static unsigned long zram_get_handle(struct zram *zram, u32 index)
90 {
91 	return zram->table[index].handle;
92 }
93 
94 static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
95 {
96 	zram->table[index].handle = handle;
97 }
98 
99 /* flag operations require the table entry bit_spin_lock() to be held */
100 static bool zram_test_flag(struct zram *zram, u32 index,
101 			enum zram_pageflags flag)
102 {
103 	return zram->table[index].flags & BIT(flag);
104 }
105 
106 static void zram_set_flag(struct zram *zram, u32 index,
107 			enum zram_pageflags flag)
108 {
109 	zram->table[index].flags |= BIT(flag);
110 }
111 
112 static void zram_clear_flag(struct zram *zram, u32 index,
113 			enum zram_pageflags flag)
114 {
115 	zram->table[index].flags &= ~BIT(flag);
116 }
117 
118 static inline void zram_set_element(struct zram *zram, u32 index,
119 			unsigned long element)
120 {
121 	zram->table[index].element = element;
122 }
123 
124 static unsigned long zram_get_element(struct zram *zram, u32 index)
125 {
126 	return zram->table[index].element;
127 }
128 
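/*
 * A note on the layout used by the accessors below: the low ZRAM_FLAG_SHIFT
 * bits of table[index].flags hold the compressed object size, while the bits
 * from ZRAM_FLAG_SHIFT upward hold the zram_pageflags bits tested and set
 * above.
 */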
129 static size_t zram_get_obj_size(struct zram *zram, u32 index)
130 {
131 	return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
132 }
133 
134 static void zram_set_obj_size(struct zram *zram,
135 					u32 index, size_t size)
136 {
137 	unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;
138 
139 	zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
140 }
141 
142 static inline bool zram_allocated(struct zram *zram, u32 index)
143 {
144 	return zram_get_obj_size(zram, index) ||
145 			zram_test_flag(zram, index, ZRAM_SAME) ||
146 			zram_test_flag(zram, index, ZRAM_WB);
147 }
148 
149 #if PAGE_SIZE != 4096
150 static inline bool is_partial_io(struct bio_vec *bvec)
151 {
152 	return bvec->bv_len != PAGE_SIZE;
153 }
154 #else
155 static inline bool is_partial_io(struct bio_vec *bvec)
156 {
157 	return false;
158 }
159 #endif
160 
161 /*
162  * Check if request is within bounds and aligned on zram logical blocks.
163  */
164 static inline bool valid_io_request(struct zram *zram,
165 		sector_t start, unsigned int size)
166 {
167 	u64 end, bound;
168 
169 	/* unaligned request */
170 	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
171 		return false;
172 	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
173 		return false;
174 
175 	end = start + (size >> SECTOR_SHIFT);
176 	bound = zram->disksize >> SECTOR_SHIFT;
177 	/* out of range */
178 	if (unlikely(start >= bound || end > bound || start > end))
179 		return false;
180 
181 	/* I/O request is valid */
182 	return true;
183 }
184 
185 static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
186 {
187 	*index  += (*offset + bvec->bv_len) / PAGE_SIZE;
188 	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
189 }
190 
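/*
 * update_used_max() below keeps stats.max_used_pages monotonically
 * non-decreasing: it re-reads the current maximum and retries the
 * atomic_long_cmpxchg() until either the stored maximum is already >= pages
 * or the compare-and-swap succeeds.
 */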
191 static inline void update_used_max(struct zram *zram,
192 					const unsigned long pages)
193 {
194 	unsigned long old_max, cur_max;
195 
196 	old_max = atomic_long_read(&zram->stats.max_used_pages);
197 
198 	do {
199 		cur_max = old_max;
200 		if (pages > cur_max)
201 			old_max = atomic_long_cmpxchg(
202 				&zram->stats.max_used_pages, cur_max, pages);
203 	} while (old_max != cur_max);
204 }
205 
206 static inline void zram_fill_page(void *ptr, unsigned long len,
207 					unsigned long value)
208 {
209 	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
210 	memset_l(ptr, value, len / sizeof(unsigned long));
211 }
212 
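/*
 * page_same_filled() below detects pages whose content is one repeated
 * unsigned long value. It compares the first and last words as a cheap early
 * exit, then scans the rest of the page; the repeated value is returned via
 * *element so the page can later be reconstructed by zram_fill_page().
 */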
213 static bool page_same_filled(void *ptr, unsigned long *element)
214 {
215 	unsigned long *page;
216 	unsigned long val;
217 	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
218 
219 	page = (unsigned long *)ptr;
220 	val = page[0];
221 
222 	if (val != page[last_pos])
223 		return false;
224 
225 	for (pos = 1; pos < last_pos; pos++) {
226 		if (val != page[pos])
227 			return false;
228 	}
229 
230 	*element = val;
231 
232 	return true;
233 }
234 
235 static ssize_t initstate_show(struct device *dev,
236 		struct device_attribute *attr, char *buf)
237 {
238 	u32 val;
239 	struct zram *zram = dev_to_zram(dev);
240 
241 	down_read(&zram->init_lock);
242 	val = init_done(zram);
243 	up_read(&zram->init_lock);
244 
245 	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
246 }
247 
248 static ssize_t disksize_show(struct device *dev,
249 		struct device_attribute *attr, char *buf)
250 {
251 	struct zram *zram = dev_to_zram(dev);
252 
253 	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
254 }
255 
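/*
 * Example usage (the device name is an example; mem_limit goes through
 * memparse(), so suffixes such as K/M/G are accepted, and 0 disables the
 * limit):
 *
 *	echo 512M > /sys/block/zram0/mem_limit
 */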
256 static ssize_t mem_limit_store(struct device *dev,
257 		struct device_attribute *attr, const char *buf, size_t len)
258 {
259 	u64 limit;
260 	char *tmp;
261 	struct zram *zram = dev_to_zram(dev);
262 
263 	limit = memparse(buf, &tmp);
264 	if (buf == tmp) /* no chars parsed, invalid input */
265 		return -EINVAL;
266 
267 	down_write(&zram->init_lock);
268 	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
269 	up_write(&zram->init_lock);
270 
271 	return len;
272 }
273 
274 static ssize_t mem_used_max_store(struct device *dev,
275 		struct device_attribute *attr, const char *buf, size_t len)
276 {
277 	int err;
278 	unsigned long val;
279 	struct zram *zram = dev_to_zram(dev);
280 
281 	err = kstrtoul(buf, 10, &val);
282 	if (err || val != 0)
283 		return -EINVAL;
284 
285 	down_read(&zram->init_lock);
286 	if (init_done(zram)) {
287 		atomic_long_set(&zram->stats.max_used_pages,
288 				zs_get_total_pages(zram->mem_pool));
289 	}
290 	up_read(&zram->init_lock);
291 
292 	return len;
293 }
294 
295 /*
296  * Mark all pages that are older than or equal to the cutoff as IDLE.
297  * Callers should hold the zram init lock in read mode.
298  */
299 static void mark_idle(struct zram *zram, ktime_t cutoff)
300 {
301 	int is_idle = 1;
302 	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
303 	int index;
304 
305 	for (index = 0; index < nr_pages; index++) {
306 		/*
307 		 * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
308 		 * See the comment in writeback_store.
309 		 */
310 		zram_slot_lock(zram, index);
311 		if (zram_allocated(zram, index) &&
312 				!zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
313 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
314 			is_idle = !cutoff || ktime_after(cutoff, zram->table[index].ac_time);
315 #endif
316 			if (is_idle)
317 				zram_set_flag(zram, index, ZRAM_IDLE);
318 		}
319 		zram_slot_unlock(zram, index);
320 	}
321 }
322 
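/*
 * Example usage (the device name is an example):
 *
 *	echo all > /sys/block/zram0/idle
 *
 * With CONFIG_ZRAM_MEMORY_TRACKING, a number of seconds may be written
 * instead, so that only pages not accessed within that window are marked
 * idle.
 */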
323 static ssize_t idle_store(struct device *dev,
324 		struct device_attribute *attr, const char *buf, size_t len)
325 {
326 	struct zram *zram = dev_to_zram(dev);
327 	ktime_t cutoff_time = 0;
328 	ssize_t rv = -EINVAL;
329 
330 	if (!sysfs_streq(buf, "all")) {
331 		/*
332 		 * If it did not parse as 'all', try to treat it as an integer
333 		 * number of seconds when memory tracking is enabled.
334 		 */
335 		u64 age_sec;
336 
337 		if (IS_ENABLED(CONFIG_ZRAM_MEMORY_TRACKING) && !kstrtoull(buf, 0, &age_sec))
338 			cutoff_time = ktime_sub(ktime_get_boottime(),
339 					ns_to_ktime(age_sec * NSEC_PER_SEC));
340 		else
341 			goto out;
342 	}
343 
344 	down_read(&zram->init_lock);
345 	if (!init_done(zram))
346 		goto out_unlock;
347 
348 	/*
349 	 * A cutoff_time of 0 marks everything as idle, this is the
350 	 * "all" behavior.
351 	 */
352 	mark_idle(zram, cutoff_time);
353 	rv = len;
354 
355 out_unlock:
356 	up_read(&zram->init_lock);
357 out:
358 	return rv;
359 }
360 
361 #ifdef CONFIG_ZRAM_WRITEBACK
362 static ssize_t writeback_limit_enable_store(struct device *dev,
363 		struct device_attribute *attr, const char *buf, size_t len)
364 {
365 	struct zram *zram = dev_to_zram(dev);
366 	u64 val;
367 	ssize_t ret = -EINVAL;
368 
369 	if (kstrtoull(buf, 10, &val))
370 		return ret;
371 
372 	down_read(&zram->init_lock);
373 	spin_lock(&zram->wb_limit_lock);
374 	zram->wb_limit_enable = val;
375 	spin_unlock(&zram->wb_limit_lock);
376 	up_read(&zram->init_lock);
377 	ret = len;
378 
379 	return ret;
380 }
381 
382 static ssize_t writeback_limit_enable_show(struct device *dev,
383 		struct device_attribute *attr, char *buf)
384 {
385 	bool val;
386 	struct zram *zram = dev_to_zram(dev);
387 
388 	down_read(&zram->init_lock);
389 	spin_lock(&zram->wb_limit_lock);
390 	val = zram->wb_limit_enable;
391 	spin_unlock(&zram->wb_limit_lock);
392 	up_read(&zram->init_lock);
393 
394 	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
395 }
396 
397 static ssize_t writeback_limit_store(struct device *dev,
398 		struct device_attribute *attr, const char *buf, size_t len)
399 {
400 	struct zram *zram = dev_to_zram(dev);
401 	u64 val;
402 	ssize_t ret = -EINVAL;
403 
404 	if (kstrtoull(buf, 10, &val))
405 		return ret;
406 
407 	down_read(&zram->init_lock);
408 	spin_lock(&zram->wb_limit_lock);
409 	zram->bd_wb_limit = val;
410 	spin_unlock(&zram->wb_limit_lock);
411 	up_read(&zram->init_lock);
412 	ret = len;
413 
414 	return ret;
415 }
416 
417 static ssize_t writeback_limit_show(struct device *dev,
418 		struct device_attribute *attr, char *buf)
419 {
420 	u64 val;
421 	struct zram *zram = dev_to_zram(dev);
422 
423 	down_read(&zram->init_lock);
424 	spin_lock(&zram->wb_limit_lock);
425 	val = zram->bd_wb_limit;
426 	spin_unlock(&zram->wb_limit_lock);
427 	up_read(&zram->init_lock);
428 
429 	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
430 }
431 
432 static void reset_bdev(struct zram *zram)
433 {
434 	struct block_device *bdev;
435 
436 	if (!zram->backing_dev)
437 		return;
438 
439 	bdev = zram->bdev;
440 	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
441 	/* hope filp_close flushes all pending IO */
442 	filp_close(zram->backing_dev, NULL);
443 	zram->backing_dev = NULL;
444 	zram->bdev = NULL;
445 	zram->disk->fops = &zram_devops;
446 	kvfree(zram->bitmap);
447 	zram->bitmap = NULL;
448 }
449 
450 static ssize_t backing_dev_show(struct device *dev,
451 		struct device_attribute *attr, char *buf)
452 {
453 	struct file *file;
454 	struct zram *zram = dev_to_zram(dev);
455 	char *p;
456 	ssize_t ret;
457 
458 	down_read(&zram->init_lock);
459 	file = zram->backing_dev;
460 	if (!file) {
461 		memcpy(buf, "none\n", 5);
462 		up_read(&zram->init_lock);
463 		return 5;
464 	}
465 
466 	p = file_path(file, buf, PAGE_SIZE - 1);
467 	if (IS_ERR(p)) {
468 		ret = PTR_ERR(p);
469 		goto out;
470 	}
471 
472 	ret = strlen(p);
473 	memmove(buf, p, ret);
474 	buf[ret++] = '\n';
475 out:
476 	up_read(&zram->init_lock);
477 	return ret;
478 }
479 
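/*
 * Example usage (the device names are examples; the target must be a block
 * device and must be configured before disksize is set):
 *
 *	echo /dev/sdb1 > /sys/block/zram0/backing_dev
 */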
480 static ssize_t backing_dev_store(struct device *dev,
481 		struct device_attribute *attr, const char *buf, size_t len)
482 {
483 	char *file_name;
484 	size_t sz;
485 	struct file *backing_dev = NULL;
486 	struct inode *inode;
487 	struct address_space *mapping;
488 	unsigned int bitmap_sz;
489 	unsigned long nr_pages, *bitmap = NULL;
490 	struct block_device *bdev = NULL;
491 	int err;
492 	struct zram *zram = dev_to_zram(dev);
493 
494 	file_name = kmalloc(PATH_MAX, GFP_KERNEL);
495 	if (!file_name)
496 		return -ENOMEM;
497 
498 	down_write(&zram->init_lock);
499 	if (init_done(zram)) {
500 		pr_info("Can't setup backing device for initialized device\n");
501 		err = -EBUSY;
502 		goto out;
503 	}
504 
505 	strscpy(file_name, buf, PATH_MAX);
506 	/* ignore trailing newline */
507 	sz = strlen(file_name);
508 	if (sz > 0 && file_name[sz - 1] == '\n')
509 		file_name[sz - 1] = 0x00;
510 
511 	backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
512 	if (IS_ERR(backing_dev)) {
513 		err = PTR_ERR(backing_dev);
514 		backing_dev = NULL;
515 		goto out;
516 	}
517 
518 	mapping = backing_dev->f_mapping;
519 	inode = mapping->host;
520 
521 	/* Only block devices are supported at the moment */
522 	if (!S_ISBLK(inode->i_mode)) {
523 		err = -ENOTBLK;
524 		goto out;
525 	}
526 
527 	bdev = blkdev_get_by_dev(inode->i_rdev,
528 			FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
529 	if (IS_ERR(bdev)) {
530 		err = PTR_ERR(bdev);
531 		bdev = NULL;
532 		goto out;
533 	}
534 
535 	nr_pages = i_size_read(inode) >> PAGE_SHIFT;
536 	bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
537 	bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
538 	if (!bitmap) {
539 		err = -ENOMEM;
540 		goto out;
541 	}
542 
543 	reset_bdev(zram);
544 
545 	zram->bdev = bdev;
546 	zram->backing_dev = backing_dev;
547 	zram->bitmap = bitmap;
548 	zram->nr_pages = nr_pages;
549 	/*
550 	 * With the writeback feature, zram does asynchronous IO, so it is no
551 	 * longer a synchronous device; remove the synchronous io flag. Otherwise,
552 	 * the upper layer (e.g., swap) could wait for IO completion rather than
553 	 * submit-and-return, which would make the system sluggish.
554 	 * Furthermore, when the IO function returns (e.g., swap_readpage), the
555 	 * upper layer expects the IO is done and may free the page, while the
556 	 * IO is in fact still in flight, which could finally cause a
557 	 * use-after-free when the IO really completes.
558 	 */
559 	zram->disk->fops = &zram_wb_devops;
560 	up_write(&zram->init_lock);
561 
562 	pr_info("setup backing device %s\n", file_name);
563 	kfree(file_name);
564 
565 	return len;
566 out:
567 	kvfree(bitmap);
568 
569 	if (bdev)
570 		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
571 
572 	if (backing_dev)
573 		filp_close(backing_dev, NULL);
574 
575 	up_write(&zram->init_lock);
576 
577 	kfree(file_name);
578 
579 	return err;
580 }
581 
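/*
 * alloc_block_bdev()/free_block_bdev() below manage backing-device space with
 * a simple bitmap allocator, one bit per PAGE_SIZE block. Bit 0 is never
 * handed out, so a return value of 0 always means "no free block".
 */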
582 static unsigned long alloc_block_bdev(struct zram *zram)
583 {
584 	unsigned long blk_idx = 1;
585 retry:
586 	/* skip bit 0 to avoid confusion with zram.handle == 0 */
587 	blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
588 	if (blk_idx == zram->nr_pages)
589 		return 0;
590 
591 	if (test_and_set_bit(blk_idx, zram->bitmap))
592 		goto retry;
593 
594 	atomic64_inc(&zram->stats.bd_count);
595 	return blk_idx;
596 }
597 
598 static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
599 {
600 	int was_set;
601 
602 	was_set = test_and_clear_bit(blk_idx, zram->bitmap);
603 	WARN_ON_ONCE(!was_set);
604 	atomic64_dec(&zram->stats.bd_count);
605 }
606 
607 static void zram_page_end_io(struct bio *bio)
608 {
609 	struct page *page = bio_first_page_all(bio);
610 
611 	page_endio(page, op_is_write(bio_op(bio)),
612 			blk_status_to_errno(bio->bi_status));
613 	bio_put(bio);
614 }
615 
616 /*
617  * Returns 1 if the submission is successful.
618  */
619 static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
620 			unsigned long entry, struct bio *parent)
621 {
622 	struct bio *bio;
623 
624 	bio = bio_alloc(zram->bdev, 1, parent ? parent->bi_opf : REQ_OP_READ,
625 			GFP_NOIO);
626 	if (!bio)
627 		return -ENOMEM;
628 
629 	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
630 	if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
631 		bio_put(bio);
632 		return -EIO;
633 	}
634 
635 	if (!parent)
636 		bio->bi_end_io = zram_page_end_io;
637 	else
638 		bio_chain(bio, parent);
639 
640 	submit_bio(bio);
641 	return 1;
642 }
643 
644 #define PAGE_WB_SIG "page_index="
645 
646 #define PAGE_WRITEBACK 0
647 #define HUGE_WRITEBACK (1<<0)
648 #define IDLE_WRITEBACK (1<<1)
649 
650 
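/*
 * Example usage (the device name and page index are examples), matching the
 * modes parsed below:
 *
 *	echo idle > /sys/block/zram0/writeback
 *	echo huge > /sys/block/zram0/writeback
 *	echo huge_idle > /sys/block/zram0/writeback
 *	echo page_index=1251 > /sys/block/zram0/writeback
 */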
651 static ssize_t writeback_store(struct device *dev,
652 		struct device_attribute *attr, const char *buf, size_t len)
653 {
654 	struct zram *zram = dev_to_zram(dev);
655 	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
656 	unsigned long index = 0;
657 	struct bio bio;
658 	struct bio_vec bio_vec;
659 	struct page *page;
660 	ssize_t ret = len;
661 	int mode, err;
662 	unsigned long blk_idx = 0;
663 
664 	if (sysfs_streq(buf, "idle"))
665 		mode = IDLE_WRITEBACK;
666 	else if (sysfs_streq(buf, "huge"))
667 		mode = HUGE_WRITEBACK;
668 	else if (sysfs_streq(buf, "huge_idle"))
669 		mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
670 	else {
671 		if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
672 			return -EINVAL;
673 
674 		if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) ||
675 				index >= nr_pages)
676 			return -EINVAL;
677 
678 		nr_pages = 1;
679 		mode = PAGE_WRITEBACK;
680 	}
681 
682 	down_read(&zram->init_lock);
683 	if (!init_done(zram)) {
684 		ret = -EINVAL;
685 		goto release_init_lock;
686 	}
687 
688 	if (!zram->backing_dev) {
689 		ret = -ENODEV;
690 		goto release_init_lock;
691 	}
692 
693 	page = alloc_page(GFP_KERNEL);
694 	if (!page) {
695 		ret = -ENOMEM;
696 		goto release_init_lock;
697 	}
698 
699 	for (; nr_pages != 0; index++, nr_pages--) {
700 		struct bio_vec bvec;
701 
702 		bvec.bv_page = page;
703 		bvec.bv_len = PAGE_SIZE;
704 		bvec.bv_offset = 0;
705 
706 		spin_lock(&zram->wb_limit_lock);
707 		if (zram->wb_limit_enable && !zram->bd_wb_limit) {
708 			spin_unlock(&zram->wb_limit_lock);
709 			ret = -EIO;
710 			break;
711 		}
712 		spin_unlock(&zram->wb_limit_lock);
713 
714 		if (!blk_idx) {
715 			blk_idx = alloc_block_bdev(zram);
716 			if (!blk_idx) {
717 				ret = -ENOSPC;
718 				break;
719 			}
720 		}
721 
722 		zram_slot_lock(zram, index);
723 		if (!zram_allocated(zram, index))
724 			goto next;
725 
726 		if (zram_test_flag(zram, index, ZRAM_WB) ||
727 				zram_test_flag(zram, index, ZRAM_SAME) ||
728 				zram_test_flag(zram, index, ZRAM_UNDER_WB))
729 			goto next;
730 
731 		if (mode & IDLE_WRITEBACK &&
732 			  !zram_test_flag(zram, index, ZRAM_IDLE))
733 			goto next;
734 		if (mode & HUGE_WRITEBACK &&
735 			  !zram_test_flag(zram, index, ZRAM_HUGE))
736 			goto next;
737 		/*
738 		 * Clearing ZRAM_UNDER_WB is the caller's duty.
739 		 * IOW, zram_free_page never clears it.
740 		 */
741 		zram_set_flag(zram, index, ZRAM_UNDER_WB);
742 		/* Needed to handle hugepage writeback races */
743 		zram_set_flag(zram, index, ZRAM_IDLE);
744 		zram_slot_unlock(zram, index);
745 		if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
746 			zram_slot_lock(zram, index);
747 			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
748 			zram_clear_flag(zram, index, ZRAM_IDLE);
749 			zram_slot_unlock(zram, index);
750 			continue;
751 		}
752 
753 		bio_init(&bio, zram->bdev, &bio_vec, 1,
754 			 REQ_OP_WRITE | REQ_SYNC);
755 		bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
756 
757 		bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
758 				bvec.bv_offset);
759 		/*
760 		 * XXX: A single page IO would be inefficient for write
761 		 * but it is not a bad starting point.
762 		 */
763 		err = submit_bio_wait(&bio);
764 		if (err) {
765 			zram_slot_lock(zram, index);
766 			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
767 			zram_clear_flag(zram, index, ZRAM_IDLE);
768 			zram_slot_unlock(zram, index);
769 			/*
770 			 * Return the last IO error, even if other IOs
771 			 * succeeded.
772 			 */
773 			ret = err;
774 			continue;
775 		}
776 
777 		atomic64_inc(&zram->stats.bd_writes);
778 		/*
779 		 * We released zram_slot_lock, so we need to check whether the
780 		 * slot has changed. If the slot was freed, we can catch it
781 		 * easily via zram_allocated.
782 		 * A subtle case is the slot being freed/reallocated/marked as
783 		 * ZRAM_IDLE again. To close that race, idle_store doesn't
784 		 * mark ZRAM_IDLE once it finds the slot is ZRAM_UNDER_WB.
785 		 * Thus, we can close the race by checking the ZRAM_IDLE bit.
786 		 */
787 		zram_slot_lock(zram, index);
788 		if (!zram_allocated(zram, index) ||
789 			  !zram_test_flag(zram, index, ZRAM_IDLE)) {
790 			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
791 			zram_clear_flag(zram, index, ZRAM_IDLE);
792 			goto next;
793 		}
794 
795 		zram_free_page(zram, index);
796 		zram_clear_flag(zram, index, ZRAM_UNDER_WB);
797 		zram_set_flag(zram, index, ZRAM_WB);
798 		zram_set_element(zram, index, blk_idx);
799 		blk_idx = 0;
800 		atomic64_inc(&zram->stats.pages_stored);
801 		spin_lock(&zram->wb_limit_lock);
802 		if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
803 			zram->bd_wb_limit -=  1UL << (PAGE_SHIFT - 12);
804 		spin_unlock(&zram->wb_limit_lock);
805 next:
806 		zram_slot_unlock(zram, index);
807 	}
808 
809 	if (blk_idx)
810 		free_block_bdev(zram, blk_idx);
811 	__free_page(page);
812 release_init_lock:
813 	up_read(&zram->init_lock);
814 
815 	return ret;
816 }
817 
818 struct zram_work {
819 	struct work_struct work;
820 	struct zram *zram;
821 	unsigned long entry;
822 	struct bio *bio;
823 	struct bio_vec bvec;
824 };
825 
826 #if PAGE_SIZE != 4096
827 static void zram_sync_read(struct work_struct *work)
828 {
829 	struct zram_work *zw = container_of(work, struct zram_work, work);
830 	struct zram *zram = zw->zram;
831 	unsigned long entry = zw->entry;
832 	struct bio *bio = zw->bio;
833 
834 	read_from_bdev_async(zram, &zw->bvec, entry, bio);
835 }
836 
837 /*
838  * The block layer wants one ->submit_bio to be active at a time, so if we use
839  * chained IO with the parent IO in the same context, it deadlocks. To avoid that,
840  * use a worker thread context.
841  */
842 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
843 				unsigned long entry, struct bio *bio)
844 {
845 	struct zram_work work;
846 
847 	work.bvec = *bvec;
848 	work.zram = zram;
849 	work.entry = entry;
850 	work.bio = bio;
851 
852 	INIT_WORK_ONSTACK(&work.work, zram_sync_read);
853 	queue_work(system_unbound_wq, &work.work);
854 	flush_work(&work.work);
855 	destroy_work_on_stack(&work.work);
856 
857 	return 1;
858 }
859 #else
860 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
861 				unsigned long entry, struct bio *bio)
862 {
863 	WARN_ON(1);
864 	return -EIO;
865 }
866 #endif
867 
868 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
869 			unsigned long entry, struct bio *parent, bool sync)
870 {
871 	atomic64_inc(&zram->stats.bd_reads);
872 	if (sync)
873 		return read_from_bdev_sync(zram, bvec, entry, parent);
874 	else
875 		return read_from_bdev_async(zram, bvec, entry, parent);
876 }
877 #else
878 static inline void reset_bdev(struct zram *zram) {};
879 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
880 			unsigned long entry, struct bio *parent, bool sync)
881 {
882 	return -EIO;
883 }
884 
885 static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
886 #endif
887 
888 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
889 
890 static struct dentry *zram_debugfs_root;
891 
892 static void zram_debugfs_create(void)
893 {
894 	zram_debugfs_root = debugfs_create_dir("zram", NULL);
895 }
896 
897 static void zram_debugfs_destroy(void)
898 {
899 	debugfs_remove_recursive(zram_debugfs_root);
900 }
901 
902 static void zram_accessed(struct zram *zram, u32 index)
903 {
904 	zram_clear_flag(zram, index, ZRAM_IDLE);
905 	zram->table[index].ac_time = ktime_get_boottime();
906 }
907 
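/*
 * read_block_state() below emits one line per allocated slot in the form
 *
 *	<index> <access time> <s|.><w|.><h|.><i|.>
 *
 * where the flag columns correspond to ZRAM_SAME, ZRAM_WB, ZRAM_HUGE and
 * ZRAM_IDLE, as built by the snprintf() format string in the function.
 */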
908 static ssize_t read_block_state(struct file *file, char __user *buf,
909 				size_t count, loff_t *ppos)
910 {
911 	char *kbuf;
912 	ssize_t index, written = 0;
913 	struct zram *zram = file->private_data;
914 	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
915 	struct timespec64 ts;
916 
917 	kbuf = kvmalloc(count, GFP_KERNEL);
918 	if (!kbuf)
919 		return -ENOMEM;
920 
921 	down_read(&zram->init_lock);
922 	if (!init_done(zram)) {
923 		up_read(&zram->init_lock);
924 		kvfree(kbuf);
925 		return -EINVAL;
926 	}
927 
928 	for (index = *ppos; index < nr_pages; index++) {
929 		int copied;
930 
931 		zram_slot_lock(zram, index);
932 		if (!zram_allocated(zram, index))
933 			goto next;
934 
935 		ts = ktime_to_timespec64(zram->table[index].ac_time);
936 		copied = snprintf(kbuf + written, count,
937 			"%12zd %12lld.%06lu %c%c%c%c\n",
938 			index, (s64)ts.tv_sec,
939 			ts.tv_nsec / NSEC_PER_USEC,
940 			zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
941 			zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
942 			zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
943 			zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');
944 
945 		if (count <= copied) {
946 			zram_slot_unlock(zram, index);
947 			break;
948 		}
949 		written += copied;
950 		count -= copied;
951 next:
952 		zram_slot_unlock(zram, index);
953 		*ppos += 1;
954 	}
955 
956 	up_read(&zram->init_lock);
957 	if (copy_to_user(buf, kbuf, written))
958 		written = -EFAULT;
959 	kvfree(kbuf);
960 
961 	return written;
962 }
963 
964 static const struct file_operations proc_zram_block_state_op = {
965 	.open = simple_open,
966 	.read = read_block_state,
967 	.llseek = default_llseek,
968 };
969 
970 static void zram_debugfs_register(struct zram *zram)
971 {
972 	if (!zram_debugfs_root)
973 		return;
974 
975 	zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
976 						zram_debugfs_root);
977 	debugfs_create_file("block_state", 0400, zram->debugfs_dir,
978 				zram, &proc_zram_block_state_op);
979 }
980 
981 static void zram_debugfs_unregister(struct zram *zram)
982 {
983 	debugfs_remove_recursive(zram->debugfs_dir);
984 }
985 #else
986 static void zram_debugfs_create(void) {};
987 static void zram_debugfs_destroy(void) {};
988 static void zram_accessed(struct zram *zram, u32 index)
989 {
990 	zram_clear_flag(zram, index, ZRAM_IDLE);
991 };
992 static void zram_debugfs_register(struct zram *zram) {};
993 static void zram_debugfs_unregister(struct zram *zram) {};
994 #endif
995 
996 /*
997  * We switched to per-cpu streams and this attr is not needed anymore.
998  * However, we will keep it around for some time, because:
999  * a) we may revert per-cpu streams in the future
1000  * b) it's visible to user space and we need to follow our 2-year
1001  *    retirement rule; but we already have a number of 'soon to be
1002  *    altered' attrs, so max_comp_streams needs to wait for the next
1003  *    layoff cycle.
1004  */
1005 static ssize_t max_comp_streams_show(struct device *dev,
1006 		struct device_attribute *attr, char *buf)
1007 {
1008 	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
1009 }
1010 
1011 static ssize_t max_comp_streams_store(struct device *dev,
1012 		struct device_attribute *attr, const char *buf, size_t len)
1013 {
1014 	return len;
1015 }
1016 
1017 static ssize_t comp_algorithm_show(struct device *dev,
1018 		struct device_attribute *attr, char *buf)
1019 {
1020 	size_t sz;
1021 	struct zram *zram = dev_to_zram(dev);
1022 
1023 	down_read(&zram->init_lock);
1024 	sz = zcomp_available_show(zram->compressor, buf);
1025 	up_read(&zram->init_lock);
1026 
1027 	return sz;
1028 }
1029 
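/*
 * Example usage (the device name is an example; the algorithm must pass
 * zcomp_available_algorithm() and can only be changed before disksize is
 * set):
 *
 *	echo lz4 > /sys/block/zram0/comp_algorithm
 */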
1030 static ssize_t comp_algorithm_store(struct device *dev,
1031 		struct device_attribute *attr, const char *buf, size_t len)
1032 {
1033 	struct zram *zram = dev_to_zram(dev);
1034 	char compressor[ARRAY_SIZE(zram->compressor)];
1035 	size_t sz;
1036 
1037 	strscpy(compressor, buf, sizeof(compressor));
1038 	/* ignore trailing newline */
1039 	sz = strlen(compressor);
1040 	if (sz > 0 && compressor[sz - 1] == '\n')
1041 		compressor[sz - 1] = 0x00;
1042 
1043 	if (!zcomp_available_algorithm(compressor))
1044 		return -EINVAL;
1045 
1046 	down_write(&zram->init_lock);
1047 	if (init_done(zram)) {
1048 		up_write(&zram->init_lock);
1049 		pr_info("Can't change algorithm for initialized device\n");
1050 		return -EBUSY;
1051 	}
1052 
1053 	strcpy(zram->compressor, compressor);
1054 	up_write(&zram->init_lock);
1055 	return len;
1056 }
1057 
1058 static ssize_t compact_store(struct device *dev,
1059 		struct device_attribute *attr, const char *buf, size_t len)
1060 {
1061 	struct zram *zram = dev_to_zram(dev);
1062 
1063 	down_read(&zram->init_lock);
1064 	if (!init_done(zram)) {
1065 		up_read(&zram->init_lock);
1066 		return -EINVAL;
1067 	}
1068 
1069 	zs_compact(zram->mem_pool);
1070 	up_read(&zram->init_lock);
1071 
1072 	return len;
1073 }
1074 
1075 static ssize_t io_stat_show(struct device *dev,
1076 		struct device_attribute *attr, char *buf)
1077 {
1078 	struct zram *zram = dev_to_zram(dev);
1079 	ssize_t ret;
1080 
1081 	down_read(&zram->init_lock);
1082 	ret = scnprintf(buf, PAGE_SIZE,
1083 			"%8llu %8llu %8llu %8llu\n",
1084 			(u64)atomic64_read(&zram->stats.failed_reads),
1085 			(u64)atomic64_read(&zram->stats.failed_writes),
1086 			(u64)atomic64_read(&zram->stats.invalid_io),
1087 			(u64)atomic64_read(&zram->stats.notify_free));
1088 	up_read(&zram->init_lock);
1089 
1090 	return ret;
1091 }
1092 
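/*
 * mm_stat_show() below prints, in this order: original data size, compressed
 * data size, total memory used, memory limit, maximum memory used, same-page
 * count, pages compacted, huge pages and huge_pages_since, mirroring the
 * stats fields passed to scnprintf() in the function body.
 */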
1093 static ssize_t mm_stat_show(struct device *dev,
1094 		struct device_attribute *attr, char *buf)
1095 {
1096 	struct zram *zram = dev_to_zram(dev);
1097 	struct zs_pool_stats pool_stats;
1098 	u64 orig_size, mem_used = 0;
1099 	long max_used;
1100 	ssize_t ret;
1101 
1102 	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
1103 
1104 	down_read(&zram->init_lock);
1105 	if (init_done(zram)) {
1106 		mem_used = zs_get_total_pages(zram->mem_pool);
1107 		zs_pool_stats(zram->mem_pool, &pool_stats);
1108 	}
1109 
1110 	orig_size = atomic64_read(&zram->stats.pages_stored);
1111 	max_used = atomic_long_read(&zram->stats.max_used_pages);
1112 
1113 	ret = scnprintf(buf, PAGE_SIZE,
1114 			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
1115 			orig_size << PAGE_SHIFT,
1116 			(u64)atomic64_read(&zram->stats.compr_data_size),
1117 			mem_used << PAGE_SHIFT,
1118 			zram->limit_pages << PAGE_SHIFT,
1119 			max_used << PAGE_SHIFT,
1120 			(u64)atomic64_read(&zram->stats.same_pages),
1121 			atomic_long_read(&pool_stats.pages_compacted),
1122 			(u64)atomic64_read(&zram->stats.huge_pages),
1123 			(u64)atomic64_read(&zram->stats.huge_pages_since));
1124 	up_read(&zram->init_lock);
1125 
1126 	return ret;
1127 }
1128 
1129 #ifdef CONFIG_ZRAM_WRITEBACK
1130 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
1131 static ssize_t bd_stat_show(struct device *dev,
1132 		struct device_attribute *attr, char *buf)
1133 {
1134 	struct zram *zram = dev_to_zram(dev);
1135 	ssize_t ret;
1136 
1137 	down_read(&zram->init_lock);
1138 	ret = scnprintf(buf, PAGE_SIZE,
1139 		"%8llu %8llu %8llu\n",
1140 			FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
1141 			FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
1142 			FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
1143 	up_read(&zram->init_lock);
1144 
1145 	return ret;
1146 }
1147 #endif
1148 
1149 static ssize_t debug_stat_show(struct device *dev,
1150 		struct device_attribute *attr, char *buf)
1151 {
1152 	int version = 1;
1153 	struct zram *zram = dev_to_zram(dev);
1154 	ssize_t ret;
1155 
1156 	down_read(&zram->init_lock);
1157 	ret = scnprintf(buf, PAGE_SIZE,
1158 			"version: %d\n%8llu %8llu\n",
1159 			version,
1160 			(u64)atomic64_read(&zram->stats.writestall),
1161 			(u64)atomic64_read(&zram->stats.miss_free));
1162 	up_read(&zram->init_lock);
1163 
1164 	return ret;
1165 }
1166 
1167 static DEVICE_ATTR_RO(io_stat);
1168 static DEVICE_ATTR_RO(mm_stat);
1169 #ifdef CONFIG_ZRAM_WRITEBACK
1170 static DEVICE_ATTR_RO(bd_stat);
1171 #endif
1172 static DEVICE_ATTR_RO(debug_stat);
1173 
1174 static void zram_meta_free(struct zram *zram, u64 disksize)
1175 {
1176 	size_t num_pages = disksize >> PAGE_SHIFT;
1177 	size_t index;
1178 
1179 	/* Free all pages that are still in this zram device */
1180 	for (index = 0; index < num_pages; index++)
1181 		zram_free_page(zram, index);
1182 
1183 	zs_destroy_pool(zram->mem_pool);
1184 	vfree(zram->table);
1185 }
1186 
1187 static bool zram_meta_alloc(struct zram *zram, u64 disksize)
1188 {
1189 	size_t num_pages;
1190 
1191 	num_pages = disksize >> PAGE_SHIFT;
1192 	zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
1193 	if (!zram->table)
1194 		return false;
1195 
1196 	zram->mem_pool = zs_create_pool(zram->disk->disk_name);
1197 	if (!zram->mem_pool) {
1198 		vfree(zram->table);
1199 		return false;
1200 	}
1201 
1202 	if (!huge_class_size)
1203 		huge_class_size = zs_huge_class_size(zram->mem_pool);
1204 	return true;
1205 }
1206 
1207 /*
1208  * To protect concurrent access to the same index entry, the
1209  * caller should hold this table index entry's bit_spinlock to
1210  * indicate that this index entry is being accessed.
1211  */
1212 static void zram_free_page(struct zram *zram, size_t index)
1213 {
1214 	unsigned long handle;
1215 
1216 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
1217 	zram->table[index].ac_time = 0;
1218 #endif
1219 	if (zram_test_flag(zram, index, ZRAM_IDLE))
1220 		zram_clear_flag(zram, index, ZRAM_IDLE);
1221 
1222 	if (zram_test_flag(zram, index, ZRAM_HUGE)) {
1223 		zram_clear_flag(zram, index, ZRAM_HUGE);
1224 		atomic64_dec(&zram->stats.huge_pages);
1225 	}
1226 
1227 	if (zram_test_flag(zram, index, ZRAM_WB)) {
1228 		zram_clear_flag(zram, index, ZRAM_WB);
1229 		free_block_bdev(zram, zram_get_element(zram, index));
1230 		goto out;
1231 	}
1232 
1233 	/*
1234 	 * No memory is allocated for same element filled pages.
1235 	 * Simply clear the same-page flag.
1236 	 */
1237 	if (zram_test_flag(zram, index, ZRAM_SAME)) {
1238 		zram_clear_flag(zram, index, ZRAM_SAME);
1239 		atomic64_dec(&zram->stats.same_pages);
1240 		goto out;
1241 	}
1242 
1243 	handle = zram_get_handle(zram, index);
1244 	if (!handle)
1245 		return;
1246 
1247 	zs_free(zram->mem_pool, handle);
1248 
1249 	atomic64_sub(zram_get_obj_size(zram, index),
1250 			&zram->stats.compr_data_size);
1251 out:
1252 	atomic64_dec(&zram->stats.pages_stored);
1253 	zram_set_handle(zram, index, 0);
1254 	zram_set_obj_size(zram, index, 0);
1255 	WARN_ON_ONCE(zram->table[index].flags &
1256 		~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
1257 }
1258 
1259 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
1260 				struct bio *bio, bool partial_io)
1261 {
1262 	struct zcomp_strm *zstrm;
1263 	unsigned long handle;
1264 	unsigned int size;
1265 	void *src, *dst;
1266 	int ret;
1267 
1268 	zram_slot_lock(zram, index);
1269 	if (zram_test_flag(zram, index, ZRAM_WB)) {
1270 		struct bio_vec bvec;
1271 
1272 		zram_slot_unlock(zram, index);
1273 
1274 		bvec.bv_page = page;
1275 		bvec.bv_len = PAGE_SIZE;
1276 		bvec.bv_offset = 0;
1277 		return read_from_bdev(zram, &bvec,
1278 				zram_get_element(zram, index),
1279 				bio, partial_io);
1280 	}
1281 
1282 	handle = zram_get_handle(zram, index);
1283 	if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
1284 		unsigned long value;
1285 		void *mem;
1286 
1287 		value = handle ? zram_get_element(zram, index) : 0;
1288 		mem = kmap_atomic(page);
1289 		zram_fill_page(mem, PAGE_SIZE, value);
1290 		kunmap_atomic(mem);
1291 		zram_slot_unlock(zram, index);
1292 		return 0;
1293 	}
1294 
1295 	size = zram_get_obj_size(zram, index);
1296 
1297 	if (size != PAGE_SIZE)
1298 		zstrm = zcomp_stream_get(zram->comp);
1299 
1300 	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
1301 	if (size == PAGE_SIZE) {
1302 		dst = kmap_atomic(page);
1303 		memcpy(dst, src, PAGE_SIZE);
1304 		kunmap_atomic(dst);
1305 		ret = 0;
1306 	} else {
1307 		dst = kmap_atomic(page);
1308 		ret = zcomp_decompress(zstrm, src, size, dst);
1309 		kunmap_atomic(dst);
1310 		zcomp_stream_put(zram->comp);
1311 	}
1312 	zs_unmap_object(zram->mem_pool, handle);
1313 	zram_slot_unlock(zram, index);
1314 
1315 	/* Should NEVER happen. Return bio error if it does. */
1316 	if (WARN_ON(ret))
1317 		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
1318 
1319 	return ret;
1320 }
1321 
1322 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
1323 				u32 index, int offset, struct bio *bio)
1324 {
1325 	int ret;
1326 	struct page *page;
1327 
1328 	page = bvec->bv_page;
1329 	if (is_partial_io(bvec)) {
1330 		/* Use a temporary buffer to decompress the page */
1331 		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1332 		if (!page)
1333 			return -ENOMEM;
1334 	}
1335 
1336 	ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
1337 	if (unlikely(ret))
1338 		goto out;
1339 
1340 	if (is_partial_io(bvec)) {
1341 		void *src = kmap_atomic(page);
1342 
1343 		memcpy_to_bvec(bvec, src + offset);
1344 		kunmap_atomic(src);
1345 	}
1346 out:
1347 	if (is_partial_io(bvec))
1348 		__free_page(page);
1349 
1350 	return ret;
1351 }
1352 
1353 static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1354 				u32 index, struct bio *bio)
1355 {
1356 	int ret = 0;
1357 	unsigned long alloced_pages;
1358 	unsigned long handle = -ENOMEM;
1359 	unsigned int comp_len = 0;
1360 	void *src, *dst, *mem;
1361 	struct zcomp_strm *zstrm;
1362 	struct page *page = bvec->bv_page;
1363 	unsigned long element = 0;
1364 	enum zram_pageflags flags = 0;
1365 
1366 	mem = kmap_atomic(page);
1367 	if (page_same_filled(mem, &element)) {
1368 		kunmap_atomic(mem);
1369 		/* Free memory associated with this sector now. */
1370 		flags = ZRAM_SAME;
1371 		atomic64_inc(&zram->stats.same_pages);
1372 		goto out;
1373 	}
1374 	kunmap_atomic(mem);
1375 
1376 compress_again:
1377 	zstrm = zcomp_stream_get(zram->comp);
1378 	src = kmap_atomic(page);
1379 	ret = zcomp_compress(zstrm, src, &comp_len);
1380 	kunmap_atomic(src);
1381 
1382 	if (unlikely(ret)) {
1383 		zcomp_stream_put(zram->comp);
1384 		pr_err("Compression failed! err=%d\n", ret);
1385 		zs_free(zram->mem_pool, handle);
1386 		return ret;
1387 	}
1388 
1389 	if (comp_len >= huge_class_size)
1390 		comp_len = PAGE_SIZE;
1391 	/*
1392 	 * handle allocation has 2 paths:
1393 	 * a) fast path is executed with preemption disabled (for
1394 	 *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
1395 	 *  since we can't sleep;
1396 	 * b) slow path enables preemption and attempts to allocate
1397 	 *  the page with __GFP_DIRECT_RECLAIM bit set. we have to
1398 	 *  put the per-cpu compression stream and, thus, re-do
1399 	 *  the compression once handle is allocated.
1400 	 *
1401 	 * if we have a 'non-null' handle here then we are coming
1402 	 * from the slow path and handle has already been allocated.
1403 	 */
1404 	if (IS_ERR((void *)handle))
1405 		handle = zs_malloc(zram->mem_pool, comp_len,
1406 				__GFP_KSWAPD_RECLAIM |
1407 				__GFP_NOWARN |
1408 				__GFP_HIGHMEM |
1409 				__GFP_MOVABLE);
1410 	if (IS_ERR((void *)handle)) {
1411 		zcomp_stream_put(zram->comp);
1412 		atomic64_inc(&zram->stats.writestall);
1413 		handle = zs_malloc(zram->mem_pool, comp_len,
1414 				GFP_NOIO | __GFP_HIGHMEM |
1415 				__GFP_MOVABLE);
1416 		if (IS_ERR((void *)handle))
1417 			return PTR_ERR((void *)handle);
1418 
1419 		if (comp_len != PAGE_SIZE)
1420 			goto compress_again;
1421 		/*
1422 		 * If the page is not compressible, we need to grab the
1423 		 * stream again and execute the code below. The
1424 		 * zcomp_stream_get() call is needed to disable cpu hotplug
1425 		 * and get the zstrm buffer back, so that the zstrm
1426 		 * dereference below is valid.
1427 		 */
1428 		zstrm = zcomp_stream_get(zram->comp);
1429 	}
1430 
1431 	alloced_pages = zs_get_total_pages(zram->mem_pool);
1432 	update_used_max(zram, alloced_pages);
1433 
1434 	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
1435 		zcomp_stream_put(zram->comp);
1436 		zs_free(zram->mem_pool, handle);
1437 		return -ENOMEM;
1438 	}
1439 
1440 	dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
1441 
1442 	src = zstrm->buffer;
1443 	if (comp_len == PAGE_SIZE)
1444 		src = kmap_atomic(page);
1445 	memcpy(dst, src, comp_len);
1446 	if (comp_len == PAGE_SIZE)
1447 		kunmap_atomic(src);
1448 
1449 	zcomp_stream_put(zram->comp);
1450 	zs_unmap_object(zram->mem_pool, handle);
1451 	atomic64_add(comp_len, &zram->stats.compr_data_size);
1452 out:
1453 	/*
1454 	 * Free memory associated with this sector
1455 	 * before overwriting unused sectors.
1456 	 */
1457 	zram_slot_lock(zram, index);
1458 	zram_free_page(zram, index);
1459 
1460 	if (comp_len == PAGE_SIZE) {
1461 		zram_set_flag(zram, index, ZRAM_HUGE);
1462 		atomic64_inc(&zram->stats.huge_pages);
1463 		atomic64_inc(&zram->stats.huge_pages_since);
1464 	}
1465 
1466 	if (flags) {
1467 		zram_set_flag(zram, index, flags);
1468 		zram_set_element(zram, index, element);
1469 	}  else {
1470 		zram_set_handle(zram, index, handle);
1471 		zram_set_obj_size(zram, index, comp_len);
1472 	}
1473 	zram_slot_unlock(zram, index);
1474 
1475 	/* Update stats */
1476 	atomic64_inc(&zram->stats.pages_stored);
1477 	return ret;
1478 }
1479 
1480 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1481 				u32 index, int offset, struct bio *bio)
1482 {
1483 	int ret;
1484 	struct page *page = NULL;
1485 	struct bio_vec vec;
1486 
1487 	vec = *bvec;
1488 	if (is_partial_io(bvec)) {
1489 		void *dst;
1490 		/*
1491 		 * This is a partial IO. We need to read the full page
1492 		 * before writing the changes.
1493 		 */
1494 		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1495 		if (!page)
1496 			return -ENOMEM;
1497 
1498 		ret = __zram_bvec_read(zram, page, index, bio, true);
1499 		if (ret)
1500 			goto out;
1501 
1502 		dst = kmap_atomic(page);
1503 		memcpy_from_bvec(dst + offset, bvec);
1504 		kunmap_atomic(dst);
1505 
1506 		vec.bv_page = page;
1507 		vec.bv_len = PAGE_SIZE;
1508 		vec.bv_offset = 0;
1509 	}
1510 
1511 	ret = __zram_bvec_write(zram, &vec, index, bio);
1512 out:
1513 	if (is_partial_io(bvec))
1514 		__free_page(page);
1515 	return ret;
1516 }
1517 
1518 /*
1519  * zram_bio_discard - handler on discard request
1520  * @index: physical block index in PAGE_SIZE units
1521  * @offset: byte offset within physical block
1522  */
1523 static void zram_bio_discard(struct zram *zram, u32 index,
1524 			     int offset, struct bio *bio)
1525 {
1526 	size_t n = bio->bi_iter.bi_size;
1527 
1528 	/*
1529 	 * zram manages data in physical block size units. Because logical block
1530 	 * size isn't identical to the physical block size on some architectures, we
1531 	 * could get a discard request pointing to a specific offset within a
1532 	 * certain physical block.  Although we can handle this request by
1533 	 * reading that physical block and decompressing and partially zeroing
1534 	 * and re-compressing and then re-storing it, this isn't reasonable
1535 	 * because our intent with a discard request is to save memory.  So
1536 	 * skipping this logical block is appropriate here.
1537 	 */
1538 	if (offset) {
1539 		if (n <= (PAGE_SIZE - offset))
1540 			return;
1541 
1542 		n -= (PAGE_SIZE - offset);
1543 		index++;
1544 	}
1545 
1546 	while (n >= PAGE_SIZE) {
1547 		zram_slot_lock(zram, index);
1548 		zram_free_page(zram, index);
1549 		zram_slot_unlock(zram, index);
1550 		atomic64_inc(&zram->stats.notify_free);
1551 		index++;
1552 		n -= PAGE_SIZE;
1553 	}
1554 }
1555 
1556 /*
1557  * Returns a negative errno if there is a problem. Otherwise returns 0 or 1:
1558  * 0 if the IO request was completed synchronously,
1559  * 1 if the IO request was successfully submitted.
1560  */
1561 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
1562 			int offset, enum req_op op, struct bio *bio)
1563 {
1564 	int ret;
1565 
1566 	if (!op_is_write(op)) {
1567 		atomic64_inc(&zram->stats.num_reads);
1568 		ret = zram_bvec_read(zram, bvec, index, offset, bio);
1569 		flush_dcache_page(bvec->bv_page);
1570 	} else {
1571 		atomic64_inc(&zram->stats.num_writes);
1572 		ret = zram_bvec_write(zram, bvec, index, offset, bio);
1573 	}
1574 
1575 	zram_slot_lock(zram, index);
1576 	zram_accessed(zram, index);
1577 	zram_slot_unlock(zram, index);
1578 
1579 	if (unlikely(ret < 0)) {
1580 		if (!op_is_write(op))
1581 			atomic64_inc(&zram->stats.failed_reads);
1582 		else
1583 			atomic64_inc(&zram->stats.failed_writes);
1584 	}
1585 
1586 	return ret;
1587 }
1588 
1589 static void __zram_make_request(struct zram *zram, struct bio *bio)
1590 {
1591 	int offset;
1592 	u32 index;
1593 	struct bio_vec bvec;
1594 	struct bvec_iter iter;
1595 	unsigned long start_time;
1596 
1597 	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
1598 	offset = (bio->bi_iter.bi_sector &
1599 		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1600 
1601 	switch (bio_op(bio)) {
1602 	case REQ_OP_DISCARD:
1603 	case REQ_OP_WRITE_ZEROES:
1604 		zram_bio_discard(zram, index, offset, bio);
1605 		bio_endio(bio);
1606 		return;
1607 	default:
1608 		break;
1609 	}
1610 
1611 	start_time = bio_start_io_acct(bio);
1612 	bio_for_each_segment(bvec, bio, iter) {
1613 		struct bio_vec bv = bvec;
1614 		unsigned int unwritten = bvec.bv_len;
1615 
1616 		do {
1617 			bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
1618 							unwritten);
1619 			if (zram_bvec_rw(zram, &bv, index, offset,
1620 					 bio_op(bio), bio) < 0) {
1621 				bio->bi_status = BLK_STS_IOERR;
1622 				break;
1623 			}
1624 
1625 			bv.bv_offset += bv.bv_len;
1626 			unwritten -= bv.bv_len;
1627 
1628 			update_position(&index, &offset, &bv);
1629 		} while (unwritten);
1630 	}
1631 	bio_end_io_acct(bio, start_time);
1632 	bio_endio(bio);
1633 }
1634 
1635 /*
1636  * Handler function for all zram I/O requests.
1637  */
1638 static void zram_submit_bio(struct bio *bio)
1639 {
1640 	struct zram *zram = bio->bi_bdev->bd_disk->private_data;
1641 
1642 	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
1643 					bio->bi_iter.bi_size)) {
1644 		atomic64_inc(&zram->stats.invalid_io);
1645 		bio_io_error(bio);
1646 		return;
1647 	}
1648 
1649 	__zram_make_request(zram, bio);
1650 }
1651 
1652 static void zram_slot_free_notify(struct block_device *bdev,
1653 				unsigned long index)
1654 {
1655 	struct zram *zram;
1656 
1657 	zram = bdev->bd_disk->private_data;
1658 
1659 	atomic64_inc(&zram->stats.notify_free);
1660 	if (!zram_slot_trylock(zram, index)) {
1661 		atomic64_inc(&zram->stats.miss_free);
1662 		return;
1663 	}
1664 
1665 	zram_free_page(zram, index);
1666 	zram_slot_unlock(zram, index);
1667 }
1668 
1669 static int zram_rw_page(struct block_device *bdev, sector_t sector,
1670 		       struct page *page, enum req_op op)
1671 {
1672 	int offset, ret;
1673 	u32 index;
1674 	struct zram *zram;
1675 	struct bio_vec bv;
1676 	unsigned long start_time;
1677 
1678 	if (PageTransHuge(page))
1679 		return -ENOTSUPP;
1680 	zram = bdev->bd_disk->private_data;
1681 
1682 	if (!valid_io_request(zram, sector, PAGE_SIZE)) {
1683 		atomic64_inc(&zram->stats.invalid_io);
1684 		ret = -EINVAL;
1685 		goto out;
1686 	}
1687 
1688 	index = sector >> SECTORS_PER_PAGE_SHIFT;
1689 	offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1690 
1691 	bv.bv_page = page;
1692 	bv.bv_len = PAGE_SIZE;
1693 	bv.bv_offset = 0;
1694 
1695 	start_time = bdev_start_io_acct(bdev->bd_disk->part0,
1696 			SECTORS_PER_PAGE, op, jiffies);
1697 	ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
1698 	bdev_end_io_acct(bdev->bd_disk->part0, op, start_time);
1699 out:
1700 	/*
1701 	 * If the I/O fails, just return the error (i.e., non-zero) without
1702 	 * calling page_endio.
1703 	 * This causes the upper callers of rw_page (e.g., swap_readpage,
1704 	 * __swap_writepage) to resubmit the I/O as a bio request, and
1705 	 * bio->bi_end_io then handles the error
1706 	 * (e.g., SetPageError, set_page_dirty and extra work).
1707 	 */
1708 	if (unlikely(ret < 0))
1709 		return ret;
1710 
1711 	switch (ret) {
1712 	case 0:
1713 		page_endio(page, op_is_write(op), 0);
1714 		break;
1715 	case 1:
1716 		ret = 0;
1717 		break;
1718 	default:
1719 		WARN_ON(1);
1720 	}
1721 	return ret;
1722 }
1723 
1724 static void zram_reset_device(struct zram *zram)
1725 {
1726 	down_write(&zram->init_lock);
1727 
1728 	zram->limit_pages = 0;
1729 
1730 	if (!init_done(zram)) {
1731 		up_write(&zram->init_lock);
1732 		return;
1733 	}
1734 
1735 	set_capacity_and_notify(zram->disk, 0);
1736 	part_stat_set_all(zram->disk->part0, 0);
1737 
1738 	/* I/O operations on all CPUs are done, so it is safe to free */
1739 	zram_meta_free(zram, zram->disksize);
1740 	zram->disksize = 0;
1741 	memset(&zram->stats, 0, sizeof(zram->stats));
1742 	zcomp_destroy(zram->comp);
1743 	zram->comp = NULL;
1744 	reset_bdev(zram);
1745 
1746 	up_write(&zram->init_lock);
1747 }
1748 
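/*
 * Example usage (the device name is an example; the size goes through
 * memparse() and can only be set on an uninitialized device):
 *
 *	echo 1G > /sys/block/zram0/disksize
 */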
1749 static ssize_t disksize_store(struct device *dev,
1750 		struct device_attribute *attr, const char *buf, size_t len)
1751 {
1752 	u64 disksize;
1753 	struct zcomp *comp;
1754 	struct zram *zram = dev_to_zram(dev);
1755 	int err;
1756 
1757 	disksize = memparse(buf, NULL);
1758 	if (!disksize)
1759 		return -EINVAL;
1760 
1761 	down_write(&zram->init_lock);
1762 	if (init_done(zram)) {
1763 		pr_info("Cannot change disksize for initialized device\n");
1764 		err = -EBUSY;
1765 		goto out_unlock;
1766 	}
1767 
1768 	disksize = PAGE_ALIGN(disksize);
1769 	if (!zram_meta_alloc(zram, disksize)) {
1770 		err = -ENOMEM;
1771 		goto out_unlock;
1772 	}
1773 
1774 	comp = zcomp_create(zram->compressor);
1775 	if (IS_ERR(comp)) {
1776 		pr_err("Cannot initialise %s compressing backend\n",
1777 				zram->compressor);
1778 		err = PTR_ERR(comp);
1779 		goto out_free_meta;
1780 	}
1781 
1782 	zram->comp = comp;
1783 	zram->disksize = disksize;
1784 	set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
1785 	up_write(&zram->init_lock);
1786 
1787 	return len;
1788 
1789 out_free_meta:
1790 	zram_meta_free(zram, disksize);
1791 out_unlock:
1792 	up_write(&zram->init_lock);
1793 	return err;
1794 }
1795 
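/*
 * Example usage (the device name is an example; any non-zero value triggers
 * the reset, and the device must not be open or already claimed):
 *
 *	echo 1 > /sys/block/zram0/reset
 */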
1796 static ssize_t reset_store(struct device *dev,
1797 		struct device_attribute *attr, const char *buf, size_t len)
1798 {
1799 	int ret;
1800 	unsigned short do_reset;
1801 	struct zram *zram;
1802 	struct gendisk *disk;
1803 
1804 	ret = kstrtou16(buf, 10, &do_reset);
1805 	if (ret)
1806 		return ret;
1807 
1808 	if (!do_reset)
1809 		return -EINVAL;
1810 
1811 	zram = dev_to_zram(dev);
1812 	disk = zram->disk;
1813 
1814 	mutex_lock(&disk->open_mutex);
1815 	/* Do not reset an active device or claimed device */
1816 	if (disk_openers(disk) || zram->claim) {
1817 		mutex_unlock(&disk->open_mutex);
1818 		return -EBUSY;
1819 	}
1820 
1821 	/* From now on, no one can open /dev/zram[0-9] */
1822 	zram->claim = true;
1823 	mutex_unlock(&disk->open_mutex);
1824 
1825 	/* Make sure all pending I/O is finished */
1826 	sync_blockdev(disk->part0);
1827 	zram_reset_device(zram);
1828 
1829 	mutex_lock(&disk->open_mutex);
1830 	zram->claim = false;
1831 	mutex_unlock(&disk->open_mutex);
1832 
1833 	return len;
1834 }
1835 
1836 static int zram_open(struct block_device *bdev, fmode_t mode)
1837 {
1838 	int ret = 0;
1839 	struct zram *zram;
1840 
1841 	WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex));
1842 
1843 	zram = bdev->bd_disk->private_data;
1844 	/* zram was claimed for reset, so the open request fails */
1845 	if (zram->claim)
1846 		ret = -EBUSY;
1847 
1848 	return ret;
1849 }
1850 
1851 static const struct block_device_operations zram_devops = {
1852 	.open = zram_open,
1853 	.submit_bio = zram_submit_bio,
1854 	.swap_slot_free_notify = zram_slot_free_notify,
1855 	.rw_page = zram_rw_page,
1856 	.owner = THIS_MODULE
1857 };
1858 
1859 #ifdef CONFIG_ZRAM_WRITEBACK
1860 static const struct block_device_operations zram_wb_devops = {
1861 	.open = zram_open,
1862 	.submit_bio = zram_submit_bio,
1863 	.swap_slot_free_notify = zram_slot_free_notify,
1864 	.owner = THIS_MODULE
1865 };
1866 #endif
1867 
1868 static DEVICE_ATTR_WO(compact);
1869 static DEVICE_ATTR_RW(disksize);
1870 static DEVICE_ATTR_RO(initstate);
1871 static DEVICE_ATTR_WO(reset);
1872 static DEVICE_ATTR_WO(mem_limit);
1873 static DEVICE_ATTR_WO(mem_used_max);
1874 static DEVICE_ATTR_WO(idle);
1875 static DEVICE_ATTR_RW(max_comp_streams);
1876 static DEVICE_ATTR_RW(comp_algorithm);
1877 #ifdef CONFIG_ZRAM_WRITEBACK
1878 static DEVICE_ATTR_RW(backing_dev);
1879 static DEVICE_ATTR_WO(writeback);
1880 static DEVICE_ATTR_RW(writeback_limit);
1881 static DEVICE_ATTR_RW(writeback_limit_enable);
1882 #endif
1883 
1884 static struct attribute *zram_disk_attrs[] = {
1885 	&dev_attr_disksize.attr,
1886 	&dev_attr_initstate.attr,
1887 	&dev_attr_reset.attr,
1888 	&dev_attr_compact.attr,
1889 	&dev_attr_mem_limit.attr,
1890 	&dev_attr_mem_used_max.attr,
1891 	&dev_attr_idle.attr,
1892 	&dev_attr_max_comp_streams.attr,
1893 	&dev_attr_comp_algorithm.attr,
1894 #ifdef CONFIG_ZRAM_WRITEBACK
1895 	&dev_attr_backing_dev.attr,
1896 	&dev_attr_writeback.attr,
1897 	&dev_attr_writeback_limit.attr,
1898 	&dev_attr_writeback_limit_enable.attr,
1899 #endif
1900 	&dev_attr_io_stat.attr,
1901 	&dev_attr_mm_stat.attr,
1902 #ifdef CONFIG_ZRAM_WRITEBACK
1903 	&dev_attr_bd_stat.attr,
1904 #endif
1905 	&dev_attr_debug_stat.attr,
1906 	NULL,
1907 };
1908 
1909 ATTRIBUTE_GROUPS(zram_disk);
1910 
1911 /*
1912  * Allocate and initialize a new zram device. The function returns
1913  * a '>= 0' device_id upon success, and a negative value otherwise.
1914  */
1915 static int zram_add(void)
1916 {
1917 	struct zram *zram;
1918 	int ret, device_id;
1919 
1920 	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
1921 	if (!zram)
1922 		return -ENOMEM;
1923 
1924 	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
1925 	if (ret < 0)
1926 		goto out_free_dev;
1927 	device_id = ret;
1928 
1929 	init_rwsem(&zram->init_lock);
1930 #ifdef CONFIG_ZRAM_WRITEBACK
1931 	spin_lock_init(&zram->wb_limit_lock);
1932 #endif
1933 
1934 	/* gendisk structure */
1935 	zram->disk = blk_alloc_disk(NUMA_NO_NODE);
1936 	if (!zram->disk) {
1937 		pr_err("Error allocating disk structure for device %d\n",
1938 			device_id);
1939 		ret = -ENOMEM;
1940 		goto out_free_idr;
1941 	}
1942 
1943 	zram->disk->major = zram_major;
1944 	zram->disk->first_minor = device_id;
1945 	zram->disk->minors = 1;
1946 	zram->disk->flags |= GENHD_FL_NO_PART;
1947 	zram->disk->fops = &zram_devops;
1948 	zram->disk->private_data = zram;
1949 	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
1950 
1951 	/* Actual capacity is set using sysfs (/sys/block/zram<id>/disksize) */
1952 	set_capacity(zram->disk, 0);
1953 	/* zram devices sort of resemble non-rotational disks */
1954 	blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
1955 	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
1956 
1957 	/*
1958 	 * To ensure that we always get PAGE_SIZE-aligned
1959 	 * and n*PAGE_SIZE-sized I/O requests.
1960 	 */
1961 	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
1962 	blk_queue_logical_block_size(zram->disk->queue,
1963 					ZRAM_LOGICAL_BLOCK_SIZE);
1964 	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
1965 	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
1966 	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
1967 	blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
1968 
1969 	/*
1970 	 * zram_bio_discard() will clear all logical blocks if logical block
1971 	 * size is identical to the physical block size (PAGE_SIZE). But if it is
1972 	 * different, we will skip discarding some parts of logical blocks in
1973 	 * the part of the request range which isn't aligned to physical block
1974 	 * size.  So we can't ensure that all discarded logical blocks are
1975 	 * zeroed.
1976 	 */
1977 	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
1978 		blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
1979 
1980 	blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
1981 	ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
1982 	if (ret)
1983 		goto out_cleanup_disk;
1984 
1985 	strscpy(zram->compressor, default_compressor, sizeof(zram->compressor));
1986 
1987 	zram_debugfs_register(zram);
1988 	pr_info("Added device: %s\n", zram->disk->disk_name);
1989 	return device_id;
1990 
1991 out_cleanup_disk:
1992 	put_disk(zram->disk);
1993 out_free_idr:
1994 	idr_remove(&zram_index_idr, device_id);
1995 out_free_dev:
1996 	kfree(zram);
1997 	return ret;
1998 }
1999 
2000 static int zram_remove(struct zram *zram)
2001 {
2002 	bool claimed;
2003 
2004 	mutex_lock(&zram->disk->open_mutex);
2005 	if (disk_openers(zram->disk)) {
2006 		mutex_unlock(&zram->disk->open_mutex);
2007 		return -EBUSY;
2008 	}
2009 
2010 	claimed = zram->claim;
2011 	if (!claimed)
2012 		zram->claim = true;
2013 	mutex_unlock(&zram->disk->open_mutex);
2014 
2015 	zram_debugfs_unregister(zram);
2016 
2017 	if (claimed) {
2018 		/*
2019 		 * If we were claimed by reset_store(), del_gendisk() will
2020 		 * wait until reset_store() is done, so there is nothing to do.
2021 		 */
2022 		;
2023 	} else {
2024 		/* Make sure all the pending I/O are finished */
2025 		/* Make sure all pending I/O is finished */
2026 		zram_reset_device(zram);
2027 	}
2028 
2029 	pr_info("Removed device: %s\n", zram->disk->disk_name);
2030 
2031 	del_gendisk(zram->disk);
2032 
2033 	/* del_gendisk drains pending reset_store */
2034 	WARN_ON_ONCE(claimed && zram->claim);
2035 
2036 	/*
2037 	 * disksize_store() may be called in between zram_reset_device()
2038 	 * and del_gendisk(), so run the last reset to avoid leaking
2039 	 * anything allocated with disksize_store().
2040 	 */
2041 	zram_reset_device(zram);
2042 
2043 	put_disk(zram->disk);
2044 	kfree(zram);
2045 	return 0;
2046 }
2047 
2048 /* zram-control sysfs attributes */
2049 
2050 /*
2051  * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in the
2052  * sense that reading from this file does alter the state of your system -- it
2053  * creates a new un-initialized zram device and returns this device's
2054  * device_id (or an error code if it fails to create a new device).
2055  */
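/*
 * Example usage (the removed id is an example):
 *
 *	cat /sys/class/zram-control/hot_add	# prints the new device id
 *	echo 4 > /sys/class/zram-control/hot_remove
 */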
2056 static ssize_t hot_add_show(struct class *class,
2057 			struct class_attribute *attr,
2058 			char *buf)
2059 {
2060 	int ret;
2061 
2062 	mutex_lock(&zram_index_mutex);
2063 	ret = zram_add();
2064 	mutex_unlock(&zram_index_mutex);
2065 
2066 	if (ret < 0)
2067 		return ret;
2068 	return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
2069 }
2070 static struct class_attribute class_attr_hot_add =
2071 	__ATTR(hot_add, 0400, hot_add_show, NULL);
2072 
2073 static ssize_t hot_remove_store(struct class *class,
2074 			struct class_attribute *attr,
2075 			const char *buf,
2076 			size_t count)
2077 {
2078 	struct zram *zram;
2079 	int ret, dev_id;
2080 
2081 	/* dev_id is gendisk->first_minor, which is `int' */
2082 	ret = kstrtoint(buf, 10, &dev_id);
2083 	if (ret)
2084 		return ret;
2085 	if (dev_id < 0)
2086 		return -EINVAL;
2087 
2088 	mutex_lock(&zram_index_mutex);
2089 
2090 	zram = idr_find(&zram_index_idr, dev_id);
2091 	if (zram) {
2092 		ret = zram_remove(zram);
2093 		if (!ret)
2094 			idr_remove(&zram_index_idr, dev_id);
2095 	} else {
2096 		ret = -ENODEV;
2097 	}
2098 
2099 	mutex_unlock(&zram_index_mutex);
2100 	return ret ? ret : count;
2101 }
2102 static CLASS_ATTR_WO(hot_remove);
2103 
2104 static struct attribute *zram_control_class_attrs[] = {
2105 	&class_attr_hot_add.attr,
2106 	&class_attr_hot_remove.attr,
2107 	NULL,
2108 };
2109 ATTRIBUTE_GROUPS(zram_control_class);
2110 
2111 static struct class zram_control_class = {
2112 	.name		= "zram-control",
2113 	.owner		= THIS_MODULE,
2114 	.class_groups	= zram_control_class_groups,
2115 };
2116 
2117 static int zram_remove_cb(int id, void *ptr, void *data)
2118 {
2119 	WARN_ON_ONCE(zram_remove(ptr));
2120 	return 0;
2121 }
2122 
2123 static void destroy_devices(void)
2124 {
2125 	class_unregister(&zram_control_class);
2126 	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
2127 	zram_debugfs_destroy();
2128 	idr_destroy(&zram_index_idr);
2129 	unregister_blkdev(zram_major, "zram");
2130 	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2131 }
2132 
2133 static int __init zram_init(void)
2134 {
2135 	int ret;
2136 
2137 	BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > BITS_PER_LONG);
2138 
2139 	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
2140 				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
2141 	if (ret < 0)
2142 		return ret;
2143 
2144 	ret = class_register(&zram_control_class);
2145 	if (ret) {
2146 		pr_err("Unable to register zram-control class\n");
2147 		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2148 		return ret;
2149 	}
2150 
2151 	zram_debugfs_create();
2152 	zram_major = register_blkdev(0, "zram");
2153 	if (zram_major <= 0) {
2154 		pr_err("Unable to get major number\n");
2155 		class_unregister(&zram_control_class);
2156 		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2157 		return -EBUSY;
2158 	}
2159 
2160 	while (num_devices != 0) {
2161 		mutex_lock(&zram_index_mutex);
2162 		ret = zram_add();
2163 		mutex_unlock(&zram_index_mutex);
2164 		if (ret < 0)
2165 			goto out_error;
2166 		num_devices--;
2167 	}
2168 
2169 	return 0;
2170 
2171 out_error:
2172 	destroy_devices();
2173 	return ret;
2174 }
2175 
2176 static void __exit zram_exit(void)
2177 {
2178 	destroy_devices();
2179 }
2180 
2181 module_init(zram_init);
2182 module_exit(zram_exit);
2183 
2184 module_param(num_devices, uint, 0);
2185 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
2186 
2187 MODULE_LICENSE("Dual BSD/GPL");
2188 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
2189 MODULE_DESCRIPTION("Compressed RAM Block Device");
2190