xref: /openbmc/linux/drivers/block/zram/zram_drv.c (revision fc28ab18)
1 /*
2  * Compressed RAM block device
3  *
4  * Copyright (C) 2008, 2009, 2010  Nitin Gupta
5  *               2012, 2013 Minchan Kim
6  *
7  * This code is released using a dual license strategy: BSD/GPL
8  * You can choose the licence that better fits your requirements.
9  *
10  * Released under the terms of 3-clause BSD License
11  * Released under the terms of GNU General Public License Version 2.0
12  *
13  */
14 
15 #define KMSG_COMPONENT "zram"
16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17 
18 #include <linux/module.h>
19 #include <linux/kernel.h>
20 #include <linux/bio.h>
21 #include <linux/bitops.h>
22 #include <linux/blkdev.h>
23 #include <linux/buffer_head.h>
24 #include <linux/device.h>
25 #include <linux/genhd.h>
26 #include <linux/highmem.h>
27 #include <linux/slab.h>
28 #include <linux/backing-dev.h>
29 #include <linux/string.h>
30 #include <linux/vmalloc.h>
31 #include <linux/err.h>
32 #include <linux/idr.h>
33 #include <linux/sysfs.h>
34 #include <linux/cpuhotplug.h>
35 
36 #include "zram_drv.h"
37 
38 static DEFINE_IDR(zram_index_idr);
39 /* idr index must be protected */
40 static DEFINE_MUTEX(zram_index_mutex);
41 
42 static int zram_major;
43 static const char *default_compressor = "lzo";
44 
45 /* Module params (documentation at end) */
46 static unsigned int num_devices = 1;
47 
48 static inline void deprecated_attr_warn(const char *name)
49 {
50 	pr_warn_once("%d (%s) Attribute %s (and others) will be removed. %s\n",
51 			task_pid_nr(current),
52 			current->comm,
53 			name,
54 			"See zram documentation.");
55 }
56 
57 #define ZRAM_ATTR_RO(name)						\
58 static ssize_t name##_show(struct device *d,				\
59 				struct device_attribute *attr, char *b)	\
60 {									\
61 	struct zram *zram = dev_to_zram(d);				\
62 									\
63 	deprecated_attr_warn(__stringify(name));			\
64 	return scnprintf(b, PAGE_SIZE, "%llu\n",			\
65 		(u64)atomic64_read(&zram->stats.name));			\
66 }									\
67 static DEVICE_ATTR_RO(name);
68 
69 static inline bool init_done(struct zram *zram)
70 {
71 	return zram->disksize;
72 }
73 
74 static inline struct zram *dev_to_zram(struct device *dev)
75 {
76 	return (struct zram *)dev_to_disk(dev)->private_data;
77 }
78 
79 /* flag operations require table entry bit_spin_lock() being held */
80 static int zram_test_flag(struct zram_meta *meta, u32 index,
81 			enum zram_pageflags flag)
82 {
83 	return meta->table[index].value & BIT(flag);
84 }
85 
86 static void zram_set_flag(struct zram_meta *meta, u32 index,
87 			enum zram_pageflags flag)
88 {
89 	meta->table[index].value |= BIT(flag);
90 }
91 
92 static void zram_clear_flag(struct zram_meta *meta, u32 index,
93 			enum zram_pageflags flag)
94 {
95 	meta->table[index].value &= ~BIT(flag);
96 }
97 
98 static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
99 {
100 	return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
101 }
102 
103 static void zram_set_obj_size(struct zram_meta *meta,
104 					u32 index, size_t size)
105 {
106 	unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
107 
108 	meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
109 }
110 
111 static inline bool is_partial_io(struct bio_vec *bvec)
112 {
113 	return bvec->bv_len != PAGE_SIZE;
114 }
115 
116 static void zram_revalidate_disk(struct zram *zram)
117 {
118 	revalidate_disk(zram->disk);
119 	/* revalidate_disk reset the BDI_CAP_STABLE_WRITES so set again */
120 	zram->disk->queue->backing_dev_info.capabilities |=
121 		BDI_CAP_STABLE_WRITES;
122 }
123 
124 /*
125  * Check if request is within bounds and aligned on zram logical blocks.
126  */
127 static inline bool valid_io_request(struct zram *zram,
128 		sector_t start, unsigned int size)
129 {
130 	u64 end, bound;
131 
132 	/* unaligned request */
133 	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
134 		return false;
135 	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
136 		return false;
137 
138 	end = start + (size >> SECTOR_SHIFT);
139 	bound = zram->disksize >> SECTOR_SHIFT;
140 	/* out of range range */
141 	if (unlikely(start >= bound || end > bound || start > end))
142 		return false;
143 
144 	/* I/O request is valid */
145 	return true;
146 }
147 
148 static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
149 {
150 	if (*offset + bvec->bv_len >= PAGE_SIZE)
151 		(*index)++;
152 	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
153 }
154 
155 static inline void update_used_max(struct zram *zram,
156 					const unsigned long pages)
157 {
158 	unsigned long old_max, cur_max;
159 
160 	old_max = atomic_long_read(&zram->stats.max_used_pages);
161 
162 	do {
163 		cur_max = old_max;
164 		if (pages > cur_max)
165 			old_max = atomic_long_cmpxchg(
166 				&zram->stats.max_used_pages, cur_max, pages);
167 	} while (old_max != cur_max);
168 }
169 
170 static bool page_zero_filled(void *ptr)
171 {
172 	unsigned int pos;
173 	unsigned long *page;
174 
175 	page = (unsigned long *)ptr;
176 
177 	for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
178 		if (page[pos])
179 			return false;
180 	}
181 
182 	return true;
183 }
184 
185 static void handle_zero_page(struct bio_vec *bvec)
186 {
187 	struct page *page = bvec->bv_page;
188 	void *user_mem;
189 
190 	user_mem = kmap_atomic(page);
191 	if (is_partial_io(bvec))
192 		memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
193 	else
194 		clear_page(user_mem);
195 	kunmap_atomic(user_mem);
196 
197 	flush_dcache_page(page);
198 }
199 
200 static ssize_t initstate_show(struct device *dev,
201 		struct device_attribute *attr, char *buf)
202 {
203 	u32 val;
204 	struct zram *zram = dev_to_zram(dev);
205 
206 	down_read(&zram->init_lock);
207 	val = init_done(zram);
208 	up_read(&zram->init_lock);
209 
210 	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
211 }
212 
213 static ssize_t disksize_show(struct device *dev,
214 		struct device_attribute *attr, char *buf)
215 {
216 	struct zram *zram = dev_to_zram(dev);
217 
218 	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
219 }
220 
221 static ssize_t orig_data_size_show(struct device *dev,
222 		struct device_attribute *attr, char *buf)
223 {
224 	struct zram *zram = dev_to_zram(dev);
225 
226 	deprecated_attr_warn("orig_data_size");
227 	return scnprintf(buf, PAGE_SIZE, "%llu\n",
228 		(u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
229 }
230 
231 static ssize_t mem_used_total_show(struct device *dev,
232 		struct device_attribute *attr, char *buf)
233 {
234 	u64 val = 0;
235 	struct zram *zram = dev_to_zram(dev);
236 
237 	deprecated_attr_warn("mem_used_total");
238 	down_read(&zram->init_lock);
239 	if (init_done(zram)) {
240 		struct zram_meta *meta = zram->meta;
241 		val = zs_get_total_pages(meta->mem_pool);
242 	}
243 	up_read(&zram->init_lock);
244 
245 	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
246 }
247 
248 static ssize_t mem_limit_show(struct device *dev,
249 		struct device_attribute *attr, char *buf)
250 {
251 	u64 val;
252 	struct zram *zram = dev_to_zram(dev);
253 
254 	deprecated_attr_warn("mem_limit");
255 	down_read(&zram->init_lock);
256 	val = zram->limit_pages;
257 	up_read(&zram->init_lock);
258 
259 	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
260 }
261 
262 static ssize_t mem_limit_store(struct device *dev,
263 		struct device_attribute *attr, const char *buf, size_t len)
264 {
265 	u64 limit;
266 	char *tmp;
267 	struct zram *zram = dev_to_zram(dev);
268 
269 	limit = memparse(buf, &tmp);
270 	if (buf == tmp) /* no chars parsed, invalid input */
271 		return -EINVAL;
272 
273 	down_write(&zram->init_lock);
274 	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
275 	up_write(&zram->init_lock);
276 
277 	return len;
278 }
279 
280 static ssize_t mem_used_max_show(struct device *dev,
281 		struct device_attribute *attr, char *buf)
282 {
283 	u64 val = 0;
284 	struct zram *zram = dev_to_zram(dev);
285 
286 	deprecated_attr_warn("mem_used_max");
287 	down_read(&zram->init_lock);
288 	if (init_done(zram))
289 		val = atomic_long_read(&zram->stats.max_used_pages);
290 	up_read(&zram->init_lock);
291 
292 	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
293 }
294 
295 static ssize_t mem_used_max_store(struct device *dev,
296 		struct device_attribute *attr, const char *buf, size_t len)
297 {
298 	int err;
299 	unsigned long val;
300 	struct zram *zram = dev_to_zram(dev);
301 
302 	err = kstrtoul(buf, 10, &val);
303 	if (err || val != 0)
304 		return -EINVAL;
305 
306 	down_read(&zram->init_lock);
307 	if (init_done(zram)) {
308 		struct zram_meta *meta = zram->meta;
309 		atomic_long_set(&zram->stats.max_used_pages,
310 				zs_get_total_pages(meta->mem_pool));
311 	}
312 	up_read(&zram->init_lock);
313 
314 	return len;
315 }
316 
317 /*
318  * We switched to per-cpu streams and this attr is not needed anymore.
319  * However, we will keep it around for some time, because:
320  * a) we may revert per-cpu streams in the future
321  * b) it's visible to user space and we need to follow our 2 years
322  *    retirement rule; but we already have a number of 'soon to be
323  *    altered' attrs, so max_comp_streams need to wait for the next
324  *    layoff cycle.
325  */
326 static ssize_t max_comp_streams_show(struct device *dev,
327 		struct device_attribute *attr, char *buf)
328 {
329 	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
330 }
331 
332 static ssize_t max_comp_streams_store(struct device *dev,
333 		struct device_attribute *attr, const char *buf, size_t len)
334 {
335 	return len;
336 }
337 
338 static ssize_t comp_algorithm_show(struct device *dev,
339 		struct device_attribute *attr, char *buf)
340 {
341 	size_t sz;
342 	struct zram *zram = dev_to_zram(dev);
343 
344 	down_read(&zram->init_lock);
345 	sz = zcomp_available_show(zram->compressor, buf);
346 	up_read(&zram->init_lock);
347 
348 	return sz;
349 }
350 
351 static ssize_t comp_algorithm_store(struct device *dev,
352 		struct device_attribute *attr, const char *buf, size_t len)
353 {
354 	struct zram *zram = dev_to_zram(dev);
355 	char compressor[CRYPTO_MAX_ALG_NAME];
356 	size_t sz;
357 
358 	strlcpy(compressor, buf, sizeof(compressor));
359 	/* ignore trailing newline */
360 	sz = strlen(compressor);
361 	if (sz > 0 && compressor[sz - 1] == '\n')
362 		compressor[sz - 1] = 0x00;
363 
364 	if (!zcomp_available_algorithm(compressor))
365 		return -EINVAL;
366 
367 	down_write(&zram->init_lock);
368 	if (init_done(zram)) {
369 		up_write(&zram->init_lock);
370 		pr_info("Can't change algorithm for initialized device\n");
371 		return -EBUSY;
372 	}
373 
374 	strlcpy(zram->compressor, compressor, sizeof(compressor));
375 	up_write(&zram->init_lock);
376 	return len;
377 }
378 
379 static ssize_t compact_store(struct device *dev,
380 		struct device_attribute *attr, const char *buf, size_t len)
381 {
382 	struct zram *zram = dev_to_zram(dev);
383 	struct zram_meta *meta;
384 
385 	down_read(&zram->init_lock);
386 	if (!init_done(zram)) {
387 		up_read(&zram->init_lock);
388 		return -EINVAL;
389 	}
390 
391 	meta = zram->meta;
392 	zs_compact(meta->mem_pool);
393 	up_read(&zram->init_lock);
394 
395 	return len;
396 }
397 
398 static ssize_t io_stat_show(struct device *dev,
399 		struct device_attribute *attr, char *buf)
400 {
401 	struct zram *zram = dev_to_zram(dev);
402 	ssize_t ret;
403 
404 	down_read(&zram->init_lock);
405 	ret = scnprintf(buf, PAGE_SIZE,
406 			"%8llu %8llu %8llu %8llu\n",
407 			(u64)atomic64_read(&zram->stats.failed_reads),
408 			(u64)atomic64_read(&zram->stats.failed_writes),
409 			(u64)atomic64_read(&zram->stats.invalid_io),
410 			(u64)atomic64_read(&zram->stats.notify_free));
411 	up_read(&zram->init_lock);
412 
413 	return ret;
414 }
415 
416 static ssize_t mm_stat_show(struct device *dev,
417 		struct device_attribute *attr, char *buf)
418 {
419 	struct zram *zram = dev_to_zram(dev);
420 	struct zs_pool_stats pool_stats;
421 	u64 orig_size, mem_used = 0;
422 	long max_used;
423 	ssize_t ret;
424 
425 	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
426 
427 	down_read(&zram->init_lock);
428 	if (init_done(zram)) {
429 		mem_used = zs_get_total_pages(zram->meta->mem_pool);
430 		zs_pool_stats(zram->meta->mem_pool, &pool_stats);
431 	}
432 
433 	orig_size = atomic64_read(&zram->stats.pages_stored);
434 	max_used = atomic_long_read(&zram->stats.max_used_pages);
435 
436 	ret = scnprintf(buf, PAGE_SIZE,
437 			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n",
438 			orig_size << PAGE_SHIFT,
439 			(u64)atomic64_read(&zram->stats.compr_data_size),
440 			mem_used << PAGE_SHIFT,
441 			zram->limit_pages << PAGE_SHIFT,
442 			max_used << PAGE_SHIFT,
443 			(u64)atomic64_read(&zram->stats.zero_pages),
444 			pool_stats.pages_compacted);
445 	up_read(&zram->init_lock);
446 
447 	return ret;
448 }
449 
450 static ssize_t debug_stat_show(struct device *dev,
451 		struct device_attribute *attr, char *buf)
452 {
453 	int version = 1;
454 	struct zram *zram = dev_to_zram(dev);
455 	ssize_t ret;
456 
457 	down_read(&zram->init_lock);
458 	ret = scnprintf(buf, PAGE_SIZE,
459 			"version: %d\n%8llu\n",
460 			version,
461 			(u64)atomic64_read(&zram->stats.writestall));
462 	up_read(&zram->init_lock);
463 
464 	return ret;
465 }
466 
467 static DEVICE_ATTR_RO(io_stat);
468 static DEVICE_ATTR_RO(mm_stat);
469 static DEVICE_ATTR_RO(debug_stat);
470 ZRAM_ATTR_RO(num_reads);
471 ZRAM_ATTR_RO(num_writes);
472 ZRAM_ATTR_RO(failed_reads);
473 ZRAM_ATTR_RO(failed_writes);
474 ZRAM_ATTR_RO(invalid_io);
475 ZRAM_ATTR_RO(notify_free);
476 ZRAM_ATTR_RO(zero_pages);
477 ZRAM_ATTR_RO(compr_data_size);
478 
479 static inline bool zram_meta_get(struct zram *zram)
480 {
481 	if (atomic_inc_not_zero(&zram->refcount))
482 		return true;
483 	return false;
484 }
485 
486 static inline void zram_meta_put(struct zram *zram)
487 {
488 	atomic_dec(&zram->refcount);
489 }
490 
491 static void zram_meta_free(struct zram_meta *meta, u64 disksize)
492 {
493 	size_t num_pages = disksize >> PAGE_SHIFT;
494 	size_t index;
495 
496 	/* Free all pages that are still in this zram device */
497 	for (index = 0; index < num_pages; index++) {
498 		unsigned long handle = meta->table[index].handle;
499 
500 		if (!handle)
501 			continue;
502 
503 		zs_free(meta->mem_pool, handle);
504 	}
505 
506 	zs_destroy_pool(meta->mem_pool);
507 	vfree(meta->table);
508 	kfree(meta);
509 }
510 
511 static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize)
512 {
513 	size_t num_pages;
514 	struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
515 
516 	if (!meta)
517 		return NULL;
518 
519 	num_pages = disksize >> PAGE_SHIFT;
520 	meta->table = vzalloc(num_pages * sizeof(*meta->table));
521 	if (!meta->table) {
522 		pr_err("Error allocating zram address table\n");
523 		goto out_error;
524 	}
525 
526 	meta->mem_pool = zs_create_pool(pool_name);
527 	if (!meta->mem_pool) {
528 		pr_err("Error creating memory pool\n");
529 		goto out_error;
530 	}
531 
532 	return meta;
533 
534 out_error:
535 	vfree(meta->table);
536 	kfree(meta);
537 	return NULL;
538 }
539 
540 /*
541  * To protect concurrent access to the same index entry,
542  * caller should hold this table index entry's bit_spinlock to
543  * indicate this index entry is accessing.
544  */
545 static void zram_free_page(struct zram *zram, size_t index)
546 {
547 	struct zram_meta *meta = zram->meta;
548 	unsigned long handle = meta->table[index].handle;
549 
550 	if (unlikely(!handle)) {
551 		/*
552 		 * No memory is allocated for zero filled pages.
553 		 * Simply clear zero page flag.
554 		 */
555 		if (zram_test_flag(meta, index, ZRAM_ZERO)) {
556 			zram_clear_flag(meta, index, ZRAM_ZERO);
557 			atomic64_dec(&zram->stats.zero_pages);
558 		}
559 		return;
560 	}
561 
562 	zs_free(meta->mem_pool, handle);
563 
564 	atomic64_sub(zram_get_obj_size(meta, index),
565 			&zram->stats.compr_data_size);
566 	atomic64_dec(&zram->stats.pages_stored);
567 
568 	meta->table[index].handle = 0;
569 	zram_set_obj_size(meta, index, 0);
570 }
571 
572 static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
573 {
574 	int ret = 0;
575 	unsigned char *cmem;
576 	struct zram_meta *meta = zram->meta;
577 	unsigned long handle;
578 	unsigned int size;
579 
580 	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
581 	handle = meta->table[index].handle;
582 	size = zram_get_obj_size(meta, index);
583 
584 	if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
585 		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
586 		clear_page(mem);
587 		return 0;
588 	}
589 
590 	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
591 	if (size == PAGE_SIZE) {
592 		copy_page(mem, cmem);
593 	} else {
594 		struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
595 
596 		ret = zcomp_decompress(zstrm, cmem, size, mem);
597 		zcomp_stream_put(zram->comp);
598 	}
599 	zs_unmap_object(meta->mem_pool, handle);
600 	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
601 
602 	/* Should NEVER happen. Return bio error if it does. */
603 	if (unlikely(ret)) {
604 		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
605 		return ret;
606 	}
607 
608 	return 0;
609 }
610 
611 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
612 			  u32 index, int offset)
613 {
614 	int ret;
615 	struct page *page;
616 	unsigned char *user_mem, *uncmem = NULL;
617 	struct zram_meta *meta = zram->meta;
618 	page = bvec->bv_page;
619 
620 	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
621 	if (unlikely(!meta->table[index].handle) ||
622 			zram_test_flag(meta, index, ZRAM_ZERO)) {
623 		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
624 		handle_zero_page(bvec);
625 		return 0;
626 	}
627 	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
628 
629 	if (is_partial_io(bvec))
630 		/* Use  a temporary buffer to decompress the page */
631 		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
632 
633 	user_mem = kmap_atomic(page);
634 	if (!is_partial_io(bvec))
635 		uncmem = user_mem;
636 
637 	if (!uncmem) {
638 		pr_err("Unable to allocate temp memory\n");
639 		ret = -ENOMEM;
640 		goto out_cleanup;
641 	}
642 
643 	ret = zram_decompress_page(zram, uncmem, index);
644 	/* Should NEVER happen. Return bio error if it does. */
645 	if (unlikely(ret))
646 		goto out_cleanup;
647 
648 	if (is_partial_io(bvec))
649 		memcpy(user_mem + bvec->bv_offset, uncmem + offset,
650 				bvec->bv_len);
651 
652 	flush_dcache_page(page);
653 	ret = 0;
654 out_cleanup:
655 	kunmap_atomic(user_mem);
656 	if (is_partial_io(bvec))
657 		kfree(uncmem);
658 	return ret;
659 }
660 
661 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
662 			   int offset)
663 {
664 	int ret = 0;
665 	unsigned int clen;
666 	unsigned long handle = 0;
667 	struct page *page;
668 	unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
669 	struct zram_meta *meta = zram->meta;
670 	struct zcomp_strm *zstrm = NULL;
671 	unsigned long alloced_pages;
672 
673 	page = bvec->bv_page;
674 	if (is_partial_io(bvec)) {
675 		/*
676 		 * This is a partial IO. We need to read the full page
677 		 * before to write the changes.
678 		 */
679 		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
680 		if (!uncmem) {
681 			ret = -ENOMEM;
682 			goto out;
683 		}
684 		ret = zram_decompress_page(zram, uncmem, index);
685 		if (ret)
686 			goto out;
687 	}
688 
689 compress_again:
690 	user_mem = kmap_atomic(page);
691 	if (is_partial_io(bvec)) {
692 		memcpy(uncmem + offset, user_mem + bvec->bv_offset,
693 		       bvec->bv_len);
694 		kunmap_atomic(user_mem);
695 		user_mem = NULL;
696 	} else {
697 		uncmem = user_mem;
698 	}
699 
700 	if (page_zero_filled(uncmem)) {
701 		if (user_mem)
702 			kunmap_atomic(user_mem);
703 		/* Free memory associated with this sector now. */
704 		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
705 		zram_free_page(zram, index);
706 		zram_set_flag(meta, index, ZRAM_ZERO);
707 		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
708 
709 		atomic64_inc(&zram->stats.zero_pages);
710 		ret = 0;
711 		goto out;
712 	}
713 
714 	zstrm = zcomp_stream_get(zram->comp);
715 	ret = zcomp_compress(zstrm, uncmem, &clen);
716 	if (!is_partial_io(bvec)) {
717 		kunmap_atomic(user_mem);
718 		user_mem = NULL;
719 		uncmem = NULL;
720 	}
721 
722 	if (unlikely(ret)) {
723 		pr_err("Compression failed! err=%d\n", ret);
724 		goto out;
725 	}
726 
727 	src = zstrm->buffer;
728 	if (unlikely(clen > max_zpage_size)) {
729 		clen = PAGE_SIZE;
730 		if (is_partial_io(bvec))
731 			src = uncmem;
732 	}
733 
734 	/*
735 	 * handle allocation has 2 paths:
736 	 * a) fast path is executed with preemption disabled (for
737 	 *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
738 	 *  since we can't sleep;
739 	 * b) slow path enables preemption and attempts to allocate
740 	 *  the page with __GFP_DIRECT_RECLAIM bit set. we have to
741 	 *  put per-cpu compression stream and, thus, to re-do
742 	 *  the compression once handle is allocated.
743 	 *
744 	 * if we have a 'non-null' handle here then we are coming
745 	 * from the slow path and handle has already been allocated.
746 	 */
747 	if (!handle)
748 		handle = zs_malloc(meta->mem_pool, clen,
749 				__GFP_KSWAPD_RECLAIM |
750 				__GFP_NOWARN |
751 				__GFP_HIGHMEM |
752 				__GFP_MOVABLE);
753 	if (!handle) {
754 		zcomp_stream_put(zram->comp);
755 		zstrm = NULL;
756 
757 		atomic64_inc(&zram->stats.writestall);
758 
759 		handle = zs_malloc(meta->mem_pool, clen,
760 				GFP_NOIO | __GFP_HIGHMEM |
761 				__GFP_MOVABLE);
762 		if (handle)
763 			goto compress_again;
764 
765 		pr_err("Error allocating memory for compressed page: %u, size=%u\n",
766 			index, clen);
767 		ret = -ENOMEM;
768 		goto out;
769 	}
770 
771 	alloced_pages = zs_get_total_pages(meta->mem_pool);
772 	update_used_max(zram, alloced_pages);
773 
774 	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
775 		zs_free(meta->mem_pool, handle);
776 		ret = -ENOMEM;
777 		goto out;
778 	}
779 
780 	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
781 
782 	if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
783 		src = kmap_atomic(page);
784 		copy_page(cmem, src);
785 		kunmap_atomic(src);
786 	} else {
787 		memcpy(cmem, src, clen);
788 	}
789 
790 	zcomp_stream_put(zram->comp);
791 	zstrm = NULL;
792 	zs_unmap_object(meta->mem_pool, handle);
793 
794 	/*
795 	 * Free memory associated with this sector
796 	 * before overwriting unused sectors.
797 	 */
798 	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
799 	zram_free_page(zram, index);
800 
801 	meta->table[index].handle = handle;
802 	zram_set_obj_size(meta, index, clen);
803 	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
804 
805 	/* Update stats */
806 	atomic64_add(clen, &zram->stats.compr_data_size);
807 	atomic64_inc(&zram->stats.pages_stored);
808 out:
809 	if (zstrm)
810 		zcomp_stream_put(zram->comp);
811 	if (is_partial_io(bvec))
812 		kfree(uncmem);
813 	return ret;
814 }
815 
816 /*
817  * zram_bio_discard - handler on discard request
818  * @index: physical block index in PAGE_SIZE units
819  * @offset: byte offset within physical block
820  */
821 static void zram_bio_discard(struct zram *zram, u32 index,
822 			     int offset, struct bio *bio)
823 {
824 	size_t n = bio->bi_iter.bi_size;
825 	struct zram_meta *meta = zram->meta;
826 
827 	/*
828 	 * zram manages data in physical block size units. Because logical block
829 	 * size isn't identical with physical block size on some arch, we
830 	 * could get a discard request pointing to a specific offset within a
831 	 * certain physical block.  Although we can handle this request by
832 	 * reading that physiclal block and decompressing and partially zeroing
833 	 * and re-compressing and then re-storing it, this isn't reasonable
834 	 * because our intent with a discard request is to save memory.  So
835 	 * skipping this logical block is appropriate here.
836 	 */
837 	if (offset) {
838 		if (n <= (PAGE_SIZE - offset))
839 			return;
840 
841 		n -= (PAGE_SIZE - offset);
842 		index++;
843 	}
844 
845 	while (n >= PAGE_SIZE) {
846 		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
847 		zram_free_page(zram, index);
848 		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
849 		atomic64_inc(&zram->stats.notify_free);
850 		index++;
851 		n -= PAGE_SIZE;
852 	}
853 }
854 
855 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
856 			int offset, bool is_write)
857 {
858 	unsigned long start_time = jiffies;
859 	int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
860 	int ret;
861 
862 	generic_start_io_acct(rw_acct, bvec->bv_len >> SECTOR_SHIFT,
863 			&zram->disk->part0);
864 
865 	if (!is_write) {
866 		atomic64_inc(&zram->stats.num_reads);
867 		ret = zram_bvec_read(zram, bvec, index, offset);
868 	} else {
869 		atomic64_inc(&zram->stats.num_writes);
870 		ret = zram_bvec_write(zram, bvec, index, offset);
871 	}
872 
873 	generic_end_io_acct(rw_acct, &zram->disk->part0, start_time);
874 
875 	if (unlikely(ret)) {
876 		if (!is_write)
877 			atomic64_inc(&zram->stats.failed_reads);
878 		else
879 			atomic64_inc(&zram->stats.failed_writes);
880 	}
881 
882 	return ret;
883 }
884 
885 static void __zram_make_request(struct zram *zram, struct bio *bio)
886 {
887 	int offset;
888 	u32 index;
889 	struct bio_vec bvec;
890 	struct bvec_iter iter;
891 
892 	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
893 	offset = (bio->bi_iter.bi_sector &
894 		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
895 
896 	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
897 		zram_bio_discard(zram, index, offset, bio);
898 		bio_endio(bio);
899 		return;
900 	}
901 
902 	bio_for_each_segment(bvec, bio, iter) {
903 		int max_transfer_size = PAGE_SIZE - offset;
904 
905 		if (bvec.bv_len > max_transfer_size) {
906 			/*
907 			 * zram_bvec_rw() can only make operation on a single
908 			 * zram page. Split the bio vector.
909 			 */
910 			struct bio_vec bv;
911 
912 			bv.bv_page = bvec.bv_page;
913 			bv.bv_len = max_transfer_size;
914 			bv.bv_offset = bvec.bv_offset;
915 
916 			if (zram_bvec_rw(zram, &bv, index, offset,
917 					 op_is_write(bio_op(bio))) < 0)
918 				goto out;
919 
920 			bv.bv_len = bvec.bv_len - max_transfer_size;
921 			bv.bv_offset += max_transfer_size;
922 			if (zram_bvec_rw(zram, &bv, index + 1, 0,
923 					 op_is_write(bio_op(bio))) < 0)
924 				goto out;
925 		} else
926 			if (zram_bvec_rw(zram, &bvec, index, offset,
927 					 op_is_write(bio_op(bio))) < 0)
928 				goto out;
929 
930 		update_position(&index, &offset, &bvec);
931 	}
932 
933 	bio_endio(bio);
934 	return;
935 
936 out:
937 	bio_io_error(bio);
938 }
939 
940 /*
941  * Handler function for all zram I/O requests.
942  */
943 static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
944 {
945 	struct zram *zram = queue->queuedata;
946 
947 	if (unlikely(!zram_meta_get(zram)))
948 		goto error;
949 
950 	blk_queue_split(queue, &bio, queue->bio_split);
951 
952 	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
953 					bio->bi_iter.bi_size)) {
954 		atomic64_inc(&zram->stats.invalid_io);
955 		goto put_zram;
956 	}
957 
958 	__zram_make_request(zram, bio);
959 	zram_meta_put(zram);
960 	return BLK_QC_T_NONE;
961 put_zram:
962 	zram_meta_put(zram);
963 error:
964 	bio_io_error(bio);
965 	return BLK_QC_T_NONE;
966 }
967 
968 static void zram_slot_free_notify(struct block_device *bdev,
969 				unsigned long index)
970 {
971 	struct zram *zram;
972 	struct zram_meta *meta;
973 
974 	zram = bdev->bd_disk->private_data;
975 	meta = zram->meta;
976 
977 	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
978 	zram_free_page(zram, index);
979 	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
980 	atomic64_inc(&zram->stats.notify_free);
981 }
982 
983 static int zram_rw_page(struct block_device *bdev, sector_t sector,
984 		       struct page *page, bool is_write)
985 {
986 	int offset, err = -EIO;
987 	u32 index;
988 	struct zram *zram;
989 	struct bio_vec bv;
990 
991 	zram = bdev->bd_disk->private_data;
992 	if (unlikely(!zram_meta_get(zram)))
993 		goto out;
994 
995 	if (!valid_io_request(zram, sector, PAGE_SIZE)) {
996 		atomic64_inc(&zram->stats.invalid_io);
997 		err = -EINVAL;
998 		goto put_zram;
999 	}
1000 
1001 	index = sector >> SECTORS_PER_PAGE_SHIFT;
1002 	offset = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT;
1003 
1004 	bv.bv_page = page;
1005 	bv.bv_len = PAGE_SIZE;
1006 	bv.bv_offset = 0;
1007 
1008 	err = zram_bvec_rw(zram, &bv, index, offset, is_write);
1009 put_zram:
1010 	zram_meta_put(zram);
1011 out:
1012 	/*
1013 	 * If I/O fails, just return error(ie, non-zero) without
1014 	 * calling page_endio.
1015 	 * It causes resubmit the I/O with bio request by upper functions
1016 	 * of rw_page(e.g., swap_readpage, __swap_writepage) and
1017 	 * bio->bi_end_io does things to handle the error
1018 	 * (e.g., SetPageError, set_page_dirty and extra works).
1019 	 */
1020 	if (err == 0)
1021 		page_endio(page, is_write, 0);
1022 	return err;
1023 }
1024 
1025 static void zram_reset_device(struct zram *zram)
1026 {
1027 	struct zram_meta *meta;
1028 	struct zcomp *comp;
1029 	u64 disksize;
1030 
1031 	down_write(&zram->init_lock);
1032 
1033 	zram->limit_pages = 0;
1034 
1035 	if (!init_done(zram)) {
1036 		up_write(&zram->init_lock);
1037 		return;
1038 	}
1039 
1040 	meta = zram->meta;
1041 	comp = zram->comp;
1042 	disksize = zram->disksize;
1043 	/*
1044 	 * Refcount will go down to 0 eventually and r/w handler
1045 	 * cannot handle further I/O so it will bail out by
1046 	 * check zram_meta_get.
1047 	 */
1048 	zram_meta_put(zram);
1049 	/*
1050 	 * We want to free zram_meta in process context to avoid
1051 	 * deadlock between reclaim path and any other locks.
1052 	 */
1053 	wait_event(zram->io_done, atomic_read(&zram->refcount) == 0);
1054 
1055 	/* Reset stats */
1056 	memset(&zram->stats, 0, sizeof(zram->stats));
1057 	zram->disksize = 0;
1058 
1059 	set_capacity(zram->disk, 0);
1060 	part_stat_set_all(&zram->disk->part0, 0);
1061 
1062 	up_write(&zram->init_lock);
1063 	/* I/O operation under all of CPU are done so let's free */
1064 	zram_meta_free(meta, disksize);
1065 	zcomp_destroy(comp);
1066 }
1067 
1068 static ssize_t disksize_store(struct device *dev,
1069 		struct device_attribute *attr, const char *buf, size_t len)
1070 {
1071 	u64 disksize;
1072 	struct zcomp *comp;
1073 	struct zram_meta *meta;
1074 	struct zram *zram = dev_to_zram(dev);
1075 	int err;
1076 
1077 	disksize = memparse(buf, NULL);
1078 	if (!disksize)
1079 		return -EINVAL;
1080 
1081 	disksize = PAGE_ALIGN(disksize);
1082 	meta = zram_meta_alloc(zram->disk->disk_name, disksize);
1083 	if (!meta)
1084 		return -ENOMEM;
1085 
1086 	comp = zcomp_create(zram->compressor);
1087 	if (IS_ERR(comp)) {
1088 		pr_err("Cannot initialise %s compressing backend\n",
1089 				zram->compressor);
1090 		err = PTR_ERR(comp);
1091 		goto out_free_meta;
1092 	}
1093 
1094 	down_write(&zram->init_lock);
1095 	if (init_done(zram)) {
1096 		pr_info("Cannot change disksize for initialized device\n");
1097 		err = -EBUSY;
1098 		goto out_destroy_comp;
1099 	}
1100 
1101 	init_waitqueue_head(&zram->io_done);
1102 	atomic_set(&zram->refcount, 1);
1103 	zram->meta = meta;
1104 	zram->comp = comp;
1105 	zram->disksize = disksize;
1106 	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
1107 	zram_revalidate_disk(zram);
1108 	up_write(&zram->init_lock);
1109 
1110 	return len;
1111 
1112 out_destroy_comp:
1113 	up_write(&zram->init_lock);
1114 	zcomp_destroy(comp);
1115 out_free_meta:
1116 	zram_meta_free(meta, disksize);
1117 	return err;
1118 }
1119 
1120 static ssize_t reset_store(struct device *dev,
1121 		struct device_attribute *attr, const char *buf, size_t len)
1122 {
1123 	int ret;
1124 	unsigned short do_reset;
1125 	struct zram *zram;
1126 	struct block_device *bdev;
1127 
1128 	ret = kstrtou16(buf, 10, &do_reset);
1129 	if (ret)
1130 		return ret;
1131 
1132 	if (!do_reset)
1133 		return -EINVAL;
1134 
1135 	zram = dev_to_zram(dev);
1136 	bdev = bdget_disk(zram->disk, 0);
1137 	if (!bdev)
1138 		return -ENOMEM;
1139 
1140 	mutex_lock(&bdev->bd_mutex);
1141 	/* Do not reset an active device or claimed device */
1142 	if (bdev->bd_openers || zram->claim) {
1143 		mutex_unlock(&bdev->bd_mutex);
1144 		bdput(bdev);
1145 		return -EBUSY;
1146 	}
1147 
1148 	/* From now on, anyone can't open /dev/zram[0-9] */
1149 	zram->claim = true;
1150 	mutex_unlock(&bdev->bd_mutex);
1151 
1152 	/* Make sure all the pending I/O are finished */
1153 	fsync_bdev(bdev);
1154 	zram_reset_device(zram);
1155 	zram_revalidate_disk(zram);
1156 	bdput(bdev);
1157 
1158 	mutex_lock(&bdev->bd_mutex);
1159 	zram->claim = false;
1160 	mutex_unlock(&bdev->bd_mutex);
1161 
1162 	return len;
1163 }
1164 
1165 static int zram_open(struct block_device *bdev, fmode_t mode)
1166 {
1167 	int ret = 0;
1168 	struct zram *zram;
1169 
1170 	WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
1171 
1172 	zram = bdev->bd_disk->private_data;
1173 	/* zram was claimed to reset so open request fails */
1174 	if (zram->claim)
1175 		ret = -EBUSY;
1176 
1177 	return ret;
1178 }
1179 
1180 static const struct block_device_operations zram_devops = {
1181 	.open = zram_open,
1182 	.swap_slot_free_notify = zram_slot_free_notify,
1183 	.rw_page = zram_rw_page,
1184 	.owner = THIS_MODULE
1185 };
1186 
1187 static DEVICE_ATTR_WO(compact);
1188 static DEVICE_ATTR_RW(disksize);
1189 static DEVICE_ATTR_RO(initstate);
1190 static DEVICE_ATTR_WO(reset);
1191 static DEVICE_ATTR_RO(orig_data_size);
1192 static DEVICE_ATTR_RO(mem_used_total);
1193 static DEVICE_ATTR_RW(mem_limit);
1194 static DEVICE_ATTR_RW(mem_used_max);
1195 static DEVICE_ATTR_RW(max_comp_streams);
1196 static DEVICE_ATTR_RW(comp_algorithm);
1197 
1198 static struct attribute *zram_disk_attrs[] = {
1199 	&dev_attr_disksize.attr,
1200 	&dev_attr_initstate.attr,
1201 	&dev_attr_reset.attr,
1202 	&dev_attr_num_reads.attr,
1203 	&dev_attr_num_writes.attr,
1204 	&dev_attr_failed_reads.attr,
1205 	&dev_attr_failed_writes.attr,
1206 	&dev_attr_compact.attr,
1207 	&dev_attr_invalid_io.attr,
1208 	&dev_attr_notify_free.attr,
1209 	&dev_attr_zero_pages.attr,
1210 	&dev_attr_orig_data_size.attr,
1211 	&dev_attr_compr_data_size.attr,
1212 	&dev_attr_mem_used_total.attr,
1213 	&dev_attr_mem_limit.attr,
1214 	&dev_attr_mem_used_max.attr,
1215 	&dev_attr_max_comp_streams.attr,
1216 	&dev_attr_comp_algorithm.attr,
1217 	&dev_attr_io_stat.attr,
1218 	&dev_attr_mm_stat.attr,
1219 	&dev_attr_debug_stat.attr,
1220 	NULL,
1221 };
1222 
1223 static struct attribute_group zram_disk_attr_group = {
1224 	.attrs = zram_disk_attrs,
1225 };
1226 
1227 /*
1228  * Allocate and initialize new zram device. the function returns
1229  * '>= 0' device_id upon success, and negative value otherwise.
1230  */
1231 static int zram_add(void)
1232 {
1233 	struct zram *zram;
1234 	struct request_queue *queue;
1235 	int ret, device_id;
1236 
1237 	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
1238 	if (!zram)
1239 		return -ENOMEM;
1240 
1241 	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
1242 	if (ret < 0)
1243 		goto out_free_dev;
1244 	device_id = ret;
1245 
1246 	init_rwsem(&zram->init_lock);
1247 
1248 	queue = blk_alloc_queue(GFP_KERNEL);
1249 	if (!queue) {
1250 		pr_err("Error allocating disk queue for device %d\n",
1251 			device_id);
1252 		ret = -ENOMEM;
1253 		goto out_free_idr;
1254 	}
1255 
1256 	blk_queue_make_request(queue, zram_make_request);
1257 
1258 	/* gendisk structure */
1259 	zram->disk = alloc_disk(1);
1260 	if (!zram->disk) {
1261 		pr_err("Error allocating disk structure for device %d\n",
1262 			device_id);
1263 		ret = -ENOMEM;
1264 		goto out_free_queue;
1265 	}
1266 
1267 	zram->disk->major = zram_major;
1268 	zram->disk->first_minor = device_id;
1269 	zram->disk->fops = &zram_devops;
1270 	zram->disk->queue = queue;
1271 	zram->disk->queue->queuedata = zram;
1272 	zram->disk->private_data = zram;
1273 	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
1274 
1275 	/* Actual capacity set using syfs (/sys/block/zram<id>/disksize */
1276 	set_capacity(zram->disk, 0);
1277 	/* zram devices sort of resembles non-rotational disks */
1278 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
1279 	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
1280 	/*
1281 	 * To ensure that we always get PAGE_SIZE aligned
1282 	 * and n*PAGE_SIZED sized I/O requests.
1283 	 */
1284 	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
1285 	blk_queue_logical_block_size(zram->disk->queue,
1286 					ZRAM_LOGICAL_BLOCK_SIZE);
1287 	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
1288 	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
1289 	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
1290 	blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
1291 	/*
1292 	 * zram_bio_discard() will clear all logical blocks if logical block
1293 	 * size is identical with physical block size(PAGE_SIZE). But if it is
1294 	 * different, we will skip discarding some parts of logical blocks in
1295 	 * the part of the request range which isn't aligned to physical block
1296 	 * size.  So we can't ensure that all discarded logical blocks are
1297 	 * zeroed.
1298 	 */
1299 	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
1300 		zram->disk->queue->limits.discard_zeroes_data = 1;
1301 	else
1302 		zram->disk->queue->limits.discard_zeroes_data = 0;
1303 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
1304 
1305 	add_disk(zram->disk);
1306 
1307 	ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
1308 				&zram_disk_attr_group);
1309 	if (ret < 0) {
1310 		pr_err("Error creating sysfs group for device %d\n",
1311 				device_id);
1312 		goto out_free_disk;
1313 	}
1314 	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
1315 	zram->meta = NULL;
1316 
1317 	pr_info("Added device: %s\n", zram->disk->disk_name);
1318 	return device_id;
1319 
1320 out_free_disk:
1321 	del_gendisk(zram->disk);
1322 	put_disk(zram->disk);
1323 out_free_queue:
1324 	blk_cleanup_queue(queue);
1325 out_free_idr:
1326 	idr_remove(&zram_index_idr, device_id);
1327 out_free_dev:
1328 	kfree(zram);
1329 	return ret;
1330 }
1331 
1332 static int zram_remove(struct zram *zram)
1333 {
1334 	struct block_device *bdev;
1335 
1336 	bdev = bdget_disk(zram->disk, 0);
1337 	if (!bdev)
1338 		return -ENOMEM;
1339 
1340 	mutex_lock(&bdev->bd_mutex);
1341 	if (bdev->bd_openers || zram->claim) {
1342 		mutex_unlock(&bdev->bd_mutex);
1343 		bdput(bdev);
1344 		return -EBUSY;
1345 	}
1346 
1347 	zram->claim = true;
1348 	mutex_unlock(&bdev->bd_mutex);
1349 
1350 	/*
1351 	 * Remove sysfs first, so no one will perform a disksize
1352 	 * store while we destroy the devices. This also helps during
1353 	 * hot_remove -- zram_reset_device() is the last holder of
1354 	 * ->init_lock, no later/concurrent disksize_store() or any
1355 	 * other sysfs handlers are possible.
1356 	 */
1357 	sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
1358 			&zram_disk_attr_group);
1359 
1360 	/* Make sure all the pending I/O are finished */
1361 	fsync_bdev(bdev);
1362 	zram_reset_device(zram);
1363 	bdput(bdev);
1364 
1365 	pr_info("Removed device: %s\n", zram->disk->disk_name);
1366 
1367 	blk_cleanup_queue(zram->disk->queue);
1368 	del_gendisk(zram->disk);
1369 	put_disk(zram->disk);
1370 	kfree(zram);
1371 	return 0;
1372 }
1373 
1374 /* zram-control sysfs attributes */
1375 static ssize_t hot_add_show(struct class *class,
1376 			struct class_attribute *attr,
1377 			char *buf)
1378 {
1379 	int ret;
1380 
1381 	mutex_lock(&zram_index_mutex);
1382 	ret = zram_add();
1383 	mutex_unlock(&zram_index_mutex);
1384 
1385 	if (ret < 0)
1386 		return ret;
1387 	return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
1388 }
1389 
1390 static ssize_t hot_remove_store(struct class *class,
1391 			struct class_attribute *attr,
1392 			const char *buf,
1393 			size_t count)
1394 {
1395 	struct zram *zram;
1396 	int ret, dev_id;
1397 
1398 	/* dev_id is gendisk->first_minor, which is `int' */
1399 	ret = kstrtoint(buf, 10, &dev_id);
1400 	if (ret)
1401 		return ret;
1402 	if (dev_id < 0)
1403 		return -EINVAL;
1404 
1405 	mutex_lock(&zram_index_mutex);
1406 
1407 	zram = idr_find(&zram_index_idr, dev_id);
1408 	if (zram) {
1409 		ret = zram_remove(zram);
1410 		if (!ret)
1411 			idr_remove(&zram_index_idr, dev_id);
1412 	} else {
1413 		ret = -ENODEV;
1414 	}
1415 
1416 	mutex_unlock(&zram_index_mutex);
1417 	return ret ? ret : count;
1418 }
1419 
1420 /*
1421  * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a
1422  * sense that reading from this file does alter the state of your system -- it
1423  * creates a new un-initialized zram device and returns back this device's
1424  * device_id (or an error code if it fails to create a new device).
1425  */
1426 static struct class_attribute zram_control_class_attrs[] = {
1427 	__ATTR(hot_add, 0400, hot_add_show, NULL),
1428 	__ATTR_WO(hot_remove),
1429 	__ATTR_NULL,
1430 };
1431 
1432 static struct class zram_control_class = {
1433 	.name		= "zram-control",
1434 	.owner		= THIS_MODULE,
1435 	.class_attrs	= zram_control_class_attrs,
1436 };
1437 
1438 static int zram_remove_cb(int id, void *ptr, void *data)
1439 {
1440 	zram_remove(ptr);
1441 	return 0;
1442 }
1443 
1444 static void destroy_devices(void)
1445 {
1446 	class_unregister(&zram_control_class);
1447 	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
1448 	idr_destroy(&zram_index_idr);
1449 	unregister_blkdev(zram_major, "zram");
1450 	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
1451 }
1452 
1453 static int __init zram_init(void)
1454 {
1455 	int ret;
1456 
1457 	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
1458 				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
1459 	if (ret < 0)
1460 		return ret;
1461 
1462 	ret = class_register(&zram_control_class);
1463 	if (ret) {
1464 		pr_err("Unable to register zram-control class\n");
1465 		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
1466 		return ret;
1467 	}
1468 
1469 	zram_major = register_blkdev(0, "zram");
1470 	if (zram_major <= 0) {
1471 		pr_err("Unable to get major number\n");
1472 		class_unregister(&zram_control_class);
1473 		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
1474 		return -EBUSY;
1475 	}
1476 
1477 	while (num_devices != 0) {
1478 		mutex_lock(&zram_index_mutex);
1479 		ret = zram_add();
1480 		mutex_unlock(&zram_index_mutex);
1481 		if (ret < 0)
1482 			goto out_error;
1483 		num_devices--;
1484 	}
1485 
1486 	return 0;
1487 
1488 out_error:
1489 	destroy_devices();
1490 	return ret;
1491 }
1492 
1493 static void __exit zram_exit(void)
1494 {
1495 	destroy_devices();
1496 }
1497 
1498 module_init(zram_init);
1499 module_exit(zram_exit);
1500 
1501 module_param(num_devices, uint, 0);
1502 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
1503 
1504 MODULE_LICENSE("Dual BSD/GPL");
1505 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
1506 MODULE_DESCRIPTION("Compressed RAM Block Device");
1507