xref: /openbmc/linux/drivers/block/zram/zram_drv.c (revision bc5aa3a0)
1 /*
2  * Compressed RAM block device
3  *
4  * Copyright (C) 2008, 2009, 2010  Nitin Gupta
5  *               2012, 2013 Minchan Kim
6  *
7  * This code is released using a dual license strategy: BSD/GPL
8  * You can choose the licence that better fits your requirements.
9  *
10  * Released under the terms of 3-clause BSD License
11  * Released under the terms of GNU General Public License Version 2.0
12  *
13  */
14 
15 #define KMSG_COMPONENT "zram"
16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17 
18 #include <linux/module.h>
19 #include <linux/kernel.h>
20 #include <linux/bio.h>
21 #include <linux/bitops.h>
22 #include <linux/blkdev.h>
23 #include <linux/buffer_head.h>
24 #include <linux/device.h>
25 #include <linux/genhd.h>
26 #include <linux/highmem.h>
27 #include <linux/slab.h>
28 #include <linux/string.h>
29 #include <linux/vmalloc.h>
30 #include <linux/err.h>
31 #include <linux/idr.h>
32 #include <linux/sysfs.h>
33 
34 #include "zram_drv.h"
35 
36 static DEFINE_IDR(zram_index_idr);
37 /* idr index must be protected */
38 static DEFINE_MUTEX(zram_index_mutex);
39 
40 static int zram_major;
41 static const char *default_compressor = "lzo";
42 
43 /* Module params (documentation at end) */
44 static unsigned int num_devices = 1;
45 
46 static inline void deprecated_attr_warn(const char *name)
47 {
48 	pr_warn_once("%d (%s) Attribute %s (and others) will be removed. %s\n",
49 			task_pid_nr(current),
50 			current->comm,
51 			name,
52 			"See zram documentation.");
53 }
54 
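/*
 * Generate a read-only sysfs attribute that prints the 64-bit stat
 * counter of the same name (e.g. num_reads, failed_writes). These
 * per-stat attrs are deprecated; see deprecated_attr_warn() above.
 */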
55 #define ZRAM_ATTR_RO(name)						\
56 static ssize_t name##_show(struct device *d,				\
57 				struct device_attribute *attr, char *b)	\
58 {									\
59 	struct zram *zram = dev_to_zram(d);				\
60 									\
61 	deprecated_attr_warn(__stringify(name));			\
62 	return scnprintf(b, PAGE_SIZE, "%llu\n",			\
63 		(u64)atomic64_read(&zram->stats.name));			\
64 }									\
65 static DEVICE_ATTR_RO(name);
66 
67 static inline bool init_done(struct zram *zram)
68 {
69 	return zram->disksize;
70 }
71 
72 static inline struct zram *dev_to_zram(struct device *dev)
73 {
74 	return (struct zram *)dev_to_disk(dev)->private_data;
75 }
76 
77 /* flag operations require the table entry's bit_spin_lock() to be held */
78 static int zram_test_flag(struct zram_meta *meta, u32 index,
79 			enum zram_pageflags flag)
80 {
81 	return meta->table[index].value & BIT(flag);
82 }
83 
84 static void zram_set_flag(struct zram_meta *meta, u32 index,
85 			enum zram_pageflags flag)
86 {
87 	meta->table[index].value |= BIT(flag);
88 }
89 
90 static void zram_clear_flag(struct zram_meta *meta, u32 index,
91 			enum zram_pageflags flag)
92 {
93 	meta->table[index].value &= ~BIT(flag);
94 }
95 
96 static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
97 {
98 	return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
99 }
100 
101 static void zram_set_obj_size(struct zram_meta *meta,
102 					u32 index, size_t size)
103 {
104 	unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
105 
106 	meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
107 }
108 
109 static inline bool is_partial_io(struct bio_vec *bvec)
110 {
111 	return bvec->bv_len != PAGE_SIZE;
112 }
113 
114 /*
115  * Check if request is within bounds and aligned on zram logical blocks.
116  */
117 static inline bool valid_io_request(struct zram *zram,
118 		sector_t start, unsigned int size)
119 {
120 	u64 end, bound;
121 
122 	/* unaligned request */
123 	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
124 		return false;
125 	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
126 		return false;
127 
128 	end = start + (size >> SECTOR_SHIFT);
129 	bound = zram->disksize >> SECTOR_SHIFT;
130 	/* out of range */
131 	if (unlikely(start >= bound || end > bound || start > end))
132 		return false;
133 
134 	/* I/O request is valid */
135 	return true;
136 }
137 
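/* Advance (index, offset) past this bvec; offset wraps modulo PAGE_SIZE. */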
138 static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
139 {
140 	if (*offset + bvec->bv_len >= PAGE_SIZE)
141 		(*index)++;
142 	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
143 }
144 
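/*
 * Racelessly raise the max_used_pages high-water mark with a cmpxchg
 * retry loop; the value is only ever increased here.
 */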
145 static inline void update_used_max(struct zram *zram,
146 					const unsigned long pages)
147 {
148 	unsigned long old_max, cur_max;
149 
150 	old_max = atomic_long_read(&zram->stats.max_used_pages);
151 
152 	do {
153 		cur_max = old_max;
154 		if (pages > cur_max)
155 			old_max = atomic_long_cmpxchg(
156 				&zram->stats.max_used_pages, cur_max, pages);
157 	} while (old_max != cur_max);
158 }
159 
160 static bool page_zero_filled(void *ptr)
161 {
162 	unsigned int pos;
163 	unsigned long *page;
164 
165 	page = (unsigned long *)ptr;
166 
167 	for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
168 		if (page[pos])
169 			return false;
170 	}
171 
172 	return true;
173 }
174 
175 static void handle_zero_page(struct bio_vec *bvec)
176 {
177 	struct page *page = bvec->bv_page;
178 	void *user_mem;
179 
180 	user_mem = kmap_atomic(page);
181 	if (is_partial_io(bvec))
182 		memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
183 	else
184 		clear_page(user_mem);
185 	kunmap_atomic(user_mem);
186 
187 	flush_dcache_page(page);
188 }
189 
190 static ssize_t initstate_show(struct device *dev,
191 		struct device_attribute *attr, char *buf)
192 {
193 	u32 val;
194 	struct zram *zram = dev_to_zram(dev);
195 
196 	down_read(&zram->init_lock);
197 	val = init_done(zram);
198 	up_read(&zram->init_lock);
199 
200 	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
201 }
202 
203 static ssize_t disksize_show(struct device *dev,
204 		struct device_attribute *attr, char *buf)
205 {
206 	struct zram *zram = dev_to_zram(dev);
207 
208 	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
209 }
210 
211 static ssize_t orig_data_size_show(struct device *dev,
212 		struct device_attribute *attr, char *buf)
213 {
214 	struct zram *zram = dev_to_zram(dev);
215 
216 	deprecated_attr_warn("orig_data_size");
217 	return scnprintf(buf, PAGE_SIZE, "%llu\n",
218 		(u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
219 }
220 
221 static ssize_t mem_used_total_show(struct device *dev,
222 		struct device_attribute *attr, char *buf)
223 {
224 	u64 val = 0;
225 	struct zram *zram = dev_to_zram(dev);
226 
227 	deprecated_attr_warn("mem_used_total");
228 	down_read(&zram->init_lock);
229 	if (init_done(zram)) {
230 		struct zram_meta *meta = zram->meta;
231 		val = zs_get_total_pages(meta->mem_pool);
232 	}
233 	up_read(&zram->init_lock);
234 
235 	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
236 }
237 
238 static ssize_t mem_limit_show(struct device *dev,
239 		struct device_attribute *attr, char *buf)
240 {
241 	u64 val;
242 	struct zram *zram = dev_to_zram(dev);
243 
244 	deprecated_attr_warn("mem_limit");
245 	down_read(&zram->init_lock);
246 	val = zram->limit_pages;
247 	up_read(&zram->init_lock);
248 
249 	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
250 }
251 
252 static ssize_t mem_limit_store(struct device *dev,
253 		struct device_attribute *attr, const char *buf, size_t len)
254 {
255 	u64 limit;
256 	char *tmp;
257 	struct zram *zram = dev_to_zram(dev);
258 
259 	limit = memparse(buf, &tmp);
260 	if (buf == tmp) /* no chars parsed, invalid input */
261 		return -EINVAL;
262 
263 	down_write(&zram->init_lock);
264 	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
265 	up_write(&zram->init_lock);
266 
267 	return len;
268 }
269 
270 static ssize_t mem_used_max_show(struct device *dev,
271 		struct device_attribute *attr, char *buf)
272 {
273 	u64 val = 0;
274 	struct zram *zram = dev_to_zram(dev);
275 
276 	deprecated_attr_warn("mem_used_max");
277 	down_read(&zram->init_lock);
278 	if (init_done(zram))
279 		val = atomic_long_read(&zram->stats.max_used_pages);
280 	up_read(&zram->init_lock);
281 
282 	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
283 }
284 
285 static ssize_t mem_used_max_store(struct device *dev,
286 		struct device_attribute *attr, const char *buf, size_t len)
287 {
288 	int err;
289 	unsigned long val;
290 	struct zram *zram = dev_to_zram(dev);
291 
292 	err = kstrtoul(buf, 10, &val);
293 	if (err || val != 0)
294 		return -EINVAL;
295 
296 	down_read(&zram->init_lock);
297 	if (init_done(zram)) {
298 		struct zram_meta *meta = zram->meta;
299 		atomic_long_set(&zram->stats.max_used_pages,
300 				zs_get_total_pages(meta->mem_pool));
301 	}
302 	up_read(&zram->init_lock);
303 
304 	return len;
305 }
306 
307 /*
308  * We switched to per-cpu streams and this attr is not needed anymore.
309  * However, we will keep it around for some time, because:
310  * a) we may revert per-cpu streams in the future
311  * b) it's visible to user space and we need to follow our 2-year
312  *    retirement rule; but we already have a number of 'soon to be
313  *    altered' attrs, so max_comp_streams needs to wait for the next
314  *    layoff cycle.
315  */
316 static ssize_t max_comp_streams_show(struct device *dev,
317 		struct device_attribute *attr, char *buf)
318 {
319 	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
320 }
321 
322 static ssize_t max_comp_streams_store(struct device *dev,
323 		struct device_attribute *attr, const char *buf, size_t len)
324 {
325 	return len;
326 }
327 
328 static ssize_t comp_algorithm_show(struct device *dev,
329 		struct device_attribute *attr, char *buf)
330 {
331 	size_t sz;
332 	struct zram *zram = dev_to_zram(dev);
333 
334 	down_read(&zram->init_lock);
335 	sz = zcomp_available_show(zram->compressor, buf);
336 	up_read(&zram->init_lock);
337 
338 	return sz;
339 }
340 
341 static ssize_t comp_algorithm_store(struct device *dev,
342 		struct device_attribute *attr, const char *buf, size_t len)
343 {
344 	struct zram *zram = dev_to_zram(dev);
345 	char compressor[CRYPTO_MAX_ALG_NAME];
346 	size_t sz;
347 
348 	strlcpy(compressor, buf, sizeof(compressor));
349 	/* ignore trailing newline */
350 	sz = strlen(compressor);
351 	if (sz > 0 && compressor[sz - 1] == '\n')
352 		compressor[sz - 1] = 0x00;
353 
354 	if (!zcomp_available_algorithm(compressor))
355 		return -EINVAL;
356 
357 	down_write(&zram->init_lock);
358 	if (init_done(zram)) {
359 		up_write(&zram->init_lock);
360 		pr_info("Can't change algorithm for initialized device\n");
361 		return -EBUSY;
362 	}
363 
364 	strlcpy(zram->compressor, compressor, sizeof(compressor));
365 	up_write(&zram->init_lock);
366 	return len;
367 }
368 
369 static ssize_t compact_store(struct device *dev,
370 		struct device_attribute *attr, const char *buf, size_t len)
371 {
372 	struct zram *zram = dev_to_zram(dev);
373 	struct zram_meta *meta;
374 
375 	down_read(&zram->init_lock);
376 	if (!init_done(zram)) {
377 		up_read(&zram->init_lock);
378 		return -EINVAL;
379 	}
380 
381 	meta = zram->meta;
382 	zs_compact(meta->mem_pool);
383 	up_read(&zram->init_lock);
384 
385 	return len;
386 }
387 
388 static ssize_t io_stat_show(struct device *dev,
389 		struct device_attribute *attr, char *buf)
390 {
391 	struct zram *zram = dev_to_zram(dev);
392 	ssize_t ret;
393 
394 	down_read(&zram->init_lock);
395 	ret = scnprintf(buf, PAGE_SIZE,
396 			"%8llu %8llu %8llu %8llu\n",
397 			(u64)atomic64_read(&zram->stats.failed_reads),
398 			(u64)atomic64_read(&zram->stats.failed_writes),
399 			(u64)atomic64_read(&zram->stats.invalid_io),
400 			(u64)atomic64_read(&zram->stats.notify_free));
401 	up_read(&zram->init_lock);
402 
403 	return ret;
404 }
405 
406 static ssize_t mm_stat_show(struct device *dev,
407 		struct device_attribute *attr, char *buf)
408 {
409 	struct zram *zram = dev_to_zram(dev);
410 	struct zs_pool_stats pool_stats;
411 	u64 orig_size, mem_used = 0;
412 	long max_used;
413 	ssize_t ret;
414 
415 	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
416 
417 	down_read(&zram->init_lock);
418 	if (init_done(zram)) {
419 		mem_used = zs_get_total_pages(zram->meta->mem_pool);
420 		zs_pool_stats(zram->meta->mem_pool, &pool_stats);
421 	}
422 
423 	orig_size = atomic64_read(&zram->stats.pages_stored);
424 	max_used = atomic_long_read(&zram->stats.max_used_pages);
425 
426 	ret = scnprintf(buf, PAGE_SIZE,
427 			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n",
428 			orig_size << PAGE_SHIFT,
429 			(u64)atomic64_read(&zram->stats.compr_data_size),
430 			mem_used << PAGE_SHIFT,
431 			zram->limit_pages << PAGE_SHIFT,
432 			max_used << PAGE_SHIFT,
433 			(u64)atomic64_read(&zram->stats.zero_pages),
434 			pool_stats.pages_compacted);
435 	up_read(&zram->init_lock);
436 
437 	return ret;
438 }
439 
440 static ssize_t debug_stat_show(struct device *dev,
441 		struct device_attribute *attr, char *buf)
442 {
443 	int version = 1;
444 	struct zram *zram = dev_to_zram(dev);
445 	ssize_t ret;
446 
447 	down_read(&zram->init_lock);
448 	ret = scnprintf(buf, PAGE_SIZE,
449 			"version: %d\n%8llu\n",
450 			version,
451 			(u64)atomic64_read(&zram->stats.writestall));
452 	up_read(&zram->init_lock);
453 
454 	return ret;
455 }
456 
457 static DEVICE_ATTR_RO(io_stat);
458 static DEVICE_ATTR_RO(mm_stat);
459 static DEVICE_ATTR_RO(debug_stat);
460 ZRAM_ATTR_RO(num_reads);
461 ZRAM_ATTR_RO(num_writes);
462 ZRAM_ATTR_RO(failed_reads);
463 ZRAM_ATTR_RO(failed_writes);
464 ZRAM_ATTR_RO(invalid_io);
465 ZRAM_ATTR_RO(notify_free);
466 ZRAM_ATTR_RO(zero_pages);
467 ZRAM_ATTR_RO(compr_data_size);
468 
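/*
 * Pin zram->meta for the duration of an I/O. zram_meta_get() fails once
 * the refcount has dropped to zero, i.e. while the device is being reset
 * (see zram_reset_device()), so in-flight I/O cannot race with meta
 * teardown.
 */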
469 static inline bool zram_meta_get(struct zram *zram)
470 {
471 	if (atomic_inc_not_zero(&zram->refcount))
472 		return true;
473 	return false;
474 }
475 
476 static inline void zram_meta_put(struct zram *zram)
477 {
478 	atomic_dec(&zram->refcount);
479 }
480 
481 static void zram_meta_free(struct zram_meta *meta, u64 disksize)
482 {
483 	size_t num_pages = disksize >> PAGE_SHIFT;
484 	size_t index;
485 
486 	/* Free all pages that are still in this zram device */
487 	for (index = 0; index < num_pages; index++) {
488 		unsigned long handle = meta->table[index].handle;
489 
490 		if (!handle)
491 			continue;
492 
493 		zs_free(meta->mem_pool, handle);
494 	}
495 
496 	zs_destroy_pool(meta->mem_pool);
497 	vfree(meta->table);
498 	kfree(meta);
499 }
500 
501 static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize)
502 {
503 	size_t num_pages;
504 	struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
505 
506 	if (!meta)
507 		return NULL;
508 
509 	num_pages = disksize >> PAGE_SHIFT;
510 	meta->table = vzalloc(num_pages * sizeof(*meta->table));
511 	if (!meta->table) {
512 		pr_err("Error allocating zram address table\n");
513 		goto out_error;
514 	}
515 
516 	meta->mem_pool = zs_create_pool(pool_name);
517 	if (!meta->mem_pool) {
518 		pr_err("Error creating memory pool\n");
519 		goto out_error;
520 	}
521 
522 	return meta;
523 
524 out_error:
525 	vfree(meta->table);
526 	kfree(meta);
527 	return NULL;
528 }
529 
530 /*
531  * To protect concurrent access to the same index entry,
532  * the caller should hold this table index entry's bit_spinlock to
533  * indicate that this index entry is being accessed.
534  */
535 static void zram_free_page(struct zram *zram, size_t index)
536 {
537 	struct zram_meta *meta = zram->meta;
538 	unsigned long handle = meta->table[index].handle;
539 
540 	if (unlikely(!handle)) {
541 		/*
542 		 * No memory is allocated for zero filled pages.
543 		 * Simply clear zero page flag.
544 		 */
545 		if (zram_test_flag(meta, index, ZRAM_ZERO)) {
546 			zram_clear_flag(meta, index, ZRAM_ZERO);
547 			atomic64_dec(&zram->stats.zero_pages);
548 		}
549 		return;
550 	}
551 
552 	zs_free(meta->mem_pool, handle);
553 
554 	atomic64_sub(zram_get_obj_size(meta, index),
555 			&zram->stats.compr_data_size);
556 	atomic64_dec(&zram->stats.pages_stored);
557 
558 	meta->table[index].handle = 0;
559 	zram_set_obj_size(meta, index, 0);
560 }
561 
562 static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
563 {
564 	int ret = 0;
565 	unsigned char *cmem;
566 	struct zram_meta *meta = zram->meta;
567 	unsigned long handle;
568 	unsigned int size;
569 
570 	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
571 	handle = meta->table[index].handle;
572 	size = zram_get_obj_size(meta, index);
573 
574 	if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
575 		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
576 		clear_page(mem);
577 		return 0;
578 	}
579 
580 	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
581 	if (size == PAGE_SIZE) {
582 		copy_page(mem, cmem);
583 	} else {
584 		struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
585 
586 		ret = zcomp_decompress(zstrm, cmem, size, mem);
587 		zcomp_stream_put(zram->comp);
588 	}
589 	zs_unmap_object(meta->mem_pool, handle);
590 	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
591 
592 	/* Should NEVER happen. Return bio error if it does. */
593 	if (unlikely(ret)) {
594 		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
595 		return ret;
596 	}
597 
598 	return 0;
599 }
600 
601 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
602 			  u32 index, int offset)
603 {
604 	int ret;
605 	struct page *page;
606 	unsigned char *user_mem, *uncmem = NULL;
607 	struct zram_meta *meta = zram->meta;
608 	page = bvec->bv_page;
609 
610 	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
611 	if (unlikely(!meta->table[index].handle) ||
612 			zram_test_flag(meta, index, ZRAM_ZERO)) {
613 		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
614 		handle_zero_page(bvec);
615 		return 0;
616 	}
617 	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
618 
619 	if (is_partial_io(bvec))
620 		/* Use a temporary buffer to decompress the page */
621 		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
622 
623 	user_mem = kmap_atomic(page);
624 	if (!is_partial_io(bvec))
625 		uncmem = user_mem;
626 
627 	if (!uncmem) {
628 		pr_err("Unable to allocate temp memory\n");
629 		ret = -ENOMEM;
630 		goto out_cleanup;
631 	}
632 
633 	ret = zram_decompress_page(zram, uncmem, index);
634 	/* Should NEVER happen. Return bio error if it does. */
635 	if (unlikely(ret))
636 		goto out_cleanup;
637 
638 	if (is_partial_io(bvec))
639 		memcpy(user_mem + bvec->bv_offset, uncmem + offset,
640 				bvec->bv_len);
641 
642 	flush_dcache_page(page);
643 	ret = 0;
644 out_cleanup:
645 	kunmap_atomic(user_mem);
646 	if (is_partial_io(bvec))
647 		kfree(uncmem);
648 	return ret;
649 }
650 
651 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
652 			   int offset)
653 {
654 	int ret = 0;
655 	unsigned int clen;
656 	unsigned long handle = 0;
657 	struct page *page;
658 	unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
659 	struct zram_meta *meta = zram->meta;
660 	struct zcomp_strm *zstrm = NULL;
661 	unsigned long alloced_pages;
662 
663 	page = bvec->bv_page;
664 	if (is_partial_io(bvec)) {
665 		/*
666 		 * This is a partial I/O. We need to read the full page
667 		 * before writing the changes.
668 		 */
669 		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
670 		if (!uncmem) {
671 			ret = -ENOMEM;
672 			goto out;
673 		}
674 		ret = zram_decompress_page(zram, uncmem, index);
675 		if (ret)
676 			goto out;
677 	}
678 
679 compress_again:
680 	user_mem = kmap_atomic(page);
681 	if (is_partial_io(bvec)) {
682 		memcpy(uncmem + offset, user_mem + bvec->bv_offset,
683 		       bvec->bv_len);
684 		kunmap_atomic(user_mem);
685 		user_mem = NULL;
686 	} else {
687 		uncmem = user_mem;
688 	}
689 
690 	if (page_zero_filled(uncmem)) {
691 		if (user_mem)
692 			kunmap_atomic(user_mem);
693 		/* Free memory associated with this sector now. */
694 		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
695 		zram_free_page(zram, index);
696 		zram_set_flag(meta, index, ZRAM_ZERO);
697 		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
698 
699 		atomic64_inc(&zram->stats.zero_pages);
700 		ret = 0;
701 		goto out;
702 	}
703 
704 	zstrm = zcomp_stream_get(zram->comp);
705 	ret = zcomp_compress(zstrm, uncmem, &clen);
706 	if (!is_partial_io(bvec)) {
707 		kunmap_atomic(user_mem);
708 		user_mem = NULL;
709 		uncmem = NULL;
710 	}
711 
712 	if (unlikely(ret)) {
713 		pr_err("Compression failed! err=%d\n", ret);
714 		goto out;
715 	}
716 
717 	src = zstrm->buffer;
718 	if (unlikely(clen > max_zpage_size)) {
719 		clen = PAGE_SIZE;
720 		if (is_partial_io(bvec))
721 			src = uncmem;
722 	}
723 
724 	/*
725 	 * handle allocation has 2 paths:
726 	 * a) fast path is executed with preemption disabled (for
727 	 *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
728 	 *  since we can't sleep;
729 	 * b) slow path enables preemption and attempts to allocate
730 	 *  the page with __GFP_DIRECT_RECLAIM bit set. We have to
731 	 *  put the per-cpu compression stream and, thus, re-do
732 	 *  the compression once the handle is allocated.
733 	 *
734 	 * If we have a 'non-null' handle here then we are coming
735 	 * from the slow path and the handle has already been allocated.
736 	 */
737 	if (!handle)
738 		handle = zs_malloc(meta->mem_pool, clen,
739 				__GFP_KSWAPD_RECLAIM |
740 				__GFP_NOWARN |
741 				__GFP_HIGHMEM |
742 				__GFP_MOVABLE);
743 	if (!handle) {
744 		zcomp_stream_put(zram->comp);
745 		zstrm = NULL;
746 
747 		atomic64_inc(&zram->stats.writestall);
748 
749 		handle = zs_malloc(meta->mem_pool, clen,
750 				GFP_NOIO | __GFP_HIGHMEM |
751 				__GFP_MOVABLE);
752 		if (handle)
753 			goto compress_again;
754 
755 		pr_err("Error allocating memory for compressed page: %u, size=%u\n",
756 			index, clen);
757 		ret = -ENOMEM;
758 		goto out;
759 	}
760 
761 	alloced_pages = zs_get_total_pages(meta->mem_pool);
762 	update_used_max(zram, alloced_pages);
763 
764 	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
765 		zs_free(meta->mem_pool, handle);
766 		ret = -ENOMEM;
767 		goto out;
768 	}
769 
770 	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
771 
772 	if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
773 		src = kmap_atomic(page);
774 		copy_page(cmem, src);
775 		kunmap_atomic(src);
776 	} else {
777 		memcpy(cmem, src, clen);
778 	}
779 
780 	zcomp_stream_put(zram->comp);
781 	zstrm = NULL;
782 	zs_unmap_object(meta->mem_pool, handle);
783 
784 	/*
785 	 * Free memory associated with this sector
786 	 * before overwriting unused sectors.
787 	 */
788 	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
789 	zram_free_page(zram, index);
790 
791 	meta->table[index].handle = handle;
792 	zram_set_obj_size(meta, index, clen);
793 	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
794 
795 	/* Update stats */
796 	atomic64_add(clen, &zram->stats.compr_data_size);
797 	atomic64_inc(&zram->stats.pages_stored);
798 out:
799 	if (zstrm)
800 		zcomp_stream_put(zram->comp);
801 	if (is_partial_io(bvec))
802 		kfree(uncmem);
803 	return ret;
804 }
805 
806 /*
807  * zram_bio_discard - handler on discard request
808  * @index: physical block index in PAGE_SIZE units
809  * @offset: byte offset within physical block
810  */
811 static void zram_bio_discard(struct zram *zram, u32 index,
812 			     int offset, struct bio *bio)
813 {
814 	size_t n = bio->bi_iter.bi_size;
815 	struct zram_meta *meta = zram->meta;
816 
817 	/*
818 	 * zram manages data in physical block size units. Because logical block
819 	 * size isn't identical with physical block size on some architectures,
820 	 * we could get a discard request pointing to a specific offset within a
821 	 * certain physical block.  Although we can handle this request by
822 	 * reading that physical block and decompressing and partially zeroing
823 	 * and re-compressing and then re-storing it, this isn't reasonable
824 	 * because our intent with a discard request is to save memory.  So
825 	 * skipping this logical block is appropriate here.
826 	 */
827 	if (offset) {
828 		if (n <= (PAGE_SIZE - offset))
829 			return;
830 
831 		n -= (PAGE_SIZE - offset);
832 		index++;
833 	}
834 
835 	while (n >= PAGE_SIZE) {
836 		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
837 		zram_free_page(zram, index);
838 		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
839 		atomic64_inc(&zram->stats.notify_free);
840 		index++;
841 		n -= PAGE_SIZE;
842 	}
843 }
844 
845 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
846 			int offset, bool is_write)
847 {
848 	unsigned long start_time = jiffies;
849 	int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
850 	int ret;
851 
852 	generic_start_io_acct(rw_acct, bvec->bv_len >> SECTOR_SHIFT,
853 			&zram->disk->part0);
854 
855 	if (!is_write) {
856 		atomic64_inc(&zram->stats.num_reads);
857 		ret = zram_bvec_read(zram, bvec, index, offset);
858 	} else {
859 		atomic64_inc(&zram->stats.num_writes);
860 		ret = zram_bvec_write(zram, bvec, index, offset);
861 	}
862 
863 	generic_end_io_acct(rw_acct, &zram->disk->part0, start_time);
864 
865 	if (unlikely(ret)) {
866 		if (!is_write)
867 			atomic64_inc(&zram->stats.failed_reads);
868 		else
869 			atomic64_inc(&zram->stats.failed_writes);
870 	}
871 
872 	return ret;
873 }
874 
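/*
 * Walk the bio segment by segment; a segment that spans a zram page
 * boundary is split so that zram_bvec_rw() always operates on a single
 * zram page.
 */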
875 static void __zram_make_request(struct zram *zram, struct bio *bio)
876 {
877 	int offset;
878 	u32 index;
879 	struct bio_vec bvec;
880 	struct bvec_iter iter;
881 
882 	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
883 	offset = (bio->bi_iter.bi_sector &
884 		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
885 
886 	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
887 		zram_bio_discard(zram, index, offset, bio);
888 		bio_endio(bio);
889 		return;
890 	}
891 
892 	bio_for_each_segment(bvec, bio, iter) {
893 		int max_transfer_size = PAGE_SIZE - offset;
894 
895 		if (bvec.bv_len > max_transfer_size) {
896 			/*
897 			 * zram_bvec_rw() can only operate on a single
898 			 * zram page. Split the bio vector.
899 			 */
900 			struct bio_vec bv;
901 
902 			bv.bv_page = bvec.bv_page;
903 			bv.bv_len = max_transfer_size;
904 			bv.bv_offset = bvec.bv_offset;
905 
906 			if (zram_bvec_rw(zram, &bv, index, offset,
907 					 op_is_write(bio_op(bio))) < 0)
908 				goto out;
909 
910 			bv.bv_len = bvec.bv_len - max_transfer_size;
911 			bv.bv_offset += max_transfer_size;
912 			if (zram_bvec_rw(zram, &bv, index + 1, 0,
913 					 op_is_write(bio_op(bio))) < 0)
914 				goto out;
915 		} else
916 			if (zram_bvec_rw(zram, &bvec, index, offset,
917 					 op_is_write(bio_op(bio))) < 0)
918 				goto out;
919 
920 		update_position(&index, &offset, &bvec);
921 	}
922 
923 	bio_endio(bio);
924 	return;
925 
926 out:
927 	bio_io_error(bio);
928 }
929 
930 /*
931  * Handler function for all zram I/O requests.
932  */
933 static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
934 {
935 	struct zram *zram = queue->queuedata;
936 
937 	if (unlikely(!zram_meta_get(zram)))
938 		goto error;
939 
940 	blk_queue_split(queue, &bio, queue->bio_split);
941 
942 	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
943 					bio->bi_iter.bi_size)) {
944 		atomic64_inc(&zram->stats.invalid_io);
945 		goto put_zram;
946 	}
947 
948 	__zram_make_request(zram, bio);
949 	zram_meta_put(zram);
950 	return BLK_QC_T_NONE;
951 put_zram:
952 	zram_meta_put(zram);
953 error:
954 	bio_io_error(bio);
955 	return BLK_QC_T_NONE;
956 }
957 
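/*
 * ->swap_slot_free_notify entry point: called when swap frees a slot so
 * the compressed object backing that slot can be released immediately.
 */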
958 static void zram_slot_free_notify(struct block_device *bdev,
959 				unsigned long index)
960 {
961 	struct zram *zram;
962 	struct zram_meta *meta;
963 
964 	zram = bdev->bd_disk->private_data;
965 	meta = zram->meta;
966 
967 	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
968 	zram_free_page(zram, index);
969 	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
970 	atomic64_inc(&zram->stats.notify_free);
971 }
972 
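/*
 * ->rw_page entry point: synchronously read or write one full page,
 * bypassing the bio path (used e.g. by swap_readpage/__swap_writepage).
 */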
973 static int zram_rw_page(struct block_device *bdev, sector_t sector,
974 		       struct page *page, bool is_write)
975 {
976 	int offset, err = -EIO;
977 	u32 index;
978 	struct zram *zram;
979 	struct bio_vec bv;
980 
981 	zram = bdev->bd_disk->private_data;
982 	if (unlikely(!zram_meta_get(zram)))
983 		goto out;
984 
985 	if (!valid_io_request(zram, sector, PAGE_SIZE)) {
986 		atomic64_inc(&zram->stats.invalid_io);
987 		err = -EINVAL;
988 		goto put_zram;
989 	}
990 
991 	index = sector >> SECTORS_PER_PAGE_SHIFT;
992 	offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
993 
994 	bv.bv_page = page;
995 	bv.bv_len = PAGE_SIZE;
996 	bv.bv_offset = 0;
997 
998 	err = zram_bvec_rw(zram, &bv, index, offset, is_write);
999 put_zram:
1000 	zram_meta_put(zram);
1001 out:
1002 	/*
1003 	 * If I/O fails, just return the error (i.e., non-zero) without
1004 	 * calling page_endio.
1005 	 * The upper layers of rw_page (e.g., swap_readpage, __swap_writepage)
1006 	 * will then resubmit the I/O as a bio request, and bio->bi_end_io
1007 	 * handles the error (e.g., SetPageError, set_page_dirty and other
1008 	 * cleanup work).
1009 	 */
1010 	if (err == 0)
1011 		page_endio(page, is_write, 0);
1012 	return err;
1013 }
1014 
1015 static void zram_reset_device(struct zram *zram)
1016 {
1017 	struct zram_meta *meta;
1018 	struct zcomp *comp;
1019 	u64 disksize;
1020 
1021 	down_write(&zram->init_lock);
1022 
1023 	zram->limit_pages = 0;
1024 
1025 	if (!init_done(zram)) {
1026 		up_write(&zram->init_lock);
1027 		return;
1028 	}
1029 
1030 	meta = zram->meta;
1031 	comp = zram->comp;
1032 	disksize = zram->disksize;
1033 	/*
1034 	 * Refcount will go down to 0 eventually and the r/w handler
1035 	 * cannot handle further I/O, so it will bail out via the
1036 	 * zram_meta_get() check.
1037 	 */
1038 	zram_meta_put(zram);
1039 	/*
1040 	 * We want to free zram_meta in process context to avoid
1041 	 * a deadlock between the reclaim path and any other locks.
1042 	 */
1043 	wait_event(zram->io_done, atomic_read(&zram->refcount) == 0);
1044 
1045 	/* Reset stats */
1046 	memset(&zram->stats, 0, sizeof(zram->stats));
1047 	zram->disksize = 0;
1048 
1049 	set_capacity(zram->disk, 0);
1050 	part_stat_set_all(&zram->disk->part0, 0);
1051 
1052 	up_write(&zram->init_lock);
1053 	/* I/O operations on all CPUs are done, so it is safe to free */
1054 	zram_meta_free(meta, disksize);
1055 	zcomp_destroy(comp);
1056 }
1057 
1058 static ssize_t disksize_store(struct device *dev,
1059 		struct device_attribute *attr, const char *buf, size_t len)
1060 {
1061 	u64 disksize;
1062 	struct zcomp *comp;
1063 	struct zram_meta *meta;
1064 	struct zram *zram = dev_to_zram(dev);
1065 	int err;
1066 
1067 	disksize = memparse(buf, NULL);
1068 	if (!disksize)
1069 		return -EINVAL;
1070 
1071 	disksize = PAGE_ALIGN(disksize);
1072 	meta = zram_meta_alloc(zram->disk->disk_name, disksize);
1073 	if (!meta)
1074 		return -ENOMEM;
1075 
1076 	comp = zcomp_create(zram->compressor);
1077 	if (IS_ERR(comp)) {
1078 		pr_err("Cannot initialise %s compressing backend\n",
1079 				zram->compressor);
1080 		err = PTR_ERR(comp);
1081 		goto out_free_meta;
1082 	}
1083 
1084 	down_write(&zram->init_lock);
1085 	if (init_done(zram)) {
1086 		pr_info("Cannot change disksize for initialized device\n");
1087 		err = -EBUSY;
1088 		goto out_destroy_comp;
1089 	}
1090 
1091 	init_waitqueue_head(&zram->io_done);
1092 	atomic_set(&zram->refcount, 1);
1093 	zram->meta = meta;
1094 	zram->comp = comp;
1095 	zram->disksize = disksize;
1096 	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
1097 	up_write(&zram->init_lock);
1098 
1099 	/*
1100 	 * Revalidate disk out of the init_lock to avoid lockdep splat.
1101 	 * It's okay because disk's capacity is protected by init_lock
1102 	 * so that revalidate_disk always sees up-to-date capacity.
1103 	 */
1104 	revalidate_disk(zram->disk);
1105 
1106 	return len;
1107 
1108 out_destroy_comp:
1109 	up_write(&zram->init_lock);
1110 	zcomp_destroy(comp);
1111 out_free_meta:
1112 	zram_meta_free(meta, disksize);
1113 	return err;
1114 }
1115 
1116 static ssize_t reset_store(struct device *dev,
1117 		struct device_attribute *attr, const char *buf, size_t len)
1118 {
1119 	int ret;
1120 	unsigned short do_reset;
1121 	struct zram *zram;
1122 	struct block_device *bdev;
1123 
1124 	ret = kstrtou16(buf, 10, &do_reset);
1125 	if (ret)
1126 		return ret;
1127 
1128 	if (!do_reset)
1129 		return -EINVAL;
1130 
1131 	zram = dev_to_zram(dev);
1132 	bdev = bdget_disk(zram->disk, 0);
1133 	if (!bdev)
1134 		return -ENOMEM;
1135 
1136 	mutex_lock(&bdev->bd_mutex);
1137 	/* Do not reset an active device or claimed device */
1138 	if (bdev->bd_openers || zram->claim) {
1139 		mutex_unlock(&bdev->bd_mutex);
1140 		bdput(bdev);
1141 		return -EBUSY;
1142 	}
1143 
1144 	/* From now on, no one can open /dev/zram[0-9] */
1145 	zram->claim = true;
1146 	mutex_unlock(&bdev->bd_mutex);
1147 
1148 	/* Make sure all the pending I/O are finished */
1149 	fsync_bdev(bdev);
1150 	zram_reset_device(zram);
1151 	revalidate_disk(zram->disk);
1152 	bdput(bdev);
1153 
1154 	mutex_lock(&bdev->bd_mutex);
1155 	zram->claim = false;
1156 	mutex_unlock(&bdev->bd_mutex);
1157 
1158 	return len;
1159 }
1160 
1161 static int zram_open(struct block_device *bdev, fmode_t mode)
1162 {
1163 	int ret = 0;
1164 	struct zram *zram;
1165 
1166 	WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
1167 
1168 	zram = bdev->bd_disk->private_data;
1169 	/* zram was claimed for reset, so the open request fails */
1170 	if (zram->claim)
1171 		ret = -EBUSY;
1172 
1173 	return ret;
1174 }
1175 
1176 static const struct block_device_operations zram_devops = {
1177 	.open = zram_open,
1178 	.swap_slot_free_notify = zram_slot_free_notify,
1179 	.rw_page = zram_rw_page,
1180 	.owner = THIS_MODULE
1181 };
1182 
1183 static DEVICE_ATTR_WO(compact);
1184 static DEVICE_ATTR_RW(disksize);
1185 static DEVICE_ATTR_RO(initstate);
1186 static DEVICE_ATTR_WO(reset);
1187 static DEVICE_ATTR_RO(orig_data_size);
1188 static DEVICE_ATTR_RO(mem_used_total);
1189 static DEVICE_ATTR_RW(mem_limit);
1190 static DEVICE_ATTR_RW(mem_used_max);
1191 static DEVICE_ATTR_RW(max_comp_streams);
1192 static DEVICE_ATTR_RW(comp_algorithm);
1193 
1194 static struct attribute *zram_disk_attrs[] = {
1195 	&dev_attr_disksize.attr,
1196 	&dev_attr_initstate.attr,
1197 	&dev_attr_reset.attr,
1198 	&dev_attr_num_reads.attr,
1199 	&dev_attr_num_writes.attr,
1200 	&dev_attr_failed_reads.attr,
1201 	&dev_attr_failed_writes.attr,
1202 	&dev_attr_compact.attr,
1203 	&dev_attr_invalid_io.attr,
1204 	&dev_attr_notify_free.attr,
1205 	&dev_attr_zero_pages.attr,
1206 	&dev_attr_orig_data_size.attr,
1207 	&dev_attr_compr_data_size.attr,
1208 	&dev_attr_mem_used_total.attr,
1209 	&dev_attr_mem_limit.attr,
1210 	&dev_attr_mem_used_max.attr,
1211 	&dev_attr_max_comp_streams.attr,
1212 	&dev_attr_comp_algorithm.attr,
1213 	&dev_attr_io_stat.attr,
1214 	&dev_attr_mm_stat.attr,
1215 	&dev_attr_debug_stat.attr,
1216 	NULL,
1217 };
1218 
1219 static struct attribute_group zram_disk_attr_group = {
1220 	.attrs = zram_disk_attrs,
1221 };
1222 
1223 /*
1224  * Allocate and initialize a new zram device. The function returns
1225  * a '>= 0' device_id upon success, and a negative value otherwise.
1226  */
1227 static int zram_add(void)
1228 {
1229 	struct zram *zram;
1230 	struct request_queue *queue;
1231 	int ret, device_id;
1232 
1233 	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
1234 	if (!zram)
1235 		return -ENOMEM;
1236 
1237 	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
1238 	if (ret < 0)
1239 		goto out_free_dev;
1240 	device_id = ret;
1241 
1242 	init_rwsem(&zram->init_lock);
1243 
1244 	queue = blk_alloc_queue(GFP_KERNEL);
1245 	if (!queue) {
1246 		pr_err("Error allocating disk queue for device %d\n",
1247 			device_id);
1248 		ret = -ENOMEM;
1249 		goto out_free_idr;
1250 	}
1251 
1252 	blk_queue_make_request(queue, zram_make_request);
1253 
1254 	/* gendisk structure */
1255 	zram->disk = alloc_disk(1);
1256 	if (!zram->disk) {
1257 		pr_err("Error allocating disk structure for device %d\n",
1258 			device_id);
1259 		ret = -ENOMEM;
1260 		goto out_free_queue;
1261 	}
1262 
1263 	zram->disk->major = zram_major;
1264 	zram->disk->first_minor = device_id;
1265 	zram->disk->fops = &zram_devops;
1266 	zram->disk->queue = queue;
1267 	zram->disk->queue->queuedata = zram;
1268 	zram->disk->private_data = zram;
1269 	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
1270 
1271 	/* Actual capacity is set using sysfs (/sys/block/zram<id>/disksize) */
1272 	set_capacity(zram->disk, 0);
1273 	/* zram devices sort of resemble non-rotational disks */
1274 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
1275 	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
1276 	/*
1277 	 * To ensure that we always get PAGE_SIZE-aligned
1278 	 * and n*PAGE_SIZE-sized I/O requests.
1279 	 */
1280 	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
1281 	blk_queue_logical_block_size(zram->disk->queue,
1282 					ZRAM_LOGICAL_BLOCK_SIZE);
1283 	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
1284 	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
1285 	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
1286 	blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
1287 	/*
1288 	 * zram_bio_discard() will clear all logical blocks if logical block
1289 	 * size is identical with physical block size (PAGE_SIZE). But if it is
1290 	 * different, we will skip discarding some parts of logical blocks in
1291 	 * the part of the request range which isn't aligned to physical block
1292 	 * size.  So we can't ensure that all discarded logical blocks are
1293 	 * zeroed.
1294 	 */
1295 	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
1296 		zram->disk->queue->limits.discard_zeroes_data = 1;
1297 	else
1298 		zram->disk->queue->limits.discard_zeroes_data = 0;
1299 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
1300 
1301 	add_disk(zram->disk);
1302 
1303 	ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
1304 				&zram_disk_attr_group);
1305 	if (ret < 0) {
1306 		pr_err("Error creating sysfs group for device %d\n",
1307 				device_id);
1308 		goto out_free_disk;
1309 	}
1310 	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
1311 	zram->meta = NULL;
1312 
1313 	pr_info("Added device: %s\n", zram->disk->disk_name);
1314 	return device_id;
1315 
1316 out_free_disk:
1317 	del_gendisk(zram->disk);
1318 	put_disk(zram->disk);
1319 out_free_queue:
1320 	blk_cleanup_queue(queue);
1321 out_free_idr:
1322 	idr_remove(&zram_index_idr, device_id);
1323 out_free_dev:
1324 	kfree(zram);
1325 	return ret;
1326 }
1327 
1328 static int zram_remove(struct zram *zram)
1329 {
1330 	struct block_device *bdev;
1331 
1332 	bdev = bdget_disk(zram->disk, 0);
1333 	if (!bdev)
1334 		return -ENOMEM;
1335 
1336 	mutex_lock(&bdev->bd_mutex);
1337 	if (bdev->bd_openers || zram->claim) {
1338 		mutex_unlock(&bdev->bd_mutex);
1339 		bdput(bdev);
1340 		return -EBUSY;
1341 	}
1342 
1343 	zram->claim = true;
1344 	mutex_unlock(&bdev->bd_mutex);
1345 
1346 	/*
1347 	 * Remove sysfs first, so no one will perform a disksize
1348 	 * store while we destroy the devices. This also helps during
1349 	 * hot_remove -- zram_reset_device() is the last holder of
1350 	 * ->init_lock, no later/concurrent disksize_store() or any
1351 	 * other sysfs handlers are possible.
1352 	 */
1353 	sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
1354 			&zram_disk_attr_group);
1355 
1356 	/* Make sure all the pending I/O are finished */
1357 	fsync_bdev(bdev);
1358 	zram_reset_device(zram);
1359 	bdput(bdev);
1360 
1361 	pr_info("Removed device: %s\n", zram->disk->disk_name);
1362 
1363 	blk_cleanup_queue(zram->disk->queue);
1364 	del_gendisk(zram->disk);
1365 	put_disk(zram->disk);
1366 	kfree(zram);
1367 	return 0;
1368 }
1369 
1370 /* zram-control sysfs attributes */
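/*
 * Example usage from user space (illustrative only; see
 * Documentation/blockdev/zram.txt):
 *
 *   cat /sys/class/zram-control/hot_add      # prints id of the new device
 *   echo 4 > /sys/class/zram-control/hot_remove
 */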
1371 static ssize_t hot_add_show(struct class *class,
1372 			struct class_attribute *attr,
1373 			char *buf)
1374 {
1375 	int ret;
1376 
1377 	mutex_lock(&zram_index_mutex);
1378 	ret = zram_add();
1379 	mutex_unlock(&zram_index_mutex);
1380 
1381 	if (ret < 0)
1382 		return ret;
1383 	return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
1384 }
1385 
1386 static ssize_t hot_remove_store(struct class *class,
1387 			struct class_attribute *attr,
1388 			const char *buf,
1389 			size_t count)
1390 {
1391 	struct zram *zram;
1392 	int ret, dev_id;
1393 
1394 	/* dev_id is gendisk->first_minor, which is `int' */
1395 	ret = kstrtoint(buf, 10, &dev_id);
1396 	if (ret)
1397 		return ret;
1398 	if (dev_id < 0)
1399 		return -EINVAL;
1400 
1401 	mutex_lock(&zram_index_mutex);
1402 
1403 	zram = idr_find(&zram_index_idr, dev_id);
1404 	if (zram) {
1405 		ret = zram_remove(zram);
1406 		idr_remove(&zram_index_idr, dev_id);
1407 	} else {
1408 		ret = -ENODEV;
1409 	}
1410 
1411 	mutex_unlock(&zram_index_mutex);
1412 	return ret ? ret : count;
1413 }
1414 
1415 static struct class_attribute zram_control_class_attrs[] = {
1416 	__ATTR_RO(hot_add),
1417 	__ATTR_WO(hot_remove),
1418 	__ATTR_NULL,
1419 };
1420 
1421 static struct class zram_control_class = {
1422 	.name		= "zram-control",
1423 	.owner		= THIS_MODULE,
1424 	.class_attrs	= zram_control_class_attrs,
1425 };
1426 
1427 static int zram_remove_cb(int id, void *ptr, void *data)
1428 {
1429 	zram_remove(ptr);
1430 	return 0;
1431 }
1432 
1433 static void destroy_devices(void)
1434 {
1435 	class_unregister(&zram_control_class);
1436 	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
1437 	idr_destroy(&zram_index_idr);
1438 	unregister_blkdev(zram_major, "zram");
1439 }
1440 
1441 static int __init zram_init(void)
1442 {
1443 	int ret;
1444 
1445 	ret = class_register(&zram_control_class);
1446 	if (ret) {
1447 		pr_err("Unable to register zram-control class\n");
1448 		return ret;
1449 	}
1450 
1451 	zram_major = register_blkdev(0, "zram");
1452 	if (zram_major <= 0) {
1453 		pr_err("Unable to get major number\n");
1454 		class_unregister(&zram_control_class);
1455 		return -EBUSY;
1456 	}
1457 
1458 	while (num_devices != 0) {
1459 		mutex_lock(&zram_index_mutex);
1460 		ret = zram_add();
1461 		mutex_unlock(&zram_index_mutex);
1462 		if (ret < 0)
1463 			goto out_error;
1464 		num_devices--;
1465 	}
1466 
1467 	return 0;
1468 
1469 out_error:
1470 	destroy_devices();
1471 	return ret;
1472 }
1473 
1474 static void __exit zram_exit(void)
1475 {
1476 	destroy_devices();
1477 }
1478 
1479 module_init(zram_init);
1480 module_exit(zram_exit);
1481 
1482 module_param(num_devices, uint, 0);
1483 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
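/*
 * Typical setup of a device (illustrative only; see
 * Documentation/blockdev/zram.txt):
 *
 *   echo lz4 > /sys/block/zram0/comp_algorithm
 *   echo 512M > /sys/block/zram0/disksize
 *   mkswap /dev/zram0 && swapon /dev/zram0
 */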
1484 
1485 MODULE_LICENSE("Dual BSD/GPL");
1486 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
1487 MODULE_DESCRIPTION("Compressed RAM Block Device");
1488