xref: /openbmc/linux/drivers/block/loop.c (revision df2634f43f5106947f3735a0b61a6527a4b278cd)
/*
 *  linux/drivers/block/loop.c
 *
 *  Written by Theodore Ts'o, 3/29/93
 *
 * Copyright 1993 by Theodore Ts'o.  Redistribution of this file is
 * permitted under the GNU General Public License.
 *
 * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
 * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
 *
 * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
 * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
 *
 * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
 *
 * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
 *
 * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
 *
 * Loadable modules and other fixes by AK, 1998
 *
 * Make the real block number available to downstream transfer functions,
 * enabling CBC (and related) mode encryption, which requires unique IVs
 * per data block.
 * Reed H. Petty, rhp@draper.net
 *
 * Maximum number of loop devices now dynamic via max_loop module parameter.
 * Russell Kroll <rkroll@exploits.org> 19990701
 *
 * Maximum number of loop devices when compiled-in now selectable by passing
 * max_loop=<1-255> to the kernel on boot.
 * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
 *
 * Completely rewrote request handling to be make_request_fn style and
 * non-blocking, pushing work to a helper thread. Lots of fixes from
 * Al Viro too.
 * Jens Axboe <axboe@suse.de>, Nov 2000
 *
 * Support up to 256 loop devices
 * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
 *
 * Support for falling back on the write file operation when the address space
 * operation write_begin is not available on the backing filesystem.
 * Anton Altaparmakov, 16 Feb 2005
 *
 * Still To Fix:
 * - Advisory locking is ignored here.
 * - Should use its own CAP_* category instead of CAP_SYS_ADMIN
 *
 */

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/major.h>
#include <linux/wait.h>
#include <linux/blkdev.h>
#include <linux/blkpg.h>
#include <linux/init.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/loop.h>
#include <linux/compat.h>
#include <linux/suspend.h>
#include <linux/freezer.h>
#include <linux/mutex.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h>		/* for invalidate_bdev() */
#include <linux/completion.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/splice.h>
#include <linux/sysfs.h>

#include <asm/uaccess.h>

static DEFINE_MUTEX(loop_mutex);
static LIST_HEAD(loop_devices);
static DEFINE_MUTEX(loop_devices_mutex);

static int max_part;
static int part_shift;

/*
 * Transfer functions
 */
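/*
 * For all transfer functions, raw_page belongs to the backing file and
 * loop_page to the bio: cmd == READ copies raw -> loop, WRITE copies
 * loop -> raw.  real_block is the 512-byte sector number, intended for
 * use as an IV by encrypting transfers.
 */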
static int transfer_none(struct loop_device *lo, int cmd,
			 struct page *raw_page, unsigned raw_off,
			 struct page *loop_page, unsigned loop_off,
			 int size, sector_t real_block)
{
	char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
	char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;

	if (cmd == READ)
		memcpy(loop_buf, raw_buf, size);
	else
		memcpy(raw_buf, loop_buf, size);

	kunmap_atomic(loop_buf, KM_USER1);
	kunmap_atomic(raw_buf, KM_USER0);
	cond_resched();
	return 0;
}

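/*
 * Trivial XOR obfuscation: each byte is XORed with
 * key[(i & 511) % keysize], i.e. the key stream repeats and the byte
 * index wraps every 512 bytes.  Not real encryption.
 */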
static int transfer_xor(struct loop_device *lo, int cmd,
			struct page *raw_page, unsigned raw_off,
			struct page *loop_page, unsigned loop_off,
			int size, sector_t real_block)
{
	char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
	char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
	char *in, *out, *key;
	int i, keysize;

	if (cmd == READ) {
		in = raw_buf;
		out = loop_buf;
	} else {
		in = loop_buf;
		out = raw_buf;
	}

	key = lo->lo_encrypt_key;
	keysize = lo->lo_encrypt_key_size;
	for (i = 0; i < size; i++)
		*out++ = *in++ ^ key[(i & 511) % keysize];

	kunmap_atomic(loop_buf, KM_USER1);
	kunmap_atomic(raw_buf, KM_USER0);
	cond_resched();
	return 0;
}

static int xor_init(struct loop_device *lo, const struct loop_info64 *info)
{
	if (unlikely(info->lo_encrypt_key_size <= 0))
		return -EINVAL;
	return 0;
}

static struct loop_func_table none_funcs = {
	.number = LO_CRYPT_NONE,
	.transfer = transfer_none,
};

static struct loop_func_table xor_funcs = {
	.number = LO_CRYPT_XOR,
	.transfer = transfer_xor,
	.init = xor_init
};

/* xfer_funcs[0] is special - its release function is never called */
static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
	&none_funcs,
	&xor_funcs
};

static loff_t get_loop_size(struct loop_device *lo, struct file *file)
{
	loff_t size, offset, loopsize;

	/* Compute loopsize in bytes */
	size = i_size_read(file->f_mapping->host);
	offset = lo->lo_offset;
	loopsize = size - offset;
	if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
		loopsize = lo->lo_sizelimit;

	/*
	 * Unfortunately, if we want to do I/O on the device,
	 * the number of 512-byte sectors has to fit into a sector_t.
	 */
	return loopsize >> 9;
}

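/*
 * Recompute the device size from the backing file and propagate it to
 * the gendisk; fails with -EFBIG if the sector count does not fit in
 * sector_t.
 */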
static int
figure_loop_size(struct loop_device *lo)
{
	loff_t size = get_loop_size(lo, lo->lo_backing_file);
	sector_t x = (sector_t)size;

	if (unlikely((loff_t)x != size))
		return -EFBIG;

	set_capacity(lo->lo_disk, x);
	return 0;
}

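/*
 * Run the device's transfer function, if any, between the raw (backing
 * file) page and the loop (bio) page.  While bound, lo->transfer is
 * always set (transfer_none at minimum), so the NULL check is
 * defensive.
 */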
static inline int
lo_do_transfer(struct loop_device *lo, int cmd,
	       struct page *rpage, unsigned roffs,
	       struct page *lpage, unsigned loffs,
	       int size, sector_t rblock)
{
	if (unlikely(!lo->transfer))
		return 0;

	return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
}

/**
 * do_lo_send_aops - helper for writing data to a loop device
 *
 * This is the fast version for backing filesystems which implement the address
 * space operations write_begin and write_end.
 */
static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
		loff_t pos, struct page *unused)
{
	struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
	struct address_space *mapping = file->f_mapping;
	pgoff_t index;
	unsigned offset, bv_offs;
	int len, ret;

	mutex_lock(&mapping->host->i_mutex);
	index = pos >> PAGE_CACHE_SHIFT;
	offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
	bv_offs = bvec->bv_offset;
	len = bvec->bv_len;
	while (len > 0) {
		sector_t IV;
		unsigned size, copied;
		int transfer_result;
		struct page *page;
		void *fsdata;

		IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9)) + (offset >> 9);
		size = PAGE_CACHE_SIZE - offset;
		if (size > len)
			size = len;

		ret = pagecache_write_begin(file, mapping, pos, size, 0,
							&page, &fsdata);
		if (ret)
			goto fail;

		file_update_time(file);

		transfer_result = lo_do_transfer(lo, WRITE, page, offset,
				bvec->bv_page, bv_offs, size, IV);
		copied = size;
		if (unlikely(transfer_result))
			copied = 0;

		ret = pagecache_write_end(file, mapping, pos, size, copied,
							page, fsdata);
		if (ret < 0 || ret != copied)
			goto fail;

		if (unlikely(transfer_result))
			goto fail;

		bv_offs += copied;
		len -= copied;
		offset = 0;
		index++;
		pos += copied;
	}
	ret = 0;
out:
	mutex_unlock(&mapping->host->i_mutex);
	return ret;
fail:
	ret = -1;
	goto out;
}

/**
 * __do_lo_send_write - helper for writing data to a loop device
 *
 * This helper just factors out common code between do_lo_send_direct_write()
 * and do_lo_send_write().
 */
static int __do_lo_send_write(struct file *file,
		u8 *buf, const int len, loff_t pos)
{
	ssize_t bw;
	mm_segment_t old_fs = get_fs();

	set_fs(get_ds());
	bw = file->f_op->write(file, buf, len, &pos);
	set_fs(old_fs);
	if (likely(bw == len))
		return 0;
	printk(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n",
			(unsigned long long)pos, len);
	if (bw >= 0)
		bw = -EIO;
	return bw;
}

/**
 * do_lo_send_direct_write - helper for writing data to a loop device
 *
 * This is the fast, non-transforming version for backing filesystems which do
 * not implement the address space operations write_begin and write_end.
 * It uses the write file operation which should be present on all writeable
 * filesystems.
 */
static int do_lo_send_direct_write(struct loop_device *lo,
		struct bio_vec *bvec, loff_t pos, struct page *page)
{
	ssize_t bw = __do_lo_send_write(lo->lo_backing_file,
			kmap(bvec->bv_page) + bvec->bv_offset,
			bvec->bv_len, pos);
	kunmap(bvec->bv_page);
	cond_resched();
	return bw;
}

/**
 * do_lo_send_write - helper for writing data to a loop device
 *
 * This is the slow, transforming version for filesystems which do not
 * implement the address space operations write_begin and write_end.  It
 * uses the write file operation which should be present on all writeable
 * filesystems.
 *
 * Using fops->write is slower than using aops->{prepare,commit}_write in the
 * transforming case because we need to double buffer the data: we cannot do
 * the transformations in place, since we do not have direct access to the
 * destination pages of the backing file.
 */
static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
		loff_t pos, struct page *page)
{
	int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page,
			bvec->bv_offset, bvec->bv_len, pos >> 9);
	if (likely(!ret))
		return __do_lo_send_write(lo->lo_backing_file,
				page_address(page), bvec->bv_len,
				pos);
	printk(KERN_ERR "loop: Transfer error at byte offset %llu, "
			"length %i.\n", (unsigned long long)pos, bvec->bv_len);
	if (ret > 0)
		ret = -EIO;
	return ret;
}

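/*
 * lo_send - write a bio to the backing file, picking the fastest
 * method available: aops-based page cache writes, direct fops->write
 * for untransformed data, or double-buffered fops->write when a
 * transfer function has to run first.
 */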
static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
{
	int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t,
			struct page *page);
	struct bio_vec *bvec;
	struct page *page = NULL;
	int i, ret = 0;

	do_lo_send = do_lo_send_aops;
	if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) {
		do_lo_send = do_lo_send_direct_write;
		if (lo->transfer != transfer_none) {
			page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
			if (unlikely(!page))
				goto fail;
			kmap(page);
			do_lo_send = do_lo_send_write;
		}
	}
	bio_for_each_segment(bvec, bio, i) {
		ret = do_lo_send(lo, bvec, pos, page);
		if (ret < 0)
			break;
		pos += bvec->bv_len;
	}
	if (page) {
		kunmap(page);
		__free_page(page);
	}
out:
	return ret;
fail:
	printk(KERN_ERR "loop: Failed to allocate temporary page for write.\n");
	ret = -ENOMEM;
	goto out;
}

struct lo_read_data {
	struct loop_device *lo;
	struct page *page;
	unsigned offset;
	int bsize;
};

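/*
 * Splice actor for the read path: copies (and, via the transfer
 * function, optionally decrypts) one pipe buffer from the backing file
 * into the destination bio page.
 */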
static int
lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
		struct splice_desc *sd)
{
	struct lo_read_data *p = sd->u.data;
	struct loop_device *lo = p->lo;
	struct page *page = buf->page;
	sector_t IV;
	int size;

	IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) +
							(buf->offset >> 9);
	size = sd->len;
	if (size > p->bsize)
		size = p->bsize;

	if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset,
			   size, IV)) {
		printk(KERN_ERR "loop: transfer error block %lu\n",
		       page->index);
		size = -EINVAL;
	}

	flush_dcache_page(p->page);

	if (size > 0)
		p->offset += size;

	return size;
}

static int
lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd)
{
	return __splice_from_pipe(pipe, sd, lo_splice_actor);
}

static int
do_lo_receive(struct loop_device *lo,
	      struct bio_vec *bvec, int bsize, loff_t pos)
{
	struct lo_read_data cookie;
	struct splice_desc sd;
	struct file *file;
	long retval;

	cookie.lo = lo;
	cookie.page = bvec->bv_page;
	cookie.offset = bvec->bv_offset;
	cookie.bsize = bsize;

	sd.len = 0;
	sd.total_len = bvec->bv_len;
	sd.flags = 0;
	sd.pos = pos;
	sd.u.data = &cookie;

	file = lo->lo_backing_file;
	retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor);

	if (retval < 0)
		return retval;

	return 0;
}

static int
lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
{
	struct bio_vec *bvec;
	int i, ret = 0;

	bio_for_each_segment(bvec, bio, i) {
		ret = do_lo_receive(lo, bvec, bsize, pos);
		if (ret < 0)
			break;
		pos += bvec->bv_len;
	}
	return ret;
}

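/*
 * Service one bio against the backing file.  For writes, REQ_FLUSH is
 * honoured by an fsync before the data is written and REQ_FUA by one
 * after it; -EINVAL from vfs_fsync (backing file has no fsync method)
 * is tolerated.
 */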
static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
{
	loff_t pos;
	int ret;

	pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;

	if (bio_rw(bio) == WRITE) {
		struct file *file = lo->lo_backing_file;

		if (bio->bi_rw & REQ_FLUSH) {
			ret = vfs_fsync(file, 0);
			if (unlikely(ret && ret != -EINVAL)) {
				ret = -EIO;
				goto out;
			}
		}

		ret = lo_send(lo, bio, pos);

		if ((bio->bi_rw & REQ_FUA) && !ret) {
			ret = vfs_fsync(file, 0);
			if (unlikely(ret && ret != -EINVAL))
				ret = -EIO;
		}
	} else
		ret = lo_receive(lo, bio, lo->lo_blocksize, pos);

out:
	return ret;
}

/*
 * Add bio to back of pending list
 */
static void loop_add_bio(struct loop_device *lo, struct bio *bio)
{
	bio_list_add(&lo->lo_bio_list, bio);
}

/*
 * Grab first pending buffer
 */
static struct bio *loop_get_bio(struct loop_device *lo)
{
	return bio_list_pop(&lo->lo_bio_list);
}

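/*
 * Queue entry point, called for every bio submitted to the device.
 * Bios are only queued for the helper thread here, never handled
 * inline, since the backing file I/O may sleep.
 */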
static int loop_make_request(struct request_queue *q, struct bio *old_bio)
{
	struct loop_device *lo = q->queuedata;
	int rw = bio_rw(old_bio);

	if (rw == READA)
		rw = READ;

	BUG_ON(!lo || (rw != READ && rw != WRITE));

	spin_lock_irq(&lo->lo_lock);
	if (lo->lo_state != Lo_bound)
		goto out;
	if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
		goto out;
	loop_add_bio(lo, old_bio);
	wake_up(&lo->lo_event);
	spin_unlock_irq(&lo->lo_lock);
	return 0;

out:
	spin_unlock_irq(&lo->lo_lock);
	bio_io_error(old_bio);
	return 0;
}

/*
 * kick off io on the underlying address space
 */
static void loop_unplug(struct request_queue *q)
{
	struct loop_device *lo = q->queuedata;

	queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q);
	blk_run_address_space(lo->lo_backing_file->f_mapping);
}

struct switch_request {
	struct file *file;
	struct completion wait;
};

static void do_loop_switch(struct loop_device *, struct switch_request *);

static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
{
	if (unlikely(!bio->bi_bdev)) {
		do_loop_switch(lo, bio->bi_private);
		bio_put(bio);
	} else {
		int ret = do_bio_filebacked(lo, bio);
		bio_endio(bio, ret);
	}
}

/*
 * Worker thread that handles reads/writes to file-backed loop devices,
 * to avoid blocking in our make_request_fn.  It also does loop
 * decrypting on reads for block-backed loop, as that is too heavy to
 * do from b_end_io context where irqs may be disabled.
 *
 * Loop explanation:  loop_clr_fd() sets lo_state to Lo_rundown before
 * calling kthread_stop().  Therefore once kthread_should_stop() is
 * true, make_request will not place any more requests.  Therefore
 * once kthread_should_stop() is true and lo_bio_list is empty, we are
 * done with the loop.
 */
static int loop_thread(void *data)
{
	struct loop_device *lo = data;
	struct bio *bio;

	set_user_nice(current, -20);

	while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) {

		wait_event_interruptible(lo->lo_event,
				!bio_list_empty(&lo->lo_bio_list) ||
				kthread_should_stop());

		if (bio_list_empty(&lo->lo_bio_list))
			continue;
		spin_lock_irq(&lo->lo_lock);
		bio = loop_get_bio(lo);
		spin_unlock_irq(&lo->lo_lock);

		BUG_ON(!bio);
		loop_handle_bio(lo, bio);
	}

	return 0;
}

/*
 * loop_switch performs the hard work of switching a backing store.
 * First it needs to flush existing IO, it does this by sending a magic
 * BIO down the pipe. The completion of this BIO does the actual switch.
 */
static int loop_switch(struct loop_device *lo, struct file *file)
{
	struct switch_request w;
	struct bio *bio = bio_alloc(GFP_KERNEL, 0);
	if (!bio)
		return -ENOMEM;
	init_completion(&w.wait);
	w.file = file;
	bio->bi_private = &w;
	bio->bi_bdev = NULL;
	loop_make_request(lo->lo_queue, bio);
	wait_for_completion(&w.wait);
	return 0;
}

/*
 * Helper to flush the IOs in loop, but keeping loop thread running
 */
static int loop_flush(struct loop_device *lo)
{
	/* loop not yet configured, no running thread, nothing to flush */
	if (!lo->lo_thread)
		return 0;

	return loop_switch(lo, NULL);
}

/*
 * Do the actual switch; called from the BIO completion routine
 */
static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
{
	struct file *file = p->file;
	struct file *old_file = lo->lo_backing_file;
	struct address_space *mapping;

	/* if no new file, only flush of queued bios requested */
	if (!file)
		goto out;

	mapping = file->f_mapping;
	mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
	lo->lo_backing_file = file;
	lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
		mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
	lo->old_gfp_mask = mapping_gfp_mask(mapping);
	mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
out:
	complete(&p->wait);
}

/*
 * loop_change_fd switches the backing store of a loop device to a new
 * file.  This is useful for operating system installers to free up the
 * original file, and in High Availability environments to switch to an
 * alternative location for the content in case of server meltdown.
 * This can only work if the loop device is used read-only, and if the
 * new backing store is the same size and type as the old backing store.
 */
static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
			  unsigned int arg)
{
	struct file	*file, *old_file;
	struct inode	*inode;
	int		error;

	error = -ENXIO;
	if (lo->lo_state != Lo_bound)
		goto out;

	/* the loop device has to be read-only */
	error = -EINVAL;
	if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
		goto out;

	error = -EBADF;
	file = fget(arg);
	if (!file)
		goto out;

	inode = file->f_mapping->host;
	old_file = lo->lo_backing_file;

	error = -EINVAL;

	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
		goto out_putf;

	/* size of the new backing store needs to be the same */
	if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
		goto out_putf;

	/* and ... switch */
	error = loop_switch(lo, file);
	if (error)
		goto out_putf;

	fput(old_file);
	if (max_part > 0)
		ioctl_by_bdev(bdev, BLKRRPART, 0);
	return 0;

 out_putf:
	fput(file);
 out:
	return error;
}

static inline int is_loop_device(struct file *file)
{
	struct inode *i = file->f_mapping->host;

	return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
}

/* loop sysfs attributes */

static ssize_t loop_attr_show(struct device *dev, char *page,
			      ssize_t (*callback)(struct loop_device *, char *))
{
	struct loop_device *l, *lo = NULL;

	mutex_lock(&loop_devices_mutex);
	list_for_each_entry(l, &loop_devices, lo_list)
		if (disk_to_dev(l->lo_disk) == dev) {
			lo = l;
			break;
		}
	mutex_unlock(&loop_devices_mutex);

	return lo ? callback(lo, page) : -EIO;
}

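/*
 * LOOP_ATTR_RO(name) generates the device_attribute boilerplate for a
 * read-only per-device sysfs file, visible as
 * /sys/block/loopN/loop/<name>.
 */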
#define LOOP_ATTR_RO(_name)						\
static ssize_t loop_attr_##_name##_show(struct loop_device *, char *);	\
static ssize_t loop_attr_do_show_##_name(struct device *d,		\
				struct device_attribute *attr, char *b)	\
{									\
	return loop_attr_show(d, b, loop_attr_##_name##_show);		\
}									\
static struct device_attribute loop_attr_##_name =			\
	__ATTR(_name, S_IRUGO, loop_attr_do_show_##_name, NULL);

static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
{
	ssize_t ret;
	char *p = NULL;

	mutex_lock(&lo->lo_ctl_mutex);
	if (lo->lo_backing_file)
		p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1);
	mutex_unlock(&lo->lo_ctl_mutex);

	if (IS_ERR_OR_NULL(p))
		ret = PTR_ERR(p);
	else {
		ret = strlen(p);
		memmove(buf, p, ret);
		buf[ret++] = '\n';
		buf[ret] = 0;
	}

	return ret;
}

static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf)
{
	return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset);
}

static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf)
{
	return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
}

static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
{
	int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR);

	return sprintf(buf, "%s\n", autoclear ? "1" : "0");
}

LOOP_ATTR_RO(backing_file);
LOOP_ATTR_RO(offset);
LOOP_ATTR_RO(sizelimit);
LOOP_ATTR_RO(autoclear);

static struct attribute *loop_attrs[] = {
	&loop_attr_backing_file.attr,
	&loop_attr_offset.attr,
	&loop_attr_sizelimit.attr,
	&loop_attr_autoclear.attr,
	NULL,
};

static struct attribute_group loop_attribute_group = {
	.name = "loop",
	.attrs = loop_attrs,
};

static int loop_sysfs_init(struct loop_device *lo)
{
	return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj,
				  &loop_attribute_group);
}

static void loop_sysfs_exit(struct loop_device *lo)
{
	sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj,
			   &loop_attribute_group);
}

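/*
 * loop_set_fd - bind a backing file to the loop device.
 *
 * Reached via the LOOP_SET_FD ioctl, whose argument is the backing
 * file descriptor.  Illustrative userspace usage (a sketch; file
 * names are made up, error handling omitted):
 *
 *	int loopfd = open("/dev/loop0", O_RDWR);
 *	int backfd = open("disk.img", O_RDWR);
 *	ioctl(loopfd, LOOP_SET_FD, backfd);
 */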
static int loop_set_fd(struct loop_device *lo, fmode_t mode,
		       struct block_device *bdev, unsigned int arg)
{
	struct file	*file, *f;
	struct inode	*inode;
	struct address_space *mapping;
	unsigned lo_blocksize;
	int		lo_flags = 0;
	int		error;
	loff_t		size;

	/* This is safe, since we have a reference from open(). */
	__module_get(THIS_MODULE);

	error = -EBADF;
	file = fget(arg);
	if (!file)
		goto out;

	error = -EBUSY;
	if (lo->lo_state != Lo_unbound)
		goto out_putf;

	/* Avoid recursion */
	f = file;
	while (is_loop_device(f)) {
		struct loop_device *l;

		if (f->f_mapping->host->i_bdev == bdev)
			goto out_putf;

		l = f->f_mapping->host->i_bdev->bd_disk->private_data;
		if (l->lo_state == Lo_unbound) {
			error = -EINVAL;
			goto out_putf;
		}
		f = l->lo_backing_file;
	}

	mapping = file->f_mapping;
	inode = mapping->host;

	if (!(file->f_mode & FMODE_WRITE))
		lo_flags |= LO_FLAGS_READ_ONLY;

	error = -EINVAL;
	if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
		const struct address_space_operations *aops = mapping->a_ops;

		if (aops->write_begin)
			lo_flags |= LO_FLAGS_USE_AOPS;
		if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
			lo_flags |= LO_FLAGS_READ_ONLY;

		lo_blocksize = S_ISBLK(inode->i_mode) ?
			inode->i_bdev->bd_block_size : PAGE_SIZE;

		error = 0;
	} else {
		goto out_putf;
	}

	size = get_loop_size(lo, file);

	if ((loff_t)(sector_t)size != size) {
		error = -EFBIG;
		goto out_putf;
	}

	if (!(mode & FMODE_WRITE))
		lo_flags |= LO_FLAGS_READ_ONLY;

	set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);

	lo->lo_blocksize = lo_blocksize;
	lo->lo_device = bdev;
	lo->lo_flags = lo_flags;
	lo->lo_backing_file = file;
	lo->transfer = transfer_none;
	lo->ioctl = NULL;
	lo->lo_sizelimit = 0;
	lo->old_gfp_mask = mapping_gfp_mask(mapping);
	mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));

	bio_list_init(&lo->lo_bio_list);

	/*
	 * set queue make_request_fn, and add limits based on lower level
	 * device
	 */
	blk_queue_make_request(lo->lo_queue, loop_make_request);
	lo->lo_queue->queuedata = lo;
	lo->lo_queue->unplug_fn = loop_unplug;

	if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
		blk_queue_flush(lo->lo_queue, REQ_FLUSH);

	set_capacity(lo->lo_disk, size);
	bd_set_size(bdev, size << 9);
	loop_sysfs_init(lo);
	/* let user-space know about the new size */
	kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);

	set_blocksize(bdev, lo_blocksize);

	lo->lo_thread = kthread_create(loop_thread, lo, "loop%d",
						lo->lo_number);
	if (IS_ERR(lo->lo_thread)) {
		error = PTR_ERR(lo->lo_thread);
		goto out_clr;
	}
	lo->lo_state = Lo_bound;
	wake_up_process(lo->lo_thread);
	if (max_part > 0)
		ioctl_by_bdev(bdev, BLKRRPART, 0);
	return 0;

out_clr:
	loop_sysfs_exit(lo);
	lo->lo_thread = NULL;
	lo->lo_device = NULL;
	lo->lo_backing_file = NULL;
	lo->lo_flags = 0;
	set_capacity(lo->lo_disk, 0);
	invalidate_bdev(bdev);
	bd_set_size(bdev, 0);
	kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
	mapping_set_gfp_mask(mapping, lo->old_gfp_mask);
	lo->lo_state = Lo_unbound;
 out_putf:
	fput(file);
 out:
	/* This is safe: open() is still holding a reference. */
	module_put(THIS_MODULE);
	return error;
}

static int
loop_release_xfer(struct loop_device *lo)
{
	int err = 0;
	struct loop_func_table *xfer = lo->lo_encryption;

	if (xfer) {
		if (xfer->release)
			err = xfer->release(lo);
		lo->transfer = NULL;
		lo->lo_encryption = NULL;
		module_put(xfer->owner);
	}
	return err;
}

static int
loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
	       const struct loop_info64 *i)
{
	int err = 0;

	if (xfer) {
		struct module *owner = xfer->owner;

		if (!try_module_get(owner))
			return -EINVAL;
		if (xfer->init)
			err = xfer->init(lo, i);
		if (err)
			module_put(owner);
		else
			lo->lo_encryption = xfer;
	}
	return err;
}

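/*
 * loop_clr_fd - unbind the backing file again (LOOP_CLR_FD ioctl,
 * e.g. "losetup -d").  Fails with -EBUSY while other openers remain.
 * On success the caller's lo_ctl_mutex has already been dropped.
 */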
static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
{
	struct file *filp = lo->lo_backing_file;
	gfp_t gfp = lo->old_gfp_mask;

	if (lo->lo_state != Lo_bound)
		return -ENXIO;

	if (lo->lo_refcnt > 1)	/* we needed one fd for the ioctl */
		return -EBUSY;

	if (filp == NULL)
		return -EINVAL;

	spin_lock_irq(&lo->lo_lock);
	lo->lo_state = Lo_rundown;
	spin_unlock_irq(&lo->lo_lock);

	kthread_stop(lo->lo_thread);

	lo->lo_queue->unplug_fn = NULL;
	lo->lo_backing_file = NULL;

	loop_release_xfer(lo);
	lo->transfer = NULL;
	lo->ioctl = NULL;
	lo->lo_device = NULL;
	lo->lo_encryption = NULL;
	lo->lo_offset = 0;
	lo->lo_sizelimit = 0;
	lo->lo_encrypt_key_size = 0;
	lo->lo_flags = 0;
	lo->lo_thread = NULL;
	memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
	memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
	memset(lo->lo_file_name, 0, LO_NAME_SIZE);
	if (bdev)
		invalidate_bdev(bdev);
	set_capacity(lo->lo_disk, 0);
	loop_sysfs_exit(lo);
	if (bdev) {
		bd_set_size(bdev, 0);
		/* let user-space know about this change */
		kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
	}
	mapping_set_gfp_mask(filp->f_mapping, gfp);
	lo->lo_state = Lo_unbound;
	/* This is safe: open() is still holding a reference. */
	module_put(THIS_MODULE);
	if (max_part > 0 && bdev)
		ioctl_by_bdev(bdev, BLKRRPART, 0);
	mutex_unlock(&lo->lo_ctl_mutex);
	/*
	 * Need not hold lo_ctl_mutex to fput backing file.
	 * Calling fput holding lo_ctl_mutex triggers a circular
	 * lock dependency possibility warning as fput can take
	 * bd_mutex which is usually taken before lo_ctl_mutex.
	 */
	fput(filp);
	return 0;
}

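/*
 * loop_set_status - apply a loop_info64 from LOOP_SET_STATUS{,64}:
 * offset, sizelimit and flags, plus an optional transfer function and
 * encryption key.
 */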
static int
loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
{
	int err;
	struct loop_func_table *xfer;
	uid_t uid = current_uid();

	if (lo->lo_encrypt_key_size &&
	    lo->lo_key_owner != uid &&
	    !capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (lo->lo_state != Lo_bound)
		return -ENXIO;
	if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
		return -EINVAL;

	err = loop_release_xfer(lo);
	if (err)
		return err;

	if (info->lo_encrypt_type) {
		unsigned int type = info->lo_encrypt_type;

		if (type >= MAX_LO_CRYPT)
			return -EINVAL;
		xfer = xfer_funcs[type];
		if (xfer == NULL)
			return -EINVAL;
	} else
		xfer = NULL;

	err = loop_init_xfer(lo, xfer, info);
	if (err)
		return err;

	if (lo->lo_offset != info->lo_offset ||
	    lo->lo_sizelimit != info->lo_sizelimit) {
		lo->lo_offset = info->lo_offset;
		lo->lo_sizelimit = info->lo_sizelimit;
		if (figure_loop_size(lo))
			return -EFBIG;
	}

	memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
	memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
	lo->lo_file_name[LO_NAME_SIZE-1] = 0;
	lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;

	if (!xfer)
		xfer = &none_funcs;
	lo->transfer = xfer->transfer;
	lo->ioctl = xfer->ioctl;

	if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) !=
	     (info->lo_flags & LO_FLAGS_AUTOCLEAR))
		lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;

	lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
	lo->lo_init[0] = info->lo_init[0];
	lo->lo_init[1] = info->lo_init[1];
	if (info->lo_encrypt_key_size) {
		memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
		       info->lo_encrypt_key_size);
		lo->lo_key_owner = uid;
	}

	return 0;
}

static int
loop_get_status(struct loop_device *lo, struct loop_info64 *info)
{
	struct file *file = lo->lo_backing_file;
	struct kstat stat;
	int error;

	if (lo->lo_state != Lo_bound)
		return -ENXIO;
	error = vfs_getattr(file->f_path.mnt, file->f_path.dentry, &stat);
	if (error)
		return error;
	memset(info, 0, sizeof(*info));
	info->lo_number = lo->lo_number;
	info->lo_device = huge_encode_dev(stat.dev);
	info->lo_inode = stat.ino;
	info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev);
	info->lo_offset = lo->lo_offset;
	info->lo_sizelimit = lo->lo_sizelimit;
	info->lo_flags = lo->lo_flags;
	memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE);
	memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
	info->lo_encrypt_type =
		lo->lo_encryption ? lo->lo_encryption->number : 0;
	if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
		info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
		memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
		       lo->lo_encrypt_key_size);
	}
	return 0;
}

static void
loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64)
{
	memset(info64, 0, sizeof(*info64));
	info64->lo_number = info->lo_number;
	info64->lo_device = info->lo_device;
	info64->lo_inode = info->lo_inode;
	info64->lo_rdevice = info->lo_rdevice;
	info64->lo_offset = info->lo_offset;
	info64->lo_sizelimit = 0;
	info64->lo_encrypt_type = info->lo_encrypt_type;
	info64->lo_encrypt_key_size = info->lo_encrypt_key_size;
	info64->lo_flags = info->lo_flags;
	info64->lo_init[0] = info->lo_init[0];
	info64->lo_init[1] = info->lo_init[1];
	if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
		memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE);
	else
		memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE);
	memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE);
}

static int
loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info)
{
	memset(info, 0, sizeof(*info));
	info->lo_number = info64->lo_number;
	info->lo_device = info64->lo_device;
	info->lo_inode = info64->lo_inode;
	info->lo_rdevice = info64->lo_rdevice;
	info->lo_offset = info64->lo_offset;
	info->lo_encrypt_type = info64->lo_encrypt_type;
	info->lo_encrypt_key_size = info64->lo_encrypt_key_size;
	info->lo_flags = info64->lo_flags;
	info->lo_init[0] = info64->lo_init[0];
	info->lo_init[1] = info64->lo_init[1];
	if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
		memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
	else
		memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE);
	memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);

	/* error in case values were truncated */
	if (info->lo_device != info64->lo_device ||
	    info->lo_rdevice != info64->lo_rdevice ||
	    info->lo_inode != info64->lo_inode ||
	    info->lo_offset != info64->lo_offset)
		return -EOVERFLOW;

	return 0;
}

static int
loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg)
{
	struct loop_info info;
	struct loop_info64 info64;

	if (copy_from_user(&info, arg, sizeof(struct loop_info)))
		return -EFAULT;
	loop_info64_from_old(&info, &info64);
	return loop_set_status(lo, &info64);
}

static int
loop_set_status64(struct loop_device *lo, const struct loop_info64 __user *arg)
{
	struct loop_info64 info64;

	if (copy_from_user(&info64, arg, sizeof(struct loop_info64)))
		return -EFAULT;
	return loop_set_status(lo, &info64);
}

static int
loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg)
{
	struct loop_info info;
	struct loop_info64 info64;
	int err = 0;

	if (!arg)
		err = -EINVAL;
	if (!err)
		err = loop_get_status(lo, &info64);
	if (!err)
		err = loop_info64_to_old(&info64, &info);
	if (!err && copy_to_user(arg, &info, sizeof(info)))
		err = -EFAULT;

	return err;
}

static int
loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg)
{
	struct loop_info64 info64;
	int err = 0;

	if (!arg)
		err = -EINVAL;
	if (!err)
		err = loop_get_status(lo, &info64);
	if (!err && copy_to_user(arg, &info64, sizeof(info64)))
		err = -EFAULT;

	return err;
}

static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
{
	int err;
	sector_t sec;
	loff_t sz;

	err = -ENXIO;
	if (unlikely(lo->lo_state != Lo_bound))
		goto out;
	err = figure_loop_size(lo);
	if (unlikely(err))
		goto out;
	sec = get_capacity(lo->lo_disk);
	/* sector_t may be narrower than loff_t, so widen before shifting */
	sz = sec;
	sz <<= 9;
	mutex_lock(&bdev->bd_mutex);
	bd_set_size(bdev, sz);
	/* let user-space know about the new size */
	kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
	mutex_unlock(&bdev->bd_mutex);

 out:
	return err;
}

static int lo_ioctl(struct block_device *bdev, fmode_t mode,
	unsigned int cmd, unsigned long arg)
{
	struct loop_device *lo = bdev->bd_disk->private_data;
	int err;

	mutex_lock_nested(&lo->lo_ctl_mutex, 1);
	switch (cmd) {
	case LOOP_SET_FD:
		err = loop_set_fd(lo, mode, bdev, arg);
		break;
	case LOOP_CHANGE_FD:
		err = loop_change_fd(lo, bdev, arg);
		break;
	case LOOP_CLR_FD:
		/* loop_clr_fd would have unlocked lo_ctl_mutex on success */
		err = loop_clr_fd(lo, bdev);
		if (!err)
			goto out_unlocked;
		break;
	case LOOP_SET_STATUS:
		err = loop_set_status_old(lo, (struct loop_info __user *) arg);
		break;
	case LOOP_GET_STATUS:
		err = loop_get_status_old(lo, (struct loop_info __user *) arg);
		break;
	case LOOP_SET_STATUS64:
		err = loop_set_status64(lo, (struct loop_info64 __user *) arg);
		break;
	case LOOP_GET_STATUS64:
		err = loop_get_status64(lo, (struct loop_info64 __user *) arg);
		break;
	case LOOP_SET_CAPACITY:
		err = -EPERM;
		if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
			err = loop_set_capacity(lo, bdev);
		break;
	default:
		err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
	}
	mutex_unlock(&lo->lo_ctl_mutex);

out_unlocked:
	return err;
}

#ifdef CONFIG_COMPAT
struct compat_loop_info {
	compat_int_t	lo_number;      /* ioctl r/o */
	compat_dev_t	lo_device;      /* ioctl r/o */
	compat_ulong_t	lo_inode;       /* ioctl r/o */
	compat_dev_t	lo_rdevice;     /* ioctl r/o */
	compat_int_t	lo_offset;
	compat_int_t	lo_encrypt_type;
	compat_int_t	lo_encrypt_key_size;    /* ioctl w/o */
	compat_int_t	lo_flags;       /* ioctl r/o */
	char		lo_name[LO_NAME_SIZE];
	unsigned char	lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
	compat_ulong_t	lo_init[2];
	char		reserved[4];
};

/*
 * Transfer 32-bit compatibility structure in userspace to 64-bit loop info
 * - noinlined to reduce stack space usage in main part of driver
 */
static noinline int
loop_info64_from_compat(const struct compat_loop_info __user *arg,
			struct loop_info64 *info64)
{
	struct compat_loop_info info;

	if (copy_from_user(&info, arg, sizeof(info)))
		return -EFAULT;

	memset(info64, 0, sizeof(*info64));
	info64->lo_number = info.lo_number;
	info64->lo_device = info.lo_device;
	info64->lo_inode = info.lo_inode;
	info64->lo_rdevice = info.lo_rdevice;
	info64->lo_offset = info.lo_offset;
	info64->lo_sizelimit = 0;
	info64->lo_encrypt_type = info.lo_encrypt_type;
	info64->lo_encrypt_key_size = info.lo_encrypt_key_size;
	info64->lo_flags = info.lo_flags;
	info64->lo_init[0] = info.lo_init[0];
	info64->lo_init[1] = info.lo_init[1];
	if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
		memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE);
	else
		memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE);
	memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE);
	return 0;
}

/*
 * Transfer 64-bit loop info to 32-bit compatibility structure in userspace
 * - noinlined to reduce stack space usage in main part of driver
 */
static noinline int
loop_info64_to_compat(const struct loop_info64 *info64,
		      struct compat_loop_info __user *arg)
{
	struct compat_loop_info info;

	memset(&info, 0, sizeof(info));
	info.lo_number = info64->lo_number;
	info.lo_device = info64->lo_device;
	info.lo_inode = info64->lo_inode;
	info.lo_rdevice = info64->lo_rdevice;
	info.lo_offset = info64->lo_offset;
	info.lo_encrypt_type = info64->lo_encrypt_type;
	info.lo_encrypt_key_size = info64->lo_encrypt_key_size;
	info.lo_flags = info64->lo_flags;
	info.lo_init[0] = info64->lo_init[0];
	info.lo_init[1] = info64->lo_init[1];
	if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
		memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
	else
		memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE);
	memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);

	/* error in case values were truncated */
	if (info.lo_device != info64->lo_device ||
	    info.lo_rdevice != info64->lo_rdevice ||
	    info.lo_inode != info64->lo_inode ||
	    info.lo_offset != info64->lo_offset ||
	    info.lo_init[0] != info64->lo_init[0] ||
	    info.lo_init[1] != info64->lo_init[1])
		return -EOVERFLOW;

	if (copy_to_user(arg, &info, sizeof(info)))
		return -EFAULT;
	return 0;
}

static int
loop_set_status_compat(struct loop_device *lo,
		       const struct compat_loop_info __user *arg)
{
	struct loop_info64 info64;
	int ret;

	ret = loop_info64_from_compat(arg, &info64);
	if (ret < 0)
		return ret;
	return loop_set_status(lo, &info64);
}

static int
loop_get_status_compat(struct loop_device *lo,
		       struct compat_loop_info __user *arg)
{
	struct loop_info64 info64;
	int err = 0;

	if (!arg)
		err = -EINVAL;
	if (!err)
		err = loop_get_status(lo, &info64);
	if (!err)
		err = loop_info64_to_compat(&info64, arg);
	return err;
}

static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
			   unsigned int cmd, unsigned long arg)
{
	struct loop_device *lo = bdev->bd_disk->private_data;
	int err;

	switch (cmd) {
	case LOOP_SET_STATUS:
		mutex_lock(&lo->lo_ctl_mutex);
		err = loop_set_status_compat(
			lo, (const struct compat_loop_info __user *) arg);
		mutex_unlock(&lo->lo_ctl_mutex);
		break;
	case LOOP_GET_STATUS:
		mutex_lock(&lo->lo_ctl_mutex);
		err = loop_get_status_compat(
			lo, (struct compat_loop_info __user *) arg);
		mutex_unlock(&lo->lo_ctl_mutex);
		break;
	case LOOP_SET_CAPACITY:
	case LOOP_CLR_FD:
	case LOOP_GET_STATUS64:
	case LOOP_SET_STATUS64:
		arg = (unsigned long) compat_ptr(arg);
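		/* fall through: these commands share the native handler */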
	case LOOP_SET_FD:
	case LOOP_CHANGE_FD:
		err = lo_ioctl(bdev, mode, cmd, arg);
		break;
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
#endif

static int lo_open(struct block_device *bdev, fmode_t mode)
{
	struct loop_device *lo = bdev->bd_disk->private_data;

	mutex_lock(&loop_mutex);
	mutex_lock(&lo->lo_ctl_mutex);
	lo->lo_refcnt++;
	mutex_unlock(&lo->lo_ctl_mutex);
	mutex_unlock(&loop_mutex);

	return 0;
}

static int lo_release(struct gendisk *disk, fmode_t mode)
{
	struct loop_device *lo = disk->private_data;
	int err;

	mutex_lock(&loop_mutex);
	mutex_lock(&lo->lo_ctl_mutex);

	if (--lo->lo_refcnt)
		goto out;

	if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
		/*
		 * In autoclear mode, stop the loop thread
		 * and remove configuration after last close.
		 */
		err = loop_clr_fd(lo, NULL);
		if (!err)
			goto out_unlocked;
	} else {
		/*
		 * Otherwise keep thread (if running) and config,
		 * but flush possible ongoing bios in thread.
		 */
		loop_flush(lo);
	}

out:
	mutex_unlock(&lo->lo_ctl_mutex);
out_unlocked:
	mutex_unlock(&loop_mutex);
	return 0;
}

static const struct block_device_operations lo_fops = {
	.owner =	THIS_MODULE,
	.open =		lo_open,
	.release =	lo_release,
	.ioctl =	lo_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	lo_compat_ioctl,
#endif
};

/*
 * And now the module code and kernel interface.
 */
static int max_loop;
module_param(max_loop, int, 0);
MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
module_param(max_part, int, 0);
MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);

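/*
 * loop_register_transfer - plug in an extra transfer (encryption)
 * method.  A crypto module would typically do something like this
 * (sketch; my_funcs and my_transfer are illustrative names):
 *
 *	static struct loop_func_table my_funcs = {
 *		.number   = LO_CRYPT_CRYPTOAPI,
 *		.transfer = my_transfer,
 *		.owner    = THIS_MODULE,
 *	};
 *	loop_register_transfer(&my_funcs);
 */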
int loop_register_transfer(struct loop_func_table *funcs)
{
	unsigned int n = funcs->number;

	if (n >= MAX_LO_CRYPT || xfer_funcs[n])
		return -EINVAL;
	xfer_funcs[n] = funcs;
	return 0;
}

int loop_unregister_transfer(int number)
{
	unsigned int n = number;
	struct loop_device *lo;
	struct loop_func_table *xfer;

	if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
		return -EINVAL;

	xfer_funcs[n] = NULL;

	list_for_each_entry(lo, &loop_devices, lo_list) {
		mutex_lock(&lo->lo_ctl_mutex);

		if (lo->lo_encryption == xfer)
			loop_release_xfer(lo);

		mutex_unlock(&lo->lo_ctl_mutex);
	}

	return 0;
}

EXPORT_SYMBOL(loop_register_transfer);
EXPORT_SYMBOL(loop_unregister_transfer);

static struct loop_device *loop_alloc(int i)
{
	struct loop_device *lo;
	struct gendisk *disk;

	lo = kzalloc(sizeof(*lo), GFP_KERNEL);
	if (!lo)
		goto out;

	lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
	if (!lo->lo_queue)
		goto out_free_dev;

	disk = lo->lo_disk = alloc_disk(1 << part_shift);
	if (!disk)
		goto out_free_queue;

	mutex_init(&lo->lo_ctl_mutex);
	lo->lo_number		= i;
	lo->lo_thread		= NULL;
	init_waitqueue_head(&lo->lo_event);
	spin_lock_init(&lo->lo_lock);
	disk->major		= LOOP_MAJOR;
	disk->first_minor	= i << part_shift;
	disk->fops		= &lo_fops;
	disk->private_data	= lo;
	disk->queue		= lo->lo_queue;
	sprintf(disk->disk_name, "loop%d", i);
	return lo;

out_free_queue:
	blk_cleanup_queue(lo->lo_queue);
out_free_dev:
	kfree(lo);
out:
	return NULL;
}

static void loop_free(struct loop_device *lo)
{
	if (!lo->lo_queue->queue_lock)
		lo->lo_queue->queue_lock = &lo->lo_queue->__queue_lock;

	blk_cleanup_queue(lo->lo_queue);
	put_disk(lo->lo_disk);
	list_del(&lo->lo_list);
	kfree(lo);
}

static struct loop_device *loop_init_one(int i)
{
	struct loop_device *lo;

	list_for_each_entry(lo, &loop_devices, lo_list) {
		if (lo->lo_number == i)
			return lo;
	}

	lo = loop_alloc(i);
	if (lo) {
		add_disk(lo->lo_disk);
		list_add_tail(&lo->lo_list, &loop_devices);
	}
	return lo;
}

static void loop_del_one(struct loop_device *lo)
{
	del_gendisk(lo->lo_disk);
	loop_free(lo);
}

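/*
 * loop_probe - blk_register_region() callback: instantiates a loop
 * device on first access to its device node, enabling on-demand
 * creation beyond the devices made at module load.
 */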
static struct kobject *loop_probe(dev_t dev, int *part, void *data)
{
	struct loop_device *lo;
	struct kobject *kobj;

	mutex_lock(&loop_devices_mutex);
	lo = loop_init_one(dev & MINORMASK);
	kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM);
	mutex_unlock(&loop_devices_mutex);

	*part = 0;
	return kobj;
}

static int __init loop_init(void)
{
	int i, nr;
	unsigned long range;
	struct loop_device *lo, *next;

	/*
	 * The loop module now has a feature to instantiate the underlying
	 * device structure on demand, provided its device node is accessed.
	 * However, this does not work well with user space tools that do
	 * not know about the feature.  In order not to break any existing
	 * tool, we do the following:
	 *
	 * (1) if max_loop is specified, create that many upfront, and this
	 *     also becomes a hard limit.
	 * (2) if max_loop is not specified, create 8 loop devices on module
	 *     load; users can further extend the set of loop devices by
	 *     creating device nodes themselves and having the kernel
	 *     instantiate the actual device on demand.
	 */

	part_shift = 0;
	if (max_part > 0)
		part_shift = fls(max_part);
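	/*
	 * Illustrative example: max_part=15 yields part_shift=4 (fls(15)),
	 * so each loop device spans 16 minors: the whole device plus up
	 * to 15 partitions.
	 */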

	if (max_loop > 1UL << (MINORBITS - part_shift))
		return -EINVAL;

	if (max_loop) {
		nr = max_loop;
		range = max_loop;
	} else {
		nr = 8;
		range = 1UL << (MINORBITS - part_shift);
	}

	if (register_blkdev(LOOP_MAJOR, "loop"))
		return -EIO;

	for (i = 0; i < nr; i++) {
		lo = loop_alloc(i);
		if (!lo)
			goto Enomem;
		list_add_tail(&lo->lo_list, &loop_devices);
	}

	/* point of no return */

	list_for_each_entry(lo, &loop_devices, lo_list)
		add_disk(lo->lo_disk);

	blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
				  THIS_MODULE, loop_probe, NULL, NULL);

	printk(KERN_INFO "loop: module loaded\n");
	return 0;

Enomem:
	printk(KERN_INFO "loop: out of memory\n");

	list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
		loop_free(lo);

	unregister_blkdev(LOOP_MAJOR, "loop");
	return -ENOMEM;
}

static void __exit loop_exit(void)
{
	unsigned long range;
	struct loop_device *lo, *next;

	range = max_loop ? max_loop : 1UL << (MINORBITS - part_shift);

	list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
		loop_del_one(lo);

	blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
	unregister_blkdev(LOOP_MAJOR, "loop");
}

module_init(loop_init);
module_exit(loop_exit);

#ifndef MODULE
static int __init max_loop_setup(char *str)
{
	max_loop = simple_strtol(str, NULL, 0);
	return 1;
}

__setup("max_loop=", max_loop_setup);
#endif