1 /* 2 * linux/drivers/block/loop.c 3 * 4 * Written by Theodore Ts'o, 3/29/93 5 * 6 * Copyright 1993 by Theodore Ts'o. Redistribution of this file is 7 * permitted under the GNU General Public License. 8 * 9 * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993 10 * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996 11 * 12 * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994 13 * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996 14 * 15 * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997 16 * 17 * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998 18 * 19 * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998 20 * 21 * Loadable modules and other fixes by AK, 1998 22 * 23 * Make real block number available to downstream transfer functions, enables 24 * CBC (and relatives) mode encryption requiring unique IVs per data block. 25 * Reed H. Petty, rhp@draper.net 26 * 27 * Maximum number of loop devices now dynamic via max_loop module parameter. 28 * Russell Kroll <rkroll@exploits.org> 19990701 29 * 30 * Maximum number of loop devices when compiled-in now selectable by passing 31 * max_loop=<1-255> to the kernel on boot. 32 * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999 33 * 34 * Completely rewrite request handling to be make_request_fn style and 35 * non blocking, pushing work to a helper thread. Lots of fixes from 36 * Al Viro too. 37 * Jens Axboe <axboe@suse.de>, Nov 2000 38 * 39 * Support up to 256 loop devices 40 * Heinz Mauelshagen <mge@sistina.com>, Feb 2002 41 * 42 * Support for falling back on the write file operation when the address space 43 * operations write_begin is not available on the backing filesystem. 44 * Anton Altaparmakov, 16 Feb 2005 45 * 46 * Still To Fix: 47 * - Advisory locking is ignored here. 
48 * - Should use an own CAP_* category instead of CAP_SYS_ADMIN 49 * 50 */ 51 52 #include <linux/module.h> 53 #include <linux/moduleparam.h> 54 #include <linux/sched.h> 55 #include <linux/fs.h> 56 #include <linux/file.h> 57 #include <linux/stat.h> 58 #include <linux/errno.h> 59 #include <linux/major.h> 60 #include <linux/wait.h> 61 #include <linux/blkdev.h> 62 #include <linux/blkpg.h> 63 #include <linux/init.h> 64 #include <linux/swap.h> 65 #include <linux/slab.h> 66 #include <linux/compat.h> 67 #include <linux/suspend.h> 68 #include <linux/freezer.h> 69 #include <linux/mutex.h> 70 #include <linux/writeback.h> 71 #include <linux/completion.h> 72 #include <linux/highmem.h> 73 #include <linux/kthread.h> 74 #include <linux/splice.h> 75 #include <linux/sysfs.h> 76 #include <linux/miscdevice.h> 77 #include <linux/falloc.h> 78 #include <linux/uio.h> 79 #include "loop.h" 80 81 #include <asm/uaccess.h> 82 83 static DEFINE_IDR(loop_index_idr); 84 static DEFINE_MUTEX(loop_index_mutex); 85 86 static int max_part; 87 static int part_shift; 88 89 static struct workqueue_struct *loop_wq; 90 91 /* 92 * Transfer functions 93 */ 94 static int transfer_none(struct loop_device *lo, int cmd, 95 struct page *raw_page, unsigned raw_off, 96 struct page *loop_page, unsigned loop_off, 97 int size, sector_t real_block) 98 { 99 char *raw_buf = kmap_atomic(raw_page) + raw_off; 100 char *loop_buf = kmap_atomic(loop_page) + loop_off; 101 102 if (cmd == READ) 103 memcpy(loop_buf, raw_buf, size); 104 else 105 memcpy(raw_buf, loop_buf, size); 106 107 kunmap_atomic(loop_buf); 108 kunmap_atomic(raw_buf); 109 cond_resched(); 110 return 0; 111 } 112 113 static int transfer_xor(struct loop_device *lo, int cmd, 114 struct page *raw_page, unsigned raw_off, 115 struct page *loop_page, unsigned loop_off, 116 int size, sector_t real_block) 117 { 118 char *raw_buf = kmap_atomic(raw_page) + raw_off; 119 char *loop_buf = kmap_atomic(loop_page) + loop_off; 120 char *in, *out, *key; 121 int i, keysize; 122 123 
if (cmd == READ) { 124 in = raw_buf; 125 out = loop_buf; 126 } else { 127 in = loop_buf; 128 out = raw_buf; 129 } 130 131 key = lo->lo_encrypt_key; 132 keysize = lo->lo_encrypt_key_size; 133 for (i = 0; i < size; i++) 134 *out++ = *in++ ^ key[(i & 511) % keysize]; 135 136 kunmap_atomic(loop_buf); 137 kunmap_atomic(raw_buf); 138 cond_resched(); 139 return 0; 140 } 141 142 static int xor_init(struct loop_device *lo, const struct loop_info64 *info) 143 { 144 if (unlikely(info->lo_encrypt_key_size <= 0)) 145 return -EINVAL; 146 return 0; 147 } 148 149 static struct loop_func_table none_funcs = { 150 .number = LO_CRYPT_NONE, 151 .transfer = transfer_none, 152 }; 153 154 static struct loop_func_table xor_funcs = { 155 .number = LO_CRYPT_XOR, 156 .transfer = transfer_xor, 157 .init = xor_init 158 }; 159 160 /* xfer_funcs[0] is special - its release function is never called */ 161 static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { 162 &none_funcs, 163 &xor_funcs 164 }; 165 166 static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) 167 { 168 loff_t loopsize; 169 170 /* Compute loopsize in bytes */ 171 loopsize = i_size_read(file->f_mapping->host); 172 if (offset > 0) 173 loopsize -= offset; 174 /* offset is beyond i_size, weird but possible */ 175 if (loopsize < 0) 176 return 0; 177 178 if (sizelimit > 0 && sizelimit < loopsize) 179 loopsize = sizelimit; 180 /* 181 * Unfortunately, if we want to do I/O on the device, 182 * the number of 512-byte sectors has to fit into a sector_t. 
183 */ 184 return loopsize >> 9; 185 } 186 187 static loff_t get_loop_size(struct loop_device *lo, struct file *file) 188 { 189 return get_size(lo->lo_offset, lo->lo_sizelimit, file); 190 } 191 192 static int 193 figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) 194 { 195 loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); 196 sector_t x = (sector_t)size; 197 struct block_device *bdev = lo->lo_device; 198 199 if (unlikely((loff_t)x != size)) 200 return -EFBIG; 201 if (lo->lo_offset != offset) 202 lo->lo_offset = offset; 203 if (lo->lo_sizelimit != sizelimit) 204 lo->lo_sizelimit = sizelimit; 205 set_capacity(lo->lo_disk, x); 206 bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9); 207 /* let user-space know about the new size */ 208 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 209 return 0; 210 } 211 212 static inline int 213 lo_do_transfer(struct loop_device *lo, int cmd, 214 struct page *rpage, unsigned roffs, 215 struct page *lpage, unsigned loffs, 216 int size, sector_t rblock) 217 { 218 if (unlikely(!lo->transfer)) 219 return 0; 220 221 return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock); 222 } 223 224 /** 225 * __do_lo_send_write - helper for writing data to a loop device 226 * 227 * This helper just factors out common code between do_lo_send_direct_write() 228 * and do_lo_send_write(). 
229 */ 230 static int __do_lo_send_write(struct file *file, 231 u8 *buf, const int len, loff_t pos) 232 { 233 struct kvec kvec = {.iov_base = buf, .iov_len = len}; 234 struct iov_iter from; 235 ssize_t bw; 236 237 iov_iter_kvec(&from, ITER_KVEC | WRITE, &kvec, 1, len); 238 239 file_start_write(file); 240 bw = vfs_iter_write(file, &from, &pos); 241 file_end_write(file); 242 if (likely(bw == len)) 243 return 0; 244 printk_ratelimited(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n", 245 (unsigned long long)pos, len); 246 if (bw >= 0) 247 bw = -EIO; 248 return bw; 249 } 250 251 /** 252 * do_lo_send_direct_write - helper for writing data to a loop device 253 * 254 * This is the fast, non-transforming version that does not need double 255 * buffering. 256 */ 257 static int do_lo_send_direct_write(struct loop_device *lo, 258 struct bio_vec *bvec, loff_t pos, struct page *page) 259 { 260 ssize_t bw = __do_lo_send_write(lo->lo_backing_file, 261 kmap(bvec->bv_page) + bvec->bv_offset, 262 bvec->bv_len, pos); 263 kunmap(bvec->bv_page); 264 cond_resched(); 265 return bw; 266 } 267 268 /** 269 * do_lo_send_write - helper for writing data to a loop device 270 * 271 * This is the slow, transforming version that needs to double buffer the 272 * data as it cannot do the transformations in place without having direct 273 * access to the destination pages of the backing file. 
274 */ 275 static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec, 276 loff_t pos, struct page *page) 277 { 278 int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page, 279 bvec->bv_offset, bvec->bv_len, pos >> 9); 280 if (likely(!ret)) 281 return __do_lo_send_write(lo->lo_backing_file, 282 page_address(page), bvec->bv_len, 283 pos); 284 printk_ratelimited(KERN_ERR "loop: Transfer error at byte offset %llu, " 285 "length %i.\n", (unsigned long long)pos, bvec->bv_len); 286 if (ret > 0) 287 ret = -EIO; 288 return ret; 289 } 290 291 static int lo_send(struct loop_device *lo, struct request *rq, loff_t pos) 292 { 293 int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t, 294 struct page *page); 295 struct bio_vec bvec; 296 struct req_iterator iter; 297 struct page *page = NULL; 298 int ret = 0; 299 300 if (lo->transfer != transfer_none) { 301 page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); 302 if (unlikely(!page)) 303 goto fail; 304 kmap(page); 305 do_lo_send = do_lo_send_write; 306 } else { 307 do_lo_send = do_lo_send_direct_write; 308 } 309 310 rq_for_each_segment(bvec, rq, iter) { 311 ret = do_lo_send(lo, &bvec, pos, page); 312 if (ret < 0) 313 break; 314 pos += bvec.bv_len; 315 } 316 if (page) { 317 kunmap(page); 318 __free_page(page); 319 } 320 out: 321 return ret; 322 fail: 323 printk_ratelimited(KERN_ERR "loop: Failed to allocate temporary page for write.\n"); 324 ret = -ENOMEM; 325 goto out; 326 } 327 328 struct lo_read_data { 329 struct loop_device *lo; 330 struct page *page; 331 unsigned offset; 332 int bsize; 333 }; 334 335 static int 336 lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 337 struct splice_desc *sd) 338 { 339 struct lo_read_data *p = sd->u.data; 340 struct loop_device *lo = p->lo; 341 struct page *page = buf->page; 342 sector_t IV; 343 int size; 344 345 IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) + 346 (buf->offset >> 9); 347 size = sd->len; 348 if (size > p->bsize) 349 size = 
p->bsize; 350 351 if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) { 352 printk_ratelimited(KERN_ERR "loop: transfer error block %ld\n", 353 page->index); 354 size = -EINVAL; 355 } 356 357 flush_dcache_page(p->page); 358 359 if (size > 0) 360 p->offset += size; 361 362 return size; 363 } 364 365 static int 366 lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd) 367 { 368 return __splice_from_pipe(pipe, sd, lo_splice_actor); 369 } 370 371 static ssize_t 372 do_lo_receive(struct loop_device *lo, 373 struct bio_vec *bvec, int bsize, loff_t pos) 374 { 375 struct lo_read_data cookie; 376 struct splice_desc sd; 377 struct file *file; 378 ssize_t retval; 379 380 cookie.lo = lo; 381 cookie.page = bvec->bv_page; 382 cookie.offset = bvec->bv_offset; 383 cookie.bsize = bsize; 384 385 sd.len = 0; 386 sd.total_len = bvec->bv_len; 387 sd.flags = 0; 388 sd.pos = pos; 389 sd.u.data = &cookie; 390 391 file = lo->lo_backing_file; 392 retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor); 393 394 return retval; 395 } 396 397 static int 398 lo_receive(struct loop_device *lo, struct request *rq, int bsize, loff_t pos) 399 { 400 struct bio_vec bvec; 401 struct req_iterator iter; 402 ssize_t s; 403 404 rq_for_each_segment(bvec, rq, iter) { 405 s = do_lo_receive(lo, &bvec, bsize, pos); 406 if (s < 0) 407 return s; 408 409 if (s != bvec.bv_len) { 410 struct bio *bio; 411 412 __rq_for_each_bio(bio, rq) 413 zero_fill_bio(bio); 414 break; 415 } 416 pos += bvec.bv_len; 417 } 418 return 0; 419 } 420 421 static int lo_discard(struct loop_device *lo, struct request *rq, loff_t pos) 422 { 423 /* 424 * We use punch hole to reclaim the free space used by the 425 * image a.k.a. discard. However we do not support discard if 426 * encryption is enabled, because it may give an attacker 427 * useful information. 
428 */ 429 struct file *file = lo->lo_backing_file; 430 int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; 431 int ret; 432 433 if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) { 434 ret = -EOPNOTSUPP; 435 goto out; 436 } 437 438 ret = file->f_op->fallocate(file, mode, pos, blk_rq_bytes(rq)); 439 if (unlikely(ret && ret != -EINVAL && ret != -EOPNOTSUPP)) 440 ret = -EIO; 441 out: 442 return ret; 443 } 444 445 static int lo_req_flush(struct loop_device *lo, struct request *rq) 446 { 447 struct file *file = lo->lo_backing_file; 448 int ret = vfs_fsync(file, 0); 449 if (unlikely(ret && ret != -EINVAL)) 450 ret = -EIO; 451 452 return ret; 453 } 454 455 static int do_req_filebacked(struct loop_device *lo, struct request *rq) 456 { 457 loff_t pos; 458 int ret; 459 460 pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset; 461 462 if (rq->cmd_flags & REQ_WRITE) { 463 if (rq->cmd_flags & REQ_FLUSH) 464 ret = lo_req_flush(lo, rq); 465 else if (rq->cmd_flags & REQ_DISCARD) 466 ret = lo_discard(lo, rq, pos); 467 else 468 ret = lo_send(lo, rq, pos); 469 } else 470 ret = lo_receive(lo, rq, lo->lo_blocksize, pos); 471 472 return ret; 473 } 474 475 struct switch_request { 476 struct file *file; 477 struct completion wait; 478 }; 479 480 /* 481 * Do the actual switch; called from the BIO completion routine 482 */ 483 static void do_loop_switch(struct loop_device *lo, struct switch_request *p) 484 { 485 struct file *file = p->file; 486 struct file *old_file = lo->lo_backing_file; 487 struct address_space *mapping; 488 489 /* if no new file, only flush of queued bios requested */ 490 if (!file) 491 return; 492 493 mapping = file->f_mapping; 494 mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); 495 lo->lo_backing_file = file; 496 lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ? 
497 mapping->host->i_bdev->bd_block_size : PAGE_SIZE; 498 lo->old_gfp_mask = mapping_gfp_mask(mapping); 499 mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); 500 } 501 502 /* 503 * loop_switch performs the hard work of switching a backing store. 504 * First it needs to flush existing IO, it does this by sending a magic 505 * BIO down the pipe. The completion of this BIO does the actual switch. 506 */ 507 static int loop_switch(struct loop_device *lo, struct file *file) 508 { 509 struct switch_request w; 510 511 w.file = file; 512 513 /* freeze queue and wait for completion of scheduled requests */ 514 blk_mq_freeze_queue(lo->lo_queue); 515 516 /* do the switch action */ 517 do_loop_switch(lo, &w); 518 519 /* unfreeze */ 520 blk_mq_unfreeze_queue(lo->lo_queue); 521 522 return 0; 523 } 524 525 /* 526 * Helper to flush the IOs in loop, but keeping loop thread running 527 */ 528 static int loop_flush(struct loop_device *lo) 529 { 530 return loop_switch(lo, NULL); 531 } 532 533 /* 534 * loop_change_fd switched the backing store of a loopback device to 535 * a new file. This is useful for operating system installers to free up 536 * the original file and in High Availability environments to switch to 537 * an alternative location for the content in case of server meltdown. 538 * This can only work if the loop device is used read-only, and if the 539 * new backing store is the same size and type as the old backing store. 
540 */ 541 static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, 542 unsigned int arg) 543 { 544 struct file *file, *old_file; 545 struct inode *inode; 546 int error; 547 548 error = -ENXIO; 549 if (lo->lo_state != Lo_bound) 550 goto out; 551 552 /* the loop device has to be read-only */ 553 error = -EINVAL; 554 if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) 555 goto out; 556 557 error = -EBADF; 558 file = fget(arg); 559 if (!file) 560 goto out; 561 562 inode = file->f_mapping->host; 563 old_file = lo->lo_backing_file; 564 565 error = -EINVAL; 566 567 if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) 568 goto out_putf; 569 570 /* size of the new backing store needs to be the same */ 571 if (get_loop_size(lo, file) != get_loop_size(lo, old_file)) 572 goto out_putf; 573 574 /* and ... switch */ 575 error = loop_switch(lo, file); 576 if (error) 577 goto out_putf; 578 579 fput(old_file); 580 if (lo->lo_flags & LO_FLAGS_PARTSCAN) 581 ioctl_by_bdev(bdev, BLKRRPART, 0); 582 return 0; 583 584 out_putf: 585 fput(file); 586 out: 587 return error; 588 } 589 590 static inline int is_loop_device(struct file *file) 591 { 592 struct inode *i = file->f_mapping->host; 593 594 return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR; 595 } 596 597 /* loop sysfs attributes */ 598 599 static ssize_t loop_attr_show(struct device *dev, char *page, 600 ssize_t (*callback)(struct loop_device *, char *)) 601 { 602 struct gendisk *disk = dev_to_disk(dev); 603 struct loop_device *lo = disk->private_data; 604 605 return callback(lo, page); 606 } 607 608 #define LOOP_ATTR_RO(_name) \ 609 static ssize_t loop_attr_##_name##_show(struct loop_device *, char *); \ 610 static ssize_t loop_attr_do_show_##_name(struct device *d, \ 611 struct device_attribute *attr, char *b) \ 612 { \ 613 return loop_attr_show(d, b, loop_attr_##_name##_show); \ 614 } \ 615 static struct device_attribute loop_attr_##_name = \ 616 __ATTR(_name, S_IRUGO, loop_attr_do_show_##_name, NULL); 
617 618 static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf) 619 { 620 ssize_t ret; 621 char *p = NULL; 622 623 spin_lock_irq(&lo->lo_lock); 624 if (lo->lo_backing_file) 625 p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1); 626 spin_unlock_irq(&lo->lo_lock); 627 628 if (IS_ERR_OR_NULL(p)) 629 ret = PTR_ERR(p); 630 else { 631 ret = strlen(p); 632 memmove(buf, p, ret); 633 buf[ret++] = '\n'; 634 buf[ret] = 0; 635 } 636 637 return ret; 638 } 639 640 static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf) 641 { 642 return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset); 643 } 644 645 static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf) 646 { 647 return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit); 648 } 649 650 static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf) 651 { 652 int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR); 653 654 return sprintf(buf, "%s\n", autoclear ? "1" : "0"); 655 } 656 657 static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf) 658 { 659 int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN); 660 661 return sprintf(buf, "%s\n", partscan ? 
"1" : "0"); 662 } 663 664 LOOP_ATTR_RO(backing_file); 665 LOOP_ATTR_RO(offset); 666 LOOP_ATTR_RO(sizelimit); 667 LOOP_ATTR_RO(autoclear); 668 LOOP_ATTR_RO(partscan); 669 670 static struct attribute *loop_attrs[] = { 671 &loop_attr_backing_file.attr, 672 &loop_attr_offset.attr, 673 &loop_attr_sizelimit.attr, 674 &loop_attr_autoclear.attr, 675 &loop_attr_partscan.attr, 676 NULL, 677 }; 678 679 static struct attribute_group loop_attribute_group = { 680 .name = "loop", 681 .attrs= loop_attrs, 682 }; 683 684 static int loop_sysfs_init(struct loop_device *lo) 685 { 686 return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj, 687 &loop_attribute_group); 688 } 689 690 static void loop_sysfs_exit(struct loop_device *lo) 691 { 692 sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj, 693 &loop_attribute_group); 694 } 695 696 static void loop_config_discard(struct loop_device *lo) 697 { 698 struct file *file = lo->lo_backing_file; 699 struct inode *inode = file->f_mapping->host; 700 struct request_queue *q = lo->lo_queue; 701 702 /* 703 * We use punch hole to reclaim the free space used by the 704 * image a.k.a. discard. However we do not support discard if 705 * encryption is enabled, because it may give an attacker 706 * useful information. 
707 */ 708 if ((!file->f_op->fallocate) || 709 lo->lo_encrypt_key_size) { 710 q->limits.discard_granularity = 0; 711 q->limits.discard_alignment = 0; 712 q->limits.max_discard_sectors = 0; 713 q->limits.discard_zeroes_data = 0; 714 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); 715 return; 716 } 717 718 q->limits.discard_granularity = inode->i_sb->s_blocksize; 719 q->limits.discard_alignment = 0; 720 q->limits.max_discard_sectors = UINT_MAX >> 9; 721 q->limits.discard_zeroes_data = 1; 722 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); 723 } 724 725 static int loop_set_fd(struct loop_device *lo, fmode_t mode, 726 struct block_device *bdev, unsigned int arg) 727 { 728 struct file *file, *f; 729 struct inode *inode; 730 struct address_space *mapping; 731 unsigned lo_blocksize; 732 int lo_flags = 0; 733 int error; 734 loff_t size; 735 736 /* This is safe, since we have a reference from open(). */ 737 __module_get(THIS_MODULE); 738 739 error = -EBADF; 740 file = fget(arg); 741 if (!file) 742 goto out; 743 744 error = -EBUSY; 745 if (lo->lo_state != Lo_unbound) 746 goto out_putf; 747 748 /* Avoid recursion */ 749 f = file; 750 while (is_loop_device(f)) { 751 struct loop_device *l; 752 753 if (f->f_mapping->host->i_bdev == bdev) 754 goto out_putf; 755 756 l = f->f_mapping->host->i_bdev->bd_disk->private_data; 757 if (l->lo_state == Lo_unbound) { 758 error = -EINVAL; 759 goto out_putf; 760 } 761 f = l->lo_backing_file; 762 } 763 764 mapping = file->f_mapping; 765 inode = mapping->host; 766 767 error = -EINVAL; 768 if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) 769 goto out_putf; 770 771 if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) || 772 !file->f_op->write_iter) 773 lo_flags |= LO_FLAGS_READ_ONLY; 774 775 lo_blocksize = S_ISBLK(inode->i_mode) ? 
776 inode->i_bdev->bd_block_size : PAGE_SIZE; 777 778 error = -EFBIG; 779 size = get_loop_size(lo, file); 780 if ((loff_t)(sector_t)size != size) 781 goto out_putf; 782 783 error = 0; 784 785 set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); 786 787 lo->lo_blocksize = lo_blocksize; 788 lo->lo_device = bdev; 789 lo->lo_flags = lo_flags; 790 lo->lo_backing_file = file; 791 lo->transfer = transfer_none; 792 lo->ioctl = NULL; 793 lo->lo_sizelimit = 0; 794 lo->old_gfp_mask = mapping_gfp_mask(mapping); 795 mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); 796 797 if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) 798 blk_queue_flush(lo->lo_queue, REQ_FLUSH); 799 800 set_capacity(lo->lo_disk, size); 801 bd_set_size(bdev, size << 9); 802 loop_sysfs_init(lo); 803 /* let user-space know about the new size */ 804 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 805 806 set_blocksize(bdev, lo_blocksize); 807 808 lo->lo_state = Lo_bound; 809 if (part_shift) 810 lo->lo_flags |= LO_FLAGS_PARTSCAN; 811 if (lo->lo_flags & LO_FLAGS_PARTSCAN) 812 ioctl_by_bdev(bdev, BLKRRPART, 0); 813 814 /* Grab the block_device to prevent its destruction after we 815 * put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev). 816 */ 817 bdgrab(bdev); 818 return 0; 819 820 out_putf: 821 fput(file); 822 out: 823 /* This is safe: open() is still holding a reference. 
*/ 824 module_put(THIS_MODULE); 825 return error; 826 } 827 828 static int 829 loop_release_xfer(struct loop_device *lo) 830 { 831 int err = 0; 832 struct loop_func_table *xfer = lo->lo_encryption; 833 834 if (xfer) { 835 if (xfer->release) 836 err = xfer->release(lo); 837 lo->transfer = NULL; 838 lo->lo_encryption = NULL; 839 module_put(xfer->owner); 840 } 841 return err; 842 } 843 844 static int 845 loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, 846 const struct loop_info64 *i) 847 { 848 int err = 0; 849 850 if (xfer) { 851 struct module *owner = xfer->owner; 852 853 if (!try_module_get(owner)) 854 return -EINVAL; 855 if (xfer->init) 856 err = xfer->init(lo, i); 857 if (err) 858 module_put(owner); 859 else 860 lo->lo_encryption = xfer; 861 } 862 return err; 863 } 864 865 static int loop_clr_fd(struct loop_device *lo) 866 { 867 struct file *filp = lo->lo_backing_file; 868 gfp_t gfp = lo->old_gfp_mask; 869 struct block_device *bdev = lo->lo_device; 870 871 if (lo->lo_state != Lo_bound) 872 return -ENXIO; 873 874 /* 875 * If we've explicitly asked to tear down the loop device, 876 * and it has an elevated reference count, set it for auto-teardown when 877 * the last reference goes away. This stops $!~#$@ udev from 878 * preventing teardown because it decided that it needs to run blkid on 879 * the loopback device whenever they appear. xfstests is notorious for 880 * failing tests because blkid via udev races with a losetup 881 * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d 882 * command to fail with EBUSY. 
883 */ 884 if (lo->lo_refcnt > 1) { 885 lo->lo_flags |= LO_FLAGS_AUTOCLEAR; 886 mutex_unlock(&lo->lo_ctl_mutex); 887 return 0; 888 } 889 890 if (filp == NULL) 891 return -EINVAL; 892 893 spin_lock_irq(&lo->lo_lock); 894 lo->lo_state = Lo_rundown; 895 lo->lo_backing_file = NULL; 896 spin_unlock_irq(&lo->lo_lock); 897 898 loop_release_xfer(lo); 899 lo->transfer = NULL; 900 lo->ioctl = NULL; 901 lo->lo_device = NULL; 902 lo->lo_encryption = NULL; 903 lo->lo_offset = 0; 904 lo->lo_sizelimit = 0; 905 lo->lo_encrypt_key_size = 0; 906 memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); 907 memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); 908 memset(lo->lo_file_name, 0, LO_NAME_SIZE); 909 if (bdev) { 910 bdput(bdev); 911 invalidate_bdev(bdev); 912 } 913 set_capacity(lo->lo_disk, 0); 914 loop_sysfs_exit(lo); 915 if (bdev) { 916 bd_set_size(bdev, 0); 917 /* let user-space know about this change */ 918 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 919 } 920 mapping_set_gfp_mask(filp->f_mapping, gfp); 921 lo->lo_state = Lo_unbound; 922 /* This is safe: open() is still holding a reference. */ 923 module_put(THIS_MODULE); 924 if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) 925 ioctl_by_bdev(bdev, BLKRRPART, 0); 926 lo->lo_flags = 0; 927 if (!part_shift) 928 lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; 929 mutex_unlock(&lo->lo_ctl_mutex); 930 /* 931 * Need not hold lo_ctl_mutex to fput backing file. 932 * Calling fput holding lo_ctl_mutex triggers a circular 933 * lock dependency possibility warning as fput can take 934 * bd_mutex which is usually taken before lo_ctl_mutex. 
935 */ 936 fput(filp); 937 return 0; 938 } 939 940 static int 941 loop_set_status(struct loop_device *lo, const struct loop_info64 *info) 942 { 943 int err; 944 struct loop_func_table *xfer; 945 kuid_t uid = current_uid(); 946 947 if (lo->lo_encrypt_key_size && 948 !uid_eq(lo->lo_key_owner, uid) && 949 !capable(CAP_SYS_ADMIN)) 950 return -EPERM; 951 if (lo->lo_state != Lo_bound) 952 return -ENXIO; 953 if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) 954 return -EINVAL; 955 956 err = loop_release_xfer(lo); 957 if (err) 958 return err; 959 960 if (info->lo_encrypt_type) { 961 unsigned int type = info->lo_encrypt_type; 962 963 if (type >= MAX_LO_CRYPT) 964 return -EINVAL; 965 xfer = xfer_funcs[type]; 966 if (xfer == NULL) 967 return -EINVAL; 968 } else 969 xfer = NULL; 970 971 err = loop_init_xfer(lo, xfer, info); 972 if (err) 973 return err; 974 975 if (lo->lo_offset != info->lo_offset || 976 lo->lo_sizelimit != info->lo_sizelimit) 977 if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) 978 return -EFBIG; 979 980 loop_config_discard(lo); 981 982 memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); 983 memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); 984 lo->lo_file_name[LO_NAME_SIZE-1] = 0; 985 lo->lo_crypt_name[LO_NAME_SIZE-1] = 0; 986 987 if (!xfer) 988 xfer = &none_funcs; 989 lo->transfer = xfer->transfer; 990 lo->ioctl = xfer->ioctl; 991 992 if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) != 993 (info->lo_flags & LO_FLAGS_AUTOCLEAR)) 994 lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; 995 996 if ((info->lo_flags & LO_FLAGS_PARTSCAN) && 997 !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { 998 lo->lo_flags |= LO_FLAGS_PARTSCAN; 999 lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; 1000 ioctl_by_bdev(lo->lo_device, BLKRRPART, 0); 1001 } 1002 1003 lo->lo_encrypt_key_size = info->lo_encrypt_key_size; 1004 lo->lo_init[0] = info->lo_init[0]; 1005 lo->lo_init[1] = info->lo_init[1]; 1006 if (info->lo_encrypt_key_size) { 1007 memcpy(lo->lo_encrypt_key, 
info->lo_encrypt_key, 1008 info->lo_encrypt_key_size); 1009 lo->lo_key_owner = uid; 1010 } 1011 1012 return 0; 1013 } 1014 1015 static int 1016 loop_get_status(struct loop_device *lo, struct loop_info64 *info) 1017 { 1018 struct file *file = lo->lo_backing_file; 1019 struct kstat stat; 1020 int error; 1021 1022 if (lo->lo_state != Lo_bound) 1023 return -ENXIO; 1024 error = vfs_getattr(&file->f_path, &stat); 1025 if (error) 1026 return error; 1027 memset(info, 0, sizeof(*info)); 1028 info->lo_number = lo->lo_number; 1029 info->lo_device = huge_encode_dev(stat.dev); 1030 info->lo_inode = stat.ino; 1031 info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev); 1032 info->lo_offset = lo->lo_offset; 1033 info->lo_sizelimit = lo->lo_sizelimit; 1034 info->lo_flags = lo->lo_flags; 1035 memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE); 1036 memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE); 1037 info->lo_encrypt_type = 1038 lo->lo_encryption ? lo->lo_encryption->number : 0; 1039 if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) { 1040 info->lo_encrypt_key_size = lo->lo_encrypt_key_size; 1041 memcpy(info->lo_encrypt_key, lo->lo_encrypt_key, 1042 lo->lo_encrypt_key_size); 1043 } 1044 return 0; 1045 } 1046 1047 static void 1048 loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64) 1049 { 1050 memset(info64, 0, sizeof(*info64)); 1051 info64->lo_number = info->lo_number; 1052 info64->lo_device = info->lo_device; 1053 info64->lo_inode = info->lo_inode; 1054 info64->lo_rdevice = info->lo_rdevice; 1055 info64->lo_offset = info->lo_offset; 1056 info64->lo_sizelimit = 0; 1057 info64->lo_encrypt_type = info->lo_encrypt_type; 1058 info64->lo_encrypt_key_size = info->lo_encrypt_key_size; 1059 info64->lo_flags = info->lo_flags; 1060 info64->lo_init[0] = info->lo_init[0]; 1061 info64->lo_init[1] = info->lo_init[1]; 1062 if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI) 1063 memcpy(info64->lo_crypt_name, info->lo_name, 
LO_NAME_SIZE); 1064 else 1065 memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE); 1066 memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE); 1067 } 1068 1069 static int 1070 loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info) 1071 { 1072 memset(info, 0, sizeof(*info)); 1073 info->lo_number = info64->lo_number; 1074 info->lo_device = info64->lo_device; 1075 info->lo_inode = info64->lo_inode; 1076 info->lo_rdevice = info64->lo_rdevice; 1077 info->lo_offset = info64->lo_offset; 1078 info->lo_encrypt_type = info64->lo_encrypt_type; 1079 info->lo_encrypt_key_size = info64->lo_encrypt_key_size; 1080 info->lo_flags = info64->lo_flags; 1081 info->lo_init[0] = info64->lo_init[0]; 1082 info->lo_init[1] = info64->lo_init[1]; 1083 if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI) 1084 memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE); 1085 else 1086 memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE); 1087 memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); 1088 1089 /* error in case values were truncated */ 1090 if (info->lo_device != info64->lo_device || 1091 info->lo_rdevice != info64->lo_rdevice || 1092 info->lo_inode != info64->lo_inode || 1093 info->lo_offset != info64->lo_offset) 1094 return -EOVERFLOW; 1095 1096 return 0; 1097 } 1098 1099 static int 1100 loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg) 1101 { 1102 struct loop_info info; 1103 struct loop_info64 info64; 1104 1105 if (copy_from_user(&info, arg, sizeof (struct loop_info))) 1106 return -EFAULT; 1107 loop_info64_from_old(&info, &info64); 1108 return loop_set_status(lo, &info64); 1109 } 1110 1111 static int 1112 loop_set_status64(struct loop_device *lo, const struct loop_info64 __user *arg) 1113 { 1114 struct loop_info64 info64; 1115 1116 if (copy_from_user(&info64, arg, sizeof (struct loop_info64))) 1117 return -EFAULT; 1118 return loop_set_status(lo, &info64); 1119 } 1120 1121 static int 1122 
loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) {
	struct loop_info info;
	struct loop_info64 info64;
	int err = 0;

	/*
	 * Each step only runs while err is still 0, so the first failure
	 * (-EINVAL for a NULL @arg, the loop_get_status() error, -EOVERFLOW
	 * from the narrowing conversion, -EFAULT from the copy) is returned.
	 */
	if (!arg)
		err = -EINVAL;
	if (!err)
		err = loop_get_status(lo, &info64);
	if (!err)
		err = loop_info64_to_old(&info64, &info);
	if (!err && copy_to_user(arg, &info, sizeof(info)))
		err = -EFAULT;

	return err;
}

/*
 * LOOP_GET_STATUS64 backend: report the device status to user space as a
 * struct loop_info64.  Returns -EINVAL for a NULL @arg, the error from
 * loop_get_status(), or -EFAULT when the copy to user space fails.
 */
static int
loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
	struct loop_info64 info64;
	int err = 0;

	if (!arg)
		err = -EINVAL;
	if (!err)
		err = loop_get_status(lo, &info64);
	if (!err && copy_to_user(arg, &info64, sizeof(info64)))
		err = -EFAULT;

	return err;
}

/*
 * LOOP_SET_CAPACITY backend: recompute the device size from the current
 * offset and size limit.  Returns -ENXIO unless the device is bound.
 * @bdev is not used here.
 */
static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
{
	if (unlikely(lo->lo_state != Lo_bound))
		return -ENXIO;

	return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
}

/*
 * lo_ioctl - ioctl entry point of the loop block device
 *
 * Dispatches the LOOP_* commands with lo_ctl_mutex held.  The commands that
 * modify status or capacity return -EPERM unless the device was opened for
 * writing or the caller has CAP_SYS_ADMIN.  Unknown commands are forwarded to
 * the optional lo->ioctl hook, or fail with -EINVAL when no hook is set.
 */
static int lo_ioctl(struct block_device *bdev, fmode_t mode,
	unsigned int cmd, unsigned long arg)
{
	struct loop_device *lo = bdev->bd_disk->private_data;
	int err;

	/* taken with lockdep subclass 1 */
	mutex_lock_nested(&lo->lo_ctl_mutex, 1);
	switch (cmd) {
	case LOOP_SET_FD:
		err = loop_set_fd(lo, mode, bdev, arg);
		break;
	case LOOP_CHANGE_FD:
		err = loop_change_fd(lo, bdev, arg);
		break;
	case LOOP_CLR_FD:
		/* loop_clr_fd would have unlocked lo_ctl_mutex on success */
		err = loop_clr_fd(lo);
		if (!err)
			goto out_unlocked;
		break;
	case LOOP_SET_STATUS:
		err = -EPERM;
		if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
			err = loop_set_status_old(lo,
					(struct loop_info __user *)arg);
		break;
	case LOOP_GET_STATUS:
		err = loop_get_status_old(lo, (struct loop_info __user *) arg);
		break;
	case LOOP_SET_STATUS64:
		err = -EPERM;
		if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
			err = loop_set_status64(lo,
					(struct loop_info64 __user *) arg);
		break;
	case LOOP_GET_STATUS64:
		err = loop_get_status64(lo, (struct loop_info64 __user *) arg);
		break;
	case LOOP_SET_CAPACITY:
		err = -EPERM;
		if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
			err = loop_set_capacity(lo, bdev);
		break;
	default:
		err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
	}
	mutex_unlock(&lo->lo_ctl_mutex);

	/* reached directly only when loop_clr_fd() already dropped the mutex */
out_unlocked:
	return err;
}

#ifdef CONFIG_COMPAT
/*
 * Layout of struct loop_info as passed by compat (32-bit) callers; converted
 * to and from struct loop_info64 by the helpers below.
 */
struct compat_loop_info {
	compat_int_t	lo_number;      /* ioctl r/o */
	compat_dev_t	lo_device;      /* ioctl r/o */
	compat_ulong_t	lo_inode;       /* ioctl r/o */
	compat_dev_t	lo_rdevice;     /* ioctl r/o */
	compat_int_t	lo_offset;
	compat_int_t	lo_encrypt_type;
	compat_int_t	lo_encrypt_key_size;    /* ioctl w/o */
	compat_int_t	lo_flags;       /* ioctl r/o */
	char		lo_name[LO_NAME_SIZE];
	unsigned char	lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
	compat_ulong_t	lo_init[2];
	char		reserved[4];
};

/*
 * Transfer 32-bit compatibility structure in userspace to 64-bit loop info
 * - noinlined to reduce stack space usage in main part of driver
 *
 * Returns -EFAULT when @arg cannot be read, 0 otherwise.  The single compat
 * name field becomes the crypt name for LO_CRYPT_CRYPTOAPI and the file name
 * for every other encryption type; lo_sizelimit is set to 0.
 */
static noinline int
loop_info64_from_compat(const struct compat_loop_info __user *arg,
			struct loop_info64 *info64)
{
	struct compat_loop_info info;

	if (copy_from_user(&info, arg, sizeof(info)))
		return -EFAULT;

	memset(info64, 0, sizeof(*info64));
	info64->lo_number = info.lo_number;
	info64->lo_device = info.lo_device;
	info64->lo_inode = info.lo_inode;
	info64->lo_rdevice = info.lo_rdevice;
	info64->lo_offset = info.lo_offset;
	info64->lo_sizelimit = 0;
	info64->lo_encrypt_type = info.lo_encrypt_type;
	info64->lo_encrypt_key_size = info.lo_encrypt_key_size;
	info64->lo_flags = info.lo_flags;
	info64->lo_init[0] = info.lo_init[0];
	info64->lo_init[1] = info.lo_init[1];
	if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
		memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE);
	else
		memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE);
	memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE);
	return 0;
}

/*
 * Transfer 64-bit loop info to 32-bit compatibility structure in userspace
 * - noinlined to reduce stack space usage in main part of driver
 *
 * Returns -EOVERFLOW when a device number, the inode, the offset or one of
 * the lo_init values does not fit the compat fields (nothing is copied to
 * user space in that case), -EFAULT when the copy fails, 0 otherwise.
 */
static noinline int
loop_info64_to_compat(const struct loop_info64 *info64,
		      struct compat_loop_info __user *arg)
{
	struct compat_loop_info info;

	/* zero first so padding and reserved bytes are not leaked to userspace */
	memset(&info, 0, sizeof(info));
	info.lo_number = info64->lo_number;
	info.lo_device = info64->lo_device;
	info.lo_inode = info64->lo_inode;
	info.lo_rdevice = info64->lo_rdevice;
	info.lo_offset = info64->lo_offset;
	info.lo_encrypt_type = info64->lo_encrypt_type;
	info.lo_encrypt_key_size = info64->lo_encrypt_key_size;
	info.lo_flags = info64->lo_flags;
	info.lo_init[0] = info64->lo_init[0];
	info.lo_init[1] = info64->lo_init[1];
	if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
		memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
	else
		memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE);
	memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);

	/* error in case values were truncated */
	if (info.lo_device != info64->lo_device ||
	    info.lo_rdevice != info64->lo_rdevice ||
	    info.lo_inode != info64->lo_inode ||
	    info.lo_offset != info64->lo_offset ||
	    info.lo_init[0] != info64->lo_init[0] ||
	    info.lo_init[1] != info64->lo_init[1])
		return -EOVERFLOW;

	if (copy_to_user(arg, &info, sizeof(info)))
		return -EFAULT;
	return 0;
}

/*
 * Compat LOOP_SET_STATUS backend: read a struct compat_loop_info from user
 * space, widen it and hand it to loop_set_status().
 */
static int
loop_set_status_compat(struct loop_device *lo,
		       const struct compat_loop_info __user *arg)
{
	struct loop_info64 info64;
	int ret;

	/* a failed conversion (e.g. -EFAULT) is returned without touching @lo */
	ret = loop_info64_from_compat(arg, &info64);
	if (ret < 0)
		return ret;
	return loop_set_status(lo, &info64);
}

/*
 * Compat LOOP_GET_STATUS backend: report the device status to user space in
 * the struct compat_loop_info layout.  Returns -EINVAL for a NULL @arg, the
 * error from loop_get_status(), or the error from the narrowing conversion
 * and copy in loop_info64_to_compat().
 */
static int
loop_get_status_compat(struct loop_device *lo,
		       struct compat_loop_info __user *arg)
{
	struct loop_info64 info64;
	int err = 0;

	if (!arg)
		err = -EINVAL;
	if (!err)
		err = loop_get_status(lo, &info64);
	if (!err)
		err = loop_info64_to_compat(&info64, arg);
	return err;
}

/*
 * lo_compat_ioctl - ioctl entry point for compat (32-bit) callers
 *
 * LOOP_SET_STATUS and LOOP_GET_STATUS use a different structure layout for
 * compat callers and are handled here under lo_ctl_mutex.  All other known
 * commands are forwarded to lo_ioctl(), which takes the mutex itself.
 * Unknown commands yield -ENOIOCTLCMD.
 */
static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
			   unsigned int cmd, unsigned long arg)
{
	struct loop_device *lo = bdev->bd_disk->private_data;
	int err;

	switch(cmd) {
	case LOOP_SET_STATUS:
		/*
		 * NOTE(review): unlike the LOOP_SET_STATUS case in lo_ioctl(),
		 * this path does not check FMODE_WRITE/CAP_SYS_ADMIN before
		 * changing the status -- confirm whether that is intended.
		 */
		mutex_lock(&lo->lo_ctl_mutex);
		err = loop_set_status_compat(
			lo, (const struct compat_loop_info __user *) arg);
		mutex_unlock(&lo->lo_ctl_mutex);
		break;
	case LOOP_GET_STATUS:
		mutex_lock(&lo->lo_ctl_mutex);
		err = loop_get_status_compat(
			lo, (struct compat_loop_info __user *) arg);
		mutex_unlock(&lo->lo_ctl_mutex);
		break;
	case LOOP_SET_CAPACITY:
	case LOOP_CLR_FD:
	case LOOP_GET_STATUS64:
	case LOOP_SET_STATUS64:
		/* convert the compat pointer, then share the call below */
		arg = (unsigned long) compat_ptr(arg);
		/* fall through */
	case LOOP_SET_FD:
	case LOOP_CHANGE_FD:
		/* for these two commands arg is forwarded unconverted */
		err = lo_ioctl(bdev, mode, cmd, arg);
		break;
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
#endif

/*
 * lo_open - block device open handler
 *
 * Takes a reference on the loop device by incrementing lo_refcnt under
 * lo_ctl_mutex.  loop_index_mutex is held across the lookup; a NULL
 * private_data (set by LOOP_CTL_REMOVE before the device is torn down)
 * makes the open fail with -ENXIO.
 */
static int lo_open(struct block_device *bdev, fmode_t mode)
{
	struct loop_device *lo;
	int err = 0;

	mutex_lock(&loop_index_mutex);
	lo = bdev->bd_disk->private_data;
	if (!lo) {
		err = -ENXIO;
		goto out;
	}

	mutex_lock(&lo->lo_ctl_mutex);
	lo->lo_refcnt++;
	mutex_unlock(&lo->lo_ctl_mutex);
out:
	mutex_unlock(&loop_index_mutex);
	return err;
}

/*
 * lo_release - block device release handler
 *
 * Drops the reference taken in lo_open().  On the last close the device is
 * either torn down (LO_FLAGS_AUTOCLEAR) or its pending work is flushed.
 */
static void lo_release(struct
gendisk *disk, fmode_t mode)
{
	struct loop_device *lo = disk->private_data;
	int err;

	mutex_lock(&lo->lo_ctl_mutex);

	/* other openers remain: nothing more to do */
	if (--lo->lo_refcnt)
		goto out;

	if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
		/*
		 * In autoclear mode, stop the loop thread
		 * and remove configuration after last close.
		 */
		err = loop_clr_fd(lo);
		/*
		 * On success the mutex is not unlocked here; per the comment
		 * in lo_ioctl(), loop_clr_fd() has already released it.
		 */
		if (!err)
			return;
	} else {
		/*
		 * Otherwise keep thread (if running) and config,
		 * but flush possible ongoing bios in thread.
		 */
		loop_flush(lo);
	}

out:
	mutex_unlock(&lo->lo_ctl_mutex);
}

/* Block device operations of every loop disk (installed by loop_add()). */
static const struct block_device_operations lo_fops = {
	.owner =	THIS_MODULE,
	.open =		lo_open,
	.release =	lo_release,
	.ioctl =	lo_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	lo_compat_ioctl,
#endif
};

/*
 * And now the modules code and kernel interface.
 */
static int max_loop;
module_param(max_loop, int, S_IRUGO);
MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
module_param(max_part, int, S_IRUGO);
MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);

/*
 * loop_register_transfer - register a transfer function table
 *
 * Stores @funcs in xfer_funcs[] under its own ->number.  Returns -EINVAL
 * when the number is out of range or the slot is already taken, 0 on
 * success.
 */
int loop_register_transfer(struct loop_func_table *funcs)
{
	unsigned int n = funcs->number;

	if (n >= MAX_LO_CRYPT || xfer_funcs[n])
		return -EINVAL;
	xfer_funcs[n] = funcs;
	return 0;
}

/*
 * idr_for_each() callback for loop_unregister_transfer(): detach the
 * transfer table passed in @data from the loop device @ptr if that device
 * currently uses it.  Always returns 0 so the iteration visits every device.
 */
static int unregister_transfer_cb(int id, void *ptr, void *data)
{
	struct loop_device *lo = ptr;
	struct loop_func_table *xfer = data;

	mutex_lock(&lo->lo_ctl_mutex);
	if (lo->lo_encryption == xfer)
		loop_release_xfer(lo);
	mutex_unlock(&lo->lo_ctl_mutex);
	return 0;
}

/*
 * loop_unregister_transfer - remove a registered transfer function table
 *
 * Returns -EINVAL for number 0, an out-of-range number or an empty slot.
 * Otherwise clears the slot, releases the table from every loop device that
 * still uses it and returns 0.
 */
int loop_unregister_transfer(int number)
{
	unsigned int n = number;
	struct loop_func_table *xfer;

	if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
		return -EINVAL;

	xfer_funcs[n] = NULL;
	idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer);
	return 0;
}

EXPORT_SYMBOL(loop_register_transfer);
EXPORT_SYMBOL(loop_unregister_transfer);

/*
 * loop_queue_rq - blk-mq ->queue_rq handler
 *
 * Starts the request and defers the actual work to loop_wq.  Writes are
 * appended to the per-device write_cmd_head list and processed by the single
 * write_work item; reads are queued individually through the per-command
 * read_work item.  Always returns BLK_MQ_RQ_QUEUE_OK.
 */
static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
		const struct blk_mq_queue_data *bd)
{
	struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);

	blk_mq_start_request(bd->rq);

	if (cmd->rq->cmd_flags & REQ_WRITE) {
		struct loop_device *lo = cmd->rq->q->queuedata;
		bool need_sched = true;

		spin_lock_irq(&lo->lo_lock);
		/*
		 * write_started is set while the write worker is scheduled
		 * or running; it picks up this command from the list, so
		 * the work item is only queued when the flag was clear.
		 */
		if (lo->write_started)
			need_sched = false;
		else
			lo->write_started = true;
		list_add_tail(&cmd->list, &lo->write_cmd_head);
		spin_unlock_irq(&lo->lo_lock);

		if (need_sched)
			queue_work(loop_wq, &lo->write_work);
	} else {
		queue_work(loop_wq, &cmd->read_work);
	}

	return BLK_MQ_RQ_QUEUE_OK;
}

/*
 * Execute one queued request against the backing file and complete it.
 * The request fails with -EIO when the device is not bound, when a write
 * targets a read-only device, or when do_req_filebacked() reports an error.
 */
static void loop_handle_cmd(struct loop_cmd *cmd)
{
	const bool write = cmd->rq->cmd_flags & REQ_WRITE;
	struct loop_device *lo = cmd->rq->q->queuedata;
	int ret = -EIO;

	if (lo->lo_state != Lo_bound)
		goto failed;

	if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY))
		goto failed;

	ret = do_req_filebacked(lo, cmd->rq);

 failed:
	/* any non-zero result is reported to the block layer as -EIO */
	if (ret)
		cmd->rq->errors = -EIO;
	blk_mq_complete_request(cmd->rq);
}

/*
 * Work function of lo->write_work: handle the write commands queued on
 * write_cmd_head in list order, repeating until the list is found empty
 * under lo_lock, and only then clear write_started.
 */
static void loop_queue_write_work(struct work_struct *work)
{
	struct loop_device *lo =
		container_of(work, struct loop_device, write_work);
	LIST_HEAD(cmd_list);

	spin_lock_irq(&lo->lo_lock);
 repeat:
	/* move the pending commands to a private list, then drop the lock */
	list_splice_init(&lo->write_cmd_head, &cmd_list);
	spin_unlock_irq(&lo->lo_lock);

	while (!list_empty(&cmd_list)) {
		struct loop_cmd *cmd = list_first_entry(&cmd_list,
				struct loop_cmd, list);
		list_del_init(&cmd->list);
1541 loop_handle_cmd(cmd); 1542 } 1543 1544 spin_lock_irq(&lo->lo_lock); 1545 if (!list_empty(&lo->write_cmd_head)) 1546 goto repeat; 1547 lo->write_started = false; 1548 spin_unlock_irq(&lo->lo_lock); 1549 } 1550 1551 static void loop_queue_read_work(struct work_struct *work) 1552 { 1553 struct loop_cmd *cmd = 1554 container_of(work, struct loop_cmd, read_work); 1555 1556 loop_handle_cmd(cmd); 1557 } 1558 1559 static int loop_init_request(void *data, struct request *rq, 1560 unsigned int hctx_idx, unsigned int request_idx, 1561 unsigned int numa_node) 1562 { 1563 struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); 1564 1565 cmd->rq = rq; 1566 INIT_WORK(&cmd->read_work, loop_queue_read_work); 1567 1568 return 0; 1569 } 1570 1571 static struct blk_mq_ops loop_mq_ops = { 1572 .queue_rq = loop_queue_rq, 1573 .map_queue = blk_mq_map_queue, 1574 .init_request = loop_init_request, 1575 }; 1576 1577 static int loop_add(struct loop_device **l, int i) 1578 { 1579 struct loop_device *lo; 1580 struct gendisk *disk; 1581 int err; 1582 1583 err = -ENOMEM; 1584 lo = kzalloc(sizeof(*lo), GFP_KERNEL); 1585 if (!lo) 1586 goto out; 1587 1588 lo->lo_state = Lo_unbound; 1589 1590 /* allocate id, if @id >= 0, we're requesting that specific id */ 1591 if (i >= 0) { 1592 err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL); 1593 if (err == -ENOSPC) 1594 err = -EEXIST; 1595 } else { 1596 err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL); 1597 } 1598 if (err < 0) 1599 goto out_free_dev; 1600 i = err; 1601 1602 err = -ENOMEM; 1603 lo->tag_set.ops = &loop_mq_ops; 1604 lo->tag_set.nr_hw_queues = 1; 1605 lo->tag_set.queue_depth = 128; 1606 lo->tag_set.numa_node = NUMA_NO_NODE; 1607 lo->tag_set.cmd_size = sizeof(struct loop_cmd); 1608 lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; 1609 lo->tag_set.driver_data = lo; 1610 1611 err = blk_mq_alloc_tag_set(&lo->tag_set); 1612 if (err) 1613 goto out_free_idr; 1614 1615 lo->lo_queue = blk_mq_init_queue(&lo->tag_set); 1616 if 
(IS_ERR_OR_NULL(lo->lo_queue)) { 1617 err = PTR_ERR(lo->lo_queue); 1618 goto out_cleanup_tags; 1619 } 1620 lo->lo_queue->queuedata = lo; 1621 1622 INIT_LIST_HEAD(&lo->write_cmd_head); 1623 INIT_WORK(&lo->write_work, loop_queue_write_work); 1624 1625 disk = lo->lo_disk = alloc_disk(1 << part_shift); 1626 if (!disk) 1627 goto out_free_queue; 1628 1629 /* 1630 * Disable partition scanning by default. The in-kernel partition 1631 * scanning can be requested individually per-device during its 1632 * setup. Userspace can always add and remove partitions from all 1633 * devices. The needed partition minors are allocated from the 1634 * extended minor space, the main loop device numbers will continue 1635 * to match the loop minors, regardless of the number of partitions 1636 * used. 1637 * 1638 * If max_part is given, partition scanning is globally enabled for 1639 * all loop devices. The minors for the main loop devices will be 1640 * multiples of max_part. 1641 * 1642 * Note: Global-for-all-devices, set-only-at-init, read-only module 1643 * parameteters like 'max_loop' and 'max_part' make things needlessly 1644 * complicated, are too static, inflexible and may surprise 1645 * userspace tools. Parameters like this in general should be avoided. 
1646 */ 1647 if (!part_shift) 1648 disk->flags |= GENHD_FL_NO_PART_SCAN; 1649 disk->flags |= GENHD_FL_EXT_DEVT; 1650 mutex_init(&lo->lo_ctl_mutex); 1651 lo->lo_number = i; 1652 spin_lock_init(&lo->lo_lock); 1653 disk->major = LOOP_MAJOR; 1654 disk->first_minor = i << part_shift; 1655 disk->fops = &lo_fops; 1656 disk->private_data = lo; 1657 disk->queue = lo->lo_queue; 1658 sprintf(disk->disk_name, "loop%d", i); 1659 add_disk(disk); 1660 *l = lo; 1661 return lo->lo_number; 1662 1663 out_free_queue: 1664 blk_cleanup_queue(lo->lo_queue); 1665 out_cleanup_tags: 1666 blk_mq_free_tag_set(&lo->tag_set); 1667 out_free_idr: 1668 idr_remove(&loop_index_idr, i); 1669 out_free_dev: 1670 kfree(lo); 1671 out: 1672 return err; 1673 } 1674 1675 static void loop_remove(struct loop_device *lo) 1676 { 1677 del_gendisk(lo->lo_disk); 1678 blk_cleanup_queue(lo->lo_queue); 1679 blk_mq_free_tag_set(&lo->tag_set); 1680 put_disk(lo->lo_disk); 1681 kfree(lo); 1682 } 1683 1684 static int find_free_cb(int id, void *ptr, void *data) 1685 { 1686 struct loop_device *lo = ptr; 1687 struct loop_device **l = data; 1688 1689 if (lo->lo_state == Lo_unbound) { 1690 *l = lo; 1691 return 1; 1692 } 1693 return 0; 1694 } 1695 1696 static int loop_lookup(struct loop_device **l, int i) 1697 { 1698 struct loop_device *lo; 1699 int ret = -ENODEV; 1700 1701 if (i < 0) { 1702 int err; 1703 1704 err = idr_for_each(&loop_index_idr, &find_free_cb, &lo); 1705 if (err == 1) { 1706 *l = lo; 1707 ret = lo->lo_number; 1708 } 1709 goto out; 1710 } 1711 1712 /* lookup and return a specific i */ 1713 lo = idr_find(&loop_index_idr, i); 1714 if (lo) { 1715 *l = lo; 1716 ret = lo->lo_number; 1717 } 1718 out: 1719 return ret; 1720 } 1721 1722 static struct kobject *loop_probe(dev_t dev, int *part, void *data) 1723 { 1724 struct loop_device *lo; 1725 struct kobject *kobj; 1726 int err; 1727 1728 mutex_lock(&loop_index_mutex); 1729 err = loop_lookup(&lo, MINOR(dev) >> part_shift); 1730 if (err < 0) 1731 err = loop_add(&lo, 
MINOR(dev) >> part_shift); 1732 if (err < 0) 1733 kobj = NULL; 1734 else 1735 kobj = get_disk(lo->lo_disk); 1736 mutex_unlock(&loop_index_mutex); 1737 1738 *part = 0; 1739 return kobj; 1740 } 1741 1742 static long loop_control_ioctl(struct file *file, unsigned int cmd, 1743 unsigned long parm) 1744 { 1745 struct loop_device *lo; 1746 int ret = -ENOSYS; 1747 1748 mutex_lock(&loop_index_mutex); 1749 switch (cmd) { 1750 case LOOP_CTL_ADD: 1751 ret = loop_lookup(&lo, parm); 1752 if (ret >= 0) { 1753 ret = -EEXIST; 1754 break; 1755 } 1756 ret = loop_add(&lo, parm); 1757 break; 1758 case LOOP_CTL_REMOVE: 1759 ret = loop_lookup(&lo, parm); 1760 if (ret < 0) 1761 break; 1762 mutex_lock(&lo->lo_ctl_mutex); 1763 if (lo->lo_state != Lo_unbound) { 1764 ret = -EBUSY; 1765 mutex_unlock(&lo->lo_ctl_mutex); 1766 break; 1767 } 1768 if (lo->lo_refcnt > 0) { 1769 ret = -EBUSY; 1770 mutex_unlock(&lo->lo_ctl_mutex); 1771 break; 1772 } 1773 lo->lo_disk->private_data = NULL; 1774 mutex_unlock(&lo->lo_ctl_mutex); 1775 idr_remove(&loop_index_idr, lo->lo_number); 1776 loop_remove(lo); 1777 break; 1778 case LOOP_CTL_GET_FREE: 1779 ret = loop_lookup(&lo, -1); 1780 if (ret >= 0) 1781 break; 1782 ret = loop_add(&lo, -1); 1783 } 1784 mutex_unlock(&loop_index_mutex); 1785 1786 return ret; 1787 } 1788 1789 static const struct file_operations loop_ctl_fops = { 1790 .open = nonseekable_open, 1791 .unlocked_ioctl = loop_control_ioctl, 1792 .compat_ioctl = loop_control_ioctl, 1793 .owner = THIS_MODULE, 1794 .llseek = noop_llseek, 1795 }; 1796 1797 static struct miscdevice loop_misc = { 1798 .minor = LOOP_CTRL_MINOR, 1799 .name = "loop-control", 1800 .fops = &loop_ctl_fops, 1801 }; 1802 1803 MODULE_ALIAS_MISCDEV(LOOP_CTRL_MINOR); 1804 MODULE_ALIAS("devname:loop-control"); 1805 1806 static int __init loop_init(void) 1807 { 1808 int i, nr; 1809 unsigned long range; 1810 struct loop_device *lo; 1811 int err; 1812 1813 err = misc_register(&loop_misc); 1814 if (err < 0) 1815 return err; 1816 1817 part_shift 
= 0; 1818 if (max_part > 0) { 1819 part_shift = fls(max_part); 1820 1821 /* 1822 * Adjust max_part according to part_shift as it is exported 1823 * to user space so that user can decide correct minor number 1824 * if [s]he want to create more devices. 1825 * 1826 * Note that -1 is required because partition 0 is reserved 1827 * for the whole disk. 1828 */ 1829 max_part = (1UL << part_shift) - 1; 1830 } 1831 1832 if ((1UL << part_shift) > DISK_MAX_PARTS) { 1833 err = -EINVAL; 1834 goto misc_out; 1835 } 1836 1837 if (max_loop > 1UL << (MINORBITS - part_shift)) { 1838 err = -EINVAL; 1839 goto misc_out; 1840 } 1841 1842 /* 1843 * If max_loop is specified, create that many devices upfront. 1844 * This also becomes a hard limit. If max_loop is not specified, 1845 * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module 1846 * init time. Loop devices can be requested on-demand with the 1847 * /dev/loop-control interface, or be instantiated by accessing 1848 * a 'dead' device node. 1849 */ 1850 if (max_loop) { 1851 nr = max_loop; 1852 range = max_loop << part_shift; 1853 } else { 1854 nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT; 1855 range = 1UL << MINORBITS; 1856 } 1857 1858 if (register_blkdev(LOOP_MAJOR, "loop")) { 1859 err = -EIO; 1860 goto misc_out; 1861 } 1862 1863 loop_wq = alloc_workqueue("kloopd", 1864 WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0); 1865 if (!loop_wq) { 1866 err = -ENOMEM; 1867 goto misc_out; 1868 } 1869 1870 blk_register_region(MKDEV(LOOP_MAJOR, 0), range, 1871 THIS_MODULE, loop_probe, NULL, NULL); 1872 1873 /* pre-create number of devices given by config or max_loop */ 1874 mutex_lock(&loop_index_mutex); 1875 for (i = 0; i < nr; i++) 1876 loop_add(&lo, i); 1877 mutex_unlock(&loop_index_mutex); 1878 1879 printk(KERN_INFO "loop: module loaded\n"); 1880 return 0; 1881 1882 misc_out: 1883 misc_deregister(&loop_misc); 1884 return err; 1885 } 1886 1887 static int loop_exit_cb(int id, void *ptr, void *data) 1888 { 1889 struct loop_device *lo = ptr; 1890 1891 
loop_remove(lo); 1892 return 0; 1893 } 1894 1895 static void __exit loop_exit(void) 1896 { 1897 unsigned long range; 1898 1899 range = max_loop ? max_loop << part_shift : 1UL << MINORBITS; 1900 1901 idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); 1902 idr_destroy(&loop_index_idr); 1903 1904 blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); 1905 unregister_blkdev(LOOP_MAJOR, "loop"); 1906 1907 destroy_workqueue(loop_wq); 1908 1909 misc_deregister(&loop_misc); 1910 } 1911 1912 module_init(loop_init); 1913 module_exit(loop_exit); 1914 1915 #ifndef MODULE 1916 static int __init max_loop_setup(char *str) 1917 { 1918 max_loop = simple_strtol(str, NULL, 0); 1919 return 1; 1920 } 1921 1922 __setup("max_loop=", max_loop_setup); 1923 #endif 1924