// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
 * Copyright (C) 2016 - 2020 Christoph Hellwig
 */
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/falloc.h>
#include <linux/suspend.h>
#include <linux/fs.h>
#include "blk.h"

/* The page cache of a block device lives on the bdev inode's mapping. */
static inline struct inode *bdev_file_inode(struct file *file)
{
	return file->f_mapping->host;
}

/*
 * get_block callback for the buffer_head based address_space operations
 * below.  A block device maps 1:1 onto itself, so just point the
 * buffer_head at the same block number on this bdev; there is never any
 * allocation to do, so @create is ignored and the buffer is always
 * reported as mapped.
 */
static int blkdev_get_block(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	bh->b_bdev = I_BDEV(inode);
	bh->b_blocknr = iblock;
	set_buffer_mapped(bh);
	return 0;
}

/* Build the bi_opf value for a direct I/O write bio. */
static unsigned int dio_bio_write_op(struct kiocb *iocb)
{
	unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;

	/* avoid the need for an I/O completion work item */
	if (iocb->ki_flags & IOCB_DSYNC)
		op |= REQ_FUA;
	return op;
}

#define DIO_INLINE_BIO_VECS 4

/* Completion handler for the single-bio sync path: just wake the submitter. */
static void blkdev_bio_end_io_simple(struct bio *bio)
{
	struct task_struct *waiter = bio->bi_private;

	/* clearing bi_private is the "done" signal the waiter spins on */
	WRITE_ONCE(bio->bi_private, NULL);
	blk_wake_io_task(waiter);
}

/*
 * Fast path for small synchronous direct I/O: one on-stack bio, submitted
 * and waited for inline.  Returns the number of bytes transferred or a
 * negative errno.
 */
static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
		struct iov_iter *iter, unsigned int nr_pages)
{
	struct block_device *bdev = iocb->ki_filp->private_data;
	struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
	loff_t pos = iocb->ki_pos;
	bool should_dirty = false;
	struct bio bio;
	ssize_t ret;

	/* file position and user buffers must be logical-block aligned */
	if ((pos | iov_iter_alignment(iter)) &
	    (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	if (nr_pages <= DIO_INLINE_BIO_VECS)
		vecs = inline_vecs;
	else {
		vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
				     GFP_KERNEL);
		if (!vecs)
			return -ENOMEM;
	}

	bio_init(&bio, vecs, nr_pages);
	bio_set_dev(&bio, bdev);
	bio.bi_iter.bi_sector = pos >> 9;	/* 512-byte sector units */
	bio.bi_write_hint = iocb->ki_hint;
	bio.bi_private = current;
	bio.bi_end_io = blkdev_bio_end_io_simple;
	bio.bi_ioprio = iocb->ki_ioprio;

	ret = bio_iov_iter_get_pages(&bio, iter);
	if (unlikely(ret))
		goto out;
	ret = bio.bi_iter.bi_size;

	if (iov_iter_rw(iter) == READ) {
		bio.bi_opf = REQ_OP_READ;
		/* only user-memory (iovec) pages need redirtying on read */
		if (iter_is_iovec(iter))
			should_dirty = true;
	} else {
		bio.bi_opf = dio_bio_write_op(iocb);
		task_io_account_write(ret);
	}
	if (iocb->ki_flags & IOCB_NOWAIT)
		bio.bi_opf |= REQ_NOWAIT;
	if (iocb->ki_flags & IOCB_HIPRI)
		bio_set_polled(&bio, iocb);

	submit_bio(&bio);
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		/* bi_private cleared by blkdev_bio_end_io_simple() when done */
		if (!READ_ONCE(bio.bi_private))
			break;
		if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, NULL, 0))
			blk_io_schedule();
	}
	__set_current_state(TASK_RUNNING);

	bio_release_pages(&bio, should_dirty);
	if (unlikely(bio.bi_status))
		ret = blk_status_to_errno(bio.bi_status);

out:
	if (vecs != inline_vecs)
		kfree(vecs);

	bio_uninit(&bio);

	return ret;
}

enum {
	DIO_MULTI_BIO = 1,	/* request needed more than one bio */
	DIO_SHOULD_DIRTY = 2,	/* dirty user pages when a read completes */
	DIO_IS_SYNC = 4,	/* submitter waits inline, no ki_complete */
};

/* Per-request state for the multi-bio path, embedded in the first bio. */
struct blkdev_dio {
	union {
		struct kiocb		*iocb;		/* async: completion target */
		struct task_struct	*waiter;	/* sync: task to wake */
	};
	size_t			size;	/* total bytes submitted */
	atomic_t		ref;	/* outstanding bios (DIO_MULTI_BIO only) */
	unsigned int		flags;	/* DIO_* flags above */
	struct bio		bio;	/* first bio; must be last (bioset offset) */
};

static struct bio_set blkdev_dio_pool;

/*
 * Completion handler for each bio of a multi-bio direct I/O.  The last
 * completing bio (or the only one, if !DIO_MULTI_BIO) finishes the whole
 * request: it either calls ->ki_complete() for async callers or wakes the
 * synchronously waiting submitter.
 */
static void blkdev_bio_end_io(struct bio *bio)
{
	struct blkdev_dio *dio = bio->bi_private;
	bool should_dirty = dio->flags & DIO_SHOULD_DIRTY;

	/* record the first error seen across all bios */
	if (bio->bi_status && !dio->bio.bi_status)
		dio->bio.bi_status = bio->bi_status;

	if (!(dio->flags & DIO_MULTI_BIO) || atomic_dec_and_test(&dio->ref)) {
		if (!(dio->flags & DIO_IS_SYNC)) {
			struct kiocb *iocb = dio->iocb;
			ssize_t ret;

			WRITE_ONCE(iocb->private, NULL);

			if (likely(!dio->bio.bi_status)) {
				ret = dio->size;
				iocb->ki_pos += ret;
			} else {
				ret = blk_status_to_errno(dio->bio.bi_status);
			}

			dio->iocb->ki_complete(iocb, ret, 0);
			/* drop the extra reference taken for AIO */
			if (dio->flags & DIO_MULTI_BIO)
				bio_put(&dio->bio);
		} else {
			struct task_struct *waiter = dio->waiter;

			/* clearing ->waiter is the wakeup condition */
			WRITE_ONCE(dio->waiter, NULL);
			blk_wake_io_task(waiter);
		}
	}

	if (should_dirty) {
		bio_check_pages_dirty(bio);
	} else {
		bio_release_pages(bio, false);
		bio_put(bio);
	}
}

/*
 * General direct I/O path: split the request into as many bios as needed,
 * submitting each as it fills up.  Synchronous callers wait here; async
 * callers get -EIOCBQUEUED and are completed from blkdev_bio_end_io().
 */
static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
		unsigned int nr_pages)
{
	struct block_device *bdev = iocb->ki_filp->private_data;
	struct blk_plug plug;
	struct blkdev_dio *dio;
	struct bio *bio;
	bool do_poll = (iocb->ki_flags & IOCB_HIPRI);
	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
	loff_t pos = iocb->ki_pos;
	int ret = 0;

	/* file position and user buffers must be logical-block aligned */
	if ((pos | iov_iter_alignment(iter)) &
	    (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);

	/* the dio state is embedded in front of the first bio */
	dio = container_of(bio, struct blkdev_dio, bio);
	is_sync = is_sync_kiocb(iocb);
	if (is_sync) {
		dio->flags = DIO_IS_SYNC;
		dio->waiter = current;
		/* keep the first bio (and thus dio) alive until we read status */
		bio_get(bio);
	} else {
		dio->flags = 0;
		dio->iocb = iocb;
	}

	dio->size = 0;
	if (is_read && iter_is_iovec(iter))
		dio->flags |= DIO_SHOULD_DIRTY;

	/*
	 * Don't plug for HIPRI/polled IO, as those should go straight
	 * to issue
	 */
	if (!(iocb->ki_flags & IOCB_HIPRI))
		blk_start_plug(&plug);

	for (;;) {
		bio_set_dev(bio, bdev);
		bio->bi_iter.bi_sector = pos >> 9;	/* 512-byte sectors */
		bio->bi_write_hint = iocb->ki_hint;
		bio->bi_private = dio;
		bio->bi_end_io = blkdev_bio_end_io;
		bio->bi_ioprio = iocb->ki_ioprio;

		ret = bio_iov_iter_get_pages(bio, iter);
		if (unlikely(ret)) {
			/* complete through the normal end_io path */
			bio->bi_status = BLK_STS_IOERR;
			bio_endio(bio);
			break;
		}

		if (is_read) {
			bio->bi_opf = REQ_OP_READ;
			if (dio->flags & DIO_SHOULD_DIRTY)
				bio_set_pages_dirty(bio);
		} else {
			bio->bi_opf = dio_bio_write_op(iocb);
			task_io_account_write(bio->bi_iter.bi_size);
		}
		if (iocb->ki_flags & IOCB_NOWAIT)
			bio->bi_opf |= REQ_NOWAIT;

		dio->size += bio->bi_iter.bi_size;
		pos += bio->bi_iter.bi_size;

		nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
		if (!nr_pages) {
			/* last bio: only a single outstanding bio can be polled */
			if (do_poll)
				bio_set_polled(bio, iocb);
			submit_bio(bio);
			if (do_poll)
				WRITE_ONCE(iocb->private, bio);
			break;
		}
		if (!(dio->flags & DIO_MULTI_BIO)) {
			/*
			 * AIO needs an extra reference to ensure the dio
			 * structure which is embedded into the first bio
			 * stays around.
			 */
			if (!is_sync)
				bio_get(bio);
			dio->flags |= DIO_MULTI_BIO;
			/* this bio plus the one we are about to allocate */
			atomic_set(&dio->ref, 2);
			/* polling is not supported across multiple bios */
			do_poll = false;
		} else {
			atomic_inc(&dio->ref);
		}

		submit_bio(bio);
		bio = bio_alloc(GFP_KERNEL, nr_pages);
	}

	if (!(iocb->ki_flags & IOCB_HIPRI))
		blk_finish_plug(&plug);

	if (!is_sync)
		return -EIOCBQUEUED;

	/* synchronous: wait for blkdev_bio_end_io() to clear ->waiter */
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(dio->waiter))
			break;

		if (!do_poll || !bio_poll(bio, NULL, 0))
			blk_io_schedule();
	}
	__set_current_state(TASK_RUNNING);

	if (!ret)
		ret = blk_status_to_errno(dio->bio.bi_status);
	if (likely(!ret))
		ret = dio->size;

	/* drop the reference taken at the top for the sync case */
	bio_put(&dio->bio);
	return ret;
}

/*
 * ->direct_IO for block devices: pick the single-bio stack fast path when
 * a sync request fits in one bio, otherwise fall back to the general
 * multi-bio implementation.
 */
static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	unsigned int nr_pages;

	if (!iov_iter_count(iter))
		return 0;

	/* +1 so we can tell whether the request exceeds a single bio */
	nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
	if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS)
		return __blkdev_direct_IO_simple(iocb, iter, nr_pages);

	return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
}

/* Buffered-I/O address_space operations, all 1:1 via blkdev_get_block. */
static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, blkdev_get_block, wbc);
}

static int blkdev_readpage(struct file * file, struct page * page)
{
	return block_read_full_page(page, blkdev_get_block);
}

static void blkdev_readahead(struct readahead_control *rac)
{
	mpage_readahead(rac, blkdev_get_block);
}

static int blkdev_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags, struct page **pagep,
		void **fsdata)
{
	return block_write_begin(mapping, pos, len, flags, pagep,
				 blkdev_get_block);
}

static int blkdev_write_end(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned copied, struct page *page,
		void *fsdata)
{
	int ret;
	ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	/* block_write_begin() returned the page locked with a reference */
	unlock_page(page);
	put_page(page);

	return ret;
}

static int blkdev_writepages(struct address_space *mapping,
		struct writeback_control *wbc)
{
	return generic_writepages(mapping, wbc);
}

const struct address_space_operations def_blk_aops = {
	.set_page_dirty	= __set_page_dirty_buffers,
	.readpage	= blkdev_readpage,
	.readahead	= blkdev_readahead,
	.writepage	= blkdev_writepage,
	.write_begin	= blkdev_write_begin,
	.write_end	= blkdev_write_end,
	.writepages	= blkdev_writepages,
	.direct_IO	= blkdev_direct_IO,
	.migratepage	= buffer_migrate_page_norefs,
	.is_dirty_writeback = buffer_check_dirty_writeback,
};

/*
 * for a block special file file_inode(file)->i_size is zero
 * so we compute the size by hand (just as in block_read/write above)
 */
static loff_t blkdev_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *bd_inode = bdev_file_inode(file);
	loff_t retval;

	inode_lock(bd_inode);
	retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
	inode_unlock(bd_inode);
	return retval;
}

/* ->fsync: write back dirty page cache, then flush the device cache. */
static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
		int datasync)
{
	struct block_device *bdev = filp->private_data;
	int error;

	error = file_write_and_wait_range(filp, start, end);
	if (error)
		return error;

	/*
	 * There is no need to serialise calls to blkdev_issue_flush with
	 * i_mutex and doing so causes performance issues with concurrent
	 * O_SYNC writers to a block device.
	 */
	error = blkdev_issue_flush(bdev);
	/* a device without a writeback cache has nothing to flush */
	if (error == -EOPNOTSUPP)
		error = 0;

	return error;
}

static int blkdev_open(struct inode *inode, struct file *filp)
{
	struct block_device *bdev;

	/*
	 * Preserve backwards compatibility and allow large file access
	 * even if userspace doesn't ask for it explicitly. Some mkfs
	 * binary needs it. We might want to drop this workaround
	 * during an unstable branch.
	 */
	filp->f_flags |= O_LARGEFILE;
	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;

	if (filp->f_flags & O_NDELAY)
		filp->f_mode |= FMODE_NDELAY;
	if (filp->f_flags & O_EXCL)
		filp->f_mode |= FMODE_EXCL;
	/* O_ACCMODE == 3: historic "no read/write, ioctl only" open mode */
	if ((filp->f_flags & O_ACCMODE) == 3)
		filp->f_mode |= FMODE_WRITE_IOCTL;

	bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	/* cache the bdev so the hot paths can skip the inode dereference */
	filp->private_data = bdev;
	filp->f_mapping = bdev->bd_inode->i_mapping;
	filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
	return 0;
}

static int blkdev_close(struct inode *inode, struct file *filp)
{
	struct block_device *bdev = filp->private_data;

	/* drops the reference taken by blkdev_get_by_dev() in blkdev_open() */
	blkdev_put(bdev, filp->f_mode);
	return 0;
}

/*
 * Write data to the block device.  Only intended for the block device itself
 * and the raw driver which basically is a fake block device.
 *
 * Does not take i_mutex for the write and thus is not for general purpose
 * use.
 */
static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct block_device *bdev = iocb->ki_filp->private_data;
	struct inode *bd_inode = bdev->bd_inode;
	loff_t size = i_size_read(bd_inode);
	struct blk_plug plug;
	size_t shorted = 0;
	ssize_t ret;

	if (bdev_read_only(bdev))
		return -EPERM;

	/* don't scribble over an active swap/hibernation device */
	if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev))
		return -ETXTBSY;

	if (!iov_iter_count(from))
		return 0;

	if (iocb->ki_pos >= size)
		return -ENOSPC;

	/* IOCB_NOWAIT is only supported for direct I/O */
	if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
		return -EOPNOTSUPP;

	/* truncate the write to the device size, restore the iter below */
	size -= iocb->ki_pos;
	if (iov_iter_count(from) > size) {
		shorted = iov_iter_count(from) - size;
		iov_iter_truncate(from, size);
	}

	blk_start_plug(&plug);
	ret = __generic_file_write_iter(iocb, from);
	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	iov_iter_reexpand(from, iov_iter_count(from) + shorted);
	blk_finish_plug(&plug);
	return ret;
}

/* Read from the block device, clamping the request to the device size. */
static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct block_device *bdev = iocb->ki_filp->private_data;
	loff_t size = i_size_read(bdev->bd_inode);
	loff_t pos = iocb->ki_pos;
	size_t shorted = 0;
	ssize_t ret;

	if (pos >= size)
		return 0;

	size -= pos;
	if (iov_iter_count(to) > size) {
		shorted = iov_iter_count(to) - size;
		iov_iter_truncate(to, size);
	}

	ret = generic_file_read_iter(iocb, to);
	iov_iter_reexpand(to, iov_iter_count(to) + shorted);
	return ret;
}

#define	BLKDEV_FALLOC_FL_SUPPORTED					\
		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
		 FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)

/*
 * fallocate() on a block device: zero or discard a range of the device,
 * after dropping the corresponding page cache.
 */
static long blkdev_fallocate(struct file *file, int mode, loff_t start,
			     loff_t len)
{
	struct inode *inode = bdev_file_inode(file);
	struct block_device *bdev = I_BDEV(inode);
	loff_t end = start + len - 1;
	loff_t isize;
	int error;

	/* Fail if we don't recognize the flags. */
	if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
		return -EOPNOTSUPP;

	/* Don't go off the end of the device. */
	isize = i_size_read(bdev->bd_inode);
	if (start >= isize)
		return -EINVAL;
	if (end >= isize) {
		if (mode & FALLOC_FL_KEEP_SIZE) {
			len = isize - start;
			end = start + len - 1;
		} else
			return -EINVAL;
	}

	/*
	 * Don't allow IO that isn't aligned to logical block size.
	 */
	if ((start | len) & (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	/* block concurrent page faults from repopulating the range */
	filemap_invalidate_lock(inode->i_mapping);

	/* Invalidate the page cache, including dirty pages. */
	error = truncate_bdev_range(bdev, file->f_mode, start, end);
	if (error)
		goto fail;

	switch (mode) {
	case FALLOC_FL_ZERO_RANGE:
	case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
					     GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
		break;
	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
					     GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
		break;
	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
		error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
					     GFP_KERNEL, 0);
		break;
	default:
		error = -EOPNOTSUPP;
	}

 fail:
	filemap_invalidate_unlock(inode->i_mapping);
	return error;
}

const struct file_operations def_blk_fops = {
	.open		= blkdev_open,
	.release	= blkdev_close,
	.llseek		= blkdev_llseek,
	.read_iter	= blkdev_read_iter,
	.write_iter	= blkdev_write_iter,
	.iopoll		= iocb_bio_iopoll,
	.mmap		= generic_file_mmap,
	.fsync		= blkdev_fsync,
	.unlocked_ioctl	= blkdev_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= compat_blkdev_ioctl,
#endif
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= blkdev_fallocate,
};

/* Set up the bioset (with per-cpu cache) used by the multi-bio DIO path. */
static __init int blkdev_init(void)
{
	return bioset_init(&blkdev_dio_pool, 4,
				offsetof(struct blkdev_dio, bio),
				BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE);
}
module_init(blkdev_init);