// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 1991, 1992  Linus Torvalds
 * Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
 * Copyright (C) 2016 - 2020 Christoph Hellwig
 */
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/falloc.h>
#include <linux/suspend.h>
#include <linux/fs.h>
#include "blk.h"

/* Return the block-device pseudo-fs inode backing an open bdev file. */
static inline struct inode *bdev_file_inode(struct file *file)
{
	return file->f_mapping->host;
}

/*
 * get_block callback for the buffered I/O paths below: a block device is
 * mapped 1:1, so block @iblock of the file is simply block @iblock of the
 * device.  Never fails and never allocates (@create is ignored).
 */
static int blkdev_get_block(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	bh->b_bdev = I_BDEV(inode);
	bh->b_blocknr = iblock;
	set_buffer_mapped(bh);
	return 0;
}

/* Build the request op/flags for a direct I/O write bio. */
static unsigned int dio_bio_write_op(struct kiocb *iocb)
{
	unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;

	/* avoid the need for an I/O completion work item */
	if (iocb->ki_flags & IOCB_DSYNC)
		op |= REQ_FUA;
	return op;
}

/* Number of on-stack bio_vecs before __blkdev_direct_IO_simple kmallocs. */
#define DIO_INLINE_BIO_VECS 4

/*
 * Completion handler for the single-bio synchronous path: clearing
 * bi_private is the "done" signal that the submitter busy-waits on.
 */
static void blkdev_bio_end_io_simple(struct bio *bio)
{
	struct task_struct *waiter = bio->bi_private;

	WRITE_ONCE(bio->bi_private, NULL);
	blk_wake_io_task(waiter);
}

/*
 * Fast path for synchronous direct I/O that fits in a single bio: the bio
 * lives on the stack and the caller waits inline for completion (optionally
 * polling for IOCB_HIPRI) instead of going through an end_io work item.
 *
 * Returns the number of bytes transferred, or a negative errno (-EINVAL for
 * requests not aligned to the logical block size, -ENOMEM, or the mapped
 * bio status on I/O error).
 */
static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
		struct iov_iter *iter, unsigned int nr_pages)
{
	struct block_device *bdev = iocb->ki_filp->private_data;
	struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
	loff_t pos = iocb->ki_pos;
	bool should_dirty = false;
	struct bio bio;
	ssize_t ret;

	/* position and each segment must be logical-block aligned */
	if ((pos | iov_iter_alignment(iter)) &
	    (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	if (nr_pages <= DIO_INLINE_BIO_VECS)
		vecs = inline_vecs;
	else {
		vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
				     GFP_KERNEL);
		if (!vecs)
			return -ENOMEM;
	}

	bio_init(&bio, vecs, nr_pages);
	bio_set_dev(&bio, bdev);
	bio.bi_iter.bi_sector = pos >> 9;
	bio.bi_write_hint = iocb->ki_hint;
	bio.bi_private = current;
	bio.bi_end_io = blkdev_bio_end_io_simple;
	bio.bi_ioprio = iocb->ki_ioprio;

	ret = bio_iov_iter_get_pages(&bio, iter);
	if (unlikely(ret))
		goto out;
	ret = bio.bi_iter.bi_size;

	if (iov_iter_rw(iter) == READ) {
		bio.bi_opf = REQ_OP_READ;
		/* only user-backed pages need to be dirtied after the read */
		if (iter_is_iovec(iter))
			should_dirty = true;
	} else {
		bio.bi_opf = dio_bio_write_op(iocb);
		task_io_account_write(ret);
	}
	if (iocb->ki_flags & IOCB_NOWAIT)
		bio.bi_opf |= REQ_NOWAIT;
	if (iocb->ki_flags & IOCB_HIPRI)
		bio_set_polled(&bio, iocb);

	submit_bio(&bio);
	/* wait for completion; end_io clears bi_private when done */
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(bio.bi_private))
			break;
		if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, 0))
			blk_io_schedule();
	}
	__set_current_state(TASK_RUNNING);

	bio_release_pages(&bio, should_dirty);
	if (unlikely(bio.bi_status))
		ret = blk_status_to_errno(bio.bi_status);

out:
	if (vecs != inline_vecs)
		kfree(vecs);

	bio_uninit(&bio);

	return ret;
}

/*
 * Per-request state for the general (possibly multi-bio) direct I/O path.
 * Embedded at the front of the first bio allocated from blkdev_dio_pool so
 * it shares that bio's lifetime.
 */
struct blkdev_dio {
	union {
		struct kiocb		*iocb;	/* async: iocb to complete */
		struct task_struct	*waiter; /* sync: task to wake */
	};
	size_t			size;	/* total bytes submitted */
	atomic_t		ref;	/* outstanding bios (multi_bio only) */
	bool			multi_bio : 1;
	bool			should_dirty : 1;
	bool			is_sync : 1;
	struct bio		bio;	/* first bio; must be last */
};

static struct bio_set blkdev_dio_pool;

/*
 * Per-bio completion for the general path.  Records the first error in the
 * embedded bio's status, and when the last outstanding bio finishes either
 * completes the async iocb or wakes the synchronous waiter.
 */
static void blkdev_bio_end_io(struct bio *bio)
{
	struct blkdev_dio *dio = bio->bi_private;
	bool should_dirty = dio->should_dirty;

	/* keep only the first error seen */
	if (bio->bi_status && !dio->bio.bi_status)
		dio->bio.bi_status = bio->bi_status;

	if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
		if (!dio->is_sync) {
			struct kiocb *iocb = dio->iocb;
			ssize_t ret;

			WRITE_ONCE(iocb->private, NULL);

			if (likely(!dio->bio.bi_status)) {
				ret = dio->size;
				iocb->ki_pos += ret;
			} else {
				ret = blk_status_to_errno(dio->bio.bi_status);
			}

			dio->iocb->ki_complete(iocb, ret, 0);
			/* drop the extra ref taken for AIO (see below) */
			if (dio->multi_bio)
				bio_put(&dio->bio);
		} else {
			struct task_struct *waiter = dio->waiter;

			WRITE_ONCE(dio->waiter, NULL);
			blk_wake_io_task(waiter);
		}
	}

	if (should_dirty) {
		bio_check_pages_dirty(bio);
	} else {
		bio_release_pages(bio, false);
		bio_put(bio);
	}
}

/*
 * General direct I/O path: splits the request into as many bios as needed,
 * tracking them via the blkdev_dio embedded in the first one.  Synchronous
 * requests wait inline; async requests return -EIOCBQUEUED and complete via
 * blkdev_bio_end_io().
 */
static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
		unsigned int nr_pages)
{
	struct block_device *bdev = iocb->ki_filp->private_data;
	struct blk_plug plug;
	struct blkdev_dio *dio;
	struct bio *bio;
	bool do_poll = (iocb->ki_flags & IOCB_HIPRI);
	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
	loff_t pos = iocb->ki_pos;
	int ret = 0;

	if ((pos | iov_iter_alignment(iter)) &
	    (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);

	dio = container_of(bio, struct blkdev_dio, bio);
	dio->is_sync = is_sync = is_sync_kiocb(iocb);
	if (dio->is_sync) {
		dio->waiter = current;
		/* keep the embedded bio alive while we sleep on it below */
		bio_get(bio);
	} else {
		dio->iocb = iocb;
	}

	dio->size = 0;
	dio->multi_bio = false;
	dio->should_dirty = is_read && iter_is_iovec(iter);

	/*
	 * Don't plug for HIPRI/polled IO, as those should go straight
	 * to issue
	 */
	if (!(iocb->ki_flags & IOCB_HIPRI))
		blk_start_plug(&plug);

	for (;;) {
		bio_set_dev(bio, bdev);
		bio->bi_iter.bi_sector = pos >> 9;
		bio->bi_write_hint = iocb->ki_hint;
		bio->bi_private = dio;
		bio->bi_end_io = blkdev_bio_end_io;
		bio->bi_ioprio = iocb->ki_ioprio;

		ret = bio_iov_iter_get_pages(bio, iter);
		if (unlikely(ret)) {
			bio->bi_status = BLK_STS_IOERR;
			bio_endio(bio);
			break;
		}

		if (is_read) {
			bio->bi_opf = REQ_OP_READ;
			if (dio->should_dirty)
				bio_set_pages_dirty(bio);
		} else {
			bio->bi_opf = dio_bio_write_op(iocb);
			task_io_account_write(bio->bi_iter.bi_size);
		}
		if (iocb->ki_flags & IOCB_NOWAIT)
			bio->bi_opf |= REQ_NOWAIT;

		dio->size += bio->bi_iter.bi_size;
		pos += bio->bi_iter.bi_size;

		nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
		if (!nr_pages) {
			/* last bio: this is the only one we may poll on */
			if (do_poll)
				bio_set_polled(bio, iocb);
			submit_bio(bio);
			if (do_poll)
				WRITE_ONCE(iocb->private, bio);
			break;
		}
		if (!dio->multi_bio) {
			/*
			 * AIO needs an extra reference to ensure the dio
			 * structure which is embedded into the first bio
			 * stays around.
			 */
			if (!is_sync)
				bio_get(bio);
			dio->multi_bio = true;
			atomic_set(&dio->ref, 2);
			/* polling is given up once the dio spans >1 bio */
			do_poll = false;
		} else {
			atomic_inc(&dio->ref);
		}

		submit_bio(bio);
		bio = bio_alloc(GFP_KERNEL, nr_pages);
	}

	if (!(iocb->ki_flags & IOCB_HIPRI))
		blk_finish_plug(&plug);

	if (!is_sync)
		return -EIOCBQUEUED;

	/* synchronous: wait until the end_io handler clears dio->waiter */
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(dio->waiter))
			break;

		if (!do_poll || !bio_poll(bio, 0))
			blk_io_schedule();
	}
	__set_current_state(TASK_RUNNING);

	if (!ret)
		ret = blk_status_to_errno(dio->bio.bi_status);
	if (likely(!ret))
		ret = dio->size;

	bio_put(&dio->bio);
	return ret;
}

/*
 * ->direct_IO entry point: use the lightweight on-stack single-bio path for
 * synchronous requests that fit in one bio, the general path otherwise.
 */
static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	unsigned int nr_pages;

	if (!iov_iter_count(iter))
		return 0;

	/* +1 lets us detect "needs more than BIO_MAX_VECS" cheaply */
	nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
	if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS)
		return __blkdev_direct_IO_simple(iocb, iter, nr_pages);

	return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
}

/* Buffered I/O address_space operations, all via blkdev_get_block. */
static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, blkdev_get_block, wbc);
}

static int blkdev_readpage(struct file * file, struct page * page)
{
	return block_read_full_page(page, blkdev_get_block);
}

static void blkdev_readahead(struct readahead_control *rac)
{
	mpage_readahead(rac, blkdev_get_block);
}

static int blkdev_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags, struct page **pagep,
		void **fsdata)
{
	return block_write_begin(mapping, pos, len, flags, pagep,
				 blkdev_get_block);
}

static int blkdev_write_end(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned copied, struct page *page,
		void *fsdata)
{
	int ret;
	ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	unlock_page(page);
	put_page(page);

	return ret;
}

static int blkdev_writepages(struct address_space *mapping,
		struct writeback_control *wbc)
{
	return generic_writepages(mapping, wbc);
}

const struct address_space_operations def_blk_aops = {
	.set_page_dirty	= __set_page_dirty_buffers,
	.readpage	= blkdev_readpage,
	.readahead	= blkdev_readahead,
	.writepage	= blkdev_writepage,
	.write_begin	= blkdev_write_begin,
	.write_end	= blkdev_write_end,
	.writepages	= blkdev_writepages,
	.direct_IO	= blkdev_direct_IO,
	.migratepage	= buffer_migrate_page_norefs,
	.is_dirty_writeback = buffer_check_dirty_writeback,
};

/*
 * for a block special file file_inode(file)->i_size is zero
 * so we compute the size by hand (just as in block_read/write above)
 */
static loff_t blkdev_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *bd_inode = bdev_file_inode(file);
	loff_t retval;

	inode_lock(bd_inode);
	retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
	inode_unlock(bd_inode);
	return retval;
}

/*
 * ->fsync: write out dirty page-cache pages in [start, end], then ask the
 * device to flush its volatile write cache.  A device without a write cache
 * (-EOPNOTSUPP from blkdev_issue_flush) is not an error.
 */
static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
		int datasync)
{
	struct block_device *bdev = filp->private_data;
	int error;

	error = file_write_and_wait_range(filp, start, end);
	if (error)
		return error;

	/*
	 * There is no need to serialise calls to blkdev_issue_flush with
	 * i_mutex and doing so causes performance issues with concurrent
	 * O_SYNC writers to a block device.
	 */
	error = blkdev_issue_flush(bdev);
	if (error == -EOPNOTSUPP)
		error = 0;

	return error;
}

/*
 * ->open: translate f_flags into f_mode bits, acquire the block device, and
 * cache it in filp->private_data for the other file_operations to use.
 */
static int blkdev_open(struct inode *inode, struct file *filp)
{
	struct block_device *bdev;

	/*
	 * Preserve backwards compatibility and allow large file access
	 * even if userspace doesn't ask for it explicitly. Some mkfs
	 * binary needs it. We might want to drop this workaround
	 * during an unstable branch.
	 */
	filp->f_flags |= O_LARGEFILE;
	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;

	if (filp->f_flags & O_NDELAY)
		filp->f_mode |= FMODE_NDELAY;
	if (filp->f_flags & O_EXCL)
		filp->f_mode |= FMODE_EXCL;
	if ((filp->f_flags & O_ACCMODE) == 3)
		filp->f_mode |= FMODE_WRITE_IOCTL;

	bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	filp->private_data = bdev;
	filp->f_mapping = bdev->bd_inode->i_mapping;
	filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
	return 0;
}

/* ->release: drop the reference taken by blkdev_open(). */
static int blkdev_close(struct inode *inode, struct file *filp)
{
	struct block_device *bdev = filp->private_data;

	blkdev_put(bdev, filp->f_mode);
	return 0;
}

/*
 * Write data to the block device.  Only intended for the block device itself
 * and the raw driver which basically is a fake block device.
 *
 * Does not take i_mutex for the write and thus is not for general purpose
 * use.
 */
static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct block_device *bdev = iocb->ki_filp->private_data;
	struct inode *bd_inode = bdev->bd_inode;
	loff_t size = i_size_read(bd_inode);
	struct blk_plug plug;
	size_t shorted = 0;
	ssize_t ret;

	if (bdev_read_only(bdev))
		return -EPERM;

	/* an active swap/hibernation device must not be scribbled on */
	if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev))
		return -ETXTBSY;

	if (!iov_iter_count(from))
		return 0;

	if (iocb->ki_pos >= size)
		return -ENOSPC;

	/* nowait is only supported on the direct I/O path */
	if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
		return -EOPNOTSUPP;

	/* clamp the write to the end of the device */
	size -= iocb->ki_pos;
	if (iov_iter_count(from) > size) {
		shorted = iov_iter_count(from) - size;
		iov_iter_truncate(from, size);
	}

	blk_start_plug(&plug);
	ret = __generic_file_write_iter(iocb, from);
	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	iov_iter_reexpand(from, iov_iter_count(from) + shorted);
	blk_finish_plug(&plug);
	return ret;
}

/*
 * ->read_iter: like generic_file_read_iter(), but clamps the request to the
 * device size first since file_inode(file)->i_size is zero for bdev files.
 */
static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct block_device *bdev = iocb->ki_filp->private_data;
	loff_t size = i_size_read(bdev->bd_inode);
	loff_t pos = iocb->ki_pos;
	size_t shorted = 0;
	ssize_t ret;

	if (pos >= size)
		return 0;

	size -= pos;
	if (iov_iter_count(to) > size) {
		shorted = iov_iter_count(to) - size;
		iov_iter_truncate(to, size);
	}

	ret = generic_file_read_iter(iocb, to);
	iov_iter_reexpand(to, iov_iter_count(to) + shorted);
	return ret;
}

#define	BLKDEV_FALLOC_FL_SUPPORTED					\
		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
		 FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)

/*
 * ->fallocate: implement zero-range and punch-hole on a block device as
 * write-zeroes or discard, after invalidating the affected page cache under
 * the mapping's invalidate lock.
 */
static long blkdev_fallocate(struct file *file, int mode, loff_t start,
			     loff_t len)
{
	struct inode *inode = bdev_file_inode(file);
	struct block_device *bdev = I_BDEV(inode);
	loff_t end = start + len - 1;
	loff_t isize;
	int error;

	/* Fail if we don't recognize the flags. */
	if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
		return -EOPNOTSUPP;

	/* Don't go off the end of the device. */
	isize = i_size_read(bdev->bd_inode);
	if (start >= isize)
		return -EINVAL;
	if (end >= isize) {
		if (mode & FALLOC_FL_KEEP_SIZE) {
			len = isize - start;
			end = start + len - 1;
		} else
			return -EINVAL;
	}

	/*
	 * Don't allow IO that isn't aligned to logical block size.
	 */
	if ((start | len) & (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	filemap_invalidate_lock(inode->i_mapping);

	/* Invalidate the page cache, including dirty pages. */
	error = truncate_bdev_range(bdev, file->f_mode, start, end);
	if (error)
		goto fail;

	switch (mode) {
	case FALLOC_FL_ZERO_RANGE:
	case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
					     GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
		break;
	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
					     GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
		break;
	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
		error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
					     GFP_KERNEL, 0);
		break;
	default:
		error = -EOPNOTSUPP;
	}

 fail:
	filemap_invalidate_unlock(inode->i_mapping);
	return error;
}

const struct file_operations def_blk_fops = {
	.open		= blkdev_open,
	.release	= blkdev_close,
	.llseek		= blkdev_llseek,
	.read_iter	= blkdev_read_iter,
	.write_iter	= blkdev_write_iter,
	.iopoll		= iocb_bio_iopoll,
	.mmap		= generic_file_mmap,
	.fsync		= blkdev_fsync,
	.unlocked_ioctl	= blkdev_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= compat_blkdev_ioctl,
#endif
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= blkdev_fallocate,
};

/* Set up the bio_set used for the multi-bio direct I/O path. */
static __init int blkdev_init(void)
{
	return bioset_init(&blkdev_dio_pool, 4,
			offsetof(struct blkdev_dio, bio),
			BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE);
}
module_init(blkdev_init);