xref: /openbmc/linux/block/fops.c (revision 91071792)
1cd82cca7SChristoph Hellwig // SPDX-License-Identifier: GPL-2.0-only
2cd82cca7SChristoph Hellwig /*
3cd82cca7SChristoph Hellwig  * Copyright (C) 1991, 1992  Linus Torvalds
4cd82cca7SChristoph Hellwig  * Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
5cd82cca7SChristoph Hellwig  * Copyright (C) 2016 - 2020 Christoph Hellwig
6cd82cca7SChristoph Hellwig  */
7cd82cca7SChristoph Hellwig #include <linux/init.h>
8cd82cca7SChristoph Hellwig #include <linux/mm.h>
9cd82cca7SChristoph Hellwig #include <linux/blkdev.h>
10cd82cca7SChristoph Hellwig #include <linux/buffer_head.h>
11cd82cca7SChristoph Hellwig #include <linux/mpage.h>
12cd82cca7SChristoph Hellwig #include <linux/uio.h>
13cd82cca7SChristoph Hellwig #include <linux/namei.h>
14cd82cca7SChristoph Hellwig #include <linux/task_io_accounting_ops.h>
15cd82cca7SChristoph Hellwig #include <linux/falloc.h>
16cd82cca7SChristoph Hellwig #include <linux/suspend.h>
17f278eb3dSMing Lei #include <linux/fs.h>
18487c607dSChristoph Hellwig #include <linux/iomap.h>
198581fd40SJakub Kicinski #include <linux/module.h>
20cd82cca7SChristoph Hellwig #include "blk.h"
21cd82cca7SChristoph Hellwig 
bdev_file_inode(struct file * file)22fac7c6d5SPavel Begunkov static inline struct inode *bdev_file_inode(struct file *file)
23cd82cca7SChristoph Hellwig {
24cd82cca7SChristoph Hellwig 	return file->f_mapping->host;
25cd82cca7SChristoph Hellwig }
26cd82cca7SChristoph Hellwig 
dio_bio_write_op(struct kiocb * iocb)2716458cf3SBart Van Assche static blk_opf_t dio_bio_write_op(struct kiocb *iocb)
28cd82cca7SChristoph Hellwig {
2916458cf3SBart Van Assche 	blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
30cd82cca7SChristoph Hellwig 
31cd82cca7SChristoph Hellwig 	/* avoid the need for a I/O completion work item */
3291b94c5dSAl Viro 	if (iocb_is_dsync(iocb))
3316458cf3SBart Van Assche 		opf |= REQ_FUA;
3416458cf3SBart Van Assche 	return opf;
35cd82cca7SChristoph Hellwig }
36cd82cca7SChristoph Hellwig 
blkdev_dio_unaligned(struct block_device * bdev,loff_t pos,struct iov_iter * iter)3737fee2e4SKeith Busch static bool blkdev_dio_unaligned(struct block_device *bdev, loff_t pos,
3837fee2e4SKeith Busch 			      struct iov_iter *iter)
3937fee2e4SKeith Busch {
40b1a000d3SKeith Busch 	return pos & (bdev_logical_block_size(bdev) - 1) ||
41b1a000d3SKeith Busch 		!bdev_iter_is_aligned(bdev, iter);
4237fee2e4SKeith Busch }
4337fee2e4SKeith Busch 
44cd82cca7SChristoph Hellwig #define DIO_INLINE_BIO_VECS 4
45cd82cca7SChristoph Hellwig 
__blkdev_direct_IO_simple(struct kiocb * iocb,struct iov_iter * iter,unsigned int nr_pages)46cd82cca7SChristoph Hellwig static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
47cd82cca7SChristoph Hellwig 		struct iov_iter *iter, unsigned int nr_pages)
48cd82cca7SChristoph Hellwig {
494e762d86SChristoph Hellwig 	struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
50cd82cca7SChristoph Hellwig 	struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
51cd82cca7SChristoph Hellwig 	loff_t pos = iocb->ki_pos;
52cd82cca7SChristoph Hellwig 	bool should_dirty = false;
53cd82cca7SChristoph Hellwig 	struct bio bio;
54cd82cca7SChristoph Hellwig 	ssize_t ret;
55cd82cca7SChristoph Hellwig 
5637fee2e4SKeith Busch 	if (blkdev_dio_unaligned(bdev, pos, iter))
57cd82cca7SChristoph Hellwig 		return -EINVAL;
58cd82cca7SChristoph Hellwig 
59cd82cca7SChristoph Hellwig 	if (nr_pages <= DIO_INLINE_BIO_VECS)
60cd82cca7SChristoph Hellwig 		vecs = inline_vecs;
61cd82cca7SChristoph Hellwig 	else {
62cd82cca7SChristoph Hellwig 		vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
63cd82cca7SChristoph Hellwig 				     GFP_KERNEL);
64cd82cca7SChristoph Hellwig 		if (!vecs)
65cd82cca7SChristoph Hellwig 			return -ENOMEM;
66cd82cca7SChristoph Hellwig 	}
67cd82cca7SChristoph Hellwig 
6849add496SChristoph Hellwig 	if (iov_iter_rw(iter) == READ) {
6949add496SChristoph Hellwig 		bio_init(&bio, bdev, vecs, nr_pages, REQ_OP_READ);
70fcb14cb1SAl Viro 		if (user_backed_iter(iter))
7149add496SChristoph Hellwig 			should_dirty = true;
7249add496SChristoph Hellwig 	} else {
7349add496SChristoph Hellwig 		bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
7449add496SChristoph Hellwig 	}
756549a874SPavel Begunkov 	bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
76cd82cca7SChristoph Hellwig 	bio.bi_ioprio = iocb->ki_ioprio;
77cd82cca7SChristoph Hellwig 
78cd82cca7SChristoph Hellwig 	ret = bio_iov_iter_get_pages(&bio, iter);
79cd82cca7SChristoph Hellwig 	if (unlikely(ret))
80cd82cca7SChristoph Hellwig 		goto out;
81cd82cca7SChristoph Hellwig 	ret = bio.bi_iter.bi_size;
82cd82cca7SChristoph Hellwig 
8349add496SChristoph Hellwig 	if (iov_iter_rw(iter) == WRITE)
84cd82cca7SChristoph Hellwig 		task_io_account_write(ret);
8549add496SChristoph Hellwig 
86cd82cca7SChristoph Hellwig 	if (iocb->ki_flags & IOCB_NOWAIT)
87cd82cca7SChristoph Hellwig 		bio.bi_opf |= REQ_NOWAIT;
88cd82cca7SChristoph Hellwig 
899650b453SMing Lei 	submit_bio_wait(&bio);
90cd82cca7SChristoph Hellwig 
91cd82cca7SChristoph Hellwig 	bio_release_pages(&bio, should_dirty);
92cd82cca7SChristoph Hellwig 	if (unlikely(bio.bi_status))
93cd82cca7SChristoph Hellwig 		ret = blk_status_to_errno(bio.bi_status);
94cd82cca7SChristoph Hellwig 
95cd82cca7SChristoph Hellwig out:
96cd82cca7SChristoph Hellwig 	if (vecs != inline_vecs)
97cd82cca7SChristoph Hellwig 		kfree(vecs);
98cd82cca7SChristoph Hellwig 
99cd82cca7SChristoph Hellwig 	bio_uninit(&bio);
100cd82cca7SChristoph Hellwig 
101cd82cca7SChristoph Hellwig 	return ret;
102cd82cca7SChristoph Hellwig }
103cd82cca7SChristoph Hellwig 
/* Bit flags stored in blkdev_dio::flags. */
enum {
	/* Read into a user-backed iterator: pages go through the dirty helpers. */
	DIO_SHOULD_DIRTY	= 1 << 0,
	/* Synchronous kiocb: the submitter sleeps and is woken on completion. */
	DIO_IS_SYNC		= 1 << 1,
};
10809ce8744SJens Axboe 
109cd82cca7SChristoph Hellwig struct blkdev_dio {
110cd82cca7SChristoph Hellwig 	union {
111cd82cca7SChristoph Hellwig 		struct kiocb		*iocb;
112cd82cca7SChristoph Hellwig 		struct task_struct	*waiter;
113cd82cca7SChristoph Hellwig 	};
114cd82cca7SChristoph Hellwig 	size_t			size;
115cd82cca7SChristoph Hellwig 	atomic_t		ref;
11609ce8744SJens Axboe 	unsigned int		flags;
1176155631aSJens Axboe 	struct bio		bio ____cacheline_aligned_in_smp;
118cd82cca7SChristoph Hellwig };
119cd82cca7SChristoph Hellwig 
120cd82cca7SChristoph Hellwig static struct bio_set blkdev_dio_pool;
121cd82cca7SChristoph Hellwig 
blkdev_bio_end_io(struct bio * bio)122cd82cca7SChristoph Hellwig static void blkdev_bio_end_io(struct bio *bio)
123cd82cca7SChristoph Hellwig {
124cd82cca7SChristoph Hellwig 	struct blkdev_dio *dio = bio->bi_private;
12509ce8744SJens Axboe 	bool should_dirty = dio->flags & DIO_SHOULD_DIRTY;
126cd82cca7SChristoph Hellwig 
127cd82cca7SChristoph Hellwig 	if (bio->bi_status && !dio->bio.bi_status)
128cd82cca7SChristoph Hellwig 		dio->bio.bi_status = bio->bi_status;
129cd82cca7SChristoph Hellwig 
130e71aa913SPavel Begunkov 	if (atomic_dec_and_test(&dio->ref)) {
13109ce8744SJens Axboe 		if (!(dio->flags & DIO_IS_SYNC)) {
132cd82cca7SChristoph Hellwig 			struct kiocb *iocb = dio->iocb;
133cd82cca7SChristoph Hellwig 			ssize_t ret;
134cd82cca7SChristoph Hellwig 
1353e08773cSChristoph Hellwig 			WRITE_ONCE(iocb->private, NULL);
1363e08773cSChristoph Hellwig 
137cd82cca7SChristoph Hellwig 			if (likely(!dio->bio.bi_status)) {
138cd82cca7SChristoph Hellwig 				ret = dio->size;
139cd82cca7SChristoph Hellwig 				iocb->ki_pos += ret;
140cd82cca7SChristoph Hellwig 			} else {
141cd82cca7SChristoph Hellwig 				ret = blk_status_to_errno(dio->bio.bi_status);
142cd82cca7SChristoph Hellwig 			}
143cd82cca7SChristoph Hellwig 
1446b19b766SJens Axboe 			dio->iocb->ki_complete(iocb, ret);
145cd82cca7SChristoph Hellwig 			bio_put(&dio->bio);
146cd82cca7SChristoph Hellwig 		} else {
147cd82cca7SChristoph Hellwig 			struct task_struct *waiter = dio->waiter;
148cd82cca7SChristoph Hellwig 
149cd82cca7SChristoph Hellwig 			WRITE_ONCE(dio->waiter, NULL);
150cd82cca7SChristoph Hellwig 			blk_wake_io_task(waiter);
151cd82cca7SChristoph Hellwig 		}
152cd82cca7SChristoph Hellwig 	}
153cd82cca7SChristoph Hellwig 
154cd82cca7SChristoph Hellwig 	if (should_dirty) {
155cd82cca7SChristoph Hellwig 		bio_check_pages_dirty(bio);
156cd82cca7SChristoph Hellwig 	} else {
157cd82cca7SChristoph Hellwig 		bio_release_pages(bio, false);
158cd82cca7SChristoph Hellwig 		bio_put(bio);
159cd82cca7SChristoph Hellwig 	}
160cd82cca7SChristoph Hellwig }
161cd82cca7SChristoph Hellwig 
__blkdev_direct_IO(struct kiocb * iocb,struct iov_iter * iter,unsigned int nr_pages)162cd82cca7SChristoph Hellwig static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
163cd82cca7SChristoph Hellwig 		unsigned int nr_pages)
164cd82cca7SChristoph Hellwig {
1654e762d86SChristoph Hellwig 	struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
166cd82cca7SChristoph Hellwig 	struct blk_plug plug;
167cd82cca7SChristoph Hellwig 	struct blkdev_dio *dio;
168cd82cca7SChristoph Hellwig 	struct bio *bio;
169cd82cca7SChristoph Hellwig 	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
17016458cf3SBart Van Assche 	blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
171cd82cca7SChristoph Hellwig 	loff_t pos = iocb->ki_pos;
172cd82cca7SChristoph Hellwig 	int ret = 0;
173cd82cca7SChristoph Hellwig 
17437fee2e4SKeith Busch 	if (blkdev_dio_unaligned(bdev, pos, iter))
175cd82cca7SChristoph Hellwig 		return -EINVAL;
176cd82cca7SChristoph Hellwig 
1770df71650SMike Snitzer 	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
1780df71650SMike Snitzer 		opf |= REQ_ALLOC_CACHE;
1790df71650SMike Snitzer 	bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
1800df71650SMike Snitzer 			       &blkdev_dio_pool);
181cd82cca7SChristoph Hellwig 	dio = container_of(bio, struct blkdev_dio, bio);
182e71aa913SPavel Begunkov 	atomic_set(&dio->ref, 1);
183e71aa913SPavel Begunkov 	/*
184e71aa913SPavel Begunkov 	 * Grab an extra reference to ensure the dio structure which is embedded
185e71aa913SPavel Begunkov 	 * into the first bio stays around.
186e71aa913SPavel Begunkov 	 */
187e71aa913SPavel Begunkov 	bio_get(bio);
188e71aa913SPavel Begunkov 
18909ce8744SJens Axboe 	is_sync = is_sync_kiocb(iocb);
19009ce8744SJens Axboe 	if (is_sync) {
19109ce8744SJens Axboe 		dio->flags = DIO_IS_SYNC;
192cd82cca7SChristoph Hellwig 		dio->waiter = current;
193cd82cca7SChristoph Hellwig 	} else {
19409ce8744SJens Axboe 		dio->flags = 0;
195cd82cca7SChristoph Hellwig 		dio->iocb = iocb;
196cd82cca7SChristoph Hellwig 	}
197cd82cca7SChristoph Hellwig 
198cd82cca7SChristoph Hellwig 	dio->size = 0;
199fcb14cb1SAl Viro 	if (is_read && user_backed_iter(iter))
20009ce8744SJens Axboe 		dio->flags |= DIO_SHOULD_DIRTY;
201cd82cca7SChristoph Hellwig 
202cd82cca7SChristoph Hellwig 	blk_start_plug(&plug);
203cd82cca7SChristoph Hellwig 
204cd82cca7SChristoph Hellwig 	for (;;) {
2056549a874SPavel Begunkov 		bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
206cd82cca7SChristoph Hellwig 		bio->bi_private = dio;
207cd82cca7SChristoph Hellwig 		bio->bi_end_io = blkdev_bio_end_io;
208cd82cca7SChristoph Hellwig 		bio->bi_ioprio = iocb->ki_ioprio;
209cd82cca7SChristoph Hellwig 
210cd82cca7SChristoph Hellwig 		ret = bio_iov_iter_get_pages(bio, iter);
211cd82cca7SChristoph Hellwig 		if (unlikely(ret)) {
212cd82cca7SChristoph Hellwig 			bio->bi_status = BLK_STS_IOERR;
213cd82cca7SChristoph Hellwig 			bio_endio(bio);
214cd82cca7SChristoph Hellwig 			break;
215cd82cca7SChristoph Hellwig 		}
21667d59247SJens Axboe 		if (iocb->ki_flags & IOCB_NOWAIT) {
21767d59247SJens Axboe 			/*
21867d59247SJens Axboe 			 * This is nonblocking IO, and we need to allocate
21967d59247SJens Axboe 			 * another bio if we have data left to map. As we
22067d59247SJens Axboe 			 * cannot guarantee that one of the sub bios will not
22167d59247SJens Axboe 			 * fail getting issued FOR NOWAIT and as error results
22267d59247SJens Axboe 			 * are coalesced across all of them, be safe and ask for
22367d59247SJens Axboe 			 * a retry of this from blocking context.
22467d59247SJens Axboe 			 */
22567d59247SJens Axboe 			if (unlikely(iov_iter_count(iter))) {
22667d59247SJens Axboe 				bio_release_pages(bio, false);
22767d59247SJens Axboe 				bio_clear_flag(bio, BIO_REFFED);
22867d59247SJens Axboe 				bio_put(bio);
22967d59247SJens Axboe 				blk_finish_plug(&plug);
23067d59247SJens Axboe 				return -EAGAIN;
23167d59247SJens Axboe 			}
23267d59247SJens Axboe 			bio->bi_opf |= REQ_NOWAIT;
23367d59247SJens Axboe 		}
234cd82cca7SChristoph Hellwig 
235cd82cca7SChristoph Hellwig 		if (is_read) {
23609ce8744SJens Axboe 			if (dio->flags & DIO_SHOULD_DIRTY)
237cd82cca7SChristoph Hellwig 				bio_set_pages_dirty(bio);
238cd82cca7SChristoph Hellwig 		} else {
239cd82cca7SChristoph Hellwig 			task_io_account_write(bio->bi_iter.bi_size);
240cd82cca7SChristoph Hellwig 		}
241cd82cca7SChristoph Hellwig 		dio->size += bio->bi_iter.bi_size;
242cd82cca7SChristoph Hellwig 		pos += bio->bi_iter.bi_size;
243cd82cca7SChristoph Hellwig 
244cd82cca7SChristoph Hellwig 		nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
245cd82cca7SChristoph Hellwig 		if (!nr_pages) {
2463e08773cSChristoph Hellwig 			submit_bio(bio);
247cd82cca7SChristoph Hellwig 			break;
248cd82cca7SChristoph Hellwig 		}
249cd82cca7SChristoph Hellwig 		atomic_inc(&dio->ref);
250cd82cca7SChristoph Hellwig 		submit_bio(bio);
25107888c66SChristoph Hellwig 		bio = bio_alloc(bdev, nr_pages, opf, GFP_KERNEL);
252cd82cca7SChristoph Hellwig 	}
253cd82cca7SChristoph Hellwig 
254cd82cca7SChristoph Hellwig 	blk_finish_plug(&plug);
255cd82cca7SChristoph Hellwig 
256cd82cca7SChristoph Hellwig 	if (!is_sync)
257cd82cca7SChristoph Hellwig 		return -EIOCBQUEUED;
258cd82cca7SChristoph Hellwig 
259cd82cca7SChristoph Hellwig 	for (;;) {
260cd82cca7SChristoph Hellwig 		set_current_state(TASK_UNINTERRUPTIBLE);
261cd82cca7SChristoph Hellwig 		if (!READ_ONCE(dio->waiter))
262cd82cca7SChristoph Hellwig 			break;
263cd82cca7SChristoph Hellwig 		blk_io_schedule();
264cd82cca7SChristoph Hellwig 	}
265cd82cca7SChristoph Hellwig 	__set_current_state(TASK_RUNNING);
266cd82cca7SChristoph Hellwig 
267cd82cca7SChristoph Hellwig 	if (!ret)
268cd82cca7SChristoph Hellwig 		ret = blk_status_to_errno(dio->bio.bi_status);
269cd82cca7SChristoph Hellwig 	if (likely(!ret))
270cd82cca7SChristoph Hellwig 		ret = dio->size;
271cd82cca7SChristoph Hellwig 
272cd82cca7SChristoph Hellwig 	bio_put(&dio->bio);
273cd82cca7SChristoph Hellwig 	return ret;
274cd82cca7SChristoph Hellwig }
275cd82cca7SChristoph Hellwig 
blkdev_bio_end_io_async(struct bio * bio)27654a88eb8SPavel Begunkov static void blkdev_bio_end_io_async(struct bio *bio)
27754a88eb8SPavel Begunkov {
27854a88eb8SPavel Begunkov 	struct blkdev_dio *dio = container_of(bio, struct blkdev_dio, bio);
27954a88eb8SPavel Begunkov 	struct kiocb *iocb = dio->iocb;
28054a88eb8SPavel Begunkov 	ssize_t ret;
28154a88eb8SPavel Begunkov 
282bb49c6faSStefano Garzarella 	WRITE_ONCE(iocb->private, NULL);
283bb49c6faSStefano Garzarella 
28454a88eb8SPavel Begunkov 	if (likely(!bio->bi_status)) {
28554a88eb8SPavel Begunkov 		ret = dio->size;
28654a88eb8SPavel Begunkov 		iocb->ki_pos += ret;
28754a88eb8SPavel Begunkov 	} else {
28854a88eb8SPavel Begunkov 		ret = blk_status_to_errno(bio->bi_status);
28954a88eb8SPavel Begunkov 	}
29054a88eb8SPavel Begunkov 
291b6773cdbSLinus Torvalds 	iocb->ki_complete(iocb, ret);
29254a88eb8SPavel Begunkov 
29354a88eb8SPavel Begunkov 	if (dio->flags & DIO_SHOULD_DIRTY) {
29454a88eb8SPavel Begunkov 		bio_check_pages_dirty(bio);
29554a88eb8SPavel Begunkov 	} else {
29654a88eb8SPavel Begunkov 		bio_release_pages(bio, false);
29754a88eb8SPavel Begunkov 		bio_put(bio);
29854a88eb8SPavel Begunkov 	}
29954a88eb8SPavel Begunkov }
30054a88eb8SPavel Begunkov 
__blkdev_direct_IO_async(struct kiocb * iocb,struct iov_iter * iter,unsigned int nr_pages)30154a88eb8SPavel Begunkov static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
30254a88eb8SPavel Begunkov 					struct iov_iter *iter,
30354a88eb8SPavel Begunkov 					unsigned int nr_pages)
30454a88eb8SPavel Begunkov {
3054e762d86SChristoph Hellwig 	struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
306b77c88c2SChristoph Hellwig 	bool is_read = iov_iter_rw(iter) == READ;
30716458cf3SBart Van Assche 	blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
30854a88eb8SPavel Begunkov 	struct blkdev_dio *dio;
30954a88eb8SPavel Begunkov 	struct bio *bio;
31054a88eb8SPavel Begunkov 	loff_t pos = iocb->ki_pos;
31154a88eb8SPavel Begunkov 	int ret = 0;
31254a88eb8SPavel Begunkov 
31337fee2e4SKeith Busch 	if (blkdev_dio_unaligned(bdev, pos, iter))
31454a88eb8SPavel Begunkov 		return -EINVAL;
31554a88eb8SPavel Begunkov 
3160df71650SMike Snitzer 	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
3170df71650SMike Snitzer 		opf |= REQ_ALLOC_CACHE;
3180df71650SMike Snitzer 	bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
3190df71650SMike Snitzer 			       &blkdev_dio_pool);
32054a88eb8SPavel Begunkov 	dio = container_of(bio, struct blkdev_dio, bio);
32154a88eb8SPavel Begunkov 	dio->flags = 0;
32254a88eb8SPavel Begunkov 	dio->iocb = iocb;
32354a88eb8SPavel Begunkov 	bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
32454a88eb8SPavel Begunkov 	bio->bi_end_io = blkdev_bio_end_io_async;
32554a88eb8SPavel Begunkov 	bio->bi_ioprio = iocb->ki_ioprio;
32654a88eb8SPavel Begunkov 
3271bb6b810SPavel Begunkov 	if (iov_iter_is_bvec(iter)) {
3281bb6b810SPavel Begunkov 		/*
3291bb6b810SPavel Begunkov 		 * Users don't rely on the iterator being in any particular
3301bb6b810SPavel Begunkov 		 * state for async I/O returning -EIOCBQUEUED, hence we can
3311bb6b810SPavel Begunkov 		 * avoid expensive iov_iter_advance(). Bypass
3321bb6b810SPavel Begunkov 		 * bio_iov_iter_get_pages() and set the bvec directly.
3331bb6b810SPavel Begunkov 		 */
3341bb6b810SPavel Begunkov 		bio_iov_bvec_set(bio, iter);
3351bb6b810SPavel Begunkov 	} else {
33654a88eb8SPavel Begunkov 		ret = bio_iov_iter_get_pages(bio, iter);
33754a88eb8SPavel Begunkov 		if (unlikely(ret)) {
33875feae73SPavel Begunkov 			bio_put(bio);
33954a88eb8SPavel Begunkov 			return ret;
34054a88eb8SPavel Begunkov 		}
3411bb6b810SPavel Begunkov 	}
34254a88eb8SPavel Begunkov 	dio->size = bio->bi_iter.bi_size;
34354a88eb8SPavel Begunkov 
344b77c88c2SChristoph Hellwig 	if (is_read) {
345fcb14cb1SAl Viro 		if (user_backed_iter(iter)) {
34654a88eb8SPavel Begunkov 			dio->flags |= DIO_SHOULD_DIRTY;
34754a88eb8SPavel Begunkov 			bio_set_pages_dirty(bio);
34854a88eb8SPavel Begunkov 		}
34954a88eb8SPavel Begunkov 	} else {
35054a88eb8SPavel Begunkov 		task_io_account_write(bio->bi_iter.bi_size);
35154a88eb8SPavel Begunkov 	}
35254a88eb8SPavel Begunkov 
3532bc05769SJens Axboe 	if (iocb->ki_flags & IOCB_NOWAIT)
3542bc05769SJens Axboe 		bio->bi_opf |= REQ_NOWAIT;
3552bc05769SJens Axboe 
35654a88eb8SPavel Begunkov 	if (iocb->ki_flags & IOCB_HIPRI) {
3572bc05769SJens Axboe 		bio->bi_opf |= REQ_POLLED;
35854a88eb8SPavel Begunkov 		submit_bio(bio);
35954a88eb8SPavel Begunkov 		WRITE_ONCE(iocb->private, bio);
36054a88eb8SPavel Begunkov 	} else {
36154a88eb8SPavel Begunkov 		submit_bio(bio);
36254a88eb8SPavel Begunkov 	}
36354a88eb8SPavel Begunkov 	return -EIOCBQUEUED;
36454a88eb8SPavel Begunkov }
36554a88eb8SPavel Begunkov 
blkdev_direct_IO(struct kiocb * iocb,struct iov_iter * iter)366cd82cca7SChristoph Hellwig static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
367cd82cca7SChristoph Hellwig {
368cd82cca7SChristoph Hellwig 	unsigned int nr_pages;
369cd82cca7SChristoph Hellwig 
370cd82cca7SChristoph Hellwig 	if (!iov_iter_count(iter))
371cd82cca7SChristoph Hellwig 		return 0;
372cd82cca7SChristoph Hellwig 
373cd82cca7SChristoph Hellwig 	nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
37454a88eb8SPavel Begunkov 	if (likely(nr_pages <= BIO_MAX_VECS)) {
37554a88eb8SPavel Begunkov 		if (is_sync_kiocb(iocb))
376cd82cca7SChristoph Hellwig 			return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
37754a88eb8SPavel Begunkov 		return __blkdev_direct_IO_async(iocb, iter, nr_pages);
37854a88eb8SPavel Begunkov 	}
379cd82cca7SChristoph Hellwig 	return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
380cd82cca7SChristoph Hellwig }
381cd82cca7SChristoph Hellwig 
/*
 * ->iomap_begin for block devices.
 *
 * Fills @iomap with one IOMAP_MAPPED extent that starts at @offset rounded
 * down to the logical block size and runs to the end of the device
 * (i_size of @inode). The address equals the file offset. @length, @flags
 * and @srcmap are not used.
 *
 * Returns 0 on success or -EIO when @offset is at or beyond the device size;
 * iomap->bdev and iomap->offset have already been written in that case.
 */
static int blkdev_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
	struct block_device *bdev = I_BDEV(inode);
	const loff_t dev_size = i_size_read(inode);

	iomap->bdev = bdev;
	iomap->offset = ALIGN_DOWN(offset, bdev_logical_block_size(bdev));
	if (offset >= dev_size)
		return -EIO;

	iomap->type = IOMAP_MAPPED;
	iomap->addr = iomap->offset;
	iomap->length = dev_size - iomap->offset;
	iomap->flags |= IOMAP_F_BUFFER_HEAD; /* noop for !CONFIG_BUFFER_HEAD */

	return 0;
}
398487c607dSChristoph Hellwig 
399487c607dSChristoph Hellwig static const struct iomap_ops blkdev_iomap_ops = {
400487c607dSChristoph Hellwig 	.iomap_begin		= blkdev_iomap_begin,
401487c607dSChristoph Hellwig };
402487c607dSChristoph Hellwig 
403925c86a1SChristoph Hellwig #ifdef CONFIG_BUFFER_HEAD
/*
 * get_block callback for block devices: block @iblock maps to the same
 * block number on the device behind @inode. @create is ignored. Always
 * returns 0.
 */
static int blkdev_get_block(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	struct block_device *bdev = I_BDEV(inode);

	bh->b_bdev = bdev;
	bh->b_blocknr = iblock;
	set_buffer_mapped(bh);

	return 0;
}
412925c86a1SChristoph Hellwig 
blkdev_writepage(struct page * page,struct writeback_control * wbc)413cd82cca7SChristoph Hellwig static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
414cd82cca7SChristoph Hellwig {
415cd82cca7SChristoph Hellwig 	return block_write_full_page(page, blkdev_get_block, wbc);
416cd82cca7SChristoph Hellwig }
417cd82cca7SChristoph Hellwig 
blkdev_read_folio(struct file * file,struct folio * folio)4182c69e205SMatthew Wilcox (Oracle) static int blkdev_read_folio(struct file *file, struct folio *folio)
419cd82cca7SChristoph Hellwig {
4202c69e205SMatthew Wilcox (Oracle) 	return block_read_full_folio(folio, blkdev_get_block);
421cd82cca7SChristoph Hellwig }
422cd82cca7SChristoph Hellwig 
blkdev_readahead(struct readahead_control * rac)423cd82cca7SChristoph Hellwig static void blkdev_readahead(struct readahead_control *rac)
424cd82cca7SChristoph Hellwig {
425cd82cca7SChristoph Hellwig 	mpage_readahead(rac, blkdev_get_block);
426cd82cca7SChristoph Hellwig }
427cd82cca7SChristoph Hellwig 
/*
 * ->write_begin: forward to block_write_begin() with blkdev_get_block() as
 * the mapping callback. @file and @fsdata are not used.
 */
static int blkdev_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, struct page **pagep, void **fsdata)
{
	int err = block_write_begin(mapping, pos, len, pagep, blkdev_get_block);

	return err;
}
433cd82cca7SChristoph Hellwig 
/*
 * ->write_end: finish the write with block_write_end(), then unlock @page
 * and drop a reference to it. Returns whatever block_write_end() returned.
 */
static int blkdev_write_end(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned copied, struct page *page,
		void *fsdata)
{
	const int written = block_write_end(file, mapping, pos, len, copied,
					    page, fsdata);

	unlock_page(page);
	put_page(page);

	return written;
}
446cd82cca7SChristoph Hellwig 
447cd82cca7SChristoph Hellwig const struct address_space_operations def_blk_aops = {
448e621900aSMatthew Wilcox (Oracle) 	.dirty_folio	= block_dirty_folio,
4497ba13abbSMatthew Wilcox (Oracle) 	.invalidate_folio = block_invalidate_folio,
4502c69e205SMatthew Wilcox (Oracle) 	.read_folio	= blkdev_read_folio,
451cd82cca7SChristoph Hellwig 	.readahead	= blkdev_readahead,
452cd82cca7SChristoph Hellwig 	.writepage	= blkdev_writepage,
453cd82cca7SChristoph Hellwig 	.write_begin	= blkdev_write_begin,
454cd82cca7SChristoph Hellwig 	.write_end	= blkdev_write_end,
45567235182SMatthew Wilcox (Oracle) 	.migrate_folio	= buffer_migrate_folio_norefs,
456cd82cca7SChristoph Hellwig 	.is_dirty_writeback = buffer_check_dirty_writeback,
457cd82cca7SChristoph Hellwig };
458925c86a1SChristoph Hellwig #else /* CONFIG_BUFFER_HEAD */
blkdev_read_folio(struct file * file,struct folio * folio)459925c86a1SChristoph Hellwig static int blkdev_read_folio(struct file *file, struct folio *folio)
460925c86a1SChristoph Hellwig {
461925c86a1SChristoph Hellwig 	return iomap_read_folio(folio, &blkdev_iomap_ops);
462925c86a1SChristoph Hellwig }
463925c86a1SChristoph Hellwig 
blkdev_readahead(struct readahead_control * rac)464925c86a1SChristoph Hellwig static void blkdev_readahead(struct readahead_control *rac)
465925c86a1SChristoph Hellwig {
466925c86a1SChristoph Hellwig 	iomap_readahead(rac, &blkdev_iomap_ops);
467925c86a1SChristoph Hellwig }
468925c86a1SChristoph Hellwig 
/*
 * ->map_blocks for iomap writeback.
 *
 * Keeps the mapping cached in @wpc when it already covers @offset, otherwise
 * refreshes it with blkdev_iomap_begin(). An @offset at or beyond the device
 * size triggers a one-time warning and returns -EIO.
 */
static int blkdev_map_blocks(struct iomap_writepage_ctx *wpc,
		struct inode *inode, loff_t offset)
{
	const struct iomap *cached = &wpc->iomap;
	const loff_t dev_size = i_size_read(inode);

	if (WARN_ON_ONCE(offset >= dev_size))
		return -EIO;

	/* Inside [cached->offset, cached->offset + cached->length)? */
	if (offset >= cached->offset &&
	    offset < cached->offset + cached->length)
		return 0;

	return blkdev_iomap_begin(inode, offset, dev_size - offset,
				  IOMAP_WRITE, &wpc->iomap, NULL);
}
482925c86a1SChristoph Hellwig 
483925c86a1SChristoph Hellwig static const struct iomap_writeback_ops blkdev_writeback_ops = {
484925c86a1SChristoph Hellwig 	.map_blocks		= blkdev_map_blocks,
485925c86a1SChristoph Hellwig };
486925c86a1SChristoph Hellwig 
blkdev_writepages(struct address_space * mapping,struct writeback_control * wbc)487925c86a1SChristoph Hellwig static int blkdev_writepages(struct address_space *mapping,
488925c86a1SChristoph Hellwig 		struct writeback_control *wbc)
489925c86a1SChristoph Hellwig {
490925c86a1SChristoph Hellwig 	struct iomap_writepage_ctx wpc = { };
491925c86a1SChristoph Hellwig 
492925c86a1SChristoph Hellwig 	return iomap_writepages(mapping, wbc, &wpc, &blkdev_writeback_ops);
493925c86a1SChristoph Hellwig }
494925c86a1SChristoph Hellwig 
495925c86a1SChristoph Hellwig const struct address_space_operations def_blk_aops = {
496925c86a1SChristoph Hellwig 	.dirty_folio	= filemap_dirty_folio,
497925c86a1SChristoph Hellwig 	.release_folio		= iomap_release_folio,
498925c86a1SChristoph Hellwig 	.invalidate_folio	= iomap_invalidate_folio,
499925c86a1SChristoph Hellwig 	.read_folio		= blkdev_read_folio,
500925c86a1SChristoph Hellwig 	.readahead		= blkdev_readahead,
501925c86a1SChristoph Hellwig 	.writepages		= blkdev_writepages,
502925c86a1SChristoph Hellwig 	.is_partially_uptodate  = iomap_is_partially_uptodate,
503925c86a1SChristoph Hellwig 	.error_remove_page	= generic_error_remove_page,
504925c86a1SChristoph Hellwig 	.migrate_folio		= filemap_migrate_folio,
505925c86a1SChristoph Hellwig };
506925c86a1SChristoph Hellwig #endif /* CONFIG_BUFFER_HEAD */
507cd82cca7SChristoph Hellwig 
508cd82cca7SChristoph Hellwig /*
509cd82cca7SChristoph Hellwig  * for a block special file file_inode(file)->i_size is zero
510cd82cca7SChristoph Hellwig  * so we compute the size by hand (just as in block_read/write above)
511cd82cca7SChristoph Hellwig  */
blkdev_llseek(struct file * file,loff_t offset,int whence)512cd82cca7SChristoph Hellwig static loff_t blkdev_llseek(struct file *file, loff_t offset, int whence)
513cd82cca7SChristoph Hellwig {
514cd82cca7SChristoph Hellwig 	struct inode *bd_inode = bdev_file_inode(file);
515cd82cca7SChristoph Hellwig 	loff_t retval;
516cd82cca7SChristoph Hellwig 
517cd82cca7SChristoph Hellwig 	inode_lock(bd_inode);
518cd82cca7SChristoph Hellwig 	retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
519cd82cca7SChristoph Hellwig 	inode_unlock(bd_inode);
520cd82cca7SChristoph Hellwig 	return retval;
521cd82cca7SChristoph Hellwig }
522cd82cca7SChristoph Hellwig 
/*
 * ->fsync for block devices: write back and wait on the range
 * [@start, @end], then flush the device. A device that reports -EOPNOTSUPP
 * for the flush is treated as success. @datasync is not used.
 */
static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
		int datasync)
{
	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
	int err;

	err = file_write_and_wait_range(filp, start, end);
	if (err)
		return err;

	/*
	 * There is no need to serialise calls to blkdev_issue_flush with
	 * i_mutex and doing so causes performance issues with concurrent
	 * O_SYNC writers to a block device.
	 */
	err = blkdev_issue_flush(bdev);

	return err == -EOPNOTSUPP ? 0 : err;
}
544cd82cca7SChristoph Hellwig 
file_to_blk_mode(struct file * file)54505bdb996SChristoph Hellwig blk_mode_t file_to_blk_mode(struct file *file)
54605bdb996SChristoph Hellwig {
54705bdb996SChristoph Hellwig 	blk_mode_t mode = 0;
54805bdb996SChristoph Hellwig 
54905bdb996SChristoph Hellwig 	if (file->f_mode & FMODE_READ)
55005bdb996SChristoph Hellwig 		mode |= BLK_OPEN_READ;
55105bdb996SChristoph Hellwig 	if (file->f_mode & FMODE_WRITE)
55205bdb996SChristoph Hellwig 		mode |= BLK_OPEN_WRITE;
553ee3249a8SChristoph Hellwig 	if (file->private_data)
55405bdb996SChristoph Hellwig 		mode |= BLK_OPEN_EXCL;
55505bdb996SChristoph Hellwig 	if (file->f_flags & O_NDELAY)
55605bdb996SChristoph Hellwig 		mode |= BLK_OPEN_NDELAY;
55705bdb996SChristoph Hellwig 
55805bdb996SChristoph Hellwig 	/*
55905bdb996SChristoph Hellwig 	 * If all bits in O_ACCMODE set (aka O_RDWR | O_WRONLY), the floppy
56005bdb996SChristoph Hellwig 	 * driver has historically allowed ioctls as if the file was opened for
56105bdb996SChristoph Hellwig 	 * writing, but does not allow and actual reads or writes.
56205bdb996SChristoph Hellwig 	 */
56305bdb996SChristoph Hellwig 	if ((file->f_flags & O_ACCMODE) == (O_RDWR | O_WRONLY))
56405bdb996SChristoph Hellwig 		mode |= BLK_OPEN_WRITE_IOCTL;
56505bdb996SChristoph Hellwig 
56605bdb996SChristoph Hellwig 	return mode;
56705bdb996SChristoph Hellwig }
56805bdb996SChristoph Hellwig 
blkdev_open(struct inode * inode,struct file * filp)569cd82cca7SChristoph Hellwig static int blkdev_open(struct inode *inode, struct file *filp)
570cd82cca7SChristoph Hellwig {
571cd82cca7SChristoph Hellwig 	struct block_device *bdev;
572cd82cca7SChristoph Hellwig 
573cd82cca7SChristoph Hellwig 	/*
574cd82cca7SChristoph Hellwig 	 * Preserve backwards compatibility and allow large file access
575cd82cca7SChristoph Hellwig 	 * even if userspace doesn't ask for it explicitly. Some mkfs
576cd82cca7SChristoph Hellwig 	 * binary needs it. We might want to drop this workaround
577cd82cca7SChristoph Hellwig 	 * during an unstable branch.
578cd82cca7SChristoph Hellwig 	 */
579cd82cca7SChristoph Hellwig 	filp->f_flags |= O_LARGEFILE;
580a05f7bd9SChristoph Hellwig 	filp->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
581cd82cca7SChristoph Hellwig 
582ee3249a8SChristoph Hellwig 	/*
583ee3249a8SChristoph Hellwig 	 * Use the file private data to store the holder for exclusive openes.
584ee3249a8SChristoph Hellwig 	 * file_to_blk_mode relies on it being present to set BLK_OPEN_EXCL.
585ee3249a8SChristoph Hellwig 	 */
586cd82cca7SChristoph Hellwig 	if (filp->f_flags & O_EXCL)
587ee3249a8SChristoph Hellwig 		filp->private_data = filp;
588cd82cca7SChristoph Hellwig 
58905bdb996SChristoph Hellwig 	bdev = blkdev_get_by_dev(inode->i_rdev, file_to_blk_mode(filp),
590ee3249a8SChristoph Hellwig 				 filp->private_data, NULL);
591cd82cca7SChristoph Hellwig 	if (IS_ERR(bdev))
592cd82cca7SChristoph Hellwig 		return PTR_ERR(bdev);
593fac7c6d5SPavel Begunkov 
594e9833d87SJens Axboe 	if (bdev_nowait(bdev))
595e9833d87SJens Axboe 		filp->f_mode |= FMODE_NOWAIT;
596e9833d87SJens Axboe 
597cd82cca7SChristoph Hellwig 	filp->f_mapping = bdev->bd_inode->i_mapping;
598cd82cca7SChristoph Hellwig 	filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
599cd82cca7SChristoph Hellwig 	return 0;
600cd82cca7SChristoph Hellwig }
601cd82cca7SChristoph Hellwig 
blkdev_release(struct inode * inode,struct file * filp)6027ee34cbcSChristoph Hellwig static int blkdev_release(struct inode *inode, struct file *filp)
603cd82cca7SChristoph Hellwig {
604ee3249a8SChristoph Hellwig 	blkdev_put(I_BDEV(filp->f_mapping->host), filp->private_data);
605cd82cca7SChristoph Hellwig 	return 0;
606cd82cca7SChristoph Hellwig }
607cd82cca7SChristoph Hellwig 
608727cfe97SChristoph Hellwig static ssize_t
blkdev_direct_write(struct kiocb * iocb,struct iov_iter * from)609727cfe97SChristoph Hellwig blkdev_direct_write(struct kiocb *iocb, struct iov_iter *from)
610727cfe97SChristoph Hellwig {
611727cfe97SChristoph Hellwig 	size_t count = iov_iter_count(from);
612727cfe97SChristoph Hellwig 	ssize_t written;
613727cfe97SChristoph Hellwig 
614727cfe97SChristoph Hellwig 	written = kiocb_invalidate_pages(iocb, count);
615727cfe97SChristoph Hellwig 	if (written) {
616727cfe97SChristoph Hellwig 		if (written == -EBUSY)
617727cfe97SChristoph Hellwig 			return 0;
618727cfe97SChristoph Hellwig 		return written;
619727cfe97SChristoph Hellwig 	}
620727cfe97SChristoph Hellwig 
621727cfe97SChristoph Hellwig 	written = blkdev_direct_IO(iocb, from);
622727cfe97SChristoph Hellwig 	if (written > 0) {
623727cfe97SChristoph Hellwig 		kiocb_invalidate_post_direct_write(iocb, count);
624727cfe97SChristoph Hellwig 		iocb->ki_pos += written;
625727cfe97SChristoph Hellwig 		count -= written;
626727cfe97SChristoph Hellwig 	}
627727cfe97SChristoph Hellwig 	if (written != -EIOCBQUEUED)
628727cfe97SChristoph Hellwig 		iov_iter_revert(from, count - iov_iter_count(from));
629727cfe97SChristoph Hellwig 	return written;
630727cfe97SChristoph Hellwig }
631727cfe97SChristoph Hellwig 
blkdev_buffered_write(struct kiocb * iocb,struct iov_iter * from)632487c607dSChristoph Hellwig static ssize_t blkdev_buffered_write(struct kiocb *iocb, struct iov_iter *from)
633487c607dSChristoph Hellwig {
634487c607dSChristoph Hellwig 	return iomap_file_buffered_write(iocb, from, &blkdev_iomap_ops);
635487c607dSChristoph Hellwig }
636487c607dSChristoph Hellwig 
637cd82cca7SChristoph Hellwig /*
638cd82cca7SChristoph Hellwig  * Write data to the block device.  Only intended for the block device itself
639cd82cca7SChristoph Hellwig  * and the raw driver which basically is a fake block device.
640cd82cca7SChristoph Hellwig  *
641cd82cca7SChristoph Hellwig  * Does not take i_mutex for the write and thus is not for general purpose
642cd82cca7SChristoph Hellwig  * use.
643cd82cca7SChristoph Hellwig  */
blkdev_write_iter(struct kiocb * iocb,struct iov_iter * from)644cd82cca7SChristoph Hellwig static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
645cd82cca7SChristoph Hellwig {
646727cfe97SChristoph Hellwig 	struct file *file = iocb->ki_filp;
647727cfe97SChristoph Hellwig 	struct block_device *bdev = I_BDEV(file->f_mapping->host);
648fac7c6d5SPavel Begunkov 	struct inode *bd_inode = bdev->bd_inode;
649138c1a38SJens Axboe 	loff_t size = bdev_nr_bytes(bdev);
650cd82cca7SChristoph Hellwig 	size_t shorted = 0;
651cd82cca7SChristoph Hellwig 	ssize_t ret;
652cd82cca7SChristoph Hellwig 
653fac7c6d5SPavel Begunkov 	if (bdev_read_only(bdev))
654cd82cca7SChristoph Hellwig 		return -EPERM;
655cd82cca7SChristoph Hellwig 
656cd82cca7SChristoph Hellwig 	if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev))
657cd82cca7SChristoph Hellwig 		return -ETXTBSY;
658cd82cca7SChristoph Hellwig 
659cd82cca7SChristoph Hellwig 	if (!iov_iter_count(from))
660cd82cca7SChristoph Hellwig 		return 0;
661cd82cca7SChristoph Hellwig 
662cd82cca7SChristoph Hellwig 	if (iocb->ki_pos >= size)
663cd82cca7SChristoph Hellwig 		return -ENOSPC;
664cd82cca7SChristoph Hellwig 
665cd82cca7SChristoph Hellwig 	if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
666cd82cca7SChristoph Hellwig 		return -EOPNOTSUPP;
667cd82cca7SChristoph Hellwig 
668cd82cca7SChristoph Hellwig 	size -= iocb->ki_pos;
669cd82cca7SChristoph Hellwig 	if (iov_iter_count(from) > size) {
670cd82cca7SChristoph Hellwig 		shorted = iov_iter_count(from) - size;
671cd82cca7SChristoph Hellwig 		iov_iter_truncate(from, size);
672cd82cca7SChristoph Hellwig 	}
673cd82cca7SChristoph Hellwig 
674727cfe97SChristoph Hellwig 	ret = file_update_time(file);
675727cfe97SChristoph Hellwig 	if (ret)
676727cfe97SChristoph Hellwig 		return ret;
677727cfe97SChristoph Hellwig 
678727cfe97SChristoph Hellwig 	if (iocb->ki_flags & IOCB_DIRECT) {
679727cfe97SChristoph Hellwig 		ret = blkdev_direct_write(iocb, from);
680727cfe97SChristoph Hellwig 		if (ret >= 0 && iov_iter_count(from))
681727cfe97SChristoph Hellwig 			ret = direct_write_fallback(iocb, from, ret,
682487c607dSChristoph Hellwig 					blkdev_buffered_write(iocb, from));
683727cfe97SChristoph Hellwig 	} else {
684487c607dSChristoph Hellwig 		ret = blkdev_buffered_write(iocb, from);
685727cfe97SChristoph Hellwig 	}
686727cfe97SChristoph Hellwig 
687cd82cca7SChristoph Hellwig 	if (ret > 0)
688cd82cca7SChristoph Hellwig 		ret = generic_write_sync(iocb, ret);
689cd82cca7SChristoph Hellwig 	iov_iter_reexpand(from, iov_iter_count(from) + shorted);
690cd82cca7SChristoph Hellwig 	return ret;
691cd82cca7SChristoph Hellwig }
692cd82cca7SChristoph Hellwig 
blkdev_read_iter(struct kiocb * iocb,struct iov_iter * to)693cd82cca7SChristoph Hellwig static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
694cd82cca7SChristoph Hellwig {
6954e762d86SChristoph Hellwig 	struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
696138c1a38SJens Axboe 	loff_t size = bdev_nr_bytes(bdev);
697cd82cca7SChristoph Hellwig 	loff_t pos = iocb->ki_pos;
698cd82cca7SChristoph Hellwig 	size_t shorted = 0;
699ceaa7625SJens Axboe 	ssize_t ret = 0;
7003e1f941dSIlya Dryomov 	size_t count;
701cd82cca7SChristoph Hellwig 
7023e1f941dSIlya Dryomov 	if (unlikely(pos + iov_iter_count(to) > size)) {
703cd82cca7SChristoph Hellwig 		if (pos >= size)
704cd82cca7SChristoph Hellwig 			return 0;
705cd82cca7SChristoph Hellwig 		size -= pos;
7063e1f941dSIlya Dryomov 		shorted = iov_iter_count(to) - size;
707cd82cca7SChristoph Hellwig 		iov_iter_truncate(to, size);
708cd82cca7SChristoph Hellwig 	}
7093e1f941dSIlya Dryomov 
7103e1f941dSIlya Dryomov 	count = iov_iter_count(to);
7113e1f941dSIlya Dryomov 	if (!count)
7123e1f941dSIlya Dryomov 		goto reexpand; /* skip atime */
713cd82cca7SChristoph Hellwig 
714ceaa7625SJens Axboe 	if (iocb->ki_flags & IOCB_DIRECT) {
7153c435a0fSChristoph Hellwig 		ret = kiocb_write_and_wait(iocb, count);
716ceaa7625SJens Axboe 		if (ret < 0)
7173e1f941dSIlya Dryomov 			goto reexpand;
718ceaa7625SJens Axboe 		file_accessed(iocb->ki_filp);
719ceaa7625SJens Axboe 
720ceaa7625SJens Axboe 		ret = blkdev_direct_IO(iocb, to);
721ceaa7625SJens Axboe 		if (ret >= 0) {
722ceaa7625SJens Axboe 			iocb->ki_pos += ret;
723ceaa7625SJens Axboe 			count -= ret;
724ceaa7625SJens Axboe 		}
7253e1f941dSIlya Dryomov 		iov_iter_revert(to, count - iov_iter_count(to));
726ceaa7625SJens Axboe 		if (ret < 0 || !count)
7273e1f941dSIlya Dryomov 			goto reexpand;
728ceaa7625SJens Axboe 	}
729ceaa7625SJens Axboe 
730ceaa7625SJens Axboe 	ret = filemap_read(iocb, to, ret);
7316450fe1fSPavel Begunkov 
7323e1f941dSIlya Dryomov reexpand:
7336450fe1fSPavel Begunkov 	if (unlikely(shorted))
734cd82cca7SChristoph Hellwig 		iov_iter_reexpand(to, iov_iter_count(to) + shorted);
735cd82cca7SChristoph Hellwig 	return ret;
736cd82cca7SChristoph Hellwig }
737cd82cca7SChristoph Hellwig 
738cd82cca7SChristoph Hellwig #define	BLKDEV_FALLOC_FL_SUPPORTED					\
739cd82cca7SChristoph Hellwig 		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
740cd82cca7SChristoph Hellwig 		 FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
741cd82cca7SChristoph Hellwig 
blkdev_fallocate(struct file * file,int mode,loff_t start,loff_t len)742cd82cca7SChristoph Hellwig static long blkdev_fallocate(struct file *file, int mode, loff_t start,
743cd82cca7SChristoph Hellwig 			     loff_t len)
744cd82cca7SChristoph Hellwig {
745f278eb3dSMing Lei 	struct inode *inode = bdev_file_inode(file);
746f278eb3dSMing Lei 	struct block_device *bdev = I_BDEV(inode);
747cd82cca7SChristoph Hellwig 	loff_t end = start + len - 1;
748cd82cca7SChristoph Hellwig 	loff_t isize;
749cd82cca7SChristoph Hellwig 	int error;
750cd82cca7SChristoph Hellwig 
751cd82cca7SChristoph Hellwig 	/* Fail if we don't recognize the flags. */
752cd82cca7SChristoph Hellwig 	if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
753cd82cca7SChristoph Hellwig 		return -EOPNOTSUPP;
754cd82cca7SChristoph Hellwig 
755cd82cca7SChristoph Hellwig 	/* Don't go off the end of the device. */
7562a93ad8fSChristoph Hellwig 	isize = bdev_nr_bytes(bdev);
757cd82cca7SChristoph Hellwig 	if (start >= isize)
758cd82cca7SChristoph Hellwig 		return -EINVAL;
759cd82cca7SChristoph Hellwig 	if (end >= isize) {
760cd82cca7SChristoph Hellwig 		if (mode & FALLOC_FL_KEEP_SIZE) {
761cd82cca7SChristoph Hellwig 			len = isize - start;
762cd82cca7SChristoph Hellwig 			end = start + len - 1;
763cd82cca7SChristoph Hellwig 		} else
764cd82cca7SChristoph Hellwig 			return -EINVAL;
765cd82cca7SChristoph Hellwig 	}
766cd82cca7SChristoph Hellwig 
767cd82cca7SChristoph Hellwig 	/*
768cd82cca7SChristoph Hellwig 	 * Don't allow IO that isn't aligned to logical block size.
769cd82cca7SChristoph Hellwig 	 */
770cd82cca7SChristoph Hellwig 	if ((start | len) & (bdev_logical_block_size(bdev) - 1))
771cd82cca7SChristoph Hellwig 		return -EINVAL;
772cd82cca7SChristoph Hellwig 
773f278eb3dSMing Lei 	filemap_invalidate_lock(inode->i_mapping);
774f278eb3dSMing Lei 
7751364a3c3SSarthak Kukreti 	/*
7761364a3c3SSarthak Kukreti 	 * Invalidate the page cache, including dirty pages, for valid
7771364a3c3SSarthak Kukreti 	 * de-allocate mode calls to fallocate().
7781364a3c3SSarthak Kukreti 	 */
7791364a3c3SSarthak Kukreti 	switch (mode) {
7801364a3c3SSarthak Kukreti 	case FALLOC_FL_ZERO_RANGE:
7811364a3c3SSarthak Kukreti 	case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
78205bdb996SChristoph Hellwig 		error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end);
783cd82cca7SChristoph Hellwig 		if (error)
784f278eb3dSMing Lei 			goto fail;
785cd82cca7SChristoph Hellwig 
7866549a874SPavel Begunkov 		error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT,
7876549a874SPavel Begunkov 					     len >> SECTOR_SHIFT, GFP_KERNEL,
7886549a874SPavel Begunkov 					     BLKDEV_ZERO_NOUNMAP);
789cd82cca7SChristoph Hellwig 		break;
790cd82cca7SChristoph Hellwig 	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
7911364a3c3SSarthak Kukreti 		error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end);
7921364a3c3SSarthak Kukreti 		if (error)
7931364a3c3SSarthak Kukreti 			goto fail;
7941364a3c3SSarthak Kukreti 
7956549a874SPavel Begunkov 		error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT,
7966549a874SPavel Begunkov 					     len >> SECTOR_SHIFT, GFP_KERNEL,
7976549a874SPavel Begunkov 					     BLKDEV_ZERO_NOFALLBACK);
798cd82cca7SChristoph Hellwig 		break;
799cd82cca7SChristoph Hellwig 	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
8001364a3c3SSarthak Kukreti 		error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end);
8011364a3c3SSarthak Kukreti 		if (error)
8021364a3c3SSarthak Kukreti 			goto fail;
8031364a3c3SSarthak Kukreti 
8046549a874SPavel Begunkov 		error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
80544abff2cSChristoph Hellwig 					     len >> SECTOR_SHIFT, GFP_KERNEL);
806cd82cca7SChristoph Hellwig 		break;
807cd82cca7SChristoph Hellwig 	default:
808f278eb3dSMing Lei 		error = -EOPNOTSUPP;
809cd82cca7SChristoph Hellwig 	}
810cd82cca7SChristoph Hellwig 
811f278eb3dSMing Lei  fail:
812f278eb3dSMing Lei 	filemap_invalidate_unlock(inode->i_mapping);
813f278eb3dSMing Lei 	return error;
814cd82cca7SChristoph Hellwig }
815cd82cca7SChristoph Hellwig 
/*
 * ->mmap handler of def_blk_fops.
 *
 * A read-only block device is mapped through generic_file_readonly_mmap(),
 * any other device through generic_file_mmap().  Returns the result of the
 * helper that was called.
 */
static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct block_device *bdev = I_BDEV(bdev_file_inode(file));

	if (!bdev_read_only(bdev))
		return generic_file_mmap(file, vma);

	return generic_file_readonly_mmap(file, vma);
}
82569baa3a6SLoic Poulain 
826cd82cca7SChristoph Hellwig const struct file_operations def_blk_fops = {
827cd82cca7SChristoph Hellwig 	.open		= blkdev_open,
8287ee34cbcSChristoph Hellwig 	.release	= blkdev_release,
829cd82cca7SChristoph Hellwig 	.llseek		= blkdev_llseek,
830cd82cca7SChristoph Hellwig 	.read_iter	= blkdev_read_iter,
831cd82cca7SChristoph Hellwig 	.write_iter	= blkdev_write_iter,
8323e08773cSChristoph Hellwig 	.iopoll		= iocb_bio_iopoll,
83369baa3a6SLoic Poulain 	.mmap		= blkdev_mmap,
834cd82cca7SChristoph Hellwig 	.fsync		= blkdev_fsync,
8358a709512SChristoph Hellwig 	.unlocked_ioctl	= blkdev_ioctl,
836cd82cca7SChristoph Hellwig #ifdef CONFIG_COMPAT
837cd82cca7SChristoph Hellwig 	.compat_ioctl	= compat_blkdev_ioctl,
838cd82cca7SChristoph Hellwig #endif
8392cb1e089SDavid Howells 	.splice_read	= filemap_splice_read,
840cd82cca7SChristoph Hellwig 	.splice_write	= iter_file_splice_write,
841cd82cca7SChristoph Hellwig 	.fallocate	= blkdev_fallocate,
842cd82cca7SChristoph Hellwig };
843cd82cca7SChristoph Hellwig 
blkdev_init(void)844cd82cca7SChristoph Hellwig static __init int blkdev_init(void)
845cd82cca7SChristoph Hellwig {
846cd82cca7SChristoph Hellwig 	return bioset_init(&blkdev_dio_pool, 4,
847cd82cca7SChristoph Hellwig 				offsetof(struct blkdev_dio, bio),
848cd82cca7SChristoph Hellwig 				BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE);
849cd82cca7SChristoph Hellwig }
850cd82cca7SChristoph Hellwig module_init(blkdev_init);
851