// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 1991, 1992  Linus Torvalds
 * Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
 * Copyright (C) 2016 - 2020 Christoph Hellwig
 */
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/falloc.h>
#include <linux/suspend.h>
#include <linux/fs.h>
#include <linux/module.h>
#include "blk.h"

static inline struct inode *bdev_file_inode(struct file *file)
{
	return file->f_mapping->host;
}

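/*
 * For a block device the page cache maps 1:1 onto the device, so get_block
 * simply points the buffer_head at the same block number on the bdev
 * itself; there is no allocation and there are no holes.
 */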
static int blkdev_get_block(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	bh->b_bdev = I_BDEV(inode);
	bh->b_blocknr = iblock;
	set_buffer_mapped(bh);
	return 0;
}

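/*
 * Direct writes are issued REQ_SYNC | REQ_IDLE; O_DSYNC/O_SYNC writes
 * additionally set REQ_FUA so the data is durable once the bio completes,
 * which avoids having to punt completion to a workqueue just to issue a
 * separate cache flush.
 */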
static blk_opf_t dio_bio_write_op(struct kiocb *iocb)
{
	blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;

	/* avoid the need for an I/O completion work item */
	if (iocb_is_dsync(iocb))
		opf |= REQ_FUA;
	return opf;
}

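/*
 * Direct I/O on a block device requires the file offset to be aligned to
 * the logical block size and the iovec segments to satisfy the device's
 * alignment and length requirements (see bdev_iter_is_aligned()).
 */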
static bool blkdev_dio_unaligned(struct block_device *bdev, loff_t pos,
			      struct iov_iter *iter)
{
	return pos & (bdev_logical_block_size(bdev) - 1) ||
		!bdev_iter_is_aligned(bdev, iter);
}

#define DIO_INLINE_BIO_VECS 4

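/*
 * Fast path for synchronous direct I/O that fits in a single bio: the bio
 * lives on the stack, uses an inline bio_vec array for up to
 * DIO_INLINE_BIO_VECS segments (a heap allocation otherwise), and is
 * submitted with submit_bio_wait().
 */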
static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
		struct iov_iter *iter, unsigned int nr_pages)
{
	struct block_device *bdev = iocb->ki_filp->private_data;
	struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
	loff_t pos = iocb->ki_pos;
	bool should_dirty = false;
	struct bio bio;
	ssize_t ret;

	if (blkdev_dio_unaligned(bdev, pos, iter))
		return -EINVAL;

	if (nr_pages <= DIO_INLINE_BIO_VECS)
		vecs = inline_vecs;
	else {
		vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
				     GFP_KERNEL);
		if (!vecs)
			return -ENOMEM;
	}

	if (iov_iter_rw(iter) == READ) {
		bio_init(&bio, bdev, vecs, nr_pages, REQ_OP_READ);
		if (user_backed_iter(iter))
			should_dirty = true;
	} else {
		bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
	}
	bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
	bio.bi_ioprio = iocb->ki_ioprio;

	ret = bio_iov_iter_get_pages(&bio, iter);
	if (unlikely(ret))
		goto out;
	ret = bio.bi_iter.bi_size;

	if (iov_iter_rw(iter) == WRITE)
		task_io_account_write(ret);

	if (iocb->ki_flags & IOCB_NOWAIT)
		bio.bi_opf |= REQ_NOWAIT;

	submit_bio_wait(&bio);

	bio_release_pages(&bio, should_dirty);
	if (unlikely(bio.bi_status))
		ret = blk_status_to_errno(bio.bi_status);

out:
	if (vecs != inline_vecs)
		kfree(vecs);

	bio_uninit(&bio);

	return ret;
}

enum {
	DIO_SHOULD_DIRTY	= 1,
	DIO_IS_SYNC		= 2,
};

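/*
 * Per-request state for the multi-bio and async paths.  It is carved out of
 * the front padding of the first bio allocated from blkdev_dio_pool (see
 * blkdev_init()), so it lives exactly as long as that bio.  ->ref counts the
 * bios still in flight on the multi-bio path; the union holds either the
 * iocb to complete (async) or the task to wake up (sync).
 */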
struct blkdev_dio {
	union {
		struct kiocb		*iocb;
		struct task_struct	*waiter;
	};
	size_t			size;
	atomic_t		ref;
	unsigned int		flags;
	struct bio		bio ____cacheline_aligned_in_smp;
};

static struct bio_set blkdev_dio_pool;

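/*
 * Completion handler for the multi-bio path.  The first error seen is
 * recorded in the parent bio, and only the final reference drop completes
 * the iocb (async) or wakes the submitter (sync).  User-backed read pages
 * are handed to bio_check_pages_dirty(); everything else is released and
 * the bio is put.
 */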
static void blkdev_bio_end_io(struct bio *bio)
{
	struct blkdev_dio *dio = bio->bi_private;
	bool should_dirty = dio->flags & DIO_SHOULD_DIRTY;

	if (bio->bi_status && !dio->bio.bi_status)
		dio->bio.bi_status = bio->bi_status;

	if (atomic_dec_and_test(&dio->ref)) {
		if (!(dio->flags & DIO_IS_SYNC)) {
			struct kiocb *iocb = dio->iocb;
			ssize_t ret;

			WRITE_ONCE(iocb->private, NULL);

			if (likely(!dio->bio.bi_status)) {
				ret = dio->size;
				iocb->ki_pos += ret;
			} else {
				ret = blk_status_to_errno(dio->bio.bi_status);
			}

			dio->iocb->ki_complete(iocb, ret);
			bio_put(&dio->bio);
		} else {
			struct task_struct *waiter = dio->waiter;

			WRITE_ONCE(dio->waiter, NULL);
			blk_wake_io_task(waiter);
		}
	}

	if (should_dirty) {
		bio_check_pages_dirty(bio);
	} else {
		bio_release_pages(bio, false);
		bio_put(bio);
	}
}

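/*
 * Multi-bio path, used when the request needs more than BIO_MAX_VECS
 * segments.  All bios share a single blkdev_dio and are submitted under a
 * plug; every additional bio takes another dio reference so that
 * blkdev_bio_end_io() only completes the request once all of them have
 * finished.  Synchronous callers sleep in blk_io_schedule() below.
 */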
static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
		unsigned int nr_pages)
{
	struct block_device *bdev = iocb->ki_filp->private_data;
	struct blk_plug plug;
	struct blkdev_dio *dio;
	struct bio *bio;
	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
	blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
	loff_t pos = iocb->ki_pos;
	int ret = 0;

	if (blkdev_dio_unaligned(bdev, pos, iter))
		return -EINVAL;

	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
		opf |= REQ_ALLOC_CACHE;
	bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
			       &blkdev_dio_pool);
	dio = container_of(bio, struct blkdev_dio, bio);
	atomic_set(&dio->ref, 1);
	/*
	 * Grab an extra reference to ensure the dio structure, which is
	 * embedded into the first bio, stays around.
	 */
	bio_get(bio);

	is_sync = is_sync_kiocb(iocb);
	if (is_sync) {
		dio->flags = DIO_IS_SYNC;
		dio->waiter = current;
	} else {
		dio->flags = 0;
		dio->iocb = iocb;
	}

	dio->size = 0;
	if (is_read && user_backed_iter(iter))
		dio->flags |= DIO_SHOULD_DIRTY;

	blk_start_plug(&plug);

	for (;;) {
		bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
		bio->bi_private = dio;
		bio->bi_end_io = blkdev_bio_end_io;
		bio->bi_ioprio = iocb->ki_ioprio;

		ret = bio_iov_iter_get_pages(bio, iter);
		if (unlikely(ret)) {
			bio->bi_status = BLK_STS_IOERR;
			bio_endio(bio);
			break;
		}

		if (is_read) {
			if (dio->flags & DIO_SHOULD_DIRTY)
				bio_set_pages_dirty(bio);
		} else {
			task_io_account_write(bio->bi_iter.bi_size);
		}
		if (iocb->ki_flags & IOCB_NOWAIT)
			bio->bi_opf |= REQ_NOWAIT;

		dio->size += bio->bi_iter.bi_size;
		pos += bio->bi_iter.bi_size;

		nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
		if (!nr_pages) {
			submit_bio(bio);
			break;
		}
		atomic_inc(&dio->ref);
		submit_bio(bio);
		bio = bio_alloc(bdev, nr_pages, opf, GFP_KERNEL);
	}

	blk_finish_plug(&plug);

	if (!is_sync)
		return -EIOCBQUEUED;

	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(dio->waiter))
			break;
		blk_io_schedule();
	}
	__set_current_state(TASK_RUNNING);

	if (!ret)
		ret = blk_status_to_errno(dio->bio.bi_status);
	if (likely(!ret))
		ret = dio->size;

	bio_put(&dio->bio);
	return ret;
}

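/*
 * Completion handler for the single-bio async path: no reference counting
 * is needed since exactly one bio backs the request, so the iocb can be
 * completed directly from here.
 */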
static void blkdev_bio_end_io_async(struct bio *bio)
{
	struct blkdev_dio *dio = container_of(bio, struct blkdev_dio, bio);
	struct kiocb *iocb = dio->iocb;
	ssize_t ret;

	WRITE_ONCE(iocb->private, NULL);

	if (likely(!bio->bi_status)) {
		ret = dio->size;
		iocb->ki_pos += ret;
	} else {
		ret = blk_status_to_errno(bio->bi_status);
	}

	iocb->ki_complete(iocb, ret);

	if (dio->flags & DIO_SHOULD_DIRTY) {
		bio_check_pages_dirty(bio);
	} else {
		bio_release_pages(bio, false);
		bio_put(bio);
	}
}

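/*
 * Async fast path for requests that fit in a single bio.  bvec-backed
 * iterators (e.g. io_uring fixed buffers) skip page pinning entirely via
 * bio_iov_bvec_set().  For polled I/O (IOCB_HIPRI) the bio is stashed in
 * iocb->private so that iocb_bio_iopoll() can find it.
 */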
static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
					struct iov_iter *iter,
					unsigned int nr_pages)
{
	struct block_device *bdev = iocb->ki_filp->private_data;
	bool is_read = iov_iter_rw(iter) == READ;
	blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
	struct blkdev_dio *dio;
	struct bio *bio;
	loff_t pos = iocb->ki_pos;
	int ret = 0;

	if (blkdev_dio_unaligned(bdev, pos, iter))
		return -EINVAL;

	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
		opf |= REQ_ALLOC_CACHE;
	bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
			       &blkdev_dio_pool);
	dio = container_of(bio, struct blkdev_dio, bio);
	dio->flags = 0;
	dio->iocb = iocb;
	bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
	bio->bi_end_io = blkdev_bio_end_io_async;
	bio->bi_ioprio = iocb->ki_ioprio;

	if (iov_iter_is_bvec(iter)) {
		/*
		 * Users don't rely on the iterator being in any particular
		 * state for async I/O returning -EIOCBQUEUED, hence we can
		 * avoid expensive iov_iter_advance(). Bypass
		 * bio_iov_iter_get_pages() and set the bvec directly.
		 */
		bio_iov_bvec_set(bio, iter);
	} else {
		ret = bio_iov_iter_get_pages(bio, iter);
		if (unlikely(ret)) {
			bio_put(bio);
			return ret;
		}
	}
	dio->size = bio->bi_iter.bi_size;

	if (is_read) {
		if (user_backed_iter(iter)) {
			dio->flags |= DIO_SHOULD_DIRTY;
			bio_set_pages_dirty(bio);
		}
	} else {
		task_io_account_write(bio->bi_iter.bi_size);
	}

	if (iocb->ki_flags & IOCB_HIPRI) {
		bio->bi_opf |= REQ_POLLED | REQ_NOWAIT;
		submit_bio(bio);
		WRITE_ONCE(iocb->private, bio);
	} else {
		if (iocb->ki_flags & IOCB_NOWAIT)
			bio->bi_opf |= REQ_NOWAIT;
		submit_bio(bio);
	}
	return -EIOCBQUEUED;
}

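/*
 * Entry point for O_DIRECT: if the request fits in a single bio
 * (<= BIO_MAX_VECS segments), use the simple on-stack path for sync iocbs
 * or the async single-bio path otherwise; larger requests take the
 * multi-bio path.  Asking for BIO_MAX_VECS + 1 below only serves to detect
 * whether the request exceeds the single-bio limit.
 */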
static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	unsigned int nr_pages;

	if (!iov_iter_count(iter))
		return 0;

	nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
	if (likely(nr_pages <= BIO_MAX_VECS)) {
		if (is_sync_kiocb(iocb))
			return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
		return __blkdev_direct_IO_async(iocb, iter, nr_pages);
	}
	return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
}

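/*
 * Buffered I/O on the block device node goes through the generic
 * buffer_head/mpage helpers; blkdev_get_block() provides the trivial block
 * mapping they need.
 */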
static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, blkdev_get_block, wbc);
}

static int blkdev_read_folio(struct file *file, struct folio *folio)
{
	return block_read_full_folio(folio, blkdev_get_block);
}

static void blkdev_readahead(struct readahead_control *rac)
{
	mpage_readahead(rac, blkdev_get_block);
}

static int blkdev_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, struct page **pagep, void **fsdata)
{
	return block_write_begin(mapping, pos, len, pagep, blkdev_get_block);
}

static int blkdev_write_end(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned copied, struct page *page,
		void *fsdata)
{
	int ret;
	ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	unlock_page(page);
	put_page(page);

	return ret;
}

const struct address_space_operations def_blk_aops = {
	.dirty_folio	= block_dirty_folio,
	.invalidate_folio = block_invalidate_folio,
	.read_folio	= blkdev_read_folio,
	.readahead	= blkdev_readahead,
	.writepage	= blkdev_writepage,
	.write_begin	= blkdev_write_begin,
	.write_end	= blkdev_write_end,
	.direct_IO	= blkdev_direct_IO,
	.migrate_folio	= buffer_migrate_folio_norefs,
	.is_dirty_writeback = buffer_check_dirty_writeback,
};

/*
 * For a block special file, file_inode(file)->i_size is zero, so we compute
 * the size by hand (just as in block_read/write above).
 */
static loff_t blkdev_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *bd_inode = bdev_file_inode(file);
	loff_t retval;

	inode_lock(bd_inode);
	retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
	inode_unlock(bd_inode);
	return retval;
}

static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
		int datasync)
{
	struct block_device *bdev = filp->private_data;
	int error;

	error = file_write_and_wait_range(filp, start, end);
	if (error)
		return error;

	/*
	 * There is no need to serialise calls to blkdev_issue_flush with
	 * i_mutex, and doing so causes performance issues with concurrent
	 * O_SYNC writers to a block device.
	 */
	error = blkdev_issue_flush(bdev);
	if (error == -EOPNOTSUPP)
		error = 0;

	return error;
}

static int blkdev_open(struct inode *inode, struct file *filp)
{
	struct block_device *bdev;

	/*
	 * Preserve backwards compatibility and allow large file access
	 * even if userspace doesn't ask for it explicitly. Some mkfs
	 * binaries need it. We might want to drop this workaround
	 * during an unstable branch.
	 */
	filp->f_flags |= O_LARGEFILE;
	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;

	if (filp->f_flags & O_NDELAY)
		filp->f_mode |= FMODE_NDELAY;
	if (filp->f_flags & O_EXCL)
		filp->f_mode |= FMODE_EXCL;
	if ((filp->f_flags & O_ACCMODE) == 3)
		filp->f_mode |= FMODE_WRITE_IOCTL;

	bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	filp->private_data = bdev;
	filp->f_mapping = bdev->bd_inode->i_mapping;
	filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
	return 0;
}

static int blkdev_close(struct inode *inode, struct file *filp)
{
	struct block_device *bdev = filp->private_data;

	blkdev_put(bdev, filp->f_mode);
	return 0;
}

/*
 * Write data to the block device.  Only intended for the block device itself
 * and the raw driver, which basically is a fake block device.
 *
 * Does not take i_mutex for the write and thus is not for general purpose
 * use.
 */
static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct block_device *bdev = iocb->ki_filp->private_data;
	struct inode *bd_inode = bdev->bd_inode;
	loff_t size = bdev_nr_bytes(bdev);
	struct blk_plug plug;
	size_t shorted = 0;
	ssize_t ret;

	if (bdev_read_only(bdev))
		return -EPERM;

	if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev))
		return -ETXTBSY;

	if (!iov_iter_count(from))
		return 0;

	if (iocb->ki_pos >= size)
		return -ENOSPC;

	if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
		return -EOPNOTSUPP;

	size -= iocb->ki_pos;
	if (iov_iter_count(from) > size) {
		shorted = iov_iter_count(from) - size;
		iov_iter_truncate(from, size);
	}

	blk_start_plug(&plug);
	ret = __generic_file_write_iter(iocb, from);
	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	iov_iter_reexpand(from, iov_iter_count(from) + shorted);
	blk_finish_plug(&plug);
	return ret;
}

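/*
 * Reads are clamped to the device size and the iterator is re-expanded on
 * the way out.  For O_DIRECT, dirty page cache over the range is written
 * back first (or the read bails out with -EAGAIN under IOCB_NOWAIT), and
 * anything the direct path does not consume falls back to buffered reading
 * via filemap_read().
 */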
static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct block_device *bdev = iocb->ki_filp->private_data;
	loff_t size = bdev_nr_bytes(bdev);
	loff_t pos = iocb->ki_pos;
	size_t shorted = 0;
	ssize_t ret = 0;
	size_t count;

	if (unlikely(pos + iov_iter_count(to) > size)) {
		if (pos >= size)
			return 0;
		size -= pos;
		shorted = iov_iter_count(to) - size;
		iov_iter_truncate(to, size);
	}

	count = iov_iter_count(to);
	if (!count)
		goto reexpand; /* skip atime */

	if (iocb->ki_flags & IOCB_DIRECT) {
		struct address_space *mapping = iocb->ki_filp->f_mapping;

		if (iocb->ki_flags & IOCB_NOWAIT) {
			if (filemap_range_needs_writeback(mapping, pos,
							  pos + count - 1)) {
				ret = -EAGAIN;
				goto reexpand;
			}
		} else {
			ret = filemap_write_and_wait_range(mapping, pos,
							   pos + count - 1);
			if (ret < 0)
				goto reexpand;
		}

		file_accessed(iocb->ki_filp);

		ret = blkdev_direct_IO(iocb, to);
		if (ret >= 0) {
			iocb->ki_pos += ret;
			count -= ret;
		}
		iov_iter_revert(to, count - iov_iter_count(to));
		if (ret < 0 || !count)
			goto reexpand;
	}

	ret = filemap_read(iocb, to, ret);

reexpand:
	if (unlikely(shorted))
		iov_iter_reexpand(to, iov_iter_count(to) + shorted);
	return ret;
}

#define	BLKDEV_FALLOC_FL_SUPPORTED					\
		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
		 FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)

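/*
 * fallocate() on a block device: FALLOC_FL_ZERO_RANGE zeroes the range
 * without unmapping, plain punch hole zeroes it but fails if the device
 * cannot do so efficiently (BLKDEV_ZERO_NOFALLBACK), and punch hole with
 * FALLOC_FL_NO_HIDE_STALE issues a discard.  The affected page cache range
 * is invalidated first, under the mapping's invalidate_lock.
 */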
static long blkdev_fallocate(struct file *file, int mode, loff_t start,
			     loff_t len)
{
	struct inode *inode = bdev_file_inode(file);
	struct block_device *bdev = I_BDEV(inode);
	loff_t end = start + len - 1;
	loff_t isize;
	int error;

	/* Fail if we don't recognize the flags. */
	if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
		return -EOPNOTSUPP;

	/* Don't go off the end of the device. */
	isize = bdev_nr_bytes(bdev);
	if (start >= isize)
		return -EINVAL;
	if (end >= isize) {
		if (mode & FALLOC_FL_KEEP_SIZE) {
			len = isize - start;
			end = start + len - 1;
		} else
			return -EINVAL;
	}

	/*
	 * Don't allow IO that isn't aligned to logical block size.
	 */
	if ((start | len) & (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	filemap_invalidate_lock(inode->i_mapping);

	/* Invalidate the page cache, including dirty pages. */
	error = truncate_bdev_range(bdev, file->f_mode, start, end);
	if (error)
		goto fail;

	switch (mode) {
	case FALLOC_FL_ZERO_RANGE:
	case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
		error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT,
					     len >> SECTOR_SHIFT, GFP_KERNEL,
					     BLKDEV_ZERO_NOUNMAP);
		break;
	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
		error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT,
					     len >> SECTOR_SHIFT, GFP_KERNEL,
					     BLKDEV_ZERO_NOFALLBACK);
		break;
	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
		error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
					     len >> SECTOR_SHIFT, GFP_KERNEL);
		break;
	default:
		error = -EOPNOTSUPP;
	}

 fail:
	filemap_invalidate_unlock(inode->i_mapping);
	return error;
}

const struct file_operations def_blk_fops = {
	.open		= blkdev_open,
	.release	= blkdev_close,
	.llseek		= blkdev_llseek,
	.read_iter	= blkdev_read_iter,
	.write_iter	= blkdev_write_iter,
	.iopoll		= iocb_bio_iopoll,
	.mmap		= generic_file_mmap,
	.fsync		= blkdev_fsync,
	.unlocked_ioctl	= blkdev_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= compat_blkdev_ioctl,
#endif
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= blkdev_fallocate,
};

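/*
 * The bioset is front-padded by offsetof(struct blkdev_dio, bio) so that a
 * struct blkdev_dio can be carved out in front of every bio allocated from
 * it; BIOSET_PERCPU_CACHE backs the REQ_ALLOC_CACHE fast allocations used
 * by the direct I/O paths.
 */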
static __init int blkdev_init(void)
{
	return bioset_init(&blkdev_dio_pool, 4,
				offsetof(struct blkdev_dio, bio),
				BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE);
}
module_init(blkdev_init);