xref: /openbmc/linux/fs/nfs/file.c (revision 55f261b73a7e1cb254577c3536cef8f415de220a)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   *  linux/fs/nfs/file.c
4   *
5   *  Copyright (C) 1992  Rick Sladkey
6   *
7   *  Changes Copyright (C) 1994 by Florian La Roche
8   *   - Do not copy data too often around in the kernel.
9   *   - In nfs_file_read the return value of kmalloc wasn't checked.
10   *   - Put in a better version of read look-ahead buffering. Original idea
11   *     and implementation by Wai S Kok elekokws@ee.nus.sg.
12   *
13   *  Expire cache on write to a file by Wai S Kok (Oct 1994).
14   *
15   *  Total rewrite of read side for new NFS buffer cache.. Linus.
16   *
17   *  nfs regular file handling functions
18   */
19  
20  #include <linux/module.h>
21  #include <linux/time.h>
22  #include <linux/kernel.h>
23  #include <linux/errno.h>
24  #include <linux/fcntl.h>
25  #include <linux/stat.h>
26  #include <linux/nfs_fs.h>
27  #include <linux/nfs_mount.h>
28  #include <linux/mm.h>
29  #include <linux/pagemap.h>
30  #include <linux/gfp.h>
31  #include <linux/swap.h>
32  
33  #include <linux/uaccess.h>
34  
35  #include "delegation.h"
36  #include "internal.h"
37  #include "iostat.h"
38  #include "fscache.h"
39  #include "pnfs.h"
40  
41  #include "nfstrace.h"
42  
43  #define NFSDBG_FACILITY		NFSDBG_FILE
44  
45  static const struct vm_operations_struct nfs_file_vm_ops;
46  
47  /* Hack for future NFS swap support */
48  #ifndef IS_SWAPFILE
49  # define IS_SWAPFILE(inode)	(0)
50  #endif
51  
/**
 * nfs_check_flags - validate a set of open(2) flags
 * @flags: open flags to check
 *
 * O_APPEND together with O_DIRECT is rejected; every other
 * combination is accepted.
 *
 * Returns -EINVAL if both O_APPEND and O_DIRECT are set, 0 otherwise.
 */
int nfs_check_flags(int flags)
{
	const int forbidden = O_APPEND | O_DIRECT;

	return (flags & forbidden) == forbidden ? -EINVAL : 0;
}
59  EXPORT_SYMBOL_GPL(nfs_check_flags);
60  
61  /*
62   * Open file
63   */
64  static int
65  nfs_file_open(struct inode *inode, struct file *filp)
66  {
67  	int res;
68  
69  	dprintk("NFS: open file(%pD2)\n", filp);
70  
71  	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
72  	res = nfs_check_flags(filp->f_flags);
73  	if (res)
74  		return res;
75  
76  	res = nfs_open(inode, filp);
77  	return res;
78  }
79  
80  int
81  nfs_file_release(struct inode *inode, struct file *filp)
82  {
83  	dprintk("NFS: release(%pD2)\n", filp);
84  
85  	nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
86  	nfs_file_clear_open_context(filp);
87  	return 0;
88  }
89  EXPORT_SYMBOL_GPL(nfs_file_release);
90  
91  /**
92   * nfs_revalidate_file_size - Revalidate the file size
93   * @inode: pointer to inode struct
94   * @filp: pointer to struct file
95   *
96   * Revalidates the file length. This is basically a wrapper around
97   * nfs_revalidate_inode() that takes into account the fact that we may
98   * have cached writes (in which case we don't care about the server's
99   * idea of what the file length is), or O_DIRECT (in which case we
100   * shouldn't trust the cache).
101   */
102  static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
103  {
104  	struct nfs_server *server = NFS_SERVER(inode);
105  
106  	if (filp->f_flags & O_DIRECT)
107  		goto force_reval;
108  	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_SIZE))
109  		goto force_reval;
110  	return 0;
111  force_reval:
112  	return __nfs_revalidate_inode(server, inode);
113  }
114  
115  loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence)
116  {
117  	dprintk("NFS: llseek file(%pD2, %lld, %d)\n",
118  			filp, offset, whence);
119  
120  	/*
121  	 * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
122  	 * the cached file length
123  	 */
124  	if (whence != SEEK_SET && whence != SEEK_CUR) {
125  		struct inode *inode = filp->f_mapping->host;
126  
127  		int retval = nfs_revalidate_file_size(inode, filp);
128  		if (retval < 0)
129  			return (loff_t)retval;
130  	}
131  
132  	return generic_file_llseek(filp, offset, whence);
133  }
134  EXPORT_SYMBOL_GPL(nfs_file_llseek);
135  
136  /*
137   * Flush all dirty pages, and check for write errors.
138   */
139  static int
140  nfs_file_flush(struct file *file, fl_owner_t id)
141  {
142  	struct inode	*inode = file_inode(file);
143  	errseq_t since;
144  
145  	dprintk("NFS: flush(%pD2)\n", file);
146  
147  	nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
148  	if ((file->f_mode & FMODE_WRITE) == 0)
149  		return 0;
150  
151  	/* Flush writes to the server and return any errors */
152  	since = filemap_sample_wb_err(file->f_mapping);
153  	nfs_wb_all(inode);
154  	return filemap_check_wb_err(file->f_mapping, since);
155  }
156  
157  ssize_t
158  nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
159  {
160  	struct inode *inode = file_inode(iocb->ki_filp);
161  	ssize_t result;
162  
163  	if (iocb->ki_flags & IOCB_DIRECT)
164  		return nfs_file_direct_read(iocb, to);
165  
166  	dprintk("NFS: read(%pD2, %zu@%lu)\n",
167  		iocb->ki_filp,
168  		iov_iter_count(to), (unsigned long) iocb->ki_pos);
169  
170  	nfs_start_io_read(inode);
171  	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
172  	if (!result) {
173  		result = generic_file_read_iter(iocb, to);
174  		if (result > 0)
175  			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
176  	}
177  	nfs_end_io_read(inode);
178  	return result;
179  }
180  EXPORT_SYMBOL_GPL(nfs_file_read);
181  
182  int
183  nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
184  {
185  	struct inode *inode = file_inode(file);
186  	int	status;
187  
188  	dprintk("NFS: mmap(%pD2)\n", file);
189  
190  	/* Note: generic_file_mmap() returns ENOSYS on nommu systems
191  	 *       so we call that before revalidating the mapping
192  	 */
193  	status = generic_file_mmap(file, vma);
194  	if (!status) {
195  		vma->vm_ops = &nfs_file_vm_ops;
196  		status = nfs_revalidate_mapping(inode, file->f_mapping);
197  	}
198  	return status;
199  }
200  EXPORT_SYMBOL_GPL(nfs_file_mmap);
201  
202  /*
203   * Flush any dirty pages for this process, and check for write errors.
204   * The return status from this call provides a reliable indication of
205   * whether any write errors occurred for this process.
206   */
207  static int
208  nfs_file_fsync_commit(struct file *file, int datasync)
209  {
210  	struct inode *inode = file_inode(file);
211  	int ret;
212  
213  	dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync);
214  
215  	nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
216  	ret = nfs_commit_inode(inode, FLUSH_SYNC);
217  	if (ret < 0)
218  		return ret;
219  	return file_check_and_advance_wb_err(file);
220  }
221  
222  int
223  nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
224  {
225  	struct nfs_open_context *ctx = nfs_file_open_context(file);
226  	struct inode *inode = file_inode(file);
227  	int ret;
228  
229  	trace_nfs_fsync_enter(inode);
230  
231  	for (;;) {
232  		ret = file_write_and_wait_range(file, start, end);
233  		if (ret != 0)
234  			break;
235  		ret = nfs_file_fsync_commit(file, datasync);
236  		if (ret != 0)
237  			break;
238  		ret = pnfs_sync_inode(inode, !!datasync);
239  		if (ret != 0)
240  			break;
241  		if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags))
242  			break;
243  		/*
244  		 * If nfs_file_fsync_commit detected a server reboot, then
245  		 * resend all dirty pages that might have been covered by
246  		 * the NFS_CONTEXT_RESEND_WRITES flag
247  		 */
248  		start = 0;
249  		end = LLONG_MAX;
250  	}
251  
252  	trace_nfs_fsync_exit(inode, ret);
253  	return ret;
254  }
255  EXPORT_SYMBOL_GPL(nfs_file_fsync);
256  
/*
 * Decide whether a read/modify/write cycle may be more efficient
 * than a modify/write/read cycle when writing to a page in the
 * page cache.
 *
 * Some pNFS layout drivers can only read/write at a certain block
 * granularity like all block devices and therefore we must perform
 * read/modify/write whenever a page hasn't been read yet and the data
 * to be written there is not aligned to a block boundary and/or
 * smaller than the block size.
 *
 * The modify/write/read cycle may occur if a page is read before
 * being completely filled by the writer.  In this situation, the
 * page must be completely written to stable storage on the server
 * before it can be refilled by reading in the page from the server.
 * This can lead to expensive, small, FILE_SYNC mode writes being
 * done.
 *
 * It may be more efficient to read the page first if the file is
 * open for reading in addition to writing, the page is not marked
 * as Uptodate, it is not dirty or waiting to be committed,
 * indicating that it was previously allocated and then modified,
 * that there were valid bytes of data in that range of the file,
 * and that the new data won't completely replace the old data in
 * that range of the file.
 */
283  static bool nfs_full_page_write(struct page *page, loff_t pos, unsigned int len)
284  {
285  	unsigned int pglen = nfs_page_length(page);
286  	unsigned int offset = pos & (PAGE_SIZE - 1);
287  	unsigned int end = offset + len;
288  
289  	return !pglen || (end >= pglen && !offset);
290  }
291  
292  static bool nfs_want_read_modify_write(struct file *file, struct page *page,
293  			loff_t pos, unsigned int len)
294  {
295  	/*
296  	 * Up-to-date pages, those with ongoing or full-page write
297  	 * don't need read/modify/write
298  	 */
299  	if (PageUptodate(page) || PagePrivate(page) ||
300  	    nfs_full_page_write(page, pos, len))
301  		return false;
302  
303  	if (pnfs_ld_read_whole_page(file->f_mapping->host))
304  		return true;
305  	/* Open for reading too? */
306  	if (file->f_mode & FMODE_READ)
307  		return true;
308  	return false;
309  }
310  
311  /*
312   * This does the "real" work of the write. We must allocate and lock the
313   * page to be sent back to the generic routine, which then copies the
314   * data from user space.
315   *
316   * If the writer ends up delaying the write, the writer needs to
317   * increment the page use counts until he is done with the page.
318   */
319  static int nfs_write_begin(struct file *file, struct address_space *mapping,
320  			loff_t pos, unsigned len, unsigned flags,
321  			struct page **pagep, void **fsdata)
322  {
323  	int ret;
324  	pgoff_t index = pos >> PAGE_SHIFT;
325  	struct page *page;
326  	int once_thru = 0;
327  
328  	dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n",
329  		file, mapping->host->i_ino, len, (long long) pos);
330  
331  start:
332  	page = grab_cache_page_write_begin(mapping, index, flags);
333  	if (!page)
334  		return -ENOMEM;
335  	*pagep = page;
336  
337  	ret = nfs_flush_incompatible(file, page);
338  	if (ret) {
339  		unlock_page(page);
340  		put_page(page);
341  	} else if (!once_thru &&
342  		   nfs_want_read_modify_write(file, page, pos, len)) {
343  		once_thru = 1;
344  		ret = nfs_readpage(file, page);
345  		put_page(page);
346  		if (!ret)
347  			goto start;
348  	}
349  	return ret;
350  }
351  
352  static int nfs_write_end(struct file *file, struct address_space *mapping,
353  			loff_t pos, unsigned len, unsigned copied,
354  			struct page *page, void *fsdata)
355  {
356  	unsigned offset = pos & (PAGE_SIZE - 1);
357  	struct nfs_open_context *ctx = nfs_file_open_context(file);
358  	int status;
359  
360  	dfprintk(PAGECACHE, "NFS: write_end(%pD2(%lu), %u@%lld)\n",
361  		file, mapping->host->i_ino, len, (long long) pos);
362  
363  	/*
364  	 * Zero any uninitialised parts of the page, and then mark the page
365  	 * as up to date if it turns out that we're extending the file.
366  	 */
367  	if (!PageUptodate(page)) {
368  		unsigned pglen = nfs_page_length(page);
369  		unsigned end = offset + copied;
370  
371  		if (pglen == 0) {
372  			zero_user_segments(page, 0, offset,
373  					end, PAGE_SIZE);
374  			SetPageUptodate(page);
375  		} else if (end >= pglen) {
376  			zero_user_segment(page, end, PAGE_SIZE);
377  			if (offset == 0)
378  				SetPageUptodate(page);
379  		} else
380  			zero_user_segment(page, pglen, PAGE_SIZE);
381  	}
382  
383  	status = nfs_updatepage(file, page, offset, copied);
384  
385  	unlock_page(page);
386  	put_page(page);
387  
388  	if (status < 0)
389  		return status;
390  	NFS_I(mapping->host)->write_io += copied;
391  
392  	if (nfs_ctx_key_to_expire(ctx, mapping->host)) {
393  		status = nfs_wb_all(mapping->host);
394  		if (status < 0)
395  			return status;
396  	}
397  
398  	return copied;
399  }
400  
401  /*
402   * Partially or wholly invalidate a page
403   * - Release the private state associated with a page if undergoing complete
404   *   page invalidation
405   * - Called if either PG_private or PG_fscache is set on the page
406   * - Caller holds page lock
407   */
408  static void nfs_invalidate_page(struct page *page, unsigned int offset,
409  				unsigned int length)
410  {
411  	dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %u, %u)\n",
412  		 page, offset, length);
413  
414  	if (offset != 0 || length < PAGE_SIZE)
415  		return;
416  	/* Cancel any unstarted writes on this page */
417  	nfs_wb_page_cancel(page_file_mapping(page)->host, page);
418  
419  	nfs_fscache_invalidate_page(page, page->mapping->host);
420  }
421  
422  /*
423   * Attempt to release the private state associated with a page
424   * - Called if either PG_private or PG_fscache is set on the page
425   * - Caller holds page lock
426   * - Return true (may release page) or false (may not)
427   */
428  static int nfs_release_page(struct page *page, gfp_t gfp)
429  {
430  	dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
431  
432  	/* If PagePrivate() is set, then the page is not freeable */
433  	if (PagePrivate(page))
434  		return 0;
435  	return nfs_fscache_release_page(page, gfp);
436  }
437  
438  static void nfs_check_dirty_writeback(struct page *page,
439  				bool *dirty, bool *writeback)
440  {
441  	struct nfs_inode *nfsi;
442  	struct address_space *mapping = page_file_mapping(page);
443  
444  	if (!mapping || PageSwapCache(page))
445  		return;
446  
447  	/*
448  	 * Check if an unstable page is currently being committed and
449  	 * if so, have the VM treat it as if the page is under writeback
450  	 * so it will not block due to pages that will shortly be freeable.
451  	 */
452  	nfsi = NFS_I(mapping->host);
453  	if (atomic_read(&nfsi->commit_info.rpcs_out)) {
454  		*writeback = true;
455  		return;
456  	}
457  
458  	/*
459  	 * If PagePrivate() is set, then the page is not freeable and as the
460  	 * inode is not being committed, it's not going to be cleaned in the
461  	 * near future so treat it as dirty
462  	 */
463  	if (PagePrivate(page))
464  		*dirty = true;
465  }
466  
467  /*
468   * Attempt to clear the private state associated with a page when an error
469   * occurs that requires the cached contents of an inode to be written back or
470   * destroyed
471   * - Called if either PG_private or fscache is set on the page
472   * - Caller holds page lock
473   * - Return 0 if successful, -error otherwise
474   */
475  static int nfs_launder_page(struct page *page)
476  {
477  	struct inode *inode = page_file_mapping(page)->host;
478  	struct nfs_inode *nfsi = NFS_I(inode);
479  
480  	dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n",
481  		inode->i_ino, (long long)page_offset(page));
482  
483  	nfs_fscache_wait_on_page_write(nfsi, page);
484  	return nfs_wb_page(inode, page);
485  }
486  
487  static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
488  						sector_t *span)
489  {
490  	unsigned long blocks;
491  	long long isize;
492  	struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
493  	struct inode *inode = file->f_mapping->host;
494  
495  	spin_lock(&inode->i_lock);
496  	blocks = inode->i_blocks;
497  	isize = inode->i_size;
498  	spin_unlock(&inode->i_lock);
499  	if (blocks*512 < isize) {
500  		pr_warn("swap activate: swapfile has holes\n");
501  		return -EINVAL;
502  	}
503  
504  	*span = sis->pages;
505  
506  	return rpc_clnt_swap_activate(clnt);
507  }
508  
509  static void nfs_swap_deactivate(struct file *file)
510  {
511  	struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
512  
513  	rpc_clnt_swap_deactivate(clnt);
514  }
515  
516  const struct address_space_operations nfs_file_aops = {
517  	.readpage = nfs_readpage,
518  	.readpages = nfs_readpages,
519  	.set_page_dirty = __set_page_dirty_nobuffers,
520  	.writepage = nfs_writepage,
521  	.writepages = nfs_writepages,
522  	.write_begin = nfs_write_begin,
523  	.write_end = nfs_write_end,
524  	.invalidatepage = nfs_invalidate_page,
525  	.releasepage = nfs_release_page,
526  	.direct_IO = nfs_direct_IO,
527  #ifdef CONFIG_MIGRATION
528  	.migratepage = nfs_migrate_page,
529  #endif
530  	.launder_page = nfs_launder_page,
531  	.is_dirty_writeback = nfs_check_dirty_writeback,
532  	.error_remove_page = generic_error_remove_page,
533  	.swap_activate = nfs_swap_activate,
534  	.swap_deactivate = nfs_swap_deactivate,
535  };
536  
537  /*
538   * Notification that a PTE pointing to an NFS page is about to be made
539   * writable, implying that someone is about to modify the page through a
540   * shared-writable mapping
541   */
542  static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
543  {
544  	struct page *page = vmf->page;
545  	struct file *filp = vmf->vma->vm_file;
546  	struct inode *inode = file_inode(filp);
547  	unsigned pagelen;
548  	vm_fault_t ret = VM_FAULT_NOPAGE;
549  	struct address_space *mapping;
550  
551  	dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n",
552  		filp, filp->f_mapping->host->i_ino,
553  		(long long)page_offset(page));
554  
555  	sb_start_pagefault(inode->i_sb);
556  
557  	/* make sure the cache has finished storing the page */
558  	nfs_fscache_wait_on_page_write(NFS_I(inode), page);
559  
560  	wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING,
561  			nfs_wait_bit_killable, TASK_KILLABLE);
562  
563  	lock_page(page);
564  	mapping = page_file_mapping(page);
565  	if (mapping != inode->i_mapping)
566  		goto out_unlock;
567  
568  	wait_on_page_writeback(page);
569  
570  	pagelen = nfs_page_length(page);
571  	if (pagelen == 0)
572  		goto out_unlock;
573  
574  	ret = VM_FAULT_LOCKED;
575  	if (nfs_flush_incompatible(filp, page) == 0 &&
576  	    nfs_updatepage(filp, page, 0, pagelen) == 0)
577  		goto out;
578  
579  	ret = VM_FAULT_SIGBUS;
580  out_unlock:
581  	unlock_page(page);
582  out:
583  	sb_end_pagefault(inode->i_sb);
584  	return ret;
585  }
586  
587  static const struct vm_operations_struct nfs_file_vm_ops = {
588  	.fault = filemap_fault,
589  	.map_pages = filemap_map_pages,
590  	.page_mkwrite = nfs_vm_page_mkwrite,
591  };
592  
/*
 * Decide whether nfs_file_write() should flush immediately after a
 * write: returns 1 if @error is fatal on the server, or if
 * nfs_ctx_key_to_expire() reports the open context's key is about to
 * expire; 0 otherwise.
 */
static int nfs_need_check_write(struct file *filp, struct inode *inode,
				int error)
{
	struct nfs_open_context *ctx = nfs_file_open_context(filp);

	if (nfs_error_is_fatal_on_server(error))
		return 1;
	if (nfs_ctx_key_to_expire(ctx, inode))
		return 1;
	return 0;
}
604  
605  ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
606  {
607  	struct file *file = iocb->ki_filp;
608  	struct inode *inode = file_inode(file);
609  	unsigned int mntflags = NFS_SERVER(inode)->flags;
610  	ssize_t result, written;
611  	errseq_t since;
612  	int error;
613  
614  	result = nfs_key_timeout_notify(file, inode);
615  	if (result)
616  		return result;
617  
618  	if (iocb->ki_flags & IOCB_DIRECT)
619  		return nfs_file_direct_write(iocb, from);
620  
621  	dprintk("NFS: write(%pD2, %zu@%Ld)\n",
622  		file, iov_iter_count(from), (long long) iocb->ki_pos);
623  
624  	if (IS_SWAPFILE(inode))
625  		goto out_swapfile;
626  	/*
627  	 * O_APPEND implies that we must revalidate the file length.
628  	 */
629  	if (iocb->ki_flags & IOCB_APPEND || iocb->ki_pos > i_size_read(inode)) {
630  		result = nfs_revalidate_file_size(inode, file);
631  		if (result)
632  			goto out;
633  	}
634  
635  	nfs_clear_invalid_mapping(file->f_mapping);
636  
637  	since = filemap_sample_wb_err(file->f_mapping);
638  	nfs_start_io_write(inode);
639  	result = generic_write_checks(iocb, from);
640  	if (result > 0) {
641  		current->backing_dev_info = inode_to_bdi(inode);
642  		result = generic_perform_write(file, from, iocb->ki_pos);
643  		current->backing_dev_info = NULL;
644  	}
645  	nfs_end_io_write(inode);
646  	if (result <= 0)
647  		goto out;
648  
649  	written = result;
650  	iocb->ki_pos += written;
651  
652  	if (mntflags & NFS_MOUNT_WRITE_EAGER) {
653  		result = filemap_fdatawrite_range(file->f_mapping,
654  						  iocb->ki_pos - written,
655  						  iocb->ki_pos - 1);
656  		if (result < 0)
657  			goto out;
658  	}
659  	if (mntflags & NFS_MOUNT_WRITE_WAIT) {
660  		result = filemap_fdatawait_range(file->f_mapping,
661  						 iocb->ki_pos - written,
662  						 iocb->ki_pos - 1);
663  		if (result < 0)
664  			goto out;
665  	}
666  	result = generic_write_sync(iocb, written);
667  	if (result < 0)
668  		goto out;
669  
670  	/* Return error values */
671  	error = filemap_check_wb_err(file->f_mapping, since);
672  	if (nfs_need_check_write(file, inode, error)) {
673  		int err = nfs_wb_all(inode);
674  		if (err < 0)
675  			result = err;
676  	}
677  	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
678  out:
679  	return result;
680  
681  out_swapfile:
682  	printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
683  	return -ETXTBSY;
684  }
685  EXPORT_SYMBOL_GPL(nfs_file_write);
686  
687  static int
688  do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
689  {
690  	struct inode *inode = filp->f_mapping->host;
691  	int status = 0;
692  	unsigned int saved_type = fl->fl_type;
693  
694  	/* Try local locking first */
695  	posix_test_lock(filp, fl);
696  	if (fl->fl_type != F_UNLCK) {
697  		/* found a conflict */
698  		goto out;
699  	}
700  	fl->fl_type = saved_type;
701  
702  	if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
703  		goto out_noconflict;
704  
705  	if (is_local)
706  		goto out_noconflict;
707  
708  	status = NFS_PROTO(inode)->lock(filp, cmd, fl);
709  out:
710  	return status;
711  out_noconflict:
712  	fl->fl_type = F_UNLCK;
713  	goto out;
714  }
715  
716  static int
717  do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
718  {
719  	struct inode *inode = filp->f_mapping->host;
720  	struct nfs_lock_context *l_ctx;
721  	int status;
722  
723  	/*
724  	 * Flush all pending writes before doing anything
725  	 * with locks..
726  	 */
727  	nfs_wb_all(inode);
728  
729  	l_ctx = nfs_get_lock_context(nfs_file_open_context(filp));
730  	if (!IS_ERR(l_ctx)) {
731  		status = nfs_iocounter_wait(l_ctx);
732  		nfs_put_lock_context(l_ctx);
733  		/*  NOTE: special case
734  		 * 	If we're signalled while cleaning up locks on process exit, we
735  		 * 	still need to complete the unlock.
736  		 */
737  		if (status < 0 && !(fl->fl_flags & FL_CLOSE))
738  			return status;
739  	}
740  
741  	/*
742  	 * Use local locking if mounted with "-onolock" or with appropriate
743  	 * "-olocal_lock="
744  	 */
745  	if (!is_local)
746  		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
747  	else
748  		status = locks_lock_file_wait(filp, fl);
749  	return status;
750  }
751  
752  static int
753  do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
754  {
755  	struct inode *inode = filp->f_mapping->host;
756  	int status;
757  
758  	/*
759  	 * Flush all pending writes before doing anything
760  	 * with locks..
761  	 */
762  	status = nfs_sync_mapping(filp->f_mapping);
763  	if (status != 0)
764  		goto out;
765  
766  	/*
767  	 * Use local locking if mounted with "-onolock" or with appropriate
768  	 * "-olocal_lock="
769  	 */
770  	if (!is_local)
771  		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
772  	else
773  		status = locks_lock_file_wait(filp, fl);
774  	if (status < 0)
775  		goto out;
776  
777  	/*
778  	 * Invalidate cache to prevent missing any changes.  If
779  	 * the file is mapped, clear the page cache as well so
780  	 * those mappings will be loaded.
781  	 *
782  	 * This makes locking act as a cache coherency point.
783  	 */
784  	nfs_sync_mapping(filp->f_mapping);
785  	if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) {
786  		nfs_zap_caches(inode);
787  		if (mapping_mapped(filp->f_mapping))
788  			nfs_revalidate_mapping(inode, filp->f_mapping);
789  	}
790  out:
791  	return status;
792  }
793  
794  /*
795   * Lock a (portion of) a file
796   */
797  int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
798  {
799  	struct inode *inode = filp->f_mapping->host;
800  	int ret = -ENOLCK;
801  	int is_local = 0;
802  
803  	dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n",
804  			filp, fl->fl_type, fl->fl_flags,
805  			(long long)fl->fl_start, (long long)fl->fl_end);
806  
807  	nfs_inc_stats(inode, NFSIOS_VFSLOCK);
808  
809  	if (fl->fl_flags & FL_RECLAIM)
810  		return -ENOGRACE;
811  
812  	if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
813  		is_local = 1;
814  
815  	if (NFS_PROTO(inode)->lock_check_bounds != NULL) {
816  		ret = NFS_PROTO(inode)->lock_check_bounds(fl);
817  		if (ret < 0)
818  			goto out_err;
819  	}
820  
821  	if (IS_GETLK(cmd))
822  		ret = do_getlk(filp, cmd, fl, is_local);
823  	else if (fl->fl_type == F_UNLCK)
824  		ret = do_unlk(filp, cmd, fl, is_local);
825  	else
826  		ret = do_setlk(filp, cmd, fl, is_local);
827  out_err:
828  	return ret;
829  }
830  EXPORT_SYMBOL_GPL(nfs_lock);
831  
832  /*
833   * Lock a (portion of) a file
834   */
835  int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
836  {
837  	struct inode *inode = filp->f_mapping->host;
838  	int is_local = 0;
839  
840  	dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n",
841  			filp, fl->fl_type, fl->fl_flags);
842  
843  	if (!(fl->fl_flags & FL_FLOCK))
844  		return -ENOLCK;
845  
846  	/*
847  	 * The NFSv4 protocol doesn't support LOCK_MAND, which is not part of
848  	 * any standard. In principle we might be able to support LOCK_MAND
849  	 * on NFSv2/3 since NLMv3/4 support DOS share modes, but for now the
850  	 * NFS code is not set up for it.
851  	 */
852  	if (fl->fl_type & LOCK_MAND)
853  		return -EINVAL;
854  
855  	if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
856  		is_local = 1;
857  
858  	/* We're simulating flock() locks using posix locks on the server */
859  	if (fl->fl_type == F_UNLCK)
860  		return do_unlk(filp, cmd, fl, is_local);
861  	return do_setlk(filp, cmd, fl, is_local);
862  }
863  EXPORT_SYMBOL_GPL(nfs_flock);
864  
865  const struct file_operations nfs_file_operations = {
866  	.llseek		= nfs_file_llseek,
867  	.read_iter	= nfs_file_read,
868  	.write_iter	= nfs_file_write,
869  	.mmap		= nfs_file_mmap,
870  	.open		= nfs_file_open,
871  	.flush		= nfs_file_flush,
872  	.release	= nfs_file_release,
873  	.fsync		= nfs_file_fsync,
874  	.lock		= nfs_lock,
875  	.flock		= nfs_flock,
876  	.splice_read	= generic_file_splice_read,
877  	.splice_write	= iter_file_splice_write,
878  	.check_flags	= nfs_check_flags,
879  	.setlease	= simple_nosetlease,
880  };
881  EXPORT_SYMBOL_GPL(nfs_file_operations);
882