xref: /openbmc/linux/fs/nfs/file.c (revision 87c2ce3b)
1 /*
2  *  linux/fs/nfs/file.c
3  *
4  *  Copyright (C) 1992  Rick Sladkey
5  *
6  *  Changes Copyright (C) 1994 by Florian La Roche
7  *   - Do not copy data too often around in the kernel.
8  *   - In nfs_file_read the return value of kmalloc wasn't checked.
9  *   - Put in a better version of read look-ahead buffering. Original idea
10  *     and implementation by Wai S Kok elekokws@ee.nus.sg.
11  *
12  *  Expire cache on write to a file by Wai S Kok (Oct 1994).
13  *
14  *  Total rewrite of read side for new NFS buffer cache.. Linus.
15  *
16  *  nfs regular file handling functions
17  */
18 
19 #include <linux/time.h>
20 #include <linux/kernel.h>
21 #include <linux/errno.h>
22 #include <linux/fcntl.h>
23 #include <linux/stat.h>
24 #include <linux/nfs_fs.h>
25 #include <linux/nfs_mount.h>
26 #include <linux/mm.h>
27 #include <linux/slab.h>
28 #include <linux/pagemap.h>
29 #include <linux/smp_lock.h>
30 
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 
34 #include "delegation.h"
35 
36 #define NFSDBG_FACILITY		NFSDBG_FILE
37 
38 static int nfs_file_open(struct inode *, struct file *);
39 static int nfs_file_release(struct inode *, struct file *);
40 static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
41 static int  nfs_file_mmap(struct file *, struct vm_area_struct *);
42 static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
43 static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t);
44 static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t);
45 static int  nfs_file_flush(struct file *);
46 static int  nfs_fsync(struct file *, struct dentry *dentry, int datasync);
47 static int nfs_check_flags(int flags);
48 static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
49 static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
50 
51 struct file_operations nfs_file_operations = {
52 	.llseek		= nfs_file_llseek,
53 	.read		= do_sync_read,
54 	.write		= do_sync_write,
55 	.aio_read		= nfs_file_read,
56 	.aio_write		= nfs_file_write,
57 	.mmap		= nfs_file_mmap,
58 	.open		= nfs_file_open,
59 	.flush		= nfs_file_flush,
60 	.release	= nfs_file_release,
61 	.fsync		= nfs_fsync,
62 	.lock		= nfs_lock,
63 	.flock		= nfs_flock,
64 	.sendfile	= nfs_file_sendfile,
65 	.check_flags	= nfs_check_flags,
66 };
67 
68 struct inode_operations nfs_file_inode_operations = {
69 	.permission	= nfs_permission,
70 	.getattr	= nfs_getattr,
71 	.setattr	= nfs_setattr,
72 };
73 
#ifdef CONFIG_NFS_V3
/*
 * Inode operations used when CONFIG_NFS_V3 is enabled: the same
 * permission/attribute handlers as nfs_file_inode_operations, plus the
 * nfs3_* extended-attribute handlers.
 */
struct inode_operations nfs3_file_inode_operations = {
	.getattr	= nfs_getattr,
	.setattr	= nfs_setattr,
	.permission	= nfs_permission,
	.getxattr	= nfs3_getxattr,
	.setxattr	= nfs3_setxattr,
	.listxattr	= nfs3_listxattr,
	.removexattr	= nfs3_removexattr,
};
#endif  /* CONFIG_NFS_V3 */
85 
/*
 * Placeholder for future NFS swap support: unless IS_SWAPFILE is already
 * defined elsewhere, treat no inode as a swap file.
 */
#if !defined(IS_SWAPFILE)
#define IS_SWAPFILE(inode)	(0)
#endif
90 
/*
 * Validate open flags: O_APPEND combined with O_DIRECT is rejected.
 *
 * Returns 0 if the flags are acceptable, -EINVAL if both O_APPEND and
 * O_DIRECT are set.
 */
static int nfs_check_flags(int flags)
{
	const int conflicting = O_APPEND | O_DIRECT;

	return ((flags & conflicting) == conflicting) ? -EINVAL : 0;
}
98 
99 /*
100  * Open file
101  */
102 static int
103 nfs_file_open(struct inode *inode, struct file *filp)
104 {
105 	struct nfs_server *server = NFS_SERVER(inode);
106 	int (*open)(struct inode *, struct file *);
107 	int res;
108 
109 	res = nfs_check_flags(filp->f_flags);
110 	if (res)
111 		return res;
112 
113 	lock_kernel();
114 	/* Do NFSv4 open() call */
115 	if ((open = server->rpc_ops->file_open) != NULL)
116 		res = open(inode, filp);
117 	unlock_kernel();
118 	return res;
119 }
120 
121 static int
122 nfs_file_release(struct inode *inode, struct file *filp)
123 {
124 	/* Ensure that dirty pages are flushed out with the right creds */
125 	if (filp->f_mode & FMODE_WRITE)
126 		filemap_fdatawrite(filp->f_mapping);
127 	return NFS_PROTO(inode)->file_release(inode, filp);
128 }
129 
130 /**
131  * nfs_revalidate_file - Revalidate the page cache & related metadata
132  * @inode - pointer to inode struct
133  * @file - pointer to file
134  */
135 static int nfs_revalidate_file(struct inode *inode, struct file *filp)
136 {
137 	struct nfs_inode *nfsi = NFS_I(inode);
138 	int retval = 0;
139 
140 	if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR))
141 			|| nfs_attribute_timeout(inode))
142 		retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
143 	nfs_revalidate_mapping(inode, filp->f_mapping);
144 	return 0;
145 }
146 
147 /**
148  * nfs_revalidate_size - Revalidate the file size
149  * @inode - pointer to inode struct
150  * @file - pointer to struct file
151  *
152  * Revalidates the file length. This is basically a wrapper around
153  * nfs_revalidate_inode() that takes into account the fact that we may
154  * have cached writes (in which case we don't care about the server's
155  * idea of what the file length is), or O_DIRECT (in which case we
156  * shouldn't trust the cache).
157  */
158 static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
159 {
160 	struct nfs_server *server = NFS_SERVER(inode);
161 	struct nfs_inode *nfsi = NFS_I(inode);
162 
163 	if (server->flags & NFS_MOUNT_NOAC)
164 		goto force_reval;
165 	if (filp->f_flags & O_DIRECT)
166 		goto force_reval;
167 	if (nfsi->npages != 0)
168 		return 0;
169 	if (!(nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode))
170 		return 0;
171 force_reval:
172 	return __nfs_revalidate_inode(server, inode);
173 }
174 
175 static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
176 {
177 	/* origin == SEEK_END => we must revalidate the cached file length */
178 	if (origin == 2) {
179 		struct inode *inode = filp->f_mapping->host;
180 		int retval = nfs_revalidate_file_size(inode, filp);
181 		if (retval < 0)
182 			return (loff_t)retval;
183 	}
184 	return remote_llseek(filp, offset, origin);
185 }
186 
187 /*
188  * Flush all dirty pages, and check for write errors.
189  *
190  */
191 static int
192 nfs_file_flush(struct file *file)
193 {
194 	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
195 	struct inode	*inode = file->f_dentry->d_inode;
196 	int		status;
197 
198 	dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
199 
200 	if ((file->f_mode & FMODE_WRITE) == 0)
201 		return 0;
202 	lock_kernel();
203 	/* Ensure that data+attribute caches are up to date after close() */
204 	status = nfs_wb_all(inode);
205 	if (!status) {
206 		status = ctx->error;
207 		ctx->error = 0;
208 		if (!status)
209 			nfs_revalidate_inode(NFS_SERVER(inode), inode);
210 	}
211 	unlock_kernel();
212 	return status;
213 }
214 
215 static ssize_t
216 nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
217 {
218 	struct dentry * dentry = iocb->ki_filp->f_dentry;
219 	struct inode * inode = dentry->d_inode;
220 	ssize_t result;
221 
222 #ifdef CONFIG_NFS_DIRECTIO
223 	if (iocb->ki_filp->f_flags & O_DIRECT)
224 		return nfs_file_direct_read(iocb, buf, count, pos);
225 #endif
226 
227 	dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n",
228 		dentry->d_parent->d_name.name, dentry->d_name.name,
229 		(unsigned long) count, (unsigned long) pos);
230 
231 	result = nfs_revalidate_file(inode, iocb->ki_filp);
232 	if (!result)
233 		result = generic_file_aio_read(iocb, buf, count, pos);
234 	return result;
235 }
236 
237 static ssize_t
238 nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count,
239 		read_actor_t actor, void *target)
240 {
241 	struct dentry *dentry = filp->f_dentry;
242 	struct inode *inode = dentry->d_inode;
243 	ssize_t res;
244 
245 	dfprintk(VFS, "nfs: sendfile(%s/%s, %lu@%Lu)\n",
246 		dentry->d_parent->d_name.name, dentry->d_name.name,
247 		(unsigned long) count, (unsigned long long) *ppos);
248 
249 	res = nfs_revalidate_file(inode, filp);
250 	if (!res)
251 		res = generic_file_sendfile(filp, ppos, count, actor, target);
252 	return res;
253 }
254 
255 static int
256 nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
257 {
258 	struct dentry *dentry = file->f_dentry;
259 	struct inode *inode = dentry->d_inode;
260 	int	status;
261 
262 	dfprintk(VFS, "nfs: mmap(%s/%s)\n",
263 		dentry->d_parent->d_name.name, dentry->d_name.name);
264 
265 	status = nfs_revalidate_file(inode, file);
266 	if (!status)
267 		status = generic_file_mmap(file, vma);
268 	return status;
269 }
270 
271 /*
272  * Flush any dirty pages for this process, and check for write errors.
273  * The return status from this call provides a reliable indication of
274  * whether any write errors occurred for this process.
275  */
276 static int
277 nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
278 {
279 	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
280 	struct inode *inode = dentry->d_inode;
281 	int status;
282 
283 	dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
284 
285 	lock_kernel();
286 	status = nfs_wb_all(inode);
287 	if (!status) {
288 		status = ctx->error;
289 		ctx->error = 0;
290 	}
291 	unlock_kernel();
292 	return status;
293 }
294 
/*
 * First half of a page-cache write.  The generic write path has already
 * allocated and locked the page and done the alignment calculations; all
 * that is done here is to call nfs_flush_incompatible() for the page and
 * return its result.  The offset/to range is not consulted.
 *
 * If the writer ends up delaying the write, the writer needs to
 * increment the page use counts until he is done with the page.
 */
static int nfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
	int err = nfs_flush_incompatible(file, page);

	return err;
}
308 
/*
 * Second half of a page-cache write: hand the byte range [offset, to) of
 * the page to nfs_updatepage() under the big kernel lock and return its
 * status.
 */
static int nfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
	const unsigned nbytes = to - offset;
	long rc;

	lock_kernel();
	rc = nfs_updatepage(file, page, offset, nbytes);
	unlock_kernel();

	return rc;
}
318 
319 struct address_space_operations nfs_file_aops = {
320 	.readpage = nfs_readpage,
321 	.readpages = nfs_readpages,
322 	.set_page_dirty = __set_page_dirty_nobuffers,
323 	.writepage = nfs_writepage,
324 	.writepages = nfs_writepages,
325 	.prepare_write = nfs_prepare_write,
326 	.commit_write = nfs_commit_write,
327 #ifdef CONFIG_NFS_DIRECTIO
328 	.direct_IO = nfs_direct_IO,
329 #endif
330 };
331 
332 /*
333  * Write to a file (through the page cache).
334  */
335 static ssize_t
336 nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
337 {
338 	struct dentry * dentry = iocb->ki_filp->f_dentry;
339 	struct inode * inode = dentry->d_inode;
340 	ssize_t result;
341 
342 #ifdef CONFIG_NFS_DIRECTIO
343 	if (iocb->ki_filp->f_flags & O_DIRECT)
344 		return nfs_file_direct_write(iocb, buf, count, pos);
345 #endif
346 
347 	dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n",
348 		dentry->d_parent->d_name.name, dentry->d_name.name,
349 		inode->i_ino, (unsigned long) count, (unsigned long) pos);
350 
351 	result = -EBUSY;
352 	if (IS_SWAPFILE(inode))
353 		goto out_swapfile;
354 	/*
355 	 * O_APPEND implies that we must revalidate the file length.
356 	 */
357 	if (iocb->ki_filp->f_flags & O_APPEND) {
358 		result = nfs_revalidate_file_size(inode, iocb->ki_filp);
359 		if (result)
360 			goto out;
361 	}
362 	nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
363 
364 	result = count;
365 	if (!count)
366 		goto out;
367 
368 	result = generic_file_aio_write(iocb, buf, count, pos);
369 out:
370 	return result;
371 
372 out_swapfile:
373 	printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
374 	goto out;
375 }
376 
377 static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
378 {
379 	struct file_lock *cfl;
380 	struct inode *inode = filp->f_mapping->host;
381 	int status = 0;
382 
383 	lock_kernel();
384 	/* Try local locking first */
385 	cfl = posix_test_lock(filp, fl);
386 	if (cfl != NULL) {
387 		locks_copy_lock(fl, cfl);
388 		goto out;
389 	}
390 
391 	if (nfs_have_delegation(inode, FMODE_READ))
392 		goto out_noconflict;
393 
394 	if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
395 		goto out_noconflict;
396 
397 	status = NFS_PROTO(inode)->lock(filp, cmd, fl);
398 out:
399 	unlock_kernel();
400 	return status;
401 out_noconflict:
402 	fl->fl_type = F_UNLCK;
403 	goto out;
404 }
405 
406 static int do_vfs_lock(struct file *file, struct file_lock *fl)
407 {
408 	int res = 0;
409 	switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
410 		case FL_POSIX:
411 			res = posix_lock_file_wait(file, fl);
412 			break;
413 		case FL_FLOCK:
414 			res = flock_lock_file_wait(file, fl);
415 			break;
416 		default:
417 			BUG();
418 	}
419 	if (res < 0)
420 		printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n",
421 				__FUNCTION__);
422 	return res;
423 }
424 
425 static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
426 {
427 	struct inode *inode = filp->f_mapping->host;
428 	sigset_t oldset;
429 	int status;
430 
431 	rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
432 	/*
433 	 * Flush all pending writes before doing anything
434 	 * with locks..
435 	 */
436 	nfs_sync_mapping(filp->f_mapping);
437 
438 	/* NOTE: special case
439 	 * 	If we're signalled while cleaning up locks on process exit, we
440 	 * 	still need to complete the unlock.
441 	 */
442 	lock_kernel();
443 	/* Use local locking if mounted with "-onolock" */
444 	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
445 		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
446 	else
447 		status = do_vfs_lock(filp, fl);
448 	unlock_kernel();
449 	rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
450 	return status;
451 }
452 
453 static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
454 {
455 	struct inode *inode = filp->f_mapping->host;
456 	sigset_t oldset;
457 	int status;
458 
459 	rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
460 	/*
461 	 * Flush all pending writes before doing anything
462 	 * with locks..
463 	 */
464 	status = nfs_sync_mapping(filp->f_mapping);
465 	if (status != 0)
466 		goto out;
467 
468 	lock_kernel();
469 	/* Use local locking if mounted with "-onolock" */
470 	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) {
471 		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
472 		/* If we were signalled we still need to ensure that
473 		 * we clean up any state on the server. We therefore
474 		 * record the lock call as having succeeded in order to
475 		 * ensure that locks_remove_posix() cleans it out when
476 		 * the process exits.
477 		 */
478 		if (status == -EINTR || status == -ERESTARTSYS)
479 			do_vfs_lock(filp, fl);
480 	} else
481 		status = do_vfs_lock(filp, fl);
482 	unlock_kernel();
483 	if (status < 0)
484 		goto out;
485 	/*
486 	 * Make sure we clear the cache whenever we try to get the lock.
487 	 * This makes locking act as a cache coherency point.
488 	 */
489 	nfs_sync_mapping(filp->f_mapping);
490 	nfs_zap_caches(inode);
491 out:
492 	rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
493 	return status;
494 }
495 
496 /*
497  * Lock a (portion of) a file
498  */
499 static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
500 {
501 	struct inode * inode = filp->f_mapping->host;
502 
503 	dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n",
504 			inode->i_sb->s_id, inode->i_ino,
505 			fl->fl_type, fl->fl_flags,
506 			(long long)fl->fl_start, (long long)fl->fl_end);
507 
508 	if (!inode)
509 		return -EINVAL;
510 
511 	/* No mandatory locks over NFS */
512 	if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID &&
513 	    fl->fl_type != F_UNLCK)
514 		return -ENOLCK;
515 
516 	if (IS_GETLK(cmd))
517 		return do_getlk(filp, cmd, fl);
518 	if (fl->fl_type == F_UNLCK)
519 		return do_unlk(filp, cmd, fl);
520 	return do_setlk(filp, cmd, fl);
521 }
522 
523 /*
524  * Lock a (portion of) a file
525  */
526 static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
527 {
528 	struct inode * inode = filp->f_mapping->host;
529 
530 	dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n",
531 			inode->i_sb->s_id, inode->i_ino,
532 			fl->fl_type, fl->fl_flags);
533 
534 	if (!inode)
535 		return -EINVAL;
536 
537 	/*
538 	 * No BSD flocks over NFS allowed.
539 	 * Note: we could try to fake a POSIX lock request here by
540 	 * using ((u32) filp | 0x80000000) or some such as the pid.
541 	 * Not sure whether that would be unique, though, or whether
542 	 * that would break in other places.
543 	 */
544 	if (!(fl->fl_flags & FL_FLOCK))
545 		return -ENOLCK;
546 
547 	/* We're simulating flock() locks using posix locks on the server */
548 	fl->fl_owner = (fl_owner_t)filp;
549 	fl->fl_start = 0;
550 	fl->fl_end = OFFSET_MAX;
551 
552 	if (fl->fl_type == F_UNLCK)
553 		return do_unlk(filp, cmd, fl);
554 	return do_setlk(filp, cmd, fl);
555 }
556