xref: /openbmc/linux/fs/nfs/inode.c (revision b8bb76713ec50df2f11efee386e16f93d51e1076)
1 /*
2  *  linux/fs/nfs/inode.c
3  *
4  *  Copyright (C) 1992  Rick Sladkey
5  *
6  *  nfs inode and superblock handling functions
7  *
8  *  Modularised by Alan Cox <alan@lxorguk.ukuu.org.uk>, while hacking some
9  *  experimental NFS changes. Modularisation taken straight from SYS5 fs.
10  *
11  *  Change to nfs_read_super() to permit NFS mounts to multi-homed hosts.
12  *  J.S.Peatfield@damtp.cam.ac.uk
13  *
14  */
15 
16 #include <linux/module.h>
17 #include <linux/init.h>
18 #include <linux/sched.h>
19 #include <linux/time.h>
20 #include <linux/kernel.h>
21 #include <linux/mm.h>
22 #include <linux/string.h>
23 #include <linux/stat.h>
24 #include <linux/errno.h>
25 #include <linux/unistd.h>
26 #include <linux/sunrpc/clnt.h>
27 #include <linux/sunrpc/stats.h>
28 #include <linux/sunrpc/metrics.h>
29 #include <linux/nfs_fs.h>
30 #include <linux/nfs_mount.h>
31 #include <linux/nfs4_mount.h>
32 #include <linux/lockd/bind.h>
33 #include <linux/smp_lock.h>
34 #include <linux/seq_file.h>
35 #include <linux/mount.h>
36 #include <linux/nfs_idmap.h>
37 #include <linux/vfs.h>
38 #include <linux/inet.h>
39 #include <linux/nfs_xdr.h>
40 
41 #include <asm/system.h>
42 #include <asm/uaccess.h>
43 
44 #include "nfs4_fs.h"
45 #include "callback.h"
46 #include "delegation.h"
47 #include "iostat.h"
48 #include "internal.h"
49 
50 #define NFSDBG_FACILITY		NFSDBG_VFS
51 
52 #define NFS_64_BIT_INODE_NUMBERS_ENABLED	1
53 
54 /* Default is to see 64-bit inode numbers */
55 static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED;
56 
57 static void nfs_invalidate_inode(struct inode *);
58 static int nfs_update_inode(struct inode *, struct nfs_fattr *);
59 
60 static struct kmem_cache * nfs_inode_cachep;
61 
62 static inline unsigned long
63 nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
64 {
65 	return nfs_fileid_to_ino_t(fattr->fileid);
66 }
67 
68 /**
69  * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
70  * @word: long word containing the bit lock
71  */
72 int nfs_wait_bit_killable(void *word)
73 {
74 	if (fatal_signal_pending(current))
75 		return -ERESTARTSYS;
76 	schedule();
77 	return 0;
78 }
79 
80 /**
81  * nfs_compat_user_ino64 - returns the user-visible inode number
82  * @fileid: 64-bit fileid
83  *
84  * This function returns a 32-bit inode number if the boot parameter
85  * nfs.enable_ino64 is zero.
86  */
87 u64 nfs_compat_user_ino64(u64 fileid)
88 {
89 	int ino;
90 
91 	if (enable_ino64)
92 		return fileid;
93 	ino = fileid;
94 	if (sizeof(ino) < sizeof(fileid))
95 		ino ^= fileid >> (sizeof(fileid)-sizeof(ino)) * 8;
96 	return ino;
97 }
98 
99 int nfs_write_inode(struct inode *inode, int sync)
100 {
101 	int ret;
102 
103 	if (sync) {
104 		ret = filemap_fdatawait(inode->i_mapping);
105 		if (ret == 0)
106 			ret = nfs_commit_inode(inode, FLUSH_SYNC);
107 	} else
108 		ret = nfs_commit_inode(inode, 0);
109 	if (ret >= 0)
110 		return 0;
111 	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
112 	return ret;
113 }
114 
115 void nfs_clear_inode(struct inode *inode)
116 {
117 	/*
118 	 * The following should never happen...
119 	 */
120 	BUG_ON(nfs_have_writebacks(inode));
121 	BUG_ON(!list_empty(&NFS_I(inode)->open_files));
122 	nfs_zap_acl_cache(inode);
123 	nfs_access_zap_cache(inode);
124 }
125 
126 /**
127  * nfs_sync_mapping - helper to flush all mmapped dirty data to disk
128  */
129 int nfs_sync_mapping(struct address_space *mapping)
130 {
131 	int ret;
132 
133 	if (mapping->nrpages == 0)
134 		return 0;
135 	unmap_mapping_range(mapping, 0, 0, 0);
136 	ret = filemap_write_and_wait(mapping);
137 	if (ret != 0)
138 		goto out;
139 	ret = nfs_wb_all(mapping->host);
140 out:
141 	return ret;
142 }
143 
144 /*
145  * Invalidate the local caches
146  */
147 static void nfs_zap_caches_locked(struct inode *inode)
148 {
149 	struct nfs_inode *nfsi = NFS_I(inode);
150 	int mode = inode->i_mode;
151 
152 	nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
153 
154 	nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
155 	nfsi->attrtimeo_timestamp = jiffies;
156 
157 	memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
158 	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
159 		nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
160 	else
161 		nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
162 }
163 
164 void nfs_zap_caches(struct inode *inode)
165 {
166 	spin_lock(&inode->i_lock);
167 	nfs_zap_caches_locked(inode);
168 	spin_unlock(&inode->i_lock);
169 }
170 
171 void nfs_zap_mapping(struct inode *inode, struct address_space *mapping)
172 {
173 	if (mapping->nrpages != 0) {
174 		spin_lock(&inode->i_lock);
175 		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
176 		spin_unlock(&inode->i_lock);
177 	}
178 }
179 
180 void nfs_zap_acl_cache(struct inode *inode)
181 {
182 	void (*clear_acl_cache)(struct inode *);
183 
184 	clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache;
185 	if (clear_acl_cache != NULL)
186 		clear_acl_cache(inode);
187 	spin_lock(&inode->i_lock);
188 	NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL;
189 	spin_unlock(&inode->i_lock);
190 }
191 
192 void nfs_invalidate_atime(struct inode *inode)
193 {
194 	spin_lock(&inode->i_lock);
195 	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
196 	spin_unlock(&inode->i_lock);
197 }
198 
199 /*
200  * Invalidate, but do not unhash, the inode.
201  * NB: must be called with inode->i_lock held!
202  */
203 static void nfs_invalidate_inode(struct inode *inode)
204 {
205 	set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
206 	nfs_zap_caches_locked(inode);
207 }
208 
/*
 * Lookup key handed through iget5_locked() to nfs_find_actor() and
 * nfs_init_locked().
 */
struct nfs_find_desc {
	struct nfs_fh		*fh;	/* file handle to match / copy */
	struct nfs_fattr	*fattr;	/* attributes; supplies the fileid */
};
213 
214 /*
215  * In NFSv3 we can have 64bit inode numbers. In order to support
216  * this, and re-exported directories (also seen in NFSv2)
217  * we are forced to allow 2 different inodes to have the same
218  * i_ino.
219  */
220 static int
221 nfs_find_actor(struct inode *inode, void *opaque)
222 {
223 	struct nfs_find_desc	*desc = (struct nfs_find_desc *)opaque;
224 	struct nfs_fh		*fh = desc->fh;
225 	struct nfs_fattr	*fattr = desc->fattr;
226 
227 	if (NFS_FILEID(inode) != fattr->fileid)
228 		return 0;
229 	if (nfs_compare_fh(NFS_FH(inode), fh))
230 		return 0;
231 	if (is_bad_inode(inode) || NFS_STALE(inode))
232 		return 0;
233 	return 1;
234 }
235 
236 static int
237 nfs_init_locked(struct inode *inode, void *opaque)
238 {
239 	struct nfs_find_desc	*desc = (struct nfs_find_desc *)opaque;
240 	struct nfs_fattr	*fattr = desc->fattr;
241 
242 	set_nfs_fileid(inode, fattr->fileid);
243 	nfs_copy_fh(NFS_FH(inode), desc->fh);
244 	return 0;
245 }
246 
247 /* Don't use READDIRPLUS on directories that we believe are too large */
248 #define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE)
249 
250 /*
251  * This is our front-end to iget that looks up inodes by file handle
252  * instead of inode number.
253  */
254 struct inode *
255 nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
256 {
257 	struct nfs_find_desc desc = {
258 		.fh	= fh,
259 		.fattr	= fattr
260 	};
261 	struct inode *inode = ERR_PTR(-ENOENT);
262 	unsigned long hash;
263 
264 	if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0)
265 		goto out_no_inode;
266 	if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0)
267 		goto out_no_inode;
268 
269 	hash = nfs_fattr_to_ino_t(fattr);
270 
271 	inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc);
272 	if (inode == NULL) {
273 		inode = ERR_PTR(-ENOMEM);
274 		goto out_no_inode;
275 	}
276 
277 	if (inode->i_state & I_NEW) {
278 		struct nfs_inode *nfsi = NFS_I(inode);
279 		unsigned long now = jiffies;
280 
281 		/* We set i_ino for the few things that still rely on it,
282 		 * such as stat(2) */
283 		inode->i_ino = hash;
284 
285 		/* We can't support update_atime(), since the server will reset it */
286 		inode->i_flags |= S_NOATIME|S_NOCMTIME;
287 		inode->i_mode = fattr->mode;
288 		/* Why so? Because we want revalidate for devices/FIFOs, and
289 		 * that's precisely what we have in nfs_file_inode_operations.
290 		 */
291 		inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
292 		if (S_ISREG(inode->i_mode)) {
293 			inode->i_fop = &nfs_file_operations;
294 			inode->i_data.a_ops = &nfs_file_aops;
295 			inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
296 		} else if (S_ISDIR(inode->i_mode)) {
297 			inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
298 			inode->i_fop = &nfs_dir_operations;
299 			if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
300 			    && fattr->size <= NFS_LIMIT_READDIRPLUS)
301 				set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
302 			/* Deal with crossing mountpoints */
303 			if ((fattr->valid & NFS_ATTR_FATTR_FSID)
304 					&& !nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) {
305 				if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
306 					inode->i_op = &nfs_referral_inode_operations;
307 				else
308 					inode->i_op = &nfs_mountpoint_inode_operations;
309 				inode->i_fop = NULL;
310 				set_bit(NFS_INO_MOUNTPOINT, &nfsi->flags);
311 			}
312 		} else if (S_ISLNK(inode->i_mode))
313 			inode->i_op = &nfs_symlink_inode_operations;
314 		else
315 			init_special_inode(inode, inode->i_mode, fattr->rdev);
316 
317 		memset(&inode->i_atime, 0, sizeof(inode->i_atime));
318 		memset(&inode->i_mtime, 0, sizeof(inode->i_mtime));
319 		memset(&inode->i_ctime, 0, sizeof(inode->i_ctime));
320 		nfsi->change_attr = 0;
321 		inode->i_size = 0;
322 		inode->i_nlink = 0;
323 		inode->i_uid = -2;
324 		inode->i_gid = -2;
325 		inode->i_blocks = 0;
326 		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
327 
328 		nfsi->read_cache_jiffies = fattr->time_start;
329 		nfsi->attr_gencount = fattr->gencount;
330 		if (fattr->valid & NFS_ATTR_FATTR_ATIME)
331 			inode->i_atime = fattr->atime;
332 		if (fattr->valid & NFS_ATTR_FATTR_MTIME)
333 			inode->i_mtime = fattr->mtime;
334 		if (fattr->valid & NFS_ATTR_FATTR_CTIME)
335 			inode->i_ctime = fattr->ctime;
336 		if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
337 			nfsi->change_attr = fattr->change_attr;
338 		if (fattr->valid & NFS_ATTR_FATTR_SIZE)
339 			inode->i_size = nfs_size_to_loff_t(fattr->size);
340 		if (fattr->valid & NFS_ATTR_FATTR_NLINK)
341 			inode->i_nlink = fattr->nlink;
342 		if (fattr->valid & NFS_ATTR_FATTR_OWNER)
343 			inode->i_uid = fattr->uid;
344 		if (fattr->valid & NFS_ATTR_FATTR_GROUP)
345 			inode->i_gid = fattr->gid;
346 		if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
347 			inode->i_blocks = fattr->du.nfs2.blocks;
348 		if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
349 			/*
350 			 * report the blocks in 512byte units
351 			 */
352 			inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
353 		}
354 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
355 		nfsi->attrtimeo_timestamp = now;
356 		nfsi->access_cache = RB_ROOT;
357 
358 		unlock_new_inode(inode);
359 	} else
360 		nfs_refresh_inode(inode, fattr);
361 	dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n",
362 		inode->i_sb->s_id,
363 		(long long)NFS_FILEID(inode),
364 		atomic_read(&inode->i_count));
365 
366 out:
367 	return inode;
368 
369 out_no_inode:
370 	dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode));
371 	goto out;
372 }
373 
374 #define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE)
375 
376 int
377 nfs_setattr(struct dentry *dentry, struct iattr *attr)
378 {
379 	struct inode *inode = dentry->d_inode;
380 	struct nfs_fattr fattr;
381 	int error;
382 
383 	nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
384 
385 	/* skip mode change if it's just for clearing setuid/setgid */
386 	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
387 		attr->ia_valid &= ~ATTR_MODE;
388 
389 	if (attr->ia_valid & ATTR_SIZE) {
390 		if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode))
391 			attr->ia_valid &= ~ATTR_SIZE;
392 	}
393 
394 	/* Optimization: if the end result is no change, don't RPC */
395 	attr->ia_valid &= NFS_VALID_ATTRS;
396 	if ((attr->ia_valid & ~ATTR_FILE) == 0)
397 		return 0;
398 
399 	/* Write all dirty data */
400 	if (S_ISREG(inode->i_mode)) {
401 		filemap_write_and_wait(inode->i_mapping);
402 		nfs_wb_all(inode);
403 	}
404 	/*
405 	 * Return any delegations if we're going to change ACLs
406 	 */
407 	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
408 		nfs_inode_return_delegation(inode);
409 	error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
410 	if (error == 0)
411 		nfs_refresh_inode(inode, &fattr);
412 	return error;
413 }
414 
415 /**
416  * nfs_vmtruncate - unmap mappings "freed" by truncate() syscall
417  * @inode: inode of the file used
418  * @offset: file offset to start truncating
419  *
420  * This is a copy of the common vmtruncate, but with the locking
421  * corrected to take into account the fact that NFS requires
422  * inode->i_size to be updated under the inode->i_lock.
423  */
424 static int nfs_vmtruncate(struct inode * inode, loff_t offset)
425 {
426 	if (i_size_read(inode) < offset) {
427 		unsigned long limit;
428 
429 		limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
430 		if (limit != RLIM_INFINITY && offset > limit)
431 			goto out_sig;
432 		if (offset > inode->i_sb->s_maxbytes)
433 			goto out_big;
434 		spin_lock(&inode->i_lock);
435 		i_size_write(inode, offset);
436 		spin_unlock(&inode->i_lock);
437 	} else {
438 		struct address_space *mapping = inode->i_mapping;
439 
440 		/*
441 		 * truncation of in-use swapfiles is disallowed - it would
442 		 * cause subsequent swapout to scribble on the now-freed
443 		 * blocks.
444 		 */
445 		if (IS_SWAPFILE(inode))
446 			return -ETXTBSY;
447 		spin_lock(&inode->i_lock);
448 		i_size_write(inode, offset);
449 		spin_unlock(&inode->i_lock);
450 
451 		/*
452 		 * unmap_mapping_range is called twice, first simply for
453 		 * efficiency so that truncate_inode_pages does fewer
454 		 * single-page unmaps.  However after this first call, and
455 		 * before truncate_inode_pages finishes, it is possible for
456 		 * private pages to be COWed, which remain after
457 		 * truncate_inode_pages finishes, hence the second
458 		 * unmap_mapping_range call must be made for correctness.
459 		 */
460 		unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
461 		truncate_inode_pages(mapping, offset);
462 		unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
463 	}
464 	return 0;
465 out_sig:
466 	send_sig(SIGXFSZ, current, 0);
467 out_big:
468 	return -EFBIG;
469 }
470 
471 /**
472  * nfs_setattr_update_inode - Update inode metadata after a setattr call.
473  * @inode: pointer to struct inode
474  * @attr: pointer to struct iattr
475  *
476  * Note: we do this in the *proc.c in order to ensure that
477  *       it works for things like exclusive creates too.
478  */
479 void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
480 {
481 	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
482 		spin_lock(&inode->i_lock);
483 		if ((attr->ia_valid & ATTR_MODE) != 0) {
484 			int mode = attr->ia_mode & S_IALLUGO;
485 			mode |= inode->i_mode & ~S_IALLUGO;
486 			inode->i_mode = mode;
487 		}
488 		if ((attr->ia_valid & ATTR_UID) != 0)
489 			inode->i_uid = attr->ia_uid;
490 		if ((attr->ia_valid & ATTR_GID) != 0)
491 			inode->i_gid = attr->ia_gid;
492 		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
493 		spin_unlock(&inode->i_lock);
494 	}
495 	if ((attr->ia_valid & ATTR_SIZE) != 0) {
496 		nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
497 		nfs_vmtruncate(inode, attr->ia_size);
498 	}
499 }
500 
501 int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
502 {
503 	struct inode *inode = dentry->d_inode;
504 	int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
505 	int err;
506 
507 	/*
508 	 * Flush out writes to the server in order to update c/mtime.
509 	 *
510 	 * Hold the i_mutex to suspend application writes temporarily;
511 	 * this prevents long-running writing applications from blocking
512 	 * nfs_wb_nocommit.
513 	 */
514 	if (S_ISREG(inode->i_mode)) {
515 		mutex_lock(&inode->i_mutex);
516 		nfs_wb_nocommit(inode);
517 		mutex_unlock(&inode->i_mutex);
518 	}
519 
520 	/*
521 	 * We may force a getattr if the user cares about atime.
522 	 *
523 	 * Note that we only have to check the vfsmount flags here:
524 	 *  - NFS always sets S_NOATIME by so checking it would give a
525 	 *    bogus result
526 	 *  - NFS never sets MS_NOATIME or MS_NODIRATIME so there is
527 	 *    no point in checking those.
528 	 */
529  	if ((mnt->mnt_flags & MNT_NOATIME) ||
530  	    ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
531 		need_atime = 0;
532 
533 	if (need_atime)
534 		err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
535 	else
536 		err = nfs_revalidate_inode(NFS_SERVER(inode), inode);
537 	if (!err) {
538 		generic_fillattr(inode, stat);
539 		stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
540 	}
541 	return err;
542 }
543 
544 /**
545  * nfs_close_context - Common close_context() routine NFSv2/v3
546  * @ctx: pointer to context
547  * @is_sync: is this a synchronous close
548  *
549  * always ensure that the attributes are up to date if we're mounted
550  * with close-to-open semantics
551  */
552 void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
553 {
554 	struct inode *inode;
555 	struct nfs_server *server;
556 
557 	if (!(ctx->mode & FMODE_WRITE))
558 		return;
559 	if (!is_sync)
560 		return;
561 	inode = ctx->path.dentry->d_inode;
562 	if (!list_empty(&NFS_I(inode)->open_files))
563 		return;
564 	server = NFS_SERVER(inode);
565 	if (server->flags & NFS_MOUNT_NOCTO)
566 		return;
567 	nfs_revalidate_inode(server, inode);
568 }
569 
570 static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred)
571 {
572 	struct nfs_open_context *ctx;
573 
574 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
575 	if (ctx != NULL) {
576 		ctx->path.dentry = dget(dentry);
577 		ctx->path.mnt = mntget(mnt);
578 		ctx->cred = get_rpccred(cred);
579 		ctx->state = NULL;
580 		ctx->lockowner = current->files;
581 		ctx->flags = 0;
582 		ctx->error = 0;
583 		ctx->dir_cookie = 0;
584 		atomic_set(&ctx->count, 1);
585 	}
586 	return ctx;
587 }
588 
589 struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
590 {
591 	if (ctx != NULL)
592 		atomic_inc(&ctx->count);
593 	return ctx;
594 }
595 
596 static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
597 {
598 	struct inode *inode = ctx->path.dentry->d_inode;
599 
600 	if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock))
601 		return;
602 	list_del(&ctx->list);
603 	spin_unlock(&inode->i_lock);
604 	NFS_PROTO(inode)->close_context(ctx, is_sync);
605 	if (ctx->cred != NULL)
606 		put_rpccred(ctx->cred);
607 	path_put(&ctx->path);
608 	kfree(ctx);
609 }
610 
/*
 * Drop a reference on @ctx, passing is_sync = 0 to the final close.
 */
void put_nfs_open_context(struct nfs_open_context *ctx)
{
	const int is_sync = 0;

	__put_nfs_open_context(ctx, is_sync);
}
615 
/*
 * Drop a reference on @ctx, passing is_sync = 1 to the final close.
 */
static void put_nfs_open_context_sync(struct nfs_open_context *ctx)
{
	const int is_sync = 1;

	__put_nfs_open_context(ctx, is_sync);
}
620 
621 /*
622  * Ensure that mmap has a recent RPC credential for use when writing out
623  * shared pages
624  */
625 static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
626 {
627 	struct inode *inode = filp->f_path.dentry->d_inode;
628 	struct nfs_inode *nfsi = NFS_I(inode);
629 
630 	filp->private_data = get_nfs_open_context(ctx);
631 	spin_lock(&inode->i_lock);
632 	list_add(&ctx->list, &nfsi->open_files);
633 	spin_unlock(&inode->i_lock);
634 }
635 
636 /*
637  * Given an inode, search for an open context with the desired characteristics
638  */
639 struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode)
640 {
641 	struct nfs_inode *nfsi = NFS_I(inode);
642 	struct nfs_open_context *pos, *ctx = NULL;
643 
644 	spin_lock(&inode->i_lock);
645 	list_for_each_entry(pos, &nfsi->open_files, list) {
646 		if (cred != NULL && pos->cred != cred)
647 			continue;
648 		if ((pos->mode & mode) == mode) {
649 			ctx = get_nfs_open_context(pos);
650 			break;
651 		}
652 	}
653 	spin_unlock(&inode->i_lock);
654 	return ctx;
655 }
656 
657 static void nfs_file_clear_open_context(struct file *filp)
658 {
659 	struct inode *inode = filp->f_path.dentry->d_inode;
660 	struct nfs_open_context *ctx = nfs_file_open_context(filp);
661 
662 	if (ctx) {
663 		filp->private_data = NULL;
664 		spin_lock(&inode->i_lock);
665 		list_move_tail(&ctx->list, &NFS_I(inode)->open_files);
666 		spin_unlock(&inode->i_lock);
667 		put_nfs_open_context_sync(ctx);
668 	}
669 }
670 
671 /*
672  * These allocate and release file read/write context information.
673  */
674 int nfs_open(struct inode *inode, struct file *filp)
675 {
676 	struct nfs_open_context *ctx;
677 	struct rpc_cred *cred;
678 
679 	cred = rpc_lookup_cred();
680 	if (IS_ERR(cred))
681 		return PTR_ERR(cred);
682 	ctx = alloc_nfs_open_context(filp->f_path.mnt, filp->f_path.dentry, cred);
683 	put_rpccred(cred);
684 	if (ctx == NULL)
685 		return -ENOMEM;
686 	ctx->mode = filp->f_mode;
687 	nfs_file_set_open_context(filp, ctx);
688 	put_nfs_open_context(ctx);
689 	return 0;
690 }
691 
/*
 * Release the open context attached to @filp.  Always returns 0.
 */
int nfs_release(struct inode *inode, struct file *filp)
{
	int status = 0;

	nfs_file_clear_open_context(filp);
	return status;
}
697 
698 /*
699  * This function is called whenever some part of NFS notices that
700  * the cached attributes have to be refreshed.
701  */
702 int
703 __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
704 {
705 	int		 status = -ESTALE;
706 	struct nfs_fattr fattr;
707 	struct nfs_inode *nfsi = NFS_I(inode);
708 
709 	dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
710 		inode->i_sb->s_id, (long long)NFS_FILEID(inode));
711 
712 	if (is_bad_inode(inode))
713 		goto out;
714 	if (NFS_STALE(inode))
715 		goto out;
716 
717 	nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
718 	status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
719 	if (status != 0) {
720 		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
721 			 inode->i_sb->s_id,
722 			 (long long)NFS_FILEID(inode), status);
723 		if (status == -ESTALE) {
724 			nfs_zap_caches(inode);
725 			if (!S_ISDIR(inode->i_mode))
726 				set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
727 		}
728 		goto out;
729 	}
730 
731 	status = nfs_refresh_inode(inode, &fattr);
732 	if (status) {
733 		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n",
734 			 inode->i_sb->s_id,
735 			 (long long)NFS_FILEID(inode), status);
736 		goto out;
737 	}
738 
739 	if (nfsi->cache_validity & NFS_INO_INVALID_ACL)
740 		nfs_zap_acl_cache(inode);
741 
742 	dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n",
743 		inode->i_sb->s_id,
744 		(long long)NFS_FILEID(inode));
745 
746  out:
747 	return status;
748 }
749 
750 int nfs_attribute_timeout(struct inode *inode)
751 {
752 	struct nfs_inode *nfsi = NFS_I(inode);
753 
754 	if (nfs_have_delegation(inode, FMODE_READ))
755 		return 0;
756 	return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
757 }
758 
759 /**
760  * nfs_revalidate_inode - Revalidate the inode attributes
761  * @server - pointer to nfs_server struct
762  * @inode - pointer to inode struct
763  *
764  * Updates inode attribute information by retrieving the data from the server.
765  */
766 int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
767 {
768 	if (!(NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATTR)
769 			&& !nfs_attribute_timeout(inode))
770 		return NFS_STALE(inode) ? -ESTALE : 0;
771 	return __nfs_revalidate_inode(server, inode);
772 }
773 
774 static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_space *mapping)
775 {
776 	struct nfs_inode *nfsi = NFS_I(inode);
777 
778 	if (mapping->nrpages != 0) {
779 		int ret = invalidate_inode_pages2(mapping);
780 		if (ret < 0)
781 			return ret;
782 	}
783 	spin_lock(&inode->i_lock);
784 	nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
785 	if (S_ISDIR(inode->i_mode))
786 		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
787 	spin_unlock(&inode->i_lock);
788 	nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
789 	dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
790 			inode->i_sb->s_id, (long long)NFS_FILEID(inode));
791 	return 0;
792 }
793 
794 static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
795 {
796 	int ret = 0;
797 
798 	mutex_lock(&inode->i_mutex);
799 	if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_DATA) {
800 		ret = nfs_sync_mapping(mapping);
801 		if (ret == 0)
802 			ret = nfs_invalidate_mapping_nolock(inode, mapping);
803 	}
804 	mutex_unlock(&inode->i_mutex);
805 	return ret;
806 }
807 
808 /**
809  * nfs_revalidate_mapping_nolock - Revalidate the pagecache
810  * @inode - pointer to host inode
811  * @mapping - pointer to mapping
812  */
813 int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping)
814 {
815 	struct nfs_inode *nfsi = NFS_I(inode);
816 	int ret = 0;
817 
818 	if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
819 			|| nfs_attribute_timeout(inode) || NFS_STALE(inode)) {
820 		ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
821 		if (ret < 0)
822 			goto out;
823 	}
824 	if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
825 		ret = nfs_invalidate_mapping_nolock(inode, mapping);
826 out:
827 	return ret;
828 }
829 
830 /**
831  * nfs_revalidate_mapping - Revalidate the pagecache
832  * @inode - pointer to host inode
833  * @mapping - pointer to mapping
834  *
835  * This version of the function will take the inode->i_mutex and attempt to
836  * flush out all dirty data if it needs to invalidate the page cache.
837  */
838 int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
839 {
840 	struct nfs_inode *nfsi = NFS_I(inode);
841 	int ret = 0;
842 
843 	if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
844 			|| nfs_attribute_timeout(inode) || NFS_STALE(inode)) {
845 		ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
846 		if (ret < 0)
847 			goto out;
848 	}
849 	if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
850 		ret = nfs_invalidate_mapping(inode, mapping);
851 out:
852 	return ret;
853 }
854 
855 static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
856 {
857 	struct nfs_inode *nfsi = NFS_I(inode);
858 
859 	if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE)
860 			&& (fattr->valid & NFS_ATTR_FATTR_CHANGE)
861 			&& nfsi->change_attr == fattr->pre_change_attr) {
862 		nfsi->change_attr = fattr->change_attr;
863 		if (S_ISDIR(inode->i_mode))
864 			nfsi->cache_validity |= NFS_INO_INVALID_DATA;
865 	}
866 	/* If we have atomic WCC data, we may update some attributes */
867 	if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME)
868 			&& (fattr->valid & NFS_ATTR_FATTR_CTIME)
869 			&& timespec_equal(&inode->i_ctime, &fattr->pre_ctime))
870 			memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
871 
872 	if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME)
873 			&& (fattr->valid & NFS_ATTR_FATTR_MTIME)
874 			&& timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
875 			memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
876 			if (S_ISDIR(inode->i_mode))
877 				nfsi->cache_validity |= NFS_INO_INVALID_DATA;
878 	}
879 	if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
880 			&& (fattr->valid & NFS_ATTR_FATTR_SIZE)
881 			&& i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size)
882 			&& nfsi->npages == 0)
883 			i_size_write(inode, nfs_size_to_loff_t(fattr->size));
884 }
885 
886 /**
887  * nfs_check_inode_attributes - verify consistency of the inode attribute cache
888  * @inode - pointer to inode
889  * @fattr - updated attributes
890  *
891  * Verifies the attribute cache. If we have just changed the attributes,
892  * so that fattr carries weak cache consistency data, then it may
893  * also update the ctime/mtime/change_attribute.
894  */
895 static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fattr)
896 {
897 	struct nfs_inode *nfsi = NFS_I(inode);
898 	loff_t cur_size, new_isize;
899 	unsigned long invalid = 0;
900 
901 
902 	/* Has the inode gone and changed behind our back? */
903 	if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
904 		return -EIO;
905 	if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
906 		return -EIO;
907 
908 	if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
909 			nfsi->change_attr != fattr->change_attr)
910 		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
911 
912 	/* Verify a few of the more important attributes */
913 	if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
914 		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
915 
916 	if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
917 		cur_size = i_size_read(inode);
918 		new_isize = nfs_size_to_loff_t(fattr->size);
919 		if (cur_size != new_isize && nfsi->npages == 0)
920 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
921 	}
922 
923 	/* Have any file permissions changed? */
924 	if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
925 		invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
926 	if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && inode->i_uid != fattr->uid)
927 		invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
928 	if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && inode->i_gid != fattr->gid)
929 		invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
930 
931 	/* Has the link count changed? */
932 	if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
933 		invalid |= NFS_INO_INVALID_ATTR;
934 
935 	if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&inode->i_atime, &fattr->atime))
936 		invalid |= NFS_INO_INVALID_ATIME;
937 
938 	if (invalid != 0)
939 		nfsi->cache_validity |= invalid;
940 
941 	nfsi->read_cache_jiffies = fattr->time_start;
942 	return 0;
943 }
944 
945 static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
946 {
947 	if (!(fattr->valid & NFS_ATTR_FATTR_CTIME))
948 		return 0;
949 	return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0;
950 }
951 
952 static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
953 {
954 	if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
955 		return 0;
956 	return nfs_size_to_loff_t(fattr->size) > i_size_read(inode);
957 }
958 
959 static atomic_long_t nfs_attr_generation_counter;
960 
961 static unsigned long nfs_read_attr_generation_counter(void)
962 {
963 	return atomic_long_read(&nfs_attr_generation_counter);
964 }
965 
966 unsigned long nfs_inc_attr_generation_counter(void)
967 {
968 	return atomic_long_inc_return(&nfs_attr_generation_counter);
969 }
970 
971 void nfs_fattr_init(struct nfs_fattr *fattr)
972 {
973 	fattr->valid = 0;
974 	fattr->time_start = jiffies;
975 	fattr->gencount = nfs_inc_attr_generation_counter();
976 }
977 
978 /**
979  * nfs_inode_attrs_need_update - check if the inode attributes need updating
980  * @inode - pointer to inode
981  * @fattr - attributes
982  *
983  * Attempt to divine whether or not an RPC call reply carrying stale
984  * attributes got scheduled after another call carrying updated ones.
985  *
986  * To do so, the function first assumes that a more recent ctime means
987  * that the attributes in fattr are newer, however it also attempt to
988  * catch the case where ctime either didn't change, or went backwards
989  * (if someone reset the clock on the server) by looking at whether
990  * or not this RPC call was started after the inode was last updated.
991  * Note also the check for wraparound of 'attr_gencount'
992  *
993  * The function returns 'true' if it thinks the attributes in 'fattr' are
994  * more recent than the ones cached in the inode.
995  *
996  */
997 static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
998 {
999 	const struct nfs_inode *nfsi = NFS_I(inode);
1000 
1001 	return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 ||
1002 		nfs_ctime_need_update(inode, fattr) ||
1003 		nfs_size_need_update(inode, fattr) ||
1004 		((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
1005 }
1006 
/*
 * Pick between a full attribute update and a consistency check only.
 * When @fattr does not look newer than what is cached, the inode is only
 * checked via nfs_check_inode_attributes(); otherwise it is updated via
 * nfs_update_inode().  The callers in this file take inode->i_lock
 * around this function.
 */
static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
{
	if (!nfs_inode_attrs_need_update(inode, fattr))
		return nfs_check_inode_attributes(inode, fattr);
	return nfs_update_inode(inode, fattr);
}
1013 
1014 /**
1015  * nfs_refresh_inode - try to update the inode attribute cache
1016  * @inode - pointer to inode
1017  * @fattr - updated attributes
1018  *
1019  * Check that an RPC call that returned attributes has not overlapped with
1020  * other recent updates of the inode metadata, then decide whether it is
1021  * safe to do a full update of the inode attributes, or whether just to
1022  * call nfs_check_inode_attributes.
1023  */
1024 int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
1025 {
1026 	int status;
1027 
1028 	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
1029 		return 0;
1030 	spin_lock(&inode->i_lock);
1031 	status = nfs_refresh_inode_locked(inode, fattr);
1032 	spin_unlock(&inode->i_lock);
1033 	return status;
1034 }
1035 
1036 static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
1037 {
1038 	struct nfs_inode *nfsi = NFS_I(inode);
1039 
1040 	nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
1041 	if (S_ISDIR(inode->i_mode))
1042 		nfsi->cache_validity |= NFS_INO_INVALID_DATA;
1043 	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
1044 		return 0;
1045 	return nfs_refresh_inode_locked(inode, fattr);
1046 }
1047 
1048 /**
1049  * nfs_post_op_update_inode - try to update the inode attribute cache
1050  * @inode - pointer to inode
1051  * @fattr - updated attributes
1052  *
1053  * After an operation that has changed the inode metadata, mark the
1054  * attribute cache as being invalid, then try to update it.
1055  *
1056  * NB: if the server didn't return any post op attributes, this
1057  * function will force the retrieval of attributes before the next
1058  * NFS request.  Thus it should be used only for operations that
1059  * are expected to change one or more attributes, to avoid
1060  * unnecessary NFS requests and trips through nfs_update_inode().
1061  */
1062 int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1063 {
1064 	int status;
1065 
1066 	spin_lock(&inode->i_lock);
1067 	status = nfs_post_op_update_inode_locked(inode, fattr);
1068 	spin_unlock(&inode->i_lock);
1069 	return status;
1070 }
1071 
1072 /**
1073  * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
1074  * @inode - pointer to inode
1075  * @fattr - updated attributes
1076  *
1077  * After an operation that has changed the inode metadata, mark the
1078  * attribute cache as being invalid, then try to update it. Fake up
1079  * weak cache consistency data, if none exist.
1080  *
1081  * This function is mainly designed to be used by the ->write_done() functions.
1082  */
1083 int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
1084 {
1085 	int status;
1086 
1087 	spin_lock(&inode->i_lock);
1088 	/* Don't do a WCC update if these attributes are already stale */
1089 	if ((fattr->valid & NFS_ATTR_FATTR) == 0 ||
1090 			!nfs_inode_attrs_need_update(inode, fattr)) {
1091 		fattr->valid &= ~(NFS_ATTR_FATTR_PRECHANGE
1092 				| NFS_ATTR_FATTR_PRESIZE
1093 				| NFS_ATTR_FATTR_PREMTIME
1094 				| NFS_ATTR_FATTR_PRECTIME);
1095 		goto out_noforce;
1096 	}
1097 	if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
1098 			(fattr->valid & NFS_ATTR_FATTR_PRECHANGE) == 0) {
1099 		fattr->pre_change_attr = NFS_I(inode)->change_attr;
1100 		fattr->valid |= NFS_ATTR_FATTR_PRECHANGE;
1101 	}
1102 	if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 &&
1103 			(fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) {
1104 		memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime));
1105 		fattr->valid |= NFS_ATTR_FATTR_PRECTIME;
1106 	}
1107 	if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 &&
1108 			(fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) {
1109 		memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime));
1110 		fattr->valid |= NFS_ATTR_FATTR_PREMTIME;
1111 	}
1112 	if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 &&
1113 			(fattr->valid & NFS_ATTR_FATTR_PRESIZE) == 0) {
1114 		fattr->pre_size = i_size_read(inode);
1115 		fattr->valid |= NFS_ATTR_FATTR_PRESIZE;
1116 	}
1117 out_noforce:
1118 	status = nfs_post_op_update_inode_locked(inode, fattr);
1119 	spin_unlock(&inode->i_lock);
1120 	return status;
1121 }
1122 
1123 /*
1124  * Many nfs protocol calls return the new file attributes after
1125  * an operation.  Here we update the inode to reflect the state
1126  * of the server's inode.
1127  *
1128  * This is a bit tricky because we have to make sure all dirty pages
1129  * have been sent off to the server before calling invalidate_inode_pages.
1130  * To make sure no other process adds more write requests while we try
1131  * our best to flush them, we make them sleep during the attribute refresh.
1132  *
1133  * A very similar scenario holds for the dir cache.
1134  */
1135 static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1136 {
1137 	struct nfs_server *server;
1138 	struct nfs_inode *nfsi = NFS_I(inode);
1139 	loff_t cur_isize, new_isize;
1140 	unsigned long invalid = 0;
1141 	unsigned long now = jiffies;
1142 
1143 	dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
1144 			__func__, inode->i_sb->s_id, inode->i_ino,
1145 			atomic_read(&inode->i_count), fattr->valid);
1146 
1147 	if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
1148 		goto out_fileid;
1149 
1150 	/*
1151 	 * Make sure the inode's type hasn't changed.
1152 	 */
1153 	if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
1154 		goto out_changed;
1155 
1156 	server = NFS_SERVER(inode);
1157 	/* Update the fsid? */
1158 	if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) &&
1159 			!nfs_fsid_equal(&server->fsid, &fattr->fsid) &&
1160 			!test_bit(NFS_INO_MOUNTPOINT, &nfsi->flags))
1161 		server->fsid = fattr->fsid;
1162 
1163 	/*
1164 	 * Update the read time so we don't revalidate too often.
1165 	 */
1166 	nfsi->read_cache_jiffies = fattr->time_start;
1167 
1168 	if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) || (fattr->valid & (NFS_ATTR_FATTR_MTIME|NFS_ATTR_FATTR_CTIME)))
1169 	    nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
1170 		    | NFS_INO_INVALID_ATIME
1171 		    | NFS_INO_REVAL_PAGECACHE);
1172 
1173 	/* Do atomic weak cache consistency updates */
1174 	nfs_wcc_update_inode(inode, fattr);
1175 
1176 	/* More cache consistency checks */
1177 	if (fattr->valid & NFS_ATTR_FATTR_CHANGE) {
1178 		if (nfsi->change_attr != fattr->change_attr) {
1179 			dprintk("NFS: change_attr change on server for file %s/%ld\n",
1180 					inode->i_sb->s_id, inode->i_ino);
1181 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1182 			if (S_ISDIR(inode->i_mode))
1183 				nfs_force_lookup_revalidate(inode);
1184 			nfsi->change_attr = fattr->change_attr;
1185 		}
1186 	}
1187 
1188 	if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
1189 		/* NFSv2/v3: Check if the mtime agrees */
1190 		if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
1191 			dprintk("NFS: mtime change on server for file %s/%ld\n",
1192 					inode->i_sb->s_id, inode->i_ino);
1193 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1194 			if (S_ISDIR(inode->i_mode))
1195 				nfs_force_lookup_revalidate(inode);
1196 			memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1197 		}
1198 	}
1199 	if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
1200 		/* If ctime has changed we should definitely clear access+acl caches */
1201 		if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
1202 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1203 			/* and probably clear data for a directory too as utimes can cause
1204 			 * havoc with our cache.
1205 			 */
1206 			if (S_ISDIR(inode->i_mode)) {
1207 				invalid |= NFS_INO_INVALID_DATA;
1208 				nfs_force_lookup_revalidate(inode);
1209 			}
1210 			memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
1211 		}
1212 	}
1213 
1214 	/* Check if our cached file size is stale */
1215 	if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
1216 		new_isize = nfs_size_to_loff_t(fattr->size);
1217 		cur_isize = i_size_read(inode);
1218 		if (new_isize != cur_isize) {
1219 			/* Do we perhaps have any outstanding writes, or has
1220 			 * the file grown beyond our last write? */
1221 			if (nfsi->npages == 0 || new_isize > cur_isize) {
1222 				i_size_write(inode, new_isize);
1223 				invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1224 			}
1225 			dprintk("NFS: isize change on server for file %s/%ld\n",
1226 					inode->i_sb->s_id, inode->i_ino);
1227 		}
1228 	}
1229 
1230 
1231 	if (fattr->valid & NFS_ATTR_FATTR_ATIME)
1232 		memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
1233 
1234 	if (fattr->valid & NFS_ATTR_FATTR_MODE) {
1235 		if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) {
1236 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1237 			inode->i_mode = fattr->mode;
1238 		}
1239 	}
1240 	if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
1241 		if (inode->i_uid != fattr->uid) {
1242 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1243 			inode->i_uid = fattr->uid;
1244 		}
1245 	}
1246 	if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
1247 		if (inode->i_gid != fattr->gid) {
1248 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1249 			inode->i_gid = fattr->gid;
1250 		}
1251 	}
1252 
1253 	if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
1254 		if (inode->i_nlink != fattr->nlink) {
1255 			invalid |= NFS_INO_INVALID_ATTR;
1256 			if (S_ISDIR(inode->i_mode))
1257 				invalid |= NFS_INO_INVALID_DATA;
1258 			inode->i_nlink = fattr->nlink;
1259 		}
1260 	}
1261 
1262 	if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
1263 		/*
1264 		 * report the blocks in 512byte units
1265 		 */
1266 		inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
1267  	}
1268 	if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
1269 		inode->i_blocks = fattr->du.nfs2.blocks;
1270 
1271 	/* Update attrtimeo value if we're out of the unstable period */
1272 	if (invalid & NFS_INO_INVALID_ATTR) {
1273 		nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
1274 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
1275 		nfsi->attrtimeo_timestamp = now;
1276 		nfsi->attr_gencount = nfs_inc_attr_generation_counter();
1277 	} else {
1278 		if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
1279 			if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
1280 				nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
1281 			nfsi->attrtimeo_timestamp = now;
1282 		}
1283 	}
1284 	invalid &= ~NFS_INO_INVALID_ATTR;
1285 	/* Don't invalidate the data if we were to blame */
1286 	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
1287 				|| S_ISLNK(inode->i_mode)))
1288 		invalid &= ~NFS_INO_INVALID_DATA;
1289 	if (!nfs_have_delegation(inode, FMODE_READ) ||
1290 			(nfsi->cache_validity & NFS_INO_REVAL_FORCED))
1291 		nfsi->cache_validity |= invalid;
1292 	nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED;
1293 
1294 	return 0;
1295  out_changed:
1296 	/*
1297 	 * Big trouble! The inode has become a different object.
1298 	 */
1299 	printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n",
1300 			__func__, inode->i_ino, inode->i_mode, fattr->mode);
1301  out_err:
1302 	/*
1303 	 * No need to worry about unhashing the dentry, as the
1304 	 * lookup validation will know that the inode is bad.
1305 	 * (But we fall through to invalidate the caches.)
1306 	 */
1307 	nfs_invalidate_inode(inode);
1308 	return -ESTALE;
1309 
1310  out_fileid:
1311 	printk(KERN_ERR "NFS: server %s error: fileid changed\n"
1312 		"fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
1313 		NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id,
1314 		(long long)nfsi->fileid, (long long)fattr->fileid);
1315 	goto out_err;
1316 }
1317 
1318 
1319 #ifdef CONFIG_NFS_V4
1320 
/*
 * NFSv4 variant of clear_inode.
 *
 * Clean out any remaining NFSv4 state that might be left over due
 * to open() calls that passed nfs_atomic_lookup, but failed to call
 * nfs_open().  A delegation still attached to the inode is returned
 * before the generic NFS clear_inode code runs.
 */
void nfs4_clear_inode(struct inode *inode)
{
	/* Step 1: hand back any delegation we still hold */
	nfs_inode_return_delegation_noreclaim(inode);

	/* Step 2: common NFS clear_inode() processing */
	nfs_clear_inode(inode);
}
1333 #endif
1334 
1335 struct inode *nfs_alloc_inode(struct super_block *sb)
1336 {
1337 	struct nfs_inode *nfsi;
1338 	nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL);
1339 	if (!nfsi)
1340 		return NULL;
1341 	nfsi->flags = 0UL;
1342 	nfsi->cache_validity = 0UL;
1343 #ifdef CONFIG_NFS_V3_ACL
1344 	nfsi->acl_access = ERR_PTR(-EAGAIN);
1345 	nfsi->acl_default = ERR_PTR(-EAGAIN);
1346 #endif
1347 #ifdef CONFIG_NFS_V4
1348 	nfsi->nfs4_acl = NULL;
1349 #endif /* CONFIG_NFS_V4 */
1350 	return &nfsi->vfs_inode;
1351 }
1352 
1353 void nfs_destroy_inode(struct inode *inode)
1354 {
1355 	kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
1356 }
1357 
/*
 * One-time initialisation of the NFSv4-specific members of @nfsi.
 * Compiles to nothing when CONFIG_NFS_V4 is not set.
 */
static inline void nfs4_init_once(struct nfs_inode *nfsi)
{
#ifdef CONFIG_NFS_V4
	/* No open state yet */
	INIT_LIST_HEAD(&nfsi->open_states);

	/* No delegation held yet */
	nfsi->delegation = NULL;
	nfsi->delegation_state = 0;

	init_rwsem(&nfsi->rwsem);
#endif
}
1367 
1368 static void init_once(void *foo)
1369 {
1370 	struct nfs_inode *nfsi = (struct nfs_inode *) foo;
1371 
1372 	inode_init_once(&nfsi->vfs_inode);
1373 	INIT_LIST_HEAD(&nfsi->open_files);
1374 	INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
1375 	INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
1376 	INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
1377 	nfsi->npages = 0;
1378 	atomic_set(&nfsi->silly_count, 1);
1379 	INIT_HLIST_HEAD(&nfsi->silly_list);
1380 	init_waitqueue_head(&nfsi->waitqueue);
1381 	nfs4_init_once(nfsi);
1382 }
1383 
1384 static int __init nfs_init_inodecache(void)
1385 {
1386 	nfs_inode_cachep = kmem_cache_create("nfs_inode_cache",
1387 					     sizeof(struct nfs_inode),
1388 					     0, (SLAB_RECLAIM_ACCOUNT|
1389 						SLAB_MEM_SPREAD),
1390 					     init_once);
1391 	if (nfs_inode_cachep == NULL)
1392 		return -ENOMEM;
1393 
1394 	return 0;
1395 }
1396 
1397 static void nfs_destroy_inodecache(void)
1398 {
1399 	kmem_cache_destroy(nfs_inode_cachep);
1400 }
1401 
1402 struct workqueue_struct *nfsiod_workqueue;
1403 
1404 /*
1405  * start up the nfsiod workqueue
1406  */
1407 static int nfsiod_start(void)
1408 {
1409 	struct workqueue_struct *wq;
1410 	dprintk("RPC:       creating workqueue nfsiod\n");
1411 	wq = create_singlethread_workqueue("nfsiod");
1412 	if (wq == NULL)
1413 		return -ENOMEM;
1414 	nfsiod_workqueue = wq;
1415 	return 0;
1416 }
1417 
1418 /*
1419  * Destroy the nfsiod workqueue
1420  */
1421 static void nfsiod_stop(void)
1422 {
1423 	struct workqueue_struct *wq;
1424 
1425 	wq = nfsiod_workqueue;
1426 	if (wq == NULL)
1427 		return;
1428 	nfsiod_workqueue = NULL;
1429 	destroy_workqueue(wq);
1430 }
1431 
1432 /*
1433  * Initialize NFS
1434  */
1435 static int __init init_nfs_fs(void)
1436 {
1437 	int err;
1438 
1439 	err = nfsiod_start();
1440 	if (err)
1441 		goto out6;
1442 
1443 	err = nfs_fs_proc_init();
1444 	if (err)
1445 		goto out5;
1446 
1447 	err = nfs_init_nfspagecache();
1448 	if (err)
1449 		goto out4;
1450 
1451 	err = nfs_init_inodecache();
1452 	if (err)
1453 		goto out3;
1454 
1455 	err = nfs_init_readpagecache();
1456 	if (err)
1457 		goto out2;
1458 
1459 	err = nfs_init_writepagecache();
1460 	if (err)
1461 		goto out1;
1462 
1463 	err = nfs_init_directcache();
1464 	if (err)
1465 		goto out0;
1466 
1467 #ifdef CONFIG_PROC_FS
1468 	rpc_proc_register(&nfs_rpcstat);
1469 #endif
1470 	if ((err = register_nfs_fs()) != 0)
1471 		goto out;
1472 	return 0;
1473 out:
1474 #ifdef CONFIG_PROC_FS
1475 	rpc_proc_unregister("nfs");
1476 #endif
1477 	nfs_destroy_directcache();
1478 out0:
1479 	nfs_destroy_writepagecache();
1480 out1:
1481 	nfs_destroy_readpagecache();
1482 out2:
1483 	nfs_destroy_inodecache();
1484 out3:
1485 	nfs_destroy_nfspagecache();
1486 out4:
1487 	nfs_fs_proc_exit();
1488 out5:
1489 	nfsiod_stop();
1490 out6:
1491 	return err;
1492 }
1493 
1494 static void __exit exit_nfs_fs(void)
1495 {
1496 	nfs_destroy_directcache();
1497 	nfs_destroy_writepagecache();
1498 	nfs_destroy_readpagecache();
1499 	nfs_destroy_inodecache();
1500 	nfs_destroy_nfspagecache();
1501 #ifdef CONFIG_PROC_FS
1502 	rpc_proc_unregister("nfs");
1503 #endif
1504 	unregister_nfs_fs();
1505 	nfs_fs_proc_exit();
1506 	nfsiod_stop();
1507 }
1508 
1509 /* Not quite true; I just maintain it */
1510 MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
1511 MODULE_LICENSE("GPL");
1512 module_param(enable_ino64, bool, 0644);
1513 
1514 module_init(init_nfs_fs)
1515 module_exit(exit_nfs_fs)
1516