1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  linux/fs/nfs/dir.c
4  *
5  *  Copyright (C) 1992  Rick Sladkey
6  *
7  *  nfs directory handling functions
8  *
9  * 10 Apr 1996	Added silly rename for unlink	--okir
10  * 28 Sep 1996	Improved directory cache --okir
11  * 23 Aug 1997  Claus Heine claus@momo.math.rwth-aachen.de
12  *              Re-implemented silly rename for unlink, newly implemented
13  *              silly rename for nfs_rename() following the suggestions
14  *              of Olaf Kirch (okir) found in this file.
15  *              Following Linus' comments on my original hack, this version
16  *              depends only on the dcache stuff and doesn't touch the inode
17  *              layer (iput() and friends).
18  *  6 Jun 1999	Cache readdir lookups in the page cache. -DaveM
19  */
20 
21 #include <linux/module.h>
22 #include <linux/time.h>
23 #include <linux/errno.h>
24 #include <linux/stat.h>
25 #include <linux/fcntl.h>
26 #include <linux/string.h>
27 #include <linux/kernel.h>
28 #include <linux/slab.h>
29 #include <linux/mm.h>
30 #include <linux/sunrpc/clnt.h>
31 #include <linux/nfs_fs.h>
32 #include <linux/nfs_mount.h>
33 #include <linux/pagemap.h>
34 #include <linux/pagevec.h>
35 #include <linux/namei.h>
36 #include <linux/mount.h>
37 #include <linux/swap.h>
38 #include <linux/sched.h>
39 #include <linux/kmemleak.h>
40 #include <linux/xattr.h>
41 
42 #include "delegation.h"
43 #include "iostat.h"
44 #include "internal.h"
45 #include "fscache.h"
46 
47 #include "nfstrace.h"
48 
49 /* #define NFS_DEBUG_VERBOSE 1 */
50 
51 static int nfs_opendir(struct inode *, struct file *);
52 static int nfs_closedir(struct inode *, struct file *);
53 static int nfs_readdir(struct file *, struct dir_context *);
54 static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
55 static loff_t nfs_llseek_dir(struct file *, loff_t, int);
56 static void nfs_readdir_clear_array(struct page*);
57 
58 const struct file_operations nfs_dir_operations = {
59 	.llseek		= nfs_llseek_dir,
60 	.read		= generic_read_dir,
61 	.iterate_shared	= nfs_readdir,
62 	.open		= nfs_opendir,
63 	.release	= nfs_closedir,
64 	.fsync		= nfs_fsync_dir,
65 };
66 
67 const struct address_space_operations nfs_dir_aops = {
68 	.freepage = nfs_readdir_clear_array,
69 };
70 
71 static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir)
72 {
73 	struct nfs_inode *nfsi = NFS_I(dir);
74 	struct nfs_open_dir_context *ctx;
75 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
76 	if (ctx != NULL) {
77 		ctx->duped = 0;
78 		ctx->attr_gencount = nfsi->attr_gencount;
79 		ctx->dir_cookie = 0;
80 		ctx->dup_cookie = 0;
81 		spin_lock(&dir->i_lock);
82 		if (list_empty(&nfsi->open_files) &&
83 		    (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
84 			nfsi->cache_validity |= NFS_INO_INVALID_DATA |
85 				NFS_INO_REVAL_FORCED;
86 		list_add(&ctx->list, &nfsi->open_files);
87 		spin_unlock(&dir->i_lock);
88 		return ctx;
89 	}
90 	return  ERR_PTR(-ENOMEM);
91 }
92 
93 static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx)
94 {
95 	spin_lock(&dir->i_lock);
96 	list_del(&ctx->list);
97 	spin_unlock(&dir->i_lock);
98 	kfree(ctx);
99 }
100 
101 /*
102  * Open file
103  */
104 static int
105 nfs_opendir(struct inode *inode, struct file *filp)
106 {
107 	int res = 0;
108 	struct nfs_open_dir_context *ctx;
109 
110 	dfprintk(FILE, "NFS: open dir(%pD2)\n", filp);
111 
112 	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
113 
114 	ctx = alloc_nfs_open_dir_context(inode);
115 	if (IS_ERR(ctx)) {
116 		res = PTR_ERR(ctx);
117 		goto out;
118 	}
119 	filp->private_data = ctx;
120 out:
121 	return res;
122 }
123 
124 static int
125 nfs_closedir(struct inode *inode, struct file *filp)
126 {
127 	put_nfs_open_dir_context(file_inode(filp), filp->private_data);
128 	return 0;
129 }
130 
131 struct nfs_cache_array_entry {
132 	u64 cookie;
133 	u64 ino;
134 	const char *name;
135 	unsigned int name_len;
136 	unsigned char d_type;
137 };
138 
139 struct nfs_cache_array {
140 	u64 last_cookie;
141 	unsigned int size;
142 	unsigned char page_full : 1,
143 		      page_is_eof : 1;
144 	struct nfs_cache_array_entry array[];
145 };
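/*
 * Readdir cache layout: each page in the directory's i_mapping starts
 * with a struct nfs_cache_array header, immediately followed by as many
 * nfs_cache_array_entry records as fit in the remainder of the page.
 * Entry names are kmalloc()ed copies that are released by
 * nfs_readdir_clear_array() (wired up as the mapping's ->freepage hook),
 * and last_cookie records where the next READDIR request should resume.
 */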
146 
147 struct nfs_readdir_descriptor {
148 	struct file	*file;
149 	struct page	*page;
150 	struct dir_context *ctx;
151 	pgoff_t		page_index;
152 	u64		dir_cookie;
153 	u64		last_cookie;
154 	u64		dup_cookie;
155 	loff_t		current_index;
156 	loff_t		prev_index;
157 
158 	__be32		verf[NFS_DIR_VERIFIER_SIZE];
159 	unsigned long	dir_verifier;
160 	unsigned long	timestamp;
161 	unsigned long	gencount;
162 	unsigned long	attr_gencount;
163 	unsigned int	cache_entry_index;
164 	signed char duped;
165 	bool plus;
166 	bool eof;
167 };
168 
169 static void nfs_readdir_array_init(struct nfs_cache_array *array)
170 {
171 	memset(array, 0, sizeof(struct nfs_cache_array));
172 }
173 
174 static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie)
175 {
176 	struct nfs_cache_array *array;
177 
178 	array = kmap_atomic(page);
179 	nfs_readdir_array_init(array);
180 	array->last_cookie = last_cookie;
181 	kunmap_atomic(array);
182 }
183 
184 /*
185  * We are freeing strings created by nfs_readdir_add_to_array()
186  */
187 static
188 void nfs_readdir_clear_array(struct page *page)
189 {
190 	struct nfs_cache_array *array;
191 	int i;
192 
193 	array = kmap_atomic(page);
194 	for (i = 0; i < array->size; i++)
195 		kfree(array->array[i].name);
196 	nfs_readdir_array_init(array);
197 	kunmap_atomic(array);
198 }
199 
200 static void nfs_readdir_array_set_eof(struct nfs_cache_array *array)
201 {
202 	array->page_is_eof = 1;
203 	array->page_full = 1;
204 }
205 
206 static bool nfs_readdir_array_is_full(struct nfs_cache_array *array)
207 {
208 	return array->page_full;
209 }
210 
211 /*
212  * The caller is responsible for freeing the returned name.
213  * When called by nfs_readdir_add_to_array(), the strings will be freed
214  * in nfs_readdir_clear_array()
215  */
216 static const char *nfs_readdir_copy_name(const char *name, unsigned int len)
217 {
218 	const char *ret = kmemdup_nul(name, len, GFP_KERNEL);
219 
220 	/*
221 	 * Avoid a kmemleak false positive. The pointer to the name is stored
222 	 * in a page cache page which kmemleak does not scan.
223 	 */
224 	if (ret != NULL)
225 		kmemleak_not_leak(ret);
226 	return ret;
227 }
228 
229 /*
230  * Check that the next array entry lies entirely within the page bounds
231  */
232 static int nfs_readdir_array_can_expand(struct nfs_cache_array *array)
233 {
234 	struct nfs_cache_array_entry *cache_entry;
235 
236 	if (array->page_full)
237 		return -ENOSPC;
238 	cache_entry = &array->array[array->size + 1];
239 	if ((char *)cache_entry - (char *)array > PAGE_SIZE) {
240 		array->page_full = 1;
241 		return -ENOSPC;
242 	}
243 	return 0;
244 }
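/*
 * For scale: on a typical 64-bit build sizeof(struct
 * nfs_cache_array_entry) works out to roughly 32 bytes and the header
 * to roughly 16, so a 4 KiB page holds on the order of 127 entries
 * before page_full is set.  The exact figure depends on architecture
 * and structure padding; the check above simply verifies that entry
 * index 'size' still ends within PAGE_SIZE.
 */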
245 
246 static
247 int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
248 {
249 	struct nfs_cache_array *array;
250 	struct nfs_cache_array_entry *cache_entry;
251 	const char *name;
252 	int ret;
253 
254 	name = nfs_readdir_copy_name(entry->name, entry->len);
255 	if (!name)
256 		return -ENOMEM;
257 
258 	array = kmap_atomic(page);
259 	ret = nfs_readdir_array_can_expand(array);
260 	if (ret) {
261 		kfree(name);
262 		goto out;
263 	}
264 
265 	cache_entry = &array->array[array->size];
266 	cache_entry->cookie = entry->prev_cookie;
267 	cache_entry->ino = entry->ino;
268 	cache_entry->d_type = entry->d_type;
269 	cache_entry->name_len = entry->len;
270 	cache_entry->name = name;
271 	array->last_cookie = entry->cookie;
272 	array->size++;
273 	if (entry->eof != 0)
274 		nfs_readdir_array_set_eof(array);
275 out:
276 	kunmap_atomic(array);
277 	return ret;
278 }
279 
280 static struct page *nfs_readdir_page_get_locked(struct address_space *mapping,
281 						pgoff_t index, u64 last_cookie)
282 {
283 	struct page *page;
284 
285 	page = grab_cache_page(mapping, index);
286 	if (page && !PageUptodate(page)) {
287 		nfs_readdir_page_init_array(page, last_cookie);
288 		if (invalidate_inode_pages2_range(mapping, index + 1, -1) < 0)
289 			nfs_zap_mapping(mapping->host, mapping);
290 		SetPageUptodate(page);
291 	}
292 
293 	return page;
294 }
295 
296 static u64 nfs_readdir_page_last_cookie(struct page *page)
297 {
298 	struct nfs_cache_array *array;
299 	u64 ret;
300 
301 	array = kmap_atomic(page);
302 	ret = array->last_cookie;
303 	kunmap_atomic(array);
304 	return ret;
305 }
306 
307 static bool nfs_readdir_page_needs_filling(struct page *page)
308 {
309 	struct nfs_cache_array *array;
310 	bool ret;
311 
312 	array = kmap_atomic(page);
313 	ret = !nfs_readdir_array_is_full(array);
314 	kunmap_atomic(array);
315 	return ret;
316 }
317 
318 static void nfs_readdir_page_set_eof(struct page *page)
319 {
320 	struct nfs_cache_array *array;
321 
322 	array = kmap_atomic(page);
323 	nfs_readdir_array_set_eof(array);
324 	kunmap_atomic(array);
325 }
326 
327 static void nfs_readdir_page_unlock_and_put(struct page *page)
328 {
329 	unlock_page(page);
330 	put_page(page);
331 }
332 
333 static struct page *nfs_readdir_page_get_next(struct address_space *mapping,
334 					      pgoff_t index, u64 cookie)
335 {
336 	struct page *page;
337 
338 	page = nfs_readdir_page_get_locked(mapping, index, cookie);
339 	if (page) {
340 		if (nfs_readdir_page_last_cookie(page) == cookie)
341 			return page;
342 		nfs_readdir_page_unlock_and_put(page);
343 	}
344 	return NULL;
345 }
346 
347 static inline
348 int is_32bit_api(void)
349 {
350 #ifdef CONFIG_COMPAT
351 	return in_compat_syscall();
352 #else
353 	return (BITS_PER_LONG == 32);
354 #endif
355 }
356 
357 static
358 bool nfs_readdir_use_cookie(const struct file *filp)
359 {
360 	if ((filp->f_mode & FMODE_32BITHASH) ||
361 	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
362 		return false;
363 	return true;
364 }
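/*
 * When the caller can handle 64-bit directory offsets, the raw
 * server-supplied readdir cookie is used directly as ctx->pos.  For
 * 32-bit syscall ABIs (or files opened with FMODE_32BITHASH), where
 * userspace may only see 32-bit offsets via telldir()/seekdir(), the
 * code falls back to a plain monotonically increasing entry index.
 */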
365 
366 static int nfs_readdir_search_for_pos(struct nfs_cache_array *array,
367 				      struct nfs_readdir_descriptor *desc)
368 {
369 	loff_t diff = desc->ctx->pos - desc->current_index;
370 	unsigned int index;
371 
372 	if (diff < 0)
373 		goto out_eof;
374 	if (diff >= array->size) {
375 		if (array->page_is_eof)
376 			goto out_eof;
377 		return -EAGAIN;
378 	}
379 
380 	index = (unsigned int)diff;
381 	desc->dir_cookie = array->array[index].cookie;
382 	desc->cache_entry_index = index;
383 	return 0;
384 out_eof:
385 	desc->eof = true;
386 	return -EBADCOOKIE;
387 }
388 
389 static bool
390 nfs_readdir_inode_mapping_valid(struct nfs_inode *nfsi)
391 {
392 	if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
393 		return false;
394 	smp_rmb();
395 	return !test_bit(NFS_INO_INVALIDATING, &nfsi->flags);
396 }
397 
398 static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
399 					 struct nfs_readdir_descriptor *desc)
400 {
401 	int i;
402 	loff_t new_pos;
403 	int status = -EAGAIN;
404 
405 	for (i = 0; i < array->size; i++) {
406 		if (array->array[i].cookie == desc->dir_cookie) {
407 			struct nfs_inode *nfsi = NFS_I(file_inode(desc->file));
408 
409 			new_pos = desc->current_index + i;
410 			if (desc->attr_gencount != nfsi->attr_gencount ||
411 			    !nfs_readdir_inode_mapping_valid(nfsi)) {
412 				desc->duped = 0;
413 				desc->attr_gencount = nfsi->attr_gencount;
414 			} else if (new_pos < desc->prev_index) {
415 				if (desc->duped > 0
416 				    && desc->dup_cookie == desc->dir_cookie) {
417 					if (printk_ratelimit()) {
418 						pr_notice("NFS: directory %pD2 contains a readdir loop. "
419 								"Please contact your server vendor.  "
420 								"The file: %s has duplicate cookie %llu\n",
421 								desc->file, array->array[i].name, desc->dir_cookie);
422 					}
423 					status = -ELOOP;
424 					goto out;
425 				}
426 				desc->dup_cookie = desc->dir_cookie;
427 				desc->duped = -1;
428 			}
429 			if (nfs_readdir_use_cookie(desc->file))
430 				desc->ctx->pos = desc->dir_cookie;
431 			else
432 				desc->ctx->pos = new_pos;
433 			desc->prev_index = new_pos;
434 			desc->cache_entry_index = i;
435 			return 0;
436 		}
437 	}
438 	if (array->page_is_eof) {
439 		status = -EBADCOOKIE;
440 		if (desc->dir_cookie == array->last_cookie)
441 			desc->eof = true;
442 	}
443 out:
444 	return status;
445 }
446 
447 static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc)
448 {
449 	struct nfs_cache_array *array;
450 	int status;
451 
452 	array = kmap_atomic(desc->page);
453 
454 	if (desc->dir_cookie == 0)
455 		status = nfs_readdir_search_for_pos(array, desc);
456 	else
457 		status = nfs_readdir_search_for_cookie(array, desc);
458 
459 	if (status == -EAGAIN) {
460 		desc->last_cookie = array->last_cookie;
461 		desc->current_index += array->size;
462 		desc->page_index++;
463 	}
464 	kunmap_atomic(array);
465 	return status;
466 }
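/*
 * nfs_readdir_search_array() returns 0 when the target entry was found
 * in this page (cache_entry_index is set), -EAGAIN when the caller
 * should move on to the next cache page (last_cookie/current_index are
 * advanced accordingly), -EBADCOOKIE when the cookie cannot be in the
 * cache at all (desc->eof is set if we hit end of directory), and
 * -ELOOP if the server handed out a duplicate cookie.
 */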
467 
468 /* Fill a page with xdr information before transferring to the cache page */
469 static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
470 				  __be32 *verf, u64 cookie,
471 				  struct page **pages, size_t bufsize,
472 				  __be32 *verf_res)
473 {
474 	struct inode *inode = file_inode(desc->file);
475 	struct nfs_readdir_arg arg = {
476 		.dentry = file_dentry(desc->file),
477 		.cred = desc->file->f_cred,
478 		.verf = verf,
479 		.cookie = cookie,
480 		.pages = pages,
481 		.page_len = bufsize,
482 		.plus = desc->plus,
483 	};
484 	struct nfs_readdir_res res = {
485 		.verf = verf_res,
486 	};
487 	unsigned long	timestamp, gencount;
488 	int		error;
489 
490  again:
491 	timestamp = jiffies;
492 	gencount = nfs_inc_attr_generation_counter();
493 	desc->dir_verifier = nfs_save_change_attribute(inode);
494 	error = NFS_PROTO(inode)->readdir(&arg, &res);
495 	if (error < 0) {
496 		/* We requested READDIRPLUS, but the server doesn't grok it */
497 		if (error == -ENOTSUPP && desc->plus) {
498 			NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
499 			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
500 			desc->plus = arg.plus = false;
501 			goto again;
502 		}
503 		goto error;
504 	}
505 	desc->timestamp = timestamp;
506 	desc->gencount = gencount;
507 error:
508 	return error;
509 }
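/*
 * Note the fallback above: if the server rejects READDIRPLUS with
 * -ENOTSUPP, the NFS_CAP_READDIRPLUS capability is cleared for the
 * whole server and the request is retried as a plain READDIR.  The
 * timestamp and generation count recorded on success let the decoded
 * attributes be aged consistently with the rest of the attribute cache.
 */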
510 
511 static int xdr_decode(struct nfs_readdir_descriptor *desc,
512 		      struct nfs_entry *entry, struct xdr_stream *xdr)
513 {
514 	struct inode *inode = file_inode(desc->file);
515 	int error;
516 
517 	error = NFS_PROTO(inode)->decode_dirent(xdr, entry, desc->plus);
518 	if (error)
519 		return error;
520 	entry->fattr->time_start = desc->timestamp;
521 	entry->fattr->gencount = desc->gencount;
522 	return 0;
523 }
524 
525 /* Match file and dirent using either filehandle or fileid
526  * Note: caller is responsible for checking the fsid
527  */
528 static
529 int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
530 {
531 	struct inode *inode;
532 	struct nfs_inode *nfsi;
533 
534 	if (d_really_is_negative(dentry))
535 		return 0;
536 
537 	inode = d_inode(dentry);
538 	if (is_bad_inode(inode) || NFS_STALE(inode))
539 		return 0;
540 
541 	nfsi = NFS_I(inode);
542 	if (entry->fattr->fileid != nfsi->fileid)
543 		return 0;
544 	if (entry->fh->size && nfs_compare_fh(entry->fh, &nfsi->fh) != 0)
545 		return 0;
546 	return 1;
547 }
548 
549 static
550 bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx)
551 {
552 	if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
553 		return false;
554 	if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags))
555 		return true;
556 	if (ctx->pos == 0)
557 		return true;
558 	return false;
559 }
560 
561 /*
562  * This function is called by the lookup and getattr code to request the
563  * use of readdirplus to accelerate any future lookups in the same
564  * directory.
565  */
566 void nfs_advise_use_readdirplus(struct inode *dir)
567 {
568 	struct nfs_inode *nfsi = NFS_I(dir);
569 
570 	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
571 	    !list_empty(&nfsi->open_files))
572 		set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
573 }
574 
575 /*
576  * This function is mainly for use by nfs_getattr().
577  *
578  * If this is an 'ls -l', we want to force use of readdirplus.
579  * Do this by checking if there is an active file descriptor,
580  * setting the readdirplus advisory bit (as nfs_advise_use_readdirplus
581  * would), and then forcing a flush of the cached readdir pages.
582  */
583 void nfs_force_use_readdirplus(struct inode *dir)
584 {
585 	struct nfs_inode *nfsi = NFS_I(dir);
586 
587 	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
588 	    !list_empty(&nfsi->open_files)) {
589 		set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
590 		invalidate_mapping_pages(dir->i_mapping,
591 			nfsi->page_index + 1, -1);
592 	}
593 }
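/*
 * Taken together, the three helpers above implement the readdirplus
 * heuristic: use READDIRPLUS at the start of a directory read, or when
 * lookup/getattr has recently missed the cache for a child of this
 * directory (the NFS_INO_ADVISE_RDPLUS hint).  nfs_force_use_readdirplus()
 * additionally drops the not-yet-consumed readdir pages so that they
 * get refilled with attribute-bearing entries.
 */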
594 
595 static
596 void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
597 		unsigned long dir_verifier)
598 {
599 	struct qstr filename = QSTR_INIT(entry->name, entry->len);
600 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
601 	struct dentry *dentry;
602 	struct dentry *alias;
603 	struct inode *inode;
604 	int status;
605 
606 	if (!(entry->fattr->valid & NFS_ATTR_FATTR_FILEID))
607 		return;
608 	if (!(entry->fattr->valid & NFS_ATTR_FATTR_FSID))
609 		return;
610 	if (filename.len == 0)
611 		return;
612 	/* Validate that the name doesn't contain any illegal '\0' */
613 	if (strnlen(filename.name, filename.len) != filename.len)
614 		return;
615 	/* ...or '/' */
616 	if (strnchr(filename.name, filename.len, '/'))
617 		return;
618 	if (filename.name[0] == '.') {
619 		if (filename.len == 1)
620 			return;
621 		if (filename.len == 2 && filename.name[1] == '.')
622 			return;
623 	}
624 	filename.hash = full_name_hash(parent, filename.name, filename.len);
625 
626 	dentry = d_lookup(parent, &filename);
627 again:
628 	if (!dentry) {
629 		dentry = d_alloc_parallel(parent, &filename, &wq);
630 		if (IS_ERR(dentry))
631 			return;
632 	}
633 	if (!d_in_lookup(dentry)) {
634 		/* Is there a mountpoint here? If so, just exit */
635 		if (!nfs_fsid_equal(&NFS_SB(dentry->d_sb)->fsid,
636 					&entry->fattr->fsid))
637 			goto out;
638 		if (nfs_same_file(dentry, entry)) {
639 			if (!entry->fh->size)
640 				goto out;
641 			nfs_set_verifier(dentry, dir_verifier);
642 			status = nfs_refresh_inode(d_inode(dentry), entry->fattr);
643 			if (!status)
644 				nfs_setsecurity(d_inode(dentry), entry->fattr, entry->label);
645 			goto out;
646 		} else {
647 			d_invalidate(dentry);
648 			dput(dentry);
649 			dentry = NULL;
650 			goto again;
651 		}
652 	}
653 	if (!entry->fh->size) {
654 		d_lookup_done(dentry);
655 		goto out;
656 	}
657 
658 	inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr, entry->label);
659 	alias = d_splice_alias(inode, dentry);
660 	d_lookup_done(dentry);
661 	if (alias) {
662 		if (IS_ERR(alias))
663 			goto out;
664 		dput(dentry);
665 		dentry = alias;
666 	}
667 	nfs_set_verifier(dentry, dir_verifier);
668 out:
669 	dput(dentry);
670 }
671 
672 /* Perform conversion from xdr to cache array */
673 static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
674 				   struct nfs_entry *entry,
675 				   struct page **xdr_pages,
676 				   struct page *fillme, unsigned int buflen)
677 {
678 	struct address_space *mapping = desc->file->f_mapping;
679 	struct xdr_stream stream;
680 	struct xdr_buf buf;
681 	struct page *scratch, *new, *page = fillme;
682 	int status;
683 
684 	scratch = alloc_page(GFP_KERNEL);
685 	if (scratch == NULL)
686 		return -ENOMEM;
687 
688 	xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
689 	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
690 
691 	do {
692 		if (entry->label)
693 			entry->label->len = NFS4_MAXLABELLEN;
694 
695 		status = xdr_decode(desc, entry, &stream);
696 		if (status != 0)
697 			break;
698 
699 		if (desc->plus)
700 			nfs_prime_dcache(file_dentry(desc->file), entry,
701 					desc->dir_verifier);
702 
703 		status = nfs_readdir_add_to_array(entry, page);
704 		if (status != -ENOSPC)
705 			continue;
706 
707 		if (page->mapping != mapping)
708 			break;
709 		new = nfs_readdir_page_get_next(mapping, page->index + 1,
710 						entry->prev_cookie);
711 		if (!new)
712 			break;
713 		if (page != fillme)
714 			nfs_readdir_page_unlock_and_put(page);
715 		page = new;
716 		status = nfs_readdir_add_to_array(entry, page);
717 	} while (!status && !entry->eof);
718 
719 	switch (status) {
720 	case -EBADCOOKIE:
721 		if (entry->eof) {
722 			nfs_readdir_page_set_eof(page);
723 			status = 0;
724 		}
725 		break;
726 	case -ENOSPC:
727 	case -EAGAIN:
728 		status = 0;
729 		break;
730 	}
731 
732 	if (page != fillme)
733 		nfs_readdir_page_unlock_and_put(page);
734 
735 	put_page(scratch);
736 	return status;
737 }
738 
739 static void nfs_readdir_free_pages(struct page **pages, size_t npages)
740 {
741 	while (npages--)
742 		put_page(pages[npages]);
743 	kfree(pages);
744 }
745 
746 /*
747  * nfs_readdir_alloc_pages() will allocate pages that must be freed with a call
748  * to nfs_readdir_free_pages()
749  */
750 static struct page **nfs_readdir_alloc_pages(size_t npages)
751 {
752 	struct page **pages;
753 	size_t i;
754 
755 	pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
756 	if (!pages)
757 		return NULL;
758 	for (i = 0; i < npages; i++) {
759 		struct page *page = alloc_page(GFP_KERNEL);
760 		if (page == NULL)
761 			goto out_freepages;
762 		pages[i] = page;
763 	}
764 	return pages;
765 
766 out_freepages:
767 	nfs_readdir_free_pages(pages, i);
768 	return NULL;
769 }
770 
771 static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
772 				    struct page *page, __be32 *verf_arg,
773 				    __be32 *verf_res)
774 {
775 	struct page **pages;
776 	struct nfs_entry *entry;
777 	size_t array_size;
778 	struct inode *inode = file_inode(desc->file);
779 	size_t dtsize = NFS_SERVER(inode)->dtsize;
780 	int status = -ENOMEM;
781 
782 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
783 	if (!entry)
784 		return -ENOMEM;
785 	entry->cookie = nfs_readdir_page_last_cookie(page);
786 	entry->fh = nfs_alloc_fhandle();
787 	entry->fattr = nfs_alloc_fattr();
788 	entry->server = NFS_SERVER(inode);
789 	if (entry->fh == NULL || entry->fattr == NULL)
790 		goto out;
791 
792 	entry->label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
793 	if (IS_ERR(entry->label)) {
794 		status = PTR_ERR(entry->label);
795 		goto out;
796 	}
797 
798 	array_size = (dtsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
799 	pages = nfs_readdir_alloc_pages(array_size);
800 	if (!pages)
801 		goto out_release_label;
802 
803 	do {
804 		unsigned int pglen;
805 		status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie,
806 						pages, dtsize,
807 						verf_res);
808 		if (status < 0)
809 			break;
810 
811 		pglen = status;
812 		if (pglen == 0) {
813 			nfs_readdir_page_set_eof(page);
814 			break;
815 		}
816 
817 		status = nfs_readdir_page_filler(desc, entry, pages, page, pglen);
818 	} while (!status && nfs_readdir_page_needs_filling(page));
819 
820 	nfs_readdir_free_pages(pages, array_size);
821 out_release_label:
822 	nfs4_label_free(entry->label);
823 out:
824 	nfs_free_fattr(entry->fattr);
825 	nfs_free_fhandle(entry->fh);
826 	kfree(entry);
827 	return status;
828 }
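/*
 * The temporary XDR buffer is sized from the server's dtsize; for
 * example a dtsize of 32768 needs (32768 + 4095) >> PAGE_SHIFT = 8
 * pages with a 4 KiB page size.  Those pages only hold the raw RPC
 * reply and are freed again once the entries have been copied into
 * the directory's page cache.
 */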
829 
830 static void nfs_readdir_page_put(struct nfs_readdir_descriptor *desc)
831 {
832 	put_page(desc->page);
833 	desc->page = NULL;
834 }
835 
836 static void
837 nfs_readdir_page_unlock_and_put_cached(struct nfs_readdir_descriptor *desc)
838 {
839 	unlock_page(desc->page);
840 	nfs_readdir_page_put(desc);
841 }
842 
843 static struct page *
844 nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc)
845 {
846 	return nfs_readdir_page_get_locked(desc->file->f_mapping,
847 					   desc->page_index,
848 					   desc->last_cookie);
849 }
850 
851 /*
852  * Returns 0 if desc->dir_cookie was found on page desc->page_index,
853  * leaving that page locked to prevent its removal from the page cache.
854  */
855 static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
856 {
857 	struct inode *inode = file_inode(desc->file);
858 	struct nfs_inode *nfsi = NFS_I(inode);
859 	__be32 verf[NFS_DIR_VERIFIER_SIZE];
860 	int res;
861 
862 	desc->page = nfs_readdir_page_get_cached(desc);
863 	if (!desc->page)
864 		return -ENOMEM;
865 	if (nfs_readdir_page_needs_filling(desc->page)) {
866 		res = nfs_readdir_xdr_to_array(desc, desc->page,
867 					       nfsi->cookieverf, verf);
868 		if (res < 0) {
869 			nfs_readdir_page_unlock_and_put_cached(desc);
870 			if (res == -EBADCOOKIE || res == -ENOTSYNC) {
871 				invalidate_inode_pages2(desc->file->f_mapping);
872 				desc->page_index = 0;
873 				return -EAGAIN;
874 			}
875 			return res;
876 		}
877 		memcpy(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf));
878 	}
879 	res = nfs_readdir_search_array(desc);
880 	if (res == 0) {
881 		nfsi->page_index = desc->page_index;
882 		return 0;
883 	}
884 	nfs_readdir_page_unlock_and_put_cached(desc);
885 	return res;
886 }
887 
888 /* Search for desc->dir_cookie from the beginning of the page cache */
889 static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
890 {
891 	int res;
892 
893 	do {
894 		if (desc->page_index == 0) {
895 			desc->current_index = 0;
896 			desc->prev_index = 0;
897 			desc->last_cookie = 0;
898 		}
899 		res = find_and_lock_cache_page(desc);
900 	} while (res == -EAGAIN);
901 	return res;
902 }
903 
904 /*
905  * Once we've found the start of the dirent within a page: fill 'er up...
906  */
907 static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
908 {
909 	struct file	*file = desc->file;
910 	struct nfs_inode *nfsi = NFS_I(file_inode(file));
911 	struct nfs_cache_array *array;
912 	unsigned int i = 0;
913 
914 	array = kmap(desc->page);
915 	for (i = desc->cache_entry_index; i < array->size; i++) {
916 		struct nfs_cache_array_entry *ent;
917 
918 		ent = &array->array[i];
919 		if (!dir_emit(desc->ctx, ent->name, ent->name_len,
920 		    nfs_compat_user_ino64(ent->ino), ent->d_type)) {
921 			desc->eof = true;
922 			break;
923 		}
924 		memcpy(desc->verf, nfsi->cookieverf, sizeof(desc->verf));
925 		if (i < (array->size-1))
926 			desc->dir_cookie = array->array[i+1].cookie;
927 		else
928 			desc->dir_cookie = array->last_cookie;
929 		if (nfs_readdir_use_cookie(file))
930 			desc->ctx->pos = desc->dir_cookie;
931 		else
932 			desc->ctx->pos++;
933 		if (desc->duped != 0)
934 			desc->duped = 1;
935 	}
936 	if (array->page_is_eof)
937 		desc->eof = true;
938 
939 	kunmap(desc->page);
940 	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %llu\n",
941 			(unsigned long long)desc->dir_cookie);
942 }
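/*
 * After each entry is emitted, desc->dir_cookie is advanced to the
 * following entry's cookie (or to the page's last_cookie at the end of
 * the array), and ctx->pos is advanced either to that cookie or to the
 * next entry index, depending on nfs_readdir_use_cookie().  This is
 * what lets a partially consumed getdents() buffer resume cleanly on
 * the next call.
 */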
943 
944 /*
945  * If we cannot find a cookie in our cache, we suspect that this is
946  * because it points to a deleted file, so we ask the server to return
947  * whatever it thinks is the next entry. We then feed this to filldir.
948  * If all goes well, we should then be able to find our way round the
949  * cache on the next call to readdir_search_pagecache();
950  *
951  * NOTE: we cannot add the anonymous page to the pagecache because
952  *	 the data it contains might not be page aligned. Besides,
953  *	 we should already have a complete representation of the
954  *	 directory in the page cache by the time we get here.
955  */
956 static int uncached_readdir(struct nfs_readdir_descriptor *desc)
957 {
958 	struct page	*page = NULL;
959 	__be32		verf[NFS_DIR_VERIFIER_SIZE];
960 	int		status;
961 
962 	dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
963 			(unsigned long long)desc->dir_cookie);
964 
965 	page = alloc_page(GFP_HIGHUSER);
966 	if (!page) {
967 		status = -ENOMEM;
968 		goto out;
969 	}
970 
971 	desc->page_index = 0;
972 	desc->last_cookie = desc->dir_cookie;
973 	desc->page = page;
974 	desc->duped = 0;
975 
976 	nfs_readdir_page_init_array(page, desc->dir_cookie);
977 	status = nfs_readdir_xdr_to_array(desc, page, desc->verf, verf);
978 	if (status < 0)
979 		goto out_release;
980 
981 	nfs_do_filldir(desc);
982 
983  out_release:
984 	nfs_readdir_clear_array(desc->page);
985 	nfs_readdir_page_put(desc);
986  out:
987 	dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
988 			__func__, status);
989 	return status;
990 }
991 
992 /* The file offset position represents the dirent entry number.
993  * A last cookie cache takes care of the common case of reading
994  * the whole directory.
995  */
996 static int nfs_readdir(struct file *file, struct dir_context *ctx)
997 {
998 	struct dentry	*dentry = file_dentry(file);
999 	struct inode	*inode = d_inode(dentry);
1000 	struct nfs_open_dir_context *dir_ctx = file->private_data;
1001 	struct nfs_readdir_descriptor *desc;
1002 	int res;
1003 
1004 	dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
1005 			file, (long long)ctx->pos);
1006 	nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
1007 
1008 	/*
1009 	 * ctx->pos points to the dirent entry number.
1010 	 * desc->dir_cookie has the cookie for the next entry. We have
1011 	 * to either find the entry with the appropriate number or
1012 	 * revalidate the cookie.
1013 	 */
1014 	if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) {
1015 		res = nfs_revalidate_mapping(inode, file->f_mapping);
1016 		if (res < 0)
1017 			goto out;
1018 	}
1019 
1020 	res = -ENOMEM;
1021 	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
1022 	if (!desc)
1023 		goto out;
1024 	desc->file = file;
1025 	desc->ctx = ctx;
1026 	desc->plus = nfs_use_readdirplus(inode, ctx);
1027 
1028 	spin_lock(&file->f_lock);
1029 	desc->dir_cookie = dir_ctx->dir_cookie;
1030 	desc->dup_cookie = dir_ctx->dup_cookie;
1031 	desc->duped = dir_ctx->duped;
1032 	desc->attr_gencount = dir_ctx->attr_gencount;
1033 	memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
1034 	spin_unlock(&file->f_lock);
1035 
1036 	do {
1037 		res = readdir_search_pagecache(desc);
1038 
1039 		if (res == -EBADCOOKIE) {
1040 			res = 0;
1041 			/* This means either end of directory */
1042 			if (desc->dir_cookie && !desc->eof) {
1043 				/* Or that the server has 'lost' a cookie */
1044 				res = uncached_readdir(desc);
1045 				if (res == 0)
1046 					continue;
1047 				if (res == -EBADCOOKIE || res == -ENOTSYNC)
1048 					res = 0;
1049 			}
1050 			break;
1051 		}
1052 		if (res == -ETOOSMALL && desc->plus) {
1053 			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
1054 			nfs_zap_caches(inode);
1055 			desc->page_index = 0;
1056 			desc->plus = false;
1057 			desc->eof = false;
1058 			continue;
1059 		}
1060 		if (res < 0)
1061 			break;
1062 
1063 		nfs_do_filldir(desc);
1064 		nfs_readdir_page_unlock_and_put_cached(desc);
1065 	} while (!desc->eof);
1066 
1067 	spin_lock(&file->f_lock);
1068 	dir_ctx->dir_cookie = desc->dir_cookie;
1069 	dir_ctx->dup_cookie = desc->dup_cookie;
1070 	dir_ctx->duped = desc->duped;
1071 	dir_ctx->attr_gencount = desc->attr_gencount;
1072 	memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
1073 	spin_unlock(&file->f_lock);
1074 
1075 	kfree(desc);
1076 
1077 out:
1078 	dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
1079 	return res;
1080 }
1081 
1082 static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
1083 {
1084 	struct nfs_open_dir_context *dir_ctx = filp->private_data;
1085 
1086 	dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
1087 			filp, offset, whence);
1088 
1089 	switch (whence) {
1090 	default:
1091 		return -EINVAL;
1092 	case SEEK_SET:
1093 		if (offset < 0)
1094 			return -EINVAL;
1095 		spin_lock(&filp->f_lock);
1096 		break;
1097 	case SEEK_CUR:
1098 		if (offset == 0)
1099 			return filp->f_pos;
1100 		spin_lock(&filp->f_lock);
1101 		offset += filp->f_pos;
1102 		if (offset < 0) {
1103 			spin_unlock(&filp->f_lock);
1104 			return -EINVAL;
1105 		}
1106 	}
1107 	if (offset != filp->f_pos) {
1108 		filp->f_pos = offset;
1109 		if (nfs_readdir_use_cookie(filp))
1110 			dir_ctx->dir_cookie = offset;
1111 		else
1112 			dir_ctx->dir_cookie = 0;
1113 		if (offset == 0)
1114 			memset(dir_ctx->verf, 0, sizeof(dir_ctx->verf));
1115 		dir_ctx->duped = 0;
1116 	}
1117 	spin_unlock(&filp->f_lock);
1118 	return offset;
1119 }
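/*
 * The seek target is interpreted in the same units that nfs_readdir()
 * reports: a raw server cookie when nfs_readdir_use_cookie() is true,
 * otherwise an entry index.  As with seekdir() in general, only
 * offsets previously obtained from the same directory stream are
 * meaningful here.
 */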
1120 
1121 /*
1122  * All directory operations under NFS are synchronous, so fsync()
1123  * is a dummy operation.
1124  */
1125 static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
1126 			 int datasync)
1127 {
1128 	dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
1129 
1130 	nfs_inc_stats(file_inode(filp), NFSIOS_VFSFSYNC);
1131 	return 0;
1132 }
1133 
1134 /**
1135  * nfs_force_lookup_revalidate - Mark the directory as having changed
1136  * @dir: pointer to directory inode
1137  *
1138  * This forces the revalidation code in nfs_lookup_revalidate() to do a
1139  * full lookup on all child dentries of 'dir' whenever a change occurs
1140  * on the server that might have invalidated our dcache.
1141  *
1142  * Note that we reserve bit '0' as a tag to let us know when a dentry
1143  * was revalidated while holding a delegation on its inode.
1144  *
1145  * The caller should be holding dir->i_lock
1146  */
1147 void nfs_force_lookup_revalidate(struct inode *dir)
1148 {
1149 	NFS_I(dir)->cache_change_attribute += 2;
1150 }
1151 EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
1152 
1153 /**
1154  * nfs_verify_change_attribute - Detects NFS remote directory changes
1155  * @dir: pointer to parent directory inode
1156  * @verf: previously saved change attribute
1157  *
1158  * Return "false" if the verifier doesn't match the change attribute.
1159  * This would usually indicate that the directory contents have changed on
1160  * the server, and that any dentries need revalidating.
1161  */
1162 static bool nfs_verify_change_attribute(struct inode *dir, unsigned long verf)
1163 {
1164 	return (verf & ~1UL) == nfs_save_change_attribute(dir);
1165 }
1166 
1167 static void nfs_set_verifier_delegated(unsigned long *verf)
1168 {
1169 	*verf |= 1UL;
1170 }
1171 
1172 #if IS_ENABLED(CONFIG_NFS_V4)
1173 static void nfs_unset_verifier_delegated(unsigned long *verf)
1174 {
1175 	*verf &= ~1UL;
1176 }
1177 #endif /* IS_ENABLED(CONFIG_NFS_V4) */
1178 
1179 static bool nfs_test_verifier_delegated(unsigned long verf)
1180 {
1181 	return verf & 1;
1182 }
1183 
1184 static bool nfs_verifier_is_delegated(struct dentry *dentry)
1185 {
1186 	return nfs_test_verifier_delegated(dentry->d_time);
1187 }
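/*
 * dentry->d_time stores the parent directory's change attribute with
 * bit 0 reserved as a flag: nfs_force_lookup_revalidate() bumps the
 * attribute by 2 so the flag survives, and the helpers above set/test
 * it to mean "this dentry was revalidated while holding a delegation",
 * in which case parent directory changes can be ignored until the
 * delegation is returned.
 */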
1188 
1189 static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf)
1190 {
1191 	struct inode *inode = d_inode(dentry);
1192 
1193 	if (!nfs_verifier_is_delegated(dentry) &&
1194 	    !nfs_verify_change_attribute(d_inode(dentry->d_parent), verf))
1195 		goto out;
1196 	if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
1197 		nfs_set_verifier_delegated(&verf);
1198 out:
1199 	dentry->d_time = verf;
1200 }
1201 
1202 /**
1203  * nfs_set_verifier - save a parent directory verifier in the dentry
1204  * @dentry: pointer to dentry
1205  * @verf: verifier to save
1206  *
1207  * Saves the parent directory verifier in @dentry. If the inode has
1208  * a delegation, we also tag the dentry as having been revalidated
1209  * while holding a delegation so that we know we don't have to
1210  * look it up again after a directory change.
1211  */
1212 void nfs_set_verifier(struct dentry *dentry, unsigned long verf)
1213 {
1214 
1215 	spin_lock(&dentry->d_lock);
1216 	nfs_set_verifier_locked(dentry, verf);
1217 	spin_unlock(&dentry->d_lock);
1218 }
1219 EXPORT_SYMBOL_GPL(nfs_set_verifier);
1220 
1221 #if IS_ENABLED(CONFIG_NFS_V4)
1222 /**
1223  * nfs_clear_verifier_delegated - clear the dir verifier delegation tag
1224  * @inode: pointer to inode
1225  *
1226  * Iterates through the dentries in the inode alias list and clears
1227  * the tag used to indicate that the dentry has been revalidated
1228  * while holding a delegation.
1229  * This function is intended for use when the delegation is being
1230  * returned or revoked.
1231  */
1232 void nfs_clear_verifier_delegated(struct inode *inode)
1233 {
1234 	struct dentry *alias;
1235 
1236 	if (!inode)
1237 		return;
1238 	spin_lock(&inode->i_lock);
1239 	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
1240 		spin_lock(&alias->d_lock);
1241 		nfs_unset_verifier_delegated(&alias->d_time);
1242 		spin_unlock(&alias->d_lock);
1243 	}
1244 	spin_unlock(&inode->i_lock);
1245 }
1246 EXPORT_SYMBOL_GPL(nfs_clear_verifier_delegated);
1247 #endif /* IS_ENABLED(CONFIG_NFS_V4) */
1248 
1249 /*
1250  * A check for whether or not the parent directory has changed.
1251  * In the case it has, we assume that the dentries are untrustworthy
1252  * and may need to be looked up again.
1253  * If rcu_walk prevents us from performing a full check, return 0.
1254  */
1255 static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
1256 			      int rcu_walk)
1257 {
1258 	if (IS_ROOT(dentry))
1259 		return 1;
1260 	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
1261 		return 0;
1262 	if (!nfs_verify_change_attribute(dir, dentry->d_time))
1263 		return 0;
1264 	/* Revalidate nfsi->cache_change_attribute before we declare a match */
1265 	if (nfs_mapping_need_revalidate_inode(dir)) {
1266 		if (rcu_walk)
1267 			return 0;
1268 		if (__nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
1269 			return 0;
1270 	}
1271 	if (!nfs_verify_change_attribute(dir, dentry->d_time))
1272 		return 0;
1273 	return 1;
1274 }
1275 
1276 /*
1277  * Use intent information to check whether or not we're going to do
1278  * an O_EXCL create using this path component.
1279  */
1280 static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags)
1281 {
1282 	if (NFS_PROTO(dir)->version == 2)
1283 		return 0;
1284 	return flags & LOOKUP_EXCL;
1285 }
1286 
1287 /*
1288  * Inode and filehandle revalidation for lookups.
1289  *
1290  * We force revalidation in the cases where the VFS sets LOOKUP_REVAL,
1291  * or if the intent information indicates that we're about to open this
1292  * particular file and the "nocto" mount flag is not set.
1293  *
1294  */
1295 static
1296 int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
1297 {
1298 	struct nfs_server *server = NFS_SERVER(inode);
1299 	int ret;
1300 
1301 	if (IS_AUTOMOUNT(inode))
1302 		return 0;
1303 
1304 	if (flags & LOOKUP_OPEN) {
1305 		switch (inode->i_mode & S_IFMT) {
1306 		case S_IFREG:
1307 			/* An NFSv4 OPEN will revalidate later */
1308 			if (server->caps & NFS_CAP_ATOMIC_OPEN)
1309 				goto out;
1310 			fallthrough;
1311 		case S_IFDIR:
1312 			if (server->flags & NFS_MOUNT_NOCTO)
1313 				break;
1314 			/* NFS close-to-open cache consistency validation */
1315 			goto out_force;
1316 		}
1317 	}
1318 
1319 	/* VFS wants an on-the-wire revalidation */
1320 	if (flags & LOOKUP_REVAL)
1321 		goto out_force;
1322 out:
1323 	return (inode->i_nlink == 0) ? -ESTALE : 0;
1324 out_force:
1325 	if (flags & LOOKUP_RCU)
1326 		return -ECHILD;
1327 	ret = __nfs_revalidate_inode(server, inode);
1328 	if (ret != 0)
1329 		return ret;
1330 	goto out;
1331 }
1332 
1333 /*
1334  * We judge how long we want to trust negative
1335  * dentries by looking at the parent inode mtime.
1336  *
1337  * If parent mtime has changed, we revalidate, else we wait for a
1338  * period corresponding to the parent's attribute cache timeout value.
1339  *
1340  * If LOOKUP_RCU prevents us from performing a full check, return 1
1341  * suggesting a reval is needed.
1342  *
1343  * Note that when creating a new file, or looking up a rename target,
1344  * then it shouldn't be necessary to revalidate a negative dentry.
1345  */
1346 static inline
1347 int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
1348 		       unsigned int flags)
1349 {
1350 	if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
1351 		return 0;
1352 	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
1353 		return 1;
1354 	return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
1355 }
1356 
1357 static int
1358 nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
1359 			   struct inode *inode, int error)
1360 {
1361 	switch (error) {
1362 	case 1:
1363 		dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
1364 			__func__, dentry);
1365 		return 1;
1366 	case 0:
1367 		nfs_mark_for_revalidate(dir);
1368 		if (inode && S_ISDIR(inode->i_mode)) {
1369 			/* Purge readdir caches. */
1370 			nfs_zap_caches(inode);
1371 			/*
1372 			 * We can't d_drop the root of a disconnected tree:
1373 			 * its d_hash is on the s_anon list and d_drop() would hide
1374 			 * it from shrink_dcache_for_unmount(), leading to busy
1375 			 * inodes on unmount and further oopses.
1376 			 */
1377 			if (IS_ROOT(dentry))
1378 				return 1;
1379 		}
1380 		dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
1381 				__func__, dentry);
1382 		return 0;
1383 	}
1384 	dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
1385 				__func__, dentry, error);
1386 	return error;
1387 }
1388 
1389 static int
1390 nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry,
1391 			       unsigned int flags)
1392 {
1393 	int ret = 1;
1394 	if (nfs_neg_need_reval(dir, dentry, flags)) {
1395 		if (flags & LOOKUP_RCU)
1396 			return -ECHILD;
1397 		ret = 0;
1398 	}
1399 	return nfs_lookup_revalidate_done(dir, dentry, NULL, ret);
1400 }
1401 
1402 static int
1403 nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
1404 				struct inode *inode)
1405 {
1406 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1407 	return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
1408 }
1409 
1410 static int
1411 nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
1412 			     struct inode *inode)
1413 {
1414 	struct nfs_fh *fhandle;
1415 	struct nfs_fattr *fattr;
1416 	struct nfs4_label *label;
1417 	unsigned long dir_verifier;
1418 	int ret;
1419 
1420 	ret = -ENOMEM;
1421 	fhandle = nfs_alloc_fhandle();
1422 	fattr = nfs_alloc_fattr();
1423 	label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
1424 	if (fhandle == NULL || fattr == NULL || IS_ERR(label))
1425 		goto out;
1426 
1427 	dir_verifier = nfs_save_change_attribute(dir);
1428 	ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label);
1429 	if (ret < 0) {
1430 		switch (ret) {
1431 		case -ESTALE:
1432 		case -ENOENT:
1433 			ret = 0;
1434 			break;
1435 		case -ETIMEDOUT:
1436 			if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
1437 				ret = 1;
1438 		}
1439 		goto out;
1440 	}
1441 	ret = 0;
1442 	if (nfs_compare_fh(NFS_FH(inode), fhandle))
1443 		goto out;
1444 	if (nfs_refresh_inode(inode, fattr) < 0)
1445 		goto out;
1446 
1447 	nfs_setsecurity(inode, fattr, label);
1448 	nfs_set_verifier(dentry, dir_verifier);
1449 
1450 	/* set a readdirplus hint that we had a cache miss */
1451 	nfs_force_use_readdirplus(dir);
1452 	ret = 1;
1453 out:
1454 	nfs_free_fattr(fattr);
1455 	nfs_free_fhandle(fhandle);
1456 	nfs4_label_free(label);
1457 	return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
1458 }
1459 
1460 /*
1461  * This is called every time the dcache has a lookup hit,
1462  * and we should check whether we can really trust that
1463  * lookup.
1464  *
1465  * NOTE! The hit can be a negative hit too, don't assume
1466  * we have an inode!
1467  *
1468  * If the parent directory is seen to have changed, we throw out the
1469  * cached dentry and do a new lookup.
1470  */
1471 static int
1472 nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
1473 			 unsigned int flags)
1474 {
1475 	struct inode *inode;
1476 	int error;
1477 
1478 	nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
1479 	inode = d_inode(dentry);
1480 
1481 	if (!inode)
1482 		return nfs_lookup_revalidate_negative(dir, dentry, flags);
1483 
1484 	if (is_bad_inode(inode)) {
1485 		dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
1486 				__func__, dentry);
1487 		goto out_bad;
1488 	}
1489 
1490 	if (nfs_verifier_is_delegated(dentry))
1491 		return nfs_lookup_revalidate_delegated(dir, dentry, inode);
1492 
1493 	/* Force a full look up iff the parent directory has changed */
1494 	if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) &&
1495 	    nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
1496 		error = nfs_lookup_verify_inode(inode, flags);
1497 		if (error) {
1498 			if (error == -ESTALE)
1499 				nfs_zap_caches(dir);
1500 			goto out_bad;
1501 		}
1502 		nfs_advise_use_readdirplus(dir);
1503 		goto out_valid;
1504 	}
1505 
1506 	if (flags & LOOKUP_RCU)
1507 		return -ECHILD;
1508 
1509 	if (NFS_STALE(inode))
1510 		goto out_bad;
1511 
1512 	trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
1513 	error = nfs_lookup_revalidate_dentry(dir, dentry, inode);
1514 	trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
1515 	return error;
1516 out_valid:
1517 	return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
1518 out_bad:
1519 	if (flags & LOOKUP_RCU)
1520 		return -ECHILD;
1521 	return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
1522 }
1523 
1524 static int
1525 __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
1526 			int (*reval)(struct inode *, struct dentry *, unsigned int))
1527 {
1528 	struct dentry *parent;
1529 	struct inode *dir;
1530 	int ret;
1531 
1532 	if (flags & LOOKUP_RCU) {
1533 		parent = READ_ONCE(dentry->d_parent);
1534 		dir = d_inode_rcu(parent);
1535 		if (!dir)
1536 			return -ECHILD;
1537 		ret = reval(dir, dentry, flags);
1538 		if (parent != READ_ONCE(dentry->d_parent))
1539 			return -ECHILD;
1540 	} else {
1541 		parent = dget_parent(dentry);
1542 		ret = reval(d_inode(parent), dentry, flags);
1543 		dput(parent);
1544 	}
1545 	return ret;
1546 }
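/*
 * In RCU walk mode the parent is sampled without taking a reference,
 * so it is re-checked after the revalidation call; if the dentry was
 * moved in the meantime the result is discarded and -ECHILD forces the
 * VFS to retry in ref-walk mode.
 */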
1547 
1548 static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1549 {
1550 	return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate);
1551 }
1552 
1553 /*
1554  * A weaker form of d_revalidate for revalidating just the d_inode(dentry)
1555  * when we don't really care about the dentry name. This is called when a
1556  * pathwalk ends on a dentry that was not found via a normal lookup in the
1557  * parent dir (e.g.: ".", "..", procfs symlinks or mountpoint traversals).
1558  *
1559  * In this situation, we just want to verify that the inode itself is OK
1560  * since the dentry might have changed on the server.
1561  */
1562 static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
1563 {
1564 	struct inode *inode = d_inode(dentry);
1565 	int error = 0;
1566 
1567 	/*
1568 	 * I believe we can only get a negative dentry here in the case of a
1569 	 * procfs-style symlink. Just assume it's correct for now, but we may
1570 	 * eventually need to do something more here.
1571 	 */
1572 	if (!inode) {
1573 		dfprintk(LOOKUPCACHE, "%s: %pd2 has negative inode\n",
1574 				__func__, dentry);
1575 		return 1;
1576 	}
1577 
1578 	if (is_bad_inode(inode)) {
1579 		dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
1580 				__func__, dentry);
1581 		return 0;
1582 	}
1583 
1584 	error = nfs_lookup_verify_inode(inode, flags);
1585 	dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
1586 			__func__, inode->i_ino, error ? "invalid" : "valid");
1587 	return !error;
1588 }
1589 
1590 /*
1591  * This is called from dput() when d_count is going to 0.
1592  */
1593 static int nfs_dentry_delete(const struct dentry *dentry)
1594 {
1595 	dfprintk(VFS, "NFS: dentry_delete(%pd2, %x)\n",
1596 		dentry, dentry->d_flags);
1597 
1598 	/* Unhash any dentry with a stale inode */
1599 	if (d_really_is_positive(dentry) && NFS_STALE(d_inode(dentry)))
1600 		return 1;
1601 
1602 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
1603 		/* Unhash it, so that ->d_iput() would be called */
1604 		return 1;
1605 	}
1606 	if (!(dentry->d_sb->s_flags & SB_ACTIVE)) {
1607 		/* Unhash it, so that ancestors of killed async unlink
1608 		 * files will be cleaned up during umount */
1609 		return 1;
1610 	}
1611 	return 0;
1612 
1613 }
1614 
1615 /* Ensure that we revalidate inode->i_nlink */
1616 static void nfs_drop_nlink(struct inode *inode)
1617 {
1618 	spin_lock(&inode->i_lock);
1619 	/* drop the inode if we're reasonably sure this is the last link */
1620 	if (inode->i_nlink > 0)
1621 		drop_nlink(inode);
1622 	NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter();
1623 	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE
1624 		| NFS_INO_INVALID_CTIME
1625 		| NFS_INO_INVALID_OTHER
1626 		| NFS_INO_REVAL_FORCED;
1627 	spin_unlock(&inode->i_lock);
1628 }
1629 
1630 /*
1631  * Called when the dentry loses inode.
1632  * We use it to clean up silly-renamed files.
1633  */
1634 static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
1635 {
1636 	if (S_ISDIR(inode->i_mode))
1637 		/* drop any readdir cache as it could easily be old */
1638 		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
1639 
1640 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
1641 		nfs_complete_unlink(dentry, inode);
1642 		nfs_drop_nlink(inode);
1643 	}
1644 	iput(inode);
1645 }
1646 
1647 static void nfs_d_release(struct dentry *dentry)
1648 {
1649 	/* free cached devname value, if it survived that far */
1650 	if (unlikely(dentry->d_fsdata)) {
1651 		if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
1652 			WARN_ON(1);
1653 		else
1654 			kfree(dentry->d_fsdata);
1655 	}
1656 }
1657 
1658 const struct dentry_operations nfs_dentry_operations = {
1659 	.d_revalidate	= nfs_lookup_revalidate,
1660 	.d_weak_revalidate	= nfs_weak_revalidate,
1661 	.d_delete	= nfs_dentry_delete,
1662 	.d_iput		= nfs_dentry_iput,
1663 	.d_automount	= nfs_d_automount,
1664 	.d_release	= nfs_d_release,
1665 };
1666 EXPORT_SYMBOL_GPL(nfs_dentry_operations);
1667 
1668 struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
1669 {
1670 	struct dentry *res;
1671 	struct inode *inode = NULL;
1672 	struct nfs_fh *fhandle = NULL;
1673 	struct nfs_fattr *fattr = NULL;
1674 	struct nfs4_label *label = NULL;
1675 	unsigned long dir_verifier;
1676 	int error;
1677 
1678 	dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry);
1679 	nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
1680 
1681 	if (unlikely(dentry->d_name.len > NFS_SERVER(dir)->namelen))
1682 		return ERR_PTR(-ENAMETOOLONG);
1683 
1684 	/*
1685 	 * If we're doing an exclusive create, optimize away the lookup
1686 	 * but don't hash the dentry.
1687 	 */
1688 	if (nfs_is_exclusive_create(dir, flags) || flags & LOOKUP_RENAME_TARGET)
1689 		return NULL;
1690 
1691 	res = ERR_PTR(-ENOMEM);
1692 	fhandle = nfs_alloc_fhandle();
1693 	fattr = nfs_alloc_fattr();
1694 	if (fhandle == NULL || fattr == NULL)
1695 		goto out;
1696 
1697 	label = nfs4_label_alloc(NFS_SERVER(dir), GFP_NOWAIT);
1698 	if (IS_ERR(label))
1699 		goto out;
1700 
1701 	dir_verifier = nfs_save_change_attribute(dir);
1702 	trace_nfs_lookup_enter(dir, dentry, flags);
1703 	error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label);
1704 	if (error == -ENOENT)
1705 		goto no_entry;
1706 	if (error < 0) {
1707 		res = ERR_PTR(error);
1708 		goto out_label;
1709 	}
1710 	inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label);
1711 	res = ERR_CAST(inode);
1712 	if (IS_ERR(res))
1713 		goto out_label;
1714 
1715 	/* Notify readdir to use READDIRPLUS */
1716 	nfs_force_use_readdirplus(dir);
1717 
1718 no_entry:
1719 	res = d_splice_alias(inode, dentry);
1720 	if (res != NULL) {
1721 		if (IS_ERR(res))
1722 			goto out_label;
1723 		dentry = res;
1724 	}
1725 	nfs_set_verifier(dentry, dir_verifier);
1726 out_label:
1727 	trace_nfs_lookup_exit(dir, dentry, flags, error);
1728 	nfs4_label_free(label);
1729 out:
1730 	nfs_free_fattr(fattr);
1731 	nfs_free_fhandle(fhandle);
1732 	return res;
1733 }
1734 EXPORT_SYMBOL_GPL(nfs_lookup);
1735 
1736 #if IS_ENABLED(CONFIG_NFS_V4)
1737 static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
1738 
1739 const struct dentry_operations nfs4_dentry_operations = {
1740 	.d_revalidate	= nfs4_lookup_revalidate,
1741 	.d_weak_revalidate	= nfs_weak_revalidate,
1742 	.d_delete	= nfs_dentry_delete,
1743 	.d_iput		= nfs_dentry_iput,
1744 	.d_automount	= nfs_d_automount,
1745 	.d_release	= nfs_d_release,
1746 };
1747 EXPORT_SYMBOL_GPL(nfs4_dentry_operations);
1748 
1749 static fmode_t flags_to_mode(int flags)
1750 {
1751 	fmode_t res = (__force fmode_t)flags & FMODE_EXEC;
1752 	if ((flags & O_ACCMODE) != O_WRONLY)
1753 		res |= FMODE_READ;
1754 	if ((flags & O_ACCMODE) != O_RDONLY)
1755 		res |= FMODE_WRITE;
1756 	return res;
1757 }
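/*
 * For example: O_RDONLY maps to FMODE_READ, O_WRONLY to FMODE_WRITE,
 * and O_RDWR to FMODE_READ | FMODE_WRITE, with the FMODE_EXEC bit
 * masked straight out of the incoming open flags.
 */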
1758 
1759 static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp)
1760 {
1761 	return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp);
1762 }
1763 
1764 static int do_open(struct inode *inode, struct file *filp)
1765 {
1766 	nfs_fscache_open_file(inode, filp);
1767 	return 0;
1768 }
1769 
1770 static int nfs_finish_open(struct nfs_open_context *ctx,
1771 			   struct dentry *dentry,
1772 			   struct file *file, unsigned open_flags)
1773 {
1774 	int err;
1775 
1776 	err = finish_open(file, dentry, do_open);
1777 	if (err)
1778 		goto out;
1779 	if (S_ISREG(file->f_path.dentry->d_inode->i_mode))
1780 		nfs_file_set_open_context(file, ctx);
1781 	else
1782 		err = -EOPENSTALE;
1783 out:
1784 	return err;
1785 }
1786 
1787 int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
1788 		    struct file *file, unsigned open_flags,
1789 		    umode_t mode)
1790 {
1791 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
1792 	struct nfs_open_context *ctx;
1793 	struct dentry *res;
1794 	struct iattr attr = { .ia_valid = ATTR_OPEN };
1795 	struct inode *inode;
1796 	unsigned int lookup_flags = 0;
1797 	bool switched = false;
1798 	int created = 0;
1799 	int err;
1800 
1801 	/* Expect a negative dentry */
1802 	BUG_ON(d_inode(dentry));
1803 
1804 	dfprintk(VFS, "NFS: atomic_open(%s/%lu), %pd\n",
1805 			dir->i_sb->s_id, dir->i_ino, dentry);
1806 
1807 	err = nfs_check_flags(open_flags);
1808 	if (err)
1809 		return err;
1810 
1811 	/* NFS only supports OPEN on regular files */
1812 	if ((open_flags & O_DIRECTORY)) {
1813 		if (!d_in_lookup(dentry)) {
1814 			/*
1815 			 * Hashed negative dentry with O_DIRECTORY: dentry was
1816 			 * revalidated and is fine, no need to perform lookup
1817 			 * again
1818 			 */
1819 			return -ENOENT;
1820 		}
1821 		lookup_flags = LOOKUP_OPEN|LOOKUP_DIRECTORY;
1822 		goto no_open;
1823 	}
1824 
1825 	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
1826 		return -ENAMETOOLONG;
1827 
1828 	if (open_flags & O_CREAT) {
1829 		struct nfs_server *server = NFS_SERVER(dir);
1830 
1831 		if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
1832 			mode &= ~current_umask();
1833 
1834 		attr.ia_valid |= ATTR_MODE;
1835 		attr.ia_mode = mode;
1836 	}
1837 	if (open_flags & O_TRUNC) {
1838 		attr.ia_valid |= ATTR_SIZE;
1839 		attr.ia_size = 0;
1840 	}
1841 
1842 	if (!(open_flags & O_CREAT) && !d_in_lookup(dentry)) {
1843 		d_drop(dentry);
1844 		switched = true;
1845 		dentry = d_alloc_parallel(dentry->d_parent,
1846 					  &dentry->d_name, &wq);
1847 		if (IS_ERR(dentry))
1848 			return PTR_ERR(dentry);
1849 		if (unlikely(!d_in_lookup(dentry)))
1850 			return finish_no_open(file, dentry);
1851 	}
1852 
1853 	ctx = create_nfs_open_context(dentry, open_flags, file);
1854 	err = PTR_ERR(ctx);
1855 	if (IS_ERR(ctx))
1856 		goto out;
1857 
1858 	trace_nfs_atomic_open_enter(dir, ctx, open_flags);
1859 	inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, &created);
1860 	if (created)
1861 		file->f_mode |= FMODE_CREATED;
1862 	if (IS_ERR(inode)) {
1863 		err = PTR_ERR(inode);
1864 		trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
1865 		put_nfs_open_context(ctx);
1866 		d_drop(dentry);
1867 		switch (err) {
1868 		case -ENOENT:
1869 			d_splice_alias(NULL, dentry);
1870 			nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1871 			break;
1872 		case -EISDIR:
1873 		case -ENOTDIR:
1874 			goto no_open;
1875 		case -ELOOP:
1876 			if (!(open_flags & O_NOFOLLOW))
1877 				goto no_open;
1878 			break;
1879 			/* case -EINVAL: */
1880 		default:
1881 			break;
1882 		}
1883 		goto out;
1884 	}
1885 
1886 	err = nfs_finish_open(ctx, ctx->dentry, file, open_flags);
1887 	trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
1888 	put_nfs_open_context(ctx);
1889 out:
1890 	if (unlikely(switched)) {
1891 		d_lookup_done(dentry);
1892 		dput(dentry);
1893 	}
1894 	return err;
1895 
1896 no_open:
1897 	res = nfs_lookup(dir, dentry, lookup_flags);
1898 	if (switched) {
1899 		d_lookup_done(dentry);
1900 		if (!res)
1901 			res = dentry;
1902 		else
1903 			dput(dentry);
1904 	}
1905 	if (IS_ERR(res))
1906 		return PTR_ERR(res);
1907 	return finish_no_open(file, res);
1908 }
1909 EXPORT_SYMBOL_GPL(nfs_atomic_open);
1910 
1911 static int
1912 nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
1913 			  unsigned int flags)
1914 {
1915 	struct inode *inode;
1916 
1917 	if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
1918 		goto full_reval;
1919 	if (d_mountpoint(dentry))
1920 		goto full_reval;
1921 
1922 	inode = d_inode(dentry);
1923 
1924 	/* We can't create new files in nfs_open_revalidate(), so we
1925 	 * optimize away revalidation of negative dentries.
1926 	 */
1927 	if (inode == NULL)
1928 		goto full_reval;
1929 
1930 	if (nfs_verifier_is_delegated(dentry))
1931 		return nfs_lookup_revalidate_delegated(dir, dentry, inode);
1932 
1933 	/* NFS only supports OPEN on regular files */
1934 	if (!S_ISREG(inode->i_mode))
1935 		goto full_reval;
1936 
1937 	/* We cannot do exclusive creation on a positive dentry */
1938 	if (flags & (LOOKUP_EXCL | LOOKUP_REVAL))
1939 		goto reval_dentry;
1940 
1941 	/* Check if the directory changed */
1942 	if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU))
1943 		goto reval_dentry;
1944 
1945 	/* Let f_op->open() actually open (and revalidate) the file */
1946 	return 1;
1947 reval_dentry:
1948 	if (flags & LOOKUP_RCU)
1949 		return -ECHILD;
1950 	return nfs_lookup_revalidate_dentry(dir, dentry, inode);
1951 
1952 full_reval:
1953 	return nfs_do_lookup_revalidate(dir, dentry, flags);
1954 }
1955 
1956 static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1957 {
1958 	return __nfs_lookup_revalidate(dentry, flags,
1959 			nfs4_do_lookup_revalidate);
1960 }
1961 
1962 #endif /* CONFIG_NFS_V4 */
1963 
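/*
 * Instantiate a dentry from the file handle and attributes returned by a
 * create-type operation.  A missing file handle or missing attributes are
 * fetched from the server first; the inode is then obtained through
 * nfs_fhget() and spliced into the dcache.  Returns the result of
 * d_splice_alias(), or an ERR_PTR() on error.
 */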
1964 struct dentry *
1965 nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
1966 				struct nfs_fattr *fattr,
1967 				struct nfs4_label *label)
1968 {
1969 	struct dentry *parent = dget_parent(dentry);
1970 	struct inode *dir = d_inode(parent);
1971 	struct inode *inode;
1972 	struct dentry *d;
1973 	int error;
1974 
1975 	d_drop(dentry);
1976 
1977 	if (fhandle->size == 0) {
1978 		error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, NULL);
1979 		if (error)
1980 			goto out_error;
1981 	}
1982 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1983 	if (!(fattr->valid & NFS_ATTR_FATTR)) {
1984 		struct nfs_server *server = NFS_SB(dentry->d_sb);
1985 		error = server->nfs_client->rpc_ops->getattr(server, fhandle,
1986 				fattr, NULL, NULL);
1987 		if (error < 0)
1988 			goto out_error;
1989 	}
1990 	inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label);
1991 	d = d_splice_alias(inode, dentry);
1992 out:
1993 	dput(parent);
1994 	return d;
1995 out_error:
1996 	nfs_mark_for_revalidate(dir);
1997 	d = ERR_PTR(error);
1998 	goto out;
1999 }
2000 EXPORT_SYMBOL_GPL(nfs_add_or_obtain);
2001 
2002 /*
2003  * Code common to create, mkdir, and mknod.
2004  */
2005 int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
2006 				struct nfs_fattr *fattr,
2007 				struct nfs4_label *label)
2008 {
2009 	struct dentry *d;
2010 
2011 	d = nfs_add_or_obtain(dentry, fhandle, fattr, label);
2012 	if (IS_ERR(d))
2013 		return PTR_ERR(d);
2014 
2015 	/* Callers don't care about the returned dentry */
2016 	dput(d);
2017 	return 0;
2018 }
2019 EXPORT_SYMBOL_GPL(nfs_instantiate);
2020 
2021 /*
2022  * Following a failed create operation, we drop the dentry rather
2023  * than retain a negative dentry. This avoids a problem in the event
2024  * that the operation succeeded on the server, but an error in the
2025  * reply path made it appear to have failed.
2026  */
2027 int nfs_create(struct inode *dir, struct dentry *dentry,
2028 		umode_t mode, bool excl)
2029 {
2030 	struct iattr attr;
2031 	int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
2032 	int error;
2033 
2034 	dfprintk(VFS, "NFS: create(%s/%lu), %pd\n",
2035 			dir->i_sb->s_id, dir->i_ino, dentry);
2036 
2037 	attr.ia_mode = mode;
2038 	attr.ia_valid = ATTR_MODE;
2039 
2040 	trace_nfs_create_enter(dir, dentry, open_flags);
2041 	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
2042 	trace_nfs_create_exit(dir, dentry, open_flags, error);
2043 	if (error != 0)
2044 		goto out_err;
2045 	return 0;
2046 out_err:
2047 	d_drop(dentry);
2048 	return error;
2049 }
2050 EXPORT_SYMBOL_GPL(nfs_create);
2051 
2052 /*
2053  * See comments for nfs_proc_create regarding failed operations.
2054  */
2055 int
2056 nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
2057 {
2058 	struct iattr attr;
2059 	int status;
2060 
2061 	dfprintk(VFS, "NFS: mknod(%s/%lu), %pd\n",
2062 			dir->i_sb->s_id, dir->i_ino, dentry);
2063 
2064 	attr.ia_mode = mode;
2065 	attr.ia_valid = ATTR_MODE;
2066 
2067 	trace_nfs_mknod_enter(dir, dentry);
2068 	status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
2069 	trace_nfs_mknod_exit(dir, dentry, status);
2070 	if (status != 0)
2071 		goto out_err;
2072 	return 0;
2073 out_err:
2074 	d_drop(dentry);
2075 	return status;
2076 }
2077 EXPORT_SYMBOL_GPL(nfs_mknod);
2078 
2079 /*
2080  * See comments for nfs_proc_create regarding failed operations.
2081  */
2082 int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2083 {
2084 	struct iattr attr;
2085 	int error;
2086 
2087 	dfprintk(VFS, "NFS: mkdir(%s/%lu), %pd\n",
2088 			dir->i_sb->s_id, dir->i_ino, dentry);
2089 
2090 	attr.ia_valid = ATTR_MODE;
2091 	attr.ia_mode = mode | S_IFDIR;
2092 
2093 	trace_nfs_mkdir_enter(dir, dentry);
2094 	error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
2095 	trace_nfs_mkdir_exit(dir, dentry, error);
2096 	if (error != 0)
2097 		goto out_err;
2098 	return 0;
2099 out_err:
2100 	d_drop(dentry);
2101 	return error;
2102 }
2103 EXPORT_SYMBOL_GPL(nfs_mkdir);
2104 
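/*
 * The object has disappeared on the server: make any positive dentry
 * for it go away.
 */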
2105 static void nfs_dentry_handle_enoent(struct dentry *dentry)
2106 {
2107 	if (simple_positive(dentry))
2108 		d_delete(dentry);
2109 }
2110 
2111 int nfs_rmdir(struct inode *dir, struct dentry *dentry)
2112 {
2113 	int error;
2114 
2115 	dfprintk(VFS, "NFS: rmdir(%s/%lu), %pd\n",
2116 			dir->i_sb->s_id, dir->i_ino, dentry);
2117 
2118 	trace_nfs_rmdir_enter(dir, dentry);
2119 	if (d_really_is_positive(dentry)) {
2120 		down_write(&NFS_I(d_inode(dentry))->rmdir_sem);
2121 		error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
2122 		/* Ensure the VFS deletes this inode */
2123 		switch (error) {
2124 		case 0:
2125 			clear_nlink(d_inode(dentry));
2126 			break;
2127 		case -ENOENT:
2128 			nfs_dentry_handle_enoent(dentry);
2129 		}
2130 		up_write(&NFS_I(d_inode(dentry))->rmdir_sem);
2131 	} else
2132 		error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
2133 	trace_nfs_rmdir_exit(dir, dentry, error);
2134 
2135 	return error;
2136 }
2137 EXPORT_SYMBOL_GPL(nfs_rmdir);
2138 
2139 /*
2140  * Remove a file after making sure there are no pending writes,
2141  * and after checking that the file has only one user.
2142  *
2143  * We invalidate the attribute cache and free the inode prior to the operation
2144  * to avoid possible races if the server reuses the inode.
2145  */
2146 static int nfs_safe_remove(struct dentry *dentry)
2147 {
2148 	struct inode *dir = d_inode(dentry->d_parent);
2149 	struct inode *inode = d_inode(dentry);
2150 	int error = -EBUSY;
2151 
2152 	dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry);
2153 
2154 	/* If the dentry was sillyrenamed, we simply call d_delete() */
2155 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
2156 		error = 0;
2157 		goto out;
2158 	}
2159 
2160 	trace_nfs_remove_enter(dir, dentry);
2161 	if (inode != NULL) {
2162 		error = NFS_PROTO(dir)->remove(dir, dentry);
2163 		if (error == 0)
2164 			nfs_drop_nlink(inode);
2165 	} else
2166 		error = NFS_PROTO(dir)->remove(dir, dentry);
2167 	if (error == -ENOENT)
2168 		nfs_dentry_handle_enoent(dentry);
2169 	trace_nfs_remove_exit(dir, dentry, error);
2170 out:
2171 	return error;
2172 }
2173 
2174 /*  We do silly rename. In case sillyrename() returns -EBUSY, the inode
2175  *  belongs to an active ".nfs..." file and we return -EBUSY.
2176  *
2177  *  If sillyrename() returns 0, we do nothing, otherwise we unlink.
2178  */
2179 int nfs_unlink(struct inode *dir, struct dentry *dentry)
2180 {
2181 	int error;
2182 	int need_rehash = 0;
2183 
2184 	dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
2185 		dir->i_ino, dentry);
2186 
2187 	trace_nfs_unlink_enter(dir, dentry);
2188 	spin_lock(&dentry->d_lock);
2189 	if (d_count(dentry) > 1) {
2190 		spin_unlock(&dentry->d_lock);
2191 		/* Start asynchronous writeout of the inode */
2192 		write_inode_now(d_inode(dentry), 0);
2193 		error = nfs_sillyrename(dir, dentry);
2194 		goto out;
2195 	}
2196 	if (!d_unhashed(dentry)) {
2197 		__d_drop(dentry);
2198 		need_rehash = 1;
2199 	}
2200 	spin_unlock(&dentry->d_lock);
2201 	error = nfs_safe_remove(dentry);
2202 	if (!error || error == -ENOENT) {
2203 		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2204 	} else if (need_rehash)
2205 		d_rehash(dentry);
2206 out:
2207 	trace_nfs_unlink_exit(dir, dentry, error);
2208 	return error;
2209 }
2210 EXPORT_SYMBOL_GPL(nfs_unlink);
2211 
2212 /*
2213  * To create a symbolic link, most file systems instantiate a new inode,
2214  * add a page to it containing the path, then write it out to the disk
2215  * using prepare_write/commit_write.
2216  *
2217  * Unfortunately the NFS client can't create the in-core inode first
2218  * because it needs a file handle to create an in-core inode (see
2219  * fs/nfs/inode.c:nfs_fhget).  We only have a file handle *after* the
2220  * symlink request has completed on the server.
2221  *
2222  * So instead we allocate a raw page, copy the symname into it, then do
2223  * the SYMLINK request with the page as the buffer.  If it succeeds, we
2224  * now have a new file handle and can instantiate an in-core NFS inode
2225  * and move the raw page into its mapping.
2226  */
2227 int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
2228 {
2229 	struct page *page;
2230 	char *kaddr;
2231 	struct iattr attr;
2232 	unsigned int pathlen = strlen(symname);
2233 	int error;
2234 
2235 	dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s)\n", dir->i_sb->s_id,
2236 		dir->i_ino, dentry, symname);
2237 
2238 	if (pathlen > PAGE_SIZE)
2239 		return -ENAMETOOLONG;
2240 
2241 	attr.ia_mode = S_IFLNK | S_IRWXUGO;
2242 	attr.ia_valid = ATTR_MODE;
2243 
2244 	page = alloc_page(GFP_USER);
2245 	if (!page)
2246 		return -ENOMEM;
2247 
2248 	kaddr = page_address(page);
2249 	memcpy(kaddr, symname, pathlen);
2250 	if (pathlen < PAGE_SIZE)
2251 		memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
2252 
2253 	trace_nfs_symlink_enter(dir, dentry);
2254 	error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
2255 	trace_nfs_symlink_exit(dir, dentry, error);
2256 	if (error != 0) {
2257 		dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s) error %d\n",
2258 			dir->i_sb->s_id, dir->i_ino,
2259 			dentry, symname, error);
2260 		d_drop(dentry);
2261 		__free_page(page);
2262 		return error;
2263 	}
2264 
2265 	/*
2266 	 * No big deal if we can't add this page to the page cache here.
2267 	 * READLINK will get the missing page from the server if needed.
2268 	 */
2269 	if (!add_to_page_cache_lru(page, d_inode(dentry)->i_mapping, 0,
2270 							GFP_KERNEL)) {
2271 		SetPageUptodate(page);
2272 		unlock_page(page);
2273 		/*
2274 		 * add_to_page_cache_lru() grabs an extra page refcount.
2275 		 * Drop it here to avoid leaking this page later.
2276 		 */
2277 		put_page(page);
2278 	} else
2279 		__free_page(page);
2280 
2281 	return 0;
2282 }
2283 EXPORT_SYMBOL_GPL(nfs_symlink);
2284 
2285 int
2286 nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
2287 {
2288 	struct inode *inode = d_inode(old_dentry);
2289 	int error;
2290 
2291 	dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n",
2292 		old_dentry, dentry);
2293 
2294 	trace_nfs_link_enter(inode, dir, dentry);
2295 	d_drop(dentry);
2296 	error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
2297 	if (error == 0) {
2298 		ihold(inode);
2299 		d_add(dentry, inode);
2300 	}
2301 	trace_nfs_link_exit(inode, dir, dentry, error);
2302 	return error;
2303 }
2304 EXPORT_SYMBOL_GPL(nfs_link);
2305 
2306 /*
2307  * RENAME
2308  * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
2309  * different file handle for the same inode after a rename (e.g. when
2310  * moving to a different directory). A fail-safe method to do so would
2311  * be to look up old_dir/old_name, create a link to new_dir/new_name and
2312  * rename the old file using the sillyrename stuff. This way, the original
2313  * file in old_dir will go away when the last process iput()s the inode.
2314  *
2315  * FIXED.
2316  *
2317  * It actually works quite well. One needs to have the possibility for
2318  * at least one ".nfs..." file in each directory the file ever gets
2319  * moved or linked to which happens automagically with the new
2320  * implementation that only depends on the dcache stuff instead of
2321  * using the inode layer
2322  *
2323  * Unfortunately, things are a little more complicated than indicated
2324  * above. For a cross-directory move, we want to make sure we can get
2325  * rid of the old inode after the operation.  This means there must be
2326  * no pending writes (if it's a file), and the use count must be 1.
2327  * If these conditions are met, we can drop the dentries before doing
2328  * the rename.
2329  */
2330 int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2331 	       struct inode *new_dir, struct dentry *new_dentry,
2332 	       unsigned int flags)
2333 {
2334 	struct inode *old_inode = d_inode(old_dentry);
2335 	struct inode *new_inode = d_inode(new_dentry);
2336 	struct dentry *dentry = NULL, *rehash = NULL;
2337 	struct rpc_task *task;
2338 	int error = -EBUSY;
2339 
2340 	if (flags)
2341 		return -EINVAL;
2342 
2343 	dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n",
2344 		 old_dentry, new_dentry,
2345 		 d_count(new_dentry));
2346 
2347 	trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry);
2348 	/*
2349 	 * For non-directories, check whether the target is busy and if so,
2350 	 * make a copy of the dentry and then do a silly-rename. If the
2351 	 * silly-rename succeeds, the copied dentry is hashed and becomes
2352 	 * the new target.
2353 	 */
2354 	if (new_inode && !S_ISDIR(new_inode->i_mode)) {
2355 		/*
2356 		 * To prevent any new references to the target during the
2357 		 * rename, we unhash the dentry in advance.
2358 		 */
2359 		if (!d_unhashed(new_dentry)) {
2360 			d_drop(new_dentry);
2361 			rehash = new_dentry;
2362 		}
2363 
2364 		if (d_count(new_dentry) > 2) {
2365 			int err;
2366 
2367 			/* copy the target dentry's name */
2368 			dentry = d_alloc(new_dentry->d_parent,
2369 					 &new_dentry->d_name);
2370 			if (!dentry)
2371 				goto out;
2372 
2373 			/* silly-rename the existing target ... */
2374 			err = nfs_sillyrename(new_dir, new_dentry);
2375 			if (err)
2376 				goto out;
2377 
2378 			new_dentry = dentry;
2379 			rehash = NULL;
2380 			new_inode = NULL;
2381 		}
2382 	}
2383 
2384 	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
2385 	if (IS_ERR(task)) {
2386 		error = PTR_ERR(task);
2387 		goto out;
2388 	}
2389 
2390 	error = rpc_wait_for_completion_task(task);
2391 	if (error != 0) {
2392 		((struct nfs_renamedata *)task->tk_calldata)->cancelled = 1;
2393 		/* Paired with the atomic_dec_and_test() barrier in rpc_do_put_task() */
2394 		smp_wmb();
2395 	} else
2396 		error = task->tk_status;
2397 	rpc_put_task(task);
2398 	/* Ensure the inode attributes are revalidated */
2399 	if (error == 0) {
2400 		spin_lock(&old_inode->i_lock);
2401 		NFS_I(old_inode)->attr_gencount = nfs_inc_attr_generation_counter();
2402 		NFS_I(old_inode)->cache_validity |= NFS_INO_INVALID_CHANGE
2403 			| NFS_INO_INVALID_CTIME
2404 			| NFS_INO_REVAL_FORCED;
2405 		spin_unlock(&old_inode->i_lock);
2406 	}
2407 out:
2408 	if (rehash)
2409 		d_rehash(rehash);
2410 	trace_nfs_rename_exit(old_dir, old_dentry,
2411 			new_dir, new_dentry, error);
2412 	if (!error) {
2413 		if (new_inode != NULL)
2414 			nfs_drop_nlink(new_inode);
2415 		/*
2416 		 * The d_move() should be here instead of in an async RPC completion
2417 		 * handler because we need the proper locks to move the dentry.  If
2418 		 * we're interrupted by a signal, the async RPC completion handler
2419 		 * should mark the directories for revalidation.
2420 		 */
2421 		d_move(old_dentry, new_dentry);
2422 		nfs_set_verifier(old_dentry,
2423 					nfs_save_change_attribute(new_dir));
2424 	} else if (error == -ENOENT)
2425 		nfs_dentry_handle_enoent(old_dentry);
2426 
2427 	/* new dentry created? */
2428 	if (dentry)
2429 		dput(dentry);
2430 	return error;
2431 }
2432 EXPORT_SYMBOL_GPL(nfs_rename);
2433 
2434 static DEFINE_SPINLOCK(nfs_access_lru_lock);
2435 static LIST_HEAD(nfs_access_lru_list);
2436 static atomic_long_t nfs_access_nr_entries;
2437 
2438 static unsigned long nfs_access_max_cachesize = 4*1024*1024;
2439 module_param(nfs_access_max_cachesize, ulong, 0644);
2440 MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length");
2441 
2442 static void nfs_access_free_entry(struct nfs_access_entry *entry)
2443 {
2444 	put_cred(entry->cred);
2445 	kfree_rcu(entry, rcu_head);
2446 	smp_mb__before_atomic();
2447 	atomic_long_dec(&nfs_access_nr_entries);
2448 	smp_mb__after_atomic();
2449 }
2450 
2451 static void nfs_access_free_list(struct list_head *head)
2452 {
2453 	struct nfs_access_entry *cache;
2454 
2455 	while (!list_empty(head)) {
2456 		cache = list_entry(head->next, struct nfs_access_entry, lru);
2457 		list_del(&cache->lru);
2458 		nfs_access_free_entry(cache);
2459 	}
2460 }
2461 
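/*
 * Walk the global LRU of inodes holding cached ACCESS results and reclaim
 * up to @nr_to_scan of the oldest entries.  Inodes that still have entries
 * left are rotated to the tail of the LRU; inodes that become empty are
 * removed from it.  Returns the number of entries freed.
 */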
2462 static unsigned long
2463 nfs_do_access_cache_scan(unsigned int nr_to_scan)
2464 {
2465 	LIST_HEAD(head);
2466 	struct nfs_inode *nfsi, *next;
2467 	struct nfs_access_entry *cache;
2468 	long freed = 0;
2469 
2470 	spin_lock(&nfs_access_lru_lock);
2471 	list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
2472 		struct inode *inode;
2473 
2474 		if (nr_to_scan-- == 0)
2475 			break;
2476 		inode = &nfsi->vfs_inode;
2477 		spin_lock(&inode->i_lock);
2478 		if (list_empty(&nfsi->access_cache_entry_lru))
2479 			goto remove_lru_entry;
2480 		cache = list_entry(nfsi->access_cache_entry_lru.next,
2481 				struct nfs_access_entry, lru);
2482 		list_move(&cache->lru, &head);
2483 		rb_erase(&cache->rb_node, &nfsi->access_cache);
2484 		freed++;
2485 		if (!list_empty(&nfsi->access_cache_entry_lru))
2486 			list_move_tail(&nfsi->access_cache_inode_lru,
2487 					&nfs_access_lru_list);
2488 		else {
2489 remove_lru_entry:
2490 			list_del_init(&nfsi->access_cache_inode_lru);
2491 			smp_mb__before_atomic();
2492 			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
2493 			smp_mb__after_atomic();
2494 		}
2495 		spin_unlock(&inode->i_lock);
2496 	}
2497 	spin_unlock(&nfs_access_lru_lock);
2498 	nfs_access_free_list(&head);
2499 	return freed;
2500 }
2501 
2502 unsigned long
2503 nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
2504 {
2505 	int nr_to_scan = sc->nr_to_scan;
2506 	gfp_t gfp_mask = sc->gfp_mask;
2507 
2508 	if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
2509 		return SHRINK_STOP;
2510 	return nfs_do_access_cache_scan(nr_to_scan);
2511 }
2512 
2513 
2514 unsigned long
2515 nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
2516 {
2517 	return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
2518 }
2519 
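/*
 * If the total number of cached ACCESS entries exceeds
 * nfs_access_max_cachesize, shrink the cache by up to 100 entries.
 */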
2520 static void
2521 nfs_access_cache_enforce_limit(void)
2522 {
2523 	long nr_entries = atomic_long_read(&nfs_access_nr_entries);
2524 	unsigned long diff;
2525 	unsigned int nr_to_scan;
2526 
2527 	if (nr_entries < 0 || nr_entries <= nfs_access_max_cachesize)
2528 		return;
2529 	nr_to_scan = 100;
2530 	diff = nr_entries - nfs_access_max_cachesize;
2531 	if (diff < nr_to_scan)
2532 		nr_to_scan = diff;
2533 	nfs_do_access_cache_scan(nr_to_scan);
2534 }
2535 
2536 static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
2537 {
2538 	struct rb_root *root_node = &nfsi->access_cache;
2539 	struct rb_node *n;
2540 	struct nfs_access_entry *entry;
2541 
2542 	/* Unhook entries from the cache */
2543 	while ((n = rb_first(root_node)) != NULL) {
2544 		entry = rb_entry(n, struct nfs_access_entry, rb_node);
2545 		rb_erase(n, root_node);
2546 		list_move(&entry->lru, head);
2547 	}
2548 	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
2549 }
2550 
2551 void nfs_access_zap_cache(struct inode *inode)
2552 {
2553 	LIST_HEAD(head);
2554 
2555 	if (test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags) == 0)
2556 		return;
2557 	/* Remove from the global LRU list */
2558 	spin_lock(&nfs_access_lru_lock);
2559 	if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
2560 		list_del_init(&NFS_I(inode)->access_cache_inode_lru);
2561 
2562 	spin_lock(&inode->i_lock);
2563 	__nfs_access_zap_cache(NFS_I(inode), &head);
2564 	spin_unlock(&inode->i_lock);
2565 	spin_unlock(&nfs_access_lru_lock);
2566 	nfs_access_free_list(&head);
2567 }
2568 EXPORT_SYMBOL_GPL(nfs_access_zap_cache);
2569 
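/*
 * Find the cached ACCESS entry for @cred in the per-inode rbtree, which is
 * keyed by cred_fscmp().  Caller must hold inode->i_lock.
 */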
2570 static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, const struct cred *cred)
2571 {
2572 	struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
2573 
2574 	while (n != NULL) {
2575 		struct nfs_access_entry *entry =
2576 			rb_entry(n, struct nfs_access_entry, rb_node);
2577 		int cmp = cred_fscmp(cred, entry->cred);
2578 
2579 		if (cmp < 0)
2580 			n = n->rb_left;
2581 		else if (cmp > 0)
2582 			n = n->rb_right;
2583 		else
2584 			return entry;
2585 	}
2586 	return NULL;
2587 }
2588 
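/*
 * Slow-path ACCESS cache lookup under inode->i_lock.  If the attribute
 * cache is suspect, revalidate the inode once (when @may_block allows it)
 * and retry before giving up.  Returns 0 and fills in @res on a hit, or a
 * negative errno otherwise.
 */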
2589 static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block)
2590 {
2591 	struct nfs_inode *nfsi = NFS_I(inode);
2592 	struct nfs_access_entry *cache;
2593 	bool retry = true;
2594 	int err;
2595 
2596 	spin_lock(&inode->i_lock);
2597 	for(;;) {
2598 		if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
2599 			goto out_zap;
2600 		cache = nfs_access_search_rbtree(inode, cred);
2601 		err = -ENOENT;
2602 		if (cache == NULL)
2603 			goto out;
2604 		/* Found an entry, is our attribute cache valid? */
2605 		if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
2606 			break;
2607 		if (!retry)
2608 			break;
2609 		err = -ECHILD;
2610 		if (!may_block)
2611 			goto out;
2612 		spin_unlock(&inode->i_lock);
2613 		err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
2614 		if (err)
2615 			return err;
2616 		spin_lock(&inode->i_lock);
2617 		retry = false;
2618 	}
2619 	res->cred = cache->cred;
2620 	res->mask = cache->mask;
2621 	list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
2622 	err = 0;
2623 out:
2624 	spin_unlock(&inode->i_lock);
2625 	return err;
2626 out_zap:
2627 	spin_unlock(&inode->i_lock);
2628 	nfs_access_zap_cache(inode);
2629 	return -ENOENT;
2630 }
2631 
2632 static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res)
2633 {
2634 	/* Only check the most recently returned cache entry,
2635 	 * but do it without locking.
2636 	 */
2637 	struct nfs_inode *nfsi = NFS_I(inode);
2638 	struct nfs_access_entry *cache;
2639 	int err = -ECHILD;
2640 	struct list_head *lh;
2641 
2642 	rcu_read_lock();
2643 	if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
2644 		goto out;
2645 	lh = rcu_dereference(list_tail_rcu(&nfsi->access_cache_entry_lru));
2646 	cache = list_entry(lh, struct nfs_access_entry, lru);
2647 	if (lh == &nfsi->access_cache_entry_lru ||
2648 	    cred_fscmp(cred, cache->cred) != 0)
2649 		cache = NULL;
2650 	if (cache == NULL)
2651 		goto out;
2652 	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
2653 		goto out;
2654 	res->cred = cache->cred;
2655 	res->mask = cache->mask;
2656 	err = 0;
2657 out:
2658 	rcu_read_unlock();
2659 	return err;
2660 }
2661 
2662 int nfs_access_get_cached(struct inode *inode, const struct cred *cred,
2663 			  struct nfs_access_entry *res, bool may_block)
2664 {
2665 	int status;
2666 
2667 	status = nfs_access_get_cached_rcu(inode, cred, res);
2668 	if (status != 0)
2669 		status = nfs_access_get_cached_locked(inode, cred, res,
2670 		    may_block);
2671 
2672 	return status;
2673 }
2674 EXPORT_SYMBOL_GPL(nfs_access_get_cached);
2675 
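/*
 * Insert @set into the inode's ACCESS cache rbtree and entry LRU.  If an
 * entry for the same credential already exists, it is replaced and freed.
 */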
2676 static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
2677 {
2678 	struct nfs_inode *nfsi = NFS_I(inode);
2679 	struct rb_root *root_node = &nfsi->access_cache;
2680 	struct rb_node **p = &root_node->rb_node;
2681 	struct rb_node *parent = NULL;
2682 	struct nfs_access_entry *entry;
2683 	int cmp;
2684 
2685 	spin_lock(&inode->i_lock);
2686 	while (*p != NULL) {
2687 		parent = *p;
2688 		entry = rb_entry(parent, struct nfs_access_entry, rb_node);
2689 		cmp = cred_fscmp(set->cred, entry->cred);
2690 
2691 		if (cmp < 0)
2692 			p = &parent->rb_left;
2693 		else if (cmp > 0)
2694 			p = &parent->rb_right;
2695 		else
2696 			goto found;
2697 	}
2698 	rb_link_node(&set->rb_node, parent, p);
2699 	rb_insert_color(&set->rb_node, root_node);
2700 	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
2701 	spin_unlock(&inode->i_lock);
2702 	return;
2703 found:
2704 	rb_replace_node(parent, &set->rb_node, root_node);
2705 	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
2706 	list_del(&entry->lru);
2707 	spin_unlock(&inode->i_lock);
2708 	nfs_access_free_entry(entry);
2709 }
2710 
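/*
 * Cache the result of an ACCESS call: copy @set into a new entry, add it
 * to the per-inode cache, put the inode on the global shrinker LRU, and
 * trim the cache if it has grown past nfs_access_max_cachesize.
 */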
2711 void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
2712 {
2713 	struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
2714 	if (cache == NULL)
2715 		return;
2716 	RB_CLEAR_NODE(&cache->rb_node);
2717 	cache->cred = get_cred(set->cred);
2718 	cache->mask = set->mask;
2719 
2720 	/* The above field assignments must be visible
2721 	 * before this item appears on the lru.  We cannot easily
2722 	 * use rcu_assign_pointer, so just force the memory barrier.
2723 	 */
2724 	smp_wmb();
2725 	nfs_access_add_rbtree(inode, cache);
2726 
2727 	/* Update accounting */
2728 	smp_mb__before_atomic();
2729 	atomic_long_inc(&nfs_access_nr_entries);
2730 	smp_mb__after_atomic();
2731 
2732 	/* Add inode to global LRU list */
2733 	if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
2734 		spin_lock(&nfs_access_lru_lock);
2735 		if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
2736 			list_add_tail(&NFS_I(inode)->access_cache_inode_lru,
2737 					&nfs_access_lru_list);
2738 		spin_unlock(&nfs_access_lru_lock);
2739 	}
2740 	nfs_access_cache_enforce_limit();
2741 }
2742 EXPORT_SYMBOL_GPL(nfs_access_add_cache);
2743 
2744 #define NFS_MAY_READ (NFS_ACCESS_READ)
2745 #define NFS_MAY_WRITE (NFS_ACCESS_MODIFY | \
2746 		NFS_ACCESS_EXTEND | \
2747 		NFS_ACCESS_DELETE)
2748 #define NFS_FILE_MAY_WRITE (NFS_ACCESS_MODIFY | \
2749 		NFS_ACCESS_EXTEND)
2750 #define NFS_DIR_MAY_WRITE NFS_MAY_WRITE
2751 #define NFS_MAY_LOOKUP (NFS_ACCESS_LOOKUP)
2752 #define NFS_MAY_EXECUTE (NFS_ACCESS_EXECUTE)
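/*
 * Translate the NFS ACCESS result bits into generic MAY_READ/MAY_WRITE/
 * MAY_EXEC permission bits, taking the object type into account (e.g.
 * LOOKUP maps to MAY_EXEC only for directories).
 */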
2753 static int
2754 nfs_access_calc_mask(u32 access_result, umode_t umode)
2755 {
2756 	int mask = 0;
2757 
2758 	if (access_result & NFS_MAY_READ)
2759 		mask |= MAY_READ;
2760 	if (S_ISDIR(umode)) {
2761 		if ((access_result & NFS_DIR_MAY_WRITE) == NFS_DIR_MAY_WRITE)
2762 			mask |= MAY_WRITE;
2763 		if ((access_result & NFS_MAY_LOOKUP) == NFS_MAY_LOOKUP)
2764 			mask |= MAY_EXEC;
2765 	} else if (S_ISREG(umode)) {
2766 		if ((access_result & NFS_FILE_MAY_WRITE) == NFS_FILE_MAY_WRITE)
2767 			mask |= MAY_WRITE;
2768 		if ((access_result & NFS_MAY_EXECUTE) == NFS_MAY_EXECUTE)
2769 			mask |= MAY_EXEC;
2770 	} else if (access_result & NFS_MAY_WRITE)
2771 		mask |= MAY_WRITE;
2772 	return mask;
2773 }
2774 
2775 void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result)
2776 {
2777 	entry->mask = access_result;
2778 }
2779 EXPORT_SYMBOL_GPL(nfs_access_set_mask);
2780 
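/*
 * Check permissions using the ACCESS cache.  On a cache miss (and when
 * blocking is allowed) issue an ACCESS RPC that asks for every bit we may
 * need later, cache the reply, and then test the requested @mask against
 * the rights the server granted.
 */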
2781 static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
2782 {
2783 	struct nfs_access_entry cache;
2784 	bool may_block = (mask & MAY_NOT_BLOCK) == 0;
2785 	int cache_mask = -1;
2786 	int status;
2787 
2788 	trace_nfs_access_enter(inode);
2789 
2790 	status = nfs_access_get_cached(inode, cred, &cache, may_block);
2791 	if (status == 0)
2792 		goto out_cached;
2793 
2794 	status = -ECHILD;
2795 	if (!may_block)
2796 		goto out;
2797 
2798 	/*
2799 	 * Determine which access bits we want to ask for...
2800 	 */
2801 	cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
2802 	if (nfs_server_capable(inode, NFS_CAP_XATTR)) {
2803 		cache.mask |= NFS_ACCESS_XAREAD | NFS_ACCESS_XAWRITE |
2804 		    NFS_ACCESS_XALIST;
2805 	}
2806 	if (S_ISDIR(inode->i_mode))
2807 		cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
2808 	else
2809 		cache.mask |= NFS_ACCESS_EXECUTE;
2810 	cache.cred = cred;
2811 	status = NFS_PROTO(inode)->access(inode, &cache);
2812 	if (status != 0) {
2813 		if (status == -ESTALE) {
2814 			if (!S_ISDIR(inode->i_mode))
2815 				nfs_set_inode_stale(inode);
2816 			else
2817 				nfs_zap_caches(inode);
2818 		}
2819 		goto out;
2820 	}
2821 	nfs_access_add_cache(inode, &cache);
2822 out_cached:
2823 	cache_mask = nfs_access_calc_mask(cache.mask, inode->i_mode);
2824 	if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
2825 		status = -EACCES;
2826 out:
2827 	trace_nfs_access_exit(inode, mask, cache_mask, status);
2828 	return status;
2829 }
2830 
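/* Map open(2) flags to the MAY_* bits that need to be checked. */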
2831 static int nfs_open_permission_mask(int openflags)
2832 {
2833 	int mask = 0;
2834 
2835 	if (openflags & __FMODE_EXEC) {
2836 		/* ONLY check exec rights */
2837 		mask = MAY_EXEC;
2838 	} else {
2839 		if ((openflags & O_ACCMODE) != O_WRONLY)
2840 			mask |= MAY_READ;
2841 		if ((openflags & O_ACCMODE) != O_RDONLY)
2842 			mask |= MAY_WRITE;
2843 	}
2844 
2845 	return mask;
2846 }
2847 
2848 int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags)
2849 {
2850 	return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
2851 }
2852 EXPORT_SYMBOL_GPL(nfs_may_open);
2853 
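/*
 * Check execute permission: directories always pass; for regular files,
 * revalidate the inode if its mode may be stale, then test the cached
 * mode bits with execute_ok().
 */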
2854 static int nfs_execute_ok(struct inode *inode, int mask)
2855 {
2856 	struct nfs_server *server = NFS_SERVER(inode);
2857 	int ret = 0;
2858 
2859 	if (S_ISDIR(inode->i_mode))
2860 		return 0;
2861 	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_OTHER)) {
2862 		if (mask & MAY_NOT_BLOCK)
2863 			return -ECHILD;
2864 		ret = __nfs_revalidate_inode(server, inode);
2865 	}
2866 	if (ret == 0 && !execute_ok(inode))
2867 		ret = -EACCES;
2868 	return ret;
2869 }
2870 
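/*
 * ->permission() for NFS inodes.  Cheap cases (symlinks, atomic opens of
 * regular files, write-only access to directories) are short-circuited;
 * everything else goes through nfs_do_access() and, for MAY_EXEC,
 * nfs_execute_ok().  Falls back to generic_permission() when the protocol
 * has no ACCESS operation.
 */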
2871 int nfs_permission(struct inode *inode, int mask)
2872 {
2873 	const struct cred *cred = current_cred();
2874 	int res = 0;
2875 
2876 	nfs_inc_stats(inode, NFSIOS_VFSACCESS);
2877 
2878 	if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
2879 		goto out;
2880 	/* Is this sys_access() ? */
2881 	if (mask & (MAY_ACCESS | MAY_CHDIR))
2882 		goto force_lookup;
2883 
2884 	switch (inode->i_mode & S_IFMT) {
2885 		case S_IFLNK:
2886 			goto out;
2887 		case S_IFREG:
2888 			if ((mask & MAY_OPEN) &&
2889 			   nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN))
2890 				return 0;
2891 			break;
2892 		case S_IFDIR:
2893 			/*
2894 			 * Optimize away all write operations, since the server
2895 			 * will check permissions when we perform the op.
2896 			 */
2897 			if ((mask & MAY_WRITE) && !(mask & MAY_READ))
2898 				goto out;
2899 	}
2900 
2901 force_lookup:
2902 	if (!NFS_PROTO(inode)->access)
2903 		goto out_notsup;
2904 
2905 	res = nfs_do_access(inode, cred, mask);
2906 out:
2907 	if (!res && (mask & MAY_EXEC))
2908 		res = nfs_execute_ok(inode, mask);
2909 
2910 	dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n",
2911 		inode->i_sb->s_id, inode->i_ino, mask, res);
2912 	return res;
2913 out_notsup:
2914 	if (mask & MAY_NOT_BLOCK)
2915 		return -ECHILD;
2916 
2917 	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
2918 	if (res == 0)
2919 		res = generic_permission(inode, mask);
2920 	goto out;
2921 }
2922 EXPORT_SYMBOL_GPL(nfs_permission);
2923 
2924 /*
2925  * Local variables:
2926  *  version-control: t
2927  *  kept-new-versions: 5
2928  * End:
2929  */
2930