xref: /openbmc/linux/fs/nfs/dir.c (revision d09e673f)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  linux/fs/nfs/dir.c
4  *
5  *  Copyright (C) 1992  Rick Sladkey
6  *
7  *  nfs directory handling functions
8  *
9  * 10 Apr 1996	Added silly rename for unlink	--okir
10  * 28 Sep 1996	Improved directory cache --okir
11  * 23 Aug 1997  Claus Heine claus@momo.math.rwth-aachen.de
12  *              Re-implemented silly rename for unlink, newly implemented
13  *              silly rename for nfs_rename() following the suggestions
14  *              of Olaf Kirch (okir) found in this file.
15  *              Following Linus comments on my original hack, this version
16  *              depends only on the dcache stuff and doesn't touch the inode
17  *              layer (iput() and friends).
18  *  6 Jun 1999	Cache readdir lookups in the page cache. -DaveM
19  */
20 
21 #include <linux/compat.h>
22 #include <linux/module.h>
23 #include <linux/time.h>
24 #include <linux/errno.h>
25 #include <linux/stat.h>
26 #include <linux/fcntl.h>
27 #include <linux/string.h>
28 #include <linux/kernel.h>
29 #include <linux/slab.h>
30 #include <linux/mm.h>
31 #include <linux/sunrpc/clnt.h>
32 #include <linux/nfs_fs.h>
33 #include <linux/nfs_mount.h>
34 #include <linux/pagemap.h>
35 #include <linux/pagevec.h>
36 #include <linux/namei.h>
37 #include <linux/mount.h>
38 #include <linux/swap.h>
39 #include <linux/sched.h>
40 #include <linux/kmemleak.h>
41 #include <linux/xattr.h>
42 
43 #include "delegation.h"
44 #include "iostat.h"
45 #include "internal.h"
46 #include "fscache.h"
47 
48 #include "nfstrace.h"
49 
50 /* #define NFS_DEBUG_VERBOSE 1 */
51 
52 static int nfs_opendir(struct inode *, struct file *);
53 static int nfs_closedir(struct inode *, struct file *);
54 static int nfs_readdir(struct file *, struct dir_context *);
55 static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
56 static loff_t nfs_llseek_dir(struct file *, loff_t, int);
57 static void nfs_readdir_clear_array(struct page*);
58 
59 const struct file_operations nfs_dir_operations = {
60 	.llseek		= nfs_llseek_dir,
61 	.read		= generic_read_dir,
62 	.iterate_shared	= nfs_readdir,
63 	.open		= nfs_opendir,
64 	.release	= nfs_closedir,
65 	.fsync		= nfs_fsync_dir,
66 };
67 
68 const struct address_space_operations nfs_dir_aops = {
69 	.freepage = nfs_readdir_clear_array,
70 };
71 
72 static struct nfs_open_dir_context *
73 alloc_nfs_open_dir_context(struct inode *dir)
74 {
75 	struct nfs_inode *nfsi = NFS_I(dir);
76 	struct nfs_open_dir_context *ctx;
77 
78 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
79 	if (ctx != NULL) {
80 		ctx->attr_gencount = nfsi->attr_gencount;
81 		spin_lock(&dir->i_lock);
82 		if (list_empty(&nfsi->open_files) &&
83 		    (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
84 			nfs_set_cache_invalid(dir,
85 					      NFS_INO_INVALID_DATA |
86 						      NFS_INO_REVAL_FORCED);
87 		list_add(&ctx->list, &nfsi->open_files);
88 		clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags);
89 		memcpy(ctx->verf, nfsi->cookieverf, sizeof(ctx->verf));
90 		spin_unlock(&dir->i_lock);
91 		return ctx;
92 	}
93 	return  ERR_PTR(-ENOMEM);
94 }
95 
96 static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx)
97 {
98 	spin_lock(&dir->i_lock);
99 	list_del(&ctx->list);
100 	spin_unlock(&dir->i_lock);
101 	kfree(ctx);
102 }
103 
104 /*
105  * Open file
106  */
107 static int
108 nfs_opendir(struct inode *inode, struct file *filp)
109 {
110 	int res = 0;
111 	struct nfs_open_dir_context *ctx;
112 
113 	dfprintk(FILE, "NFS: open dir(%pD2)\n", filp);
114 
115 	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
116 
117 	ctx = alloc_nfs_open_dir_context(inode);
118 	if (IS_ERR(ctx)) {
119 		res = PTR_ERR(ctx);
120 		goto out;
121 	}
122 	filp->private_data = ctx;
123 out:
124 	return res;
125 }
126 
127 static int
128 nfs_closedir(struct inode *inode, struct file *filp)
129 {
130 	put_nfs_open_dir_context(file_inode(filp), filp->private_data);
131 	return 0;
132 }
133 
/*
 * One directory entry as stored in a readdir cache array
 * (see nfs_readdir_add_to_array()).
 */
struct nfs_cache_array_entry {
	/* Cookie identifying this entry; filled from entry->prev_cookie */
	u64 cookie;
	/* Inode number reported for the entry */
	u64 ino;
	/* NUL-terminated copy of the name; freed by nfs_readdir_clear_array() */
	const char *name;
	/* Length of name, not counting the terminating NUL */
	unsigned int name_len;
	/* Entry type as passed to dir_emit() */
	unsigned char d_type;
};
141 
142 struct nfs_cache_array {
143 	u64 change_attr;
144 	u64 last_cookie;
145 	unsigned int size;
146 	unsigned char page_full : 1,
147 		      page_is_eof : 1,
148 		      cookies_are_ordered : 1;
149 	struct nfs_cache_array_entry array[];
150 };
151 
/*
 * State carried through one readdir call: what is being searched for,
 * where the search currently is, and how the call ended.
 */
struct nfs_readdir_descriptor {
	struct file	*file;		/* directory being read */
	struct page	*page;		/* array page currently searched/emitted */
	struct dir_context *ctx;	/* caller's context (position, emit) */
	pgoff_t		page_index;	/* page cache index being examined */
	u64		dir_cookie;	/* cookie to find; 0 means search by position */
	u64		last_cookie;	/* last_cookie of the preceding array page */
	u64		dup_cookie;	/* cookie seen when the position went backwards */
	loff_t		current_index;	/* entries held by the pages already skipped */
	loff_t		prev_index;	/* position of the previous cookie match */

	__be32		verf[NFS_DIR_VERIFIER_SIZE]; /* verifier copied in nfs_do_filldir() */
	unsigned long	dir_verifier;	/* change attribute saved before the READDIR call */
	unsigned long	timestamp;	/* jiffies sampled before the READDIR call */
	unsigned long	gencount;	/* attribute generation sampled before the call */
	unsigned long	attr_gencount;	/* snapshot of the inode's attr_gencount */
	unsigned int	cache_entry_index; /* first entry of page to emit */
	signed char duped;		/* loop detection: 0 off, -1 armed, 1 entry emitted since */
	bool plus;			/* request READDIRPLUS */
	bool eob;			/* dir_emit() refused an entry */
	bool eof;			/* end of directory reached */
};
174 
175 static void nfs_readdir_array_init(struct nfs_cache_array *array)
176 {
177 	memset(array, 0, sizeof(struct nfs_cache_array));
178 }
179 
180 static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie,
181 					u64 change_attr)
182 {
183 	struct nfs_cache_array *array;
184 
185 	array = kmap_atomic(page);
186 	nfs_readdir_array_init(array);
187 	array->change_attr = change_attr;
188 	array->last_cookie = last_cookie;
189 	array->cookies_are_ordered = 1;
190 	kunmap_atomic(array);
191 }
192 
193 /*
194  * we are freeing strings created by nfs_add_to_readdir_array()
195  */
196 static
197 void nfs_readdir_clear_array(struct page *page)
198 {
199 	struct nfs_cache_array *array;
200 	int i;
201 
202 	array = kmap_atomic(page);
203 	for (i = 0; i < array->size; i++)
204 		kfree(array->array[i].name);
205 	nfs_readdir_array_init(array);
206 	kunmap_atomic(array);
207 }
208 
209 static struct page *
210 nfs_readdir_page_array_alloc(u64 last_cookie, gfp_t gfp_flags)
211 {
212 	struct page *page = alloc_page(gfp_flags);
213 	if (page)
214 		nfs_readdir_page_init_array(page, last_cookie, 0);
215 	return page;
216 }
217 
/* Free the names held by @page and drop the page reference; NULL is a no-op. */
static void nfs_readdir_page_array_free(struct page *page)
{
	if (!page)
		return;

	nfs_readdir_clear_array(page);
	put_page(page);
}
225 
226 static void nfs_readdir_array_set_eof(struct nfs_cache_array *array)
227 {
228 	array->page_is_eof = 1;
229 	array->page_full = 1;
230 }
231 
232 static bool nfs_readdir_array_is_full(struct nfs_cache_array *array)
233 {
234 	return array->page_full;
235 }
236 
237 /*
238  * the caller is responsible for freeing qstr.name
239  * when called by nfs_readdir_add_to_array, the strings will be freed in
240  * nfs_clear_readdir_array()
241  */
242 static const char *nfs_readdir_copy_name(const char *name, unsigned int len)
243 {
244 	const char *ret = kmemdup_nul(name, len, GFP_KERNEL);
245 
246 	/*
247 	 * Avoid a kmemleak false positive. The pointer to the name is stored
248 	 * in a page cache page which kmemleak does not scan.
249 	 */
250 	if (ret != NULL)
251 		kmemleak_not_leak(ret);
252 	return ret;
253 }
254 
255 static size_t nfs_readdir_array_maxentries(void)
256 {
257 	return (PAGE_SIZE - sizeof(struct nfs_cache_array)) /
258 	       sizeof(struct nfs_cache_array_entry);
259 }
260 
261 /*
262  * Check that the next array entry lies entirely within the page bounds
263  */
264 static int nfs_readdir_array_can_expand(struct nfs_cache_array *array)
265 {
266 	if (array->page_full)
267 		return -ENOSPC;
268 	if (array->size == nfs_readdir_array_maxentries()) {
269 		array->page_full = 1;
270 		return -ENOSPC;
271 	}
272 	return 0;
273 }
274 
275 static
276 int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
277 {
278 	struct nfs_cache_array *array;
279 	struct nfs_cache_array_entry *cache_entry;
280 	const char *name;
281 	int ret;
282 
283 	name = nfs_readdir_copy_name(entry->name, entry->len);
284 	if (!name)
285 		return -ENOMEM;
286 
287 	array = kmap_atomic(page);
288 	ret = nfs_readdir_array_can_expand(array);
289 	if (ret) {
290 		kfree(name);
291 		goto out;
292 	}
293 
294 	cache_entry = &array->array[array->size];
295 	cache_entry->cookie = entry->prev_cookie;
296 	cache_entry->ino = entry->ino;
297 	cache_entry->d_type = entry->d_type;
298 	cache_entry->name_len = entry->len;
299 	cache_entry->name = name;
300 	array->last_cookie = entry->cookie;
301 	if (array->last_cookie <= cache_entry->cookie)
302 		array->cookies_are_ordered = 0;
303 	array->size++;
304 	if (entry->eof != 0)
305 		nfs_readdir_array_set_eof(array);
306 out:
307 	kunmap_atomic(array);
308 	return ret;
309 }
310 
311 static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie,
312 				      u64 change_attr)
313 {
314 	struct nfs_cache_array *array = kmap_atomic(page);
315 	int ret = true;
316 
317 	if (array->change_attr != change_attr)
318 		ret = false;
319 	if (array->size > 0 && array->array[0].cookie != last_cookie)
320 		ret = false;
321 	kunmap_atomic(array);
322 	return ret;
323 }
324 
/* Unlock @page, then drop the caller's reference to it. */
static void nfs_readdir_page_unlock_and_put(struct page *page)
{
	unlock_page(page);

	put_page(page);
}
330 
331 static struct page *nfs_readdir_page_get_locked(struct address_space *mapping,
332 						pgoff_t index, u64 last_cookie)
333 {
334 	struct page *page;
335 	u64 change_attr;
336 
337 	page = grab_cache_page(mapping, index);
338 	if (!page)
339 		return NULL;
340 	change_attr = inode_peek_iversion_raw(mapping->host);
341 	if (PageUptodate(page)) {
342 		if (nfs_readdir_page_validate(page, last_cookie, change_attr))
343 			return page;
344 		nfs_readdir_clear_array(page);
345 	}
346 	nfs_readdir_page_init_array(page, last_cookie, change_attr);
347 	SetPageUptodate(page);
348 	return page;
349 }
350 
351 static loff_t nfs_readdir_page_offset(struct page *page)
352 {
353 	return (loff_t)page->index * (loff_t)nfs_readdir_array_maxentries();
354 }
355 
356 static u64 nfs_readdir_page_last_cookie(struct page *page)
357 {
358 	struct nfs_cache_array *array;
359 	u64 ret;
360 
361 	array = kmap_atomic(page);
362 	ret = array->last_cookie;
363 	kunmap_atomic(array);
364 	return ret;
365 }
366 
367 static bool nfs_readdir_page_needs_filling(struct page *page)
368 {
369 	struct nfs_cache_array *array;
370 	bool ret;
371 
372 	array = kmap_atomic(page);
373 	ret = !nfs_readdir_array_is_full(array);
374 	kunmap_atomic(array);
375 	return ret;
376 }
377 
/* Mark the array stored in @page as the end of the directory. */
static void nfs_readdir_page_set_eof(struct page *page)
{
	struct nfs_cache_array *hdr = kmap_atomic(page);

	nfs_readdir_array_set_eof(hdr);
	kunmap_atomic(hdr);
}
386 
387 static struct page *nfs_readdir_page_get_next(struct address_space *mapping,
388 					      pgoff_t index, u64 cookie)
389 {
390 	struct page *page;
391 
392 	page = nfs_readdir_page_get_locked(mapping, index, cookie);
393 	if (page) {
394 		if (nfs_readdir_page_last_cookie(page) == cookie)
395 			return page;
396 		nfs_readdir_page_unlock_and_put(page);
397 	}
398 	return NULL;
399 }
400 
401 static inline
402 int is_32bit_api(void)
403 {
404 #ifdef CONFIG_COMPAT
405 	return in_compat_syscall();
406 #else
407 	return (BITS_PER_LONG == 32);
408 #endif
409 }
410 
411 static
412 bool nfs_readdir_use_cookie(const struct file *filp)
413 {
414 	if ((filp->f_mode & FMODE_32BITHASH) ||
415 	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
416 		return false;
417 	return true;
418 }
419 
420 static int nfs_readdir_search_for_pos(struct nfs_cache_array *array,
421 				      struct nfs_readdir_descriptor *desc)
422 {
423 	loff_t diff = desc->ctx->pos - desc->current_index;
424 	unsigned int index;
425 
426 	if (diff < 0)
427 		goto out_eof;
428 	if (diff >= array->size) {
429 		if (array->page_is_eof)
430 			goto out_eof;
431 		return -EAGAIN;
432 	}
433 
434 	index = (unsigned int)diff;
435 	desc->dir_cookie = array->array[index].cookie;
436 	desc->cache_entry_index = index;
437 	return 0;
438 out_eof:
439 	desc->eof = true;
440 	return -EBADCOOKIE;
441 }
442 
443 static bool nfs_readdir_array_cookie_in_range(struct nfs_cache_array *array,
444 					      u64 cookie)
445 {
446 	if (!array->cookies_are_ordered)
447 		return true;
448 	/* Optimisation for monotonically increasing cookies */
449 	if (cookie >= array->last_cookie)
450 		return false;
451 	if (array->size && cookie < array->array[0].cookie)
452 		return false;
453 	return true;
454 }
455 
456 static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
457 					 struct nfs_readdir_descriptor *desc)
458 {
459 	int i;
460 	loff_t new_pos;
461 	int status = -EAGAIN;
462 
463 	if (!nfs_readdir_array_cookie_in_range(array, desc->dir_cookie))
464 		goto check_eof;
465 
466 	for (i = 0; i < array->size; i++) {
467 		if (array->array[i].cookie == desc->dir_cookie) {
468 			struct nfs_inode *nfsi = NFS_I(file_inode(desc->file));
469 
470 			new_pos = nfs_readdir_page_offset(desc->page) + i;
471 			if (desc->attr_gencount != nfsi->attr_gencount) {
472 				desc->duped = 0;
473 				desc->attr_gencount = nfsi->attr_gencount;
474 			} else if (new_pos < desc->prev_index) {
475 				if (desc->duped > 0
476 				    && desc->dup_cookie == desc->dir_cookie) {
477 					if (printk_ratelimit()) {
478 						pr_notice("NFS: directory %pD2 contains a readdir loop."
479 								"Please contact your server vendor.  "
480 								"The file: %s has duplicate cookie %llu\n",
481 								desc->file, array->array[i].name, desc->dir_cookie);
482 					}
483 					status = -ELOOP;
484 					goto out;
485 				}
486 				desc->dup_cookie = desc->dir_cookie;
487 				desc->duped = -1;
488 			}
489 			if (nfs_readdir_use_cookie(desc->file))
490 				desc->ctx->pos = desc->dir_cookie;
491 			else
492 				desc->ctx->pos = new_pos;
493 			desc->prev_index = new_pos;
494 			desc->cache_entry_index = i;
495 			return 0;
496 		}
497 	}
498 check_eof:
499 	if (array->page_is_eof) {
500 		status = -EBADCOOKIE;
501 		if (desc->dir_cookie == array->last_cookie)
502 			desc->eof = true;
503 	}
504 out:
505 	return status;
506 }
507 
508 static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc)
509 {
510 	struct nfs_cache_array *array;
511 	int status;
512 
513 	array = kmap_atomic(desc->page);
514 
515 	if (desc->dir_cookie == 0)
516 		status = nfs_readdir_search_for_pos(array, desc);
517 	else
518 		status = nfs_readdir_search_for_cookie(array, desc);
519 
520 	if (status == -EAGAIN) {
521 		desc->last_cookie = array->last_cookie;
522 		desc->current_index += array->size;
523 		desc->page_index++;
524 	}
525 	kunmap_atomic(array);
526 	return status;
527 }
528 
529 /* Fill a page with xdr information before transferring to the cache page */
530 static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
531 				  __be32 *verf, u64 cookie,
532 				  struct page **pages, size_t bufsize,
533 				  __be32 *verf_res)
534 {
535 	struct inode *inode = file_inode(desc->file);
536 	struct nfs_readdir_arg arg = {
537 		.dentry = file_dentry(desc->file),
538 		.cred = desc->file->f_cred,
539 		.verf = verf,
540 		.cookie = cookie,
541 		.pages = pages,
542 		.page_len = bufsize,
543 		.plus = desc->plus,
544 	};
545 	struct nfs_readdir_res res = {
546 		.verf = verf_res,
547 	};
548 	unsigned long	timestamp, gencount;
549 	int		error;
550 
551  again:
552 	timestamp = jiffies;
553 	gencount = nfs_inc_attr_generation_counter();
554 	desc->dir_verifier = nfs_save_change_attribute(inode);
555 	error = NFS_PROTO(inode)->readdir(&arg, &res);
556 	if (error < 0) {
557 		/* We requested READDIRPLUS, but the server doesn't grok it */
558 		if (error == -ENOTSUPP && desc->plus) {
559 			NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
560 			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
561 			desc->plus = arg.plus = false;
562 			goto again;
563 		}
564 		goto error;
565 	}
566 	desc->timestamp = timestamp;
567 	desc->gencount = gencount;
568 error:
569 	return error;
570 }
571 
572 static int xdr_decode(struct nfs_readdir_descriptor *desc,
573 		      struct nfs_entry *entry, struct xdr_stream *xdr)
574 {
575 	struct inode *inode = file_inode(desc->file);
576 	int error;
577 
578 	error = NFS_PROTO(inode)->decode_dirent(xdr, entry, desc->plus);
579 	if (error)
580 		return error;
581 	entry->fattr->time_start = desc->timestamp;
582 	entry->fattr->gencount = desc->gencount;
583 	return 0;
584 }
585 
586 /* Match file and dirent using either filehandle or fileid
587  * Note: caller is responsible for checking the fsid
588  */
589 static
590 int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
591 {
592 	struct inode *inode;
593 	struct nfs_inode *nfsi;
594 
595 	if (d_really_is_negative(dentry))
596 		return 0;
597 
598 	inode = d_inode(dentry);
599 	if (is_bad_inode(inode) || NFS_STALE(inode))
600 		return 0;
601 
602 	nfsi = NFS_I(inode);
603 	if (entry->fattr->fileid != nfsi->fileid)
604 		return 0;
605 	if (entry->fh->size && nfs_compare_fh(entry->fh, &nfsi->fh) != 0)
606 		return 0;
607 	return 1;
608 }
609 
610 static
611 bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx)
612 {
613 	if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
614 		return false;
615 	if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags))
616 		return true;
617 	if (ctx->pos == 0)
618 		return true;
619 	return false;
620 }
621 
622 /*
623  * This function is called by the lookup and getattr code to request the
624  * use of readdirplus to accelerate any future lookups in the same
625  * directory.
626  */
627 void nfs_advise_use_readdirplus(struct inode *dir)
628 {
629 	struct nfs_inode *nfsi = NFS_I(dir);
630 
631 	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
632 	    !list_empty(&nfsi->open_files))
633 		set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
634 }
635 
636 /*
637  * This function is mainly for use by nfs_getattr().
638  *
639  * If this is an 'ls -l', we want to force use of readdirplus.
640  * Do this by checking if there is an active file descriptor
641  * and calling nfs_advise_use_readdirplus, then forcing a
642  * cache flush.
643  */
644 void nfs_force_use_readdirplus(struct inode *dir)
645 {
646 	struct nfs_inode *nfsi = NFS_I(dir);
647 
648 	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
649 	    !list_empty(&nfsi->open_files)) {
650 		set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
651 		set_bit(NFS_INO_FORCE_READDIR, &nfsi->flags);
652 	}
653 }
654 
/*
 * Use a decoded READDIRPLUS entry to populate or refresh the dentry for
 * entry->name under @parent.
 *
 * Entries without fileid or fsid attributes, with an empty name, with a
 * name containing '\0' or '/', or named "." or ".." are ignored.
 * An existing dentry for the same file is refreshed from the entry's
 * attributes; one for a different file is invalidated and replaced.
 * @dir_verifier is stored on the dentry via nfs_set_verifier().
 */
static
void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
		unsigned long dir_verifier)
{
	struct qstr filename = QSTR_INIT(entry->name, entry->len);
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
	struct dentry *dentry;
	struct dentry *alias;
	struct inode *inode;
	int status;

	if (!(entry->fattr->valid & NFS_ATTR_FATTR_FILEID))
		return;
	if (!(entry->fattr->valid & NFS_ATTR_FATTR_FSID))
		return;
	if (filename.len == 0)
		return;
	/* Validate that the name doesn't contain any illegal '\0' */
	if (strnlen(filename.name, filename.len) != filename.len)
		return;
	/* ...or '/' */
	if (strnchr(filename.name, filename.len, '/'))
		return;
	/* Skip "." and ".." */
	if (filename.name[0] == '.') {
		if (filename.len == 1)
			return;
		if (filename.len == 2 && filename.name[1] == '.')
			return;
	}
	filename.hash = full_name_hash(parent, filename.name, filename.len);

	dentry = d_lookup(parent, &filename);
again:
	/* Nothing cached under this name: allocate a dentry for the lookup */
	if (!dentry) {
		dentry = d_alloc_parallel(parent, &filename, &wq);
		if (IS_ERR(dentry))
			return;
	}
	/* Not in-lookup: the dentry already existed */
	if (!d_in_lookup(dentry)) {
		/* Is there a mountpoint here? If so, just exit */
		if (!nfs_fsid_equal(&NFS_SB(dentry->d_sb)->fsid,
					&entry->fattr->fsid))
			goto out;
		if (nfs_same_file(dentry, entry)) {
			/* Without a filehandle there is nothing to refresh from */
			if (!entry->fh->size)
				goto out;
			nfs_set_verifier(dentry, dir_verifier);
			status = nfs_refresh_inode(d_inode(dentry), entry->fattr);
			if (!status)
				nfs_setsecurity(d_inode(dentry), entry->fattr);
			goto out;
		} else {
			/* Name now refers to another file: drop it and retry */
			d_invalidate(dentry);
			dput(dentry);
			dentry = NULL;
			goto again;
		}
	}
	/* New dentry but no filehandle: end the lookup without an inode */
	if (!entry->fh->size) {
		d_lookup_done(dentry);
		goto out;
	}

	inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
	alias = d_splice_alias(inode, dentry);
	d_lookup_done(dentry);
	if (alias) {
		if (IS_ERR(alias))
			goto out;
		/* Continue with the dentry returned by d_splice_alias() */
		dput(dentry);
		dentry = alias;
	}
	nfs_set_verifier(dentry, dir_verifier);
out:
	dput(dentry);
}
731 
/*
 * Perform conversion from xdr to cache array.
 *
 * Decodes entries from @xdr_pages (@buflen bytes) and appends them to the
 * array page *@arrays. When that page is full, filling continues either
 * in the next page cache page (if the page belongs to the file's mapping)
 * or in a newly allocated page stored in the next slot of @arrays (at
 * most @narrays slots in total).
 *
 * With desc->plus set, each decoded entry is also fed to
 * nfs_prime_dcache().
 *
 * Returns 0 on success, including when decoding stops with -EAGAIN, when
 * space runs out (-ENOSPC), or when -EBADCOOKIE coincides with
 * entry->eof (the current page is then marked end-of-directory).
 * Other errors are returned unchanged.
 */
static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
				   struct nfs_entry *entry,
				   struct page **xdr_pages,
				   unsigned int buflen,
				   struct page **arrays,
				   size_t narrays)
{
	struct address_space *mapping = desc->file->f_mapping;
	struct xdr_stream stream;
	struct xdr_buf buf;
	struct page *scratch, *new, *page = *arrays;
	int status;

	scratch = alloc_page(GFP_KERNEL);
	if (scratch == NULL)
		return -ENOMEM;

	xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
	xdr_set_scratch_page(&stream, scratch);

	do {
		/* Reset the label length before each decode */
		if (entry->fattr->label)
			entry->fattr->label->len = NFS4_MAXLABELLEN;

		status = xdr_decode(desc, entry, &stream);
		if (status != 0)
			break;

		if (desc->plus)
			nfs_prime_dcache(file_dentry(desc->file), entry,
					desc->dir_verifier);

		status = nfs_readdir_add_to_array(entry, page);
		/* Anything but "page full" goes straight to the loop condition */
		if (status != -ENOSPC)
			continue;

		if (page->mapping != mapping) {
			/* Page is not in this file's page cache: use the next @arrays slot */
			if (!--narrays)
				break;
			new = nfs_readdir_page_array_alloc(entry->prev_cookie,
							   GFP_KERNEL);
			if (!new)
				break;
			arrays++;
			*arrays = page = new;
		} else {
			/* Page cache page: continue in the following index */
			new = nfs_readdir_page_get_next(mapping,
							page->index + 1,
							entry->prev_cookie);
			if (!new)
				break;
			/* The page passed in by the caller stays locked and held */
			if (page != *arrays)
				nfs_readdir_page_unlock_and_put(page);
			page = new;
		}
		/* Retry the entry that did not fit, now in the new page */
		status = nfs_readdir_add_to_array(entry, page);
	} while (!status && !entry->eof);

	switch (status) {
	case -EBADCOOKIE:
		if (entry->eof) {
			nfs_readdir_page_set_eof(page);
			status = 0;
		}
		break;
	case -ENOSPC:
	case -EAGAIN:
		status = 0;
		break;
	}

	/* Release a page cache page obtained above; never the caller's page */
	if (page != *arrays)
		nfs_readdir_page_unlock_and_put(page);

	put_page(scratch);
	return status;
}
810 
811 static void nfs_readdir_free_pages(struct page **pages, size_t npages)
812 {
813 	while (npages--)
814 		put_page(pages[npages]);
815 	kfree(pages);
816 }
817 
818 /*
819  * nfs_readdir_alloc_pages() will allocate pages that must be freed with a call
820  * to nfs_readdir_free_pages()
821  */
822 static struct page **nfs_readdir_alloc_pages(size_t npages)
823 {
824 	struct page **pages;
825 	size_t i;
826 
827 	pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
828 	if (!pages)
829 		return NULL;
830 	for (i = 0; i < npages; i++) {
831 		struct page *page = alloc_page(GFP_KERNEL);
832 		if (page == NULL)
833 			goto out_freepages;
834 		pages[i] = page;
835 	}
836 	return pages;
837 
838 out_freepages:
839 	nfs_readdir_free_pages(pages, i);
840 	return NULL;
841 }
842 
843 static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
844 				    __be32 *verf_arg, __be32 *verf_res,
845 				    struct page **arrays, size_t narrays)
846 {
847 	struct page **pages;
848 	struct page *page = *arrays;
849 	struct nfs_entry *entry;
850 	size_t array_size;
851 	struct inode *inode = file_inode(desc->file);
852 	size_t dtsize = NFS_SERVER(inode)->dtsize;
853 	int status = -ENOMEM;
854 
855 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
856 	if (!entry)
857 		return -ENOMEM;
858 	entry->cookie = nfs_readdir_page_last_cookie(page);
859 	entry->fh = nfs_alloc_fhandle();
860 	entry->fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode));
861 	entry->server = NFS_SERVER(inode);
862 	if (entry->fh == NULL || entry->fattr == NULL)
863 		goto out;
864 
865 	array_size = (dtsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
866 	pages = nfs_readdir_alloc_pages(array_size);
867 	if (!pages)
868 		goto out;
869 
870 	do {
871 		unsigned int pglen;
872 		status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie,
873 						pages, dtsize,
874 						verf_res);
875 		if (status < 0)
876 			break;
877 
878 		pglen = status;
879 		if (pglen == 0) {
880 			nfs_readdir_page_set_eof(page);
881 			break;
882 		}
883 
884 		verf_arg = verf_res;
885 
886 		status = nfs_readdir_page_filler(desc, entry, pages, pglen,
887 						 arrays, narrays);
888 	} while (!status && nfs_readdir_page_needs_filling(page) &&
889 		page_mapping(page));
890 
891 	nfs_readdir_free_pages(pages, array_size);
892 out:
893 	nfs_free_fattr(entry->fattr);
894 	nfs_free_fhandle(entry->fh);
895 	kfree(entry);
896 	return status;
897 }
898 
899 static void nfs_readdir_page_put(struct nfs_readdir_descriptor *desc)
900 {
901 	put_page(desc->page);
902 	desc->page = NULL;
903 }
904 
905 static void
906 nfs_readdir_page_unlock_and_put_cached(struct nfs_readdir_descriptor *desc)
907 {
908 	unlock_page(desc->page);
909 	nfs_readdir_page_put(desc);
910 }
911 
912 static struct page *
913 nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc)
914 {
915 	return nfs_readdir_page_get_locked(desc->file->f_mapping,
916 					   desc->page_index,
917 					   desc->last_cookie);
918 }
919 
920 /*
921  * Returns 0 if desc->dir_cookie was found on page desc->page_index
922  * and locks the page to prevent removal from the page cache.
923  */
924 static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
925 {
926 	struct inode *inode = file_inode(desc->file);
927 	struct nfs_inode *nfsi = NFS_I(inode);
928 	__be32 verf[NFS_DIR_VERIFIER_SIZE];
929 	int res;
930 
931 	desc->page = nfs_readdir_page_get_cached(desc);
932 	if (!desc->page)
933 		return -ENOMEM;
934 	if (nfs_readdir_page_needs_filling(desc->page)) {
935 		res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf,
936 					       &desc->page, 1);
937 		if (res < 0) {
938 			nfs_readdir_page_unlock_and_put_cached(desc);
939 			if (res == -EBADCOOKIE || res == -ENOTSYNC) {
940 				invalidate_inode_pages2(desc->file->f_mapping);
941 				desc->page_index = 0;
942 				return -EAGAIN;
943 			}
944 			return res;
945 		}
946 		/*
947 		 * Set the cookie verifier if the page cache was empty
948 		 */
949 		if (desc->page_index == 0)
950 			memcpy(nfsi->cookieverf, verf,
951 			       sizeof(nfsi->cookieverf));
952 	}
953 	res = nfs_readdir_search_array(desc);
954 	if (res == 0)
955 		return 0;
956 	nfs_readdir_page_unlock_and_put_cached(desc);
957 	return res;
958 }
959 
960 static bool nfs_readdir_dont_search_cache(struct nfs_readdir_descriptor *desc)
961 {
962 	struct address_space *mapping = desc->file->f_mapping;
963 	struct inode *dir = file_inode(desc->file);
964 	unsigned int dtsize = NFS_SERVER(dir)->dtsize;
965 	loff_t size = i_size_read(dir);
966 
967 	/*
968 	 * Default to uncached readdir if the page cache is empty, and
969 	 * we're looking for a non-zero cookie in a large directory.
970 	 */
971 	return desc->dir_cookie != 0 && mapping->nrpages == 0 && size > dtsize;
972 }
973 
974 /* Search for desc->dir_cookie from the beginning of the page cache */
975 static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
976 {
977 	int res;
978 
979 	if (nfs_readdir_dont_search_cache(desc))
980 		return -EBADCOOKIE;
981 
982 	do {
983 		if (desc->page_index == 0) {
984 			desc->current_index = 0;
985 			desc->prev_index = 0;
986 			desc->last_cookie = 0;
987 		}
988 		res = find_and_lock_cache_page(desc);
989 	} while (res == -EAGAIN);
990 	return res;
991 }
992 
993 /*
994  * Once we've found the start of the dirent within a page: fill 'er up...
995  */
996 static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
997 			   const __be32 *verf)
998 {
999 	struct file	*file = desc->file;
1000 	struct nfs_cache_array *array;
1001 	unsigned int i = 0;
1002 
1003 	array = kmap(desc->page);
1004 	for (i = desc->cache_entry_index; i < array->size; i++) {
1005 		struct nfs_cache_array_entry *ent;
1006 
1007 		ent = &array->array[i];
1008 		if (!dir_emit(desc->ctx, ent->name, ent->name_len,
1009 		    nfs_compat_user_ino64(ent->ino), ent->d_type)) {
1010 			desc->eob = true;
1011 			break;
1012 		}
1013 		memcpy(desc->verf, verf, sizeof(desc->verf));
1014 		if (i < (array->size-1))
1015 			desc->dir_cookie = array->array[i+1].cookie;
1016 		else
1017 			desc->dir_cookie = array->last_cookie;
1018 		if (nfs_readdir_use_cookie(file))
1019 			desc->ctx->pos = desc->dir_cookie;
1020 		else
1021 			desc->ctx->pos++;
1022 		if (desc->duped != 0)
1023 			desc->duped = 1;
1024 	}
1025 	if (array->page_is_eof)
1026 		desc->eof = !desc->eob;
1027 
1028 	kunmap(desc->page);
1029 	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %llu\n",
1030 			(unsigned long long)desc->dir_cookie);
1031 }
1032 
1033 /*
1034  * If we cannot find a cookie in our cache, we suspect that this is
1035  * because it points to a deleted file, so we ask the server to return
1036  * whatever it thinks is the next entry. We then feed this to filldir.
1037  * If all goes well, we should then be able to find our way round the
1038  * cache on the next call to readdir_search_pagecache();
1039  *
1040  * NOTE: we cannot add the anonymous page to the pagecache because
1041  *	 the data it contains might not be page aligned. Besides,
1042  *	 we should already have a complete representation of the
1043  *	 directory in the page cache by the time we get here.
1044  */
1045 static int uncached_readdir(struct nfs_readdir_descriptor *desc)
1046 {
1047 	struct page	**arrays;
1048 	size_t		i, sz = 512;
1049 	__be32		verf[NFS_DIR_VERIFIER_SIZE];
1050 	int		status = -ENOMEM;
1051 
1052 	dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %llu\n",
1053 			(unsigned long long)desc->dir_cookie);
1054 
1055 	arrays = kcalloc(sz, sizeof(*arrays), GFP_KERNEL);
1056 	if (!arrays)
1057 		goto out;
1058 	arrays[0] = nfs_readdir_page_array_alloc(desc->dir_cookie, GFP_KERNEL);
1059 	if (!arrays[0])
1060 		goto out;
1061 
1062 	desc->page_index = 0;
1063 	desc->cache_entry_index = 0;
1064 	desc->last_cookie = desc->dir_cookie;
1065 	desc->duped = 0;
1066 
1067 	status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz);
1068 
1069 	for (i = 0; !desc->eob && i < sz && arrays[i]; i++) {
1070 		desc->page = arrays[i];
1071 		nfs_do_filldir(desc, verf);
1072 	}
1073 	desc->page = NULL;
1074 
1075 
1076 	for (i = 0; i < sz && arrays[i]; i++)
1077 		nfs_readdir_page_array_free(arrays[i]);
1078 out:
1079 	kfree(arrays);
1080 	dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
1081 	return status;
1082 }
1083 
/* The file offset position represents the dirent entry number.  A
   last cookie cache takes care of the common case of reading the
   whole directory.

   nfs_readdir() works on a private, heap-allocated descriptor: the
   per-open state in file->private_data is copied into it under
   file->f_lock, the read loop runs without that lock, and the
   updated state is copied back under the lock once the loop ends.

   Returns -ENOMEM if the descriptor cannot be allocated, 0 when the
   saved state already has eof set or when a bad cookie is treated as
   end of directory, and otherwise the last status left in 'res' by
   the page cache search or the uncached read.
 */
static int nfs_readdir(struct file *file, struct dir_context *ctx)
{
	struct dentry	*dentry = file_dentry(file);
	struct inode	*inode = d_inode(dentry);
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_open_dir_context *dir_ctx = file->private_data;
	struct nfs_readdir_descriptor *desc;
	pgoff_t page_index;
	int res;

	dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
			file, (long long)ctx->pos);
	nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);

	/*
	 * ctx->pos points to the dirent entry number.
	 * desc->dir_cookie (loaded below) has the cookie for the next
	 * entry. We have to either find the entry with the appropriate
	 * number or revalidate the cookie.
	 */
	nfs_revalidate_mapping(inode, file->f_mapping);

	res = -ENOMEM;
	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
	if (!desc)
		goto out;
	desc->file = file;
	desc->ctx = ctx;
	desc->plus = nfs_use_readdirplus(inode, ctx);

	/* Snapshot the per-open directory state while holding f_lock. */
	spin_lock(&file->f_lock);
	desc->dir_cookie = dir_ctx->dir_cookie;
	desc->dup_cookie = dir_ctx->dup_cookie;
	desc->duped = dir_ctx->duped;
	page_index = dir_ctx->page_index;
	desc->attr_gencount = dir_ctx->attr_gencount;
	desc->eof = dir_ctx->eof;
	memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
	spin_unlock(&file->f_lock);

	/*
	 * The saved state already records eof: nothing to emit. This
	 * path skips the write-back below, so dir_ctx is left untouched.
	 */
	if (desc->eof) {
		res = 0;
		goto out_free;
	}

	/*
	 * A pending NFS_INO_FORCE_READDIR request is consumed here; the
	 * cached pages after the saved page_index are dropped only when
	 * this inode has exactly one entry on its open_files list.
	 */
	if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags) &&
	    list_is_singular(&nfsi->open_files))
		invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1);

	do {
		res = readdir_search_pagecache(desc);

		if (res == -EBADCOOKIE) {
			res = 0;
			/* This means either end of directory */
			if (desc->dir_cookie && !desc->eof) {
				/* Or that the server has 'lost' a cookie */
				res = uncached_readdir(desc);
				if (res == 0)
					continue;
				/* These two errors are not reported to the caller */
				if (res == -EBADCOOKIE || res == -ENOTSYNC)
					res = 0;
			}
			break;
		}
		/*
		 * -ETOOSMALL while using readdirplus: stop advising
		 * readdirplus, zap the inode caches and restart the search
		 * from page 0 with 'plus' disabled.
		 */
		if (res == -ETOOSMALL && desc->plus) {
			clear_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
			nfs_zap_caches(inode);
			desc->page_index = 0;
			desc->plus = false;
			desc->eof = false;
			continue;
		}
		if (res < 0)
			break;

		nfs_do_filldir(desc, nfsi->cookieverf);
		nfs_readdir_page_unlock_and_put_cached(desc);
	} while (!desc->eob && !desc->eof);

	/* Publish the updated position and verifier back to the open file. */
	spin_lock(&file->f_lock);
	dir_ctx->dir_cookie = desc->dir_cookie;
	dir_ctx->dup_cookie = desc->dup_cookie;
	dir_ctx->duped = desc->duped;
	dir_ctx->attr_gencount = desc->attr_gencount;
	dir_ctx->page_index = desc->page_index;
	dir_ctx->eof = desc->eof;
	memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
	spin_unlock(&file->f_lock);
out_free:
	kfree(desc);

out:
	dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
	return res;
}
1184 
1185 static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
1186 {
1187 	struct nfs_open_dir_context *dir_ctx = filp->private_data;
1188 
1189 	dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
1190 			filp, offset, whence);
1191 
1192 	switch (whence) {
1193 	default:
1194 		return -EINVAL;
1195 	case SEEK_SET:
1196 		if (offset < 0)
1197 			return -EINVAL;
1198 		spin_lock(&filp->f_lock);
1199 		break;
1200 	case SEEK_CUR:
1201 		if (offset == 0)
1202 			return filp->f_pos;
1203 		spin_lock(&filp->f_lock);
1204 		offset += filp->f_pos;
1205 		if (offset < 0) {
1206 			spin_unlock(&filp->f_lock);
1207 			return -EINVAL;
1208 		}
1209 	}
1210 	if (offset != filp->f_pos) {
1211 		filp->f_pos = offset;
1212 		if (nfs_readdir_use_cookie(filp))
1213 			dir_ctx->dir_cookie = offset;
1214 		else
1215 			dir_ctx->dir_cookie = 0;
1216 		if (offset == 0)
1217 			memset(dir_ctx->verf, 0, sizeof(dir_ctx->verf));
1218 		dir_ctx->duped = 0;
1219 		dir_ctx->eof = false;
1220 	}
1221 	spin_unlock(&filp->f_lock);
1222 	return offset;
1223 }
1224 
1225 /*
1226  * All directory operations under NFS are synchronous, so fsync()
1227  * is a dummy operation.
1228  */
1229 static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
1230 			 int datasync)
1231 {
1232 	dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
1233 
1234 	nfs_inc_stats(file_inode(filp), NFSIOS_VFSFSYNC);
1235 	return 0;
1236 }
1237 
1238 /**
1239  * nfs_force_lookup_revalidate - Mark the directory as having changed
1240  * @dir: pointer to directory inode
1241  *
1242  * This forces the revalidation code in nfs_lookup_revalidate() to do a
1243  * full lookup on all child dentries of 'dir' whenever a change occurs
1244  * on the server that might have invalidated our dcache.
1245  *
1246  * Note that we reserve bit '0' as a tag to let us know when a dentry
1247  * was revalidated while holding a delegation on its inode.
1248  *
1249  * The caller should be holding dir->i_lock
1250  */
1251 void nfs_force_lookup_revalidate(struct inode *dir)
1252 {
1253 	NFS_I(dir)->cache_change_attribute += 2;
1254 }
1255 EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
1256 
1257 /**
1258  * nfs_verify_change_attribute - Detects NFS remote directory changes
1259  * @dir: pointer to parent directory inode
1260  * @verf: previously saved change attribute
1261  *
1262  * Return "false" if the verifiers doesn't match the change attribute.
1263  * This would usually indicate that the directory contents have changed on
1264  * the server, and that any dentries need revalidating.
1265  */
1266 static bool nfs_verify_change_attribute(struct inode *dir, unsigned long verf)
1267 {
1268 	return (verf & ~1UL) == nfs_save_change_attribute(dir);
1269 }
1270 
/* Set the delegation tag (bit 0) in the verifier pointed to by @verf. */
static void nfs_set_verifier_delegated(unsigned long *verf)
{
	*verf = *verf | 1UL;
}
1275 
1276 #if IS_ENABLED(CONFIG_NFS_V4)
/* Clear the delegation tag (bit 0) in the verifier pointed to by @verf. */
static void nfs_unset_verifier_delegated(unsigned long *verf)
{
	*verf = *verf & ~1UL;
}
1281 #endif /* IS_ENABLED(CONFIG_NFS_V4) */
1282 
/* Report whether the delegation tag (bit 0) is set in @verf. */
static bool nfs_test_verifier_delegated(unsigned long verf)
{
	return (verf & 1UL) != 0;
}
1287 
1288 static bool nfs_verifier_is_delegated(struct dentry *dentry)
1289 {
1290 	return nfs_test_verifier_delegated(dentry->d_time);
1291 }
1292 
1293 static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf)
1294 {
1295 	struct inode *inode = d_inode(dentry);
1296 	struct inode *dir = d_inode(dentry->d_parent);
1297 
1298 	if (!nfs_verify_change_attribute(dir, verf))
1299 		return;
1300 	if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
1301 		nfs_set_verifier_delegated(&verf);
1302 	dentry->d_time = verf;
1303 }
1304 
1305 /**
1306  * nfs_set_verifier - save a parent directory verifier in the dentry
1307  * @dentry: pointer to dentry
1308  * @verf: verifier to save
1309  *
1310  * Saves the parent directory verifier in @dentry. If the inode has
1311  * a delegation, we also tag the dentry as having been revalidated
1312  * while holding a delegation so that we know we don't have to
1313  * look it up again after a directory change.
1314  */
1315 void nfs_set_verifier(struct dentry *dentry, unsigned long verf)
1316 {
1317 
1318 	spin_lock(&dentry->d_lock);
1319 	nfs_set_verifier_locked(dentry, verf);
1320 	spin_unlock(&dentry->d_lock);
1321 }
1322 EXPORT_SYMBOL_GPL(nfs_set_verifier);
1323 
1324 #if IS_ENABLED(CONFIG_NFS_V4)
1325 /**
1326  * nfs_clear_verifier_delegated - clear the dir verifier delegation tag
1327  * @inode: pointer to inode
1328  *
1329  * Iterates through the dentries in the inode alias list and clears
1330  * the tag used to indicate that the dentry has been revalidated
1331  * while holding a delegation.
1332  * This function is intended for use when the delegation is being
1333  * returned or revoked.
1334  */
1335 void nfs_clear_verifier_delegated(struct inode *inode)
1336 {
1337 	struct dentry *alias;
1338 
1339 	if (!inode)
1340 		return;
1341 	spin_lock(&inode->i_lock);
1342 	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
1343 		spin_lock(&alias->d_lock);
1344 		nfs_unset_verifier_delegated(&alias->d_time);
1345 		spin_unlock(&alias->d_lock);
1346 	}
1347 	spin_unlock(&inode->i_lock);
1348 }
1349 EXPORT_SYMBOL_GPL(nfs_clear_verifier_delegated);
1350 #endif /* IS_ENABLED(CONFIG_NFS_V4) */
1351 
1352 static int nfs_dentry_verify_change(struct inode *dir, struct dentry *dentry)
1353 {
1354 	if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE) &&
1355 	    d_really_is_negative(dentry))
1356 		return dentry->d_time == inode_peek_iversion_raw(dir);
1357 	return nfs_verify_change_attribute(dir, dentry->d_time);
1358 }
1359 
1360 /*
1361  * A check for whether or not the parent directory has changed.
1362  * In the case it has, we assume that the dentries are untrustworthy
1363  * and may need to be looked up again.
1364  * If rcu_walk prevents us from performing a full check, return 0.
1365  */
1366 static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
1367 			      int rcu_walk)
1368 {
1369 	if (IS_ROOT(dentry))
1370 		return 1;
1371 	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
1372 		return 0;
1373 	if (!nfs_dentry_verify_change(dir, dentry))
1374 		return 0;
1375 	/* Revalidate nfsi->cache_change_attribute before we declare a match */
1376 	if (nfs_mapping_need_revalidate_inode(dir)) {
1377 		if (rcu_walk)
1378 			return 0;
1379 		if (__nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
1380 			return 0;
1381 	}
1382 	if (!nfs_dentry_verify_change(dir, dentry))
1383 		return 0;
1384 	return 1;
1385 }
1386 
1387 /*
1388  * Use intent information to check whether or not we're going to do
1389  * an O_EXCL create using this path component.
1390  */
1391 static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags)
1392 {
1393 	if (NFS_PROTO(dir)->version == 2)
1394 		return 0;
1395 	return flags & LOOKUP_EXCL;
1396 }
1397 
1398 /*
1399  * Inode and filehandle revalidation for lookups.
1400  *
1401  * We force revalidation in the cases where the VFS sets LOOKUP_REVAL,
1402  * or if the intent information indicates that we're about to open this
1403  * particular file and the "nocto" mount flag is not set.
1404  *
1405  */
1406 static
1407 int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
1408 {
1409 	struct nfs_server *server = NFS_SERVER(inode);
1410 	int ret;
1411 
1412 	if (IS_AUTOMOUNT(inode))
1413 		return 0;
1414 
1415 	if (flags & LOOKUP_OPEN) {
1416 		switch (inode->i_mode & S_IFMT) {
1417 		case S_IFREG:
1418 			/* A NFSv4 OPEN will revalidate later */
1419 			if (server->caps & NFS_CAP_ATOMIC_OPEN)
1420 				goto out;
1421 			fallthrough;
1422 		case S_IFDIR:
1423 			if (server->flags & NFS_MOUNT_NOCTO)
1424 				break;
1425 			/* NFS close-to-open cache consistency validation */
1426 			goto out_force;
1427 		}
1428 	}
1429 
1430 	/* VFS wants an on-the-wire revalidation */
1431 	if (flags & LOOKUP_REVAL)
1432 		goto out_force;
1433 out:
1434 	if (inode->i_nlink > 0 ||
1435 	    (inode->i_nlink == 0 &&
1436 	     test_bit(NFS_INO_PRESERVE_UNLINKED, &NFS_I(inode)->flags)))
1437 		return 0;
1438 	else
1439 		return -ESTALE;
1440 out_force:
1441 	if (flags & LOOKUP_RCU)
1442 		return -ECHILD;
1443 	ret = __nfs_revalidate_inode(server, inode);
1444 	if (ret != 0)
1445 		return ret;
1446 	goto out;
1447 }
1448 
1449 static void nfs_mark_dir_for_revalidate(struct inode *inode)
1450 {
1451 	spin_lock(&inode->i_lock);
1452 	nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE);
1453 	spin_unlock(&inode->i_lock);
1454 }
1455 
1456 /*
1457  * We judge how long we want to trust negative
1458  * dentries by looking at the parent inode mtime.
1459  *
1460  * If parent mtime has changed, we revalidate, else we wait for a
1461  * period corresponding to the parent's attribute cache timeout value.
1462  *
1463  * If LOOKUP_RCU prevents us from performing a full check, return 1
1464  * suggesting a reval is needed.
1465  *
1466  * Note that when creating a new file, or looking up a rename target,
1467  * then it shouldn't be necessary to revalidate a negative dentry.
1468  */
1469 static inline
1470 int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
1471 		       unsigned int flags)
1472 {
1473 	if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
1474 		return 0;
1475 	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
1476 		return 1;
1477 	/* Case insensitive server? Revalidate negative dentries */
1478 	if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
1479 		return 1;
1480 	return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
1481 }
1482 
/*
 * Common exit point for the revalidation helpers: adjusts the verdict
 * for root dentries, emits the exit tracepoint and returns the
 * (possibly adjusted) @error value.
 */
static int
nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
			   struct inode *inode, int error)
{
	/*
	 * We can't d_drop the root of a disconnected tree:
	 * its d_hash is on the s_anon list and d_drop() would hide
	 * it from shrink_dcache_for_unmount(), leading to busy
	 * inodes on unmount and further oopses.
	 */
	if (error == 0 && inode && IS_ROOT(dentry))
		error = 1;

	trace_nfs_lookup_revalidate_exit(dir, dentry, 0, error);
	return error;
}
1504 
1505 static int
1506 nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry,
1507 			       unsigned int flags)
1508 {
1509 	int ret = 1;
1510 	if (nfs_neg_need_reval(dir, dentry, flags)) {
1511 		if (flags & LOOKUP_RCU)
1512 			return -ECHILD;
1513 		ret = 0;
1514 	}
1515 	return nfs_lookup_revalidate_done(dir, dentry, NULL, ret);
1516 }
1517 
/*
 * Revalidation shortcut for a dentry whose verifier carries the
 * delegation tag: refresh the saved verifier from the directory's
 * current change attribute and report the dentry as valid.
 */
static int
nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
				struct inode *inode)
{
	unsigned long dir_verf = nfs_save_change_attribute(dir);

	nfs_set_verifier(dentry, dir_verf);
	return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
}
1525 
1526 static int
1527 nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
1528 			     struct inode *inode)
1529 {
1530 	struct nfs_fh *fhandle;
1531 	struct nfs_fattr *fattr;
1532 	unsigned long dir_verifier;
1533 	int ret;
1534 
1535 	ret = -ENOMEM;
1536 	fhandle = nfs_alloc_fhandle();
1537 	fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode));
1538 	if (fhandle == NULL || fattr == NULL)
1539 		goto out;
1540 
1541 	dir_verifier = nfs_save_change_attribute(dir);
1542 	ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
1543 	if (ret < 0) {
1544 		switch (ret) {
1545 		case -ESTALE:
1546 		case -ENOENT:
1547 			ret = 0;
1548 			break;
1549 		case -ETIMEDOUT:
1550 			if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
1551 				ret = 1;
1552 		}
1553 		goto out;
1554 	}
1555 	ret = 0;
1556 	if (nfs_compare_fh(NFS_FH(inode), fhandle))
1557 		goto out;
1558 	if (nfs_refresh_inode(inode, fattr) < 0)
1559 		goto out;
1560 
1561 	nfs_setsecurity(inode, fattr);
1562 	nfs_set_verifier(dentry, dir_verifier);
1563 
1564 	/* set a readdirplus hint that we had a cache miss */
1565 	nfs_force_use_readdirplus(dir);
1566 	ret = 1;
1567 out:
1568 	nfs_free_fattr(fattr);
1569 	nfs_free_fhandle(fhandle);
1570 
1571 	/*
1572 	 * If the lookup failed despite the dentry change attribute being
1573 	 * a match, then we should revalidate the directory cache.
1574 	 */
1575 	if (!ret && nfs_dentry_verify_change(dir, dentry))
1576 		nfs_mark_dir_for_revalidate(dir);
1577 	return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
1578 }
1579 
1580 /*
1581  * This is called every time the dcache has a lookup hit,
1582  * and we should check whether we can really trust that
1583  * lookup.
1584  *
1585  * NOTE! The hit can be a negative hit too, don't assume
1586  * we have an inode!
1587  *
1588  * If the parent directory is seen to have changed, we throw out the
1589  * cached dentry and do a new lookup.
1590  */
1591 static int
1592 nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
1593 			 unsigned int flags)
1594 {
1595 	struct inode *inode;
1596 	int error;
1597 
1598 	nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
1599 	inode = d_inode(dentry);
1600 
1601 	if (!inode)
1602 		return nfs_lookup_revalidate_negative(dir, dentry, flags);
1603 
1604 	if (is_bad_inode(inode)) {
1605 		dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
1606 				__func__, dentry);
1607 		goto out_bad;
1608 	}
1609 
1610 	if (nfs_verifier_is_delegated(dentry))
1611 		return nfs_lookup_revalidate_delegated(dir, dentry, inode);
1612 
1613 	/* Force a full look up iff the parent directory has changed */
1614 	if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) &&
1615 	    nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
1616 		error = nfs_lookup_verify_inode(inode, flags);
1617 		if (error) {
1618 			if (error == -ESTALE)
1619 				nfs_mark_dir_for_revalidate(dir);
1620 			goto out_bad;
1621 		}
1622 		nfs_advise_use_readdirplus(dir);
1623 		goto out_valid;
1624 	}
1625 
1626 	if (flags & LOOKUP_RCU)
1627 		return -ECHILD;
1628 
1629 	if (NFS_STALE(inode))
1630 		goto out_bad;
1631 
1632 	trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
1633 	return nfs_lookup_revalidate_dentry(dir, dentry, inode);
1634 out_valid:
1635 	return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
1636 out_bad:
1637 	if (flags & LOOKUP_RCU)
1638 		return -ECHILD;
1639 	return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
1640 }
1641 
1642 static int
1643 __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
1644 			int (*reval)(struct inode *, struct dentry *, unsigned int))
1645 {
1646 	struct dentry *parent;
1647 	struct inode *dir;
1648 	int ret;
1649 
1650 	if (flags & LOOKUP_RCU) {
1651 		parent = READ_ONCE(dentry->d_parent);
1652 		dir = d_inode_rcu(parent);
1653 		if (!dir)
1654 			return -ECHILD;
1655 		ret = reval(dir, dentry, flags);
1656 		if (parent != READ_ONCE(dentry->d_parent))
1657 			return -ECHILD;
1658 	} else {
1659 		parent = dget_parent(dentry);
1660 		ret = reval(d_inode(parent), dentry, flags);
1661 		dput(parent);
1662 	}
1663 	return ret;
1664 }
1665 
1666 static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1667 {
1668 	return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate);
1669 }
1670 
1671 /*
1672  * A weaker form of d_revalidate for revalidating just the d_inode(dentry)
1673  * when we don't really care about the dentry name. This is called when a
1674  * pathwalk ends on a dentry that was not found via a normal lookup in the
1675  * parent dir (e.g.: ".", "..", procfs symlinks or mountpoint traversals).
1676  *
1677  * In this situation, we just want to verify that the inode itself is OK
1678  * since the dentry might have changed on the server.
1679  */
1680 static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
1681 {
1682 	struct inode *inode = d_inode(dentry);
1683 	int error = 0;
1684 
1685 	/*
1686 	 * I believe we can only get a negative dentry here in the case of a
1687 	 * procfs-style symlink. Just assume it's correct for now, but we may
1688 	 * eventually need to do something more here.
1689 	 */
1690 	if (!inode) {
1691 		dfprintk(LOOKUPCACHE, "%s: %pd2 has negative inode\n",
1692 				__func__, dentry);
1693 		return 1;
1694 	}
1695 
1696 	if (is_bad_inode(inode)) {
1697 		dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
1698 				__func__, dentry);
1699 		return 0;
1700 	}
1701 
1702 	error = nfs_lookup_verify_inode(inode, flags);
1703 	dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
1704 			__func__, inode->i_ino, error ? "invalid" : "valid");
1705 	return !error;
1706 }
1707 
1708 /*
1709  * This is called from dput() when d_count is going to 0.
1710  */
1711 static int nfs_dentry_delete(const struct dentry *dentry)
1712 {
1713 	dfprintk(VFS, "NFS: dentry_delete(%pd2, %x)\n",
1714 		dentry, dentry->d_flags);
1715 
1716 	/* Unhash any dentry with a stale inode */
1717 	if (d_really_is_positive(dentry) && NFS_STALE(d_inode(dentry)))
1718 		return 1;
1719 
1720 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
1721 		/* Unhash it, so that ->d_iput() would be called */
1722 		return 1;
1723 	}
1724 	if (!(dentry->d_sb->s_flags & SB_ACTIVE)) {
1725 		/* Unhash it, so that ancestors of killed async unlink
1726 		 * files will be cleaned up during umount */
1727 		return 1;
1728 	}
1729 	return 0;
1730 
1731 }
1732 
1733 /* Ensure that we revalidate inode->i_nlink */
1734 static void nfs_drop_nlink(struct inode *inode)
1735 {
1736 	spin_lock(&inode->i_lock);
1737 	/* drop the inode if we're reasonably sure this is the last link */
1738 	if (inode->i_nlink > 0)
1739 		drop_nlink(inode);
1740 	NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter();
1741 	nfs_set_cache_invalid(
1742 		inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME |
1743 			       NFS_INO_INVALID_NLINK);
1744 	spin_unlock(&inode->i_lock);
1745 }
1746 
1747 /*
1748  * Called when the dentry loses inode.
1749  * We use it to clean up silly-renamed files.
1750  */
1751 static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
1752 {
1753 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
1754 		nfs_complete_unlink(dentry, inode);
1755 		nfs_drop_nlink(inode);
1756 	}
1757 	iput(inode);
1758 }
1759 
1760 static void nfs_d_release(struct dentry *dentry)
1761 {
1762 	/* free cached devname value, if it survived that far */
1763 	if (unlikely(dentry->d_fsdata)) {
1764 		if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
1765 			WARN_ON(1);
1766 		else
1767 			kfree(dentry->d_fsdata);
1768 	}
1769 }
1770 
1771 const struct dentry_operations nfs_dentry_operations = {
1772 	.d_revalidate	= nfs_lookup_revalidate,
1773 	.d_weak_revalidate	= nfs_weak_revalidate,
1774 	.d_delete	= nfs_dentry_delete,
1775 	.d_iput		= nfs_dentry_iput,
1776 	.d_automount	= nfs_d_automount,
1777 	.d_release	= nfs_d_release,
1778 };
1779 EXPORT_SYMBOL_GPL(nfs_dentry_operations);
1780 
1781 struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
1782 {
1783 	struct dentry *res;
1784 	struct inode *inode = NULL;
1785 	struct nfs_fh *fhandle = NULL;
1786 	struct nfs_fattr *fattr = NULL;
1787 	unsigned long dir_verifier;
1788 	int error;
1789 
1790 	dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry);
1791 	nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
1792 
1793 	if (unlikely(dentry->d_name.len > NFS_SERVER(dir)->namelen))
1794 		return ERR_PTR(-ENAMETOOLONG);
1795 
1796 	/*
1797 	 * If we're doing an exclusive create, optimize away the lookup
1798 	 * but don't hash the dentry.
1799 	 */
1800 	if (nfs_is_exclusive_create(dir, flags) || flags & LOOKUP_RENAME_TARGET)
1801 		return NULL;
1802 
1803 	res = ERR_PTR(-ENOMEM);
1804 	fhandle = nfs_alloc_fhandle();
1805 	fattr = nfs_alloc_fattr_with_label(NFS_SERVER(dir));
1806 	if (fhandle == NULL || fattr == NULL)
1807 		goto out;
1808 
1809 	dir_verifier = nfs_save_change_attribute(dir);
1810 	trace_nfs_lookup_enter(dir, dentry, flags);
1811 	error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
1812 	if (error == -ENOENT) {
1813 		if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
1814 			dir_verifier = inode_peek_iversion_raw(dir);
1815 		goto no_entry;
1816 	}
1817 	if (error < 0) {
1818 		res = ERR_PTR(error);
1819 		goto out;
1820 	}
1821 	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
1822 	res = ERR_CAST(inode);
1823 	if (IS_ERR(res))
1824 		goto out;
1825 
1826 	/* Notify readdir to use READDIRPLUS */
1827 	nfs_force_use_readdirplus(dir);
1828 
1829 no_entry:
1830 	res = d_splice_alias(inode, dentry);
1831 	if (res != NULL) {
1832 		if (IS_ERR(res))
1833 			goto out;
1834 		dentry = res;
1835 	}
1836 	nfs_set_verifier(dentry, dir_verifier);
1837 out:
1838 	trace_nfs_lookup_exit(dir, dentry, flags, PTR_ERR_OR_ZERO(res));
1839 	nfs_free_fattr(fattr);
1840 	nfs_free_fhandle(fhandle);
1841 	return res;
1842 }
1843 EXPORT_SYMBOL_GPL(nfs_lookup);
1844 
1845 void nfs_d_prune_case_insensitive_aliases(struct inode *inode)
1846 {
1847 	/* Case insensitive server? Revalidate dentries */
1848 	if (inode && nfs_server_capable(inode, NFS_CAP_CASE_INSENSITIVE))
1849 		d_prune_aliases(inode);
1850 }
1851 EXPORT_SYMBOL_GPL(nfs_d_prune_case_insensitive_aliases);
1852 
1853 #if IS_ENABLED(CONFIG_NFS_V4)
1854 static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
1855 
1856 const struct dentry_operations nfs4_dentry_operations = {
1857 	.d_revalidate	= nfs4_lookup_revalidate,
1858 	.d_weak_revalidate	= nfs_weak_revalidate,
1859 	.d_delete	= nfs_dentry_delete,
1860 	.d_iput		= nfs_dentry_iput,
1861 	.d_automount	= nfs_d_automount,
1862 	.d_release	= nfs_d_release,
1863 };
1864 EXPORT_SYMBOL_GPL(nfs4_dentry_operations);
1865 
1866 static fmode_t flags_to_mode(int flags)
1867 {
1868 	fmode_t res = (__force fmode_t)flags & FMODE_EXEC;
1869 	if ((flags & O_ACCMODE) != O_WRONLY)
1870 		res |= FMODE_READ;
1871 	if ((flags & O_ACCMODE) != O_RDONLY)
1872 		res |= FMODE_WRITE;
1873 	return res;
1874 }
1875 
1876 static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp)
1877 {
1878 	return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp);
1879 }
1880 
/*
 * Open callback handed to finish_open(): calls
 * nfs_fscache_open_file() for the file and always reports success.
 */
static int do_open(struct inode *inode, struct file *filp)
{
	const int status = 0;

	nfs_fscache_open_file(inode, filp);

	return status;
}
1886 
1887 static int nfs_finish_open(struct nfs_open_context *ctx,
1888 			   struct dentry *dentry,
1889 			   struct file *file, unsigned open_flags)
1890 {
1891 	int err;
1892 
1893 	err = finish_open(file, dentry, do_open);
1894 	if (err)
1895 		goto out;
1896 	if (S_ISREG(file->f_path.dentry->d_inode->i_mode))
1897 		nfs_file_set_open_context(file, ctx);
1898 	else
1899 		err = -EOPENSTALE;
1900 out:
1901 	return err;
1902 }
1903 
/*
 * nfs_atomic_open - look up and open @dentry in @dir in one step
 * @dir: parent directory inode
 * @dentry: negative dentry to open (a positive dentry triggers BUG_ON)
 * @file: file being opened
 * @open_flags: open(2) flags
 * @mode: creation mode, used only when O_CREAT is set
 *
 * The open itself is delegated to the protocol's ->open_context()
 * operation. O_DIRECTORY opens, and open_context failures of -EISDIR,
 * -ENOTDIR or (without O_NOFOLLOW) -ELOOP, fall back to an ordinary
 * nfs_lookup() followed by finish_no_open() at the 'no_open' label.
 *
 * For a non-creating open of a dentry that is not in lookup, the dentry
 * is dropped and replaced by one obtained from d_alloc_parallel();
 * 'switched' records this so the exit paths finish the lookup state and
 * drop the extra reference.
 *
 * Returns 0 or a negative errno; the no-open paths return whatever
 * finish_no_open() returns.
 */
int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
		    struct file *file, unsigned open_flags,
		    umode_t mode)
{
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
	struct nfs_open_context *ctx;
	struct dentry *res;
	struct iattr attr = { .ia_valid = ATTR_OPEN };
	struct inode *inode;
	unsigned int lookup_flags = 0;
	unsigned long dir_verifier;
	bool switched = false;
	int created = 0;
	int err;

	/* Expect a negative dentry */
	BUG_ON(d_inode(dentry));

	dfprintk(VFS, "NFS: atomic_open(%s/%lu), %pd\n",
			dir->i_sb->s_id, dir->i_ino, dentry);

	err = nfs_check_flags(open_flags);
	if (err)
		return err;

	/* NFS only supports OPEN on regular files */
	if ((open_flags & O_DIRECTORY)) {
		if (!d_in_lookup(dentry)) {
			/*
			 * Hashed negative dentry with O_DIRECTORY: dentry was
			 * revalidated and is fine, no need to perform lookup
			 * again
			 */
			return -ENOENT;
		}
		lookup_flags = LOOKUP_OPEN|LOOKUP_DIRECTORY;
		goto no_open;
	}

	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
		return -ENAMETOOLONG;

	if (open_flags & O_CREAT) {
		struct nfs_server *server = NFS_SERVER(dir);

		/*
		 * Apply the umask locally unless the server advertises the
		 * mode_umask attribute.
		 */
		if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
			mode &= ~current_umask();

		attr.ia_valid |= ATTR_MODE;
		attr.ia_mode = mode;
	}
	if (open_flags & O_TRUNC) {
		attr.ia_valid |= ATTR_SIZE;
		attr.ia_size = 0;
	}

	/*
	 * Non-creating open of a dentry that is not in lookup: drop it and
	 * continue with a dentry from d_alloc_parallel(). If that dentry is
	 * not in lookup either, hand it straight to finish_no_open().
	 */
	if (!(open_flags & O_CREAT) && !d_in_lookup(dentry)) {
		d_drop(dentry);
		switched = true;
		dentry = d_alloc_parallel(dentry->d_parent,
					  &dentry->d_name, &wq);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
		if (unlikely(!d_in_lookup(dentry)))
			return finish_no_open(file, dentry);
	}

	ctx = create_nfs_open_context(dentry, open_flags, file);
	err = PTR_ERR(ctx);
	if (IS_ERR(ctx))
		goto out;

	trace_nfs_atomic_open_enter(dir, ctx, open_flags);
	inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, &created);
	/* 'created' is checked before the error test, so it applies to both outcomes */
	if (created)
		file->f_mode |= FMODE_CREATED;
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
		put_nfs_open_context(ctx);
		d_drop(dentry);
		switch (err) {
		case -ENOENT:
			/* Instantiate a negative dentry and stamp its verifier */
			d_splice_alias(NULL, dentry);
			if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
				dir_verifier = inode_peek_iversion_raw(dir);
			else
				dir_verifier = nfs_save_change_attribute(dir);
			nfs_set_verifier(dentry, dir_verifier);
			break;
		case -EISDIR:
		case -ENOTDIR:
			goto no_open;
		case -ELOOP:
			if (!(open_flags & O_NOFOLLOW))
				goto no_open;
			break;
			/* case -EINVAL: */
		default:
			break;
		}
		goto out;
	}

	err = nfs_finish_open(ctx, ctx->dentry, file, open_flags);
	trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
	put_nfs_open_context(ctx);
out:
	/* Finish the lookup state of, and release, the replacement dentry */
	if (unlikely(switched)) {
		d_lookup_done(dentry);
		dput(dentry);
	}
	return err;

no_open:
	/*
	 * Fall back to a plain lookup. In both the NULL and the non-NULL
	 * result case, a non-directory/non-symlink found under
	 * LOOKUP_DIRECTORY becomes -ENOTDIR and a regular file becomes
	 * -EOPENSTALE; a non-NULL 'res' is released before being replaced
	 * by the error pointer.
	 */
	res = nfs_lookup(dir, dentry, lookup_flags);
	if (!res) {
		inode = d_inode(dentry);
		if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
		    !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)))
			res = ERR_PTR(-ENOTDIR);
		else if (inode && S_ISREG(inode->i_mode))
			res = ERR_PTR(-EOPENSTALE);
	} else if (!IS_ERR(res)) {
		inode = d_inode(res);
		if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
		    !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) {
			dput(res);
			res = ERR_PTR(-ENOTDIR);
		} else if (inode && S_ISREG(inode->i_mode)) {
			dput(res);
			res = ERR_PTR(-EOPENSTALE);
		}
	}
	if (switched) {
		d_lookup_done(dentry);
		/*
		 * With no other result, the replacement dentry itself is
		 * passed to finish_no_open(); otherwise it is released.
		 */
		if (!res)
			res = dentry;
		else
			dput(dentry);
	}
	if (IS_ERR(res))
		return PTR_ERR(res);
	return finish_no_open(file, res);
}
EXPORT_SYMBOL_GPL(nfs_atomic_open);
2050 
2051 static int
2052 nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
2053 			  unsigned int flags)
2054 {
2055 	struct inode *inode;
2056 
2057 	if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
2058 		goto full_reval;
2059 	if (d_mountpoint(dentry))
2060 		goto full_reval;
2061 
2062 	inode = d_inode(dentry);
2063 
2064 	/* We can't create new files in nfs_open_revalidate(), so we
2065 	 * optimize away revalidation of negative dentries.
2066 	 */
2067 	if (inode == NULL)
2068 		goto full_reval;
2069 
2070 	if (nfs_verifier_is_delegated(dentry))
2071 		return nfs_lookup_revalidate_delegated(dir, dentry, inode);
2072 
2073 	/* NFS only supports OPEN on regular files */
2074 	if (!S_ISREG(inode->i_mode))
2075 		goto full_reval;
2076 
2077 	/* We cannot do exclusive creation on a positive dentry */
2078 	if (flags & (LOOKUP_EXCL | LOOKUP_REVAL))
2079 		goto reval_dentry;
2080 
2081 	/* Check if the directory changed */
2082 	if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU))
2083 		goto reval_dentry;
2084 
2085 	/* Let f_op->open() actually open (and revalidate) the file */
2086 	return 1;
2087 reval_dentry:
2088 	if (flags & LOOKUP_RCU)
2089 		return -ECHILD;
2090 	return nfs_lookup_revalidate_dentry(dir, dentry, inode);
2091 
2092 full_reval:
2093 	return nfs_do_lookup_revalidate(dir, dentry, flags);
2094 }
2095 
2096 static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
2097 {
2098 	return __nfs_lookup_revalidate(dentry, flags,
2099 			nfs4_do_lookup_revalidate);
2100 }
2101 
#endif /* IS_ENABLED(CONFIG_NFS_V4) */
2103 
2104 struct dentry *
2105 nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
2106 				struct nfs_fattr *fattr)
2107 {
2108 	struct dentry *parent = dget_parent(dentry);
2109 	struct inode *dir = d_inode(parent);
2110 	struct inode *inode;
2111 	struct dentry *d;
2112 	int error;
2113 
2114 	d_drop(dentry);
2115 
2116 	if (fhandle->size == 0) {
2117 		error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
2118 		if (error)
2119 			goto out_error;
2120 	}
2121 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2122 	if (!(fattr->valid & NFS_ATTR_FATTR)) {
2123 		struct nfs_server *server = NFS_SB(dentry->d_sb);
2124 		error = server->nfs_client->rpc_ops->getattr(server, fhandle,
2125 				fattr, NULL);
2126 		if (error < 0)
2127 			goto out_error;
2128 	}
2129 	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
2130 	d = d_splice_alias(inode, dentry);
2131 out:
2132 	dput(parent);
2133 	return d;
2134 out_error:
2135 	d = ERR_PTR(error);
2136 	goto out;
2137 }
2138 EXPORT_SYMBOL_GPL(nfs_add_or_obtain);
2139 
/*
 * Code common to create, mkdir, and mknod.
 *
 * Wrapper around nfs_add_or_obtain() for callers that only want a status:
 * an error pointer is converted to its errno value, otherwise the returned
 * dentry reference is dropped and 0 is reported.
 */
int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
				struct nfs_fattr *fattr)
{
	struct dentry *res = nfs_add_or_obtain(dentry, fhandle, fattr);

	if (!IS_ERR(res)) {
		/* Callers don't care */
		dput(res);
		return 0;
	}
	return PTR_ERR(res);
}
EXPORT_SYMBOL_GPL(nfs_instantiate);
2157 
2158 /*
2159  * Following a failed create operation, we drop the dentry rather
2160  * than retain a negative dentry. This avoids a problem in the event
2161  * that the operation succeeded on the server, but an error in the
2162  * reply path made it appear to have failed.
2163  */
2164 int nfs_create(struct user_namespace *mnt_userns, struct inode *dir,
2165 	       struct dentry *dentry, umode_t mode, bool excl)
2166 {
2167 	struct iattr attr;
2168 	int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
2169 	int error;
2170 
2171 	dfprintk(VFS, "NFS: create(%s/%lu), %pd\n",
2172 			dir->i_sb->s_id, dir->i_ino, dentry);
2173 
2174 	attr.ia_mode = mode;
2175 	attr.ia_valid = ATTR_MODE;
2176 
2177 	trace_nfs_create_enter(dir, dentry, open_flags);
2178 	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
2179 	trace_nfs_create_exit(dir, dentry, open_flags, error);
2180 	if (error != 0)
2181 		goto out_err;
2182 	return 0;
2183 out_err:
2184 	d_drop(dentry);
2185 	return error;
2186 }
2187 EXPORT_SYMBOL_GPL(nfs_create);
2188 
2189 /*
2190  * See comments for nfs_proc_create regarding failed operations.
2191  */
2192 int
2193 nfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
2194 	  struct dentry *dentry, umode_t mode, dev_t rdev)
2195 {
2196 	struct iattr attr;
2197 	int status;
2198 
2199 	dfprintk(VFS, "NFS: mknod(%s/%lu), %pd\n",
2200 			dir->i_sb->s_id, dir->i_ino, dentry);
2201 
2202 	attr.ia_mode = mode;
2203 	attr.ia_valid = ATTR_MODE;
2204 
2205 	trace_nfs_mknod_enter(dir, dentry);
2206 	status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
2207 	trace_nfs_mknod_exit(dir, dentry, status);
2208 	if (status != 0)
2209 		goto out_err;
2210 	return 0;
2211 out_err:
2212 	d_drop(dentry);
2213 	return status;
2214 }
2215 EXPORT_SYMBOL_GPL(nfs_mknod);
2216 
2217 /*
2218  * See comments for nfs_proc_create regarding failed operations.
2219  */
2220 int nfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
2221 	      struct dentry *dentry, umode_t mode)
2222 {
2223 	struct iattr attr;
2224 	int error;
2225 
2226 	dfprintk(VFS, "NFS: mkdir(%s/%lu), %pd\n",
2227 			dir->i_sb->s_id, dir->i_ino, dentry);
2228 
2229 	attr.ia_valid = ATTR_MODE;
2230 	attr.ia_mode = mode | S_IFDIR;
2231 
2232 	trace_nfs_mkdir_enter(dir, dentry);
2233 	error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
2234 	trace_nfs_mkdir_exit(dir, dentry, error);
2235 	if (error != 0)
2236 		goto out_err;
2237 	return 0;
2238 out_err:
2239 	d_drop(dentry);
2240 	return error;
2241 }
2242 EXPORT_SYMBOL_GPL(nfs_mkdir);
2243 
/*
 * Called when an operation on @dentry returned -ENOENT: if the dentry
 * still passes simple_positive(), remove it with d_delete().
 */
static void nfs_dentry_handle_enoent(struct dentry *dentry)
{
	if (!simple_positive(dentry))
		return;
	d_delete(dentry);
}
2249 
2250 static void nfs_dentry_remove_handle_error(struct inode *dir,
2251 					   struct dentry *dentry, int error)
2252 {
2253 	switch (error) {
2254 	case -ENOENT:
2255 		d_delete(dentry);
2256 		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2257 		break;
2258 	case 0:
2259 		nfs_d_prune_case_insensitive_aliases(d_inode(dentry));
2260 		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2261 	}
2262 }
2263 
2264 int nfs_rmdir(struct inode *dir, struct dentry *dentry)
2265 {
2266 	int error;
2267 
2268 	dfprintk(VFS, "NFS: rmdir(%s/%lu), %pd\n",
2269 			dir->i_sb->s_id, dir->i_ino, dentry);
2270 
2271 	trace_nfs_rmdir_enter(dir, dentry);
2272 	if (d_really_is_positive(dentry)) {
2273 		down_write(&NFS_I(d_inode(dentry))->rmdir_sem);
2274 		error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
2275 		/* Ensure the VFS deletes this inode */
2276 		switch (error) {
2277 		case 0:
2278 			clear_nlink(d_inode(dentry));
2279 			break;
2280 		case -ENOENT:
2281 			nfs_dentry_handle_enoent(dentry);
2282 		}
2283 		up_write(&NFS_I(d_inode(dentry))->rmdir_sem);
2284 	} else
2285 		error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
2286 	nfs_dentry_remove_handle_error(dir, dentry, error);
2287 	trace_nfs_rmdir_exit(dir, dentry, error);
2288 
2289 	return error;
2290 }
2291 EXPORT_SYMBOL_GPL(nfs_rmdir);
2292 
2293 /*
2294  * Remove a file after making sure there are no pending writes,
2295  * and after checking that the file has only one user.
2296  *
2297  * We invalidate the attribute cache and free the inode prior to the operation
2298  * to avoid possible races if the server reuses the inode.
2299  */
2300 static int nfs_safe_remove(struct dentry *dentry)
2301 {
2302 	struct inode *dir = d_inode(dentry->d_parent);
2303 	struct inode *inode = d_inode(dentry);
2304 	int error = -EBUSY;
2305 
2306 	dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry);
2307 
2308 	/* If the dentry was sillyrenamed, we simply call d_delete() */
2309 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
2310 		error = 0;
2311 		goto out;
2312 	}
2313 
2314 	trace_nfs_remove_enter(dir, dentry);
2315 	if (inode != NULL) {
2316 		error = NFS_PROTO(dir)->remove(dir, dentry);
2317 		if (error == 0)
2318 			nfs_drop_nlink(inode);
2319 	} else
2320 		error = NFS_PROTO(dir)->remove(dir, dentry);
2321 	if (error == -ENOENT)
2322 		nfs_dentry_handle_enoent(dentry);
2323 	trace_nfs_remove_exit(dir, dentry, error);
2324 out:
2325 	return error;
2326 }
2327 
2328 /*  We do silly rename. In case sillyrename() returns -EBUSY, the inode
2329  *  belongs to an active ".nfs..." file and we return -EBUSY.
2330  *
2331  *  If sillyrename() returns 0, we do nothing, otherwise we unlink.
2332  */
2333 int nfs_unlink(struct inode *dir, struct dentry *dentry)
2334 {
2335 	int error;
2336 	int need_rehash = 0;
2337 
2338 	dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
2339 		dir->i_ino, dentry);
2340 
2341 	trace_nfs_unlink_enter(dir, dentry);
2342 	spin_lock(&dentry->d_lock);
2343 	if (d_count(dentry) > 1 && !test_bit(NFS_INO_PRESERVE_UNLINKED,
2344 					     &NFS_I(d_inode(dentry))->flags)) {
2345 		spin_unlock(&dentry->d_lock);
2346 		/* Start asynchronous writeout of the inode */
2347 		write_inode_now(d_inode(dentry), 0);
2348 		error = nfs_sillyrename(dir, dentry);
2349 		goto out;
2350 	}
2351 	if (!d_unhashed(dentry)) {
2352 		__d_drop(dentry);
2353 		need_rehash = 1;
2354 	}
2355 	spin_unlock(&dentry->d_lock);
2356 	error = nfs_safe_remove(dentry);
2357 	nfs_dentry_remove_handle_error(dir, dentry, error);
2358 	if (need_rehash)
2359 		d_rehash(dentry);
2360 out:
2361 	trace_nfs_unlink_exit(dir, dentry, error);
2362 	return error;
2363 }
2364 EXPORT_SYMBOL_GPL(nfs_unlink);
2365 
2366 /*
2367  * To create a symbolic link, most file systems instantiate a new inode,
2368  * add a page to it containing the path, then write it out to the disk
2369  * using prepare_write/commit_write.
2370  *
2371  * Unfortunately the NFS client can't create the in-core inode first
2372  * because it needs a file handle to create an in-core inode (see
2373  * fs/nfs/inode.c:nfs_fhget).  We only have a file handle *after* the
2374  * symlink request has completed on the server.
2375  *
2376  * So instead we allocate a raw page, copy the symname into it, then do
2377  * the SYMLINK request with the page as the buffer.  If it succeeds, we
2378  * now have a new file handle and can instantiate an in-core NFS inode
2379  * and move the raw page into its mapping.
2380  */
2381 int nfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
2382 		struct dentry *dentry, const char *symname)
2383 {
2384 	struct page *page;
2385 	char *kaddr;
2386 	struct iattr attr;
2387 	unsigned int pathlen = strlen(symname);
2388 	int error;
2389 
2390 	dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s)\n", dir->i_sb->s_id,
2391 		dir->i_ino, dentry, symname);
2392 
2393 	if (pathlen > PAGE_SIZE)
2394 		return -ENAMETOOLONG;
2395 
2396 	attr.ia_mode = S_IFLNK | S_IRWXUGO;
2397 	attr.ia_valid = ATTR_MODE;
2398 
2399 	page = alloc_page(GFP_USER);
2400 	if (!page)
2401 		return -ENOMEM;
2402 
2403 	kaddr = page_address(page);
2404 	memcpy(kaddr, symname, pathlen);
2405 	if (pathlen < PAGE_SIZE)
2406 		memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
2407 
2408 	trace_nfs_symlink_enter(dir, dentry);
2409 	error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
2410 	trace_nfs_symlink_exit(dir, dentry, error);
2411 	if (error != 0) {
2412 		dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s) error %d\n",
2413 			dir->i_sb->s_id, dir->i_ino,
2414 			dentry, symname, error);
2415 		d_drop(dentry);
2416 		__free_page(page);
2417 		return error;
2418 	}
2419 
2420 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2421 
2422 	/*
2423 	 * No big deal if we can't add this page to the page cache here.
2424 	 * READLINK will get the missing page from the server if needed.
2425 	 */
2426 	if (!add_to_page_cache_lru(page, d_inode(dentry)->i_mapping, 0,
2427 							GFP_KERNEL)) {
2428 		SetPageUptodate(page);
2429 		unlock_page(page);
2430 		/*
2431 		 * add_to_page_cache_lru() grabs an extra page refcount.
2432 		 * Drop it here to avoid leaking this page later.
2433 		 */
2434 		put_page(page);
2435 	} else
2436 		__free_page(page);
2437 
2438 	return 0;
2439 }
2440 EXPORT_SYMBOL_GPL(nfs_symlink);
2441 
2442 int
2443 nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
2444 {
2445 	struct inode *inode = d_inode(old_dentry);
2446 	int error;
2447 
2448 	dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n",
2449 		old_dentry, dentry);
2450 
2451 	trace_nfs_link_enter(inode, dir, dentry);
2452 	d_drop(dentry);
2453 	if (S_ISREG(inode->i_mode))
2454 		nfs_sync_inode(inode);
2455 	error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
2456 	if (error == 0) {
2457 		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2458 		ihold(inode);
2459 		d_add(dentry, inode);
2460 	}
2461 	trace_nfs_link_exit(inode, dir, dentry, error);
2462 	return error;
2463 }
2464 EXPORT_SYMBOL_GPL(nfs_link);
2465 
/*
 * RENAME
 * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
 * different file handle for the same inode after a rename (e.g. when
 * moving to a different directory). A fail-safe method to do so would
 * be to look up old_dir/old_name, create a link to new_dir/new_name and
 * rename the old file using the sillyrename stuff. This way, the original
 * file in old_dir will go away when the last process iput()s the inode.
 *
 * FIXED.
 *
 * It actually works quite well. One needs to have the possibility for
 * at least one ".nfs..." file in each directory the file ever gets
 * moved or linked to which happens automagically with the new
 * implementation that only depends on the dcache stuff instead of
 * using the inode layer
 *
 * Unfortunately, things are a little more complicated than indicated
 * above. For a cross-directory move, we want to make sure we can get
 * rid of the old inode after the operation.  This means there must be
 * no pending writes (if it's a file), and the use count must be 1.
 * If these conditions are met, we can drop the dentries before doing
 * the rename.
 *
 * Summary of what the code below does:
 *  - any non-zero @flags is rejected with -EINVAL;
 *  - an existing non-directory target is unhashed, and if d_count() on it
 *    exceeds 2 it is silly-renamed and a freshly allocated dentry with
 *    the same name takes its place as the rename target;
 *  - the rename itself is issued via nfs_async_rename() and waited for;
 *  - on success the old inode's attributes are marked for revalidation,
 *    the target inode's link count is dropped (if there still is one) and
 *    the dentries are d_move()d.
 *
 * Returns 0 on success or a negative errno.
 */
int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
	       struct dentry *old_dentry, struct inode *new_dir,
	       struct dentry *new_dentry, unsigned int flags)
{
	struct inode *old_inode = d_inode(old_dentry);
	struct inode *new_inode = d_inode(new_dentry);
	/*
	 * dentry: replacement target allocated for the silly-rename case.
	 * rehash: target dentry that was unhashed here and must be rehashed
	 *         on the way out.
	 */
	struct dentry *dentry = NULL, *rehash = NULL;
	struct rpc_task *task;
	/* Default result for the early "goto out" exits in the busy-target path */
	int error = -EBUSY;

	if (flags)
		return -EINVAL;

	dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n",
		 old_dentry, new_dentry,
		 d_count(new_dentry));

	trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry);
	/*
	 * For non-directories, check whether the target is busy and if so,
	 * make a copy of the dentry and then do a silly-rename. If the
	 * silly-rename succeeds, the copied dentry is hashed and becomes
	 * the new target.
	 */
	if (new_inode && !S_ISDIR(new_inode->i_mode)) {
		/*
		 * To prevent any new references to the target during the
		 * rename, we unhash the dentry in advance.
		 */
		if (!d_unhashed(new_dentry)) {
			d_drop(new_dentry);
			rehash = new_dentry;
		}

		if (d_count(new_dentry) > 2) {
			int err;

			/* copy the target dentry's name */
			dentry = d_alloc(new_dentry->d_parent,
					 &new_dentry->d_name);
			/* Allocation failure is reported as -EBUSY (error is untouched) */
			if (!dentry)
				goto out;

			/* silly-rename the existing target ... */
			err = nfs_sillyrename(new_dir, new_dentry);
			/* err itself is not propagated; the caller sees -EBUSY */
			if (err)
				goto out;

			/*
			 * From here on the copy is the rename target; the
			 * original target no longer needs rehashing and there
			 * is no target inode whose link count must be dropped.
			 */
			new_dentry = dentry;
			rehash = NULL;
			new_inode = NULL;
		}
	}

	if (S_ISREG(old_inode->i_mode))
		nfs_sync_inode(old_inode);
	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
	if (IS_ERR(task)) {
		error = PTR_ERR(task);
		goto out;
	}

	error = rpc_wait_for_completion_task(task);
	if (error != 0) {
		/* The wait failed: flag the rename data as cancelled */
		((struct nfs_renamedata *)task->tk_calldata)->cancelled = 1;
		/* Paired with the atomic_dec_and_test() barrier in rpc_do_put_task() */
		smp_wmb();
	} else
		error = task->tk_status;
	rpc_put_task(task);
	/* Ensure the inode attributes are revalidated */
	if (error == 0) {
		spin_lock(&old_inode->i_lock);
		NFS_I(old_inode)->attr_gencount = nfs_inc_attr_generation_counter();
		nfs_set_cache_invalid(old_inode, NFS_INO_INVALID_CHANGE |
							 NFS_INO_INVALID_CTIME |
							 NFS_INO_REVAL_FORCED);
		spin_unlock(&old_inode->i_lock);
	}
out:
	if (rehash)
		d_rehash(rehash);
	trace_nfs_rename_exit(old_dir, old_dentry,
			new_dir, new_dentry, error);
	if (!error) {
		if (new_inode != NULL)
			nfs_drop_nlink(new_inode);
		/*
		 * The d_move() should be here instead of in an async RPC completion
		 * handler because we need the proper locks to move the dentry.  If
		 * we're interrupted by a signal, the async RPC completion handler
		 * should mark the directories for revalidation.
		 */
		d_move(old_dentry, new_dentry);
		nfs_set_verifier(old_dentry,
					nfs_save_change_attribute(new_dir));
	} else if (error == -ENOENT)
		nfs_dentry_handle_enoent(old_dentry);

	/* new dentry created? */
	if (dentry)
		dput(dentry);
	return error;
}
EXPORT_SYMBOL_GPL(nfs_rename);
2595 
/* Lock held around updates of nfs_access_lru_list */
static DEFINE_SPINLOCK(nfs_access_lru_lock);
/* Global list of inodes, linked through nfs_inode.access_cache_inode_lru */
static LIST_HEAD(nfs_access_lru_list);
/*
 * Number of access cache entries: incremented in nfs_access_add_cache(),
 * decremented in nfs_access_free_entry().
 */
static atomic_long_t nfs_access_nr_entries;

/*
 * Entry count above which nfs_access_cache_enforce_limit() triggers a scan.
 * Exposed as a module parameter with mode 0644.
 */
static unsigned long nfs_access_max_cachesize = 4*1024*1024;
module_param(nfs_access_max_cachesize, ulong, 0644);
MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length");
2603 
/*
 * Release one access cache entry: drop its group_info reference, queue the
 * entry itself for freeing via kfree_rcu(), and decrement the global entry
 * counter (with barriers on either side of the atomic decrement).
 */
static void nfs_access_free_entry(struct nfs_access_entry *entry)
{
	put_group_info(entry->group_info);
	/* Freed via kfree_rcu() rather than immediately */
	kfree_rcu(entry, rcu_head);
	smp_mb__before_atomic();
	atomic_long_dec(&nfs_access_nr_entries);
	smp_mb__after_atomic();
}
2612 
2613 static void nfs_access_free_list(struct list_head *head)
2614 {
2615 	struct nfs_access_entry *cache;
2616 
2617 	while (!list_empty(head)) {
2618 		cache = list_entry(head->next, struct nfs_access_entry, lru);
2619 		list_del(&cache->lru);
2620 		nfs_access_free_entry(cache);
2621 	}
2622 }
2623 
/*
 * Reclaim access cache entries.
 *
 * Walks nfs_access_lru_list under nfs_access_lru_lock, visiting at most
 * @nr_to_scan inodes.  For each inode, under its i_lock, the first entry
 * on access_cache_entry_lru is detached (moved to a private list and
 * erased from the inode's rbtree).  An inode that still has entries is
 * moved to the tail of the global list; one that has none left is removed
 * from it and has NFS_INO_ACL_LRU_SET cleared.
 *
 * The detached entries are freed only after both locks have been dropped.
 *
 * Returns the number of entries detached.
 */
static unsigned long
nfs_do_access_cache_scan(unsigned int nr_to_scan)
{
	/* Private list collecting the entries to free once the locks are dropped */
	LIST_HEAD(head);
	struct nfs_inode *nfsi, *next;
	struct nfs_access_entry *cache;
	long freed = 0;

	spin_lock(&nfs_access_lru_lock);
	list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
		struct inode *inode;

		if (nr_to_scan-- == 0)
			break;
		inode = &nfsi->vfs_inode;
		spin_lock(&inode->i_lock);
		/* Nothing cached for this inode: just take it off the global list */
		if (list_empty(&nfsi->access_cache_entry_lru))
			goto remove_lru_entry;
		cache = list_entry(nfsi->access_cache_entry_lru.next,
				struct nfs_access_entry, lru);
		list_move(&cache->lru, &head);
		rb_erase(&cache->rb_node, &nfsi->access_cache);
		freed++;
		if (!list_empty(&nfsi->access_cache_entry_lru))
			list_move_tail(&nfsi->access_cache_inode_lru,
					&nfs_access_lru_list);
		else {
			/* Also reached by the goto above when the inode had no entries */
remove_lru_entry:
			list_del_init(&nfsi->access_cache_inode_lru);
			smp_mb__before_atomic();
			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
			smp_mb__after_atomic();
		}
		spin_unlock(&inode->i_lock);
	}
	spin_unlock(&nfs_access_lru_lock);
	nfs_access_free_list(&head);
	return freed;
}
2663 
2664 unsigned long
2665 nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
2666 {
2667 	int nr_to_scan = sc->nr_to_scan;
2668 	gfp_t gfp_mask = sc->gfp_mask;
2669 
2670 	if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
2671 		return SHRINK_STOP;
2672 	return nfs_do_access_cache_scan(nr_to_scan);
2673 }
2674 
2675 
2676 unsigned long
2677 nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
2678 {
2679 	return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
2680 }
2681 
2682 static void
2683 nfs_access_cache_enforce_limit(void)
2684 {
2685 	long nr_entries = atomic_long_read(&nfs_access_nr_entries);
2686 	unsigned long diff;
2687 	unsigned int nr_to_scan;
2688 
2689 	if (nr_entries < 0 || nr_entries <= nfs_access_max_cachesize)
2690 		return;
2691 	nr_to_scan = 100;
2692 	diff = nr_entries - nfs_access_max_cachesize;
2693 	if (diff < nr_to_scan)
2694 		nr_to_scan = diff;
2695 	nfs_do_access_cache_scan(nr_to_scan);
2696 }
2697 
2698 static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
2699 {
2700 	struct rb_root *root_node = &nfsi->access_cache;
2701 	struct rb_node *n;
2702 	struct nfs_access_entry *entry;
2703 
2704 	/* Unhook entries from the cache */
2705 	while ((n = rb_first(root_node)) != NULL) {
2706 		entry = rb_entry(n, struct nfs_access_entry, rb_node);
2707 		rb_erase(n, root_node);
2708 		list_move(&entry->lru, head);
2709 	}
2710 	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
2711 }
2712 
2713 void nfs_access_zap_cache(struct inode *inode)
2714 {
2715 	LIST_HEAD(head);
2716 
2717 	if (test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags) == 0)
2718 		return;
2719 	/* Remove from global LRU init */
2720 	spin_lock(&nfs_access_lru_lock);
2721 	if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
2722 		list_del_init(&NFS_I(inode)->access_cache_inode_lru);
2723 
2724 	spin_lock(&inode->i_lock);
2725 	__nfs_access_zap_cache(NFS_I(inode), &head);
2726 	spin_unlock(&inode->i_lock);
2727 	spin_unlock(&nfs_access_lru_lock);
2728 	nfs_access_free_list(&head);
2729 }
2730 EXPORT_SYMBOL_GPL(nfs_access_zap_cache);
2731 
2732 static int access_cmp(const struct cred *a, const struct nfs_access_entry *b)
2733 {
2734 	struct group_info *ga, *gb;
2735 	int g;
2736 
2737 	if (uid_lt(a->fsuid, b->fsuid))
2738 		return -1;
2739 	if (uid_gt(a->fsuid, b->fsuid))
2740 		return 1;
2741 
2742 	if (gid_lt(a->fsgid, b->fsgid))
2743 		return -1;
2744 	if (gid_gt(a->fsgid, b->fsgid))
2745 		return 1;
2746 
2747 	ga = a->group_info;
2748 	gb = b->group_info;
2749 	if (ga == gb)
2750 		return 0;
2751 	if (ga == NULL)
2752 		return -1;
2753 	if (gb == NULL)
2754 		return 1;
2755 	if (ga->ngroups < gb->ngroups)
2756 		return -1;
2757 	if (ga->ngroups > gb->ngroups)
2758 		return 1;
2759 
2760 	for (g = 0; g < ga->ngroups; g++) {
2761 		if (gid_lt(ga->gid[g], gb->gid[g]))
2762 			return -1;
2763 		if (gid_gt(ga->gid[g], gb->gid[g]))
2764 			return 1;
2765 	}
2766 	return 0;
2767 }
2768 
2769 static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, const struct cred *cred)
2770 {
2771 	struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
2772 
2773 	while (n != NULL) {
2774 		struct nfs_access_entry *entry =
2775 			rb_entry(n, struct nfs_access_entry, rb_node);
2776 		int cmp = access_cmp(cred, entry);
2777 
2778 		if (cmp < 0)
2779 			n = n->rb_left;
2780 		else if (cmp > 0)
2781 			n = n->rb_right;
2782 		else
2783 			return entry;
2784 	}
2785 	return NULL;
2786 }
2787 
/*
 * Look up the cached access mask for @cred on @inode under i_lock.
 *
 * On a hit, the mask is stored in *@mask, the entry is moved to the tail of
 * the inode's entry LRU and 0 is returned.
 *
 * Returns -ENOENT when there is no matching entry, or when the access cache
 * is flagged NFS_INO_INVALID_ACCESS (in which case the whole cache is zapped
 * first).  When an entry exists but the cache needs revalidating, -ECHILD is
 * returned if @may_block is false; otherwise the inode is revalidated once
 * (any error from that is returned) and the lookup is retried.
 */
static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_access_entry *cache;
	/* Cleared after the first revalidation so the loop retries only once */
	bool retry = true;
	int err;

	spin_lock(&inode->i_lock);
	for(;;) {
		if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
			goto out_zap;
		cache = nfs_access_search_rbtree(inode, cred);
		err = -ENOENT;
		if (cache == NULL)
			goto out;
		/* Found an entry, is our attribute cache valid? */
		if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
			break;
		/* Already revalidated once: use the entry as it is */
		if (!retry)
			break;
		err = -ECHILD;
		if (!may_block)
			goto out;
		/* i_lock is dropped across the revalidation call */
		spin_unlock(&inode->i_lock);
		err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
		if (err)
			return err;
		spin_lock(&inode->i_lock);
		retry = false;
	}
	*mask = cache->mask;
	list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
	err = 0;
out:
	spin_unlock(&inode->i_lock);
	return err;
out_zap:
	spin_unlock(&inode->i_lock);
	nfs_access_zap_cache(inode);
	return -ENOENT;
}
2829 
/*
 * Lockless fast path for the access cache lookup.
 *
 * Under rcu_read_lock() only, looks at the entry at the tail of the
 * inode's entry LRU.  If it matches @cred and the access cache does not
 * need revalidating, its mask is stored in *@mask and 0 is returned.
 * In every other case -ECHILD is returned and *@mask is left untouched.
 */
static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, u32 *mask)
{
	/* Only check the most recently returned cache entry,
	 * but do it without locking.
	 */
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_access_entry *cache;
	int err = -ECHILD;
	struct list_head *lh;

	rcu_read_lock();
	if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
		goto out;
	lh = rcu_dereference(list_tail_rcu(&nfsi->access_cache_entry_lru));
	cache = list_entry(lh, struct nfs_access_entry, lru);
	/* lh pointing back at the list head means the LRU is empty */
	if (lh == &nfsi->access_cache_entry_lru ||
	    access_cmp(cred, cache) != 0)
		cache = NULL;
	if (cache == NULL)
		goto out;
	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
		goto out;
	*mask = cache->mask;
	err = 0;
out:
	rcu_read_unlock();
	return err;
}
2858 
2859 int nfs_access_get_cached(struct inode *inode, const struct cred *cred,
2860 			  u32 *mask, bool may_block)
2861 {
2862 	int status;
2863 
2864 	status = nfs_access_get_cached_rcu(inode, cred, mask);
2865 	if (status != 0)
2866 		status = nfs_access_get_cached_locked(inode, cred, mask,
2867 		    may_block);
2868 
2869 	return status;
2870 }
2871 EXPORT_SYMBOL_GPL(nfs_access_get_cached);
2872 
/*
 * Insert @set into @inode's access cache rbtree, keyed by @cred through
 * access_cmp(), and append it to the tail of the inode's entry LRU.
 *
 * If the tree already holds an entry that compares equal to @cred, that
 * entry is replaced in the tree by @set, removed from the LRU and freed
 * after i_lock has been released.
 */
static void nfs_access_add_rbtree(struct inode *inode,
				  struct nfs_access_entry *set,
				  const struct cred *cred)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	struct rb_root *root_node = &nfsi->access_cache;
	struct rb_node **p = &root_node->rb_node;
	struct rb_node *parent = NULL;
	struct nfs_access_entry *entry;
	int cmp;

	spin_lock(&inode->i_lock);
	/* Descend to the insertion point, stopping early on an equal key */
	while (*p != NULL) {
		parent = *p;
		entry = rb_entry(parent, struct nfs_access_entry, rb_node);
		cmp = access_cmp(cred, entry);

		if (cmp < 0)
			p = &parent->rb_left;
		else if (cmp > 0)
			p = &parent->rb_right;
		else
			goto found;
	}
	rb_link_node(&set->rb_node, parent, p);
	rb_insert_color(&set->rb_node, root_node);
	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
	spin_unlock(&inode->i_lock);
	return;
found:
	/* parent is the node of the existing entry with the same key */
	rb_replace_node(parent, &set->rb_node, root_node);
	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
	list_del(&entry->lru);
	spin_unlock(&inode->i_lock);
	nfs_access_free_entry(entry);
}
2909 
/*
 * Record an access result for @cred on @inode in the access cache.
 *
 * A new entry is allocated and filled from @cred (fsuid, fsgid, a
 * reference on group_info) and from @set (mask only), inserted into the
 * inode's rbtree, and accounted in the global entry counter.  The inode is
 * put on the global LRU list if it is not there already, and finally the
 * cache size limit is enforced.
 *
 * Allocation failure is silent: the function returns without caching.
 */
void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set,
			  const struct cred *cred)
{
	struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
	if (cache == NULL)
		return;
	RB_CLEAR_NODE(&cache->rb_node);
	cache->fsuid = cred->fsuid;
	cache->fsgid = cred->fsgid;
	cache->group_info = get_group_info(cred->group_info);
	cache->mask = set->mask;

	/* The above field assignments must be visible
	 * before this item appears on the lru.  We cannot easily
	 * use rcu_assign_pointer, so just force the memory barrier.
	 */
	smp_wmb();
	nfs_access_add_rbtree(inode, cache, cred);

	/* Update accounting */
	smp_mb__before_atomic();
	atomic_long_inc(&nfs_access_nr_entries);
	smp_mb__after_atomic();

	/* Add inode to global LRU list */
	/* Unlocked test first, then re-tested atomically under the LRU lock */
	if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
		spin_lock(&nfs_access_lru_lock);
		if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
			list_add_tail(&NFS_I(inode)->access_cache_inode_lru,
					&nfs_access_lru_list);
		spin_unlock(&nfs_access_lru_lock);
	}
	nfs_access_cache_enforce_limit();
}
EXPORT_SYMBOL_GPL(nfs_access_add_cache);
2945 
2946 #define NFS_MAY_READ (NFS_ACCESS_READ)
2947 #define NFS_MAY_WRITE (NFS_ACCESS_MODIFY | \
2948 		NFS_ACCESS_EXTEND | \
2949 		NFS_ACCESS_DELETE)
2950 #define NFS_FILE_MAY_WRITE (NFS_ACCESS_MODIFY | \
2951 		NFS_ACCESS_EXTEND)
2952 #define NFS_DIR_MAY_WRITE NFS_MAY_WRITE
2953 #define NFS_MAY_LOOKUP (NFS_ACCESS_LOOKUP)
2954 #define NFS_MAY_EXECUTE (NFS_ACCESS_EXECUTE)
2955 static int
2956 nfs_access_calc_mask(u32 access_result, umode_t umode)
2957 {
2958 	int mask = 0;
2959 
2960 	if (access_result & NFS_MAY_READ)
2961 		mask |= MAY_READ;
2962 	if (S_ISDIR(umode)) {
2963 		if ((access_result & NFS_DIR_MAY_WRITE) == NFS_DIR_MAY_WRITE)
2964 			mask |= MAY_WRITE;
2965 		if ((access_result & NFS_MAY_LOOKUP) == NFS_MAY_LOOKUP)
2966 			mask |= MAY_EXEC;
2967 	} else if (S_ISREG(umode)) {
2968 		if ((access_result & NFS_FILE_MAY_WRITE) == NFS_FILE_MAY_WRITE)
2969 			mask |= MAY_WRITE;
2970 		if ((access_result & NFS_MAY_EXECUTE) == NFS_MAY_EXECUTE)
2971 			mask |= MAY_EXEC;
2972 	} else if (access_result & NFS_MAY_WRITE)
2973 			mask |= MAY_WRITE;
2974 	return mask;
2975 }
2976 
/*
 * Store the raw access result bits in @entry->mask.  No translation is
 * applied here; nfs_access_calc_mask() converts the bits when they are
 * checked.
 */
void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result)
{
	entry->mask = access_result;
}
EXPORT_SYMBOL_GPL(nfs_access_set_mask);
2982 
2983 static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
2984 {
2985 	struct nfs_access_entry cache;
2986 	bool may_block = (mask & MAY_NOT_BLOCK) == 0;
2987 	int cache_mask = -1;
2988 	int status;
2989 
2990 	trace_nfs_access_enter(inode);
2991 
2992 	status = nfs_access_get_cached(inode, cred, &cache.mask, may_block);
2993 	if (status == 0)
2994 		goto out_cached;
2995 
2996 	status = -ECHILD;
2997 	if (!may_block)
2998 		goto out;
2999 
3000 	/*
3001 	 * Determine which access bits we want to ask for...
3002 	 */
3003 	cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND |
3004 		     nfs_access_xattr_mask(NFS_SERVER(inode));
3005 	if (S_ISDIR(inode->i_mode))
3006 		cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
3007 	else
3008 		cache.mask |= NFS_ACCESS_EXECUTE;
3009 	status = NFS_PROTO(inode)->access(inode, &cache, cred);
3010 	if (status != 0) {
3011 		if (status == -ESTALE) {
3012 			if (!S_ISDIR(inode->i_mode))
3013 				nfs_set_inode_stale(inode);
3014 			else
3015 				nfs_zap_caches(inode);
3016 		}
3017 		goto out;
3018 	}
3019 	nfs_access_add_cache(inode, &cache, cred);
3020 out_cached:
3021 	cache_mask = nfs_access_calc_mask(cache.mask, inode->i_mode);
3022 	if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
3023 		status = -EACCES;
3024 out:
3025 	trace_nfs_access_exit(inode, mask, cache_mask, status);
3026 	return status;
3027 }
3028 
3029 static int nfs_open_permission_mask(int openflags)
3030 {
3031 	int mask = 0;
3032 
3033 	if (openflags & __FMODE_EXEC) {
3034 		/* ONLY check exec rights */
3035 		mask = MAY_EXEC;
3036 	} else {
3037 		if ((openflags & O_ACCMODE) != O_WRONLY)
3038 			mask |= MAY_READ;
3039 		if ((openflags & O_ACCMODE) != O_RDONLY)
3040 			mask |= MAY_WRITE;
3041 	}
3042 
3043 	return mask;
3044 }
3045 
/*
 * Check whether @cred may open @inode with @openflags: the flags are
 * converted to a MAY_* mask and checked with nfs_do_access().
 */
int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags)
{
	int mask = nfs_open_permission_mask(openflags);

	return nfs_do_access(inode, cred, mask);
}
EXPORT_SYMBOL_GPL(nfs_may_open);
3051 
3052 static int nfs_execute_ok(struct inode *inode, int mask)
3053 {
3054 	struct nfs_server *server = NFS_SERVER(inode);
3055 	int ret = 0;
3056 
3057 	if (S_ISDIR(inode->i_mode))
3058 		return 0;
3059 	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_MODE)) {
3060 		if (mask & MAY_NOT_BLOCK)
3061 			return -ECHILD;
3062 		ret = __nfs_revalidate_inode(server, inode);
3063 	}
3064 	if (ret == 0 && !execute_ok(inode))
3065 		ret = -EACCES;
3066 	return ret;
3067 }
3068 
3069 int nfs_permission(struct user_namespace *mnt_userns,
3070 		   struct inode *inode,
3071 		   int mask)
3072 {
3073 	const struct cred *cred = current_cred();
3074 	int res = 0;
3075 
3076 	nfs_inc_stats(inode, NFSIOS_VFSACCESS);
3077 
3078 	if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
3079 		goto out;
3080 	/* Is this sys_access() ? */
3081 	if (mask & (MAY_ACCESS | MAY_CHDIR))
3082 		goto force_lookup;
3083 
3084 	switch (inode->i_mode & S_IFMT) {
3085 		case S_IFLNK:
3086 			goto out;
3087 		case S_IFREG:
3088 			if ((mask & MAY_OPEN) &&
3089 			   nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN))
3090 				return 0;
3091 			break;
3092 		case S_IFDIR:
3093 			/*
3094 			 * Optimize away all write operations, since the server
3095 			 * will check permissions when we perform the op.
3096 			 */
3097 			if ((mask & MAY_WRITE) && !(mask & MAY_READ))
3098 				goto out;
3099 	}
3100 
3101 force_lookup:
3102 	if (!NFS_PROTO(inode)->access)
3103 		goto out_notsup;
3104 
3105 	res = nfs_do_access(inode, cred, mask);
3106 out:
3107 	if (!res && (mask & MAY_EXEC))
3108 		res = nfs_execute_ok(inode, mask);
3109 
3110 	dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n",
3111 		inode->i_sb->s_id, inode->i_ino, mask, res);
3112 	return res;
3113 out_notsup:
3114 	if (mask & MAY_NOT_BLOCK)
3115 		return -ECHILD;
3116 
3117 	res = nfs_revalidate_inode(inode, NFS_INO_INVALID_MODE |
3118 						  NFS_INO_INVALID_OTHER);
3119 	if (res == 0)
3120 		res = generic_permission(&init_user_ns, inode, mask);
3121 	goto out;
3122 }
3123 EXPORT_SYMBOL_GPL(nfs_permission);
3124