xref: /openbmc/linux/fs/nfs/nfs4state.c (revision 474be445555ba8f2e776b4b6458c310bc215f76b)
1  /*
2   *  fs/nfs/nfs4state.c
3   *
4   *  Client-side XDR for NFSv4.
5   *
6   *  Copyright (c) 2002 The Regents of the University of Michigan.
7   *  All rights reserved.
8   *
9   *  Kendrick Smith <kmsmith@umich.edu>
10   *
11   *  Redistribution and use in source and binary forms, with or without
12   *  modification, are permitted provided that the following conditions
13   *  are met:
14   *
15   *  1. Redistributions of source code must retain the above copyright
16   *     notice, this list of conditions and the following disclaimer.
17   *  2. Redistributions in binary form must reproduce the above copyright
18   *     notice, this list of conditions and the following disclaimer in the
19   *     documentation and/or other materials provided with the distribution.
20   *  3. Neither the name of the University nor the names of its
21   *     contributors may be used to endorse or promote products derived
22   *     from this software without specific prior written permission.
23   *
24   *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
25   *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
26   *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27   *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28   *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29   *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30   *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
31   *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
32   *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33   *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34   *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35   *
36   * Implementation of the NFSv4 state model.  For the time being,
37   * this is minimal, but will be made much more complex in a
38   * subsequent patch.
39   */
40  
41  #include <linux/kernel.h>
42  #include <linux/slab.h>
43  #include <linux/fs.h>
44  #include <linux/nfs_fs.h>
45  #include <linux/kthread.h>
46  #include <linux/module.h>
47  #include <linux/random.h>
48  #include <linux/ratelimit.h>
49  #include <linux/workqueue.h>
50  #include <linux/bitops.h>
51  #include <linux/jiffies.h>
52  #include <linux/sched/mm.h>
53  
54  #include <linux/sunrpc/clnt.h>
55  
56  #include "nfs4_fs.h"
57  #include "callback.h"
58  #include "delegation.h"
59  #include "internal.h"
60  #include "nfs4idmap.h"
61  #include "nfs4session.h"
62  #include "pnfs.h"
63  #include "netns.h"
64  #include "nfs4trace.h"
65  
66  #define NFSDBG_FACILITY		NFSDBG_STATE
67  
68  #define OPENOWNER_POOL_SIZE	8
69  
/*
 * Stateid whose data bytes are all zero, tagged as a "special" stateid.
 * nfs4_copy_open_stateid() copies it out when the state has no open
 * stateid (NFS_OPEN_STATE clear).
 */
const nfs4_stateid zero_stateid = {
	{ .data = { 0 } },
	.type = NFS4_SPECIAL_STATEID_TYPE,
};
/*
 * Stateid tagged NFS4_INVALID_STATEID_TYPE: the first four data bytes are
 * 0xff and the remaining bytes are zero.
 */
const nfs4_stateid invalid_stateid = {
	{
		/* Funky initialiser keeps older gcc versions happy */
		.data = { 0xff, 0xff, 0xff, 0xff, 0 },
	},
	.type = NFS4_INVALID_STATEID_TYPE,
};
81  
/*
 * Special stateid whose first four data bytes are 0, 0, 0, 1 and whose
 * remaining bytes are zero.
 * NOTE(review): presumably the protocol's "current stateid" special
 * value - confirm against the NFSv4.1 specification.
 */
const nfs4_stateid current_stateid = {
	{
		/* Funky initialiser keeps older gcc versions happy */
		.data = { 0x0, 0x0, 0x0, 0x1, 0 },
	},
	.type = NFS4_SPECIAL_STATEID_TYPE,
};
89  
/*
 * File-local, statically initialised mutex.
 * NOTE(review): presumably serialises client ID establishment; its users
 * are not in this part of the file - confirm against the callers.
 */
static DEFINE_MUTEX(nfs_clid_init_mutex);
91  
92  static int nfs4_setup_state_renewal(struct nfs_client *clp)
93  {
94  	int status;
95  	struct nfs_fsinfo fsinfo;
96  
97  	if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
98  		nfs4_schedule_state_renewal(clp);
99  		return 0;
100  	}
101  
102  	status = nfs4_proc_get_lease_time(clp, &fsinfo);
103  	if (status == 0) {
104  		nfs4_set_lease_period(clp, fsinfo.lease_time * HZ);
105  		nfs4_schedule_state_renewal(clp);
106  	}
107  
108  	return status;
109  }
110  
111  int nfs4_init_clientid(struct nfs_client *clp, const struct cred *cred)
112  {
113  	struct nfs4_setclientid_res clid = {
114  		.clientid = clp->cl_clientid,
115  		.confirm = clp->cl_confirm,
116  	};
117  	unsigned short port;
118  	int status;
119  	struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
120  
121  	if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
122  		goto do_confirm;
123  	port = nn->nfs_callback_tcpport;
124  	if (clp->cl_addr.ss_family == AF_INET6)
125  		port = nn->nfs_callback_tcpport6;
126  
127  	status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
128  	if (status != 0)
129  		goto out;
130  	clp->cl_clientid = clid.clientid;
131  	clp->cl_confirm = clid.confirm;
132  	set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
133  do_confirm:
134  	status = nfs4_proc_setclientid_confirm(clp, &clid, cred);
135  	if (status != 0)
136  		goto out;
137  	clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
138  	nfs4_setup_state_renewal(clp);
139  out:
140  	return status;
141  }
142  
143  /**
144   * nfs40_discover_server_trunking - Detect server IP address trunking (mv0)
145   *
146   * @clp: nfs_client under test
147   * @result: OUT: found nfs_client, or clp
148   * @cred: credential to use for trunking test
149   *
150   * Returns zero, a negative errno, or a negative NFS4ERR status.
151   * If zero is returned, an nfs_client pointer is planted in
152   * "result".
153   *
154   * Note: The returned client may not yet be marked ready.
155   */
156  int nfs40_discover_server_trunking(struct nfs_client *clp,
157  				   struct nfs_client **result,
158  				   const struct cred *cred)
159  {
160  	struct nfs4_setclientid_res clid = {
161  		.clientid = clp->cl_clientid,
162  		.confirm = clp->cl_confirm,
163  	};
164  	struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
165  	unsigned short port;
166  	int status;
167  
168  	port = nn->nfs_callback_tcpport;
169  	if (clp->cl_addr.ss_family == AF_INET6)
170  		port = nn->nfs_callback_tcpport6;
171  
172  	status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
173  	if (status != 0)
174  		goto out;
175  	clp->cl_clientid = clid.clientid;
176  	clp->cl_confirm = clid.confirm;
177  
178  	status = nfs40_walk_client_list(clp, result, cred);
179  	if (status == 0) {
180  		/* Sustain the lease, even if it's empty.  If the clientid4
181  		 * goes stale it's of no use for trunking discovery. */
182  		nfs4_schedule_state_renewal(*result);
183  
184  		/* If the client state need to recover, do it. */
185  		if (clp->cl_state)
186  			nfs4_schedule_state_manager(clp);
187  	}
188  out:
189  	return status;
190  }
191  
/*
 * Return the result of get_cred() applied to rpc_machine_cred().
 * @clp is not consulted.
 */
const struct cred *nfs4_get_machine_cred(struct nfs_client *clp)
{
	const struct cred *machine_cred = rpc_machine_cred();

	return get_cred(machine_cred);
}
196  
197  static void nfs4_root_machine_cred(struct nfs_client *clp)
198  {
199  
200  	/* Force root creds instead of machine */
201  	clp->cl_principal = NULL;
202  	clp->cl_rpcclient->cl_principal = NULL;
203  }
204  
205  static const struct cred *
206  nfs4_get_renew_cred_server_locked(struct nfs_server *server)
207  {
208  	const struct cred *cred = NULL;
209  	struct nfs4_state_owner *sp;
210  	struct rb_node *pos;
211  
212  	for (pos = rb_first(&server->state_owners);
213  	     pos != NULL;
214  	     pos = rb_next(pos)) {
215  		sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
216  		if (list_empty(&sp->so_states))
217  			continue;
218  		cred = get_cred(sp->so_cred);
219  		break;
220  	}
221  	return cred;
222  }
223  
224  /**
225   * nfs4_get_renew_cred - Acquire credential for a renew operation
226   * @clp: client state handle
227   *
228   * Returns an rpc_cred with reference count bumped, or NULL.
229   * Caller must hold clp->cl_lock.
230   */
231  const struct cred *nfs4_get_renew_cred(struct nfs_client *clp)
232  {
233  	const struct cred *cred = NULL;
234  	struct nfs_server *server;
235  
236  	/* Use machine credentials if available */
237  	cred = nfs4_get_machine_cred(clp);
238  	if (cred != NULL)
239  		goto out;
240  
241  	spin_lock(&clp->cl_lock);
242  	rcu_read_lock();
243  	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
244  		cred = nfs4_get_renew_cred_server_locked(server);
245  		if (cred != NULL)
246  			break;
247  	}
248  	rcu_read_unlock();
249  	spin_unlock(&clp->cl_lock);
250  
251  out:
252  	return cred;
253  }
254  
255  static void nfs4_end_drain_slot_table(struct nfs4_slot_table *tbl)
256  {
257  	if (test_and_clear_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
258  		spin_lock(&tbl->slot_tbl_lock);
259  		nfs41_wake_slot_table(tbl);
260  		spin_unlock(&tbl->slot_tbl_lock);
261  	}
262  }
263  
264  static void nfs4_end_drain_session(struct nfs_client *clp)
265  {
266  	struct nfs4_session *ses = clp->cl_session;
267  
268  	if (clp->cl_slot_tbl) {
269  		nfs4_end_drain_slot_table(clp->cl_slot_tbl);
270  		return;
271  	}
272  
273  	if (ses != NULL) {
274  		nfs4_end_drain_slot_table(&ses->bc_slot_table);
275  		nfs4_end_drain_slot_table(&ses->fc_slot_table);
276  	}
277  }
278  
279  static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl)
280  {
281  	set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
282  	spin_lock(&tbl->slot_tbl_lock);
283  	if (tbl->highest_used_slotid != NFS4_NO_SLOT) {
284  		reinit_completion(&tbl->complete);
285  		spin_unlock(&tbl->slot_tbl_lock);
286  		return wait_for_completion_interruptible(&tbl->complete);
287  	}
288  	spin_unlock(&tbl->slot_tbl_lock);
289  	return 0;
290  }
291  
292  static int nfs4_begin_drain_session(struct nfs_client *clp)
293  {
294  	struct nfs4_session *ses = clp->cl_session;
295  	int ret;
296  
297  	if (clp->cl_slot_tbl)
298  		return nfs4_drain_slot_tbl(clp->cl_slot_tbl);
299  
300  	/* back channel */
301  	ret = nfs4_drain_slot_tbl(&ses->bc_slot_table);
302  	if (ret)
303  		return ret;
304  	/* fore channel */
305  	return nfs4_drain_slot_tbl(&ses->fc_slot_table);
306  }
307  
308  #if defined(CONFIG_NFS_V4_1)
309  
310  static void nfs41_finish_session_reset(struct nfs_client *clp)
311  {
312  	clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
313  	clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
314  	/* create_session negotiated new slot table */
315  	clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
316  	nfs4_setup_state_renewal(clp);
317  }
318  
319  int nfs41_init_clientid(struct nfs_client *clp, const struct cred *cred)
320  {
321  	int status;
322  
323  	if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
324  		goto do_confirm;
325  	status = nfs4_proc_exchange_id(clp, cred);
326  	if (status != 0)
327  		goto out;
328  	set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
329  do_confirm:
330  	status = nfs4_proc_create_session(clp, cred);
331  	if (status != 0)
332  		goto out;
333  	nfs41_finish_session_reset(clp);
334  	nfs_mark_client_ready(clp, NFS_CS_READY);
335  out:
336  	return status;
337  }
338  
339  /**
340   * nfs41_discover_server_trunking - Detect server IP address trunking (mv1)
341   *
342   * @clp: nfs_client under test
343   * @result: OUT: found nfs_client, or clp
344   * @cred: credential to use for trunking test
345   *
346   * Returns NFS4_OK, a negative errno, or a negative NFS4ERR status.
347   * If NFS4_OK is returned, an nfs_client pointer is planted in
348   * "result".
349   *
350   * Note: The returned client may not yet be marked ready.
351   */
352  int nfs41_discover_server_trunking(struct nfs_client *clp,
353  				   struct nfs_client **result,
354  				   const struct cred *cred)
355  {
356  	int status;
357  
358  	status = nfs4_proc_exchange_id(clp, cred);
359  	if (status != NFS4_OK)
360  		return status;
361  
362  	status = nfs41_walk_client_list(clp, result, cred);
363  	if (status < 0)
364  		return status;
365  	if (clp != *result)
366  		return 0;
367  
368  	/*
369  	 * Purge state if the client id was established in a prior
370  	 * instance and the client id could not have arrived on the
371  	 * server via Transparent State Migration.
372  	 */
373  	if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R) {
374  		if (!test_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags))
375  			set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
376  		else
377  			set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
378  	}
379  	nfs4_schedule_state_manager(clp);
380  	status = nfs_wait_client_init_complete(clp);
381  	if (status < 0)
382  		nfs_put_client(clp);
383  	return status;
384  }
385  
386  #endif /* CONFIG_NFS_V4_1 */
387  
/**
 * nfs4_get_clid_cred - Acquire credential for a setclientid operation
 * @clp: client state handle
 *
 * Simply hands back what nfs4_get_machine_cred() returns.
 *
 * Returns a cred with reference count bumped, or NULL.
 */
const struct cred *nfs4_get_clid_cred(struct nfs_client *clp)
{
	return nfs4_get_machine_cred(clp);
}
401  
402  static struct nfs4_state_owner *
403  nfs4_find_state_owner_locked(struct nfs_server *server, const struct cred *cred)
404  {
405  	struct rb_node **p = &server->state_owners.rb_node,
406  		       *parent = NULL;
407  	struct nfs4_state_owner *sp;
408  	int cmp;
409  
410  	while (*p != NULL) {
411  		parent = *p;
412  		sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
413  		cmp = cred_fscmp(cred, sp->so_cred);
414  
415  		if (cmp < 0)
416  			p = &parent->rb_left;
417  		else if (cmp > 0)
418  			p = &parent->rb_right;
419  		else {
420  			if (!list_empty(&sp->so_lru))
421  				list_del_init(&sp->so_lru);
422  			atomic_inc(&sp->so_count);
423  			return sp;
424  		}
425  	}
426  	return NULL;
427  }
428  
429  static struct nfs4_state_owner *
430  nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
431  {
432  	struct nfs_server *server = new->so_server;
433  	struct rb_node **p = &server->state_owners.rb_node,
434  		       *parent = NULL;
435  	struct nfs4_state_owner *sp;
436  	int cmp;
437  
438  	while (*p != NULL) {
439  		parent = *p;
440  		sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
441  		cmp = cred_fscmp(new->so_cred, sp->so_cred);
442  
443  		if (cmp < 0)
444  			p = &parent->rb_left;
445  		else if (cmp > 0)
446  			p = &parent->rb_right;
447  		else {
448  			if (!list_empty(&sp->so_lru))
449  				list_del_init(&sp->so_lru);
450  			atomic_inc(&sp->so_count);
451  			return sp;
452  		}
453  	}
454  	rb_link_node(&new->so_server_node, parent, p);
455  	rb_insert_color(&new->so_server_node, &server->state_owners);
456  	return new;
457  }
458  
459  static void
460  nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
461  {
462  	struct nfs_server *server = sp->so_server;
463  
464  	if (!RB_EMPTY_NODE(&sp->so_server_node))
465  		rb_erase(&sp->so_server_node, &server->state_owners);
466  }
467  
468  static void
469  nfs4_init_seqid_counter(struct nfs_seqid_counter *sc)
470  {
471  	sc->create_time = ktime_get();
472  	sc->flags = 0;
473  	sc->counter = 0;
474  	spin_lock_init(&sc->lock);
475  	INIT_LIST_HEAD(&sc->list);
476  	rpc_init_wait_queue(&sc->wait, "Seqid_waitqueue");
477  }
478  
479  static void
480  nfs4_destroy_seqid_counter(struct nfs_seqid_counter *sc)
481  {
482  	rpc_destroy_wait_queue(&sc->wait);
483  }
484  
485  /*
486   * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
487   * create a new state_owner.
488   *
489   */
490  static struct nfs4_state_owner *
491  nfs4_alloc_state_owner(struct nfs_server *server,
492  		const struct cred *cred,
493  		gfp_t gfp_flags)
494  {
495  	struct nfs4_state_owner *sp;
496  
497  	sp = kzalloc(sizeof(*sp), gfp_flags);
498  	if (!sp)
499  		return NULL;
500  	sp->so_seqid.owner_id = ida_alloc(&server->openowner_id, gfp_flags);
501  	if (sp->so_seqid.owner_id < 0) {
502  		kfree(sp);
503  		return NULL;
504  	}
505  	sp->so_server = server;
506  	sp->so_cred = get_cred(cred);
507  	spin_lock_init(&sp->so_lock);
508  	INIT_LIST_HEAD(&sp->so_states);
509  	nfs4_init_seqid_counter(&sp->so_seqid);
510  	atomic_set(&sp->so_count, 1);
511  	INIT_LIST_HEAD(&sp->so_lru);
512  	seqcount_spinlock_init(&sp->so_reclaim_seqcount, &sp->so_lock);
513  	mutex_init(&sp->so_delegreturn_mutex);
514  	return sp;
515  }
516  
517  static void
518  nfs4_reset_state_owner(struct nfs4_state_owner *sp)
519  {
520  	/* This state_owner is no longer usable, but must
521  	 * remain in place so that state recovery can find it
522  	 * and the opens associated with it.
523  	 * It may also be used for new 'open' request to
524  	 * return a delegation to the server.
525  	 * So update the 'create_time' so that it looks like
526  	 * a new state_owner.  This will cause the server to
527  	 * request an OPEN_CONFIRM to start a new sequence.
528  	 */
529  	sp->so_seqid.create_time = ktime_get();
530  }
531  
532  static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
533  {
534  	nfs4_destroy_seqid_counter(&sp->so_seqid);
535  	put_cred(sp->so_cred);
536  	ida_free(&sp->so_server->openowner_id, sp->so_seqid.owner_id);
537  	kfree(sp);
538  }
539  
540  static void nfs4_gc_state_owners(struct nfs_server *server)
541  {
542  	struct nfs_client *clp = server->nfs_client;
543  	struct nfs4_state_owner *sp, *tmp;
544  	unsigned long time_min, time_max;
545  	LIST_HEAD(doomed);
546  
547  	spin_lock(&clp->cl_lock);
548  	time_max = jiffies;
549  	time_min = (long)time_max - (long)clp->cl_lease_time;
550  	list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
551  		/* NB: LRU is sorted so that oldest is at the head */
552  		if (time_in_range(sp->so_expires, time_min, time_max))
553  			break;
554  		list_move(&sp->so_lru, &doomed);
555  		nfs4_remove_state_owner_locked(sp);
556  	}
557  	spin_unlock(&clp->cl_lock);
558  
559  	list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
560  		list_del(&sp->so_lru);
561  		nfs4_free_state_owner(sp);
562  	}
563  }
564  
565  /**
566   * nfs4_get_state_owner - Look up a state owner given a credential
567   * @server: nfs_server to search
568   * @cred: RPC credential to match
569   * @gfp_flags: allocation mode
570   *
571   * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL.
572   */
573  struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
574  					      const struct cred *cred,
575  					      gfp_t gfp_flags)
576  {
577  	struct nfs_client *clp = server->nfs_client;
578  	struct nfs4_state_owner *sp, *new;
579  
580  	spin_lock(&clp->cl_lock);
581  	sp = nfs4_find_state_owner_locked(server, cred);
582  	spin_unlock(&clp->cl_lock);
583  	if (sp != NULL)
584  		goto out;
585  	new = nfs4_alloc_state_owner(server, cred, gfp_flags);
586  	if (new == NULL)
587  		goto out;
588  	spin_lock(&clp->cl_lock);
589  	sp = nfs4_insert_state_owner_locked(new);
590  	spin_unlock(&clp->cl_lock);
591  	if (sp != new)
592  		nfs4_free_state_owner(new);
593  out:
594  	nfs4_gc_state_owners(server);
595  	return sp;
596  }
597  
598  /**
599   * nfs4_put_state_owner - Release a nfs4_state_owner
600   * @sp: state owner data to release
601   *
602   * Note that we keep released state owners on an LRU
603   * list.
604   * This caches valid state owners so that they can be
605   * reused, to avoid the OPEN_CONFIRM on minor version 0.
606   * It also pins the uniquifier of dropped state owners for
607   * a while, to ensure that those state owner names are
608   * never reused.
609   */
610  void nfs4_put_state_owner(struct nfs4_state_owner *sp)
611  {
612  	struct nfs_server *server = sp->so_server;
613  	struct nfs_client *clp = server->nfs_client;
614  
615  	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
616  		return;
617  
618  	sp->so_expires = jiffies;
619  	list_add_tail(&sp->so_lru, &server->state_owners_lru);
620  	spin_unlock(&clp->cl_lock);
621  }
622  
623  /**
624   * nfs4_purge_state_owners - Release all cached state owners
625   * @server: nfs_server with cached state owners to release
626   * @head: resulting list of state owners
627   *
628   * Called at umount time.  Remaining state owners will be on
629   * the LRU with ref count of zero.
630   * Note that the state owners are not freed, but are added
631   * to the list @head, which can later be used as an argument
632   * to nfs4_free_state_owners.
633   */
634  void nfs4_purge_state_owners(struct nfs_server *server, struct list_head *head)
635  {
636  	struct nfs_client *clp = server->nfs_client;
637  	struct nfs4_state_owner *sp, *tmp;
638  
639  	spin_lock(&clp->cl_lock);
640  	list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
641  		list_move(&sp->so_lru, head);
642  		nfs4_remove_state_owner_locked(sp);
643  	}
644  	spin_unlock(&clp->cl_lock);
645  }
646  
647  /**
648   * nfs4_free_state_owners - Release all cached state owners
649   * @head: resulting list of state owners
650   *
651   * Frees a list of state owners that was generated by
652   * nfs4_purge_state_owners
653   */
654  void nfs4_free_state_owners(struct list_head *head)
655  {
656  	struct nfs4_state_owner *sp, *tmp;
657  
658  	list_for_each_entry_safe(sp, tmp, head, so_lru) {
659  		list_del(&sp->so_lru);
660  		nfs4_free_state_owner(sp);
661  	}
662  }
663  
664  static struct nfs4_state *
665  nfs4_alloc_open_state(void)
666  {
667  	struct nfs4_state *state;
668  
669  	state = kzalloc(sizeof(*state), GFP_KERNEL_ACCOUNT);
670  	if (!state)
671  		return NULL;
672  	refcount_set(&state->count, 1);
673  	INIT_LIST_HEAD(&state->lock_states);
674  	spin_lock_init(&state->state_lock);
675  	seqlock_init(&state->seqlock);
676  	init_waitqueue_head(&state->waitq);
677  	return state;
678  }
679  
680  void
681  nfs4_state_set_mode_locked(struct nfs4_state *state, fmode_t fmode)
682  {
683  	if (state->state == fmode)
684  		return;
685  	/* NB! List reordering - see the reclaim code for why.  */
686  	if ((fmode & FMODE_WRITE) != (state->state & FMODE_WRITE)) {
687  		if (fmode & FMODE_WRITE)
688  			list_move(&state->open_states, &state->owner->so_states);
689  		else
690  			list_move_tail(&state->open_states, &state->owner->so_states);
691  	}
692  	state->state = fmode;
693  }
694  
695  static struct nfs4_state *
696  __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
697  {
698  	struct nfs_inode *nfsi = NFS_I(inode);
699  	struct nfs4_state *state;
700  
701  	list_for_each_entry_rcu(state, &nfsi->open_states, inode_states) {
702  		if (state->owner != owner)
703  			continue;
704  		if (!nfs4_valid_open_stateid(state))
705  			continue;
706  		if (refcount_inc_not_zero(&state->count))
707  			return state;
708  	}
709  	return NULL;
710  }
711  
712  static void
713  nfs4_free_open_state(struct nfs4_state *state)
714  {
715  	kfree_rcu(state, rcu_head);
716  }
717  
718  struct nfs4_state *
719  nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
720  {
721  	struct nfs4_state *state, *new;
722  	struct nfs_inode *nfsi = NFS_I(inode);
723  
724  	rcu_read_lock();
725  	state = __nfs4_find_state_byowner(inode, owner);
726  	rcu_read_unlock();
727  	if (state)
728  		goto out;
729  	new = nfs4_alloc_open_state();
730  	spin_lock(&owner->so_lock);
731  	spin_lock(&inode->i_lock);
732  	state = __nfs4_find_state_byowner(inode, owner);
733  	if (state == NULL && new != NULL) {
734  		state = new;
735  		state->owner = owner;
736  		atomic_inc(&owner->so_count);
737  		ihold(inode);
738  		state->inode = inode;
739  		list_add_rcu(&state->inode_states, &nfsi->open_states);
740  		spin_unlock(&inode->i_lock);
741  		/* Note: The reclaim code dictates that we add stateless
742  		 * and read-only stateids to the end of the list */
743  		list_add_tail(&state->open_states, &owner->so_states);
744  		spin_unlock(&owner->so_lock);
745  	} else {
746  		spin_unlock(&inode->i_lock);
747  		spin_unlock(&owner->so_lock);
748  		if (new)
749  			nfs4_free_open_state(new);
750  	}
751  out:
752  	return state;
753  }
754  
/*
 * Drop a reference on @state.
 *
 * When the last reference goes away the state is unlinked from both the
 * inode's open_states list and the owner's so_states list (under
 * owner->so_lock and inode->i_lock), and then, outside the locks:
 * nfs4_inode_return_delegation_on_close() is called for the inode, the
 * inode reference taken in nfs4_get_open_state() is released, the state is
 * freed, and the reference held on the owner is dropped.
 */
void nfs4_put_open_state(struct nfs4_state *state)
{
	/* Cache these up front: @state is freed before the owner is put. */
	struct inode *inode = state->inode;
	struct nfs4_state_owner *owner = state->owner;

	/* Returns with so_lock held only when the count reached zero. */
	if (!refcount_dec_and_lock(&state->count, &owner->so_lock))
		return;
	spin_lock(&inode->i_lock);
	list_del_rcu(&state->inode_states);
	list_del(&state->open_states);
	spin_unlock(&inode->i_lock);
	spin_unlock(&owner->so_lock);
	nfs4_inode_return_delegation_on_close(inode);
	iput(inode);
	nfs4_free_open_state(state);
	nfs4_put_state_owner(owner);
}
772  
/*
 * Close the current file.
 *
 * Decrements the per-mode open counter selected by @fmode, recomputes the
 * mode that is still needed, and either just drops the references (when
 * no relevant open-mode flag is set on the state) or hands over to
 * nfs4_do_close().
 *
 * @state:    open state being closed
 * @fmode:    mode of the open being dropped; only FMODE_READ and
 *            FMODE_WRITE are examined
 * @gfp_mask: passed through to nfs4_do_close()
 * @wait:     passed through to nfs4_do_close()
 */
static void __nfs4_close(struct nfs4_state *state,
		fmode_t fmode, gfp_t gfp_mask, int wait)
{
	struct nfs4_state_owner *owner = state->owner;
	int call_close = 0;
	fmode_t newstate;

	/* Extra owner reference for the duration of the close. */
	atomic_inc(&owner->so_count);
	/* Protect against nfs4_find_state() */
	spin_lock(&owner->so_lock);
	/* No default case: an fmode with neither bit set changes no counter. */
	switch (fmode & (FMODE_READ | FMODE_WRITE)) {
		case FMODE_READ:
			state->n_rdonly--;
			break;
		case FMODE_WRITE:
			state->n_wronly--;
			break;
		case FMODE_READ|FMODE_WRITE:
			state->n_rdwr--;
	}
	/* Start from read+write and strip whatever no opener still needs. */
	newstate = FMODE_READ|FMODE_WRITE;
	if (state->n_rdwr == 0) {
		if (state->n_rdonly == 0) {
			newstate &= ~FMODE_READ;
			call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
			call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
		}
		if (state->n_wronly == 0) {
			newstate &= ~FMODE_WRITE;
			call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
			call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
		}
		/* Nothing left open at all: forget the delegated-state flag. */
		if (newstate == 0)
			clear_bit(NFS_DELEGATED_STATE, &state->flags);
	}
	nfs4_state_set_mode_locked(state, newstate);
	spin_unlock(&owner->so_lock);

	if (!call_close) {
		/* Drop the state reference and the extra owner reference. */
		nfs4_put_open_state(state);
		nfs4_put_state_owner(owner);
	} else
		/*
		 * NOTE(review): presumably nfs4_do_close() takes over both
		 * references - confirm in its implementation.
		 */
		nfs4_do_close(state, gfp_mask, wait);
}
820  
821  void nfs4_close_state(struct nfs4_state *state, fmode_t fmode)
822  {
823  	__nfs4_close(state, fmode, GFP_KERNEL, 0);
824  }
825  
826  void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
827  {
828  	__nfs4_close(state, fmode, GFP_KERNEL, 1);
829  }
830  
831  /*
832   * Search the state->lock_states for an existing lock_owner
833   * that is compatible with either of the given owners.
834   * If the second is non-zero, then the first refers to a Posix-lock
835   * owner (current->files) and the second refers to a flock/OFD
836   * owner (struct file*).  In that case, prefer a match for the first
837   * owner.
838   * If both sorts of locks are held on the one file we cannot know
839   * which stateid was intended to be used, so a "correct" choice cannot
840   * be made.  Failing that, a "consistent" choice is preferable.  The
841   * consistent choice we make is to prefer the first owner, that of a
842   * Posix lock.
843   */
844  static struct nfs4_lock_state *
845  __nfs4_find_lock_state(struct nfs4_state *state,
846  		       fl_owner_t fl_owner, fl_owner_t fl_owner2)
847  {
848  	struct nfs4_lock_state *pos, *ret = NULL;
849  	list_for_each_entry(pos, &state->lock_states, ls_locks) {
850  		if (pos->ls_owner == fl_owner) {
851  			ret = pos;
852  			break;
853  		}
854  		if (pos->ls_owner == fl_owner2)
855  			ret = pos;
856  	}
857  	if (ret)
858  		refcount_inc(&ret->ls_count);
859  	return ret;
860  }
861  
862  /*
863   * Return a compatible lock_state. If no initialized lock_state structure
864   * exists, return an uninitialized one.
865   *
866   */
867  static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
868  {
869  	struct nfs4_lock_state *lsp;
870  	struct nfs_server *server = state->owner->so_server;
871  
872  	lsp = kzalloc(sizeof(*lsp), GFP_KERNEL_ACCOUNT);
873  	if (lsp == NULL)
874  		return NULL;
875  	nfs4_init_seqid_counter(&lsp->ls_seqid);
876  	refcount_set(&lsp->ls_count, 1);
877  	lsp->ls_state = state;
878  	lsp->ls_owner = fl_owner;
879  	lsp->ls_seqid.owner_id = ida_alloc(&server->lockowner_id, GFP_KERNEL_ACCOUNT);
880  	if (lsp->ls_seqid.owner_id < 0)
881  		goto out_free;
882  	INIT_LIST_HEAD(&lsp->ls_locks);
883  	return lsp;
884  out_free:
885  	kfree(lsp);
886  	return NULL;
887  }
888  
889  void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
890  {
891  	ida_free(&server->lockowner_id, lsp->ls_seqid.owner_id);
892  	nfs4_destroy_seqid_counter(&lsp->ls_seqid);
893  	kfree(lsp);
894  }
895  
896  /*
897   * Return a compatible lock_state. If no initialized lock_state structure
898   * exists, return an uninitialized one.
899   *
900   */
901  static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
902  {
903  	struct nfs4_lock_state *lsp, *new = NULL;
904  
905  	for(;;) {
906  		spin_lock(&state->state_lock);
907  		lsp = __nfs4_find_lock_state(state, owner, NULL);
908  		if (lsp != NULL)
909  			break;
910  		if (new != NULL) {
911  			list_add(&new->ls_locks, &state->lock_states);
912  			set_bit(LK_STATE_IN_USE, &state->flags);
913  			lsp = new;
914  			new = NULL;
915  			break;
916  		}
917  		spin_unlock(&state->state_lock);
918  		new = nfs4_alloc_lock_state(state, owner);
919  		if (new == NULL)
920  			return NULL;
921  	}
922  	spin_unlock(&state->state_lock);
923  	if (new != NULL)
924  		nfs4_free_lock_state(state->owner->so_server, new);
925  	return lsp;
926  }
927  
928  /*
929   * Release reference to lock_state, and free it if we see that
930   * it is no longer in use
931   */
932  void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
933  {
934  	struct nfs_server *server;
935  	struct nfs4_state *state;
936  
937  	if (lsp == NULL)
938  		return;
939  	state = lsp->ls_state;
940  	if (!refcount_dec_and_lock(&lsp->ls_count, &state->state_lock))
941  		return;
942  	list_del(&lsp->ls_locks);
943  	if (list_empty(&state->lock_states))
944  		clear_bit(LK_STATE_IN_USE, &state->flags);
945  	spin_unlock(&state->state_lock);
946  	server = state->owner->so_server;
947  	if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) {
948  		struct nfs_client *clp = server->nfs_client;
949  
950  		clp->cl_mvops->free_lock_state(server, lsp);
951  	} else
952  		nfs4_free_lock_state(server, lsp);
953  }
954  
955  static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
956  {
957  	struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner;
958  
959  	dst->fl_u.nfs4_fl.owner = lsp;
960  	refcount_inc(&lsp->ls_count);
961  }
962  
963  static void nfs4_fl_release_lock(struct file_lock *fl)
964  {
965  	nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner);
966  }
967  
/*
 * file_lock operations installed by nfs4_set_lock_state(): copying a lock
 * shares (and references) its nfs4_lock_state, and releasing the private
 * data drops that reference.
 */
static const struct file_lock_operations nfs4_fl_lock_ops = {
	.fl_copy_lock = nfs4_fl_copy_lock,
	.fl_release_private = nfs4_fl_release_lock,
};
972  
973  int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
974  {
975  	struct nfs4_lock_state *lsp;
976  
977  	if (fl->fl_ops != NULL)
978  		return 0;
979  	lsp = nfs4_get_lock_state(state, fl->fl_owner);
980  	if (lsp == NULL)
981  		return -ENOMEM;
982  	fl->fl_u.nfs4_fl.owner = lsp;
983  	fl->fl_ops = &nfs4_fl_lock_ops;
984  	return 0;
985  }
986  
987  static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
988  		struct nfs4_state *state,
989  		const struct nfs_lock_context *l_ctx)
990  {
991  	struct nfs4_lock_state *lsp;
992  	fl_owner_t fl_owner, fl_flock_owner;
993  	int ret = -ENOENT;
994  
995  	if (l_ctx == NULL)
996  		goto out;
997  
998  	if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
999  		goto out;
1000  
1001  	fl_owner = l_ctx->lockowner;
1002  	fl_flock_owner = l_ctx->open_context->flock_owner;
1003  
1004  	spin_lock(&state->state_lock);
1005  	lsp = __nfs4_find_lock_state(state, fl_owner, fl_flock_owner);
1006  	if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags))
1007  		ret = -EIO;
1008  	else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
1009  		nfs4_stateid_copy(dst, &lsp->ls_stateid);
1010  		ret = 0;
1011  	}
1012  	spin_unlock(&state->state_lock);
1013  	nfs4_put_lock_state(lsp);
1014  out:
1015  	return ret;
1016  }
1017  
1018  bool nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
1019  {
1020  	bool ret;
1021  	const nfs4_stateid *src;
1022  	int seq;
1023  
1024  	do {
1025  		ret = false;
1026  		src = &zero_stateid;
1027  		seq = read_seqbegin(&state->seqlock);
1028  		if (test_bit(NFS_OPEN_STATE, &state->flags)) {
1029  			src = &state->open_stateid;
1030  			ret = true;
1031  		}
1032  		nfs4_stateid_copy(dst, src);
1033  	} while (read_seqretry(&state->seqlock, seq));
1034  	return ret;
1035  }
1036  
1037  /*
1038   * Byte-range lock aware utility to initialize the stateid of read/write
1039   * requests.
1040   */
1041  int nfs4_select_rw_stateid(struct nfs4_state *state,
1042  		fmode_t fmode, const struct nfs_lock_context *l_ctx,
1043  		nfs4_stateid *dst, const struct cred **cred)
1044  {
1045  	int ret;
1046  
1047  	if (!nfs4_valid_open_stateid(state))
1048  		return -EIO;
1049  	if (cred != NULL)
1050  		*cred = NULL;
1051  	ret = nfs4_copy_lock_stateid(dst, state, l_ctx);
1052  	if (ret == -EIO)
1053  		/* A lost lock - don't even consider delegations */
1054  		goto out;
1055  	/* returns true if delegation stateid found and copied */
1056  	if (nfs4_copy_delegation_stateid(state->inode, fmode, dst, cred)) {
1057  		ret = 0;
1058  		goto out;
1059  	}
1060  	if (ret != -ENOENT)
1061  		/* nfs4_copy_delegation_stateid() didn't over-write
1062  		 * dst, so it still has the lock stateid which we now
1063  		 * choose to use.
1064  		 */
1065  		goto out;
1066  	ret = nfs4_copy_open_stateid(dst, state) ? 0 : -EAGAIN;
1067  out:
1068  	if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41))
1069  		dst->seqid = 0;
1070  	return ret;
1071  }
1072  
1073  struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask)
1074  {
1075  	struct nfs_seqid *new;
1076  
1077  	new = kmalloc(sizeof(*new), gfp_mask);
1078  	if (new == NULL)
1079  		return ERR_PTR(-ENOMEM);
1080  	new->sequence = counter;
1081  	INIT_LIST_HEAD(&new->list);
1082  	new->task = NULL;
1083  	return new;
1084  }
1085  
1086  void nfs_release_seqid(struct nfs_seqid *seqid)
1087  {
1088  	struct nfs_seqid_counter *sequence;
1089  
1090  	if (seqid == NULL || list_empty(&seqid->list))
1091  		return;
1092  	sequence = seqid->sequence;
1093  	spin_lock(&sequence->lock);
1094  	list_del_init(&seqid->list);
1095  	if (!list_empty(&sequence->list)) {
1096  		struct nfs_seqid *next;
1097  
1098  		next = list_first_entry(&sequence->list,
1099  				struct nfs_seqid, list);
1100  		rpc_wake_up_queued_task(&sequence->wait, next->task);
1101  	}
1102  	spin_unlock(&sequence->lock);
1103  }
1104  
/*
 * Release @seqid from its sequence queue via nfs_release_seqid() and
 * then free it.
 */
void nfs_free_seqid(struct nfs_seqid *seqid)
{
	nfs_release_seqid(seqid);
	kfree(seqid);
}
1110  
1111  /*
1112   * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
1113   * failed with a seqid incrementing error -
1114   * see comments nfs4.h:seqid_mutating_error()
1115   */
1116  static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
1117  {
1118  	switch (status) {
1119  		case 0:
1120  			break;
1121  		case -NFS4ERR_BAD_SEQID:
1122  			if (seqid->sequence->flags & NFS_SEQID_CONFIRMED)
1123  				return;
1124  			pr_warn_ratelimited("NFS: v4 server returned a bad"
1125  					" sequence-id error on an"
1126  					" unconfirmed sequence %p!\n",
1127  					seqid->sequence);
1128  			return;
1129  		case -NFS4ERR_STALE_CLIENTID:
1130  		case -NFS4ERR_STALE_STATEID:
1131  		case -NFS4ERR_BAD_STATEID:
1132  		case -NFS4ERR_BADXDR:
1133  		case -NFS4ERR_RESOURCE:
1134  		case -NFS4ERR_NOFILEHANDLE:
1135  		case -NFS4ERR_MOVED:
1136  			/* Non-seqid mutating errors */
1137  			return;
1138  	}
1139  	/*
1140  	 * Note: no locking needed as we are guaranteed to be first
1141  	 * on the sequence list
1142  	 */
1143  	seqid->sequence->counter++;
1144  }
1145  
1146  void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
1147  {
1148  	struct nfs4_state_owner *sp;
1149  
1150  	if (seqid == NULL)
1151  		return;
1152  
1153  	sp = container_of(seqid->sequence, struct nfs4_state_owner, so_seqid);
1154  	if (status == -NFS4ERR_BAD_SEQID)
1155  		nfs4_reset_state_owner(sp);
1156  	if (!nfs4_has_session(sp->so_server->nfs_client))
1157  		nfs_increment_seqid(status, seqid);
1158  }
1159  
/*
 * Increment the seqid if the LOCK/LOCKU succeeded, or
 * failed with a seqid incrementing error -
 * see comments nfs4.h:seqid_mutating_error()
 *
 * A NULL @seqid is ignored.
 */
void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
{
	if (!seqid)
		return;
	nfs_increment_seqid(status, seqid);
}
1170  
1171  int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
1172  {
1173  	struct nfs_seqid_counter *sequence;
1174  	int status = 0;
1175  
1176  	if (seqid == NULL)
1177  		goto out;
1178  	sequence = seqid->sequence;
1179  	spin_lock(&sequence->lock);
1180  	seqid->task = task;
1181  	if (list_empty(&seqid->list))
1182  		list_add_tail(&seqid->list, &sequence->list);
1183  	if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid)
1184  		goto unlock;
1185  	rpc_sleep_on(&sequence->wait, task, NULL);
1186  	status = -EAGAIN;
1187  unlock:
1188  	spin_unlock(&sequence->lock);
1189  out:
1190  	return status;
1191  }
1192  
1193  static int nfs4_run_state_manager(void *);
1194  
1195  static void nfs4_clear_state_manager_bit(struct nfs_client *clp)
1196  {
1197  	clear_and_wake_up_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
1198  	rpc_wake_up(&clp->cl_rpcwaitq);
1199  }
1200  
1201  /*
1202   * Schedule the nfs_client asynchronous state management routine
1203   */
1204  void nfs4_schedule_state_manager(struct nfs_client *clp)
1205  {
1206  	struct task_struct *task;
1207  	char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
1208  	struct rpc_clnt *cl = clp->cl_rpcclient;
1209  
1210  	while (cl != cl->cl_parent)
1211  		cl = cl->cl_parent;
1212  
1213  	set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
1214  	if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) {
1215  		wake_up_var(&clp->cl_state);
1216  		return;
1217  	}
1218  	set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
1219  	__module_get(THIS_MODULE);
1220  	refcount_inc(&clp->cl_count);
1221  
1222  	/* The rcu_read_lock() is not strictly necessary, as the state
1223  	 * manager is the only thread that ever changes the rpc_xprt
1224  	 * after it's initialized.  At this point, we're single threaded. */
1225  	rcu_read_lock();
1226  	snprintf(buf, sizeof(buf), "%s-manager",
1227  			rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
1228  	rcu_read_unlock();
1229  	task = kthread_run(nfs4_run_state_manager, clp, "%s", buf);
1230  	if (IS_ERR(task)) {
1231  		printk(KERN_ERR "%s: kthread_run: %ld\n",
1232  			__func__, PTR_ERR(task));
1233  		nfs4_clear_state_manager_bit(clp);
1234  		clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
1235  		nfs_put_client(clp);
1236  		module_put(THIS_MODULE);
1237  	}
1238  }
1239  
1240  /*
1241   * Schedule a lease recovery attempt
1242   */
1243  void nfs4_schedule_lease_recovery(struct nfs_client *clp)
1244  {
1245  	if (!clp)
1246  		return;
1247  	if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1248  		set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1249  	dprintk("%s: scheduling lease recovery for server %s\n", __func__,
1250  			clp->cl_hostname);
1251  	nfs4_schedule_state_manager(clp);
1252  }
1253  EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery);
1254  
1255  /**
1256   * nfs4_schedule_migration_recovery - trigger migration recovery
1257   *
1258   * @server: FSID that is migrating
1259   *
1260   * Returns zero if recovery has started, otherwise a negative NFS4ERR
1261   * value is returned.
1262   */
1263  int nfs4_schedule_migration_recovery(const struct nfs_server *server)
1264  {
1265  	struct nfs_client *clp = server->nfs_client;
1266  
1267  	if (server->fh_expire_type != NFS4_FH_PERSISTENT) {
1268  		pr_err("NFS: volatile file handles not supported (server %s)\n",
1269  				clp->cl_hostname);
1270  		return -NFS4ERR_IO;
1271  	}
1272  
1273  	if (test_bit(NFS_MIG_FAILED, &server->mig_status))
1274  		return -NFS4ERR_IO;
1275  
1276  	dprintk("%s: scheduling migration recovery for (%llx:%llx) on %s\n",
1277  			__func__,
1278  			(unsigned long long)server->fsid.major,
1279  			(unsigned long long)server->fsid.minor,
1280  			clp->cl_hostname);
1281  
1282  	set_bit(NFS_MIG_IN_TRANSITION,
1283  			&((struct nfs_server *)server)->mig_status);
1284  	set_bit(NFS4CLNT_MOVED, &clp->cl_state);
1285  
1286  	nfs4_schedule_state_manager(clp);
1287  	return 0;
1288  }
1289  EXPORT_SYMBOL_GPL(nfs4_schedule_migration_recovery);
1290  
1291  /**
1292   * nfs4_schedule_lease_moved_recovery - start lease-moved recovery
1293   *
1294   * @clp: server to check for moved leases
1295   *
1296   */
1297  void nfs4_schedule_lease_moved_recovery(struct nfs_client *clp)
1298  {
1299  	dprintk("%s: scheduling lease-moved recovery for client ID %llx on %s\n",
1300  		__func__, clp->cl_clientid, clp->cl_hostname);
1301  
1302  	set_bit(NFS4CLNT_LEASE_MOVED, &clp->cl_state);
1303  	nfs4_schedule_state_manager(clp);
1304  }
1305  EXPORT_SYMBOL_GPL(nfs4_schedule_lease_moved_recovery);
1306  
1307  int nfs4_wait_clnt_recover(struct nfs_client *clp)
1308  {
1309  	int res;
1310  
1311  	might_sleep();
1312  
1313  	refcount_inc(&clp->cl_count);
1314  	res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
1315  				 nfs_wait_bit_killable,
1316  				 TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
1317  	if (res)
1318  		goto out;
1319  	if (clp->cl_cons_state < 0)
1320  		res = clp->cl_cons_state;
1321  out:
1322  	nfs_put_client(clp);
1323  	return res;
1324  }
1325  
1326  int nfs4_client_recover_expired_lease(struct nfs_client *clp)
1327  {
1328  	unsigned int loop;
1329  	int ret;
1330  
1331  	for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
1332  		ret = nfs4_wait_clnt_recover(clp);
1333  		if (ret != 0)
1334  			break;
1335  		if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1336  		    !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
1337  			break;
1338  		nfs4_schedule_state_manager(clp);
1339  		ret = -EIO;
1340  	}
1341  	return ret;
1342  }
1343  
1344  /*
1345   * nfs40_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN
1346   * @clp: client to process
1347   *
1348   * Set the NFS4CLNT_LEASE_EXPIRED state in order to force a
1349   * resend of the SETCLIENTID and hence re-establish the
1350   * callback channel. Then return all existing delegations.
1351   */
1352  static void nfs40_handle_cb_pathdown(struct nfs_client *clp)
1353  {
1354  	set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1355  	nfs_expire_all_delegations(clp);
1356  	dprintk("%s: handling CB_PATHDOWN recovery for server %s\n", __func__,
1357  			clp->cl_hostname);
1358  }
1359  
/*
 * Run the callback-path-down handling for @clp and then kick the state
 * manager so that the recovery is actually carried out.
 */
void nfs4_schedule_path_down_recovery(struct nfs_client *clp)
{
	nfs40_handle_cb_pathdown(clp);
	nfs4_schedule_state_manager(clp);
}
1365  
1366  static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
1367  {
1368  
1369  	if (!nfs4_valid_open_stateid(state))
1370  		return 0;
1371  	set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
1372  	/* Don't recover state that expired before the reboot */
1373  	if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) {
1374  		clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
1375  		return 0;
1376  	}
1377  	set_bit(NFS_OWNER_RECLAIM_REBOOT, &state->owner->so_flags);
1378  	set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
1379  	return 1;
1380  }
1381  
1382  int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
1383  {
1384  	if (!nfs4_valid_open_stateid(state))
1385  		return 0;
1386  	set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
1387  	clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
1388  	set_bit(NFS_OWNER_RECLAIM_NOGRACE, &state->owner->so_flags);
1389  	set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
1390  	return 1;
1391  }
1392  
1393  int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
1394  {
1395  	struct nfs_client *clp = server->nfs_client;
1396  
1397  	if (!nfs4_state_mark_reclaim_nograce(clp, state))
1398  		return -EBADF;
1399  	nfs_inode_find_delegation_state_and_recover(state->inode,
1400  			&state->stateid);
1401  	dprintk("%s: scheduling stateid recovery for server %s\n", __func__,
1402  			clp->cl_hostname);
1403  	nfs4_schedule_state_manager(clp);
1404  	return 0;
1405  }
1406  EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery);
1407  
1408  static struct nfs4_lock_state *
1409  nfs_state_find_lock_state_by_stateid(struct nfs4_state *state,
1410  		const nfs4_stateid *stateid)
1411  {
1412  	struct nfs4_lock_state *pos;
1413  
1414  	list_for_each_entry(pos, &state->lock_states, ls_locks) {
1415  		if (!test_bit(NFS_LOCK_INITIALIZED, &pos->ls_flags))
1416  			continue;
1417  		if (nfs4_stateid_match_or_older(&pos->ls_stateid, stateid))
1418  			return pos;
1419  	}
1420  	return NULL;
1421  }
1422  
1423  static bool nfs_state_lock_state_matches_stateid(struct nfs4_state *state,
1424  		const nfs4_stateid *stateid)
1425  {
1426  	bool found = false;
1427  
1428  	if (test_bit(LK_STATE_IN_USE, &state->flags)) {
1429  		spin_lock(&state->state_lock);
1430  		if (nfs_state_find_lock_state_by_stateid(state, stateid))
1431  			found = true;
1432  		spin_unlock(&state->state_lock);
1433  	}
1434  	return found;
1435  }
1436  
1437  void nfs_inode_find_state_and_recover(struct inode *inode,
1438  		const nfs4_stateid *stateid)
1439  {
1440  	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
1441  	struct nfs_inode *nfsi = NFS_I(inode);
1442  	struct nfs_open_context *ctx;
1443  	struct nfs4_state *state;
1444  	bool found = false;
1445  
1446  	rcu_read_lock();
1447  	list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
1448  		state = ctx->state;
1449  		if (state == NULL)
1450  			continue;
1451  		if (nfs4_stateid_match_or_older(&state->stateid, stateid) &&
1452  		    nfs4_state_mark_reclaim_nograce(clp, state)) {
1453  			found = true;
1454  			continue;
1455  		}
1456  		if (test_bit(NFS_OPEN_STATE, &state->flags) &&
1457  		    nfs4_stateid_match_or_older(&state->open_stateid, stateid) &&
1458  		    nfs4_state_mark_reclaim_nograce(clp, state)) {
1459  			found = true;
1460  			continue;
1461  		}
1462  		if (nfs_state_lock_state_matches_stateid(state, stateid) &&
1463  		    nfs4_state_mark_reclaim_nograce(clp, state))
1464  			found = true;
1465  	}
1466  	rcu_read_unlock();
1467  
1468  	nfs_inode_find_delegation_state_and_recover(inode, stateid);
1469  	if (found)
1470  		nfs4_schedule_state_manager(clp);
1471  }
1472  
1473  static void nfs4_state_mark_open_context_bad(struct nfs4_state *state, int err)
1474  {
1475  	struct inode *inode = state->inode;
1476  	struct nfs_inode *nfsi = NFS_I(inode);
1477  	struct nfs_open_context *ctx;
1478  
1479  	rcu_read_lock();
1480  	list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
1481  		if (ctx->state != state)
1482  			continue;
1483  		set_bit(NFS_CONTEXT_BAD, &ctx->flags);
1484  		pr_warn("NFSv4: state recovery failed for open file %pd2, "
1485  				"error = %d\n", ctx->dentry, err);
1486  	}
1487  	rcu_read_unlock();
1488  }
1489  
1490  static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error)
1491  {
1492  	set_bit(NFS_STATE_RECOVERY_FAILED, &state->flags);
1493  	nfs4_state_mark_open_context_bad(state, error);
1494  }
1495  
1496  
/*
 * Re-establish the file locks that belong to @state by calling
 * ops->recover_lock() on each lock of the inode whose open context uses
 * @state.  The POSIX lock list is walked first, then the flock list.
 *
 * Returns 0 if both lists were walked to the end (locks that could not
 * be recovered are flagged NFS_LOCK_LOST on their owner rather than
 * reported), or the ops->recover_lock() error that aborted the walk.
 * Returns 0 straight away if the inode has no file_lock_context.
 */
static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
{
	struct inode *inode = state->inode;
	struct nfs_inode *nfsi = NFS_I(inode);
	struct file_lock *fl;
	struct nfs4_lock_state *lsp;
	int status = 0;
	struct file_lock_context *flctx = inode->i_flctx;
	struct list_head *list;

	if (flctx == NULL)
		return 0;

	list = &flctx->flc_posix;

	/* Guard against delegation returns and new lock/unlock calls */
	down_write(&nfsi->rwsem);
	spin_lock(&flctx->flc_lock);
restart:
	list_for_each_entry(fl, list, fl_list) {
		if (nfs_file_open_context(fl->fl_file)->state != state)
			continue;
		/* flc_lock is not held across the recover_lock() call */
		spin_unlock(&flctx->flc_lock);
		status = ops->recover_lock(state, fl);
		switch (status) {
		case 0:
			break;
		case -ETIMEDOUT:
		case -ESTALE:
		case -NFS4ERR_ADMIN_REVOKED:
		case -NFS4ERR_STALE_STATEID:
		case -NFS4ERR_BAD_STATEID:
		case -NFS4ERR_EXPIRED:
		case -NFS4ERR_NO_GRACE:
		case -NFS4ERR_STALE_CLIENTID:
		case -NFS4ERR_BADSESSION:
		case -NFS4ERR_BADSLOT:
		case -NFS4ERR_BAD_HIGH_SLOT:
		case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
			/* Abort the walk and pass the error up; flc_lock is
			 * already released here, only rwsem is still held. */
			goto out;
		default:
			pr_err("NFS: %s: unhandled error %d\n",
					__func__, status);
			fallthrough;
		case -ENOMEM:
		case -NFS4ERR_DENIED:
		case -NFS4ERR_RECLAIM_BAD:
		case -NFS4ERR_RECLAIM_CONFLICT:
			/* Flag this lock's owner as lost and keep going
			 * with the remaining locks. */
			lsp = fl->fl_u.nfs4_fl.owner;
			if (lsp)
				set_bit(NFS_LOCK_LOST, &lsp->ls_flags);
			status = 0;
		}
		spin_lock(&flctx->flc_lock);
	}
	/* POSIX list done: run the same loop once more over the flock list */
	if (list == &flctx->flc_posix) {
		list = &flctx->flc_flock;
		goto restart;
	}
	spin_unlock(&flctx->flc_lock);
out:
	up_write(&nfsi->rwsem);
	return status;
}
1561  
#ifdef CONFIG_NFS_V4_2
/*
 * Signal completion on the copy requests tracked on the server's
 * ss_copies list that are associated with @state.  Does nothing unless
 * @state carries NFS_CLNT_DST_SSC_COPY_STATE or
 * NFS_CLNT_SRC_SSC_COPY_STATE.  The list walks run under the client's
 * cl_lock.
 */
static void nfs42_complete_copies(struct nfs4_state_owner *sp, struct nfs4_state *state)
{
	struct nfs4_copy_state *copy;

	if (!test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags) &&
		!test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags))
		return;

	spin_lock(&sp->so_server->nfs_client->cl_lock);
	/* First pass: entries linked through ->copies, matched against the
	 * destination state's stateid. */
	list_for_each_entry(copy, &sp->so_server->ss_copies, copies) {
		if ((test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags) &&
				!nfs4_stateid_match_other(&state->stateid,
				&copy->parent_dst_state->stateid)))
				continue;
		copy->flags = 1;
		if (test_and_clear_bit(NFS_CLNT_DST_SSC_COPY_STATE,
				&state->flags)) {
			clear_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags);
			complete(&copy->completion);
		}
	}
	/*
	 * Second pass: entries linked through ->src_copies, matched against
	 * the source state's stateid.
	 * NOTE(review): this pass walks the same ss_copies list head but
	 * through the src_copies member, and it tests-and-clears the DST
	 * flag (which the first pass may already have cleared) rather than
	 * the SRC flag - confirm both are intended.
	 */
	list_for_each_entry(copy, &sp->so_server->ss_copies, src_copies) {
		if ((test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags) &&
				!nfs4_stateid_match_other(&state->stateid,
				&copy->parent_src_state->stateid)))
				continue;
		copy->flags = 1;
		if (test_and_clear_bit(NFS_CLNT_DST_SSC_COPY_STATE,
				&state->flags))
			complete(&copy->completion);
	}
	spin_unlock(&sp->so_server->nfs_client->cl_lock);
}
#else /* !CONFIG_NFS_V4_2 */
/* Without NFSv4.2 support there are no copy states to complete. */
static inline void nfs42_complete_copies(struct nfs4_state_owner *sp,
					 struct nfs4_state *state)
{
}
#endif /* CONFIG_NFS_V4_2 */
1602  
1603  static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_state *state,
1604  				     const struct nfs4_state_recovery_ops *ops,
1605  				     int *lost_locks)
1606  {
1607  	struct nfs4_lock_state *lock;
1608  	int status;
1609  
1610  	status = ops->recover_open(sp, state);
1611  	if (status < 0)
1612  		return status;
1613  
1614  	status = nfs4_reclaim_locks(state, ops);
1615  	if (status < 0)
1616  		return status;
1617  
1618  	if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) {
1619  		spin_lock(&state->state_lock);
1620  		list_for_each_entry(lock, &state->lock_states, ls_locks) {
1621  			trace_nfs4_state_lock_reclaim(state, lock);
1622  			if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
1623  				*lost_locks += 1;
1624  		}
1625  		spin_unlock(&state->state_lock);
1626  	}
1627  
1628  	nfs42_complete_copies(sp, state);
1629  	clear_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
1630  	return status;
1631  }
1632  
/*
 * Recover every open state of owner @sp that carries the reclaim flag
 * named by ops->state_flag_bit.
 *
 * The owner's state list is walked under so_lock, inside a
 * so_reclaim_seqcount write section.  For each state to recover, a
 * reference is taken and so_lock is dropped around the actual recovery;
 * afterwards the walk starts again from the head of the list.
 *
 * Returns 0 when no flagged state is left, -EIO (NFSv4.2 only) if a
 * state with NFS_SRV_SSC_COPY_STATE was encountered, or the negative
 * status that made the recovery bail out.
 */
static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp,
				   const struct nfs4_state_recovery_ops *ops,
				   int *lost_locks)
{
	struct nfs4_state *state;
	unsigned int loop = 0;
	int status = 0;
#ifdef CONFIG_NFS_V4_2
	bool found_ssc_copy_state = false;
#endif /* CONFIG_NFS_V4_2 */

	/* Note: we rely on the sp->so_states list being ordered
	 * so that we always reclaim open(O_RDWR) and/or open(O_WRITE)
	 * states first.
	 * This is needed to ensure that the server won't give us any
	 * read delegations that we have to return if, say, we are
	 * recovering after a network partition or a reboot from a
	 * server that doesn't support a grace period.
	 */
	spin_lock(&sp->so_lock);
	raw_write_seqcount_begin(&sp->so_reclaim_seqcount);
restart:
	list_for_each_entry(state, &sp->so_states, open_states) {
		/* The reclaim flag is consumed here, even for states that
		 * the checks below go on to skip. */
		if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
			continue;
		if (!nfs4_valid_open_stateid(state))
			continue;
		if (state->state == 0)
			continue;
#ifdef CONFIG_NFS_V4_2
		if (test_bit(NFS_SRV_SSC_COPY_STATE, &state->flags)) {
			nfs4_state_mark_recovery_failed(state, -EIO);
			found_ssc_copy_state = true;
			continue;
		}
#endif /* CONFIG_NFS_V4_2 */
		/* Hold a reference on the state while so_lock is dropped */
		refcount_inc(&state->count);
		spin_unlock(&sp->so_lock);
		status = __nfs4_reclaim_open_state(sp, state, ops, lost_locks);

		switch (status) {
		default:
			if (status >= 0) {
				/* Success: reset the -EAGAIN retry counter */
				loop = 0;
				break;
			}
			printk(KERN_ERR "NFS: %s: unhandled error %d\n", __func__, status);
			fallthrough;
		case -ENOENT:
		case -ENOMEM:
		case -EACCES:
		case -EROFS:
		case -EIO:
		case -ESTALE:
			/* Open state on this file cannot be recovered */
			nfs4_state_mark_recovery_failed(state, status);
			break;
		case -EAGAIN:
			/* Pause for a second, then re-flag the state so the
			 * restarted walk retries it; after 10 consecutive
			 * retries fall through to no-grace reclaim. */
			ssleep(1);
			if (loop++ < 10) {
				set_bit(ops->state_flag_bit, &state->flags);
				break;
			}
			fallthrough;
		case -NFS4ERR_ADMIN_REVOKED:
		case -NFS4ERR_STALE_STATEID:
		case -NFS4ERR_OLD_STATEID:
		case -NFS4ERR_BAD_STATEID:
		case -NFS4ERR_RECLAIM_BAD:
		case -NFS4ERR_RECLAIM_CONFLICT:
			nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
			break;
		case -NFS4ERR_EXPIRED:
		case -NFS4ERR_NO_GRACE:
			nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
			fallthrough;
		case -NFS4ERR_STALE_CLIENTID:
		case -NFS4ERR_BADSESSION:
		case -NFS4ERR_BADSLOT:
		case -NFS4ERR_BAD_HIGH_SLOT:
		case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
		case -ETIMEDOUT:
			/* Stop working on this owner and report the error */
			goto out_err;
		}
		/* Drop the reference and rescan the list from its head, as
		 * so_lock was released in the meantime. */
		nfs4_put_open_state(state);
		spin_lock(&sp->so_lock);
		goto restart;
	}
	raw_write_seqcount_end(&sp->so_reclaim_seqcount);
	spin_unlock(&sp->so_lock);
#ifdef CONFIG_NFS_V4_2
	if (found_ssc_copy_state)
		return -EIO;
#endif /* CONFIG_NFS_V4_2 */
	return 0;
out_err:
	/* so_lock is not held here; retake it to close the seqcount
	 * write section opened at the top. */
	nfs4_put_open_state(state);
	spin_lock(&sp->so_lock);
	raw_write_seqcount_end(&sp->so_reclaim_seqcount);
	spin_unlock(&sp->so_lock);
	return status;
}
1735  
1736  static void nfs4_clear_open_state(struct nfs4_state *state)
1737  {
1738  	struct nfs4_lock_state *lock;
1739  
1740  	clear_bit(NFS_DELEGATED_STATE, &state->flags);
1741  	clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1742  	clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1743  	clear_bit(NFS_O_RDWR_STATE, &state->flags);
1744  	spin_lock(&state->state_lock);
1745  	list_for_each_entry(lock, &state->lock_states, ls_locks) {
1746  		lock->ls_seqid.flags = 0;
1747  		clear_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags);
1748  	}
1749  	spin_unlock(&state->state_lock);
1750  }
1751  
1752  static void nfs4_reset_seqids(struct nfs_server *server,
1753  	int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
1754  {
1755  	struct nfs_client *clp = server->nfs_client;
1756  	struct nfs4_state_owner *sp;
1757  	struct rb_node *pos;
1758  	struct nfs4_state *state;
1759  
1760  	spin_lock(&clp->cl_lock);
1761  	for (pos = rb_first(&server->state_owners);
1762  	     pos != NULL;
1763  	     pos = rb_next(pos)) {
1764  		sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
1765  		sp->so_seqid.flags = 0;
1766  		spin_lock(&sp->so_lock);
1767  		list_for_each_entry(state, &sp->so_states, open_states) {
1768  			if (mark_reclaim(clp, state))
1769  				nfs4_clear_open_state(state);
1770  		}
1771  		spin_unlock(&sp->so_lock);
1772  	}
1773  	spin_unlock(&clp->cl_lock);
1774  }
1775  
1776  static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp,
1777  	int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
1778  {
1779  	struct nfs_server *server;
1780  
1781  	rcu_read_lock();
1782  	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
1783  		nfs4_reset_seqids(server, mark_reclaim);
1784  	rcu_read_unlock();
1785  }
1786  
1787  static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
1788  {
1789  	set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
1790  	/* Mark all delegations for reclaim */
1791  	nfs_delegation_mark_reclaim(clp);
1792  	nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot);
1793  }
1794  
1795  static int nfs4_reclaim_complete(struct nfs_client *clp,
1796  				 const struct nfs4_state_recovery_ops *ops,
1797  				 const struct cred *cred)
1798  {
1799  	/* Notify the server we're done reclaiming our state */
1800  	if (ops->reclaim_complete)
1801  		return ops->reclaim_complete(clp, cred);
1802  	return 0;
1803  }
1804  
1805  static void nfs4_clear_reclaim_server(struct nfs_server *server)
1806  {
1807  	struct nfs_client *clp = server->nfs_client;
1808  	struct nfs4_state_owner *sp;
1809  	struct rb_node *pos;
1810  	struct nfs4_state *state;
1811  
1812  	spin_lock(&clp->cl_lock);
1813  	for (pos = rb_first(&server->state_owners);
1814  	     pos != NULL;
1815  	     pos = rb_next(pos)) {
1816  		sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
1817  		spin_lock(&sp->so_lock);
1818  		list_for_each_entry(state, &sp->so_states, open_states) {
1819  			if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT,
1820  						&state->flags))
1821  				continue;
1822  			nfs4_state_mark_reclaim_nograce(clp, state);
1823  		}
1824  		spin_unlock(&sp->so_lock);
1825  	}
1826  	spin_unlock(&clp->cl_lock);
1827  }
1828  
1829  static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
1830  {
1831  	struct nfs_server *server;
1832  
1833  	if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
1834  		return 0;
1835  
1836  	rcu_read_lock();
1837  	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
1838  		nfs4_clear_reclaim_server(server);
1839  	rcu_read_unlock();
1840  
1841  	nfs_delegation_reap_unclaimed(clp);
1842  	return 1;
1843  }
1844  
1845  static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
1846  {
1847  	const struct nfs4_state_recovery_ops *ops;
1848  	const struct cred *cred;
1849  	int err;
1850  
1851  	if (!nfs4_state_clear_reclaim_reboot(clp))
1852  		return;
1853  	ops = clp->cl_mvops->reboot_recovery_ops;
1854  	cred = nfs4_get_clid_cred(clp);
1855  	err = nfs4_reclaim_complete(clp, ops, cred);
1856  	put_cred(cred);
1857  	if (err == -NFS4ERR_CONN_NOT_BOUND_TO_SESSION)
1858  		set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
1859  }
1860  
1861  static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp)
1862  {
1863  	nfs_mark_test_expired_all_delegations(clp);
1864  	nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce);
1865  }
1866  
1867  static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1868  {
1869  	switch (error) {
1870  	case 0:
1871  		break;
1872  	case -NFS4ERR_CB_PATH_DOWN:
1873  		nfs40_handle_cb_pathdown(clp);
1874  		break;
1875  	case -NFS4ERR_NO_GRACE:
1876  		nfs4_state_end_reclaim_reboot(clp);
1877  		break;
1878  	case -NFS4ERR_STALE_CLIENTID:
1879  		set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1880  		nfs4_state_start_reclaim_reboot(clp);
1881  		break;
1882  	case -NFS4ERR_EXPIRED:
1883  		set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1884  		nfs4_state_start_reclaim_nograce(clp);
1885  		break;
1886  	case -NFS4ERR_BADSESSION:
1887  	case -NFS4ERR_BADSLOT:
1888  	case -NFS4ERR_BAD_HIGH_SLOT:
1889  	case -NFS4ERR_DEADSESSION:
1890  	case -NFS4ERR_SEQ_FALSE_RETRY:
1891  	case -NFS4ERR_SEQ_MISORDERED:
1892  		set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1893  		/* Zero session reset errors */
1894  		break;
1895  	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1896  		set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1897  		break;
1898  	default:
1899  		dprintk("%s: failed to handle error %d for server %s\n",
1900  				__func__, error, clp->cl_hostname);
1901  		return error;
1902  	}
1903  	dprintk("%s: handled error %d for server %s\n", __func__, error,
1904  			clp->cl_hostname);
1905  	return 0;
1906  }
1907  
1908  static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops)
1909  {
1910  	struct nfs4_state_owner *sp;
1911  	struct nfs_server *server;
1912  	struct rb_node *pos;
1913  	LIST_HEAD(freeme);
1914  	int status = 0;
1915  	int lost_locks = 0;
1916  
1917  restart:
1918  	rcu_read_lock();
1919  	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
1920  		nfs4_purge_state_owners(server, &freeme);
1921  		spin_lock(&clp->cl_lock);
1922  		for (pos = rb_first(&server->state_owners);
1923  		     pos != NULL;
1924  		     pos = rb_next(pos)) {
1925  			sp = rb_entry(pos,
1926  				struct nfs4_state_owner, so_server_node);
1927  			if (!test_and_clear_bit(ops->owner_flag_bit,
1928  							&sp->so_flags))
1929  				continue;
1930  			if (!atomic_inc_not_zero(&sp->so_count))
1931  				continue;
1932  			spin_unlock(&clp->cl_lock);
1933  			rcu_read_unlock();
1934  
1935  			status = nfs4_reclaim_open_state(sp, ops, &lost_locks);
1936  			if (status < 0) {
1937  				if (lost_locks)
1938  					pr_warn("NFS: %s: lost %d locks\n",
1939  						clp->cl_hostname, lost_locks);
1940  				set_bit(ops->owner_flag_bit, &sp->so_flags);
1941  				nfs4_put_state_owner(sp);
1942  				status = nfs4_recovery_handle_error(clp, status);
1943  				return (status != 0) ? status : -EAGAIN;
1944  			}
1945  
1946  			nfs4_put_state_owner(sp);
1947  			goto restart;
1948  		}
1949  		spin_unlock(&clp->cl_lock);
1950  	}
1951  	rcu_read_unlock();
1952  	nfs4_free_state_owners(&freeme);
1953  	if (lost_locks)
1954  		pr_warn("NFS: %s: lost %d locks\n",
1955  			clp->cl_hostname, lost_locks);
1956  	return 0;
1957  }
1958  
1959  static int nfs4_check_lease(struct nfs_client *clp)
1960  {
1961  	const struct cred *cred;
1962  	const struct nfs4_state_maintenance_ops *ops =
1963  		clp->cl_mvops->state_renewal_ops;
1964  	int status;
1965  
1966  	/* Is the client already known to have an expired lease? */
1967  	if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1968  		return 0;
1969  	cred = ops->get_state_renewal_cred(clp);
1970  	if (cred == NULL) {
1971  		cred = nfs4_get_clid_cred(clp);
1972  		status = -ENOKEY;
1973  		if (cred == NULL)
1974  			goto out;
1975  	}
1976  	status = ops->renew_lease(clp, cred);
1977  	put_cred(cred);
1978  	if (status == -ETIMEDOUT) {
1979  		set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1980  		return 0;
1981  	}
1982  out:
1983  	return nfs4_recovery_handle_error(clp, status);
1984  }
1985  
1986  /* Set NFS4CLNT_LEASE_EXPIRED and reclaim reboot state for all v4.0 errors
1987   * and for recoverable errors on EXCHANGE_ID for v4.1
1988   */
1989  static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
1990  {
1991  	switch (status) {
1992  	case -NFS4ERR_SEQ_MISORDERED:
1993  		if (test_and_set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state))
1994  			return -ESERVERFAULT;
1995  		/* Lease confirmation error: retry after purging the lease */
1996  		ssleep(1);
1997  		clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
1998  		break;
1999  	case -NFS4ERR_STALE_CLIENTID:
2000  		clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
2001  		nfs4_state_start_reclaim_reboot(clp);
2002  		break;
2003  	case -NFS4ERR_CLID_INUSE:
2004  		pr_err("NFS: Server %s reports our clientid is in use\n",
2005  			clp->cl_hostname);
2006  		nfs_mark_client_ready(clp, -EPERM);
2007  		clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
2008  		return -EPERM;
2009  	case -EACCES:
2010  	case -NFS4ERR_DELAY:
2011  	case -EAGAIN:
2012  		ssleep(1);
2013  		break;
2014  
2015  	case -NFS4ERR_MINOR_VERS_MISMATCH:
2016  		if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
2017  			nfs_mark_client_ready(clp, -EPROTONOSUPPORT);
2018  		dprintk("%s: exit with error %d for server %s\n",
2019  				__func__, -EPROTONOSUPPORT, clp->cl_hostname);
2020  		return -EPROTONOSUPPORT;
2021  	case -ENOSPC:
2022  		if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
2023  			nfs_mark_client_ready(clp, -EIO);
2024  		return -EIO;
2025  	case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
2026  				 * in nfs4_exchange_id */
2027  	default:
2028  		dprintk("%s: exit with error %d for server %s\n", __func__,
2029  				status, clp->cl_hostname);
2030  		return status;
2031  	}
2032  	set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
2033  	dprintk("%s: handled error %d for server %s\n", __func__, status,
2034  			clp->cl_hostname);
2035  	return 0;
2036  }
2037  
2038  static int nfs4_establish_lease(struct nfs_client *clp)
2039  {
2040  	const struct cred *cred;
2041  	const struct nfs4_state_recovery_ops *ops =
2042  		clp->cl_mvops->reboot_recovery_ops;
2043  	int status;
2044  
2045  	status = nfs4_begin_drain_session(clp);
2046  	if (status != 0)
2047  		return status;
2048  	cred = nfs4_get_clid_cred(clp);
2049  	if (cred == NULL)
2050  		return -ENOENT;
2051  	status = ops->establish_clid(clp, cred);
2052  	put_cred(cred);
2053  	if (status != 0)
2054  		return status;
2055  	pnfs_destroy_all_layouts(clp);
2056  	return 0;
2057  }
2058  
2059  /*
2060   * Returns zero or a negative errno.  NFS4ERR values are converted
2061   * to local errno values.
2062   */
2063  static int nfs4_reclaim_lease(struct nfs_client *clp)
2064  {
2065  	int status;
2066  
2067  	status = nfs4_establish_lease(clp);
2068  	if (status < 0)
2069  		return nfs4_handle_reclaim_lease_error(clp, status);
2070  	if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state))
2071  		nfs4_state_start_reclaim_nograce(clp);
2072  	if (!test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state))
2073  		set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
2074  	clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
2075  	clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
2076  	return 0;
2077  }
2078  
2079  static int nfs4_purge_lease(struct nfs_client *clp)
2080  {
2081  	int status;
2082  
2083  	status = nfs4_establish_lease(clp);
2084  	if (status < 0)
2085  		return nfs4_handle_reclaim_lease_error(clp, status);
2086  	clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
2087  	set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
2088  	nfs4_state_start_reclaim_nograce(clp);
2089  	return 0;
2090  }
2091  
2092  /*
2093   * Try remote migration of one FSID from a source server to a
2094   * destination server.  The source server provides a list of
2095   * potential destinations.
2096   *
2097   * Returns zero or a negative NFS4ERR status code.
2098   */
2099  static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred)
2100  {
2101  	struct nfs_client *clp = server->nfs_client;
2102  	struct nfs4_fs_locations *locations = NULL;
2103  	struct inode *inode;
2104  	struct page *page;
2105  	int status, result;
2106  
2107  	dprintk("--> %s: FSID %llx:%llx on \"%s\"\n", __func__,
2108  			(unsigned long long)server->fsid.major,
2109  			(unsigned long long)server->fsid.minor,
2110  			clp->cl_hostname);
2111  
2112  	result = 0;
2113  	page = alloc_page(GFP_KERNEL);
2114  	locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
2115  	if (page == NULL || locations == NULL) {
2116  		dprintk("<-- %s: no memory\n", __func__);
2117  		goto out;
2118  	}
2119  	locations->fattr = nfs_alloc_fattr();
2120  	if (locations->fattr == NULL) {
2121  		dprintk("<-- %s: no memory\n", __func__);
2122  		goto out;
2123  	}
2124  
2125  	inode = d_inode(server->super->s_root);
2126  	result = nfs4_proc_get_locations(server, NFS_FH(inode), locations,
2127  					 page, cred);
2128  	if (result) {
2129  		dprintk("<-- %s: failed to retrieve fs_locations: %d\n",
2130  			__func__, result);
2131  		goto out;
2132  	}
2133  
2134  	result = -NFS4ERR_NXIO;
2135  	if (!locations->nlocations)
2136  		goto out;
2137  
2138  	if (!(locations->fattr->valid & NFS_ATTR_FATTR_V4_LOCATIONS)) {
2139  		dprintk("<-- %s: No fs_locations data, migration skipped\n",
2140  			__func__);
2141  		goto out;
2142  	}
2143  
2144  	status = nfs4_begin_drain_session(clp);
2145  	if (status != 0) {
2146  		result = status;
2147  		goto out;
2148  	}
2149  
2150  	status = nfs4_replace_transport(server, locations);
2151  	if (status != 0) {
2152  		dprintk("<-- %s: failed to replace transport: %d\n",
2153  			__func__, status);
2154  		goto out;
2155  	}
2156  
2157  	result = 0;
2158  	dprintk("<-- %s: migration succeeded\n", __func__);
2159  
2160  out:
2161  	if (page != NULL)
2162  		__free_page(page);
2163  	if (locations != NULL)
2164  		kfree(locations->fattr);
2165  	kfree(locations);
2166  	if (result) {
2167  		pr_err("NFS: migration recovery failed (server %s)\n",
2168  				clp->cl_hostname);
2169  		set_bit(NFS_MIG_FAILED, &server->mig_status);
2170  	}
2171  	return result;
2172  }
2173  
2174  /*
2175   * Returns zero or a negative NFS4ERR status code.
2176   */
2177  static int nfs4_handle_migration(struct nfs_client *clp)
2178  {
2179  	const struct nfs4_state_maintenance_ops *ops =
2180  				clp->cl_mvops->state_renewal_ops;
2181  	struct nfs_server *server;
2182  	const struct cred *cred;
2183  
2184  	dprintk("%s: migration reported on \"%s\"\n", __func__,
2185  			clp->cl_hostname);
2186  
2187  	cred = ops->get_state_renewal_cred(clp);
2188  	if (cred == NULL)
2189  		return -NFS4ERR_NOENT;
2190  
2191  	clp->cl_mig_gen++;
2192  restart:
2193  	rcu_read_lock();
2194  	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
2195  		int status;
2196  
2197  		if (server->mig_gen == clp->cl_mig_gen)
2198  			continue;
2199  		server->mig_gen = clp->cl_mig_gen;
2200  
2201  		if (!test_and_clear_bit(NFS_MIG_IN_TRANSITION,
2202  						&server->mig_status))
2203  			continue;
2204  
2205  		rcu_read_unlock();
2206  		status = nfs4_try_migration(server, cred);
2207  		if (status < 0) {
2208  			put_cred(cred);
2209  			return status;
2210  		}
2211  		goto restart;
2212  	}
2213  	rcu_read_unlock();
2214  	put_cred(cred);
2215  	return 0;
2216  }
2217  
2218  /*
2219   * Test each nfs_server on the clp's cl_superblocks list to see
2220   * if it's moved to another server.  Stop when the server no longer
2221   * returns NFS4ERR_LEASE_MOVED.
2222   */
2223  static int nfs4_handle_lease_moved(struct nfs_client *clp)
2224  {
2225  	const struct nfs4_state_maintenance_ops *ops =
2226  				clp->cl_mvops->state_renewal_ops;
2227  	struct nfs_server *server;
2228  	const struct cred *cred;
2229  
2230  	dprintk("%s: lease moved reported on \"%s\"\n", __func__,
2231  			clp->cl_hostname);
2232  
2233  	cred = ops->get_state_renewal_cred(clp);
2234  	if (cred == NULL)
2235  		return -NFS4ERR_NOENT;
2236  
2237  	clp->cl_mig_gen++;
2238  restart:
2239  	rcu_read_lock();
2240  	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
2241  		struct inode *inode;
2242  		int status;
2243  
2244  		if (server->mig_gen == clp->cl_mig_gen)
2245  			continue;
2246  		server->mig_gen = clp->cl_mig_gen;
2247  
2248  		rcu_read_unlock();
2249  
2250  		inode = d_inode(server->super->s_root);
2251  		status = nfs4_proc_fsid_present(inode, cred);
2252  		if (status != -NFS4ERR_MOVED)
2253  			goto restart;	/* wasn't this one */
2254  		if (nfs4_try_migration(server, cred) == -NFS4ERR_LEASE_MOVED)
2255  			goto restart;	/* there are more */
2256  		goto out;
2257  	}
2258  	rcu_read_unlock();
2259  
2260  out:
2261  	put_cred(cred);
2262  	return 0;
2263  }
2264  
2265  /**
2266   * nfs4_discover_server_trunking - Detect server IP address trunking
2267   *
2268   * @clp: nfs_client under test
2269   * @result: OUT: found nfs_client, or clp
2270   *
2271   * Returns zero or a negative errno.  If zero is returned,
2272   * an nfs_client pointer is planted in "result".
2273   *
2274   * Note: since we are invoked in process context, and
2275   * not from inside the state manager, we cannot use
2276   * nfs4_handle_reclaim_lease_error().
2277   */
2278  int nfs4_discover_server_trunking(struct nfs_client *clp,
2279  				  struct nfs_client **result)
2280  {
2281  	const struct nfs4_state_recovery_ops *ops =
2282  				clp->cl_mvops->reboot_recovery_ops;
2283  	struct rpc_clnt *clnt;
2284  	const struct cred *cred;
2285  	int i, status;
2286  
2287  	dprintk("NFS: %s: testing '%s'\n", __func__, clp->cl_hostname);
2288  
2289  	clnt = clp->cl_rpcclient;
2290  	i = 0;
2291  
2292  	mutex_lock(&nfs_clid_init_mutex);
2293  again:
2294  	status  = -ENOENT;
2295  	cred = nfs4_get_clid_cred(clp);
2296  	if (cred == NULL)
2297  		goto out_unlock;
2298  
2299  	status = ops->detect_trunking(clp, result, cred);
2300  	put_cred(cred);
2301  	switch (status) {
2302  	case 0:
2303  	case -EINTR:
2304  	case -ERESTARTSYS:
2305  		break;
2306  	case -ETIMEDOUT:
2307  		if (clnt->cl_softrtry)
2308  			break;
2309  		fallthrough;
2310  	case -NFS4ERR_DELAY:
2311  	case -EAGAIN:
2312  		ssleep(1);
2313  		fallthrough;
2314  	case -NFS4ERR_STALE_CLIENTID:
2315  		dprintk("NFS: %s after status %d, retrying\n",
2316  			__func__, status);
2317  		goto again;
2318  	case -EACCES:
2319  		if (i++ == 0) {
2320  			nfs4_root_machine_cred(clp);
2321  			goto again;
2322  		}
2323  		if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX)
2324  			break;
2325  		fallthrough;
2326  	case -NFS4ERR_CLID_INUSE:
2327  	case -NFS4ERR_WRONGSEC:
2328  		/* No point in retrying if we already used RPC_AUTH_UNIX */
2329  		if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX) {
2330  			status = -EPERM;
2331  			break;
2332  		}
2333  		clnt = rpc_clone_client_set_auth(clnt, RPC_AUTH_UNIX);
2334  		if (IS_ERR(clnt)) {
2335  			status = PTR_ERR(clnt);
2336  			break;
2337  		}
2338  		/* Note: this is safe because we haven't yet marked the
2339  		 * client as ready, so we are the only user of
2340  		 * clp->cl_rpcclient
2341  		 */
2342  		clnt = xchg(&clp->cl_rpcclient, clnt);
2343  		rpc_shutdown_client(clnt);
2344  		clnt = clp->cl_rpcclient;
2345  		goto again;
2346  
2347  	case -NFS4ERR_MINOR_VERS_MISMATCH:
2348  		status = -EPROTONOSUPPORT;
2349  		break;
2350  
2351  	case -EKEYEXPIRED:
2352  	case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
2353  				 * in nfs4_exchange_id */
2354  		status = -EKEYEXPIRED;
2355  		break;
2356  	default:
2357  		pr_warn("NFS: %s unhandled error %d. Exiting with error EIO\n",
2358  				__func__, status);
2359  		status = -EIO;
2360  	}
2361  
2362  out_unlock:
2363  	mutex_unlock(&nfs_clid_init_mutex);
2364  	dprintk("NFS: %s: status = %d\n", __func__, status);
2365  	return status;
2366  }
2367  
2368  #ifdef CONFIG_NFS_V4_1
2369  void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
2370  {
2371  	struct nfs_client *clp = session->clp;
2372  
2373  	switch (err) {
2374  	default:
2375  		set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
2376  		break;
2377  	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
2378  		set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
2379  	}
2380  	nfs4_schedule_state_manager(clp);
2381  }
2382  EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
2383  
2384  void nfs41_notify_server(struct nfs_client *clp)
2385  {
2386  	/* Use CHECK_LEASE to ping the server with a SEQUENCE */
2387  	set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
2388  	nfs4_schedule_state_manager(clp);
2389  }
2390  
2391  static void nfs4_reset_all_state(struct nfs_client *clp)
2392  {
2393  	if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
2394  		set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
2395  		clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
2396  		nfs4_state_start_reclaim_nograce(clp);
2397  		dprintk("%s: scheduling reset of all state for server %s!\n",
2398  				__func__, clp->cl_hostname);
2399  		nfs4_schedule_state_manager(clp);
2400  	}
2401  }
2402  
2403  static void nfs41_handle_server_reboot(struct nfs_client *clp)
2404  {
2405  	if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
2406  		nfs4_state_start_reclaim_reboot(clp);
2407  		dprintk("%s: server %s rebooted!\n", __func__,
2408  				clp->cl_hostname);
2409  		nfs4_schedule_state_manager(clp);
2410  	}
2411  }
2412  
2413  static void nfs41_handle_all_state_revoked(struct nfs_client *clp)
2414  {
2415  	nfs4_reset_all_state(clp);
2416  	dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname);
2417  }
2418  
2419  static void nfs41_handle_some_state_revoked(struct nfs_client *clp)
2420  {
2421  	nfs4_state_start_reclaim_nograce(clp);
2422  	nfs4_schedule_state_manager(clp);
2423  
2424  	dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname);
2425  }
2426  
2427  static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp)
2428  {
2429  	/* FIXME: For now, we destroy all layouts. */
2430  	pnfs_destroy_all_layouts(clp);
2431  	nfs_test_expired_all_delegations(clp);
2432  	dprintk("%s: Recallable state revoked on server %s!\n", __func__,
2433  			clp->cl_hostname);
2434  }
2435  
2436  static void nfs41_handle_backchannel_fault(struct nfs_client *clp)
2437  {
2438  	set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
2439  	nfs4_schedule_state_manager(clp);
2440  
2441  	dprintk("%s: server %s declared a backchannel fault\n", __func__,
2442  			clp->cl_hostname);
2443  }
2444  
2445  static void nfs41_handle_cb_path_down(struct nfs_client *clp)
2446  {
2447  	if (test_and_set_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
2448  		&clp->cl_state) == 0)
2449  		nfs4_schedule_state_manager(clp);
2450  }
2451  
2452  void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags,
2453  		bool recovery)
2454  {
2455  	if (!flags)
2456  		return;
2457  
2458  	dprintk("%s: \"%s\" (client ID %llx) flags=0x%08x\n",
2459  		__func__, clp->cl_hostname, clp->cl_clientid, flags);
2460  	/*
2461  	 * If we're called from the state manager thread, then assume we're
2462  	 * already handling the RECLAIM_NEEDED and/or STATE_REVOKED.
2463  	 * Those flags are expected to remain set until we're done
2464  	 * recovering (see RFC5661, section 18.46.3).
2465  	 */
2466  	if (recovery)
2467  		goto out_recovery;
2468  
2469  	if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
2470  		nfs41_handle_server_reboot(clp);
2471  	if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED))
2472  		nfs41_handle_all_state_revoked(clp);
2473  	if (flags & (SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED |
2474  			    SEQ4_STATUS_ADMIN_STATE_REVOKED))
2475  		nfs41_handle_some_state_revoked(clp);
2476  	if (flags & SEQ4_STATUS_LEASE_MOVED)
2477  		nfs4_schedule_lease_moved_recovery(clp);
2478  	if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
2479  		nfs41_handle_recallable_state_revoked(clp);
2480  out_recovery:
2481  	if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT)
2482  		nfs41_handle_backchannel_fault(clp);
2483  	else if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
2484  				SEQ4_STATUS_CB_PATH_DOWN_SESSION))
2485  		nfs41_handle_cb_path_down(clp);
2486  }
2487  
2488  static int nfs4_reset_session(struct nfs_client *clp)
2489  {
2490  	const struct cred *cred;
2491  	int status;
2492  
2493  	if (!nfs4_has_session(clp))
2494  		return 0;
2495  	status = nfs4_begin_drain_session(clp);
2496  	if (status != 0)
2497  		return status;
2498  	cred = nfs4_get_clid_cred(clp);
2499  	status = nfs4_proc_destroy_session(clp->cl_session, cred);
2500  	switch (status) {
2501  	case 0:
2502  	case -NFS4ERR_BADSESSION:
2503  	case -NFS4ERR_DEADSESSION:
2504  		break;
2505  	case -NFS4ERR_BACK_CHAN_BUSY:
2506  	case -NFS4ERR_DELAY:
2507  		set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
2508  		status = 0;
2509  		ssleep(1);
2510  		goto out;
2511  	default:
2512  		status = nfs4_recovery_handle_error(clp, status);
2513  		goto out;
2514  	}
2515  
2516  	memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN);
2517  	status = nfs4_proc_create_session(clp, cred);
2518  	if (status) {
2519  		dprintk("%s: session reset failed with status %d for server %s!\n",
2520  			__func__, status, clp->cl_hostname);
2521  		status = nfs4_handle_reclaim_lease_error(clp, status);
2522  		goto out;
2523  	}
2524  	nfs41_finish_session_reset(clp);
2525  	dprintk("%s: session reset was successful for server %s!\n",
2526  			__func__, clp->cl_hostname);
2527  out:
2528  	put_cred(cred);
2529  	return status;
2530  }
2531  
2532  static int nfs4_bind_conn_to_session(struct nfs_client *clp)
2533  {
2534  	const struct cred *cred;
2535  	int ret;
2536  
2537  	if (!nfs4_has_session(clp))
2538  		return 0;
2539  	ret = nfs4_begin_drain_session(clp);
2540  	if (ret != 0)
2541  		return ret;
2542  	cred = nfs4_get_clid_cred(clp);
2543  	ret = nfs4_proc_bind_conn_to_session(clp, cred);
2544  	put_cred(cred);
2545  	clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
2546  	switch (ret) {
2547  	case 0:
2548  		dprintk("%s: bind_conn_to_session was successful for server %s!\n",
2549  			__func__, clp->cl_hostname);
2550  		break;
2551  	case -NFS4ERR_DELAY:
2552  		ssleep(1);
2553  		set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
2554  		break;
2555  	default:
2556  		return nfs4_recovery_handle_error(clp, ret);
2557  	}
2558  	return 0;
2559  }
2560  
2561  static void nfs4_layoutreturn_any_run(struct nfs_client *clp)
2562  {
2563  	int iomode = 0;
2564  
2565  	if (test_and_clear_bit(NFS4CLNT_RECALL_ANY_LAYOUT_READ, &clp->cl_state))
2566  		iomode += IOMODE_READ;
2567  	if (test_and_clear_bit(NFS4CLNT_RECALL_ANY_LAYOUT_RW, &clp->cl_state))
2568  		iomode += IOMODE_RW;
2569  	/* Note: IOMODE_READ + IOMODE_RW == IOMODE_ANY */
2570  	if (iomode) {
2571  		pnfs_layout_return_unused_byclid(clp, iomode);
2572  		set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
2573  	}
2574  }
2575  #else /* CONFIG_NFS_V4_1 */
/* Without CONFIG_NFS_V4_1: resetting a session is a no-op. */
static int nfs4_reset_session(struct nfs_client *clp)
{
	return 0;
}
2577  
/* Without CONFIG_NFS_V4_1: binding a connection is a no-op. */
static int nfs4_bind_conn_to_session(struct nfs_client *clp)
{
	return 0;
}
2582  
/* Without CONFIG_NFS_V4_1: there are no layouts to return. */
static void nfs4_layoutreturn_any_run(struct nfs_client *clp)
{
}
2586  #endif /* CONFIG_NFS_V4_1 */
2587  
2588  static void nfs4_state_manager(struct nfs_client *clp)
2589  {
2590  	unsigned int memflags;
2591  	int status = 0;
2592  	const char *section = "", *section_sep = "";
2593  
2594  	/*
2595  	 * State recovery can deadlock if the direct reclaim code tries
2596  	 * start NFS writeback. So ensure memory allocations are all
2597  	 * GFP_NOFS.
2598  	 */
2599  	memflags = memalloc_nofs_save();
2600  
2601  	/* Ensure exclusive access to NFSv4 state */
2602  	do {
2603  		trace_nfs4_state_mgr(clp);
2604  		clear_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
2605  		if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
2606  			section = "purge state";
2607  			status = nfs4_purge_lease(clp);
2608  			if (status < 0)
2609  				goto out_error;
2610  			continue;
2611  		}
2612  
2613  		if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
2614  			section = "lease expired";
2615  			/* We're going to have to re-establish a clientid */
2616  			status = nfs4_reclaim_lease(clp);
2617  			if (status < 0)
2618  				goto out_error;
2619  			continue;
2620  		}
2621  
2622  		/* Initialize or reset the session */
2623  		if (test_and_clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) {
2624  			section = "reset session";
2625  			status = nfs4_reset_session(clp);
2626  			if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
2627  				continue;
2628  			if (status < 0)
2629  				goto out_error;
2630  		}
2631  
2632  		/* Send BIND_CONN_TO_SESSION */
2633  		if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
2634  				&clp->cl_state)) {
2635  			section = "bind conn to session";
2636  			status = nfs4_bind_conn_to_session(clp);
2637  			if (status < 0)
2638  				goto out_error;
2639  			continue;
2640  		}
2641  
2642  		if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
2643  			section = "check lease";
2644  			status = nfs4_check_lease(clp);
2645  			if (status < 0)
2646  				goto out_error;
2647  			continue;
2648  		}
2649  
2650  		if (test_and_clear_bit(NFS4CLNT_MOVED, &clp->cl_state)) {
2651  			section = "migration";
2652  			status = nfs4_handle_migration(clp);
2653  			if (status < 0)
2654  				goto out_error;
2655  		}
2656  
2657  		if (test_and_clear_bit(NFS4CLNT_LEASE_MOVED, &clp->cl_state)) {
2658  			section = "lease moved";
2659  			status = nfs4_handle_lease_moved(clp);
2660  			if (status < 0)
2661  				goto out_error;
2662  		}
2663  
2664  		/* First recover reboot state... */
2665  		if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
2666  			section = "reclaim reboot";
2667  			status = nfs4_do_reclaim(clp,
2668  				clp->cl_mvops->reboot_recovery_ops);
2669  			if (status == -EAGAIN)
2670  				continue;
2671  			if (status < 0)
2672  				goto out_error;
2673  			nfs4_state_end_reclaim_reboot(clp);
2674  			continue;
2675  		}
2676  
2677  		/* Detect expired delegations... */
2678  		if (test_and_clear_bit(NFS4CLNT_DELEGATION_EXPIRED, &clp->cl_state)) {
2679  			section = "detect expired delegations";
2680  			nfs_reap_expired_delegations(clp);
2681  			continue;
2682  		}
2683  
2684  		/* Now recover expired state... */
2685  		if (test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
2686  			section = "reclaim nograce";
2687  			status = nfs4_do_reclaim(clp,
2688  				clp->cl_mvops->nograce_recovery_ops);
2689  			if (status == -EAGAIN)
2690  				continue;
2691  			if (status < 0)
2692  				goto out_error;
2693  			clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
2694  		}
2695  
2696  		memalloc_nofs_restore(memflags);
2697  		nfs4_end_drain_session(clp);
2698  		nfs4_clear_state_manager_bit(clp);
2699  
2700  		if (!test_and_set_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state)) {
2701  			if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
2702  				nfs_client_return_marked_delegations(clp);
2703  				set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
2704  			}
2705  			nfs4_layoutreturn_any_run(clp);
2706  			clear_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state);
2707  		}
2708  
2709  		return;
2710  
2711  	} while (refcount_read(&clp->cl_count) > 1 && !signalled());
2712  	goto out_drain;
2713  
2714  out_error:
2715  	if (strlen(section))
2716  		section_sep = ": ";
2717  	trace_nfs4_state_mgr_failed(clp, section, status);
2718  	pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s"
2719  			" with error %d\n", section_sep, section,
2720  			clp->cl_hostname, -status);
2721  	ssleep(1);
2722  out_drain:
2723  	memalloc_nofs_restore(memflags);
2724  	nfs4_end_drain_session(clp);
2725  	nfs4_clear_state_manager_bit(clp);
2726  }
2727  
2728  static int nfs4_run_state_manager(void *ptr)
2729  {
2730  	struct nfs_client *clp = ptr;
2731  	struct rpc_clnt *cl = clp->cl_rpcclient;
2732  
2733  	while (cl != cl->cl_parent)
2734  		cl = cl->cl_parent;
2735  
2736  	allow_signal(SIGKILL);
2737  again:
2738  	set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
2739  	nfs4_state_manager(clp);
2740  	if (atomic_read(&cl->cl_swapper)) {
2741  		wait_var_event_interruptible(&clp->cl_state,
2742  					     test_bit(NFS4CLNT_RUN_MANAGER,
2743  						      &clp->cl_state));
2744  		if (atomic_read(&cl->cl_swapper) &&
2745  		    test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state))
2746  			goto again;
2747  		/* Either no longer a swapper, or were signalled */
2748  	}
2749  	clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
2750  
2751  	if (refcount_read(&clp->cl_count) > 1 && !signalled() &&
2752  	    test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) &&
2753  	    !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state))
2754  		goto again;
2755  
2756  	nfs_put_client(clp);
2757  	module_put_and_kthread_exit(0);
2758  	return 0;
2759  }
2760