xref: /openbmc/linux/fs/nfsd/nfs4state.c (revision fd589a8f)
1 /*
2 *  linux/fs/nfsd/nfs4state.c
3 *
4 *  Copyright (c) 2001 The Regents of the University of Michigan.
5 *  All rights reserved.
6 *
7 *  Kendrick Smith <kmsmith@umich.edu>
8 *  Andy Adamson <kandros@umich.edu>
9 *
10 *  Redistribution and use in source and binary forms, with or without
11 *  modification, are permitted provided that the following conditions
12 *  are met:
13 *
14 *  1. Redistributions of source code must retain the above copyright
15 *     notice, this list of conditions and the following disclaimer.
16 *  2. Redistributions in binary form must reproduce the above copyright
17 *     notice, this list of conditions and the following disclaimer in the
18 *     documentation and/or other materials provided with the distribution.
19 *  3. Neither the name of the University nor the names of its
20 *     contributors may be used to endorse or promote products derived
21 *     from this software without specific prior written permission.
22 *
23 *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
24 *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
25 *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26 *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30 *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 *
35 */
36 
37 #include <linux/param.h>
38 #include <linux/major.h>
39 #include <linux/slab.h>
40 
41 #include <linux/sunrpc/svc.h>
42 #include <linux/nfsd/nfsd.h>
43 #include <linux/nfsd/cache.h>
44 #include <linux/file.h>
45 #include <linux/mount.h>
46 #include <linux/workqueue.h>
47 #include <linux/smp_lock.h>
48 #include <linux/kthread.h>
49 #include <linux/nfs4.h>
50 #include <linux/nfsd/state.h>
51 #include <linux/nfsd/xdr4.h>
52 #include <linux/namei.h>
53 #include <linux/swap.h>
54 #include <linux/mutex.h>
55 #include <linux/lockd/bind.h>
56 #include <linux/module.h>
57 #include <linux/sunrpc/svcauth_gss.h>
58 
#define NFSDDBG_FACILITY                NFSDDBG_PROC

/* Globals */
static time_t lease_time = 90;     /* default lease time */
static time_t user_lease_time = 90;
/* Time this server instance started; clientids stamped with a different
 * boot time are stale (see STALE_CLIENTID()). */
static time_t boot_time;
/* Monotonically increasing ids handed out for owners, files, delegations. */
static u32 current_ownerid = 1;
static u32 current_fileid = 1;
static u32 current_delegid = 1;
static u32 nfs4_init;
static stateid_t zerostateid;             /* bits all 0 */
static stateid_t onestateid;              /* bits all 1 */
/* Next sessionid sequence number (NFSv4.1); see gen_sessionid(). */
static u64 current_sessionid = 1;

/* Match a stateid against the special all-zeroes / all-ones stateids. */
#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t)))
#define ONE_STATEID(stateid)  (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
75 
/* forward declarations */
static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
/* Default recovery directory; may be overridden via nfs4_set_recdir(). */
static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
static void nfs4_set_recdir(char *recdir);

/* Locking: */

/* Currently used for almost all code touching nfsv4 state: */
static DEFINE_MUTEX(client_mutex);

/*
 * Currently used for the del_recall_lru and file hash table.  In an
 * effort to decrease the scope of the client_mutex, this spinlock may
 * eventually cover more:
 */
static DEFINE_SPINLOCK(recall_lock);

/* Slab caches for the main nfsv4 state objects. */
static struct kmem_cache *stateowner_slab = NULL;
static struct kmem_cache *file_slab = NULL;
static struct kmem_cache *stateid_slab = NULL;
static struct kmem_cache *deleg_slab = NULL;
98 
/* Take the global lock (client_mutex) protecting almost all nfsv4 state. */
void
nfs4_lock_state(void)
{
	mutex_lock(&client_mutex);
}
104 
/* Release the global nfsv4 state lock taken by nfs4_lock_state(). */
void
nfs4_unlock_state(void)
{
	mutex_unlock(&client_mutex);
}
110 
111 static inline u32
112 opaque_hashval(const void *ptr, int nbytes)
113 {
114 	unsigned char *cptr = (unsigned char *) ptr;
115 
116 	u32 x = 0;
117 	while (nbytes--) {
118 		x *= 37;
119 		x += *cptr++;
120 	}
121 	return x;
122 }
123 
/* List of delegations, linked via dl_recall_lru; protected by recall_lock. */
static struct list_head del_recall_lru;
125 
/*
 * Drop a reference on an nfs4_file.  On the final put the file is
 * unhashed (atomically with the refcount drop, under recall_lock),
 * its inode reference is released, and the structure is freed.
 */
static inline void
put_nfs4_file(struct nfs4_file *fi)
{
	if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) {
		list_del(&fi->fi_hash);
		spin_unlock(&recall_lock);
		iput(fi->fi_inode);
		kmem_cache_free(file_slab, fi);
	}
}
136 
/* Take a reference on an nfs4_file; paired with put_nfs4_file(). */
static inline void
get_nfs4_file(struct nfs4_file *fi)
{
	atomic_inc(&fi->fi_ref);
}
142 
/* Outstanding delegation count, bounded (approximately — see the ">"
 * test in alloc_init_deleg()) by max_delegations. */
static int num_delegations;
unsigned int max_delegations;

/*
 * Open owner state (share locks)
 */

/* hash tables for nfs4_stateowner */
#define OWNER_HASH_BITS              8
#define OWNER_HASH_SIZE             (1 << OWNER_HASH_BITS)
#define OWNER_HASH_MASK             (OWNER_HASH_SIZE - 1)

/* hash by owner id, and by (clientid, owner name), respectively */
#define ownerid_hashval(id) \
        ((id) & OWNER_HASH_MASK)
#define ownerstr_hashval(clientid, ownername) \
        (((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK)

static struct list_head	ownerid_hashtbl[OWNER_HASH_SIZE];
static struct list_head	ownerstr_hashtbl[OWNER_HASH_SIZE];

/* hash table for nfs4_file */
#define FILE_HASH_BITS                   8
#define FILE_HASH_SIZE                  (1 << FILE_HASH_BITS)
#define FILE_HASH_MASK                  (FILE_HASH_SIZE - 1)
/* hash table for (open)nfs4_stateid */
#define STATEID_HASH_BITS              10
#define STATEID_HASH_SIZE              (1 << STATEID_HASH_BITS)
#define STATEID_HASH_MASK              (STATEID_HASH_SIZE - 1)

/* files hash by inode pointer; stateids by (ownerid, fileid) sum */
#define file_hashval(x) \
        hash_ptr(x, FILE_HASH_BITS)
#define stateid_hashval(owner_id, file_id)  \
        (((owner_id) + (file_id)) & STATEID_HASH_MASK)

static struct list_head file_hashtbl[FILE_HASH_SIZE];
static struct list_head stateid_hashtbl[STATEID_HASH_SIZE];
179 
/*
 * Allocate and initialize a delegation of the given type for client
 * clp on the file behind open stateid stp, and link it onto the
 * file's and client's delegation lists.  Returns NULL if the file has
 * already seen a conflicting access, the delegation limit is reached,
 * or allocation fails.
 */
static struct nfs4_delegation *
alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
{
	struct nfs4_delegation *dp;
	struct nfs4_file *fp = stp->st_file;
	struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn;

	dprintk("NFSD alloc_init_deleg\n");
	if (fp->fi_had_conflict)
		return NULL;
	/* NOTE(review): ">" (not ">=") lets the count exceed
	 * max_delegations by one — confirm intended. */
	if (num_delegations > max_delegations)
		return NULL;
	dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL);
	if (dp == NULL)
		return dp;
	num_delegations++;
	INIT_LIST_HEAD(&dp->dl_perfile);
	INIT_LIST_HEAD(&dp->dl_perclnt);
	INIT_LIST_HEAD(&dp->dl_recall_lru);
	dp->dl_client = clp;
	/* hold references on the file and the VFS struct file for the
	 * lifetime of the delegation */
	get_nfs4_file(fp);
	dp->dl_file = fp;
	dp->dl_flock = NULL;
	get_file(stp->st_vfs_file);
	dp->dl_vfs_file = stp->st_vfs_file;
	dp->dl_type = type;
	dp->dl_ident = cb->cb_ident;
	/* NOTE(review): si_boot is stamped with the current time rather
	 * than boot_time (which gen_clid() uses for clientids) — confirm
	 * the stateid staleness checks accept this. */
	dp->dl_stateid.si_boot = get_seconds();
	dp->dl_stateid.si_stateownerid = current_delegid++;
	dp->dl_stateid.si_fileid = 0;
	dp->dl_stateid.si_generation = 0;
	fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
	dp->dl_time = 0;
	/* one reference for the caller; see nfs4_put_delegation() */
	atomic_set(&dp->dl_count, 1);
	list_add(&dp->dl_perfile, &fp->fi_delegations);
	list_add(&dp->dl_perclnt, &clp->cl_delegations);
	return dp;
}
218 
219 void
220 nfs4_put_delegation(struct nfs4_delegation *dp)
221 {
222 	if (atomic_dec_and_test(&dp->dl_count)) {
223 		dprintk("NFSD: freeing dp %p\n",dp);
224 		put_nfs4_file(dp->dl_file);
225 		kmem_cache_free(deleg_slab, dp);
226 		num_delegations--;
227 	}
228 }
229 
/* Remove the associated file_lock first, then remove the delegation.
 * lease_modify() is called to remove the FS_LEASE file_lock from
 * the i_flock list, eventually calling nfsd's lock_manager
 * fl_release_callback.
 */
static void
nfs4_close_delegation(struct nfs4_delegation *dp)
{
	struct file *filp = dp->dl_vfs_file;

	dprintk("NFSD: close_delegation dp %p\n",dp);
	/* detach the struct file from the delegation before tearing it down */
	dp->dl_vfs_file = NULL;
	/* The following nfsd_close may not actually close the file,
	 * but we want to remove the lease in any case. */
	if (dp->dl_flock)
		vfs_setlease(filp, F_UNLCK, &dp->dl_flock);
	nfsd_close(filp);
}
248 
/* Called under the state lock. */
/*
 * Remove a delegation from the file, client, and recall-LRU lists
 * (the latter under recall_lock), close it, and drop a reference.
 */
static void
unhash_delegation(struct nfs4_delegation *dp)
{
	list_del_init(&dp->dl_perfile);
	list_del_init(&dp->dl_perclnt);
	spin_lock(&recall_lock);
	list_del_init(&dp->dl_recall_lru);
	spin_unlock(&recall_lock);
	nfs4_close_delegation(dp);
	nfs4_put_delegation(dp);
}
261 
/*
 * SETCLIENTID state
 */

/* Hash tables for nfs4_clientid state */
#define CLIENT_HASH_BITS                 4
#define CLIENT_HASH_SIZE                (1 << CLIENT_HASH_BITS)
#define CLIENT_HASH_MASK                (CLIENT_HASH_SIZE - 1)

/* hash by clientid, and by the first 8 bytes of the recdir name */
#define clientid_hashval(id) \
	((id) & CLIENT_HASH_MASK)
#define clientstr_hashval(name) \
	(opaque_hashval((name), 8) & CLIENT_HASH_MASK)
/*
 * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
 * used in reboot/reset lease grace period processing
 *
 * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed
 * setclientid_confirmed info.
 *
 * unconf_str_hashtbl[] and unconf_id_hashtbl[] hold unconfirmed
 * setclientid info.
 *
 * client_lru holds client queue ordered by nfs4_client.cl_time
 * for lease renewal.
 *
 * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time
 * for last close replay.
 */
static struct list_head	reclaim_str_hashtbl[CLIENT_HASH_SIZE];
static int reclaim_str_hashtbl_size = 0;
static struct list_head	conf_id_hashtbl[CLIENT_HASH_SIZE];
static struct list_head	conf_str_hashtbl[CLIENT_HASH_SIZE];
static struct list_head	unconf_str_hashtbl[CLIENT_HASH_SIZE];
static struct list_head	unconf_id_hashtbl[CLIENT_HASH_SIZE];
static struct list_head client_lru;
static struct list_head close_lru;
299 
/* Remove a stateid from the stateid hash and from its file's and
 * stateowner's lists.  Drops no references. */
static void unhash_generic_stateid(struct nfs4_stateid *stp)
{
	list_del(&stp->st_hash);
	list_del(&stp->st_perfile);
	list_del(&stp->st_perstateowner);
}
306 
/* Drop the stateid's file reference and free it; callers unhash it
 * first (see release_lock_stateid()/release_open_stateid()). */
static void free_generic_stateid(struct nfs4_stateid *stp)
{
	put_nfs4_file(stp->st_file);
	kmem_cache_free(stateid_slab, stp);
}
312 
/* Tear down a lock stateid: unhash it, remove the posix locks its
 * lockowner holds on the file, and free it. */
static void release_lock_stateid(struct nfs4_stateid *stp)
{
	unhash_generic_stateid(stp);
	locks_remove_posix(stp->st_vfs_file, (fl_owner_t)stp->st_stateowner);
	free_generic_stateid(stp);
}
319 
/* Remove a lockowner from the id/str hash tables and from its open
 * stateid's list, then tear down all of its lock stateids. */
static void unhash_lockowner(struct nfs4_stateowner *sop)
{
	struct nfs4_stateid *stp;

	list_del(&sop->so_idhash);
	list_del(&sop->so_strhash);
	list_del(&sop->so_perstateid);
	while (!list_empty(&sop->so_stateids)) {
		stp = list_first_entry(&sop->so_stateids,
				struct nfs4_stateid, st_perstateowner);
		release_lock_stateid(stp);
	}
}
333 
/* Unhash a lockowner (and its lock stateids) and drop a reference. */
static void release_lockowner(struct nfs4_stateowner *sop)
{
	unhash_lockowner(sop);
	nfs4_put_stateowner(sop);
}
339 
/*
 * Release every lockowner hanging off an open stateid.  The BUG_ON
 * asserts that only lockowners (never open owners) appear on the
 * st_lockowners list.
 */
static void
release_stateid_lockowners(struct nfs4_stateid *open_stp)
{
	struct nfs4_stateowner *lock_sop;

	while (!list_empty(&open_stp->st_lockowners)) {
		lock_sop = list_entry(open_stp->st_lockowners.next,
				struct nfs4_stateowner, so_perstateid);
		/* list_del(&open_stp->st_lockowners);  */
		BUG_ON(lock_sop->so_is_open_owner);
		release_lockowner(lock_sop);
	}
}
353 
/* Tear down an open stateid: unhash it, release its lockowners,
 * close the underlying file, and free it. */
static void release_open_stateid(struct nfs4_stateid *stp)
{
	unhash_generic_stateid(stp);
	release_stateid_lockowners(stp);
	nfsd_close(stp->st_vfs_file);
	free_generic_stateid(stp);
}
361 
/* Remove an openowner from the hash tables and its client's list,
 * then tear down all of its open stateids. */
static void unhash_openowner(struct nfs4_stateowner *sop)
{
	list_del(&sop->so_idhash);
	list_del(&sop->so_strhash);
	list_del(&sop->so_perclient);
	list_del(&sop->so_perstateid); /* XXX: necessary? */
	while (!list_empty(&sop->so_stateids)) {
		struct nfs4_stateid *stp;

		stp = list_first_entry(&sop->so_stateids,
				struct nfs4_stateid, st_perstateowner);
		release_open_stateid(stp);
	}
}
376 
/* Unhash an openowner, remove it from the close-replay LRU, and drop
 * a reference. */
static void release_openowner(struct nfs4_stateowner *sop)
{
	unhash_openowner(sop);
	list_del(&sop->so_close_lru);
	nfs4_put_stateowner(sop);
}
383 
/* NFSv4.1 session hash table, protected by sessionid_lock. */
static DEFINE_SPINLOCK(sessionid_lock);
#define SESSION_HASH_SIZE	512
static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE];
387 
388 static inline int
389 hash_sessionid(struct nfs4_sessionid *sessionid)
390 {
391 	struct nfsd4_sessionid *sid = (struct nfsd4_sessionid *)sessionid;
392 
393 	return sid->sequence % SESSION_HASH_SIZE;
394 }
395 
/* Debug helper: print a sessionid as four 32-bit words. */
static inline void
dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
{
	u32 *ptr = (u32 *)(&sessionid->data[0]);
	dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]);
}
402 
/* Build a unique sessionid for ses from the owning client's clientid
 * plus the global current_sessionid counter. */
static void
gen_sessionid(struct nfsd4_session *ses)
{
	struct nfs4_client *clp = ses->se_client;
	struct nfsd4_sessionid *sid;

	sid = (struct nfsd4_sessionid *)ses->se_sessionid.data;
	sid->clientid = clp->cl_clientid;
	sid->sequence = current_sessionid++;
	sid->reserved = 0;
}
414 
/*
 * Give the client the number of slots it requests bound by
 * NFSD_MAX_SLOTS_PER_SESSION and by sv_drc_max_pages.
 *
 * If we run out of pages (sv_drc_pages_used == sv_drc_max_pages) we
 * should (up to a point) re-negotiate active sessions and reduce their
 * slot usage to make room for new connections. For now we just fail the
 * create session.
 */
static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan)
{
	int status = 0, np = fchan->maxreqs * NFSD_PAGES_PER_SLOT;

	if (fchan->maxreqs < 1)
		return nfserr_inval;
	else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
		fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;

	/* Reserve DRC pages, clamping np to whatever remains available. */
	spin_lock(&nfsd_serv->sv_lock);
	if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages)
		np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used;
	nfsd_serv->sv_drc_pages_used += np;
	spin_unlock(&nfsd_serv->sv_lock);

	/* No pages left: fail; otherwise convert pages back into slots. */
	if (np <= 0) {
		status = nfserr_resource;
		fchan->maxreqs = 0;
	} else
		fchan->maxreqs = np / NFSD_PAGES_PER_SLOT;

	return status;
}
447 
/*
 * fchan holds the client values on input, and the server values on output
 * (session_fchan receives a copy of the negotiated values to store in
 * the session).
 */
static int init_forechannel_attrs(struct svc_rqst *rqstp,
				  struct nfsd4_channel_attrs *session_fchan,
				  struct nfsd4_channel_attrs *fchan)
{
	int status = 0;
	__u32   maxcount = svc_max_payload(rqstp);

	/* headerpadsz set to zero in encode routine */

	/* Use the client's max request and max response size if possible */
	if (fchan->maxreq_sz > maxcount)
		fchan->maxreq_sz = maxcount;
	session_fchan->maxreq_sz = fchan->maxreq_sz;

	if (fchan->maxresp_sz > maxcount)
		fchan->maxresp_sz = maxcount;
	session_fchan->maxresp_sz = fchan->maxresp_sz;

	/* Set the max response cached size our default which is
	 * a multiple of PAGE_SIZE and small */
	session_fchan->maxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE;
	fchan->maxresp_cached = session_fchan->maxresp_cached;

	/* Use the client's maxops if possible */
	if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND)
		fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND;
	session_fchan->maxops = fchan->maxops;

	/* try to use the client requested number of slots */
	if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
		fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;

	/* FIXME: Error means no more DRC pages so the server should
	 * recover pages from existing sessions. For now fail session
	 * creation.
	 */
	status = set_forechannel_maxreqs(fchan);

	session_fchan->maxreqs = fchan->maxreqs;
	return status;
}
492 
493 static int
494 alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
495 		   struct nfsd4_create_session *cses)
496 {
497 	struct nfsd4_session *new, tmp;
498 	int idx, status = nfserr_resource, slotsize;
499 
500 	memset(&tmp, 0, sizeof(tmp));
501 
502 	/* FIXME: For now, we just accept the client back channel attributes. */
503 	tmp.se_bchannel = cses->back_channel;
504 	status = init_forechannel_attrs(rqstp, &tmp.se_fchannel,
505 					&cses->fore_channel);
506 	if (status)
507 		goto out;
508 
509 	/* allocate struct nfsd4_session and slot table in one piece */
510 	slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot);
511 	new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL);
512 	if (!new)
513 		goto out;
514 
515 	memcpy(new, &tmp, sizeof(*new));
516 
517 	new->se_client = clp;
518 	gen_sessionid(new);
519 	idx = hash_sessionid(&new->se_sessionid);
520 	memcpy(clp->cl_sessionid.data, new->se_sessionid.data,
521 	       NFS4_MAX_SESSIONID_LEN);
522 
523 	new->se_flags = cses->flags;
524 	kref_init(&new->se_ref);
525 	spin_lock(&sessionid_lock);
526 	list_add(&new->se_hash, &sessionid_hashtbl[idx]);
527 	list_add(&new->se_perclnt, &clp->cl_sessions);
528 	spin_unlock(&sessionid_lock);
529 
530 	status = nfs_ok;
531 out:
532 	return status;
533 }
534 
/* caller must hold sessionid_lock */
/* Look up a session by full sessionid; returns NULL if not found. */
static struct nfsd4_session *
find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid)
{
	struct nfsd4_session *elem;
	int idx;

	dump_sessionid(__func__, sessionid);
	idx = hash_sessionid(sessionid);
	dprintk("%s: idx is %d\n", __func__, idx);
	/* Search in the appropriate list */
	list_for_each_entry(elem, &sessionid_hashtbl[idx], se_hash) {
		dump_sessionid("list traversal", &elem->se_sessionid);
		if (!memcmp(elem->se_sessionid.data, sessionid->data,
			    NFS4_MAX_SESSIONID_LEN)) {
			return elem;
		}
	}

	dprintk("%s: session not found\n", __func__);
	return NULL;
}
557 
/* caller must hold sessionid_lock */
/* Remove a session from the sessionid hash and its client's list. */
static void
unhash_session(struct nfsd4_session *ses)
{
	list_del(&ses->se_hash);
	list_del(&ses->se_perclnt);
}
565 
/* Unhash a session (taking sessionid_lock) and drop a reference. */
static void
release_session(struct nfsd4_session *ses)
{
	spin_lock(&sessionid_lock);
	unhash_session(ses);
	spin_unlock(&sessionid_lock);
	nfsd4_put_session(ses);
}
574 
/* Forward declaration: drops the page references held by a DRC cache entry. */
static void nfsd4_release_respages(struct page **respages, short resused);
576 
/*
 * kref release callback for a session: release the cached reply pages
 * of every slot, then free the session (slot table included — they
 * were allocated as one piece in alloc_init_session()).
 */
void
free_session(struct kref *kref)
{
	struct nfsd4_session *ses;
	int i;

	ses = container_of(kref, struct nfsd4_session, se_ref);
	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
		struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry;
		nfsd4_release_respages(e->ce_respages, e->ce_resused);
	}
	kfree(ses);
}
590 
/* Record lease renewal for a client: move it to the tail of the LRU
 * and stamp its renewal time. */
static inline void
renew_client(struct nfs4_client *clp)
{
	/*
	* Move client to the end of the LRU list.
	*/
	dprintk("renewing client (clientid %08x/%08x)\n",
			clp->cl_clientid.cl_boot,
			clp->cl_clientid.cl_id);
	list_move_tail(&clp->cl_lru, &client_lru);
	clp->cl_time = get_seconds();
}
603 
/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
/* Return nonzero iff clid was issued by a previous server instance
 * (its boot stamp differs from the current boot_time). */
static int
STALE_CLIENTID(clientid_t *clid)
{
	if (clid->cl_boot == boot_time)
		return 0;
	dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n",
		clid->cl_boot, clid->cl_id, boot_time);
	return 1;
}
614 
615 /*
616  * XXX Should we use a slab cache ?
617  * This type of memory management is somewhat inefficient, but we use it
618  * anyway since SETCLIENTID is not a common operation.
619  */
620 static struct nfs4_client *alloc_client(struct xdr_netobj name)
621 {
622 	struct nfs4_client *clp;
623 
624 	clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
625 	if (clp == NULL)
626 		return NULL;
627 	clp->cl_name.data = kmalloc(name.len, GFP_KERNEL);
628 	if (clp->cl_name.data == NULL) {
629 		kfree(clp);
630 		return NULL;
631 	}
632 	memcpy(clp->cl_name.data, name.data, name.len);
633 	clp->cl_name.len = name.len;
634 	return clp;
635 }
636 
/* Shut down a client's callback RPC transport and release the
 * callback credential, if either exists. */
static void
shutdown_callback_client(struct nfs4_client *clp)
{
	struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;

	if (clnt) {
		/*
		 * Callback threads take a reference on the client, so there
		 * should be no outstanding callbacks at this point.
		 */
		clp->cl_cb_conn.cb_client = NULL;
		rpc_shutdown_client(clnt);
	}
	if (clp->cl_cb_conn.cb_cred) {
		put_rpccred(clp->cl_cb_conn.cb_cred);
		clp->cl_cb_conn.cb_cred = NULL;
	}
}
655 
/*
 * Free everything a client owns: callback transport, cached session
 * reply pages, credential group info, principal, and name.  Called
 * only on the final put (see put_nfs4_client()).
 */
static inline void
free_client(struct nfs4_client *clp)
{
	shutdown_callback_client(clp);
	nfsd4_release_respages(clp->cl_slot.sl_cache_entry.ce_respages,
			     clp->cl_slot.sl_cache_entry.ce_resused);
	if (clp->cl_cred.cr_group_info)
		put_group_info(clp->cl_cred.cr_group_info);
	kfree(clp->cl_principal);
	kfree(clp->cl_name.data);
	kfree(clp);
}
668 
/* Drop a reference on a client; free it on the final put. */
void
put_nfs4_client(struct nfs4_client *clp)
{
	if (atomic_dec_and_test(&clp->cl_count))
		free_client(clp);
}
675 
/*
 * Tear down all state held by a client whose lease has expired (or
 * that is being replaced): its delegations, hash-table entries,
 * openowners, and sessions, then drop the hash tables' reference.
 */
static void
expire_client(struct nfs4_client *clp)
{
	struct nfs4_stateowner *sop;
	struct nfs4_delegation *dp;
	struct list_head reaplist;

	dprintk("NFSD: expire_client cl_count %d\n",
	                    atomic_read(&clp->cl_count));

	/* Collect the delegations under recall_lock, then unhash them
	 * outside it (unhash_delegation retakes recall_lock). */
	INIT_LIST_HEAD(&reaplist);
	spin_lock(&recall_lock);
	while (!list_empty(&clp->cl_delegations)) {
		dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
		/* NOTE(review): the "fp" printed here is actually dl_flock */
		dprintk("NFSD: expire client. dp %p, fp %p\n", dp,
				dp->dl_flock);
		list_del_init(&dp->dl_perclnt);
		list_move(&dp->dl_recall_lru, &reaplist);
	}
	spin_unlock(&recall_lock);
	while (!list_empty(&reaplist)) {
		dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
		list_del_init(&dp->dl_recall_lru);
		unhash_delegation(dp);
	}
	list_del(&clp->cl_idhash);
	list_del(&clp->cl_strhash);
	list_del(&clp->cl_lru);
	while (!list_empty(&clp->cl_openowners)) {
		sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
		release_openowner(sop);
	}
	while (!list_empty(&clp->cl_sessions)) {
		struct nfsd4_session  *ses;
		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
				 se_perclnt);
		release_session(ses);
	}
	put_nfs4_client(clp);
}
716 
/*
 * Allocate a client for the given name, record its recovery
 * directory name, and initialize its refcount and list heads.
 * Returns NULL on allocation failure.
 */
static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir)
{
	struct nfs4_client *clp;

	clp = alloc_client(name);
	if (clp == NULL)
		return NULL;
	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
	atomic_set(&clp->cl_count, 1);
	atomic_set(&clp->cl_cb_conn.cb_set, 0);
	INIT_LIST_HEAD(&clp->cl_idhash);
	INIT_LIST_HEAD(&clp->cl_strhash);
	INIT_LIST_HEAD(&clp->cl_openowners);
	INIT_LIST_HEAD(&clp->cl_delegations);
	INIT_LIST_HEAD(&clp->cl_sessions);
	INIT_LIST_HEAD(&clp->cl_lru);
	return clp;
}
735 
/* Copy a setclientid verifier into the client. */
static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
{
	memcpy(target->cl_verifier.data, source->data,
			sizeof(target->cl_verifier.data));
}
741 
/* Copy source's clientid (boot stamp and id) to target. */
static void copy_clid(struct nfs4_client *target, struct nfs4_client *source)
{
	target->cl_clientid.cl_boot = source->cl_clientid.cl_boot;
	target->cl_clientid.cl_id = source->cl_clientid.cl_id;
}
747 
/* Copy a credential, taking a reference on its group_info. */
static void copy_cred(struct svc_cred *target, struct svc_cred *source)
{
	target->cr_uid = source->cr_uid;
	target->cr_gid = source->cr_gid;
	target->cr_group_info = source->cr_group_info;
	get_group_info(target->cr_group_info);
}
755 
756 static int same_name(const char *n1, const char *n2)
757 {
758 	return 0 == memcmp(n1, n2, HEXDIR_LEN);
759 }
760 
/* Compare two nfs4 verifiers for equality. */
static int
same_verf(nfs4_verifier *v1, nfs4_verifier *v2)
{
	return 0 == memcmp(v1->data, v2->data, sizeof(v1->data));
}
766 
/* Compare two clientids (boot stamp and id) for equality. */
static int
same_clid(clientid_t *cl1, clientid_t *cl2)
{
	return (cl1->cl_boot == cl2->cl_boot) && (cl1->cl_id == cl2->cl_id);
}
772 
/* XXX what about NGROUP */
/* Credential comparison used for client matching; only uids are
 * compared (gids and groups are ignored — see XXX above). */
static int
same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
{
	return cr1->cr_uid == cr2->cr_uid;
}
779 
/* Generate a fresh clientid from boot_time plus a monotonically
 * increasing counter. */
static void gen_clid(struct nfs4_client *clp)
{
	static u32 current_clientid = 1;

	clp->cl_clientid.cl_boot = boot_time;
	clp->cl_clientid.cl_id = current_clientid++;
}
787 
/*
 * Generate a confirm verifier from the current time and a static
 * counter.  NOTE(review): this is predictable, not random — confirm
 * that is acceptable for the confirm verifier here.
 */
static void gen_confirm(struct nfs4_client *clp)
{
	static u32 i;
	u32 *p;

	p = (u32 *)clp->cl_confirm.data;
	*p++ = get_seconds();
	*p++ = i++;
}
797 
/* Validate a client name: must be non-empty and no longer than
 * NFS4_OPAQUE_LIMIT.  Returns 1 if valid, 0 otherwise. */
static int check_name(struct xdr_netobj name)
{
	if (name.len == 0)
		return 0;
	if (name.len > NFS4_OPAQUE_LIMIT) {
		dprintk("NFSD: check_name: name too long(%d)!\n", name.len);
		return 0;
	}
	return 1;
}
808 
/* Hash an unconfirmed client by name string and by clientid, and put
 * it on the renewal LRU with a fresh timestamp. */
static void
add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
{
	unsigned int idhashval;

	list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]);
	idhashval = clientid_hashval(clp->cl_clientid.cl_id);
	list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]);
	list_add_tail(&clp->cl_lru, &client_lru);
	clp->cl_time = get_seconds();
}
820 
/* Move a client from the unconfirmed hash tables to the confirmed
 * ones and renew its lease. */
static void
move_to_confirmed(struct nfs4_client *clp)
{
	unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
	unsigned int strhashval;

	dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
	list_del_init(&clp->cl_strhash);
	list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
	strhashval = clientstr_hashval(clp->cl_recdir);
	list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
	renew_client(clp);
}
834 
/* Look up a confirmed client by clientid; NULL if not found. */
static struct nfs4_client *
find_confirmed_client(clientid_t *clid)
{
	struct nfs4_client *clp;
	unsigned int idhashval = clientid_hashval(clid->cl_id);

	list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) {
		if (same_clid(&clp->cl_clientid, clid))
			return clp;
	}
	return NULL;
}
847 
/* Look up an unconfirmed client by clientid; NULL if not found. */
static struct nfs4_client *
find_unconfirmed_client(clientid_t *clid)
{
	struct nfs4_client *clp;
	unsigned int idhashval = clientid_hashval(clid->cl_id);

	list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) {
		if (same_clid(&clp->cl_clientid, clid))
			return clp;
	}
	return NULL;
}
860 
/*
 * Return 1 iff clp's clientid establishment method matches the use_exchange_id
 * parameter. Matching is based on the fact the at least one of the
 * EXCHGID4_FLAG_USE_{NON_PNFS,PNFS_MDS,PNFS_DS} flags must be set for v4.1
 *
 * FIXME: we need to unify the clientid namespaces for nfsv4.x
 * and correctly deal with client upgrade/downgrade in EXCHANGE_ID
 * and SET_CLIENTID{,_CONFIRM}
 */
static inline int
match_clientid_establishment(struct nfs4_client *clp, bool use_exchange_id)
{
	/* nonzero exchange flags imply the client came via EXCHANGE_ID */
	bool has_exchange_flags = (clp->cl_exchange_flags != 0);
	return use_exchange_id == has_exchange_flags;
}
876 
/* Look up a confirmed client by recovery-dir name whose establishment
 * method (SETCLIENTID vs EXCHANGE_ID) matches use_exchange_id. */
static struct nfs4_client *
find_confirmed_client_by_str(const char *dname, unsigned int hashval,
			     bool use_exchange_id)
{
	struct nfs4_client *clp;

	list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) {
		if (same_name(clp->cl_recdir, dname) &&
		    match_clientid_establishment(clp, use_exchange_id))
			return clp;
	}
	return NULL;
}
890 
/* As find_confirmed_client_by_str(), but searching the unconfirmed
 * client table. */
static struct nfs4_client *
find_unconfirmed_client_by_str(const char *dname, unsigned int hashval,
			       bool use_exchange_id)
{
	struct nfs4_client *clp;

	list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) {
		if (same_name(clp->cl_recdir, dname) &&
		    match_clientid_establishment(clp, use_exchange_id))
			return clp;
	}
	return NULL;
}
904 
/* a helper function for parse_callback */
/*
 * Consume one decimal octet (0-255) from the buffer described by
 * *lenp/*addrp, stopping at a '.' separator or end of buffer.
 * Advances *lenp and *addrp past the consumed characters.  Returns
 * the octet value, or -1 on empty input, a non-digit character, or a
 * value above 255.
 */
static int
parse_octet(unsigned int *lenp, char **addrp)
{
	unsigned int remaining = *lenp;
	char *cursor = *addrp;
	int value = -1;

	while (remaining) {
		char ch;

		remaining--;
		ch = *cursor++;
		if (ch == '.')
			break;
		if (ch < '0' || ch > '9') {
			value = -1;
			break;
		}
		if (value < 0)
			value = 0;
		value = value * 10 + (ch - '0');
		if (value > 255) {
			value = -1;
			break;
		}
	}
	*lenp = remaining;
	*addrp = cursor;
	return value;
}
937 
938 /* parse and set the setclientid ipv4 callback address */
939 static int
940 parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp)
941 {
942 	int temp = 0;
943 	u32 cbaddr = 0;
944 	u16 cbport = 0;
945 	u32 addrlen = addr_len;
946 	char *addr = addr_val;
947 	int i, shift;
948 
949 	/* ipaddress */
950 	shift = 24;
951 	for(i = 4; i > 0  ; i--) {
952 		if ((temp = parse_octet(&addrlen, &addr)) < 0) {
953 			return 0;
954 		}
955 		cbaddr |= (temp << shift);
956 		if (shift > 0)
957 		shift -= 8;
958 	}
959 	*cbaddrp = cbaddr;
960 
961 	/* port */
962 	shift = 8;
963 	for(i = 2; i > 0  ; i--) {
964 		if ((temp = parse_octet(&addrlen, &addr)) < 0) {
965 			return 0;
966 		}
967 		cbport |= (temp << shift);
968 		if (shift > 0)
969 			shift -= 8;
970 	}
971 	*cbportp = cbport;
972 	return 1;
973 }
974 
975 static void
976 gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
977 {
978 	struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
979 
980 	/* Currently, we only support tcp for the callback channel */
981 	if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3))
982 		goto out_err;
983 
984 	if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val,
985 	                 &cb->cb_addr, &cb->cb_port)))
986 		goto out_err;
987 	cb->cb_minorversion = 0;
988 	cb->cb_prog = se->se_callback_prog;
989 	cb->cb_ident = se->se_callback_ident;
990 	return;
991 out_err:
992 	dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
993 		"will not receive delegations\n",
994 		clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
995 
996 	return;
997 }
998 
/* Record where the accept status was encoded in the reply, for later
 * use by the session reply cache (see nfsd4_store_cache_entry()). */
void
nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
{
	struct nfsd4_compoundres *resp = rqstp->rq_resp;

	resp->cstate.statp = statp;
}
1006 
1007 /*
1008  * Dereference the result pages.
1009  */
1010 static void
1011 nfsd4_release_respages(struct page **respages, short resused)
1012 {
1013 	int i;
1014 
1015 	dprintk("--> %s\n", __func__);
1016 	for (i = 0; i < resused; i++) {
1017 		if (!respages[i])
1018 			continue;
1019 		put_page(respages[i]);
1020 		respages[i] = NULL;
1021 	}
1022 }
1023 
/* Copy page pointers from frompages to topages, taking a reference on
 * each non-NULL page. */
static void
nfsd4_copy_pages(struct page **topages, struct page **frompages, short count)
{
	int i;

	for (i = 0; i < count; i++) {
		struct page *page = frompages[i];

		topages[i] = page;
		if (page)
			get_page(page);
	}
}
1036 
1037 /*
1038  * Cache the reply pages up to NFSD_PAGES_PER_SLOT + 1, clearing the previous
1039  * pages. We add a page to NFSD_PAGES_PER_SLOT for the case where the total
1040  * length of the XDR response is less than se_fmaxresp_cached
1041  * (NFSD_PAGES_PER_SLOT * PAGE_SIZE) but the xdr_buf pages is used for a
1042  * of the reply (e.g. readdir).
1043  *
1044  * Store the base and length of the rq_req.head[0] page
1045  * of the NFSv4.1 data, just past the rpc header.
1046  */
void
nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
{
	struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry;
	struct svc_rqst *rqstp = resp->rqstp;
	struct nfsd4_compoundargs *args = rqstp->rq_argp;
	/*
	 * NOTE(review): ops[] is indexed with resp->opcnt, which here is
	 * the count of processed ops, i.e. one past the last decoded op;
	 * confirm this picks the intended operation for the check below.
	 */
	struct nfsd4_op *op = &args->ops[resp->opcnt];
	struct kvec *resv = &rqstp->rq_res.head[0];

	dprintk("--> %s entry %p\n", __func__, entry);

	/* Don't cache a failed OP_SEQUENCE. */
	if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status)
		return;

	/* Drop references to the previously cached reply pages. */
	nfsd4_release_respages(entry->ce_respages, entry->ce_resused);
	entry->ce_opcnt = resp->opcnt;
	entry->ce_status = resp->cstate.status;

	/*
	 * Don't need a page to cache just the sequence operation - the slot
	 * does this for us!
	 */

	if (nfsd4_not_cached(resp)) {
		entry->ce_resused = 0;
		entry->ce_rpchdrlen = 0;
		dprintk("%s Just cache SEQUENCE. ce_cachethis %d\n", __func__,
			resp->cstate.slot->sl_cache_entry.ce_cachethis);
		return;
	}
	/* Take a reference on each reply page, capped at the slot limit. */
	entry->ce_resused = rqstp->rq_resused;
	if (entry->ce_resused > NFSD_PAGES_PER_SLOT + 1)
		entry->ce_resused = NFSD_PAGES_PER_SLOT + 1;
	nfsd4_copy_pages(entry->ce_respages, rqstp->rq_respages,
			 entry->ce_resused);
	/* The NFS data starts at statp, just past the rpc header. */
	entry->ce_datav.iov_base = resp->cstate.statp;
	entry->ce_datav.iov_len = resv->iov_len - ((char *)resp->cstate.statp -
				(char *)page_address(rqstp->rq_respages[0]));
	/* Current request rpc header length*/
	entry->ce_rpchdrlen = (char *)resp->cstate.statp -
				(char *)page_address(rqstp->rq_respages[0]);
}
1090 
1091 /*
1092  * We keep the rpc header, but take the nfs reply from the replycache.
1093  */
1094 static int
1095 nfsd41_copy_replay_data(struct nfsd4_compoundres *resp,
1096 			struct nfsd4_cache_entry *entry)
1097 {
1098 	struct svc_rqst *rqstp = resp->rqstp;
1099 	struct kvec *resv = &resp->rqstp->rq_res.head[0];
1100 	int len;
1101 
1102 	/* Current request rpc header length*/
1103 	len = (char *)resp->cstate.statp -
1104 			(char *)page_address(rqstp->rq_respages[0]);
1105 	if (entry->ce_datav.iov_len + len > PAGE_SIZE) {
1106 		dprintk("%s v41 cached reply too large (%Zd).\n", __func__,
1107 			entry->ce_datav.iov_len);
1108 		return 0;
1109 	}
1110 	/* copy the cached reply nfsd data past the current rpc header */
1111 	memcpy((char *)resv->iov_base + len, entry->ce_datav.iov_base,
1112 		entry->ce_datav.iov_len);
1113 	resv->iov_len = len + entry->ce_datav.iov_len;
1114 	return 1;
1115 }
1116 
1117 /*
1118  * Keep the first page of the replay. Copy the NFSv4.1 data from the first
1119  * cached page.  Replace any futher replay pages from the cache.
1120  */
1121 __be32
1122 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
1123 			 struct nfsd4_sequence *seq)
1124 {
1125 	struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry;
1126 	__be32 status;
1127 
1128 	dprintk("--> %s entry %p\n", __func__, entry);
1129 
1130 	/*
1131 	 * If this is just the sequence operation, we did not keep
1132 	 * a page in the cache entry because we can just use the
1133 	 * slot info stored in struct nfsd4_sequence that was checked
1134 	 * against the slot in nfsd4_sequence().
1135 	 *
1136 	 * This occurs when seq->cachethis is FALSE, or when the client
1137 	 * session inactivity timer fires and a solo sequence operation
1138 	 * is sent (lease renewal).
1139 	 */
1140 	if (seq && nfsd4_not_cached(resp)) {
1141 		seq->maxslots = resp->cstate.session->se_fchannel.maxreqs;
1142 		return nfs_ok;
1143 	}
1144 
1145 	if (!nfsd41_copy_replay_data(resp, entry)) {
1146 		/*
1147 		 * Not enough room to use the replay rpc header, send the
1148 		 * cached header. Release all the allocated result pages.
1149 		 */
1150 		svc_free_res_pages(resp->rqstp);
1151 		nfsd4_copy_pages(resp->rqstp->rq_respages, entry->ce_respages,
1152 			entry->ce_resused);
1153 	} else {
1154 		/* Release all but the first allocated result page */
1155 
1156 		resp->rqstp->rq_resused--;
1157 		svc_free_res_pages(resp->rqstp);
1158 
1159 		nfsd4_copy_pages(&resp->rqstp->rq_respages[1],
1160 				 &entry->ce_respages[1],
1161 				 entry->ce_resused - 1);
1162 	}
1163 
1164 	resp->rqstp->rq_resused = entry->ce_resused;
1165 	resp->opcnt = entry->ce_opcnt;
1166 	resp->cstate.iovlen = entry->ce_datav.iov_len + entry->ce_rpchdrlen;
1167 	status = entry->ce_status;
1168 
1169 	return status;
1170 }
1171 
1172 /*
1173  * Set the exchange_id flags returned by the server.
1174  */
1175 static void
1176 nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
1177 {
1178 	/* pNFS is not supported */
1179 	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
1180 
1181 	/* Referrals are supported, Migration is not. */
1182 	new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
1183 
1184 	/* set the wire flags to return to client. */
1185 	clid->flags = new->cl_exchange_flags;
1186 }
1187 
/*
 * EXCHANGE_ID (NFSv4.1): establish (or re-establish) a client record,
 * implementing the case analysis of RFC 5661 section 18.35.4.  Only
 * SP4_NONE state protection is supported.
 */
__be32
nfsd4_exchange_id(struct svc_rqst *rqstp,
		  struct nfsd4_compound_state *cstate,
		  struct nfsd4_exchange_id *exid)
{
	struct nfs4_client *unconf, *conf, *new;
	int status;
	unsigned int		strhashval;
	char			dname[HEXDIR_LEN];
	nfs4_verifier		verf = exid->verifier;
	u32			ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr;

	dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p "
		" ip_addr=%u flags %x, spa_how %d\n",
		__func__, rqstp, exid, exid->clname.len, exid->clname.data,
		ip_addr, exid->flags, exid->spa_how);

	if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A))
		return nfserr_inval;

	/* Currently only support SP4_NONE */
	switch (exid->spa_how) {
	case SP4_NONE:
		break;
	case SP4_SSV:
		return nfserr_encr_alg_unsupp;
	default:
		BUG();				/* checked by xdr code */
	case SP4_MACH_CRED:
		return nfserr_serverfault;	/* no excuse :-/ */
	}

	/* Derive the recovery-directory name from the client name. */
	status = nfs4_make_rec_clidname(dname, &exid->clname);

	if (status)
		goto error;

	strhashval = clientstr_hashval(dname);

	nfs4_lock_state();
	status = nfs_ok;

	conf = find_confirmed_client_by_str(dname, strhashval, true);
	if (conf) {
		if (!same_verf(&verf, &conf->cl_verifier)) {
			/* 18.35.4 case 8 */
			if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) {
				status = nfserr_not_same;
				goto out;
			}
			/* Client reboot: destroy old state */
			expire_client(conf);
			goto out_new;
		}
		if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
			/* 18.35.4 case 9 */
			if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) {
				status = nfserr_perm;
				goto out;
			}
			expire_client(conf);
			goto out_new;
		}
		/*
		 * Set bit when the owner id and verifier map to an already
		 * confirmed client id (18.35.3).
		 */
		exid->flags |= EXCHGID4_FLAG_CONFIRMED_R;

		/*
		 * Falling into 18.35.4 case 2, possible router replay.
		 * Leave confirmed record intact and return same result.
		 */
		copy_verf(conf, &verf);
		new = conf;
		goto out_copy;
	}

	/* 18.35.4 case 7 */
	if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) {
		status = nfserr_noent;
		goto out;
	}

	unconf  = find_unconfirmed_client_by_str(dname, strhashval, true);
	if (unconf) {
		/*
		 * Possible retry or client restart.  Per 18.35.4 case 4,
		 * a new unconfirmed record should be generated regardless
		 * of whether any properties have changed.
		 */
		expire_client(unconf);
	}

out_new:
	/* Normal case */
	new = create_client(exid->clname, dname);
	if (new == NULL) {
		status = nfserr_resource;
		goto out;
	}

	copy_verf(new, &verf);
	copy_cred(&new->cl_cred, &rqstp->rq_cred);
	new->cl_addr = ip_addr;
	gen_clid(new);
	gen_confirm(new);
	add_to_unconfirmed(new, strhashval);
out_copy:
	/* Fill in the wire reply from the (new or reused) client record. */
	exid->clientid.cl_boot = new->cl_clientid.cl_boot;
	exid->clientid.cl_id = new->cl_clientid.cl_id;

	new->cl_slot.sl_seqid = 0;
	exid->seqid = 1;
	nfsd4_set_ex_flags(new, exid);

	dprintk("nfsd4_exchange_id seqid %d flags %x\n",
		new->cl_slot.sl_seqid, new->cl_exchange_flags);
	status = nfs_ok;

out:
	nfs4_unlock_state();
error:
	dprintk("nfsd4_exchange_id returns %d\n", ntohl(status));
	return status;
}
1314 
1315 static int
1316 check_slot_seqid(u32 seqid, struct nfsd4_slot *slot)
1317 {
1318 	dprintk("%s enter. seqid %d slot->sl_seqid %d\n", __func__, seqid,
1319 		slot->sl_seqid);
1320 
1321 	/* The slot is in use, and no response has been sent. */
1322 	if (slot->sl_inuse) {
1323 		if (seqid == slot->sl_seqid)
1324 			return nfserr_jukebox;
1325 		else
1326 			return nfserr_seq_misordered;
1327 	}
1328 	/* Normal */
1329 	if (likely(seqid == slot->sl_seqid + 1))
1330 		return nfs_ok;
1331 	/* Replay */
1332 	if (seqid == slot->sl_seqid)
1333 		return nfserr_replay_cache;
1334 	/* Wraparound */
1335 	if (seqid == 1 && (slot->sl_seqid + 1) == 0)
1336 		return nfs_ok;
1337 	/* Misordered replay or misordered new request */
1338 	return nfserr_seq_misordered;
1339 }
1340 
/*
 * CREATE_SESSION (NFSv4.1): create a session for a confirmed or
 * unconfirmed client.  The single create_session slot embedded in the
 * client record (cl_slot) provides replay detection for this op.
 */
__be32
nfsd4_create_session(struct svc_rqst *rqstp,
		     struct nfsd4_compound_state *cstate,
		     struct nfsd4_create_session *cr_ses)
{
	u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr;
	struct nfsd4_compoundres *resp = rqstp->rq_resp;
	struct nfs4_client *conf, *unconf;
	struct nfsd4_slot *slot = NULL;
	int status = 0;

	nfs4_lock_state();
	unconf = find_unconfirmed_client(&cr_ses->clientid);
	conf = find_confirmed_client(&cr_ses->clientid);

	if (conf) {
		/* Confirmed client: this is either a replay or a new session. */
		slot = &conf->cl_slot;
		status = check_slot_seqid(cr_ses->seqid, slot);
		if (status == nfserr_replay_cache) {
			dprintk("Got a create_session replay! seqid= %d\n",
				slot->sl_seqid);
			cstate->slot = slot;
			cstate->status = status;
			/* Return the cached reply status */
			status = nfsd4_replay_cache_entry(resp, NULL);
			goto out;
		} else if (cr_ses->seqid != conf->cl_slot.sl_seqid + 1) {
			status = nfserr_seq_misordered;
			dprintk("Sequence misordered!\n");
			dprintk("Expected seqid= %d but got seqid= %d\n",
				slot->sl_seqid, cr_ses->seqid);
			goto out;
		}
		conf->cl_slot.sl_seqid++;
	} else if (unconf) {
		/* Unconfirmed client: CREATE_SESSION acts as confirmation,
		 * but only for the principal/address that created it. */
		if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
		    (ip_addr != unconf->cl_addr)) {
			status = nfserr_clid_inuse;
			goto out;
		}

		slot = &unconf->cl_slot;
		status = check_slot_seqid(cr_ses->seqid, slot);
		if (status) {
			/* an unconfirmed replay returns misordered */
			status = nfserr_seq_misordered;
			goto out;
		}

		slot->sl_seqid++; /* from 0 to 1 */
		move_to_confirmed(unconf);

		/*
		 * We do not support RDMA or persistent sessions
		 */
		cr_ses->flags &= ~SESSION4_PERSIST;
		cr_ses->flags &= ~SESSION4_RDMA;

		conf = unconf;
	} else {
		status = nfserr_stale_clientid;
		goto out;
	}

	status = alloc_init_session(rqstp, conf, cr_ses);
	if (status)
		goto out;

	/* Return the new session id and the seqid we just consumed. */
	memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data,
	       NFS4_MAX_SESSIONID_LEN);
	cr_ses->seqid = slot->sl_seqid;

	slot->sl_inuse = true;
	cstate->slot = slot;
	/* Ensure a page is used for the cache */
	slot->sl_cache_entry.ce_cachethis = 1;
out:
	nfs4_unlock_state();
	dprintk("%s returns %d\n", __func__, ntohl(status));
	return status;
}
1422 
/*
 * DESTROY_SESSION (NFSv4.1): unhash the session, wait for callbacks,
 * and drop the hash table's reference.  Returns nfserr_badsession if
 * the session id is unknown.
 */
__be32
nfsd4_destroy_session(struct svc_rqst *r,
		      struct nfsd4_compound_state *cstate,
		      struct nfsd4_destroy_session *sessionid)
{
	struct nfsd4_session *ses;
	u32 status = nfserr_badsession;

	/* Notes:
	 * - The confirmed nfs4_client->cl_sessionid holds destroyed sessinid
	 * - Should we return nfserr_back_chan_busy if waiting for
	 *   callbacks on to-be-destroyed session?
	 * - Do we need to clear any callback info from previous session?
	 */

	dump_sessionid(__func__, &sessionid->sessionid);
	spin_lock(&sessionid_lock);
	ses = find_in_sessionid_hashtbl(&sessionid->sessionid);
	if (!ses) {
		spin_unlock(&sessionid_lock);
		goto out;
	}

	/* Unhash under the lock, then drop it before blocking below. */
	unhash_session(ses);
	spin_unlock(&sessionid_lock);

	/* wait for callbacks */
	shutdown_callback_client(ses->se_client);
	nfsd4_put_session(ses);
	status = nfs_ok;
out:
	dprintk("%s returns %d\n", __func__, ntohl(status));
	return status;
}
1457 
/*
 * SEQUENCE (NFSv4.1): validate the session and slot, detect replays
 * via check_slot_seqid(), and bind the slot/session to the compound
 * state for the rest of the compound.
 */
__be32
nfsd4_sequence(struct svc_rqst *rqstp,
	       struct nfsd4_compound_state *cstate,
	       struct nfsd4_sequence *seq)
{
	struct nfsd4_compoundres *resp = rqstp->rq_resp;
	struct nfsd4_session *session;
	struct nfsd4_slot *slot;
	int status;

	/* SEQUENCE must be the first op of the compound. */
	if (resp->opcnt != 1)
		return nfserr_sequence_pos;

	spin_lock(&sessionid_lock);
	status = nfserr_badsession;
	session = find_in_sessionid_hashtbl(&seq->sessionid);
	if (!session)
		goto out;

	status = nfserr_badslot;
	if (seq->slotid >= session->se_fchannel.maxreqs)
		goto out;

	slot = &session->se_slots[seq->slotid];
	dprintk("%s: slotid %d\n", __func__, seq->slotid);

	status = check_slot_seqid(seq->seqid, slot);
	if (status == nfserr_replay_cache) {
		cstate->slot = slot;
		cstate->session = session;
		/* Return the cached reply status and set cstate->status
		 * for nfsd4_svc_encode_compoundres processing */
		status = nfsd4_replay_cache_entry(resp, seq);
		cstate->status = nfserr_replay_cache;
		goto replay_cache;
	}
	if (status)
		goto out;

	/* Success! bump slot seqid */
	slot->sl_inuse = true;
	slot->sl_seqid = seq->seqid;
	slot->sl_cache_entry.ce_cachethis = seq->cachethis;
	/* Always set the cache entry cachethis for solo sequence */
	if (nfsd4_is_solo_sequence(resp))
		slot->sl_cache_entry.ce_cachethis = 1;

	cstate->slot = slot;
	cstate->session = session;

replay_cache:
	/* Renew the clientid on success and on replay.
	 * Hold a session reference until done processing the compound:
	 * nfsd4_put_session called only if the cstate slot is set.
	 */
	renew_client(session->se_client);
	nfsd4_get_session(session);
out:
	spin_unlock(&sessionid_lock);
	dprintk("%s: return %d\n", __func__, ntohl(status));
	return status;
}
1520 
/*
 * SETCLIENTID (NFSv4.0): record a new (unconfirmed) client, following
 * the IMPLEMENTATION cases of RFC 3530 section 14.2.33.  The record
 * becomes effective only after SETCLIENTID_CONFIRM.
 */
__be32
nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
		  struct nfsd4_setclientid *setclid)
{
	struct sockaddr_in	*sin = svc_addr_in(rqstp);
	struct xdr_netobj 	clname = {
		.len = setclid->se_namelen,
		.data = setclid->se_name,
	};
	nfs4_verifier		clverifier = setclid->se_verf;
	unsigned int 		strhashval;
	struct nfs4_client	*conf, *unconf, *new;
	__be32 			status;
	char			*princ;
	char                    dname[HEXDIR_LEN];

	if (!check_name(clname))
		return nfserr_inval;

	status = nfs4_make_rec_clidname(dname, &clname);
	if (status)
		return status;

	/*
	 * XXX The Duplicate Request Cache (DRC) has been checked (??)
	 * We get here on a DRC miss.
	 */

	strhashval = clientstr_hashval(dname);

	nfs4_lock_state();
	conf = find_confirmed_client_by_str(dname, strhashval, false);
	if (conf) {
		/* RFC 3530 14.2.33 CASE 0: */
		status = nfserr_clid_inuse;
		if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
			dprintk("NFSD: setclientid: string in use by client"
				" at %pI4\n", &conf->cl_addr);
			goto out;
		}
	}
	/*
	 * section 14.2.33 of RFC 3530 (under the heading "IMPLEMENTATION")
	 * has a description of SETCLIENTID request processing consisting
	 * of 5 bullet points, labeled as CASE0 - CASE4 below.
	 */
	unconf = find_unconfirmed_client_by_str(dname, strhashval, false);
	status = nfserr_resource;
	if (!conf) {
		/*
		 * RFC 3530 14.2.33 CASE 4:
		 * placed first, because it is the normal case
		 */
		if (unconf)
			expire_client(unconf);
		new = create_client(clname, dname);
		if (new == NULL)
			goto out;
		gen_clid(new);
	} else if (same_verf(&conf->cl_verifier, &clverifier)) {
		/*
		 * RFC 3530 14.2.33 CASE 1:
		 * probable callback update
		 */
		if (unconf) {
			/* Note this is removing unconfirmed {*x***},
			 * which is stronger than RFC recommended {vxc**}.
			 * This has the advantage that there is at most
			 * one {*x***} in either list at any time.
			 */
			expire_client(unconf);
		}
		new = create_client(clname, dname);
		if (new == NULL)
			goto out;
		/* keep the confirmed client's id: callback update only */
		copy_clid(new, conf);
	} else if (!unconf) {
		/*
		 * RFC 3530 14.2.33 CASE 2:
		 * probable client reboot; state will be removed if
		 * confirmed.
		 */
		new = create_client(clname, dname);
		if (new == NULL)
			goto out;
		gen_clid(new);
	} else {
		/*
		 * RFC 3530 14.2.33 CASE 3:
		 * probable client reboot; state will be removed if
		 * confirmed.
		 */
		expire_client(unconf);
		new = create_client(clname, dname);
		if (new == NULL)
			goto out;
		gen_clid(new);
	}
	/* Fill in the rest of the new (unconfirmed) client record. */
	copy_verf(new, &clverifier);
	new->cl_addr = sin->sin_addr.s_addr;
	new->cl_flavor = rqstp->rq_flavor;
	princ = svc_gss_principal(rqstp);
	if (princ) {
		new->cl_principal = kstrdup(princ, GFP_KERNEL);
		if (new->cl_principal == NULL) {
			/* status is still nfserr_resource here */
			free_client(new);
			goto out;
		}
	}
	copy_cred(&new->cl_cred, &rqstp->rq_cred);
	gen_confirm(new);
	gen_callback(new, setclid);
	add_to_unconfirmed(new, strhashval);
	/* Reply with the clientid and the confirm verifier the client
	 * must echo back in SETCLIENTID_CONFIRM. */
	setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
	setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
	memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data));
	status = nfs_ok;
out:
	nfs4_unlock_state();
	return status;
}
1642 
1643 
1644 /*
1645  * Section 14.2.34 of RFC 3530 (under the heading "IMPLEMENTATION") has
1646  * a description of SETCLIENTID_CONFIRM request processing consisting of 4
1647  * bullets, labeled as CASE1 - CASE4 below.
1648  */
__be32
nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
			 struct nfsd4_compound_state *cstate,
			 struct nfsd4_setclientid_confirm *setclientid_confirm)
{
	struct sockaddr_in *sin = svc_addr_in(rqstp);
	struct nfs4_client *conf, *unconf;
	nfs4_verifier confirm = setclientid_confirm->sc_confirm;
	clientid_t * clid = &setclientid_confirm->sc_clientid;
	__be32 status;

	if (STALE_CLIENTID(clid))
		return nfserr_stale_clientid;
	/*
	 * XXX The Duplicate Request Cache (DRC) has been checked (??)
	 * We get here on a DRC miss.
	 */

	nfs4_lock_state();

	conf = find_confirmed_client(clid);
	unconf = find_unconfirmed_client(clid);

	/* Reject a confirm from a different address than the setclientid. */
	status = nfserr_clid_inuse;
	if (conf && conf->cl_addr != sin->sin_addr.s_addr)
		goto out;
	if (unconf && unconf->cl_addr != sin->sin_addr.s_addr)
		goto out;

	/*
	 * section 14.2.34 of RFC 3530 has a description of
	 * SETCLIENTID_CONFIRM request processing consisting
	 * of 4 bullet points, labeled as CASE1 - CASE4 below.
	 */
	if (conf && unconf && same_verf(&confirm, &unconf->cl_confirm)) {
		/*
		 * RFC 3530 14.2.34 CASE 1:
		 * callback update
		 */
		if (!same_creds(&conf->cl_cred, &unconf->cl_cred))
			status = nfserr_clid_inuse;
		else {
			/* XXX: We just turn off callbacks until we can handle
			  * change request correctly. */
			atomic_set(&conf->cl_cb_conn.cb_set, 0);
			expire_client(unconf);
			status = nfs_ok;

		}
	} else if (conf && !unconf) {
		/*
		 * RFC 3530 14.2.34 CASE 2:
		 * probable retransmitted request; play it safe and
		 * do nothing.
		 */
		if (!same_creds(&conf->cl_cred, &rqstp->rq_cred))
			status = nfserr_clid_inuse;
		else
			status = nfs_ok;
	} else if (!conf && unconf
			&& same_verf(&unconf->cl_confirm, &confirm)) {
		/*
		 * RFC 3530 14.2.34 CASE 3:
		 * Normal case; new or rebooted client:
		 */
		if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
			status = nfserr_clid_inuse;
		} else {
			/* Any confirmed client with the same name (i.e. a
			 * rebooted client's old incarnation) is replaced. */
			unsigned int hash =
				clientstr_hashval(unconf->cl_recdir);
			conf = find_confirmed_client_by_str(unconf->cl_recdir,
							    hash, false);
			if (conf) {
				nfsd4_remove_clid_dir(conf);
				expire_client(conf);
			}
			move_to_confirmed(unconf);
			conf = unconf;
			nfsd4_probe_callback(conf);
			status = nfs_ok;
		}
	} else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
	    && (!unconf || (unconf && !same_verf(&unconf->cl_confirm,
				    				&confirm)))) {
		/*
		 * NOTE(review): the "conf &&" / "unconf &&" terms above are
		 * redundant (short-circuit of || already guarantees them).
		 *
		 * RFC 3530 14.2.34 CASE 4:
		 * Client probably hasn't noticed that we rebooted yet.
		 */
		status = nfserr_stale_clientid;
	} else {
		/* check that we have hit one of the cases...*/
		status = nfserr_clid_inuse;
	}
out:
	nfs4_unlock_state();
	return status;
}
1746 
1747 /* OPEN Share state helper functions */
1748 static inline struct nfs4_file *
1749 alloc_init_file(struct inode *ino)
1750 {
1751 	struct nfs4_file *fp;
1752 	unsigned int hashval = file_hashval(ino);
1753 
1754 	fp = kmem_cache_alloc(file_slab, GFP_KERNEL);
1755 	if (fp) {
1756 		atomic_set(&fp->fi_ref, 1);
1757 		INIT_LIST_HEAD(&fp->fi_hash);
1758 		INIT_LIST_HEAD(&fp->fi_stateids);
1759 		INIT_LIST_HEAD(&fp->fi_delegations);
1760 		spin_lock(&recall_lock);
1761 		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
1762 		spin_unlock(&recall_lock);
1763 		fp->fi_inode = igrab(ino);
1764 		fp->fi_id = current_fileid++;
1765 		fp->fi_had_conflict = false;
1766 		return fp;
1767 	}
1768 	return NULL;
1769 }
1770 
1771 static void
1772 nfsd4_free_slab(struct kmem_cache **slab)
1773 {
1774 	if (*slab == NULL)
1775 		return;
1776 	kmem_cache_destroy(*slab);
1777 	*slab = NULL;
1778 }
1779 
/* Tear down all nfsd4 slab caches (safe if only some were created). */
void
nfsd4_free_slabs(void)
{
	nfsd4_free_slab(&stateowner_slab);
	nfsd4_free_slab(&file_slab);
	nfsd4_free_slab(&stateid_slab);
	nfsd4_free_slab(&deleg_slab);
}
1788 
/*
 * Create the slab caches for nfsd4 state objects.  On any failure all
 * previously created caches are destroyed and -ENOMEM is returned.
 */
static int
nfsd4_init_slabs(void)
{
	stateowner_slab = kmem_cache_create("nfsd4_stateowners",
			sizeof(struct nfs4_stateowner), 0, 0, NULL);
	if (stateowner_slab == NULL)
		goto out_nomem;
	file_slab = kmem_cache_create("nfsd4_files",
			sizeof(struct nfs4_file), 0, 0, NULL);
	if (file_slab == NULL)
		goto out_nomem;
	stateid_slab = kmem_cache_create("nfsd4_stateids",
			sizeof(struct nfs4_stateid), 0, 0, NULL);
	if (stateid_slab == NULL)
		goto out_nomem;
	deleg_slab = kmem_cache_create("nfsd4_delegations",
			sizeof(struct nfs4_delegation), 0, 0, NULL);
	if (deleg_slab == NULL)
		goto out_nomem;
	return 0;
out_nomem:
	/* nfsd4_free_slab() skips the caches that were never created */
	nfsd4_free_slabs();
	dprintk("nfsd4: out of memory while initializing nfsv4\n");
	return -ENOMEM;
}
1814 
/*
 * kref release callback for a stateowner: free the opaque owner
 * string and return the structure to its slab.
 */
void
nfs4_free_stateowner(struct kref *kref)
{
	struct nfs4_stateowner *sop =
		container_of(kref, struct nfs4_stateowner, so_ref);
	kfree(sop->so_owner.data);
	kmem_cache_free(stateowner_slab, sop);
}
1823 
1824 static inline struct nfs4_stateowner *
1825 alloc_stateowner(struct xdr_netobj *owner)
1826 {
1827 	struct nfs4_stateowner *sop;
1828 
1829 	if ((sop = kmem_cache_alloc(stateowner_slab, GFP_KERNEL))) {
1830 		if ((sop->so_owner.data = kmalloc(owner->len, GFP_KERNEL))) {
1831 			memcpy(sop->so_owner.data, owner->data, owner->len);
1832 			sop->so_owner.len = owner->len;
1833 			kref_init(&sop->so_ref);
1834 			return sop;
1835 		}
1836 		kmem_cache_free(stateowner_slab, sop);
1837 	}
1838 	return NULL;
1839 }
1840 
/*
 * Allocate and initialize an open owner from @open, hash it by owner
 * id and owner string, and link it to client @clp.  The embedded
 * replay buffer starts out as "server fault" until a real reply is
 * cached.  Returns NULL on allocation failure.
 */
static struct nfs4_stateowner *
alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) {
	struct nfs4_stateowner *sop;
	struct nfs4_replay *rp;
	unsigned int idhashval;

	if (!(sop = alloc_stateowner(&open->op_owner)))
		return NULL;
	idhashval = ownerid_hashval(current_ownerid);
	INIT_LIST_HEAD(&sop->so_idhash);
	INIT_LIST_HEAD(&sop->so_strhash);
	INIT_LIST_HEAD(&sop->so_perclient);
	INIT_LIST_HEAD(&sop->so_stateids);
	INIT_LIST_HEAD(&sop->so_perstateid);  /* not used */
	INIT_LIST_HEAD(&sop->so_close_lru);
	sop->so_time = 0;
	list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]);
	list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]);
	list_add(&sop->so_perclient, &clp->cl_openowners);
	sop->so_is_open_owner = 1;
	sop->so_id = current_ownerid++;
	sop->so_client = clp;
	sop->so_seqid = open->op_seqid;
	sop->so_confirmed = 0;	/* awaiting OPEN_CONFIRM */
	rp = &sop->so_replay;
	rp->rp_status = nfserr_serverfault;
	rp->rp_buflen = 0;
	rp->rp_buf = rp->rp_ibuf;
	return sop;
}
1871 
/*
 * Initialize an open stateid for (stateowner, file), hash it, and
 * record the share access/deny bits requested by @open.  Takes a
 * reference on @fp for stp->st_file.
 */
static inline void
init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
	struct nfs4_stateowner *sop = open->op_stateowner;
	unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);

	INIT_LIST_HEAD(&stp->st_hash);
	INIT_LIST_HEAD(&stp->st_perstateowner);
	INIT_LIST_HEAD(&stp->st_lockowners);
	INIT_LIST_HEAD(&stp->st_perfile);
	list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
	list_add(&stp->st_perstateowner, &sop->so_stateids);
	list_add(&stp->st_perfile, &fp->fi_stateids);
	stp->st_stateowner = sop;
	get_nfs4_file(fp);
	stp->st_file = fp;
	/* si_boot ties the stateid to this server instance */
	stp->st_stateid.si_boot = get_seconds();
	stp->st_stateid.si_stateownerid = sop->so_id;
	stp->st_stateid.si_fileid = fp->fi_id;
	stp->st_stateid.si_generation = 0;
	stp->st_access_bmap = 0;
	stp->st_deny_bmap = 0;
	/* strip v4.1 "want" bits before recording the access bit */
	__set_bit(open->op_share_access & ~NFS4_SHARE_WANT_MASK,
		  &stp->st_access_bmap);
	__set_bit(open->op_share_deny, &stp->st_deny_bmap);
	stp->st_openstp = NULL;
}
1898 
/*
 * Queue a closed open owner on close_lru for delayed reclaim; the
 * timestamp records when it became eligible.
 */
static void
move_to_close_lru(struct nfs4_stateowner *sop)
{
	dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);

	list_move_tail(&sop->so_close_lru, &close_lru);
	sop->so_time = get_seconds();
}
1907 
1908 static int
1909 same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner,
1910 							clientid_t *clid)
1911 {
1912 	return (sop->so_owner.len == owner->len) &&
1913 		0 == memcmp(sop->so_owner.data, owner->data, owner->len) &&
1914 		(sop->so_client->cl_clientid.cl_id == clid->cl_id);
1915 }
1916 
/*
 * Find the open owner in ownerstr_hashtbl[hashval] matching the owner
 * string and clientid carried in @open, or NULL if none exists.
 */
static struct nfs4_stateowner *
find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open)
{
	struct nfs4_stateowner *so = NULL;

	list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
		if (same_owner_str(so, &open->op_owner, &open->op_clientid))
			return so;
	}
	return NULL;
}
1928 
/* search file_hashtbl[] for file */
/*
 * Look up the nfs4_file for @ino under recall_lock.  On success a
 * reference is taken (caller must put_nfs4_file()); returns NULL if
 * the inode has no nfs4_file.
 */
static struct nfs4_file *
find_file(struct inode *ino)
{
	unsigned int hashval = file_hashval(ino);
	struct nfs4_file *fp;

	spin_lock(&recall_lock);
	list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
		if (fp->fi_inode == ino) {
			get_nfs4_file(fp);
			spin_unlock(&recall_lock);
			return fp;
		}
	}
	spin_unlock(&recall_lock);
	return NULL;
}
1947 
1948 static inline int access_valid(u32 x, u32 minorversion)
1949 {
1950 	if ((x & NFS4_SHARE_ACCESS_MASK) < NFS4_SHARE_ACCESS_READ)
1951 		return 0;
1952 	if ((x & NFS4_SHARE_ACCESS_MASK) > NFS4_SHARE_ACCESS_BOTH)
1953 		return 0;
1954 	x &= ~NFS4_SHARE_ACCESS_MASK;
1955 	if (minorversion && x) {
1956 		if ((x & NFS4_SHARE_WANT_MASK) > NFS4_SHARE_WANT_CANCEL)
1957 			return 0;
1958 		if ((x & NFS4_SHARE_WHEN_MASK) > NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED)
1959 			return 0;
1960 		x &= ~(NFS4_SHARE_WANT_MASK | NFS4_SHARE_WHEN_MASK);
1961 	}
1962 	if (x)
1963 		return 0;
1964 	return 1;
1965 }
1966 
1967 static inline int deny_valid(u32 x)
1968 {
1969 	/* Note: unlike access bits, deny bits may be zero. */
1970 	return x <= NFS4_SHARE_DENY_BOTH;
1971 }
1972 
1973 /*
1974  * We store the NONE, READ, WRITE, and BOTH bits separately in the
1975  * st_{access,deny}_bmap field of the stateid, in order to track not
1976  * only what share bits are currently in force, but also what
1977  * combinations of share bits previous opens have used.  This allows us
1978  * to enforce the recommendation of rfc 3530 14.2.19 that the server
 * return an error if the client attempts to downgrade to a combination
1980  * of share bits not explicable by closing some of its previous opens.
1981  *
1982  * XXX: This enforcement is actually incomplete, since we don't keep
1983  * track of access/deny bit combinations; so, e.g., we allow:
1984  *
1985  *	OPEN allow read, deny write
1986  *	OPEN allow both, deny none
1987  *	DOWNGRADE allow read, deny none
1988  *
1989  * which we should reject.
1990  */
/* Collapse the st_access_bmap bits into a single access word. */
static void
set_access(unsigned int *access, unsigned long bmap) {
	int bit;

	*access = 0;
	/* bits 1..3 correspond to READ, WRITE and BOTH */
	for (bit = 1; bit < 4; bit++)
		if (test_bit(bit, &bmap))
			*access |= bit;
}
2001 
/* Collapse the st_deny_bmap bits into a single deny word. */
static void
set_deny(unsigned int *deny, unsigned long bmap) {
	int bit;

	*deny = 0;
	for (bit = 0; bit < 4; bit++)
		if (test_bit(bit, &bmap))
			*deny |= bit;
}
2012 
2013 static int
2014 test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) {
2015 	unsigned int access, deny;
2016 
2017 	set_access(&access, stp->st_access_bmap);
2018 	set_deny(&deny, stp->st_deny_bmap);
2019 	if ((access & open->op_share_deny) || (deny & open->op_share_access))
2020 		return 0;
2021 	return 1;
2022 }
2023 
2024 /*
2025  * Called to check deny when READ with all zero stateid or
2026  * WRITE with all zero or all one stateid
2027  */
2028 static __be32
2029 nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
2030 {
2031 	struct inode *ino = current_fh->fh_dentry->d_inode;
2032 	struct nfs4_file *fp;
2033 	struct nfs4_stateid *stp;
2034 	__be32 ret;
2035 
2036 	dprintk("NFSD: nfs4_share_conflict\n");
2037 
2038 	fp = find_file(ino);
2039 	if (!fp)
2040 		return nfs_ok;
2041 	ret = nfserr_locked;
2042 	/* Search for conflicting share reservations */
2043 	list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
2044 		if (test_bit(deny_type, &stp->st_deny_bmap) ||
2045 		    test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap))
2046 			goto out;
2047 	}
2048 	ret = nfs_ok;
2049 out:
2050 	put_nfs4_file(fp);
2051 	return ret;
2052 }
2053 
/*
 * Drop write access from @filp after a downgrade that removed
 * NFS4_SHARE_ACCESS_WRITE.  Clears FMODE_WRITE (and makes sure
 * FMODE_READ stays set) so later checks see the reduced mode.
 */
static inline void
nfs4_file_downgrade(struct file *filp, unsigned int share_access)
{
	if (share_access & NFS4_SHARE_ACCESS_WRITE) {
		drop_file_write_access(filp);
		filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE;
	}
}
2062 
2063 /*
2064  * Spawn a thread to perform a recall on the delegation represented
2065  * by the lease (file_lock)
2066  *
2067  * Called from break_lease() with lock_kernel() held.
2068  * Note: we assume break_lease will only call this *once* for any given
2069  * lease.
2070  */
2071 static
2072 void nfsd_break_deleg_cb(struct file_lock *fl)
2073 {
2074 	struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
2075 
2076 	dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl);
2077 	if (!dp)
2078 		return;
2079 
2080 	/* We're assuming the state code never drops its reference
2081 	 * without first removing the lease.  Since we're in this lease
2082 	 * callback (and since the lease code is serialized by the kernel
2083 	 * lock) we know the server hasn't removed the lease yet, we know
2084 	 * it's safe to take a reference: */
2085 	atomic_inc(&dp->dl_count);
2086 	atomic_inc(&dp->dl_client->cl_count);
2087 
2088 	spin_lock(&recall_lock);
2089 	list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
2090 	spin_unlock(&recall_lock);
2091 
2092 	/* only place dl_time is set. protected by lock_kernel*/
2093 	dp->dl_time = get_seconds();
2094 
2095 	/*
2096 	 * We don't want the locks code to timeout the lease for us;
2097 	 * we'll remove it ourself if the delegation isn't returned
2098 	 * in time.
2099 	 */
2100 	fl->fl_break_time = 0;
2101 
2102 	dp->dl_file->fi_had_conflict = true;
2103 	nfsd4_cb_recall(dp);
2104 }
2105 
2106 /*
2107  * The file_lock is being reapd.
2108  *
2109  * Called by locks_free_lock() with lock_kernel() held.
2110  */
2111 static
2112 void nfsd_release_deleg_cb(struct file_lock *fl)
2113 {
2114 	struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
2115 
2116 	dprintk("NFSD nfsd_release_deleg_cb: fl %p dp %p dl_count %d\n", fl,dp, atomic_read(&dp->dl_count));
2117 
2118 	if (!(fl->fl_flags & FL_LEASE) || !dp)
2119 		return;
2120 	dp->dl_flock = NULL;
2121 }
2122 
2123 /*
2124  * Set the delegation file_lock back pointer.
2125  *
2126  * Called from setlease() with lock_kernel() held.
2127  */
2128 static
2129 void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl)
2130 {
2131 	struct nfs4_delegation *dp = (struct nfs4_delegation *)new->fl_owner;
2132 
2133 	dprintk("NFSD: nfsd_copy_lock_deleg_cb: new fl %p dp %p\n", new, dp);
2134 	if (!dp)
2135 		return;
2136 	dp->dl_flock = new;
2137 }
2138 
2139 /*
2140  * Called from setlease() with lock_kernel() held
2141  */
2142 static
2143 int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try)
2144 {
2145 	struct nfs4_delegation *onlistd =
2146 		(struct nfs4_delegation *)onlist->fl_owner;
2147 	struct nfs4_delegation *tryd =
2148 		(struct nfs4_delegation *)try->fl_owner;
2149 
2150 	if (onlist->fl_lmops != try->fl_lmops)
2151 		return 0;
2152 
2153 	return onlistd->dl_client == tryd->dl_client;
2154 }
2155 
2156 
2157 static
2158 int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
2159 {
2160 	if (arg & F_UNLCK)
2161 		return lease_modify(onlist, arg);
2162 	else
2163 		return -EAGAIN;
2164 }
2165 
/* Lease-manager callbacks wiring NFSv4 delegations into the VFS lease code. */
static struct lock_manager_operations nfsd_lease_mng_ops = {
	.fl_break = nfsd_break_deleg_cb,
	.fl_release_private = nfsd_release_deleg_cb,
	.fl_copy_lock = nfsd_copy_lock_deleg_cb,
	.fl_mylease = nfsd_same_client_deleg_cb,
	.fl_change = nfsd_change_deleg_cb,
};
2173 
2174 
/*
 * First phase of OPEN processing: find or create the open stateowner
 * and run the NFSv4.0 open-owner sequence-id checks.  On success the
 * owner is stored in open->op_stateowner, removed from the close LRU,
 * and the client's lease is renewed.
 */
__be32
nfsd4_process_open1(struct nfsd4_compound_state *cstate,
		    struct nfsd4_open *open)
{
	clientid_t *clientid = &open->op_clientid;
	struct nfs4_client *clp = NULL;
	unsigned int strhashval;
	struct nfs4_stateowner *sop = NULL;

	if (!check_name(open->op_owner))
		return nfserr_inval;

	if (STALE_CLIENTID(&open->op_clientid))
		return nfserr_stale_clientid;

	strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner);
	sop = find_openstateowner_str(strhashval, open);
	open->op_stateowner = sop;
	if (!sop) {
		/* Make sure the client's lease hasn't expired. */
		clp = find_confirmed_client(clientid);
		if (clp == NULL)
			return nfserr_expired;
		goto renew;
	}
	/* When sessions are used, skip open sequenceid processing */
	if (nfsd4_has_session(cstate))
		goto renew;
	if (!sop->so_confirmed) {
		/* Replace unconfirmed owners without checking for replay. */
		clp = sop->so_client;
		release_openowner(sop);
		open->op_stateowner = NULL;
		goto renew;
	}
	/* seqid one behind means a retransmitted OPEN: replay the
	 * cached response if we have one. */
	if (open->op_seqid == sop->so_seqid - 1) {
		if (sop->so_replay.rp_buflen)
			return nfserr_replay_me;
		/* The original OPEN failed so spectacularly
		 * that we don't even have replay data saved!
		 * Therefore, we have no choice but to continue
		 * processing this OPEN; presumably, we'll
		 * fail again for the same reason.
		 */
		dprintk("nfsd4_process_open1: replay with no replay cache\n");
		goto renew;
	}
	if (open->op_seqid != sop->so_seqid)
		return nfserr_bad_seqid;
renew:
	/* Allocate a fresh owner when none survived the checks above. */
	if (open->op_stateowner == NULL) {
		sop = alloc_init_open_stateowner(strhashval, clp, open);
		if (sop == NULL)
			return nfserr_resource;
		open->op_stateowner = sop;
	}
	list_del_init(&sop->so_close_lru);
	renew_client(sop->so_client);
	return nfs_ok;
}
2235 
2236 static inline __be32
2237 nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
2238 {
2239 	if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
2240 		return nfserr_openmode;
2241 	else
2242 		return nfs_ok;
2243 }
2244 
2245 static struct nfs4_delegation *
2246 find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
2247 {
2248 	struct nfs4_delegation *dp;
2249 
2250 	list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
2251 		if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
2252 			return dp;
2253 	}
2254 	return NULL;
2255 }
2256 
/*
 * Look up the delegation an OPEN refers to and verify its mode against
 * the requested access.  On return *dp is the matching, mode-compatible
 * delegation, or NULL.
 *
 * A missing or mode-incompatible delegation is only an error for
 * CLAIM_DELEGATE_CUR opens; other claim types fall through to nfs_ok.
 * A successful DELEGATE_CUR lookup implicitly confirms the stateowner.
 */
static __be32
nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open,
		struct nfs4_delegation **dp)
{
	int flags;
	__be32 status = nfserr_bad_stateid;

	*dp = find_delegation_file(fp, &open->op_delegate_stateid);
	if (*dp == NULL)
		goto out;
	/* read-only access needs RD_STATE; anything else needs WR_STATE */
	flags = open->op_share_access == NFS4_SHARE_ACCESS_READ ?
						RD_STATE : WR_STATE;
	status = nfs4_check_delegmode(*dp, flags);
	if (status)
		*dp = NULL;
out:
	if (open->op_claim_type != NFS4_OPEN_CLAIM_DELEGATE_CUR)
		return nfs_ok;
	if (status)
		return status;
	open->op_stateowner->so_confirmed = 1;
	return nfs_ok;
}
2280 
2281 static __be32
2282 nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp)
2283 {
2284 	struct nfs4_stateid *local;
2285 	__be32 status = nfserr_share_denied;
2286 	struct nfs4_stateowner *sop = open->op_stateowner;
2287 
2288 	list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
2289 		/* ignore lock owners */
2290 		if (local->st_stateowner->so_is_open_owner == 0)
2291 			continue;
2292 		/* remember if we have seen this open owner */
2293 		if (local->st_stateowner == sop)
2294 			*stpp = local;
2295 		/* check for conflicting share reservations */
2296 		if (!test_share(local, open))
2297 			goto out;
2298 	}
2299 	status = 0;
2300 out:
2301 	return status;
2302 }
2303 
/* Allocate an uninitialized stateid from the dedicated slab cache. */
static inline struct nfs4_stateid *
nfs4_alloc_stateid(void)
{
	return kmem_cache_alloc(stateid_slab, GFP_KERNEL);
}
2309 
/*
 * Set up a brand-new open stateid: allocate it and attach a struct
 * file, either by sharing the file a delegation (@dp) already holds or
 * by opening the file via nfsd_open().
 *
 * Returns 0 and sets *stpp on success; nfserr_resource on allocation
 * failure, otherwise the (possibly remapped) nfsd_open() error.
 */
static __be32
nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
		struct nfs4_delegation *dp,
		struct svc_fh *cur_fh, int flags)
{
	struct nfs4_stateid *stp;

	stp = nfs4_alloc_stateid();
	if (stp == NULL)
		return nfserr_resource;

	if (dp) {
		/* reuse the file the delegation already has open */
		get_file(dp->dl_vfs_file);
		stp->st_vfs_file = dp->dl_vfs_file;
	} else {
		__be32 status;
		status = nfsd_open(rqstp, cur_fh, S_IFREG, flags,
				&stp->st_vfs_file);
		if (status) {
			/* don't silently drop; have the client retry */
			if (status == nfserr_dropit)
				status = nfserr_jukebox;
			kmem_cache_free(stateid_slab, stp);
			return status;
		}
	}
	*stpp = stp;
	return 0;
}
2338 
2339 static inline __be32
2340 nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
2341 		struct nfsd4_open *open)
2342 {
2343 	struct iattr iattr = {
2344 		.ia_valid = ATTR_SIZE,
2345 		.ia_size = 0,
2346 	};
2347 	if (!open->op_truncate)
2348 		return 0;
2349 	if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
2350 		return nfserr_inval;
2351 	return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
2352 }
2353 
2354 static __be32
2355 nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open)
2356 {
2357 	struct file *filp = stp->st_vfs_file;
2358 	struct inode *inode = filp->f_path.dentry->d_inode;
2359 	unsigned int share_access, new_writer;
2360 	__be32 status;
2361 
2362 	set_access(&share_access, stp->st_access_bmap);
2363 	new_writer = (~share_access) & open->op_share_access
2364 			& NFS4_SHARE_ACCESS_WRITE;
2365 
2366 	if (new_writer) {
2367 		int err = get_write_access(inode);
2368 		if (err)
2369 			return nfserrno(err);
2370 		err = mnt_want_write(cur_fh->fh_export->ex_path.mnt);
2371 		if (err)
2372 			return nfserrno(err);
2373 		file_take_write(filp);
2374 	}
2375 	status = nfsd4_truncate(rqstp, cur_fh, open);
2376 	if (status) {
2377 		if (new_writer)
2378 			put_write_access(inode);
2379 		return status;
2380 	}
2381 	/* remember the open */
2382 	filp->f_mode |= open->op_share_access;
2383 	__set_bit(open->op_share_access, &stp->st_access_bmap);
2384 	__set_bit(open->op_share_deny, &stp->st_deny_bmap);
2385 
2386 	return nfs_ok;
2387 }
2388 
2389 
2390 static void
2391 nfs4_set_claim_prev(struct nfsd4_open *open)
2392 {
2393 	open->op_stateowner->so_confirmed = 1;
2394 	open->op_stateowner->so_client->cl_firststate = 1;
2395 }
2396 
2397 /*
2398  * Attempt to hand out a delegation.
2399  */
2400 static void
2401 nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_stateid *stp)
2402 {
2403 	struct nfs4_delegation *dp;
2404 	struct nfs4_stateowner *sop = stp->st_stateowner;
2405 	struct nfs4_cb_conn *cb = &sop->so_client->cl_cb_conn;
2406 	struct file_lock fl, *flp = &fl;
2407 	int status, flag = 0;
2408 
2409 	flag = NFS4_OPEN_DELEGATE_NONE;
2410 	open->op_recall = 0;
2411 	switch (open->op_claim_type) {
2412 		case NFS4_OPEN_CLAIM_PREVIOUS:
2413 			if (!atomic_read(&cb->cb_set))
2414 				open->op_recall = 1;
2415 			flag = open->op_delegate_type;
2416 			if (flag == NFS4_OPEN_DELEGATE_NONE)
2417 				goto out;
2418 			break;
2419 		case NFS4_OPEN_CLAIM_NULL:
2420 			/* Let's not give out any delegations till everyone's
2421 			 * had the chance to reclaim theirs.... */
2422 			if (locks_in_grace())
2423 				goto out;
2424 			if (!atomic_read(&cb->cb_set) || !sop->so_confirmed)
2425 				goto out;
2426 			if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
2427 				flag = NFS4_OPEN_DELEGATE_WRITE;
2428 			else
2429 				flag = NFS4_OPEN_DELEGATE_READ;
2430 			break;
2431 		default:
2432 			goto out;
2433 	}
2434 
2435 	dp = alloc_init_deleg(sop->so_client, stp, fh, flag);
2436 	if (dp == NULL) {
2437 		flag = NFS4_OPEN_DELEGATE_NONE;
2438 		goto out;
2439 	}
2440 	locks_init_lock(&fl);
2441 	fl.fl_lmops = &nfsd_lease_mng_ops;
2442 	fl.fl_flags = FL_LEASE;
2443 	fl.fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
2444 	fl.fl_end = OFFSET_MAX;
2445 	fl.fl_owner =  (fl_owner_t)dp;
2446 	fl.fl_file = stp->st_vfs_file;
2447 	fl.fl_pid = current->tgid;
2448 
2449 	/* vfs_setlease checks to see if delegation should be handed out.
2450 	 * the lock_manager callbacks fl_mylease and fl_change are used
2451 	 */
2452 	if ((status = vfs_setlease(stp->st_vfs_file, fl.fl_type, &flp))) {
2453 		dprintk("NFSD: setlease failed [%d], no delegation\n", status);
2454 		unhash_delegation(dp);
2455 		flag = NFS4_OPEN_DELEGATE_NONE;
2456 		goto out;
2457 	}
2458 
2459 	memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
2460 
2461 	dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n",
2462 	             dp->dl_stateid.si_boot,
2463 	             dp->dl_stateid.si_stateownerid,
2464 	             dp->dl_stateid.si_fileid,
2465 	             dp->dl_stateid.si_generation);
2466 out:
2467 	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
2468 			&& flag == NFS4_OPEN_DELEGATE_NONE
2469 			&& open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
2470 		dprintk("NFSD: WARNING: refusing delegation reclaim\n");
2471 	open->op_delegate_type = flag;
2472 }
2473 
2474 /*
2475  * called with nfs4_lock_state() held.
2476  */
2477 __be32
2478 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
2479 {
2480 	struct nfsd4_compoundres *resp = rqstp->rq_resp;
2481 	struct nfs4_file *fp = NULL;
2482 	struct inode *ino = current_fh->fh_dentry->d_inode;
2483 	struct nfs4_stateid *stp = NULL;
2484 	struct nfs4_delegation *dp = NULL;
2485 	__be32 status;
2486 
2487 	status = nfserr_inval;
2488 	if (!access_valid(open->op_share_access, resp->cstate.minorversion)
2489 			|| !deny_valid(open->op_share_deny))
2490 		goto out;
2491 	/*
2492 	 * Lookup file; if found, lookup stateid and check open request,
2493 	 * and check for delegations in the process of being recalled.
2494 	 * If not found, create the nfs4_file struct
2495 	 */
2496 	fp = find_file(ino);
2497 	if (fp) {
2498 		if ((status = nfs4_check_open(fp, open, &stp)))
2499 			goto out;
2500 		status = nfs4_check_deleg(fp, open, &dp);
2501 		if (status)
2502 			goto out;
2503 	} else {
2504 		status = nfserr_bad_stateid;
2505 		if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
2506 			goto out;
2507 		status = nfserr_resource;
2508 		fp = alloc_init_file(ino);
2509 		if (fp == NULL)
2510 			goto out;
2511 	}
2512 
2513 	/*
2514 	 * OPEN the file, or upgrade an existing OPEN.
2515 	 * If truncate fails, the OPEN fails.
2516 	 */
2517 	if (stp) {
2518 		/* Stateid was found, this is an OPEN upgrade */
2519 		status = nfs4_upgrade_open(rqstp, current_fh, stp, open);
2520 		if (status)
2521 			goto out;
2522 		update_stateid(&stp->st_stateid);
2523 	} else {
2524 		/* Stateid was not found, this is a new OPEN */
2525 		int flags = 0;
2526 		if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
2527 			flags |= NFSD_MAY_READ;
2528 		if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
2529 			flags |= NFSD_MAY_WRITE;
2530 		status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags);
2531 		if (status)
2532 			goto out;
2533 		init_stateid(stp, fp, open);
2534 		status = nfsd4_truncate(rqstp, current_fh, open);
2535 		if (status) {
2536 			release_open_stateid(stp);
2537 			goto out;
2538 		}
2539 		if (nfsd4_has_session(&resp->cstate))
2540 			update_stateid(&stp->st_stateid);
2541 	}
2542 	memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t));
2543 
2544 	if (nfsd4_has_session(&resp->cstate))
2545 		open->op_stateowner->so_confirmed = 1;
2546 
2547 	/*
2548 	* Attempt to hand out a delegation. No error return, because the
2549 	* OPEN succeeds even if we fail.
2550 	*/
2551 	nfs4_open_delegation(current_fh, open, stp);
2552 
2553 	status = nfs_ok;
2554 
2555 	dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n",
2556 	            stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
2557 	            stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
2558 out:
2559 	if (fp)
2560 		put_nfs4_file(fp);
2561 	if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
2562 		nfs4_set_claim_prev(open);
2563 	/*
2564 	* To finish the open response, we just need to set the rflags.
2565 	*/
2566 	open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
2567 	if (!open->op_stateowner->so_confirmed &&
2568 	    !nfsd4_has_session(&resp->cstate))
2569 		open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
2570 
2571 	return status;
2572 }
2573 
/*
 * RENEW: refresh the lease of the client named by @clid.  Reports
 * nfserr_cb_path_down when the client holds delegations but its
 * callback channel is unreachable, so it knows recalls can't reach it.
 */
__be32
nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
	    clientid_t *clid)
{
	struct nfs4_client *clp;
	__be32 status;

	nfs4_lock_state();
	dprintk("process_renew(%08x/%08x): starting\n",
			clid->cl_boot, clid->cl_id);
	status = nfserr_stale_clientid;
	if (STALE_CLIENTID(clid))
		goto out;
	clp = find_confirmed_client(clid);
	status = nfserr_expired;
	if (clp == NULL) {
		/* We assume the client took too long to RENEW. */
		dprintk("nfsd4_renew: clientid not found!\n");
		goto out;
	}
	renew_client(clp);
	status = nfserr_cb_path_down;
	if (!list_empty(&clp->cl_delegations)
			&& !atomic_read(&clp->cl_cb_conn.cb_set))
		goto out;
	status = nfs_ok;
out:
	nfs4_unlock_state();
	return status;
}
2604 
/* Registered with the locks code to track nfsd's grace period. */
struct lock_manager nfsd4_manager = {
};
2607 
/*
 * End the NFSv4 grace period: purge stale client recovery records and
 * tell the locks code that reclaims are over.
 */
static void
nfsd4_end_grace(void)
{
	dprintk("NFSD: end of grace period\n");
	nfsd4_recdir_purge_old();
	locks_end_grace(&nfsd4_manager);
}
2615 
/*
 * Reap expired state: clients past their lease, delegations whose
 * recall timed out, and open stateowners lingering on the close LRU.
 * Returns the number of seconds until the laundromat should run again
 * (never less than NFSD_LAUNDROMAT_MINTIMEOUT).
 */
static time_t
nfs4_laundromat(void)
{
	struct nfs4_client *clp;
	struct nfs4_stateowner *sop;
	struct nfs4_delegation *dp;
	struct list_head *pos, *next, reaplist;
	time_t cutoff = get_seconds() - NFSD_LEASE_TIME;
	time_t t, clientid_val = NFSD_LEASE_TIME;
	time_t u, test_val = NFSD_LEASE_TIME;

	nfs4_lock_state();

	dprintk("NFSD: laundromat service - starting\n");
	if (locks_in_grace())
		nfsd4_end_grace();
	/* client_lru is ordered oldest-first, so we can stop at the
	 * first entry that is still within its lease. */
	list_for_each_safe(pos, next, &client_lru) {
		clp = list_entry(pos, struct nfs4_client, cl_lru);
		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
			t = clp->cl_time - cutoff;
			if (clientid_val > t)
				clientid_val = t;
			break;
		}
		dprintk("NFSD: purging unused client (clientid %08x)\n",
			clp->cl_clientid.cl_id);
		nfsd4_remove_clid_dir(clp);
		expire_client(clp);
	}
	/* Collect timed-out delegations under recall_lock, then unhash
	 * them outside the spinlock. */
	INIT_LIST_HEAD(&reaplist);
	spin_lock(&recall_lock);
	list_for_each_safe(pos, next, &del_recall_lru) {
		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
		if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
			u = dp->dl_time - cutoff;
			if (test_val > u)
				test_val = u;
			break;
		}
		dprintk("NFSD: purging unused delegation dp %p, fp %p\n",
			            dp, dp->dl_flock);
		list_move(&dp->dl_recall_lru, &reaplist);
	}
	spin_unlock(&recall_lock);
	list_for_each_safe(pos, next, &reaplist) {
		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
		list_del_init(&dp->dl_recall_lru);
		unhash_delegation(dp);
	}
	test_val = NFSD_LEASE_TIME;
	/* Finally, reap open owners parked on the close LRU. */
	list_for_each_safe(pos, next, &close_lru) {
		sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
		if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) {
			u = sop->so_time - cutoff;
			if (test_val > u)
				test_val = u;
			break;
		}
		dprintk("NFSD: purging unused open stateowner (so_id %d)\n",
			sop->so_id);
		release_openowner(sop);
	}
	if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT)
		clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT;
	nfs4_unlock_state();
	return clientid_val;
}
2683 
/* Workqueue and delayed work item that drive periodic state expiry. */
static struct workqueue_struct *laundry_wq;
static void laundromat_main(struct work_struct *);
static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main);
2687 
2688 static void
2689 laundromat_main(struct work_struct *not_used)
2690 {
2691 	time_t t;
2692 
2693 	t = nfs4_laundromat();
2694 	dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
2695 	queue_delayed_work(laundry_wq, &laundromat_work, t*HZ);
2696 }
2697 
2698 static struct nfs4_stateowner *
2699 search_close_lru(u32 st_id, int flags)
2700 {
2701 	struct nfs4_stateowner *local = NULL;
2702 
2703 	if (flags & CLOSE_STATE) {
2704 		list_for_each_entry(local, &close_lru, so_close_lru) {
2705 			if (local->so_id == st_id)
2706 				return local;
2707 		}
2708 	}
2709 	return NULL;
2710 }
2711 
/* Nonzero if @fhp names a different inode than the file backing @stp. */
static inline int
nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp)
{
	return fhp->fh_dentry->d_inode != stp->st_vfs_file->f_path.dentry->d_inode;
}
2717 
2718 static int
2719 STALE_STATEID(stateid_t *stateid)
2720 {
2721 	if (time_after((unsigned long)boot_time,
2722 			(unsigned long)stateid->si_boot)) {
2723 		dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n",
2724 			stateid->si_boot, stateid->si_stateownerid,
2725 			stateid->si_fileid, stateid->si_generation);
2726 		return 1;
2727 	}
2728 	return 0;
2729 }
2730 
2731 static int
2732 EXPIRED_STATEID(stateid_t *stateid)
2733 {
2734 	if (time_before((unsigned long)boot_time,
2735 			((unsigned long)stateid->si_boot)) &&
2736 	    time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) {
2737 		dprintk("NFSD: expired stateid (%08x/%08x/%08x/%08x)!\n",
2738 			stateid->si_boot, stateid->si_stateownerid,
2739 			stateid->si_fileid, stateid->si_generation);
2740 		return 1;
2741 	}
2742 	return 0;
2743 }
2744 
/*
 * Map a stateid that failed lookup to the most specific error: stale
 * (minted before this server boot), expired (lease ran out), otherwise
 * simply bad.
 */
static __be32
stateid_error_map(stateid_t *stateid)
{
	if (STALE_STATEID(stateid))
		return nfserr_stale_stateid;
	if (EXPIRED_STATEID(stateid))
		return nfserr_expired;

	dprintk("NFSD: bad stateid (%08x/%08x/%08x/%08x)!\n",
		stateid->si_boot, stateid->si_stateownerid,
		stateid->si_fileid, stateid->si_generation);
	return nfserr_bad_stateid;
}
2758 
2759 static inline int
2760 access_permit_read(unsigned long access_bmap)
2761 {
2762 	return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) ||
2763 		test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap) ||
2764 		test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap);
2765 }
2766 
2767 static inline int
2768 access_permit_write(unsigned long access_bmap)
2769 {
2770 	return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) ||
2771 		test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap);
2772 }
2773 
2774 static
2775 __be32 nfs4_check_openmode(struct nfs4_stateid *stp, int flags)
2776 {
2777         __be32 status = nfserr_openmode;
2778 
2779 	if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap)))
2780                 goto out;
2781 	if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap)))
2782                 goto out;
2783 	status = nfs_ok;
2784 out:
2785 	return status;
2786 }
2787 
2788 static inline __be32
2789 check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
2790 {
2791 	if (ONE_STATEID(stateid) && (flags & RD_STATE))
2792 		return nfs_ok;
2793 	else if (locks_in_grace()) {
2794 		/* Answer in remaining cases depends on existance of
2795 		 * conflicting state; so we must wait out the grace period. */
2796 		return nfserr_grace;
2797 	} else if (flags & WR_STATE)
2798 		return nfs4_share_conflict(current_fh,
2799 				NFS4_SHARE_DENY_WRITE);
2800 	else /* (flags & RD_STATE) && ZERO_STATEID(stateid) */
2801 		return nfs4_share_conflict(current_fh,
2802 				NFS4_SHARE_DENY_READ);
2803 }
2804 
2805 /*
2806  * Allow READ/WRITE during grace period on recovered state only for files
2807  * that are not able to provide mandatory locking.
2808  */
2809 static inline int
2810 grace_disallows_io(struct inode *inode)
2811 {
2812 	return locks_in_grace() && mandatory_lock(inode);
2813 }
2814 
/*
 * Compare the generation of an incoming stateid (@in) with the
 * server's current copy (@ref).  Returns nfs_ok, nfserr_bad_stateid
 * (stateid from the future) or nfserr_old_stateid.
 */
static int check_stateid_generation(stateid_t *in, stateid_t *ref, int flags)
{
	/*
	 * When sessions are used the stateid generation number is ignored
	 * when it is zero.
	 */
	if ((flags & HAS_SESSION) && in->si_generation == 0)
		goto out;

	/* If the client sends us a stateid from the future, it's buggy: */
	if (in->si_generation > ref->si_generation)
		return nfserr_bad_stateid;
	/*
	 * The following, however, can happen.  For example, if the
	 * client sends an open and some IO at the same time, the open
	 * may bump si_generation while the IO is still in flight.
	 * Thanks to hard links and renames, the client never knows what
	 * file an open will affect.  So it could avoid that situation
	 * only by serializing all opens and IO from the same open
	 * owner.  To recover from the old_stateid error, the client
	 * will just have to retry the IO:
	 */
	if (in->si_generation < ref->si_generation)
		return nfserr_old_stateid;
out:
	return nfs_ok;
}
2842 
/* Delegation stateids are encoded with a zero si_fileid field. */
static int is_delegation_stateid(stateid_t *stateid)
{
	return stateid->si_fileid == 0;
}
2847 
2848 /*
2849 * Checks for stateid operations
2850 */
2851 __be32
2852 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2853 			   stateid_t *stateid, int flags, struct file **filpp)
2854 {
2855 	struct nfs4_stateid *stp = NULL;
2856 	struct nfs4_delegation *dp = NULL;
2857 	struct svc_fh *current_fh = &cstate->current_fh;
2858 	struct inode *ino = current_fh->fh_dentry->d_inode;
2859 	__be32 status;
2860 
2861 	if (filpp)
2862 		*filpp = NULL;
2863 
2864 	if (grace_disallows_io(ino))
2865 		return nfserr_grace;
2866 
2867 	if (nfsd4_has_session(cstate))
2868 		flags |= HAS_SESSION;
2869 
2870 	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
2871 		return check_special_stateids(current_fh, stateid, flags);
2872 
2873 	status = nfserr_stale_stateid;
2874 	if (STALE_STATEID(stateid))
2875 		goto out;
2876 
2877 	status = nfserr_bad_stateid;
2878 	if (is_delegation_stateid(stateid)) {
2879 		dp = find_delegation_stateid(ino, stateid);
2880 		if (!dp) {
2881 			status = stateid_error_map(stateid);
2882 			goto out;
2883 		}
2884 		status = check_stateid_generation(stateid, &dp->dl_stateid,
2885 						  flags);
2886 		if (status)
2887 			goto out;
2888 		status = nfs4_check_delegmode(dp, flags);
2889 		if (status)
2890 			goto out;
2891 		renew_client(dp->dl_client);
2892 		if (filpp)
2893 			*filpp = dp->dl_vfs_file;
2894 	} else { /* open or lock stateid */
2895 		stp = find_stateid(stateid, flags);
2896 		if (!stp) {
2897 			status = stateid_error_map(stateid);
2898 			goto out;
2899 		}
2900 		if (nfs4_check_fh(current_fh, stp))
2901 			goto out;
2902 		if (!stp->st_stateowner->so_confirmed)
2903 			goto out;
2904 		status = check_stateid_generation(stateid, &stp->st_stateid,
2905 						  flags);
2906 		if (status)
2907 			goto out;
2908 		status = nfs4_check_openmode(stp, flags);
2909 		if (status)
2910 			goto out;
2911 		renew_client(stp->st_stateowner->so_client);
2912 		if (filpp)
2913 			*filpp = stp->st_vfs_file;
2914 	}
2915 	status = nfs_ok;
2916 out:
2917 	return status;
2918 }
2919 
2920 static inline int
2921 setlkflg (int type)
2922 {
2923 	return (type == NFS4_READW_LT || type == NFS4_READ_LT) ?
2924 		RD_STATE : WR_STATE;
2925 }
2926 
2927 /*
2928  * Checks for sequence id mutating operations.
2929  */
2930 static __be32
2931 nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
2932 			 stateid_t *stateid, int flags,
2933 			 struct nfs4_stateowner **sopp,
2934 			 struct nfs4_stateid **stpp, struct nfsd4_lock *lock)
2935 {
2936 	struct nfs4_stateid *stp;
2937 	struct nfs4_stateowner *sop;
2938 	struct svc_fh *current_fh = &cstate->current_fh;
2939 	__be32 status;
2940 
2941 	dprintk("NFSD: preprocess_seqid_op: seqid=%d "
2942 			"stateid = (%08x/%08x/%08x/%08x)\n", seqid,
2943 		stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid,
2944 		stateid->si_generation);
2945 
2946 	*stpp = NULL;
2947 	*sopp = NULL;
2948 
2949 	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
2950 		dprintk("NFSD: preprocess_seqid_op: magic stateid!\n");
2951 		return nfserr_bad_stateid;
2952 	}
2953 
2954 	if (STALE_STATEID(stateid))
2955 		return nfserr_stale_stateid;
2956 
2957 	if (nfsd4_has_session(cstate))
2958 		flags |= HAS_SESSION;
2959 
2960 	/*
2961 	* We return BAD_STATEID if filehandle doesn't match stateid,
2962 	* the confirmed flag is incorrecly set, or the generation
2963 	* number is incorrect.
2964 	*/
2965 	stp = find_stateid(stateid, flags);
2966 	if (stp == NULL) {
2967 		/*
2968 		 * Also, we should make sure this isn't just the result of
2969 		 * a replayed close:
2970 		 */
2971 		sop = search_close_lru(stateid->si_stateownerid, flags);
2972 		if (sop == NULL)
2973 			return stateid_error_map(stateid);
2974 		*sopp = sop;
2975 		goto check_replay;
2976 	}
2977 
2978 	*stpp = stp;
2979 	*sopp = sop = stp->st_stateowner;
2980 
2981 	if (lock) {
2982 		clientid_t *lockclid = &lock->v.new.clientid;
2983 		struct nfs4_client *clp = sop->so_client;
2984 		int lkflg = 0;
2985 		__be32 status;
2986 
2987 		lkflg = setlkflg(lock->lk_type);
2988 
2989 		if (lock->lk_is_new) {
2990 			if (!sop->so_is_open_owner)
2991 				return nfserr_bad_stateid;
2992 			if (!(flags & HAS_SESSION) &&
2993 			    !same_clid(&clp->cl_clientid, lockclid))
2994 				return nfserr_bad_stateid;
2995 			/* stp is the open stateid */
2996 			status = nfs4_check_openmode(stp, lkflg);
2997 			if (status)
2998 				return status;
2999 		} else {
3000 			/* stp is the lock stateid */
3001 			status = nfs4_check_openmode(stp->st_openstp, lkflg);
3002 			if (status)
3003 				return status;
3004                }
3005 	}
3006 
3007 	if (nfs4_check_fh(current_fh, stp)) {
3008 		dprintk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n");
3009 		return nfserr_bad_stateid;
3010 	}
3011 
3012 	/*
3013 	*  We now validate the seqid and stateid generation numbers.
3014 	*  For the moment, we ignore the possibility of
3015 	*  generation number wraparound.
3016 	*/
3017 	if (!(flags & HAS_SESSION) && seqid != sop->so_seqid)
3018 		goto check_replay;
3019 
3020 	if (sop->so_confirmed && flags & CONFIRM) {
3021 		dprintk("NFSD: preprocess_seqid_op: expected"
3022 				" unconfirmed stateowner!\n");
3023 		return nfserr_bad_stateid;
3024 	}
3025 	if (!sop->so_confirmed && !(flags & CONFIRM)) {
3026 		dprintk("NFSD: preprocess_seqid_op: stateowner not"
3027 				" confirmed yet!\n");
3028 		return nfserr_bad_stateid;
3029 	}
3030 	status = check_stateid_generation(stateid, &stp->st_stateid, flags);
3031 	if (status)
3032 		return status;
3033 	renew_client(sop->so_client);
3034 	return nfs_ok;
3035 
3036 check_replay:
3037 	if (seqid == sop->so_seqid - 1) {
3038 		dprintk("NFSD: preprocess_seqid_op: retransmission?\n");
3039 		/* indicate replay to calling function */
3040 		return nfserr_replay_me;
3041 	}
3042 	dprintk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d)\n",
3043 			sop->so_seqid, seqid);
3044 	*sopp = NULL;
3045 	return nfserr_bad_seqid;
3046 }
3047 
/*
 * OPEN_CONFIRM: mark the open owner named by the request as confirmed
 * after validating the seqid/stateid, bump the stateid generation, and
 * create the client's recovery directory.  On any outcome the owner is
 * saved in cstate->replay_owner for seqid replay handling.
 */
__be32
nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
		   struct nfsd4_open_confirm *oc)
{
	__be32 status;
	struct nfs4_stateowner *sop;
	struct nfs4_stateid *stp;

	dprintk("NFSD: nfsd4_open_confirm on file %.*s\n",
			(int)cstate->current_fh.fh_dentry->d_name.len,
			cstate->current_fh.fh_dentry->d_name.name);

	status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
	if (status)
		return status;

	nfs4_lock_state();

	if ((status = nfs4_preprocess_seqid_op(cstate,
					oc->oc_seqid, &oc->oc_req_stateid,
					CONFIRM | OPEN_STATE,
					&oc->oc_stateowner, &stp, NULL)))
		goto out;

	sop = oc->oc_stateowner;
	sop->so_confirmed = 1;
	update_stateid(&stp->st_stateid);
	memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t));
	dprintk("NFSD: nfsd4_open_confirm: success, seqid=%d "
		"stateid=(%08x/%08x/%08x/%08x)\n", oc->oc_seqid,
		         stp->st_stateid.si_boot,
		         stp->st_stateid.si_stateownerid,
		         stp->st_stateid.si_fileid,
		         stp->st_stateid.si_generation);

	nfsd4_create_clid_dir(sop->so_client);
out:
	/* hold a reference for the duration of reply processing */
	if (oc->oc_stateowner) {
		nfs4_get_stateowner(oc->oc_stateowner);
		cstate->replay_owner = oc->oc_stateowner;
	}
	nfs4_unlock_state();
	return status;
}
3092 
3093 
3094 /*
3095  * unset all bits in union bitmap (bmap) that
3096  * do not exist in share (from successful OPEN_DOWNGRADE)
3097  */
3098 static void
3099 reset_union_bmap_access(unsigned long access, unsigned long *bmap)
3100 {
3101 	int i;
3102 	for (i = 1; i < 4; i++) {
3103 		if ((i & access) != i)
3104 			__clear_bit(i, bmap);
3105 	}
3106 }
3107 
/*
 * Clear every deny bit in *bmap that the downgraded share mode no
 * longer includes.  Deny values occupy bits 0..3 (0 = DENY_NONE).
 */
static void
reset_union_bmap_deny(unsigned long deny, unsigned long *bmap)
{
	int bit;

	for (bit = 0; bit <= 3; bit++)
		if ((bit & deny) != bit)
			__clear_bit(bit, bmap);
}
3117 
/*
 * OPEN_DOWNGRADE: reduce the access/deny share modes of an open
 * stateid.  The requested modes must be a subset of those currently
 * held, otherwise nfserr_inval is returned.
 */
__be32
nfsd4_open_downgrade(struct svc_rqst *rqstp,
		     struct nfsd4_compound_state *cstate,
		     struct nfsd4_open_downgrade *od)
{
	__be32 status;
	struct nfs4_stateid *stp;
	unsigned int share_access;

	dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n",
			(int)cstate->current_fh.fh_dentry->d_name.len,
			cstate->current_fh.fh_dentry->d_name.name);

	if (!access_valid(od->od_share_access, cstate->minorversion)
			|| !deny_valid(od->od_share_deny))
		return nfserr_inval;

	nfs4_lock_state();
	/* Validate seqid/stateid against an open stateid. */
	if ((status = nfs4_preprocess_seqid_op(cstate,
					od->od_seqid,
					&od->od_stateid,
					OPEN_STATE,
					&od->od_stateowner, &stp, NULL)))
		goto out;

	status = nfserr_inval;
	/* Requested modes must already be held on this stateid. */
	if (!test_bit(od->od_share_access, &stp->st_access_bmap)) {
		dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n",
			stp->st_access_bmap, od->od_share_access);
		goto out;
	}
	if (!test_bit(od->od_share_deny, &stp->st_deny_bmap)) {
		dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n",
			stp->st_deny_bmap, od->od_share_deny);
		goto out;
	}
	/* Drop file access we no longer need at the VFS level. */
	set_access(&share_access, stp->st_access_bmap);
	nfs4_file_downgrade(stp->st_vfs_file,
	                    share_access & ~od->od_share_access);

	reset_union_bmap_access(od->od_share_access, &stp->st_access_bmap);
	reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap);

	update_stateid(&stp->st_stateid);
	memcpy(&od->od_stateid, &stp->st_stateid, sizeof(stateid_t));
	status = nfs_ok;
out:
	/* Reference for replay handling, dropped after encoding. */
	if (od->od_stateowner) {
		nfs4_get_stateowner(od->od_stateowner);
		cstate->replay_owner = od->od_stateowner;
	}
	nfs4_unlock_state();
	return status;
}
3172 
3173 /*
3174  * nfs4_unlock_state() called after encode
3175  */
/*
 * CLOSE: release an open stateid.  The stateowner itself is kept on
 * the close_lru for a lease period so a retransmitted CLOSE can still
 * be recognized as a replay.
 */
__be32
nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
	    struct nfsd4_close *close)
{
	__be32 status;
	struct nfs4_stateid *stp;

	dprintk("NFSD: nfsd4_close on file %.*s\n",
			(int)cstate->current_fh.fh_dentry->d_name.len,
			cstate->current_fh.fh_dentry->d_name.name);

	nfs4_lock_state();
	/* check close_lru for replay */
	if ((status = nfs4_preprocess_seqid_op(cstate,
					close->cl_seqid,
					&close->cl_stateid,
					OPEN_STATE | CLOSE_STATE,
					&close->cl_stateowner, &stp, NULL)))
		goto out;
	status = nfs_ok;
	update_stateid(&stp->st_stateid);
	memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t));

	/* release_stateid() calls nfsd_close() if needed */
	release_open_stateid(stp);

	/* place unused nfs4_stateowners on so_close_lru list to be
	 * released by the laundromat service after the lease period
	 * to enable us to handle CLOSE replay
	 */
	if (list_empty(&close->cl_stateowner->so_stateids))
		move_to_close_lru(close->cl_stateowner);
out:
	/* Reference for replay handling, dropped after encoding. */
	if (close->cl_stateowner) {
		nfs4_get_stateowner(close->cl_stateowner);
		cstate->replay_owner = close->cl_stateowner;
	}
	nfs4_unlock_state();
	return status;
}
3216 
/*
 * DELEGRETURN: the client voluntarily returns a delegation.  Validates
 * that the stateid really is a delegation stateid for the current
 * filehandle's inode, then unhashes (destroys) the delegation.
 */
__be32
nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
		  struct nfsd4_delegreturn *dr)
{
	struct nfs4_delegation *dp;
	stateid_t *stateid = &dr->dr_stateid;
	struct inode *inode;
	__be32 status;
	int flags = 0;

	if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
		return status;
	inode = cstate->current_fh.fh_dentry->d_inode;

	/* NFSv4.1: generation checks differ when sessions are in use. */
	if (nfsd4_has_session(cstate))
		flags |= HAS_SESSION;
	nfs4_lock_state();
	/* Special and stale stateids are never valid delegations. */
	status = nfserr_bad_stateid;
	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
		goto out;
	status = nfserr_stale_stateid;
	if (STALE_STATEID(stateid))
		goto out;
	status = nfserr_bad_stateid;
	if (!is_delegation_stateid(stateid))
		goto out;
	dp = find_delegation_stateid(inode, stateid);
	if (!dp) {
		status = stateid_error_map(stateid);
		goto out;
	}
	status = check_stateid_generation(stateid, &dp->dl_stateid, flags);
	if (status)
		goto out;
	renew_client(dp->dl_client);

	unhash_delegation(dp);
out:
	nfs4_unlock_state();

	return status;
}
3259 
3260 
3261 /*
3262  * Lock owner state (byte-range locks)
3263  */
3264 #define LOFF_OVERFLOW(start, len)      ((u64)(len) > ~(u64)(start))
3265 #define LOCK_HASH_BITS              8
3266 #define LOCK_HASH_SIZE             (1 << LOCK_HASH_BITS)
3267 #define LOCK_HASH_MASK             (LOCK_HASH_SIZE - 1)
3268 
3269 static inline u64
3270 end_offset(u64 start, u64 len)
3271 {
3272 	u64 end;
3273 
3274 	end = start + len;
3275 	return end >= start ? end: NFS4_MAX_UINT64;
3276 }
3277 
3278 /* last octet in a range */
3279 static inline u64
3280 last_byte_offset(u64 start, u64 len)
3281 {
3282 	u64 end;
3283 
3284 	BUG_ON(!len);
3285 	end = start + len;
3286 	return end > start ? end - 1: NFS4_MAX_UINT64;
3287 }
3288 
/* Bucket index for lock_ownerid_hashtbl from a numeric owner id. */
#define lockownerid_hashval(id) \
        ((id) & LOCK_HASH_MASK)

/*
 * Bucket index for lock_ownerstr_hashtbl: combines the file, the
 * client id, and the opaque lockowner name.
 */
static inline unsigned int
lock_ownerstr_hashval(struct inode *inode, u32 cl_id,
		struct xdr_netobj *ownername)
{
	return (file_hashval(inode) + cl_id
			+ opaque_hashval(ownername->data, ownername->len))
		& LOCK_HASH_MASK;
}
3300 
/* Lockowner lookup tables (by numeric id and by owner string) and the
 * lock stateid table; manipulated by the LOCK/LOCKU code below, which
 * runs under nfs4_lock_state(). */
static struct list_head lock_ownerid_hashtbl[LOCK_HASH_SIZE];
static struct list_head	lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
3304 
3305 static struct nfs4_stateid *
3306 find_stateid(stateid_t *stid, int flags)
3307 {
3308 	struct nfs4_stateid *local;
3309 	u32 st_id = stid->si_stateownerid;
3310 	u32 f_id = stid->si_fileid;
3311 	unsigned int hashval;
3312 
3313 	dprintk("NFSD: find_stateid flags 0x%x\n",flags);
3314 	if (flags & (LOCK_STATE | RD_STATE | WR_STATE)) {
3315 		hashval = stateid_hashval(st_id, f_id);
3316 		list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) {
3317 			if ((local->st_stateid.si_stateownerid == st_id) &&
3318 			    (local->st_stateid.si_fileid == f_id))
3319 				return local;
3320 		}
3321 	}
3322 
3323 	if (flags & (OPEN_STATE | RD_STATE | WR_STATE)) {
3324 		hashval = stateid_hashval(st_id, f_id);
3325 		list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) {
3326 			if ((local->st_stateid.si_stateownerid == st_id) &&
3327 			    (local->st_stateid.si_fileid == f_id))
3328 				return local;
3329 		}
3330 	}
3331 	return NULL;
3332 }
3333 
3334 static struct nfs4_delegation *
3335 find_delegation_stateid(struct inode *ino, stateid_t *stid)
3336 {
3337 	struct nfs4_file *fp;
3338 	struct nfs4_delegation *dl;
3339 
3340 	dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n",
3341                     stid->si_boot, stid->si_stateownerid,
3342                     stid->si_fileid, stid->si_generation);
3343 
3344 	fp = find_file(ino);
3345 	if (!fp)
3346 		return NULL;
3347 	dl = find_delegation_file(fp, stid);
3348 	put_nfs4_file(fp);
3349 	return dl;
3350 }
3351 
3352 /*
3353  * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
3354  * we can't properly handle lock requests that go beyond the (2^63 - 1)-th
3355  * byte, because of sign extension problems.  Since NFSv4 calls for 64-bit
3356  * locking, this prevents us from being completely protocol-compliant.  The
3357  * real solution to this problem is to start using unsigned file offsets in
3358  * the VFS, but this is a very deep change!
3359  */
3360 static inline void
3361 nfs4_transform_lock_offset(struct file_lock *lock)
3362 {
3363 	if (lock->fl_start < 0)
3364 		lock->fl_start = OFFSET_MAX;
3365 	if (lock->fl_end < 0)
3366 		lock->fl_end = OFFSET_MAX;
3367 }
3368 
/* Hack!: For now, we're defining this just so we can use a pointer to it
 * as a unique cookie to identify our (NFSv4's) posix locks.
 * No operations are implemented; only the address is compared
 * (see nfs4_set_lock_denied). */
static struct lock_manager_operations nfsd_posix_mng_ops  = {
};
3373 
3374 static inline void
3375 nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
3376 {
3377 	struct nfs4_stateowner *sop;
3378 	unsigned int hval;
3379 
3380 	if (fl->fl_lmops == &nfsd_posix_mng_ops) {
3381 		sop = (struct nfs4_stateowner *) fl->fl_owner;
3382 		hval = lockownerid_hashval(sop->so_id);
3383 		kref_get(&sop->so_ref);
3384 		deny->ld_sop = sop;
3385 		deny->ld_clientid = sop->so_client->cl_clientid;
3386 	} else {
3387 		deny->ld_sop = NULL;
3388 		deny->ld_clientid.cl_boot = 0;
3389 		deny->ld_clientid.cl_id = 0;
3390 	}
3391 	deny->ld_start = fl->fl_start;
3392 	deny->ld_length = NFS4_MAX_UINT64;
3393 	if (fl->fl_end != NFS4_MAX_UINT64)
3394 		deny->ld_length = fl->fl_end - fl->fl_start + 1;
3395 	deny->ld_type = NFS4_READ_LT;
3396 	if (fl->fl_type != F_RDLCK)
3397 		deny->ld_type = NFS4_WRITE_LT;
3398 }
3399 
3400 static struct nfs4_stateowner *
3401 find_lockstateowner_str(struct inode *inode, clientid_t *clid,
3402 		struct xdr_netobj *owner)
3403 {
3404 	unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner);
3405 	struct nfs4_stateowner *op;
3406 
3407 	list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) {
3408 		if (same_owner_str(op, owner, clid))
3409 			return op;
3410 	}
3411 	return NULL;
3412 }
3413 
3414 /*
3415  * Alloc a lock owner structure.
3416  * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has
3417  * occured.
3418  *
3419  * strhashval = lock_ownerstr_hashval
3420  */
3421 
/*
 * Allocate and hash a new lockowner for a first LOCK from this owner.
 * Links it into the id/string hash tables and onto the open stateid's
 * st_lockowners list.  Returns NULL on allocation failure.
 */
static struct nfs4_stateowner *
alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_stateid *open_stp, struct nfsd4_lock *lock) {
	struct nfs4_stateowner *sop;
	struct nfs4_replay *rp;
	unsigned int idhashval;

	if (!(sop = alloc_stateowner(&lock->lk_new_owner)))
		return NULL;
	/* current_ownerid is a global counter; we hash its current value
	 * and assign it below. */
	idhashval = lockownerid_hashval(current_ownerid);
	INIT_LIST_HEAD(&sop->so_idhash);
	INIT_LIST_HEAD(&sop->so_strhash);
	INIT_LIST_HEAD(&sop->so_perclient);
	INIT_LIST_HEAD(&sop->so_stateids);
	INIT_LIST_HEAD(&sop->so_perstateid);
	INIT_LIST_HEAD(&sop->so_close_lru); /* not used */
	sop->so_time = 0;
	list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]);
	list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]);
	list_add(&sop->so_perstateid, &open_stp->st_lockowners);
	sop->so_is_open_owner = 0;
	sop->so_id = current_ownerid++;
	sop->so_client = clp;
	/* It is the openowner seqid that will be incremented in encode in the
	 * case of new lockowners; so increment the lock seqid manually: */
	sop->so_seqid = lock->lk_new_lock_seqid + 1;
	/* Lockowners need no OPEN_CONFIRM-style confirmation. */
	sop->so_confirmed = 1;
	rp = &sop->so_replay;
	rp->rp_status = nfserr_serverfault;
	rp->rp_buflen = 0;
	rp->rp_buf = rp->rp_ibuf;
	return sop;
}
3454 
/*
 * Allocate and hash a lock stateid derived from an open stateid.
 * Inherits the open stateid's vfs file and share bitmaps, and records
 * the parent open stateid in st_openstp.  Returns NULL on allocation
 * failure.
 */
static struct nfs4_stateid *
alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struct nfs4_stateid *open_stp)
{
	struct nfs4_stateid *stp;
	unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);

	stp = nfs4_alloc_stateid();
	if (stp == NULL)
		goto out;
	INIT_LIST_HEAD(&stp->st_hash);
	INIT_LIST_HEAD(&stp->st_perfile);
	INIT_LIST_HEAD(&stp->st_perstateowner);
	INIT_LIST_HEAD(&stp->st_lockowners); /* not used */
	list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
	list_add(&stp->st_perfile, &fp->fi_stateids);
	list_add(&stp->st_perstateowner, &sop->so_stateids);
	stp->st_stateowner = sop;
	/* st_file holds a reference on the nfs4_file. */
	get_nfs4_file(fp);
	stp->st_file = fp;
	stp->st_stateid.si_boot = get_seconds();
	stp->st_stateid.si_stateownerid = sop->so_id;
	stp->st_stateid.si_fileid = fp->fi_id;
	stp->st_stateid.si_generation = 0;
	stp->st_vfs_file = open_stp->st_vfs_file; /* FIXME refcount?? */
	stp->st_access_bmap = open_stp->st_access_bmap;
	stp->st_deny_bmap = open_stp->st_deny_bmap;
	stp->st_openstp = open_stp;

out:
	return stp;
}
3486 
3487 static int
3488 check_lock_length(u64 offset, u64 length)
3489 {
3490 	return ((length == 0)  || ((length != NFS4_MAX_UINT64) &&
3491 	     LOFF_OVERFLOW(offset, length)));
3492 }
3493 
3494 /*
3495  *  LOCK operation
3496  */
3497 __be32
3498 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3499 	   struct nfsd4_lock *lock)
3500 {
3501 	struct nfs4_stateowner *open_sop = NULL;
3502 	struct nfs4_stateowner *lock_sop = NULL;
3503 	struct nfs4_stateid *lock_stp;
3504 	struct file *filp;
3505 	struct file_lock file_lock;
3506 	struct file_lock conflock;
3507 	__be32 status = 0;
3508 	unsigned int strhashval;
3509 	unsigned int cmd;
3510 	int err;
3511 
3512 	dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
3513 		(long long) lock->lk_offset,
3514 		(long long) lock->lk_length);
3515 
3516 	if (check_lock_length(lock->lk_offset, lock->lk_length))
3517 		 return nfserr_inval;
3518 
3519 	if ((status = fh_verify(rqstp, &cstate->current_fh,
3520 				S_IFREG, NFSD_MAY_LOCK))) {
3521 		dprintk("NFSD: nfsd4_lock: permission denied!\n");
3522 		return status;
3523 	}
3524 
3525 	nfs4_lock_state();
3526 
3527 	if (lock->lk_is_new) {
3528 		/*
3529 		 * Client indicates that this is a new lockowner.
3530 		 * Use open owner and open stateid to create lock owner and
3531 		 * lock stateid.
3532 		 */
3533 		struct nfs4_stateid *open_stp = NULL;
3534 		struct nfs4_file *fp;
3535 
3536 		status = nfserr_stale_clientid;
3537 		if (!nfsd4_has_session(cstate) &&
3538 		    STALE_CLIENTID(&lock->lk_new_clientid))
3539 			goto out;
3540 
3541 		/* validate and update open stateid and open seqid */
3542 		status = nfs4_preprocess_seqid_op(cstate,
3543 				        lock->lk_new_open_seqid,
3544 		                        &lock->lk_new_open_stateid,
3545 					OPEN_STATE,
3546 		                        &lock->lk_replay_owner, &open_stp,
3547 					lock);
3548 		if (status)
3549 			goto out;
3550 		open_sop = lock->lk_replay_owner;
3551 		/* create lockowner and lock stateid */
3552 		fp = open_stp->st_file;
3553 		strhashval = lock_ownerstr_hashval(fp->fi_inode,
3554 				open_sop->so_client->cl_clientid.cl_id,
3555 				&lock->v.new.owner);
3556 		/* XXX: Do we need to check for duplicate stateowners on
3557 		 * the same file, or should they just be allowed (and
3558 		 * create new stateids)? */
3559 		status = nfserr_resource;
3560 		lock_sop = alloc_init_lock_stateowner(strhashval,
3561 				open_sop->so_client, open_stp, lock);
3562 		if (lock_sop == NULL)
3563 			goto out;
3564 		lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp);
3565 		if (lock_stp == NULL)
3566 			goto out;
3567 	} else {
3568 		/* lock (lock owner + lock stateid) already exists */
3569 		status = nfs4_preprocess_seqid_op(cstate,
3570 				       lock->lk_old_lock_seqid,
3571 				       &lock->lk_old_lock_stateid,
3572 				       LOCK_STATE,
3573 				       &lock->lk_replay_owner, &lock_stp, lock);
3574 		if (status)
3575 			goto out;
3576 		lock_sop = lock->lk_replay_owner;
3577 	}
3578 	/* lock->lk_replay_owner and lock_stp have been created or found */
3579 	filp = lock_stp->st_vfs_file;
3580 
3581 	status = nfserr_grace;
3582 	if (locks_in_grace() && !lock->lk_reclaim)
3583 		goto out;
3584 	status = nfserr_no_grace;
3585 	if (!locks_in_grace() && lock->lk_reclaim)
3586 		goto out;
3587 
3588 	locks_init_lock(&file_lock);
3589 	switch (lock->lk_type) {
3590 		case NFS4_READ_LT:
3591 		case NFS4_READW_LT:
3592 			file_lock.fl_type = F_RDLCK;
3593 			cmd = F_SETLK;
3594 		break;
3595 		case NFS4_WRITE_LT:
3596 		case NFS4_WRITEW_LT:
3597 			file_lock.fl_type = F_WRLCK;
3598 			cmd = F_SETLK;
3599 		break;
3600 		default:
3601 			status = nfserr_inval;
3602 		goto out;
3603 	}
3604 	file_lock.fl_owner = (fl_owner_t)lock_sop;
3605 	file_lock.fl_pid = current->tgid;
3606 	file_lock.fl_file = filp;
3607 	file_lock.fl_flags = FL_POSIX;
3608 	file_lock.fl_lmops = &nfsd_posix_mng_ops;
3609 
3610 	file_lock.fl_start = lock->lk_offset;
3611 	file_lock.fl_end = last_byte_offset(lock->lk_offset, lock->lk_length);
3612 	nfs4_transform_lock_offset(&file_lock);
3613 
3614 	/*
3615 	* Try to lock the file in the VFS.
3616 	* Note: locks.c uses the BKL to protect the inode's lock list.
3617 	*/
3618 
3619 	err = vfs_lock_file(filp, cmd, &file_lock, &conflock);
3620 	switch (-err) {
3621 	case 0: /* success! */
3622 		update_stateid(&lock_stp->st_stateid);
3623 		memcpy(&lock->lk_resp_stateid, &lock_stp->st_stateid,
3624 				sizeof(stateid_t));
3625 		status = 0;
3626 		break;
3627 	case (EAGAIN):		/* conflock holds conflicting lock */
3628 		status = nfserr_denied;
3629 		dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
3630 		nfs4_set_lock_denied(&conflock, &lock->lk_denied);
3631 		break;
3632 	case (EDEADLK):
3633 		status = nfserr_deadlock;
3634 		break;
3635 	default:
3636 		dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! status %d\n",err);
3637 		status = nfserr_resource;
3638 		break;
3639 	}
3640 out:
3641 	if (status && lock->lk_is_new && lock_sop)
3642 		release_lockowner(lock_sop);
3643 	if (lock->lk_replay_owner) {
3644 		nfs4_get_stateowner(lock->lk_replay_owner);
3645 		cstate->replay_owner = lock->lk_replay_owner;
3646 	}
3647 	nfs4_unlock_state();
3648 	return status;
3649 }
3650 
3651 /*
3652  * The NFSv4 spec allows a client to do a LOCKT without holding an OPEN,
3653  * so we do a temporary open here just to get an open file to pass to
3654  * vfs_test_lock.  (Arguably perhaps test_lock should be done with an
3655  * inode operation.)
3656  */
3657 static int nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
3658 {
3659 	struct file *file;
3660 	int err;
3661 
3662 	err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
3663 	if (err)
3664 		return err;
3665 	err = vfs_test_lock(file, lock);
3666 	nfsd_close(file);
3667 	return err;
3668 }
3669 
3670 /*
3671  * LOCKT operation
3672  */
/*
 * LOCKT: test whether a lock could be obtained, without taking it.
 * On conflict, returns nfserr_denied with the conflicting lock's
 * details filled into lt_denied.
 */
__be32
nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
	    struct nfsd4_lockt *lockt)
{
	struct inode *inode;
	struct file_lock file_lock;
	int error;
	__be32 status;

	/* No lock testing during the grace period. */
	if (locks_in_grace())
		return nfserr_grace;

	if (check_lock_length(lockt->lt_offset, lockt->lt_length))
		 return nfserr_inval;

	lockt->lt_stateowner = NULL;
	nfs4_lock_state();

	status = nfserr_stale_clientid;
	if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid))
		goto out;

	if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) {
		dprintk("NFSD: nfsd4_lockt: fh_verify() failed!\n");
		if (status == nfserr_symlink)
			status = nfserr_inval;
		goto out;
	}

	inode = cstate->current_fh.fh_dentry->d_inode;
	locks_init_lock(&file_lock);
	switch (lockt->lt_type) {
		case NFS4_READ_LT:
		case NFS4_READW_LT:
			file_lock.fl_type = F_RDLCK;
		break;
		case NFS4_WRITE_LT:
		case NFS4_WRITEW_LT:
			file_lock.fl_type = F_WRLCK;
		break;
		default:
			dprintk("NFSD: nfs4_lockt: bad lock type!\n");
			status = nfserr_inval;
		goto out;
	}

	/* If this owner already has a lockowner, use it so its own locks
	 * don't register as conflicts. */
	lockt->lt_stateowner = find_lockstateowner_str(inode,
			&lockt->lt_clientid, &lockt->lt_owner);
	if (lockt->lt_stateowner)
		file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner;
	file_lock.fl_pid = current->tgid;
	file_lock.fl_flags = FL_POSIX;

	file_lock.fl_start = lockt->lt_offset;
	file_lock.fl_end = last_byte_offset(lockt->lt_offset, lockt->lt_length);

	nfs4_transform_lock_offset(&file_lock);

	status = nfs_ok;
	error = nfsd_test_lock(rqstp, &cstate->current_fh, &file_lock);
	if (error) {
		status = nfserrno(error);
		goto out;
	}
	/* vfs_test_lock sets fl_type to F_UNLCK if no conflict exists. */
	if (file_lock.fl_type != F_UNLCK) {
		status = nfserr_denied;
		nfs4_set_lock_denied(&file_lock, &lockt->lt_denied);
	}
out:
	nfs4_unlock_state();
	return status;
}
3745 
/*
 * LOCKU: release a byte-range lock.  Validates the lock stateid, asks
 * the VFS to unlock (F_UNLCK), and bumps the stateid generation.
 */
__be32
nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
	    struct nfsd4_locku *locku)
{
	struct nfs4_stateid *stp;
	struct file *filp = NULL;
	struct file_lock file_lock;
	__be32 status;
	int err;

	dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n",
		(long long) locku->lu_offset,
		(long long) locku->lu_length);

	if (check_lock_length(locku->lu_offset, locku->lu_length))
		 return nfserr_inval;

	nfs4_lock_state();

	if ((status = nfs4_preprocess_seqid_op(cstate,
					locku->lu_seqid,
					&locku->lu_stateid,
					LOCK_STATE,
					&locku->lu_stateowner, &stp, NULL)))
		goto out;

	/* A valid lock stateid always carries an open file. */
	filp = stp->st_vfs_file;
	BUG_ON(!filp);
	locks_init_lock(&file_lock);
	file_lock.fl_type = F_UNLCK;
	file_lock.fl_owner = (fl_owner_t) locku->lu_stateowner;
	file_lock.fl_pid = current->tgid;
	file_lock.fl_file = filp;
	file_lock.fl_flags = FL_POSIX;
	file_lock.fl_lmops = &nfsd_posix_mng_ops;
	file_lock.fl_start = locku->lu_offset;

	file_lock.fl_end = last_byte_offset(locku->lu_offset, locku->lu_length);
	nfs4_transform_lock_offset(&file_lock);

	/*
	*  Try to unlock the file in the VFS.
	*/
	err = vfs_lock_file(filp, F_SETLK, &file_lock, NULL);
	if (err) {
		dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
		goto out_nfserr;
	}
	/*
	* OK, unlock succeeded; the only thing left to do is update the stateid.
	*/
	update_stateid(&stp->st_stateid);
	memcpy(&locku->lu_stateid, &stp->st_stateid, sizeof(stateid_t));

out:
	/* Reference for replay handling, dropped after encoding. */
	if (locku->lu_stateowner) {
		nfs4_get_stateowner(locku->lu_stateowner);
		cstate->replay_owner = locku->lu_stateowner;
	}
	nfs4_unlock_state();
	return status;

out_nfserr:
	status = nfserrno(err);
	goto out;
}
3812 
3813 /*
3814  * returns
3815  * 	1: locks held by lockowner
3816  * 	0: no locks held by lockowner
3817  */
3818 static int
3819 check_for_locks(struct file *filp, struct nfs4_stateowner *lowner)
3820 {
3821 	struct file_lock **flpp;
3822 	struct inode *inode = filp->f_path.dentry->d_inode;
3823 	int status = 0;
3824 
3825 	lock_kernel();
3826 	for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) {
3827 		if ((*flpp)->fl_owner == (fl_owner_t)lowner) {
3828 			status = 1;
3829 			goto out;
3830 		}
3831 	}
3832 out:
3833 	unlock_kernel();
3834 	return status;
3835 }
3836 
/*
 * RELEASE_LOCKOWNER: free server state for a lockowner that holds no
 * locks.  If any matching lockowner still holds a lock, fail with
 * nfserr_locks_held and release nothing.
 */
__be32
nfsd4_release_lockowner(struct svc_rqst *rqstp,
			struct nfsd4_compound_state *cstate,
			struct nfsd4_release_lockowner *rlockowner)
{
	clientid_t *clid = &rlockowner->rl_clientid;
	struct nfs4_stateowner *sop;
	struct nfs4_stateid *stp;
	struct xdr_netobj *owner = &rlockowner->rl_owner;
	struct list_head matches;
	int i;
	__be32 status;

	dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
		clid->cl_boot, clid->cl_id);

	/* XXX check for lease expiration */

	status = nfserr_stale_clientid;
	if (STALE_CLIENTID(clid))
		return status;

	nfs4_lock_state();

	status = nfserr_locks_held;
	/* XXX: we're doing a linear search through all the lockowners.
	 * Yipes!  For now we'll just hope clients aren't really using
	 * release_lockowner much, but eventually we have to fix these
	 * data structures. */
	INIT_LIST_HEAD(&matches);
	for (i = 0; i < LOCK_HASH_SIZE; i++) {
		list_for_each_entry(sop, &lock_ownerid_hashtbl[i], so_idhash) {
			if (!same_owner_str(sop, owner, clid))
				continue;
			/* Any remaining lock means the whole op fails. */
			list_for_each_entry(stp, &sop->so_stateids,
					st_perstateowner) {
				if (check_for_locks(stp->st_vfs_file, sop))
					goto out;
				/* Note: so_perclient unused for lockowners,
				 * so it's OK to fool with here. */
				list_add(&sop->so_perclient, &matches);
			}
		}
	}
	/* Clients probably won't expect us to return with some (but not all)
	 * of the lockowner state released; so don't release any until all
	 * have been checked. */
	status = nfs_ok;
	while (!list_empty(&matches)) {
		sop = list_entry(matches.next, struct nfs4_stateowner,
								so_perclient);
		/* unhash_stateowner deletes so_perclient only
		 * for openowners. */
		list_del(&sop->so_perclient);
		release_lockowner(sop);
	}
out:
	nfs4_unlock_state();
	return status;
}
3897 
3898 static inline struct nfs4_client_reclaim *
3899 alloc_reclaim(void)
3900 {
3901 	return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
3902 }
3903 
3904 int
3905 nfs4_has_reclaimed_state(const char *name, bool use_exchange_id)
3906 {
3907 	unsigned int strhashval = clientstr_hashval(name);
3908 	struct nfs4_client *clp;
3909 
3910 	clp = find_confirmed_client_by_str(name, strhashval, use_exchange_id);
3911 	return clp ? 1 : 0;
3912 }
3913 
3914 /*
3915  * failure => all reset bets are off, nfserr_no_grace...
3916  */
3917 int
3918 nfs4_client_to_reclaim(const char *name)
3919 {
3920 	unsigned int strhashval;
3921 	struct nfs4_client_reclaim *crp = NULL;
3922 
3923 	dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name);
3924 	crp = alloc_reclaim();
3925 	if (!crp)
3926 		return 0;
3927 	strhashval = clientstr_hashval(name);
3928 	INIT_LIST_HEAD(&crp->cr_strhash);
3929 	list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]);
3930 	memcpy(crp->cr_recdir, name, HEXDIR_LEN);
3931 	reclaim_str_hashtbl_size++;
3932 	return 1;
3933 }
3934 
/* Free every entry in the reclaim table. */
static void
nfs4_release_reclaim(void)
{
	struct nfs4_client_reclaim *crp = NULL;
	int i;

	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
		while (!list_empty(&reclaim_str_hashtbl[i])) {
			crp = list_entry(reclaim_str_hashtbl[i].next,
			                struct nfs4_client_reclaim, cr_strhash);
			list_del(&crp->cr_strhash);
			kfree(crp);
			reclaim_str_hashtbl_size--;
		}
	}
	/* the size counter must end up agreeing with the table contents */
	BUG_ON(reclaim_str_hashtbl_size);
}
3952 
3953 /*
3954  * called from OPEN, CLAIM_PREVIOUS with a new clientid. */
3955 static struct nfs4_client_reclaim *
3956 nfs4_find_reclaim_client(clientid_t *clid)
3957 {
3958 	unsigned int strhashval;
3959 	struct nfs4_client *clp;
3960 	struct nfs4_client_reclaim *crp = NULL;
3961 
3962 
3963 	/* find clientid in conf_id_hashtbl */
3964 	clp = find_confirmed_client(clid);
3965 	if (clp == NULL)
3966 		return NULL;
3967 
3968 	dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n",
3969 		            clp->cl_name.len, clp->cl_name.data,
3970 			    clp->cl_recdir);
3971 
3972 	/* find clp->cl_name in reclaim_str_hashtbl */
3973 	strhashval = clientstr_hashval(clp->cl_recdir);
3974 	list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) {
3975 		if (same_name(crp->cr_recdir, clp->cl_recdir)) {
3976 			return crp;
3977 		}
3978 	}
3979 	return NULL;
3980 }
3981 
3982 /*
3983 * Called from OPEN. Look for clientid in reclaim list.
3984 */
3985 __be32
3986 nfs4_check_open_reclaim(clientid_t *clid)
3987 {
3988 	return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
3989 }
3990 
3991 /* initialization to perform at module load time: */
3992 
/*
 * Module-load initialization: create the slab caches and empty every
 * global hash table and LRU list.  Returns 0 or a negative errno from
 * nfsd4_init_slabs().
 */
int
nfs4_state_init(void)
{
	int i, status;

	status = nfsd4_init_slabs();
	if (status)
		return status;
	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
		INIT_LIST_HEAD(&conf_id_hashtbl[i]);
		INIT_LIST_HEAD(&conf_str_hashtbl[i]);
		INIT_LIST_HEAD(&unconf_str_hashtbl[i]);
		INIT_LIST_HEAD(&unconf_id_hashtbl[i]);
		INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
	}
	for (i = 0; i < SESSION_HASH_SIZE; i++)
		INIT_LIST_HEAD(&sessionid_hashtbl[i]);
	for (i = 0; i < FILE_HASH_SIZE; i++) {
		INIT_LIST_HEAD(&file_hashtbl[i]);
	}
	for (i = 0; i < OWNER_HASH_SIZE; i++) {
		INIT_LIST_HEAD(&ownerstr_hashtbl[i]);
		INIT_LIST_HEAD(&ownerid_hashtbl[i]);
	}
	for (i = 0; i < STATEID_HASH_SIZE; i++) {
		INIT_LIST_HEAD(&stateid_hashtbl[i]);
		INIT_LIST_HEAD(&lockstateid_hashtbl[i]);
	}
	for (i = 0; i < LOCK_HASH_SIZE; i++) {
		INIT_LIST_HEAD(&lock_ownerid_hashtbl[i]);
		INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]);
	}
	/* the "one" special stateid is all-ones */
	memset(&onestateid, ~0, sizeof(stateid_t));
	INIT_LIST_HEAD(&close_lru);
	INIT_LIST_HEAD(&client_lru);
	INIT_LIST_HEAD(&del_recall_lru);
	reclaim_str_hashtbl_size = 0;
	return 0;
}
4032 
/*
 * Read the on-disk reboot recovery directory into the reclaim table so
 * pre-reboot clients can reclaim state during the grace period.
 */
static void
nfsd4_load_reboot_recovery_data(void)
{
	int status;

	nfs4_lock_state();
	nfsd4_init_recdir(user_recovery_dirname);
	status = nfsd4_recdir_load();
	nfs4_unlock_state();
	if (status)
		printk("NFSD: Failure reading reboot recovery data\n");
}
4045 
4046 unsigned long
4047 get_nfs4_grace_period(void)
4048 {
4049 	return max(user_lease_time, lease_time) * HZ;
4050 }
4051 
4052 /*
4053  * Since the lifetime of a delegation isn't limited to that of an open, a
4054  * client may quite reasonably hang on to a delegation as long as it has
4055  * the inode cached.  This becomes an obvious problem the first time a
4056  * client's inode cache approaches the size of the server's total memory.
4057  *
4058  * For now we avoid this problem by imposing a hard limit on the number
4059  * of delegations, which varies according to the server's memory size.
4060  */
/* Recompute the global delegation cap from free buffer-page memory. */
static void
set_max_delegations(void)
{
	/*
	 * Allow at most 4 delegations per megabyte of RAM.  Quick
	 * estimates suggest that in the worst case (where every delegation
	 * is for a different inode), a delegation could take about 1.5K,
	 * giving a worst case usage of about 6% of memory.
	 */
	max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT);
}
4072 
4073 /* initialization to perform when the nfsd service is started: */
4074 
/*
 * Core startup: record boot time, commit the configured lease time,
 * enter the grace period, and start the laundromat workqueue.
 */
static void
__nfs4_state_start(void)
{
	unsigned long grace_time;

	boot_time = get_seconds();
	grace_time = get_nfs4_grace_period();
	/* The admin-requested lease time takes effect only at startup. */
	lease_time = user_lease_time;
	locks_start_grace(&nfsd4_manager);
	printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
	       grace_time/HZ);
	/*
	 * NOTE(review): create_singlethread_workqueue() can return NULL on
	 * allocation failure, and queue_delayed_work() below would then
	 * dereference it.  A proper fix needs startup to be able to fail
	 * (and shutdown to tolerate a NULL laundry_wq) -- flagging rather
	 * than half-fixing here.
	 */
	laundry_wq = create_singlethread_workqueue("nfsd4");
	/* First laundromat pass runs when the grace period ends. */
	queue_delayed_work(laundry_wq, &laundromat_work, grace_time);
	set_max_delegations();
}
4090 
4091 void
4092 nfs4_state_start(void)
4093 {
4094 	if (nfs4_init)
4095 		return;
4096 	nfsd4_load_reboot_recovery_data();
4097 	__nfs4_state_start();
4098 	nfs4_init = 1;
4099 	return;
4100 }
4101 
/* Return the lease time currently in effect, in seconds. */
time_t
nfs4_lease_time(void)
{
	return lease_time;
}
4107 
/*
 * Tear down all NFSv4 state: expire every client, reap any delegations
 * still on the recall LRU, and close the recovery directory.  Caller
 * must hold the state lock (see nfs4_state_shutdown()).
 */
static void
__nfs4_state_shutdown(void)
{
	int i;
	struct nfs4_client *clp = NULL;
	struct nfs4_delegation *dp = NULL;
	struct list_head *pos, *next, reaplist;

	/* Expiring a client unhashes it, so drain each chain head-first. */
	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
		while (!list_empty(&conf_id_hashtbl[i])) {
			clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
			expire_client(clp);
		}
		while (!list_empty(&unconf_str_hashtbl[i])) {
			clp = list_entry(unconf_str_hashtbl[i].next, struct nfs4_client, cl_strhash);
			expire_client(clp);
		}
	}
	/*
	 * Two-phase delegation reap: move entries off the shared LRU onto a
	 * private list under recall_lock, then unhash them with the lock
	 * dropped (unhash_delegation must not run under recall_lock).
	 */
	INIT_LIST_HEAD(&reaplist);
	spin_lock(&recall_lock);
	list_for_each_safe(pos, next, &del_recall_lru) {
		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
		list_move(&dp->dl_recall_lru, &reaplist);
	}
	spin_unlock(&recall_lock);
	list_for_each_safe(pos, next, &reaplist) {
		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
		list_del_init(&dp->dl_recall_lru);
		unhash_delegation(dp);
	}

	nfsd4_shutdown_recdir();
	/* Allow a later nfs4_state_start() to reinitialize. */
	nfs4_init = 0;
}
4142 
/*
 * Public shutdown entry point.  Ordering matters: stop the laundromat
 * before destroying the state it scans, end the grace period, then do
 * the bulk teardown under the state lock.
 */
void
nfs4_state_shutdown(void)
{
	cancel_rearming_delayed_workqueue(laundry_wq, &laundromat_work);
	destroy_workqueue(laundry_wq);
	locks_end_grace(&nfsd4_manager);
	nfs4_lock_state();
	nfs4_release_reclaim();
	__nfs4_state_shutdown();
	nfs4_unlock_state();
}
4154 
4155 /*
4156  * user_recovery_dirname is protected by the nfsd_mutex since it's only
4157  * accessed when nfsd is starting.
4158  */
static void
nfs4_set_recdir(char *recdir)
{
	/*
	 * Unbounded strcpy: callers must guarantee that recdir (including
	 * the terminating NUL) fits in user_recovery_dirname -- presumably
	 * the nfsctl write path bounds the string before calling in; TODO
	 * confirm against the caller.
	 */
	strcpy(user_recovery_dirname, recdir);
}
4164 
4165 /*
4166  * Change the NFSv4 recovery directory to recdir.
4167  */
4168 int
4169 nfs4_reset_recoverydir(char *recdir)
4170 {
4171 	int status;
4172 	struct path path;
4173 
4174 	status = kern_path(recdir, LOOKUP_FOLLOW, &path);
4175 	if (status)
4176 		return status;
4177 	status = -ENOTDIR;
4178 	if (S_ISDIR(path.dentry->d_inode->i_mode)) {
4179 		nfs4_set_recdir(recdir);
4180 		status = 0;
4181 	}
4182 	path_put(&path);
4183 	return status;
4184 }
4185 
/* Return the pathname of the current NFSv4 recovery directory. */
char *
nfs4_recoverydir(void)
{
	return user_recovery_dirname;
}
4191 
4192 /*
4193  * Called when leasetime is changed.
4194  *
4195  * The only way the protocol gives us to handle on-the-fly lease changes is to
4196  * simulate a reboot.  Instead of doing that, we just wait till the next time
4197  * we start to register any changes in lease time.  If the administrator
4198  * really wants to change the lease time *now*, they can go ahead and bring
4199  * nfsd down and then back up again after changing the lease time.
4200  *
4201  * user_lease_time is protected by nfsd_mutex since it's only really accessed
4202  * when nfsd is starting
4203  */
void
nfs4_reset_lease(time_t leasetime)
{
	/* Takes effect at the next nfsd start (see comment above). */
	user_lease_time = leasetime;
}
4209