/*
 *  pNFS functions to call and manage layout drivers.
 *
 *  Copyright (c) 2002 [year of first publication]
 *  The Regents of the University of Michigan
 *  All Rights Reserved
 *
 *  Dean Hildebrand <dhildebz@umich.edu>
 *
 *  Permission is granted to use, copy, create derivative works, and
 *  redistribute this software and such derivative works for any purpose,
 *  so long as the name of the University of Michigan is not used in
 *  any advertising or publicity pertaining to the use or distribution
 *  of this software without specific, written prior authorization. If
 *  the above copyright notice or any other identification of the
 *  University of Michigan is included in any copy of any portion of
 *  this software, then the disclaimer below must also be included.
 *
 *  This software is provided as is, without representation or warranty
 *  of any kind either express or implied, including without limitation
 *  the implied warranties of merchantability, fitness for a particular
 *  purpose, or noninfringement.  The Regents of the University of
 *  Michigan shall not be liable for any damages, including special,
 *  indirect, incidental, or consequential damages, with respect to any
 *  claim arising out of or in connection with the use of the software,
 *  even if it has been or is hereafter advised of the possibility of
 *  such damages.
 */

#include <linux/nfs_fs.h>
#include "internal.h"
#include "pnfs.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS

/* Locking:
 *
 * pnfs_spinlock:
 *      protects pnfs_modules_tbl.
 */
static DEFINE_SPINLOCK(pnfs_spinlock);

/*
 * pnfs_modules_tbl holds all pnfs modules
 */
static LIST_HEAD(pnfs_modules_tbl);

/* Return the registered pnfs layout driver module matching given id */
static struct pnfs_layoutdriver_type *
find_pnfs_driver_locked(u32 id)
{
	struct pnfs_layoutdriver_type *local;

	list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
		if (local->id == id)
			goto out;
	local = NULL;
out:
	dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
	return local;
}

static struct pnfs_layoutdriver_type *
find_pnfs_driver(u32 id)
{
	struct pnfs_layoutdriver_type *local;

	spin_lock(&pnfs_spinlock);
	local = find_pnfs_driver_locked(id);
	spin_unlock(&pnfs_spinlock);
	return local;
}

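/*
 * Drop the server's reference on its layout driver: give the driver a
 * chance to clean up per-mount state via ->clear_layoutdriver, then
 * release the module reference taken in set_pnfs_layoutdriver.
 */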
void
unset_pnfs_layoutdriver(struct nfs_server *nfss)
{
	if (nfss->pnfs_curr_ld) {
		nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
		module_put(nfss->pnfs_curr_ld->owner);
	}
	nfss->pnfs_curr_ld = NULL;
}

/*
 * Try to set the server's pnfs module to the pnfs layout type specified by id.
 * Currently only one pNFS layout driver per filesystem is supported.
 *
 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
 */
void
set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
{
	struct pnfs_layoutdriver_type *ld_type = NULL;

	if (id == 0)
		goto out_no_driver;
	if (!(server->nfs_client->cl_exchange_flags &
		 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
		printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__,
		       id, server->nfs_client->cl_exchange_flags);
		goto out_no_driver;
	}
	ld_type = find_pnfs_driver(id);
	if (!ld_type) {
		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
		ld_type = find_pnfs_driver(id);
		if (!ld_type) {
			dprintk("%s: No pNFS module found for %u.\n",
				__func__, id);
			goto out_no_driver;
		}
	}
	if (!try_module_get(ld_type->owner)) {
		dprintk("%s: Could not grab reference on module\n", __func__);
		goto out_no_driver;
	}
	server->pnfs_curr_ld = ld_type;
	if (ld_type->set_layoutdriver(server)) {
		printk(KERN_ERR
		       "%s: Error initializing mount point for layout driver %u.\n",
		       __func__, id);
		module_put(ld_type->owner);
		goto out_no_driver;
	}
	dprintk("%s: pNFS module for %u set\n", __func__, id);
	return;

out_no_driver:
	dprintk("%s: Using NFSv4 I/O\n", __func__);
	server->pnfs_curr_ld = NULL;
}

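/*
 * A minimal sketch of how a layout driver is expected to hook into this
 * registry; the "example_*" names and callback bodies are illustrative
 * only, not an actual driver:
 *
 *	static struct pnfs_layoutdriver_type example_layout_type = {
 *		.id			= LAYOUT_NFSV4_1_FILES,
 *		.name			= "example",
 *		.owner			= THIS_MODULE,
 *		.set_layoutdriver	= example_set_layoutdriver,
 *		.clear_layoutdriver	= example_clear_layoutdriver,
 *		.alloc_lseg		= example_alloc_lseg,
 *		.free_lseg		= example_free_lseg,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return pnfs_register_layoutdriver(&example_layout_type);
 *	}
 *
 *	static void __exit example_exit(void)
 *	{
 *		pnfs_unregister_layoutdriver(&example_layout_type);
 *	}
 *
 *	module_init(example_init);
 *	module_exit(example_exit);
 */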
int
pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	int status = -EINVAL;
	struct pnfs_layoutdriver_type *tmp;

	if (ld_type->id == 0) {
		printk(KERN_ERR "%s id 0 is reserved\n", __func__);
		return status;
	}
	if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
		printk(KERN_ERR "%s Layout driver must provide "
		       "alloc_lseg and free_lseg.\n", __func__);
		return status;
	}

	spin_lock(&pnfs_spinlock);
	tmp = find_pnfs_driver_locked(ld_type->id);
	if (!tmp) {
		list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
		status = 0;
		dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
			ld_type->name);
	} else {
		printk(KERN_ERR "%s Module with id %u already loaded!\n",
			__func__, ld_type->id);
	}
	spin_unlock(&pnfs_spinlock);

	return status;
}
EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);

void
pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
	spin_lock(&pnfs_spinlock);
	list_del(&ld_type->pnfs_tblid);
	spin_unlock(&pnfs_spinlock);
}
EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);

/*
 * pNFS client layout cache
 */

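/*
 * Reference counting, as implemented below: the layout header refcount is
 * only manipulated under inode->i_lock; each cached layout segment holds
 * one header reference (taken in pnfs_insert_layout, dropped in
 * destroy_lseg), and the segments themselves are kref-counted.
 */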
static void
get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
{
	assert_spin_locked(&lo->inode->i_lock);
	lo->refcount++;
}

static void
put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
{
	assert_spin_locked(&lo->inode->i_lock);
	BUG_ON(lo->refcount == 0);

	lo->refcount--;
	if (!lo->refcount) {
		dprintk("%s: freeing layout cache %p\n", __func__, lo);
		BUG_ON(!list_empty(&lo->layouts));
		NFS_I(lo->inode)->layout = NULL;
		kfree(lo);
	}
}

void
put_layout_hdr(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	put_layout_hdr_locked(NFS_I(inode)->layout);
	spin_unlock(&inode->i_lock);
}

static void
init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
{
	INIT_LIST_HEAD(&lseg->fi_list);
	kref_init(&lseg->kref);
	lseg->layout = lo;
}

/* Called without i_lock held, as the free_lseg call may sleep */
static void
destroy_lseg(struct kref *kref)
{
	struct pnfs_layout_segment *lseg =
		container_of(kref, struct pnfs_layout_segment, kref);
	struct inode *ino = lseg->layout->inode;

	dprintk("--> %s\n", __func__);
	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
	/* Matched by get_layout_hdr_locked in pnfs_insert_layout */
	put_layout_hdr(ino);
}

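/*
 * Drop a segment reference. The final put runs destroy_lseg, whose
 * free_lseg call may sleep, so callers must not hold i_lock.
 */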
static void
put_lseg(struct pnfs_layout_segment *lseg)
{
	if (!lseg)
		return;

	dprintk("%s: lseg %p ref %d\n", __func__, lseg,
		atomic_read(&lseg->kref.refcount));
	kref_put(&lseg->kref, destroy_lseg);
}

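/*
 * Detach every cached segment onto tmp_list while i_lock is held; the
 * caller frees them via pnfs_free_lseg_list after dropping the lock,
 * since free_lseg may sleep.
 */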
static void
pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list)
{
	struct pnfs_layout_segment *lseg, *next;
	struct nfs_client *clp;

	dprintk("%s:Begin lo %p\n", __func__, lo);

	assert_spin_locked(&lo->inode->i_lock);
	list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
		dprintk("%s: freeing lseg %p\n", __func__, lseg);
		list_move(&lseg->fi_list, tmp_list);
	}
	clp = NFS_SERVER(lo->inode)->nfs_client;
	spin_lock(&clp->cl_lock);
	/* List does not take a reference, so no need for put here */
	list_del_init(&lo->layouts);
	spin_unlock(&clp->cl_lock);
	write_seqlock(&lo->seqlock);
	clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
	write_sequnlock(&lo->seqlock);

	dprintk("%s:Return\n", __func__);
}

static void
pnfs_free_lseg_list(struct list_head *tmp_list)
{
	struct pnfs_layout_segment *lseg;

	while (!list_empty(tmp_list)) {
		lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
				fi_list);
		dprintk("%s calling put_lseg on %p\n", __func__, lseg);
		list_del(&lseg->fi_list);
		put_lseg(lseg);
	}
}

void
pnfs_destroy_layout(struct nfs_inode *nfsi)
{
	struct pnfs_layout_hdr *lo;
	LIST_HEAD(tmp_list);

	spin_lock(&nfsi->vfs_inode.i_lock);
	lo = nfsi->layout;
	if (lo) {
		pnfs_clear_lseg_list(lo, &tmp_list);
		/* Matched by refcount set to 1 in alloc_init_layout_hdr */
		put_layout_hdr_locked(lo);
	}
	spin_unlock(&nfsi->vfs_inode.i_lock);
	pnfs_free_lseg_list(&tmp_list);
}

/*
 * Called by the state manager to remove all layouts established under an
 * expired lease.
 */
void
pnfs_destroy_all_layouts(struct nfs_client *clp)
{
	struct pnfs_layout_hdr *lo;
	LIST_HEAD(tmp_list);

	spin_lock(&clp->cl_lock);
	list_splice_init(&clp->cl_layouts, &tmp_list);
	spin_unlock(&clp->cl_lock);

	while (!list_empty(&tmp_list)) {
		lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
				layouts);
		dprintk("%s freeing layout for inode %lu\n", __func__,
			lo->inode->i_ino);
		pnfs_destroy_layout(NFS_I(lo->inode));
	}
}

/* update lo->stateid with new if it is more recent
 *
 * lo->stateid could be the open stateid, in which case we just use what is
 * given.
 */
static void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
			const nfs4_stateid *new)
{
	nfs4_stateid *old = &lo->stateid;
	bool overwrite = false;

	write_seqlock(&lo->seqlock);
	if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
	    memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
		overwrite = true;
	else {
		u32 oldseq, newseq;

		oldseq = be32_to_cpu(old->stateid.seqid);
		newseq = be32_to_cpu(new->stateid.seqid);
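		/* Signed difference so that seqid wraparound still
		 * compares correctly (serial-number arithmetic). */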
		if ((int)(newseq - oldseq) > 0)
			overwrite = true;
	}
	if (overwrite)
		memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
	write_sequnlock(&lo->seqlock);
}

static void
pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
			      struct nfs4_state *state)
{
	int seq;

	dprintk("--> %s\n", __func__);
	write_seqlock(&lo->seqlock);
	do {
		seq = read_seqbegin(&state->seqlock);
		memcpy(lo->stateid.data, state->stateid.data,
		       sizeof(state->stateid.data));
	} while (read_seqretry(&state->seqlock, seq));
	set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
	write_sequnlock(&lo->seqlock);
	dprintk("<-- %s\n", __func__);
}

void
pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
			struct nfs4_state *open_state)
{
	int seq;

	dprintk("--> %s\n", __func__);
	do {
		seq = read_seqbegin(&lo->seqlock);
		if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
			/* This will trigger retry of the read */
			pnfs_layout_from_open_stateid(lo, open_state);
		} else
			memcpy(dst->data, lo->stateid.data,
			       sizeof(lo->stateid.data));
	} while (read_seqretry(&lo->seqlock, seq));
	dprintk("<-- %s\n", __func__);
}

/*
 * Get layout from server.
 *    for now, assume that whole file layouts are requested.
 *    arg->offset: 0
 *    arg->length: all ones
 */
static struct pnfs_layout_segment *
send_layoutget(struct pnfs_layout_hdr *lo,
	   struct nfs_open_context *ctx,
	   u32 iomode)
{
	struct inode *ino = lo->inode;
	struct nfs_server *server = NFS_SERVER(ino);
	struct nfs4_layoutget *lgp;
	struct pnfs_layout_segment *lseg = NULL;

	dprintk("--> %s\n", __func__);

	BUG_ON(ctx == NULL);
	lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
	if (lgp == NULL) {
		put_layout_hdr(lo->inode);
		return NULL;
	}
	lgp->args.minlength = NFS4_MAX_UINT64;
	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
	lgp->args.range.iomode = iomode;
	lgp->args.range.offset = 0;
	lgp->args.range.length = NFS4_MAX_UINT64;
	lgp->args.type = server->pnfs_curr_ld->id;
	lgp->args.inode = ino;
	lgp->args.ctx = get_nfs_open_context(ctx);
	lgp->lsegpp = &lseg;

	/* Synchronously retrieve layout information from server and
	 * store in lseg.
	 */
	nfs4_proc_layoutget(lgp);
	if (!lseg) {
		/* remember that LAYOUTGET failed and suspend trying */
		set_bit(lo_fail_bit(iomode), &lo->state);
	}
	return lseg;
}

/*
 * Compare two layout segments for sorting into the layout cache.
 * We want to preferentially return RW over RO layouts, so ensure those
 * are seen first.
 */
static s64
cmp_layout(u32 iomode1, u32 iomode2)
{
	/* Positive when iomode1 is RW and iomode2 is READ, i.e. when
	 * the first segment should sort before the second. */
	return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ);
}

static void
pnfs_insert_layout(struct pnfs_layout_hdr *lo,
		   struct pnfs_layout_segment *lseg)
{
	struct pnfs_layout_segment *lp;
	int found = 0;

	dprintk("%s:Begin\n", __func__);

	assert_spin_locked(&lo->inode->i_lock);
	if (list_empty(&lo->segs)) {
		struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client;

		spin_lock(&clp->cl_lock);
		BUG_ON(!list_empty(&lo->layouts));
		list_add_tail(&lo->layouts, &clp->cl_layouts);
		spin_unlock(&clp->cl_lock);
	}
	list_for_each_entry(lp, &lo->segs, fi_list) {
		if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
			continue;
		list_add_tail(&lseg->fi_list, &lp->fi_list);
		dprintk("%s: inserted lseg %p "
			"iomode %d offset %llu length %llu before "
			"lp %p iomode %d offset %llu length %llu\n",
			__func__, lseg, lseg->range.iomode,
			lseg->range.offset, lseg->range.length,
			lp, lp->range.iomode, lp->range.offset,
			lp->range.length);
		found = 1;
		break;
	}
	if (!found) {
		list_add_tail(&lseg->fi_list, &lo->segs);
		dprintk("%s: inserted lseg %p "
			"iomode %d offset %llu length %llu at tail\n",
			__func__, lseg, lseg->range.iomode,
			lseg->range.offset, lseg->range.length);
	}
	get_layout_hdr_locked(lo);

	dprintk("%s:Return\n", __func__);
}

static struct pnfs_layout_hdr *
alloc_init_layout_hdr(struct inode *ino)
{
	struct pnfs_layout_hdr *lo;

	lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
	if (!lo)
		return NULL;
	lo->refcount = 1;
	INIT_LIST_HEAD(&lo->layouts);
	INIT_LIST_HEAD(&lo->segs);
	seqlock_init(&lo->seqlock);
	lo->inode = ino;
	return lo;
}

static struct pnfs_layout_hdr *
pnfs_find_alloc_layout(struct inode *ino)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_hdr *new = NULL;

	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);

	assert_spin_locked(&ino->i_lock);
	if (nfsi->layout)
		return nfsi->layout;

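	/* kzalloc may sleep; drop i_lock across the allocation and recheck */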
	spin_unlock(&ino->i_lock);
	new = alloc_init_layout_hdr(ino);
	spin_lock(&ino->i_lock);

	if (likely(nfsi->layout == NULL))	/* Won the race? */
		nfsi->layout = new;
	else
		kfree(new);
	return nfsi->layout;
}

/*
 * iomode matching rules:
 * iomode	lseg	match
 * -----	-----	-----
 * ANY		READ	true
 * ANY		RW	true
 * RW		READ	false
 * RW		RW	true
 * READ		READ	true
 * READ		RW	true
 */
static int
is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
{
	return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW);
}

/*
 * lookup range in layout
 */
static struct pnfs_layout_segment *
pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
{
	struct pnfs_layout_segment *lseg, *ret = NULL;

	dprintk("%s:Begin\n", __func__);

	assert_spin_locked(&lo->inode->i_lock);
	list_for_each_entry(lseg, &lo->segs, fi_list) {
		if (is_matching_lseg(lseg, iomode)) {
			ret = lseg;
			break;
		}
		if (cmp_layout(iomode, lseg->range.iomode) > 0)
			break;
	}

	dprintk("%s:Return lseg %p ref %d\n",
		__func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0);
	return ret;
}

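/*
 * A hedged sketch of the expected calling pattern from an I/O path
 * (the caller-side details are illustrative; the actual read/write
 * integration lives with the layout drivers):
 *
 *	lseg = pnfs_update_layout(inode, ctx, IOMODE_READ);
 *	if (lseg) {
 *		... issue pNFS I/O described by lseg ...
 *	} else {
 *		... fall back to ordinary NFSv4 I/O through the MDS ...
 *	}
 */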
/*
 * Layout segment is retrieved from the server if not cached.
 * The appropriate layout segment is referenced and returned to the caller.
 */
struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino,
		   struct nfs_open_context *ctx,
		   enum pnfs_iomode iomode)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_hdr *lo;
	struct pnfs_layout_segment *lseg = NULL;

	if (!pnfs_enabled_sb(NFS_SERVER(ino)))
		return NULL;
	spin_lock(&ino->i_lock);
	lo = pnfs_find_alloc_layout(ino);
	if (lo == NULL) {
		dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
		goto out_unlock;
	}

	/* Check to see if the layout for the given range already exists */
	lseg = pnfs_has_layout(lo, iomode);
	if (lseg) {
		dprintk("%s: Using cached lseg %p for iomode %d\n",
			__func__, lseg, iomode);
		goto out_unlock;
	}

	/* if LAYOUTGET already failed once we don't try again */
	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state))
		goto out_unlock;

	get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */
	spin_unlock(&ino->i_lock);

	lseg = send_layoutget(lo, ctx, iomode);
out:
	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
		nfsi->layout->state, lseg);
	return lseg;
out_unlock:
	spin_unlock(&ino->i_lock);
	goto out;
}

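/*
 * Called after a LAYOUTGET reply has been decoded: hand the opaque layout
 * body to the driver's ->alloc_lseg, insert the resulting segment into
 * the per-inode cache, and record the stateid returned by the server.
 */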
int
pnfs_layout_process(struct nfs4_layoutget *lgp)
{
	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
	struct nfs4_layoutget_res *res = &lgp->res;
	struct pnfs_layout_segment *lseg;
	struct inode *ino = lo->inode;
	int status = 0;

	/* Inject layout blob into I/O device driver */
	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
	if (!lseg || IS_ERR(lseg)) {
		if (!lseg)
			status = -ENOMEM;
		else
			status = PTR_ERR(lseg);
		dprintk("%s: Could not allocate layout: error %d\n",
		       __func__, status);
		goto out;
	}

	spin_lock(&ino->i_lock);
	init_lseg(lo, lseg);
	lseg->range = res->range;
	*lgp->lsegpp = lseg;
	pnfs_insert_layout(lo, lseg);

	/* Done processing layoutget. Set the layout stateid */
	pnfs_set_layout_stateid(lo, &res->stateid);
	spin_unlock(&ino->i_lock);
out:
	return status;
}

/*
 * Device ID cache. Currently supports one layout type per struct nfs_client.
 * Add layout type to the lookup key to expand to support multiple types.
 */
int
pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
			 void (*free_callback)(struct pnfs_deviceid_node *))
{
	struct pnfs_deviceid_cache *c;

	c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL);
	if (!c)
		return -ENOMEM;
	spin_lock(&clp->cl_lock);
	if (clp->cl_devid_cache != NULL) {
		atomic_inc(&clp->cl_devid_cache->dc_ref);
		dprintk("%s [kref [%d]]\n", __func__,
			atomic_read(&clp->cl_devid_cache->dc_ref));
		kfree(c);
	} else {
		/* kzalloc initializes hlists */
		spin_lock_init(&c->dc_lock);
		atomic_set(&c->dc_ref, 1);
		c->dc_free_callback = free_callback;
		clp->cl_devid_cache = c;
		dprintk("%s [new]\n", __func__);
	}
	spin_unlock(&clp->cl_lock);
	return 0;
}
EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);

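/*
 * Typical driver-side usage, as a hedged sketch (embedding struct
 * pnfs_deviceid_node in a driver-private structure and the example_*
 * names are illustrative, not a real driver):
 *
 *	struct example_deviceid_node {
 *		struct pnfs_deviceid_node common;
 *		... driver-private device state ...
 *	};
 *
 *	d = pnfs_find_get_deviceid(clp->cl_devid_cache, id);
 *	if (!d) {
 *		new = example_decode_getdeviceinfo(...);
 *		d = pnfs_add_deviceid(clp->cl_devid_cache, &new->common);
 *	}
 */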
/*
 * Called from pnfs_layoutdriver_type->free_lseg
 * last layout segment reference frees deviceid
 */
void
pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
		  struct pnfs_deviceid_node *devid)
{
	struct nfs4_deviceid *id = &devid->de_id;
	struct pnfs_deviceid_node *d;
	struct hlist_node *n;
	long h = nfs4_deviceid_hash(id);

	dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref));
	if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock))
		return;

	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node)
		if (!memcmp(&d->de_id, id, sizeof(*id))) {
			hlist_del_rcu(&d->de_node);
			spin_unlock(&c->dc_lock);
			synchronize_rcu();
			c->dc_free_callback(devid);
			return;
		}
	spin_unlock(&c->dc_lock);
	/* Why wasn't it found in the list? */
	BUG();
}
EXPORT_SYMBOL_GPL(pnfs_put_deviceid);

/* Find and reference a deviceid */
struct pnfs_deviceid_node *
pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
{
	struct pnfs_deviceid_node *d;
	struct hlist_node *n;
	long hash = nfs4_deviceid_hash(id);

	dprintk("--> %s hash %ld\n", __func__, hash);
	rcu_read_lock();
	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
		if (!memcmp(&d->de_id, id, sizeof(*id))) {
			if (!atomic_inc_not_zero(&d->de_ref))
				goto fail;
			rcu_read_unlock();
			return d;
		}
	}
fail:
	rcu_read_unlock();
	return NULL;
}
EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);

/*
 * Add a deviceid to the cache.
 * GETDEVICEINFOs for the same deviceid can race. If the deviceid is already
 * present, discard the new node.
 */
struct pnfs_deviceid_node *
pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new)
{
	struct pnfs_deviceid_node *d;
	long hash = nfs4_deviceid_hash(&new->de_id);

	dprintk("--> %s hash %ld\n", __func__, hash);
	spin_lock(&c->dc_lock);
	d = pnfs_find_get_deviceid(c, &new->de_id);
	if (d) {
		spin_unlock(&c->dc_lock);
		dprintk("%s [discard]\n", __func__);
		c->dc_free_callback(new);
		return d;
	}
	INIT_HLIST_NODE(&new->de_node);
	atomic_set(&new->de_ref, 1);
	hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
	spin_unlock(&c->dc_lock);
	dprintk("%s [new]\n", __func__);
	return new;
}
EXPORT_SYMBOL_GPL(pnfs_add_deviceid);

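/*
 * Drop a reference on the per-client deviceid cache; the final put
 * verifies that every hash chain has been emptied before freeing it.
 */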
void
pnfs_put_deviceid_cache(struct nfs_client *clp)
{
	struct pnfs_deviceid_cache *local = clp->cl_devid_cache;

	dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
	if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
		int i;
		/* Verify cache is empty */
		for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
			BUG_ON(!hlist_empty(&local->dc_deviceids[i]));
		clp->cl_devid_cache = NULL;
		spin_unlock(&clp->cl_lock);
		kfree(local);
	}
}
EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);