/*
 * pNFS functions to call and manage layout drivers.
 *
 * Copyright (c) 2002 [year of first publication]
 * The Regents of the University of Michigan
 * All Rights Reserved
 *
 * Dean Hildebrand <dhildebz@umich.edu>
 *
 * Permission is granted to use, copy, create derivative works, and
 * redistribute this software and such derivative works for any purpose,
 * so long as the name of the University of Michigan is not used in
 * any advertising or publicity pertaining to the use or distribution
 * of this software without specific, written prior authorization. If
 * the above copyright notice or any other identification of the
 * University of Michigan is included in any copy of any portion of
 * this software, then the disclaimer below must also be included.
 *
 * This software is provided as is, without representation or warranty
 * of any kind either express or implied, including without limitation
 * the implied warranties of merchantability, fitness for a particular
 * purpose, or noninfringement.  The Regents of the University of
 * Michigan shall not be liable for any damages, including special,
 * indirect, incidental, or consequential damages, with respect to any
 * claim arising out of or in connection with the use of the software,
 * even if it has been or is hereafter advised of the possibility of
 * such damages.
 */

#include <linux/nfs_fs.h>
#include "internal.h"
#include "pnfs.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS

/* Locking:
 *
 * pnfs_spinlock:
 *	protects pnfs_modules_tbl.
 */
static DEFINE_SPINLOCK(pnfs_spinlock);

/*
 * pnfs_modules_tbl holds all pnfs modules
 */
static LIST_HEAD(pnfs_modules_tbl);

/* Return the registered pnfs layout driver module matching given id */
static struct pnfs_layoutdriver_type *
find_pnfs_driver_locked(u32 id)
{
	struct pnfs_layoutdriver_type *local;

	list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
		if (local->id == id)
			goto out;
	local = NULL;
out:
	dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
	return local;
}

static struct pnfs_layoutdriver_type *
find_pnfs_driver(u32 id)
{
	struct pnfs_layoutdriver_type *local;

	spin_lock(&pnfs_spinlock);
	local = find_pnfs_driver_locked(id);
	spin_unlock(&pnfs_spinlock);
	return local;
}

void
unset_pnfs_layoutdriver(struct nfs_server *nfss)
{
	if (nfss->pnfs_curr_ld) {
		nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
		module_put(nfss->pnfs_curr_ld->owner);
	}
	nfss->pnfs_curr_ld = NULL;
}

/*
 * Try to set the server's pnfs module to the pnfs layout type specified by id.
 * Currently only one pNFS layout driver per filesystem is supported.
 *
 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
 */
void
set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
{
	struct pnfs_layoutdriver_type *ld_type = NULL;

	if (id == 0)
		goto out_no_driver;
	if (!(server->nfs_client->cl_exchange_flags &
		 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
		printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__,
		       id, server->nfs_client->cl_exchange_flags);
		goto out_no_driver;
	}
	ld_type = find_pnfs_driver(id);
	if (!ld_type) {
		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
		ld_type = find_pnfs_driver(id);
		if (!ld_type) {
			dprintk("%s: No pNFS module found for %u.\n",
				__func__, id);
			goto out_no_driver;
		}
	}
	if (!try_module_get(ld_type->owner)) {
		dprintk("%s: Could not grab reference on module\n", __func__);
		goto out_no_driver;
	}
	server->pnfs_curr_ld = ld_type;
	if (ld_type->set_layoutdriver(server)) {
		printk(KERN_ERR
		       "%s: Error initializing mount point for layout driver %u.\n",
		       __func__, id);
		module_put(ld_type->owner);
		goto out_no_driver;
	}
	dprintk("%s: pNFS module for %u set\n", __func__, id);
	return;

out_no_driver:
	dprintk("%s: Using NFSv4 I/O\n", __func__);
	server->pnfs_curr_ld = NULL;
}

int
pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	int status = -EINVAL;
	struct pnfs_layoutdriver_type *tmp;

	if (ld_type->id == 0) {
		printk(KERN_ERR "%s id 0 is reserved\n", __func__);
		return status;
	}
	if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
		printk(KERN_ERR "%s Layout driver must provide "
		       "alloc_lseg and free_lseg.\n", __func__);
		return status;
	}

	spin_lock(&pnfs_spinlock);
	tmp = find_pnfs_driver_locked(ld_type->id);
	if (!tmp) {
		list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
		status = 0;
		dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
			ld_type->name);
	} else {
		printk(KERN_ERR "%s Module with id %d already loaded!\n",
		       __func__, ld_type->id);
	}
	spin_unlock(&pnfs_spinlock);

	return status;
}
EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);

void
pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
	spin_lock(&pnfs_spinlock);
	list_del(&ld_type->pnfs_tblid);
	spin_unlock(&pnfs_spinlock);
}
EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);

/*
 * pNFS client layout cache
 */

/* Need to hold i_lock if caller does not already hold reference */
void
get_layout_hdr(struct pnfs_layout_hdr *lo)
{
	atomic_inc(&lo->plh_refcount);
}

static void
destroy_layout_hdr(struct pnfs_layout_hdr *lo)
{
	dprintk("%s: freeing layout cache %p\n", __func__, lo);
	BUG_ON(!list_empty(&lo->plh_layouts));
	NFS_I(lo->plh_inode)->layout = NULL;
	kfree(lo);
}

static void
put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
{
	if (atomic_dec_and_test(&lo->plh_refcount))
		destroy_layout_hdr(lo);
}

void
put_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct inode *inode = lo->plh_inode;

	if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
		destroy_layout_hdr(lo);
		spin_unlock(&inode->i_lock);
	}
}

static void
init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
{
	INIT_LIST_HEAD(&lseg->pls_list);
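	/*
	 * The initial reference set just below is the one that keeps the
	 * lseg in the layout's plh_segs list once pnfs_insert_layout()
	 * adds it; mark_lseg_invalid() drops it (via put_lseg_locked())
	 * when NFS_LSEG_VALID is cleared.
	 */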
	atomic_set(&lseg->pls_refcount, 1);
	smp_mb();
	set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
	lseg->pls_layout = lo;
}

static void free_lseg(struct pnfs_layout_segment *lseg)
{
	struct inode *ino = lseg->pls_layout->plh_inode;

	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
	/* Matched by get_layout_hdr in pnfs_insert_layout */
	put_layout_hdr(NFS_I(ino)->layout);
}

/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
 * could sleep, so must be called outside of the lock.
 * Returns 1 if object was removed, otherwise returns 0.
 */
static int
put_lseg_locked(struct pnfs_layout_segment *lseg,
		struct list_head *tmp_list)
{
	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
		atomic_read(&lseg->pls_refcount),
		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
	if (atomic_dec_and_test(&lseg->pls_refcount)) {
		struct inode *ino = lseg->pls_layout->plh_inode;

		BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
		list_del(&lseg->pls_list);
		if (list_empty(&lseg->pls_layout->plh_segs)) {
			struct nfs_client *clp;

			clp = NFS_SERVER(ino)->nfs_client;
			spin_lock(&clp->cl_lock);
			/* List does not take a reference, so no need for put here */
			list_del_init(&lseg->pls_layout->plh_layouts);
			spin_unlock(&clp->cl_lock);
			clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
		}
		rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
		list_add(&lseg->pls_list, tmp_list);
		return 1;
	}
	return 0;
}

static bool
should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
{
	return (recall_iomode == IOMODE_ANY ||
		lseg_iomode == recall_iomode);
}

/* Returns 1 if lseg is removed from list, 0 otherwise */
static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
			     struct list_head *tmp_list)
{
	int rv = 0;

	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
		/* Remove the reference keeping the lseg in the
		 * list.  It will now be removed when all
		 * outstanding io is finished.
		 */
		rv = put_lseg_locked(lseg, tmp_list);
	}
	return rv;
}

/* Returns the number of matching invalid lsegs remaining in the list
 * after the call.
 */
int
mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
			    struct list_head *tmp_list,
			    u32 iomode)
{
	struct pnfs_layout_segment *lseg, *next;
	int invalid = 0, removed = 0;

	dprintk("%s:Begin lo %p\n", __func__, lo);

	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
		if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
			dprintk("%s: freeing lseg %p iomode %d "
				"offset %llu length %llu\n", __func__,
				lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
				lseg->pls_range.length);
			invalid++;
			removed += mark_lseg_invalid(lseg, tmp_list);
		}
	dprintk("%s:Return %i\n", __func__, invalid - removed);
	return invalid - removed;
}

void
pnfs_free_lseg_list(struct list_head *free_me)
{
	struct pnfs_layout_segment *lseg, *tmp;

	list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
		list_del(&lseg->pls_list);
		free_lseg(lseg);
	}
}

void
pnfs_destroy_layout(struct nfs_inode *nfsi)
{
	struct pnfs_layout_hdr *lo;
	LIST_HEAD(tmp_list);

	spin_lock(&nfsi->vfs_inode.i_lock);
	lo = nfsi->layout;
	if (lo) {
		set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags);
		mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
		/* Matched by refcount set to 1 in alloc_init_layout_hdr */
		put_layout_hdr_locked(lo);
	}
	spin_unlock(&nfsi->vfs_inode.i_lock);
	pnfs_free_lseg_list(&tmp_list);
}

/*
 * Called by the state manager to remove all layouts established under an
 * expired lease.
 */
void
pnfs_destroy_all_layouts(struct nfs_client *clp)
{
	struct pnfs_layout_hdr *lo;
	LIST_HEAD(tmp_list);

	spin_lock(&clp->cl_lock);
	list_splice_init(&clp->cl_layouts, &tmp_list);
	spin_unlock(&clp->cl_lock);

	while (!list_empty(&tmp_list)) {
		lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
				plh_layouts);
		dprintk("%s freeing layout for inode %lu\n", __func__,
			lo->plh_inode->i_ino);
		pnfs_destroy_layout(NFS_I(lo->plh_inode));
	}
}

/* update lo->plh_stateid with new if is more recent */
void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
			bool update_barrier)
{
	u32 oldseq, newseq;

	oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
	newseq = be32_to_cpu(new->stateid.seqid);
	if ((int)(newseq - oldseq) > 0) {
		memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
		if (update_barrier) {
			u32 new_barrier = be32_to_cpu(new->stateid.seqid);

			if ((int)(new_barrier - lo->plh_barrier))
				lo->plh_barrier = new_barrier;
		} else {
			/* Because of wraparound, we want to keep the barrier
			 * "close" to the current seqids.  It needs to be
			 * within 2**31 to count as "behind", so if it
			 * gets too near that limit, give us a little leeway
			 * and bring it to within 2**30.
			 * NOTE - and yes, this is all unsigned arithmetic.
			 */
			if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
				lo->plh_barrier = newseq - (1 << 30);
		}
	}
}

/* lget is set to 1 if called from inside send_layoutget call chain */
static bool
pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
			int lget)
{
	if ((stateid) &&
	    (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
		return true;
	return lo->plh_block_lgets ||
		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
		(list_empty(&lo->plh_segs) &&
		 (atomic_read(&lo->plh_outstanding) > lget));
}

int
pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
			      struct nfs4_state *open_state)
{
	int status = 0;

	dprintk("--> %s\n", __func__);
	spin_lock(&lo->plh_inode->i_lock);
	if (pnfs_layoutgets_blocked(lo, NULL, 1)) {
		status = -EAGAIN;
	} else if (list_empty(&lo->plh_segs)) {
		int seq;

		do {
			seq = read_seqbegin(&open_state->seqlock);
			memcpy(dst->data, open_state->stateid.data,
			       sizeof(open_state->stateid.data));
		} while (read_seqretry(&open_state->seqlock, seq));
	} else
		memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data));
	spin_unlock(&lo->plh_inode->i_lock);
	dprintk("<-- %s\n", __func__);
	return status;
}

/*
 * Get layout from server.
 *    for now, assume that whole file layouts are requested.
 *    arg->offset: 0
 *    arg->length: all ones
 */
static struct pnfs_layout_segment *
send_layoutget(struct pnfs_layout_hdr *lo,
	       struct nfs_open_context *ctx,
	       u32 iomode)
{
	struct inode *ino = lo->plh_inode;
	struct nfs_server *server = NFS_SERVER(ino);
	struct nfs4_layoutget *lgp;
	struct pnfs_layout_segment *lseg = NULL;

	dprintk("--> %s\n", __func__);

	BUG_ON(ctx == NULL);
	lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
	if (lgp == NULL)
		return NULL;
	lgp->args.minlength = NFS4_MAX_UINT64;
	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
	lgp->args.range.iomode = iomode;
	lgp->args.range.offset = 0;
	lgp->args.range.length = NFS4_MAX_UINT64;
	lgp->args.type = server->pnfs_curr_ld->id;
	lgp->args.inode = ino;
	lgp->args.ctx = get_nfs_open_context(ctx);
	lgp->lsegpp = &lseg;

	/* Synchronously retrieve layout information from server and
	 * store in lseg.
	 */
	nfs4_proc_layoutget(lgp);
	if (!lseg) {
		/* remember that LAYOUTGET failed and suspend trying */
		set_bit(lo_fail_bit(iomode), &lo->plh_flags);
	}
	return lseg;
}

bool pnfs_roc(struct inode *ino)
{
	struct pnfs_layout_hdr *lo;
	struct pnfs_layout_segment *lseg, *tmp;
	LIST_HEAD(tmp_list);
	bool found = false;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) ||
	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
		goto out_nolayout;
	list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
			mark_lseg_invalid(lseg, &tmp_list);
			found = true;
		}
	if (!found)
		goto out_nolayout;
	lo->plh_block_lgets++;
	get_layout_hdr(lo); /* matched in pnfs_roc_release */
	spin_unlock(&ino->i_lock);
	pnfs_free_lseg_list(&tmp_list);
	return true;

out_nolayout:
	spin_unlock(&ino->i_lock);
	return false;
}

void pnfs_roc_release(struct inode *ino)
{
	struct pnfs_layout_hdr *lo;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	lo->plh_block_lgets--;
	put_layout_hdr_locked(lo);
	spin_unlock(&ino->i_lock);
}

void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
{
	struct pnfs_layout_hdr *lo;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	if ((int)(barrier - lo->plh_barrier) > 0)
		lo->plh_barrier = barrier;
	spin_unlock(&ino->i_lock);
}

bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_segment *lseg;
	bool found = false;

	spin_lock(&ino->i_lock);
	list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
			found = true;
			break;
		}
	if (!found) {
		struct pnfs_layout_hdr *lo = nfsi->layout;
		u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid);

		/* Since close does not return a layout stateid for use as
		 * a barrier, we choose the worst-case barrier.
		 */
		*barrier = current_seqid + atomic_read(&lo->plh_outstanding);
	}
	spin_unlock(&ino->i_lock);
	return found;
}

/*
 * Compare two layout segments for sorting into layout cache.
 * We want to preferentially return RW over RO layouts, so ensure those
 * are seen first.
 */
static s64
cmp_layout(u32 iomode1, u32 iomode2)
{
	/* read > read/write */
	return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ);
}

static void
pnfs_insert_layout(struct pnfs_layout_hdr *lo,
		   struct pnfs_layout_segment *lseg)
{
	struct pnfs_layout_segment *lp;
	int found = 0;

	dprintk("%s:Begin\n", __func__);

	assert_spin_locked(&lo->plh_inode->i_lock);
	list_for_each_entry(lp, &lo->plh_segs, pls_list) {
		if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0)
			continue;
		list_add_tail(&lseg->pls_list, &lp->pls_list);
		dprintk("%s: inserted lseg %p "
			"iomode %d offset %llu length %llu before "
			"lp %p iomode %d offset %llu length %llu\n",
			__func__, lseg, lseg->pls_range.iomode,
			lseg->pls_range.offset, lseg->pls_range.length,
			lp, lp->pls_range.iomode, lp->pls_range.offset,
			lp->pls_range.length);
		found = 1;
		break;
	}
	if (!found) {
		list_add_tail(&lseg->pls_list, &lo->plh_segs);
		dprintk("%s: inserted lseg %p "
			"iomode %d offset %llu length %llu at tail\n",
			__func__, lseg, lseg->pls_range.iomode,
			lseg->pls_range.offset, lseg->pls_range.length);
	}
	get_layout_hdr(lo);

	dprintk("%s:Return\n", __func__);
}

static struct pnfs_layout_hdr *
alloc_init_layout_hdr(struct inode *ino)
{
	struct pnfs_layout_hdr *lo;

	lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
	if (!lo)
		return NULL;
	atomic_set(&lo->plh_refcount, 1);
	INIT_LIST_HEAD(&lo->plh_layouts);
	INIT_LIST_HEAD(&lo->plh_segs);
	INIT_LIST_HEAD(&lo->plh_bulk_recall);
	lo->plh_inode = ino;
	return lo;
}

static struct pnfs_layout_hdr *
pnfs_find_alloc_layout(struct inode *ino)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_hdr *new = NULL;

	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);

	assert_spin_locked(&ino->i_lock);
	if (nfsi->layout) {
		if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags))
			return NULL;
		else
			return nfsi->layout;
	}
	spin_unlock(&ino->i_lock);
	new = alloc_init_layout_hdr(ino);
	spin_lock(&ino->i_lock);

	if (likely(nfsi->layout == NULL))	/* Won the race? */
		nfsi->layout = new;
	else
		kfree(new);
	return nfsi->layout;
}

/*
 * iomode matching rules:
 * iomode	lseg	match
 * -----	-----	-----
 * ANY		READ	true
 * ANY		RW	true
 * RW		READ	false
 * RW		RW	true
 * READ		READ	true
 * READ		RW	true
 */
static int
is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
{
	return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW);
}

/*
 * lookup range in layout
 */
static struct pnfs_layout_segment *
pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
{
	struct pnfs_layout_segment *lseg, *ret = NULL;

	dprintk("%s:Begin\n", __func__);

	assert_spin_locked(&lo->plh_inode->i_lock);
	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
		    is_matching_lseg(lseg, iomode)) {
			ret = lseg;
			break;
		}
		if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
			break;
	}

	dprintk("%s:Return lseg %p ref %d\n",
		__func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
	return ret;
}

/*
 * Layout segment is retrieved from the server if not cached.
 * The appropriate layout segment is referenced and returned to the caller.
 */
struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino,
		   struct nfs_open_context *ctx,
		   enum pnfs_iomode iomode)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
	struct pnfs_layout_hdr *lo;
	struct pnfs_layout_segment *lseg = NULL;

	if (!pnfs_enabled_sb(NFS_SERVER(ino)))
		return NULL;
	spin_lock(&ino->i_lock);
	lo = pnfs_find_alloc_layout(ino);
	if (lo == NULL) {
		dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
		goto out_unlock;
	}

	/* Do we even need to bother with this? */
	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
		dprintk("%s matches recall, use MDS\n", __func__);
		goto out_unlock;
	}
	/* Check to see if the layout for the given range already exists */
	lseg = pnfs_find_lseg(lo, iomode);
	if (lseg)
		goto out_unlock;

	/* if LAYOUTGET already failed once we don't try again */
	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
		goto out_unlock;

	if (pnfs_layoutgets_blocked(lo, NULL, 0))
		goto out_unlock;
	atomic_inc(&lo->plh_outstanding);

	get_layout_hdr(lo);
	if (list_empty(&lo->plh_segs)) {
		/* The lo must be on the clp list if there is any
		 * chance of a CB_LAYOUTRECALL(FILE) coming in.
		 */
		spin_lock(&clp->cl_lock);
		BUG_ON(!list_empty(&lo->plh_layouts));
		list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
		spin_unlock(&clp->cl_lock);
	}
	spin_unlock(&ino->i_lock);

	lseg = send_layoutget(lo, ctx, iomode);
	if (!lseg) {
		spin_lock(&ino->i_lock);
		if (list_empty(&lo->plh_segs)) {
			spin_lock(&clp->cl_lock);
			list_del_init(&lo->plh_layouts);
			spin_unlock(&clp->cl_lock);
			clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
		}
		spin_unlock(&ino->i_lock);
	}
	atomic_dec(&lo->plh_outstanding);
	put_layout_hdr(lo);
out:
	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
		nfsi->layout->plh_flags, lseg);
	return lseg;
out_unlock:
	spin_unlock(&ino->i_lock);
	goto out;
}

int
pnfs_layout_process(struct nfs4_layoutget *lgp)
{
	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
	struct nfs4_layoutget_res *res = &lgp->res;
	struct pnfs_layout_segment *lseg;
	struct inode *ino = lo->plh_inode;
	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
	int status = 0;

	/* Verify we got what we asked for.
	 * Note that because the xdr parsing only accepts a single
	 * element array, this can fail even if the server is behaving
	 * correctly.
	 */
	if (lgp->args.range.iomode > res->range.iomode ||
	    res->range.offset != 0 ||
	    res->range.length != NFS4_MAX_UINT64) {
		status = -EINVAL;
		goto out;
	}
	/* Inject layout blob into I/O device driver */
	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
	if (!lseg || IS_ERR(lseg)) {
		if (!lseg)
			status = -ENOMEM;
		else
			status = PTR_ERR(lseg);
		dprintk("%s: Could not allocate layout: error %d\n",
			__func__, status);
		goto out;
	}

	spin_lock(&ino->i_lock);
	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
		dprintk("%s forget reply due to recall\n", __func__);
		goto out_forget_reply;
	}

	if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
		dprintk("%s forget reply due to state\n", __func__);
		goto out_forget_reply;
	}
	init_lseg(lo, lseg);
	lseg->pls_range = res->range;
	*lgp->lsegpp = lseg;
	pnfs_insert_layout(lo, lseg);

	if (res->return_on_close) {
		set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
		set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
	}

	/* Done processing layoutget. Set the layout stateid */
	pnfs_set_layout_stateid(lo, &res->stateid, false);
	spin_unlock(&ino->i_lock);
out:
	return status;

out_forget_reply:
	spin_unlock(&ino->i_lock);
	lseg->pls_layout = lo;
	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
	goto out;
}

/*
 * Device ID cache. Currently supports one layout type per struct nfs_client.
 * Add layout type to the lookup key to expand to support multiple types.
 */
int
pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
			 void (*free_callback)(struct pnfs_deviceid_node *))
{
	struct pnfs_deviceid_cache *c;

	c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL);
	if (!c)
		return -ENOMEM;
	spin_lock(&clp->cl_lock);
	if (clp->cl_devid_cache != NULL) {
		atomic_inc(&clp->cl_devid_cache->dc_ref);
		dprintk("%s [kref [%d]]\n", __func__,
			atomic_read(&clp->cl_devid_cache->dc_ref));
		kfree(c);
	} else {
		/* kzalloc initializes hlists */
		spin_lock_init(&c->dc_lock);
		atomic_set(&c->dc_ref, 1);
		c->dc_free_callback = free_callback;
		clp->cl_devid_cache = c;
		dprintk("%s [new]\n", __func__);
	}
	spin_unlock(&clp->cl_lock);
	return 0;
}
EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);

/*
 * Called from pnfs_layoutdriver_type->free_lseg
 * last layout segment reference frees deviceid
 */
void
pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
		  struct pnfs_deviceid_node *devid)
{
	struct nfs4_deviceid *id = &devid->de_id;
	struct pnfs_deviceid_node *d;
	struct hlist_node *n;
	long h = nfs4_deviceid_hash(id);

	dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref));
	if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock))
		return;

	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node)
		if (!memcmp(&d->de_id, id, sizeof(*id))) {
			hlist_del_rcu(&d->de_node);
			spin_unlock(&c->dc_lock);
			synchronize_rcu();
			c->dc_free_callback(devid);
			return;
		}
	spin_unlock(&c->dc_lock);
	/* Why wasn't it found in the list? */
	BUG();
}
EXPORT_SYMBOL_GPL(pnfs_put_deviceid);

/* Find and reference a deviceid */
struct pnfs_deviceid_node *
pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
{
	struct pnfs_deviceid_node *d;
	struct hlist_node *n;
	long hash = nfs4_deviceid_hash(id);

	dprintk("--> %s hash %ld\n", __func__, hash);
	rcu_read_lock();
	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
		if (!memcmp(&d->de_id, id, sizeof(*id))) {
			if (!atomic_inc_not_zero(&d->de_ref)) {
				goto fail;
			} else {
				rcu_read_unlock();
				return d;
			}
		}
	}
fail:
	rcu_read_unlock();
	return NULL;
}
EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);

/*
 * Add a deviceid to the cache.
 * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
 */
struct pnfs_deviceid_node *
pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new)
{
	struct pnfs_deviceid_node *d;
	long hash = nfs4_deviceid_hash(&new->de_id);

	dprintk("--> %s hash %ld\n", __func__, hash);
	spin_lock(&c->dc_lock);
	d = pnfs_find_get_deviceid(c, &new->de_id);
	if (d) {
		spin_unlock(&c->dc_lock);
		dprintk("%s [discard]\n", __func__);
		c->dc_free_callback(new);
		return d;
	}
	INIT_HLIST_NODE(&new->de_node);
	atomic_set(&new->de_ref, 1);
	hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
	spin_unlock(&c->dc_lock);
	dprintk("%s [new]\n", __func__);
	return new;
}
EXPORT_SYMBOL_GPL(pnfs_add_deviceid);

void
pnfs_put_deviceid_cache(struct nfs_client *clp)
{
	struct pnfs_deviceid_cache *local = clp->cl_devid_cache;

	dprintk("--> %s ({%d})\n", __func__, atomic_read(&local->dc_ref));
	if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
		int i;
		/* Verify cache is empty */
		for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
			BUG_ON(!hlist_empty(&local->dc_deviceids[i]));
		clp->cl_devid_cache = NULL;
		spin_unlock(&clp->cl_lock);
		kfree(local);
	}
}
EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);