/*
 *  pNFS functions to call and manage layout drivers.
 *
 *  Copyright (c) 2002 [year of first publication]
 *  The Regents of the University of Michigan
 *  All Rights Reserved
 *
 *  Dean Hildebrand <dhildebz@umich.edu>
 *
 *  Permission is granted to use, copy, create derivative works, and
 *  redistribute this software and such derivative works for any purpose,
 *  so long as the name of the University of Michigan is not used in
 *  any advertising or publicity pertaining to the use or distribution
 *  of this software without specific, written prior authorization. If
 *  the above copyright notice or any other identification of the
 *  University of Michigan is included in any copy of any portion of
 *  this software, then the disclaimer below must also be included.
 *
 *  This software is provided as is, without representation or warranty
 *  of any kind either express or implied, including without limitation
 *  the implied warranties of merchantability, fitness for a particular
 *  purpose, or noninfringement.  The Regents of the University of
 *  Michigan shall not be liable for any damages, including special,
 *  indirect, incidental, or consequential damages, with respect to any
 *  claim arising out of or in connection with the use of the software,
 *  even if it has been or is hereafter advised of the possibility of
 *  such damages.
 */

#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
#include "internal.h"
#include "pnfs.h"
#include "iostat.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS
#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)

/* Locking:
 *
 * pnfs_spinlock:
 *      protects pnfs_modules_tbl.
 */
static DEFINE_SPINLOCK(pnfs_spinlock);

/*
 * pnfs_modules_tbl holds all pnfs modules
 */
static LIST_HEAD(pnfs_modules_tbl);

/* Return the registered pnfs layout driver module matching given id */
static struct pnfs_layoutdriver_type *
find_pnfs_driver_locked(u32 id)
{
	struct pnfs_layoutdriver_type *local;

	list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
		if (local->id == id)
			goto out;
	local = NULL;
out:
	dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
	return local;
}

static struct pnfs_layoutdriver_type *
find_pnfs_driver(u32 id)
{
	struct pnfs_layoutdriver_type *local;

	spin_lock(&pnfs_spinlock);
	local = find_pnfs_driver_locked(id);
	if (local != NULL && !try_module_get(local->owner)) {
		dprintk("%s: Could not grab reference on module\n", __func__);
		local = NULL;
	}
	spin_unlock(&pnfs_spinlock);
	return local;
}

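/*
 * Unset the server's layout driver: drop the MDS count and release the
 * module reference taken in set_pnfs_layoutdriver().
 */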
void
unset_pnfs_layoutdriver(struct nfs_server *nfss)
{
	if (nfss->pnfs_curr_ld) {
		if (nfss->pnfs_curr_ld->clear_layoutdriver)
			nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
		/* Decrement the MDS count. Purge the deviceid cache if zero */
		if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count))
			nfs4_deviceid_purge_client(nfss->nfs_client);
		module_put(nfss->pnfs_curr_ld->owner);
	}
	nfss->pnfs_curr_ld = NULL;
}

/*
 * Try to set the server's pnfs module to the pnfs layout type specified by id.
 * Currently only one pNFS layout driver per filesystem is supported.
 *
 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
 */
void
set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
		      u32 id)
{
	struct pnfs_layoutdriver_type *ld_type = NULL;

	if (id == 0)
		goto out_no_driver;
	if (!(server->nfs_client->cl_exchange_flags &
		 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
		printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n",
			__func__, id, server->nfs_client->cl_exchange_flags);
		goto out_no_driver;
	}
	ld_type = find_pnfs_driver(id);
	if (!ld_type) {
		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
		ld_type = find_pnfs_driver(id);
		if (!ld_type) {
			dprintk("%s: No pNFS module found for %u.\n",
				__func__, id);
			goto out_no_driver;
		}
	}
	server->pnfs_curr_ld = ld_type;
	if (ld_type->set_layoutdriver
	    && ld_type->set_layoutdriver(server, mntfh)) {
		printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
			"driver %u.\n", __func__, id);
		module_put(ld_type->owner);
		goto out_no_driver;
	}
	/* Bump the MDS count */
	atomic_inc(&server->nfs_client->cl_mds_count);

	dprintk("%s: pNFS module for %u set\n", __func__, id);
	return;

out_no_driver:
	dprintk("%s: Using NFSv4 I/O\n", __func__);
	server->pnfs_curr_ld = NULL;
}

int
pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	int status = -EINVAL;
	struct pnfs_layoutdriver_type *tmp;

	if (ld_type->id == 0) {
		printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__);
		return status;
	}
	if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
		printk(KERN_ERR "NFS: %s Layout driver must provide "
		       "alloc_lseg and free_lseg.\n", __func__);
		return status;
	}

	spin_lock(&pnfs_spinlock);
	tmp = find_pnfs_driver_locked(ld_type->id);
	if (!tmp) {
		list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
		status = 0;
		dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
			ld_type->name);
	} else {
		printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n",
			__func__, ld_type->id);
	}
	spin_unlock(&pnfs_spinlock);

	return status;
}
EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);

void
pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
	spin_lock(&pnfs_spinlock);
	list_del(&ld_type->pnfs_tblid);
	spin_unlock(&pnfs_spinlock);
}
EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);

/*
 * pNFS client layout cache
 */

/* Need to hold i_lock if caller does not already hold reference */
void
pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo)
{
	atomic_inc(&lo->plh_refcount);
}

static struct pnfs_layout_hdr *
pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
{
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
	return ld->alloc_layout_hdr(ino, gfp_flags);
}

static void
pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct nfs_server *server = NFS_SERVER(lo->plh_inode);
	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;

	if (!list_empty(&lo->plh_layouts)) {
		struct nfs_client *clp = server->nfs_client;

		spin_lock(&clp->cl_lock);
		list_del_init(&lo->plh_layouts);
		spin_unlock(&clp->cl_lock);
	}
	put_rpccred(lo->plh_lc_cred);
	return ld->free_layout_hdr(lo);
}

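/*
 * Called with the inode->i_lock held once the last reference to the layout
 * header is gone: unhook the header from the inode before it is freed.
 */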
static void
pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct nfs_inode *nfsi = NFS_I(lo->plh_inode);
	dprintk("%s: freeing layout cache %p\n", __func__, lo);
	nfsi->layout = NULL;
	/* Reset MDS Threshold I/O counters */
	nfsi->write_io = 0;
	nfsi->read_io = 0;
}

void
pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct inode *inode = lo->plh_inode;

	if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
		pnfs_detach_layout_hdr(lo);
		spin_unlock(&inode->i_lock);
		pnfs_free_layout_hdr(lo);
	}
}

static int
pnfs_iomode_to_fail_bit(u32 iomode)
{
	return iomode == IOMODE_RW ?
		NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
}

static void
pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
{
	lo->plh_retry_timestamp = jiffies;
	if (!test_and_set_bit(fail_bit, &lo->plh_flags))
		atomic_inc(&lo->plh_refcount);
}

static void
pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
{
	if (test_and_clear_bit(fail_bit, &lo->plh_flags))
		atomic_dec(&lo->plh_refcount);
}

static void
pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode)
{
	struct inode *inode = lo->plh_inode;
	struct pnfs_layout_range range = {
		.iomode = iomode,
		.offset = 0,
		.length = NFS4_MAX_UINT64,
	};
	LIST_HEAD(head);

	spin_lock(&inode->i_lock);
	pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
	pnfs_mark_matching_lsegs_invalid(lo, &head, &range);
	spin_unlock(&inode->i_lock);
	pnfs_free_lseg_list(&head);
	dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
			iomode == IOMODE_RW ? "RW" : "READ");
}

static bool
pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode)
{
	unsigned long start, end;
	int fail_bit = pnfs_iomode_to_fail_bit(iomode);

	if (test_bit(fail_bit, &lo->plh_flags) == 0)
		return false;
	end = jiffies;
	start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT;
	if (!time_in_range(lo->plh_retry_timestamp, start, end)) {
		/* It is time to retry the failed layoutgets */
		pnfs_layout_clear_fail_bit(lo, fail_bit);
		return false;
	}
	return true;
}

static void
init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
{
	INIT_LIST_HEAD(&lseg->pls_list);
	INIT_LIST_HEAD(&lseg->pls_lc_list);
	atomic_set(&lseg->pls_refcount, 1);
	smp_mb();
	set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
	lseg->pls_layout = lo;
}

static void pnfs_free_lseg(struct pnfs_layout_segment *lseg)
{
	struct inode *ino = lseg->pls_layout->plh_inode;

	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
}

static void
pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
		struct pnfs_layout_segment *lseg)
{
	struct inode *inode = lo->plh_inode;

	WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
	list_del_init(&lseg->pls_list);
	/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
	atomic_dec(&lo->plh_refcount);
	if (list_empty(&lo->plh_segs))
		clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
	rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
}

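/*
 * Drop a reference to a layout segment.  When the last reference goes away
 * the segment is unlinked from the layout under the i_lock and freed once
 * the lock has been dropped.
 */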
void
pnfs_put_lseg(struct pnfs_layout_segment *lseg)
{
	struct pnfs_layout_hdr *lo;
	struct inode *inode;

	if (!lseg)
		return;

	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
		atomic_read(&lseg->pls_refcount),
		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
	lo = lseg->pls_layout;
	inode = lo->plh_inode;
	if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
		pnfs_get_layout_hdr(lo);
		pnfs_layout_remove_lseg(lo, lseg);
		spin_unlock(&inode->i_lock);
		pnfs_free_lseg(lseg);
		pnfs_put_layout_hdr(lo);
	}
}
EXPORT_SYMBOL_GPL(pnfs_put_lseg);

static inline u64
end_offset(u64 start, u64 len)
{
	u64 end;

	end = start + len;
	return end >= start ? end : NFS4_MAX_UINT64;
}

/*
 * is l2 fully contained in l1?
 *   start1                             end1
 *   [----------------------------------)
 *           start2           end2
 *           [----------------)
 */
static inline int
lo_seg_contained(struct pnfs_layout_range *l1,
		 struct pnfs_layout_range *l2)
{
	u64 start1 = l1->offset;
	u64 end1 = end_offset(start1, l1->length);
	u64 start2 = l2->offset;
	u64 end2 = end_offset(start2, l2->length);

	return (start1 <= start2) && (end1 >= end2);
}

/*
 * do l1 and l2 intersect?
 *   start1                             end1
 *   [----------------------------------)
 *                              start2           end2
 *                              [----------------)
 */
static inline int
lo_seg_intersecting(struct pnfs_layout_range *l1,
		    struct pnfs_layout_range *l2)
{
	u64 start1 = l1->offset;
	u64 end1 = end_offset(start1, l1->length);
	u64 start2 = l2->offset;
	u64 end2 = end_offset(start2, l2->length);

	return (end1 == NFS4_MAX_UINT64 || end1 > start2) &&
	       (end2 == NFS4_MAX_UINT64 || end2 > start1);
}

static bool
should_free_lseg(struct pnfs_layout_range *lseg_range,
		 struct pnfs_layout_range *recall_range)
{
	return (recall_range->iomode == IOMODE_ANY ||
		lseg_range->iomode == recall_range->iomode) &&
	       lo_seg_intersecting(lseg_range, recall_range);
}

/* Returns 1 if lseg is removed from list, 0 otherwise */
static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
		struct list_head *tmp_list)
{
	int rv = 0;

	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
		/* Remove the reference keeping the lseg in the
		 * list.  It will now be removed when all
		 * outstanding io is finished.
		 */
		dprintk("%s: lseg %p ref %d\n", __func__, lseg,
			atomic_read(&lseg->pls_refcount));
		if (atomic_dec_and_test(&lseg->pls_refcount)) {
			pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
			list_add(&lseg->pls_list, tmp_list);
			rv = 1;
		}
	}
	return rv;
}

/* Returns count of number of matching invalid lsegs remaining in list
 * after call.
 */
int
pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
			    struct list_head *tmp_list,
			    struct pnfs_layout_range *recall_range)
{
	struct pnfs_layout_segment *lseg, *next;
	int invalid = 0, removed = 0;

	dprintk("%s:Begin lo %p\n", __func__, lo);

	if (list_empty(&lo->plh_segs))
		return 0;
	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
		if (!recall_range ||
		    should_free_lseg(&lseg->pls_range, recall_range)) {
			dprintk("%s: freeing lseg %p iomode %d "
				"offset %llu length %llu\n", __func__,
				lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
				lseg->pls_range.length);
			invalid++;
			removed += mark_lseg_invalid(lseg, tmp_list);
		}
	dprintk("%s:Return %i\n", __func__, invalid - removed);
	return invalid - removed;
}

/* note free_me must contain lsegs from a single layout_hdr */
void
pnfs_free_lseg_list(struct list_head *free_me)
{
	struct pnfs_layout_segment *lseg, *tmp;

	if (list_empty(free_me))
		return;

	list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
		list_del(&lseg->pls_list);
		pnfs_free_lseg(lseg);
	}
}

void
pnfs_destroy_layout(struct nfs_inode *nfsi)
{
	struct pnfs_layout_hdr *lo;
	LIST_HEAD(tmp_list);

	spin_lock(&nfsi->vfs_inode.i_lock);
	lo = nfsi->layout;
	if (lo) {
		lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
		pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
		pnfs_get_layout_hdr(lo);
		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
		spin_unlock(&nfsi->vfs_inode.i_lock);
		pnfs_free_lseg_list(&tmp_list);
		pnfs_put_layout_hdr(lo);
	} else
		spin_unlock(&nfsi->vfs_inode.i_lock);
}
EXPORT_SYMBOL_GPL(pnfs_destroy_layout);

static bool
pnfs_layout_add_bulk_destroy_list(struct inode *inode,
		struct list_head *layout_list)
{
	struct pnfs_layout_hdr *lo;
	bool ret = false;

	spin_lock(&inode->i_lock);
	lo = NFS_I(inode)->layout;
	if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
		pnfs_get_layout_hdr(lo);
		list_add(&lo->plh_bulk_destroy, layout_list);
		ret = true;
	}
	spin_unlock(&inode->i_lock);
	return ret;
}

/* Caller must hold rcu_read_lock and clp->cl_lock */
static int
pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
		struct nfs_server *server,
		struct list_head *layout_list)
{
	struct pnfs_layout_hdr *lo, *next;
	struct inode *inode;

	list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
		inode = igrab(lo->plh_inode);
		if (inode == NULL)
			continue;
		list_del_init(&lo->plh_layouts);
		if (pnfs_layout_add_bulk_destroy_list(inode, layout_list))
			continue;
		rcu_read_unlock();
		spin_unlock(&clp->cl_lock);
		iput(inode);
		spin_lock(&clp->cl_lock);
		rcu_read_lock();
		return -EAGAIN;
	}
	return 0;
}

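/*
 * Invalidate and put every layout header collected on @layout_list.
 * Returns -EAGAIN if any of the layouts still had segments that could not
 * be freed immediately because of outstanding I/O.
 */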
static int
pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
		bool is_bulk_recall)
{
	struct pnfs_layout_hdr *lo;
	struct inode *inode;
	struct pnfs_layout_range range = {
		.iomode = IOMODE_ANY,
		.offset = 0,
		.length = NFS4_MAX_UINT64,
	};
	LIST_HEAD(lseg_list);
	int ret = 0;

	while (!list_empty(layout_list)) {
		lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
				plh_bulk_destroy);
		dprintk("%s freeing layout for inode %lu\n", __func__,
			lo->plh_inode->i_ino);
		inode = lo->plh_inode;
		spin_lock(&inode->i_lock);
		list_del_init(&lo->plh_bulk_destroy);
		lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
		if (is_bulk_recall)
			set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
		if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range))
			ret = -EAGAIN;
		spin_unlock(&inode->i_lock);
		pnfs_free_lseg_list(&lseg_list);
		pnfs_put_layout_hdr(lo);
		iput(inode);
	}
	return ret;
}

int
pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
		struct nfs_fsid *fsid,
		bool is_recall)
{
	struct nfs_server *server;
	LIST_HEAD(layout_list);

	spin_lock(&clp->cl_lock);
	rcu_read_lock();
restart:
	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
		if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
			continue;
		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
				server,
				&layout_list) != 0)
			goto restart;
	}
	rcu_read_unlock();
	spin_unlock(&clp->cl_lock);

	if (list_empty(&layout_list))
		return 0;
	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
}

int
pnfs_destroy_layouts_byclid(struct nfs_client *clp,
		bool is_recall)
{
	struct nfs_server *server;
	LIST_HEAD(layout_list);

	spin_lock(&clp->cl_lock);
	rcu_read_lock();
restart:
	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
				server,
				&layout_list) != 0)
			goto restart;
	}
	rcu_read_unlock();
	spin_unlock(&clp->cl_lock);

	if (list_empty(&layout_list))
		return 0;
	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
}

/*
 * Called by the state manager to remove all layouts established under an
 * expired lease.
 */
void
pnfs_destroy_all_layouts(struct nfs_client *clp)
{
	nfs4_deviceid_mark_client_invalid(clp);
	nfs4_deviceid_purge_client(clp);

	pnfs_destroy_layouts_byclid(clp, false);
}

/*
 * Compare 2 layout stateid sequence ids, to see which is newer,
 * taking into account wraparound issues.  For example, seqid 1 is
 * considered newer than seqid 0xffffffff, since their signed 32-bit
 * difference is positive.
 */
static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
{
	return (s32)s1 - (s32)s2 > 0;
}

/* update lo->plh_stateid with new if it is more recent */
void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
			bool update_barrier)
{
	u32 oldseq, newseq, new_barrier;
	int empty = list_empty(&lo->plh_segs);

	oldseq = be32_to_cpu(lo->plh_stateid.seqid);
	newseq = be32_to_cpu(new->seqid);
	if (empty || pnfs_seqid_is_newer(newseq, oldseq)) {
		nfs4_stateid_copy(&lo->plh_stateid, new);
		if (update_barrier) {
			new_barrier = be32_to_cpu(new->seqid);
		} else {
			/* Because of wraparound, we want to keep the barrier
			 * "close" to the current seqids.
			 */
			new_barrier = newseq - atomic_read(&lo->plh_outstanding);
		}
		if (empty || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
			lo->plh_barrier = new_barrier;
	}
}

static bool
pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo,
		const nfs4_stateid *stateid)
{
	u32 seqid = be32_to_cpu(stateid->seqid);

	return !pnfs_seqid_is_newer(seqid, lo->plh_barrier);
}

/* lget is set to 1 if called from inside send_layoutget call chain */
static bool
pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo, int lget)
{
	return lo->plh_block_lgets ||
		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
		(list_empty(&lo->plh_segs) &&
		 (atomic_read(&lo->plh_outstanding) > lget));
}

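/*
 * Choose the stateid to send in a LAYOUTGET: the open stateid while the
 * layout has no segments, the current layout stateid otherwise.  Returns
 * -EAGAIN if layoutgets are blocked for this layout.
 */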
int
pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
			      struct nfs4_state *open_state)
{
	int status = 0;

	dprintk("--> %s\n", __func__);
	spin_lock(&lo->plh_inode->i_lock);
	if (pnfs_layoutgets_blocked(lo, 1)) {
		status = -EAGAIN;
	} else if (list_empty(&lo->plh_segs)) {
		int seq;

		do {
			seq = read_seqbegin(&open_state->seqlock);
			nfs4_stateid_copy(dst, &open_state->stateid);
		} while (read_seqretry(&open_state->seqlock, seq));
	} else
		nfs4_stateid_copy(dst, &lo->plh_stateid);
	spin_unlock(&lo->plh_inode->i_lock);
	dprintk("<-- %s\n", __func__);
	return status;
}

/*
 * Get layout from server.
 *    for now, assume that whole file layouts are requested.
 *    arg->offset: 0
 *    arg->length: all ones
 */
static struct pnfs_layout_segment *
send_layoutget(struct pnfs_layout_hdr *lo,
	   struct nfs_open_context *ctx,
	   struct pnfs_layout_range *range,
	   gfp_t gfp_flags)
{
	struct inode *ino = lo->plh_inode;
	struct nfs_server *server = NFS_SERVER(ino);
	struct nfs4_layoutget *lgp;
	struct pnfs_layout_segment *lseg;

	dprintk("--> %s\n", __func__);

	lgp = kzalloc(sizeof(*lgp), gfp_flags);
	if (lgp == NULL)
		return NULL;

	lgp->args.minlength = PAGE_CACHE_SIZE;
	if (lgp->args.minlength > range->length)
		lgp->args.minlength = range->length;
	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
	lgp->args.range = *range;
	lgp->args.type = server->pnfs_curr_ld->id;
	lgp->args.inode = ino;
	lgp->args.ctx = get_nfs_open_context(ctx);
	lgp->gfp_flags = gfp_flags;

	/* Synchronously retrieve layout information from server and
	 * store in lseg.
	 */
	lseg = nfs4_proc_layoutget(lgp, gfp_flags);
	if (IS_ERR(lseg)) {
		switch (PTR_ERR(lseg)) {
		case -ENOMEM:
		case -ERESTARTSYS:
			break;
		default:
			/* remember that LAYOUTGET failed and suspend trying */
			pnfs_layout_io_set_failed(lo, range->iomode);
		}
		return NULL;
	}

	return lseg;
}

/*
 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
 * when the layout segment list is empty.
 *
 * Note that a pnfs_layout_hdr can exist with an empty layout segment
 * list when LAYOUTGET has failed, or when LAYOUTGET succeeded, but the
 * deviceid is marked invalid.
 */
int
_pnfs_return_layout(struct inode *ino)
{
	struct pnfs_layout_hdr *lo = NULL;
	struct nfs_inode *nfsi = NFS_I(ino);
	LIST_HEAD(tmp_list);
	struct nfs4_layoutreturn *lrp;
	nfs4_stateid stateid;
	int status = 0, empty;

	dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);

	spin_lock(&ino->i_lock);
	lo = nfsi->layout;
	if (!lo) {
		spin_unlock(&ino->i_lock);
		dprintk("NFS: %s no layout to return\n", __func__);
		goto out;
	}
	stateid = nfsi->layout->plh_stateid;
	/* Reference matched in nfs4_layoutreturn_release */
	pnfs_get_layout_hdr(lo);
	empty = list_empty(&lo->plh_segs);
	pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
	/* Don't send a LAYOUTRETURN if list was initially empty */
	if (empty) {
		spin_unlock(&ino->i_lock);
		pnfs_put_layout_hdr(lo);
		dprintk("NFS: %s no layout segments to return\n", __func__);
		goto out;
	}
	lo->plh_block_lgets++;
	spin_unlock(&ino->i_lock);
	pnfs_free_lseg_list(&tmp_list);

	WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));

	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
	if (unlikely(lrp == NULL)) {
		status = -ENOMEM;
		spin_lock(&ino->i_lock);
		lo->plh_block_lgets--;
		spin_unlock(&ino->i_lock);
		pnfs_put_layout_hdr(lo);
		goto out;
	}

	lrp->args.stateid = stateid;
	lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
	lrp->args.inode = ino;
	lrp->args.layout = lo;
	lrp->clp = NFS_SERVER(ino)->nfs_client;

	status = nfs4_proc_layoutreturn(lrp);
out:
	dprintk("<-- %s status: %d\n", __func__, status);
	return status;
}
EXPORT_SYMBOL_GPL(_pnfs_return_layout);

bool pnfs_roc(struct inode *ino)
{
	struct pnfs_layout_hdr *lo;
	struct pnfs_layout_segment *lseg, *tmp;
	LIST_HEAD(tmp_list);
	bool found = false;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) ||
	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
		goto out_nolayout;
	list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
			mark_lseg_invalid(lseg, &tmp_list);
			found = true;
		}
	if (!found)
		goto out_nolayout;
	lo->plh_block_lgets++;
	pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */
	spin_unlock(&ino->i_lock);
	pnfs_free_lseg_list(&tmp_list);
	return true;

out_nolayout:
	spin_unlock(&ino->i_lock);
	return false;
}

void pnfs_roc_release(struct inode *ino)
{
	struct pnfs_layout_hdr *lo;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	lo->plh_block_lgets--;
	if (atomic_dec_and_test(&lo->plh_refcount)) {
		pnfs_detach_layout_hdr(lo);
		spin_unlock(&ino->i_lock);
		pnfs_free_layout_hdr(lo);
	} else
		spin_unlock(&ino->i_lock);
}

void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
{
	struct pnfs_layout_hdr *lo;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	if (pnfs_seqid_is_newer(barrier, lo->plh_barrier))
		lo->plh_barrier = barrier;
	spin_unlock(&ino->i_lock);
}

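/*
 * Check whether any return-on-close layout segments are still outstanding.
 * If so, put the task to sleep on the server's roc waitqueue and return
 * true so the CLOSE is deferred; otherwise report the worst-case stateid
 * barrier the caller should wait for.
 */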
bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_hdr *lo;
	struct pnfs_layout_segment *lseg;
	u32 current_seqid;
	bool found = false;

	spin_lock(&ino->i_lock);
	list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
			rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
			found = true;
			goto out;
		}
	lo = nfsi->layout;
	current_seqid = be32_to_cpu(lo->plh_stateid.seqid);

	/* Since close does not return a layout stateid for use as
	 * a barrier, we choose the worst-case barrier.
	 */
	*barrier = current_seqid + atomic_read(&lo->plh_outstanding);
out:
	spin_unlock(&ino->i_lock);
	return found;
}

/*
 * Compare two layout segments for sorting into layout cache.
 * We want to preferentially return RW over RO layouts, so ensure those
 * are seen first.
 */
static s64
cmp_layout(struct pnfs_layout_range *l1,
	   struct pnfs_layout_range *l2)
{
	s64 d;

	/* high offset > low offset */
	d = l1->offset - l2->offset;
	if (d)
		return d;

	/* short length > long length */
	d = l2->length - l1->length;
	if (d)
		return d;

	/* read > read/write */
	return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ);
}

static void
pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo,
		   struct pnfs_layout_segment *lseg)
{
	struct pnfs_layout_segment *lp;

	dprintk("%s:Begin\n", __func__);

	list_for_each_entry(lp, &lo->plh_segs, pls_list) {
		if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0)
			continue;
		list_add_tail(&lseg->pls_list, &lp->pls_list);
		dprintk("%s: inserted lseg %p "
			"iomode %d offset %llu length %llu before "
			"lp %p iomode %d offset %llu length %llu\n",
			__func__, lseg, lseg->pls_range.iomode,
			lseg->pls_range.offset, lseg->pls_range.length,
			lp, lp->pls_range.iomode, lp->pls_range.offset,
			lp->pls_range.length);
		goto out;
	}
	list_add_tail(&lseg->pls_list, &lo->plh_segs);
	dprintk("%s: inserted lseg %p "
		"iomode %d offset %llu length %llu at tail\n",
		__func__, lseg, lseg->pls_range.iomode,
		lseg->pls_range.offset, lseg->pls_range.length);
out:
	pnfs_get_layout_hdr(lo);

	dprintk("%s:Return\n", __func__);
}

static struct pnfs_layout_hdr *
alloc_init_layout_hdr(struct inode *ino,
		      struct nfs_open_context *ctx,
		      gfp_t gfp_flags)
{
	struct pnfs_layout_hdr *lo;

	lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
	if (!lo)
		return NULL;
	atomic_set(&lo->plh_refcount, 1);
	INIT_LIST_HEAD(&lo->plh_layouts);
	INIT_LIST_HEAD(&lo->plh_segs);
	INIT_LIST_HEAD(&lo->plh_bulk_destroy);
	lo->plh_inode = ino;
	lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred);
	return lo;
}

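/*
 * Find the layout header for an inode, allocating one if none exists yet.
 * Called with the i_lock held; the lock is dropped around the allocation,
 * so a racing allocator is re-checked before the new header is installed.
 */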
static struct pnfs_layout_hdr *
pnfs_find_alloc_layout(struct inode *ino,
		       struct nfs_open_context *ctx,
		       gfp_t gfp_flags)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_hdr *new = NULL;

	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);

	if (nfsi->layout != NULL)
		goto out_existing;
	spin_unlock(&ino->i_lock);
	new = alloc_init_layout_hdr(ino, ctx, gfp_flags);
	spin_lock(&ino->i_lock);

	if (likely(nfsi->layout == NULL)) {	/* Won the race? */
		nfsi->layout = new;
		return new;
	} else if (new != NULL)
		pnfs_free_layout_hdr(new);
out_existing:
	pnfs_get_layout_hdr(nfsi->layout);
	return nfsi->layout;
}

/*
 * iomode matching rules:
 * iomode	lseg	match
 * -----	-----	-----
 * ANY		READ	true
 * ANY		RW	true
 * RW		READ	false
 * RW		RW	true
 * READ		READ	true
 * READ		RW	true
 */
static int
is_matching_lseg(struct pnfs_layout_range *ls_range,
		 struct pnfs_layout_range *range)
{
	struct pnfs_layout_range range1;

	if ((range->iomode == IOMODE_RW &&
	     ls_range->iomode != IOMODE_RW) ||
	    !lo_seg_intersecting(ls_range, range))
		return 0;

	/* range1 covers only the first byte in the range */
	range1 = *range;
	range1.length = 1;
	return lo_seg_contained(ls_range, &range1);
}

/*
 * lookup range in layout
 */
static struct pnfs_layout_segment *
pnfs_find_lseg(struct pnfs_layout_hdr *lo,
		struct pnfs_layout_range *range)
{
	struct pnfs_layout_segment *lseg, *ret = NULL;

	dprintk("%s:Begin\n", __func__);

	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
		    is_matching_lseg(&lseg->pls_range, range)) {
			ret = pnfs_get_lseg(lseg);
			break;
		}
		if (lseg->pls_range.offset > range->offset)
			break;
	}

	dprintk("%s:Return lseg %p ref %d\n",
		__func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
	return ret;
}

/*
 * Use mdsthreshold hints set at each OPEN to determine if I/O should go
 * to the MDS or over pNFS
 *
 * The nfs_inode read_io and write_io fields are cumulative counters reset
 * when there are no layout segments. Note that in pnfs_update_layout iomode
 * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
 * WRITE request.
 *
 * A return of true means use MDS I/O.
 *
 * From rfc 5661:
 * If a file's size is smaller than the file size threshold, data accesses
 * SHOULD be sent to the metadata server.  If an I/O request has a length that
 * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
 * server.  If both file size and I/O size are provided, the client SHOULD
 * reach or exceed both thresholds before sending its read or write
 * requests to the data server.
 */
static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
				     struct inode *ino, int iomode)
{
	struct nfs4_threshold *t = ctx->mdsthreshold;
	struct nfs_inode *nfsi = NFS_I(ino);
	loff_t fsize = i_size_read(ino);
	bool size = false, size_set = false, io = false, io_set = false, ret = false;

	if (t == NULL)
		return ret;

	dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
		__func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);

	switch (iomode) {
	case IOMODE_READ:
		if (t->bm & THRESHOLD_RD) {
			dprintk("%s fsize %llu\n", __func__, fsize);
			size_set = true;
			if (fsize < t->rd_sz)
				size = true;
		}
		if (t->bm & THRESHOLD_RD_IO) {
			dprintk("%s nfsi->read_io %llu\n", __func__,
				nfsi->read_io);
			io_set = true;
			if (nfsi->read_io < t->rd_io_sz)
				io = true;
		}
		break;
	case IOMODE_RW:
		if (t->bm & THRESHOLD_WR) {
			dprintk("%s fsize %llu\n", __func__, fsize);
			size_set = true;
			if (fsize < t->wr_sz)
				size = true;
		}
		if (t->bm & THRESHOLD_WR_IO) {
			dprintk("%s nfsi->write_io %llu\n", __func__,
				nfsi->write_io);
			io_set = true;
			if (nfsi->write_io < t->wr_io_sz)
				io = true;
		}
		break;
	}
	if (size_set && io_set) {
		if (size && io)
			ret = true;
	} else if (size || io)
		ret = true;

	dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
	return ret;
}

/*
 * Layout segment is retrieved from the server if not cached.
 * The appropriate layout segment is referenced and returned to the caller.
 */
struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino,
		   struct nfs_open_context *ctx,
		   loff_t pos,
		   u64 count,
		   enum pnfs_iomode iomode,
		   gfp_t gfp_flags)
{
	struct pnfs_layout_range arg = {
		.iomode = iomode,
		.offset = pos,
		.length = count,
	};
	unsigned pg_offset;
	struct nfs_server *server = NFS_SERVER(ino);
	struct nfs_client *clp = server->nfs_client;
	struct pnfs_layout_hdr *lo;
	struct pnfs_layout_segment *lseg = NULL;
	bool first = false;

	if (!pnfs_enabled_sb(NFS_SERVER(ino)))
		goto out;

	if (pnfs_within_mdsthreshold(ctx, ino, iomode))
		goto out;

	spin_lock(&ino->i_lock);
	lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
	if (lo == NULL) {
		spin_unlock(&ino->i_lock);
		goto out;
	}

	/* Do we even need to bother with this? */
	if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
		dprintk("%s matches recall, use MDS\n", __func__);
		goto out_unlock;
	}

	/* if LAYOUTGET already failed once we don't try again */
	if (pnfs_layout_io_test_failed(lo, iomode))
		goto out_unlock;

	/* Check to see if the layout for the given range already exists */
	lseg = pnfs_find_lseg(lo, &arg);
	if (lseg)
		goto out_unlock;

	if (pnfs_layoutgets_blocked(lo, 0))
		goto out_unlock;
	atomic_inc(&lo->plh_outstanding);

	if (list_empty(&lo->plh_segs))
		first = true;

	spin_unlock(&ino->i_lock);
	if (first) {
		/* The lo must be on the clp list if there is any
		 * chance of a CB_LAYOUTRECALL(FILE) coming in.
		 */
		spin_lock(&clp->cl_lock);
		list_add_tail(&lo->plh_layouts, &server->layouts);
		spin_unlock(&clp->cl_lock);
	}

	pg_offset = arg.offset & ~PAGE_CACHE_MASK;
	if (pg_offset) {
		arg.offset -= pg_offset;
		arg.length += pg_offset;
	}
	if (arg.length != NFS4_MAX_UINT64)
		arg.length = PAGE_CACHE_ALIGN(arg.length);

	lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
	atomic_dec(&lo->plh_outstanding);
out_put_layout_hdr:
	pnfs_put_layout_hdr(lo);
out:
	dprintk("%s: inode %s/%llu pNFS layout segment %s for "
			"(%s, offset: %llu, length: %llu)\n",
			__func__, ino->i_sb->s_id,
			(unsigned long long)NFS_FILEID(ino),
			lseg == NULL ? "not found" : "found",
			iomode == IOMODE_RW ? "read/write" : "read-only",
			(unsigned long long)pos,
			(unsigned long long)count);
	return lseg;
out_unlock:
	spin_unlock(&ino->i_lock);
	goto out_put_layout_hdr;
}
EXPORT_SYMBOL_GPL(pnfs_update_layout);

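/*
 * Handle a LAYOUTGET reply: hand the returned layout blob to the layout
 * driver's alloc_lseg and add the resulting segment to the layout cache,
 * unless a bulk recall or a stale stateid means the reply must be forgotten.
 */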
struct pnfs_layout_segment *
pnfs_layout_process(struct nfs4_layoutget *lgp)
{
	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
	struct nfs4_layoutget_res *res = &lgp->res;
	struct pnfs_layout_segment *lseg;
	struct inode *ino = lo->plh_inode;
	int status = 0;

	/* Inject layout blob into I/O device driver */
	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
	if (!lseg || IS_ERR(lseg)) {
		if (!lseg)
			status = -ENOMEM;
		else
			status = PTR_ERR(lseg);
		dprintk("%s: Could not allocate layout: error %d\n",
		       __func__, status);
		goto out;
	}

	spin_lock(&ino->i_lock);
	if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
		dprintk("%s forget reply due to recall\n", __func__);
		goto out_forget_reply;
	}

	if (pnfs_layoutgets_blocked(lo, 1) ||
	    pnfs_layout_stateid_blocked(lo, &res->stateid)) {
		dprintk("%s forget reply due to state\n", __func__);
		goto out_forget_reply;
	}

	/* Done processing layoutget. Set the layout stateid */
	pnfs_set_layout_stateid(lo, &res->stateid, false);

	init_lseg(lo, lseg);
	lseg->pls_range = res->range;
	pnfs_get_lseg(lseg);
	pnfs_layout_insert_lseg(lo, lseg);

	if (res->return_on_close) {
		set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
		set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
	}

	spin_unlock(&ino->i_lock);
	return lseg;
out:
	return ERR_PTR(status);

out_forget_reply:
	spin_unlock(&ino->i_lock);
	lseg->pls_layout = lo;
	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
	goto out;
}

void
pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
	u64 rd_size = req->wb_bytes;

	WARN_ON_ONCE(pgio->pg_lseg != NULL);

	if (req->wb_offset != req->wb_pgbase) {
		nfs_pageio_reset_read_mds(pgio);
		return;
	}

	if (pgio->pg_dreq == NULL)
		rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
	else
		rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);

	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
					   req->wb_context,
					   req_offset(req),
					   rd_size,
					   IOMODE_READ,
					   GFP_KERNEL);
	/* If no lseg, fall back to read through mds */
	if (pgio->pg_lseg == NULL)
		nfs_pageio_reset_read_mds(pgio);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);

void
pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
			   struct nfs_page *req, u64 wb_size)
{
	WARN_ON_ONCE(pgio->pg_lseg != NULL);

	if (req->wb_offset != req->wb_pgbase) {
		nfs_pageio_reset_write_mds(pgio);
		return;
	}

	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
					   req->wb_context,
					   req_offset(req),
					   wb_size,
					   IOMODE_RW,
					   GFP_NOFS);
	/* If no lseg, fall back to write through mds */
	if (pgio->pg_lseg == NULL)
		nfs_pageio_reset_write_mds(pgio);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);

void
pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
		      const struct nfs_pgio_completion_ops *compl_ops)
{
	struct nfs_server *server = NFS_SERVER(inode);
	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;

	if (ld == NULL)
		nfs_pageio_init_read(pgio, inode, compl_ops);
	else
		nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0);
}

void
pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
		       int ioflags,
		       const struct nfs_pgio_completion_ops *compl_ops)
{
	struct nfs_server *server = NFS_SERVER(inode);
	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;

	if (ld == NULL)
		nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
	else
		nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags);
}

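/*
 * Generic pg_test hook for pNFS: a request may only be coalesced into the
 * current pageio descriptor if it still falls within the cached layout
 * segment (or, with no segment, if the generic MDS test allows it).
 */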
bool
pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
		     struct nfs_page *req)
{
	if (pgio->pg_lseg == NULL)
		return nfs_generic_pg_test(pgio, prev, req);

	/*
	 * Test if a nfs_page is fully contained in the pnfs_layout_range.
	 * Note that this test makes several assumptions:
	 * - that the previous nfs_page in the struct nfs_pageio_descriptor
	 *   is known to lie within the range.
	 * - that the nfs_page being tested is known to be contiguous with the
	 *   previous nfs_page.
	 * - Layout ranges are page aligned, so we only have to test the
	 *   start offset of the request.
	 *
	 * Please also note that 'end_offset' is actually the offset of the
	 * first byte that lies outside the pnfs_layout_range. FIXME?
	 *
	 */
	return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset,
					 pgio->pg_lseg->pls_range.length);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);

int pnfs_write_done_resend_to_mds(struct inode *inode,
				struct list_head *head,
				const struct nfs_pgio_completion_ops *compl_ops)
{
	struct nfs_pageio_descriptor pgio;
	LIST_HEAD(failed);

	/* Resend all requests through the MDS */
	nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops);
	while (!list_empty(head)) {
		struct nfs_page *req = nfs_list_entry(head->next);

		nfs_list_remove_request(req);
		if (!nfs_pageio_add_request(&pgio, req))
			nfs_list_add_request(req, &failed);
	}
	nfs_pageio_complete(&pgio);

	if (!list_empty(&failed)) {
		/* For some reason our attempt to resend pages through the
		 * MDS failed.  Mark the overall send request as having failed,
		 * and let nfs_writeback_release_full deal with the error.
		 */
		list_move(&failed, head);
		return -EIO;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);

static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
{
	struct nfs_pgio_header *hdr = data->header;

	dprintk("pnfs write error = %d\n", hdr->pnfs_error);
	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
	    PNFS_LAYOUTRET_ON_ERROR) {
		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
		pnfs_return_layout(hdr->inode);
	}
	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
		data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
							&hdr->pages,
							hdr->completion_ops);
}

/*
 * Called by non rpc-based layout drivers
 */
void pnfs_ld_write_done(struct nfs_write_data *data)
{
	struct nfs_pgio_header *hdr = data->header;

	if (!hdr->pnfs_error) {
		pnfs_set_layoutcommit(data);
		hdr->mds_ops->rpc_call_done(&data->task, data);
	} else
		pnfs_ld_handle_write_error(data);
	hdr->mds_ops->rpc_release(data);
}
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);

static void
pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
		struct nfs_write_data *data)
{
	struct nfs_pgio_header *hdr = data->header;

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		list_splice_tail_init(&hdr->pages, &desc->pg_list);
		nfs_pageio_reset_write_mds(desc);
		desc->pg_recoalesce = 1;
	}
	nfs_writedata_release(data);
}

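/*
 * Hand a write off to the layout driver's write_pagelist method.  A return
 * of PNFS_NOT_ATTEMPTED tells the caller to fall back to writing through
 * the MDS.
 */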
static enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_write_data *wdata,
			const struct rpc_call_ops *call_ops,
			struct pnfs_layout_segment *lseg,
			int how)
{
	struct nfs_pgio_header *hdr = wdata->header;
	struct inode *inode = hdr->inode;
	enum pnfs_try_status trypnfs;
	struct nfs_server *nfss = NFS_SERVER(inode);

	hdr->mds_ops = call_ops;

	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
		inode->i_ino, wdata->args.count, wdata->args.offset, how);
	trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
	if (trypnfs != PNFS_NOT_ATTEMPTED)
		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
	return trypnfs;
}

static void
pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
{
	struct nfs_write_data *data;
	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
	struct pnfs_layout_segment *lseg = desc->pg_lseg;

	desc->pg_lseg = NULL;
	while (!list_empty(head)) {
		enum pnfs_try_status trypnfs;

		data = list_first_entry(head, struct nfs_write_data, list);
		list_del_init(&data->list);

		trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
		if (trypnfs == PNFS_NOT_ATTEMPTED)
			pnfs_write_through_mds(desc, data);
	}
	pnfs_put_lseg(lseg);
}

static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
{
	pnfs_put_lseg(hdr->lseg);
	nfs_writehdr_free(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_writehdr_free);

int
pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
	struct nfs_write_header *whdr;
	struct nfs_pgio_header *hdr;
	int ret;

	whdr = nfs_writehdr_alloc();
	if (!whdr) {
		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
		pnfs_put_lseg(desc->pg_lseg);
		desc->pg_lseg = NULL;
		return -ENOMEM;
	}
	hdr = &whdr->header;
	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
	atomic_inc(&hdr->refcnt);
	ret = nfs_generic_flush(desc, hdr);
	if (ret != 0) {
		pnfs_put_lseg(desc->pg_lseg);
		desc->pg_lseg = NULL;
	} else
		pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
	if (atomic_dec_and_test(&hdr->refcnt))
		hdr->completion_ops->completion(hdr);
	return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);

int pnfs_read_done_resend_to_mds(struct inode *inode,
				struct list_head *head,
				const struct nfs_pgio_completion_ops *compl_ops)
{
	struct nfs_pageio_descriptor pgio;
	LIST_HEAD(failed);

	/* Resend all requests through the MDS */
	nfs_pageio_init_read(&pgio, inode, compl_ops);
	while (!list_empty(head)) {
		struct nfs_page *req = nfs_list_entry(head->next);

		nfs_list_remove_request(req);
		if (!nfs_pageio_add_request(&pgio, req))
			nfs_list_add_request(req, &failed);
	}
	nfs_pageio_complete(&pgio);

	if (!list_empty(&failed)) {
		list_move(&failed, head);
		return -EIO;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);

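/*
 * A pNFS read failed in the layout driver: optionally return the layout
 * (if the driver sets PNFS_LAYOUTRET_ON_ERROR) and resend the pages
 * through the MDS.
 */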
static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
{
	struct nfs_pgio_header *hdr = data->header;

	dprintk("pnfs read error = %d\n", hdr->pnfs_error);
	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
	    PNFS_LAYOUTRET_ON_ERROR) {
		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
		pnfs_return_layout(hdr->inode);
	}
	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
		data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
							&hdr->pages,
							hdr->completion_ops);
}

/*
 * Called by non rpc-based layout drivers
 */
void pnfs_ld_read_done(struct nfs_read_data *data)
{
	struct nfs_pgio_header *hdr = data->header;

	if (likely(!hdr->pnfs_error)) {
		__nfs4_read_done_cb(data);
		hdr->mds_ops->rpc_call_done(&data->task, data);
	} else
		pnfs_ld_handle_read_error(data);
	hdr->mds_ops->rpc_release(data);
}
EXPORT_SYMBOL_GPL(pnfs_ld_read_done);

static void
pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
		struct nfs_read_data *data)
{
	struct nfs_pgio_header *hdr = data->header;

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		list_splice_tail_init(&hdr->pages, &desc->pg_list);
		nfs_pageio_reset_read_mds(desc);
		desc->pg_recoalesce = 1;
	}
	nfs_readdata_release(data);
}

/*
 * Call the appropriate parallel I/O subsystem read function.
 */
static enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_read_data *rdata,
		       const struct rpc_call_ops *call_ops,
		       struct pnfs_layout_segment *lseg)
{
	struct nfs_pgio_header *hdr = rdata->header;
	struct inode *inode = hdr->inode;
	struct nfs_server *nfss = NFS_SERVER(inode);
	enum pnfs_try_status trypnfs;

	hdr->mds_ops = call_ops;

	dprintk("%s: Reading ino:%lu %u@%llu\n",
		__func__, inode->i_ino, rdata->args.count, rdata->args.offset);

	trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
	if (trypnfs != PNFS_NOT_ATTEMPTED)
		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
	return trypnfs;
}

static void
pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
{
	struct nfs_read_data *data;
	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
	struct pnfs_layout_segment *lseg = desc->pg_lseg;

	desc->pg_lseg = NULL;
	while (!list_empty(head)) {
		enum pnfs_try_status trypnfs;

		data = list_first_entry(head, struct nfs_read_data, list);
		list_del_init(&data->list);

		trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
		if (trypnfs == PNFS_NOT_ATTEMPTED)
			pnfs_read_through_mds(desc, data);
	}
	pnfs_put_lseg(lseg);
}

static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
{
	pnfs_put_lseg(hdr->lseg);
	nfs_readhdr_free(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_readhdr_free);

int
pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
	struct nfs_read_header *rhdr;
	struct nfs_pgio_header *hdr;
	int ret;

	rhdr = nfs_readhdr_alloc();
	if (!rhdr) {
		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
		ret = -ENOMEM;
		pnfs_put_lseg(desc->pg_lseg);
		desc->pg_lseg = NULL;
		return ret;
	}
	hdr = &rhdr->header;
	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
	atomic_inc(&hdr->refcnt);
	ret = nfs_generic_pagein(desc, hdr);
	if (ret != 0) {
		pnfs_put_lseg(desc->pg_lseg);
		desc->pg_lseg = NULL;
	} else
		pnfs_do_multiple_reads(desc, &hdr->rpc_list);
	if (atomic_dec_and_test(&hdr->refcnt))
		hdr->completion_ops->completion(hdr);
	return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);

/*
 * There can be multiple RW segments.
 */
static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
{
	struct pnfs_layout_segment *lseg;

	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
		if (lseg->pls_range.iomode == IOMODE_RW &&
		    test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
			list_add(&lseg->pls_lc_list, listp);
	}
}

void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
{
	pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
}
EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);

void
pnfs_set_layoutcommit(struct nfs_write_data *wdata)
{
	struct nfs_pgio_header *hdr = wdata->header;
	struct inode *inode = hdr->inode;
	struct nfs_inode *nfsi = NFS_I(inode);
	loff_t end_pos = wdata->mds_offset + wdata->res.count;
	bool mark_as_dirty = false;

	spin_lock(&inode->i_lock);
	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
		mark_as_dirty = true;
		dprintk("%s: Set layoutcommit for inode %lu ",
			__func__, inode->i_ino);
	}
	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {
		/* references matched in nfs4_layoutcommit_release */
		pnfs_get_lseg(hdr->lseg);
	}
	if (end_pos > nfsi->layout->plh_lwb)
		nfsi->layout->plh_lwb = end_pos;
	spin_unlock(&inode->i_lock);
	dprintk("%s: lseg %p end_pos %llu\n",
		__func__, hdr->lseg, nfsi->layout->plh_lwb);

	/* if pnfs_layoutcommit_inode() runs between inode locks, the next one
	 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
	if (mark_as_dirty)
		mark_inode_dirty_sync(inode);
}
EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);

void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
{
	struct nfs_server *nfss = NFS_SERVER(data->args.inode);

	if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
		nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
}

/*
 * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
 * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
 * data to disk to allow the server to recover the data if it crashes.
 * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
 * is off, and a COMMIT is sent to a data server, or
 * if WRITEs to a data server return NFS_DATA_SYNC.
 */
int
pnfs_layoutcommit_inode(struct inode *inode, bool sync)
{
	struct nfs4_layoutcommit_data *data;
	struct nfs_inode *nfsi = NFS_I(inode);
	loff_t end_pos;
	int status = 0;

	dprintk("--> %s inode %lu\n", __func__, inode->i_ino);

	if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
		return 0;

	/* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
	data = kzalloc(sizeof(*data), GFP_NOFS);
	if (!data) {
		status = -ENOMEM;
		goto out;
	}

	if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
		goto out_free;

	if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
		if (!sync) {
			status = -EAGAIN;
			goto out_free;
		}
		status = wait_on_bit_lock(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING,
					nfs_wait_bit_killable, TASK_KILLABLE);
		if (status)
			goto out_free;
	}

	INIT_LIST_HEAD(&data->lseg_list);
	spin_lock(&inode->i_lock);
	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
		clear_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags);
		spin_unlock(&inode->i_lock);
		wake_up_bit(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING);
		goto out_free;
	}

	pnfs_list_write_lseg(inode, &data->lseg_list);

	end_pos = nfsi->layout->plh_lwb;
	nfsi->layout->plh_lwb = 0;

	nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid);
	spin_unlock(&inode->i_lock);

	data->args.inode = inode;
	data->cred = get_rpccred(nfsi->layout->plh_lc_cred);
	nfs_fattr_init(&data->fattr);
	data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
	data->res.fattr = &data->fattr;
	data->args.lastbytewritten = end_pos - 1;
	data->res.server = NFS_SERVER(inode);

	status = nfs4_proc_layoutcommit(data, sync);
out:
	if (status)
		mark_inode_dirty_sync(inode);
	dprintk("<-- %s status %d\n", __func__, status);
	return status;
out_free:
	kfree(data);
	goto out;
}

struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
{
	struct nfs4_threshold *thp;

	thp = kzalloc(sizeof(*thp), GFP_NOFS);
	if (!thp) {
		dprintk("%s mdsthreshold allocation failed\n", __func__);
		return NULL;
	}
	return thp;
}