1 /* 2 * pNFS functions to call and manage layout drivers. 3 * 4 * Copyright (c) 2002 [year of first publication] 5 * The Regents of the University of Michigan 6 * All Rights Reserved 7 * 8 * Dean Hildebrand <dhildebz@umich.edu> 9 * 10 * Permission is granted to use, copy, create derivative works, and 11 * redistribute this software and such derivative works for any purpose, 12 * so long as the name of the University of Michigan is not used in 13 * any advertising or publicity pertaining to the use or distribution 14 * of this software without specific, written prior authorization. If 15 * the above copyright notice or any other identification of the 16 * University of Michigan is included in any copy of any portion of 17 * this software, then the disclaimer below must also be included. 18 * 19 * This software is provided as is, without representation or warranty 20 * of any kind either express or implied, including without limitation 21 * the implied warranties of merchantability, fitness for a particular 22 * purpose, or noninfringement. The Regents of the University of 23 * Michigan shall not be liable for any damages, including special, 24 * indirect, incidental, or consequential damages, with respect to any 25 * claim arising out of or in connection with the use of the software, 26 * even if it has been or is hereafter advised of the possibility of 27 * such damages. 28 */ 29 30 #include <linux/nfs_fs.h> 31 #include <linux/nfs_page.h> 32 #include <linux/module.h> 33 #include "internal.h" 34 #include "pnfs.h" 35 #include "iostat.h" 36 #include "nfs4trace.h" 37 #include "delegation.h" 38 #include "nfs42.h" 39 40 #define NFSDBG_FACILITY NFSDBG_PNFS 41 #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) 42 43 /* Locking: 44 * 45 * pnfs_spinlock: 46 * protects pnfs_modules_tbl. 47 */ 48 static DEFINE_SPINLOCK(pnfs_spinlock); 49 50 /* 51 * pnfs_modules_tbl holds all pnfs modules 52 */ 53 static LIST_HEAD(pnfs_modules_tbl); 54 55 static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo); 56 57 /* Return the registered pnfs layout driver module matching given id */ 58 static struct pnfs_layoutdriver_type * 59 find_pnfs_driver_locked(u32 id) 60 { 61 struct pnfs_layoutdriver_type *local; 62 63 list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid) 64 if (local->id == id) 65 goto out; 66 local = NULL; 67 out: 68 dprintk("%s: Searching for id %u, found %p\n", __func__, id, local); 69 return local; 70 } 71 72 static struct pnfs_layoutdriver_type * 73 find_pnfs_driver(u32 id) 74 { 75 struct pnfs_layoutdriver_type *local; 76 77 spin_lock(&pnfs_spinlock); 78 local = find_pnfs_driver_locked(id); 79 if (local != NULL && !try_module_get(local->owner)) { 80 dprintk("%s: Could not grab reference on module\n", __func__); 81 local = NULL; 82 } 83 spin_unlock(&pnfs_spinlock); 84 return local; 85 } 86 87 void 88 unset_pnfs_layoutdriver(struct nfs_server *nfss) 89 { 90 if (nfss->pnfs_curr_ld) { 91 if (nfss->pnfs_curr_ld->clear_layoutdriver) 92 nfss->pnfs_curr_ld->clear_layoutdriver(nfss); 93 /* Decrement the MDS count. Purge the deviceid cache if zero */ 94 if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count)) 95 nfs4_deviceid_purge_client(nfss->nfs_client); 96 module_put(nfss->pnfs_curr_ld->owner); 97 } 98 nfss->pnfs_curr_ld = NULL; 99 } 100 101 /* 102 * Try to set the server's pnfs module to the pnfs layout type specified by id. 103 * Currently only one pNFS layout driver per filesystem is supported. 104 * 105 * @id layout type. Zero (illegal layout type) indicates pNFS not in use. 106 */ 107 void 108 set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, 109 u32 id) 110 { 111 struct pnfs_layoutdriver_type *ld_type = NULL; 112 113 if (id == 0) 114 goto out_no_driver; 115 if (!(server->nfs_client->cl_exchange_flags & 116 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { 117 printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n", 118 __func__, id, server->nfs_client->cl_exchange_flags); 119 goto out_no_driver; 120 } 121 ld_type = find_pnfs_driver(id); 122 if (!ld_type) { 123 request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id); 124 ld_type = find_pnfs_driver(id); 125 if (!ld_type) { 126 dprintk("%s: No pNFS module found for %u.\n", 127 __func__, id); 128 goto out_no_driver; 129 } 130 } 131 server->pnfs_curr_ld = ld_type; 132 if (ld_type->set_layoutdriver 133 && ld_type->set_layoutdriver(server, mntfh)) { 134 printk(KERN_ERR "NFS: %s: Error initializing pNFS layout " 135 "driver %u.\n", __func__, id); 136 module_put(ld_type->owner); 137 goto out_no_driver; 138 } 139 /* Bump the MDS count */ 140 atomic_inc(&server->nfs_client->cl_mds_count); 141 142 dprintk("%s: pNFS module for %u set\n", __func__, id); 143 return; 144 145 out_no_driver: 146 dprintk("%s: Using NFSv4 I/O\n", __func__); 147 server->pnfs_curr_ld = NULL; 148 } 149 150 int 151 pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) 152 { 153 int status = -EINVAL; 154 struct pnfs_layoutdriver_type *tmp; 155 156 if (ld_type->id == 0) { 157 printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__); 158 return status; 159 } 160 if (!ld_type->alloc_lseg || !ld_type->free_lseg) { 161 printk(KERN_ERR "NFS: %s Layout driver must provide " 162 "alloc_lseg and free_lseg.\n", __func__); 163 return status; 164 } 165 166 spin_lock(&pnfs_spinlock); 167 tmp = find_pnfs_driver_locked(ld_type->id); 168 if (!tmp) { 169 list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl); 170 status = 0; 171 dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id, 172 ld_type->name); 173 } else { 174 printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n", 175 __func__, ld_type->id); 176 } 177 spin_unlock(&pnfs_spinlock); 178 179 return status; 180 } 181 EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver); 182 183 void 184 pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type) 185 { 186 dprintk("%s Deregistering id:%u\n", __func__, ld_type->id); 187 spin_lock(&pnfs_spinlock); 188 list_del(&ld_type->pnfs_tblid); 189 spin_unlock(&pnfs_spinlock); 190 } 191 EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); 192 193 /* 194 * pNFS client layout cache 195 */ 196 197 /* Need to hold i_lock if caller does not already hold reference */ 198 void 199 pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo) 200 { 201 atomic_inc(&lo->plh_refcount); 202 } 203 204 static struct pnfs_layout_hdr * 205 pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags) 206 { 207 struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; 208 return ld->alloc_layout_hdr(ino, gfp_flags); 209 } 210 211 static void 212 pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) 213 { 214 struct nfs_server *server = NFS_SERVER(lo->plh_inode); 215 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; 216 217 if (!list_empty(&lo->plh_layouts)) { 218 struct nfs_client *clp = server->nfs_client; 219 220 spin_lock(&clp->cl_lock); 221 list_del_init(&lo->plh_layouts); 222 spin_unlock(&clp->cl_lock); 223 } 224 put_rpccred(lo->plh_lc_cred); 225 return ld->free_layout_hdr(lo); 226 } 227 228 static void 229 pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo) 230 { 231 struct nfs_inode *nfsi = NFS_I(lo->plh_inode); 232 dprintk("%s: freeing layout cache %p\n", __func__, lo); 233 nfsi->layout = NULL; 234 /* Reset MDS Threshold I/O counters */ 235 nfsi->write_io = 0; 236 nfsi->read_io = 0; 237 } 238 239 void 240 pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) 241 { 242 struct inode *inode = lo->plh_inode; 243 244 pnfs_layoutreturn_before_put_layout_hdr(lo); 245 246 if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { 247 if (!list_empty(&lo->plh_segs)) 248 WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); 249 pnfs_detach_layout_hdr(lo); 250 spin_unlock(&inode->i_lock); 251 pnfs_free_layout_hdr(lo); 252 } 253 } 254 255 /* 256 * Mark a pnfs_layout_hdr and all associated layout segments as invalid 257 * 258 * In order to continue using the pnfs_layout_hdr, a full recovery 259 * is required. 260 * Note that caller must hold inode->i_lock. 261 */ 262 static int 263 pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, 264 struct list_head *lseg_list) 265 { 266 struct pnfs_layout_range range = { 267 .iomode = IOMODE_ANY, 268 .offset = 0, 269 .length = NFS4_MAX_UINT64, 270 }; 271 272 set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); 273 return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range, 0); 274 } 275 276 static int 277 pnfs_iomode_to_fail_bit(u32 iomode) 278 { 279 return iomode == IOMODE_RW ? 280 NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; 281 } 282 283 static void 284 pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) 285 { 286 lo->plh_retry_timestamp = jiffies; 287 if (!test_and_set_bit(fail_bit, &lo->plh_flags)) 288 atomic_inc(&lo->plh_refcount); 289 } 290 291 static void 292 pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) 293 { 294 if (test_and_clear_bit(fail_bit, &lo->plh_flags)) 295 atomic_dec(&lo->plh_refcount); 296 } 297 298 static void 299 pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode) 300 { 301 struct inode *inode = lo->plh_inode; 302 struct pnfs_layout_range range = { 303 .iomode = iomode, 304 .offset = 0, 305 .length = NFS4_MAX_UINT64, 306 }; 307 LIST_HEAD(head); 308 309 spin_lock(&inode->i_lock); 310 pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); 311 pnfs_mark_matching_lsegs_invalid(lo, &head, &range, 0); 312 spin_unlock(&inode->i_lock); 313 pnfs_free_lseg_list(&head); 314 dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__, 315 iomode == IOMODE_RW ? "RW" : "READ"); 316 } 317 318 static bool 319 pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode) 320 { 321 unsigned long start, end; 322 int fail_bit = pnfs_iomode_to_fail_bit(iomode); 323 324 if (test_bit(fail_bit, &lo->plh_flags) == 0) 325 return false; 326 end = jiffies; 327 start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT; 328 if (!time_in_range(lo->plh_retry_timestamp, start, end)) { 329 /* It is time to retry the failed layoutgets */ 330 pnfs_layout_clear_fail_bit(lo, fail_bit); 331 return false; 332 } 333 return true; 334 } 335 336 static void 337 init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) 338 { 339 INIT_LIST_HEAD(&lseg->pls_list); 340 INIT_LIST_HEAD(&lseg->pls_lc_list); 341 atomic_set(&lseg->pls_refcount, 1); 342 smp_mb(); 343 set_bit(NFS_LSEG_VALID, &lseg->pls_flags); 344 lseg->pls_layout = lo; 345 } 346 347 static void pnfs_free_lseg(struct pnfs_layout_segment *lseg) 348 { 349 struct inode *ino = lseg->pls_layout->plh_inode; 350 351 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); 352 } 353 354 static void 355 pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, 356 struct pnfs_layout_segment *lseg) 357 { 358 struct inode *inode = lo->plh_inode; 359 360 WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 361 list_del_init(&lseg->pls_list); 362 /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ 363 atomic_dec(&lo->plh_refcount); 364 if (list_empty(&lo->plh_segs)) { 365 set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); 366 clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); 367 } 368 rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); 369 } 370 371 void 372 pnfs_put_lseg(struct pnfs_layout_segment *lseg) 373 { 374 struct pnfs_layout_hdr *lo; 375 struct inode *inode; 376 377 if (!lseg) 378 return; 379 380 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, 381 atomic_read(&lseg->pls_refcount), 382 test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 383 384 lo = lseg->pls_layout; 385 inode = lo->plh_inode; 386 387 if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { 388 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { 389 spin_unlock(&inode->i_lock); 390 return; 391 } 392 pnfs_get_layout_hdr(lo); 393 pnfs_layout_remove_lseg(lo, lseg); 394 spin_unlock(&inode->i_lock); 395 pnfs_free_lseg(lseg); 396 pnfs_put_layout_hdr(lo); 397 } 398 } 399 EXPORT_SYMBOL_GPL(pnfs_put_lseg); 400 401 static void pnfs_free_lseg_async_work(struct work_struct *work) 402 { 403 struct pnfs_layout_segment *lseg; 404 struct pnfs_layout_hdr *lo; 405 406 lseg = container_of(work, struct pnfs_layout_segment, pls_work); 407 lo = lseg->pls_layout; 408 409 pnfs_free_lseg(lseg); 410 pnfs_put_layout_hdr(lo); 411 } 412 413 static void pnfs_free_lseg_async(struct pnfs_layout_segment *lseg) 414 { 415 INIT_WORK(&lseg->pls_work, pnfs_free_lseg_async_work); 416 schedule_work(&lseg->pls_work); 417 } 418 419 void 420 pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg) 421 { 422 if (!lseg) 423 return; 424 425 assert_spin_locked(&lseg->pls_layout->plh_inode->i_lock); 426 427 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, 428 atomic_read(&lseg->pls_refcount), 429 test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 430 if (atomic_dec_and_test(&lseg->pls_refcount)) { 431 struct pnfs_layout_hdr *lo = lseg->pls_layout; 432 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) 433 return; 434 pnfs_get_layout_hdr(lo); 435 pnfs_layout_remove_lseg(lo, lseg); 436 pnfs_free_lseg_async(lseg); 437 } 438 } 439 EXPORT_SYMBOL_GPL(pnfs_put_lseg_locked); 440 441 static u64 442 end_offset(u64 start, u64 len) 443 { 444 u64 end; 445 446 end = start + len; 447 return end >= start ? end : NFS4_MAX_UINT64; 448 } 449 450 /* 451 * is l2 fully contained in l1? 452 * start1 end1 453 * [----------------------------------) 454 * start2 end2 455 * [----------------) 456 */ 457 static bool 458 pnfs_lseg_range_contained(const struct pnfs_layout_range *l1, 459 const struct pnfs_layout_range *l2) 460 { 461 u64 start1 = l1->offset; 462 u64 end1 = end_offset(start1, l1->length); 463 u64 start2 = l2->offset; 464 u64 end2 = end_offset(start2, l2->length); 465 466 return (start1 <= start2) && (end1 >= end2); 467 } 468 469 /* 470 * is l1 and l2 intersecting? 471 * start1 end1 472 * [----------------------------------) 473 * start2 end2 474 * [----------------) 475 */ 476 static bool 477 pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1, 478 const struct pnfs_layout_range *l2) 479 { 480 u64 start1 = l1->offset; 481 u64 end1 = end_offset(start1, l1->length); 482 u64 start2 = l2->offset; 483 u64 end2 = end_offset(start2, l2->length); 484 485 return (end1 == NFS4_MAX_UINT64 || end1 > start2) && 486 (end2 == NFS4_MAX_UINT64 || end2 > start1); 487 } 488 489 static bool 490 should_free_lseg(const struct pnfs_layout_range *lseg_range, 491 const struct pnfs_layout_range *recall_range) 492 { 493 return (recall_range->iomode == IOMODE_ANY || 494 lseg_range->iomode == recall_range->iomode) && 495 pnfs_lseg_range_intersecting(lseg_range, recall_range); 496 } 497 498 static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, 499 struct list_head *tmp_list) 500 { 501 if (!atomic_dec_and_test(&lseg->pls_refcount)) 502 return false; 503 pnfs_layout_remove_lseg(lseg->pls_layout, lseg); 504 list_add(&lseg->pls_list, tmp_list); 505 return true; 506 } 507 508 /* Returns 1 if lseg is removed from list, 0 otherwise */ 509 static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, 510 struct list_head *tmp_list) 511 { 512 int rv = 0; 513 514 if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { 515 /* Remove the reference keeping the lseg in the 516 * list. It will now be removed when all 517 * outstanding io is finished. 518 */ 519 dprintk("%s: lseg %p ref %d\n", __func__, lseg, 520 atomic_read(&lseg->pls_refcount)); 521 if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list)) 522 rv = 1; 523 } 524 return rv; 525 } 526 527 /* 528 * Compare 2 layout stateid sequence ids, to see which is newer, 529 * taking into account wraparound issues. 530 */ 531 static bool pnfs_seqid_is_newer(u32 s1, u32 s2) 532 { 533 return (s32)(s1 - s2) > 0; 534 } 535 536 /** 537 * pnfs_mark_matching_lsegs_invalid - tear down lsegs or mark them for later 538 * @lo: layout header containing the lsegs 539 * @tmp_list: list head where doomed lsegs should go 540 * @recall_range: optional recall range argument to match (may be NULL) 541 * @seq: only invalidate lsegs obtained prior to this sequence (may be 0) 542 * 543 * Walk the list of lsegs in the layout header, and tear down any that should 544 * be destroyed. If "recall_range" is specified then the segment must match 545 * that range. If "seq" is non-zero, then only match segments that were handed 546 * out at or before that sequence. 547 * 548 * Returns number of matching invalid lsegs remaining in list after scanning 549 * it and purging them. 550 */ 551 int 552 pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, 553 struct list_head *tmp_list, 554 const struct pnfs_layout_range *recall_range, 555 u32 seq) 556 { 557 struct pnfs_layout_segment *lseg, *next; 558 int remaining = 0; 559 560 dprintk("%s:Begin lo %p\n", __func__, lo); 561 562 if (list_empty(&lo->plh_segs)) 563 return 0; 564 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 565 if (!recall_range || 566 should_free_lseg(&lseg->pls_range, recall_range)) { 567 if (seq && pnfs_seqid_is_newer(lseg->pls_seq, seq)) 568 continue; 569 dprintk("%s: freeing lseg %p iomode %d seq %u" 570 "offset %llu length %llu\n", __func__, 571 lseg, lseg->pls_range.iomode, lseg->pls_seq, 572 lseg->pls_range.offset, lseg->pls_range.length); 573 if (!mark_lseg_invalid(lseg, tmp_list)) 574 remaining++; 575 } 576 dprintk("%s:Return %i\n", __func__, remaining); 577 return remaining; 578 } 579 580 /* note free_me must contain lsegs from a single layout_hdr */ 581 void 582 pnfs_free_lseg_list(struct list_head *free_me) 583 { 584 struct pnfs_layout_segment *lseg, *tmp; 585 586 if (list_empty(free_me)) 587 return; 588 589 list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { 590 list_del(&lseg->pls_list); 591 pnfs_free_lseg(lseg); 592 } 593 } 594 595 void 596 pnfs_destroy_layout(struct nfs_inode *nfsi) 597 { 598 struct pnfs_layout_hdr *lo; 599 LIST_HEAD(tmp_list); 600 601 spin_lock(&nfsi->vfs_inode.i_lock); 602 lo = nfsi->layout; 603 if (lo) { 604 pnfs_get_layout_hdr(lo); 605 pnfs_mark_layout_stateid_invalid(lo, &tmp_list); 606 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); 607 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); 608 spin_unlock(&nfsi->vfs_inode.i_lock); 609 pnfs_free_lseg_list(&tmp_list); 610 pnfs_put_layout_hdr(lo); 611 } else 612 spin_unlock(&nfsi->vfs_inode.i_lock); 613 } 614 EXPORT_SYMBOL_GPL(pnfs_destroy_layout); 615 616 static bool 617 pnfs_layout_add_bulk_destroy_list(struct inode *inode, 618 struct list_head *layout_list) 619 { 620 struct pnfs_layout_hdr *lo; 621 bool ret = false; 622 623 spin_lock(&inode->i_lock); 624 lo = NFS_I(inode)->layout; 625 if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) { 626 pnfs_get_layout_hdr(lo); 627 list_add(&lo->plh_bulk_destroy, layout_list); 628 ret = true; 629 } 630 spin_unlock(&inode->i_lock); 631 return ret; 632 } 633 634 /* Caller must hold rcu_read_lock and clp->cl_lock */ 635 static int 636 pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, 637 struct nfs_server *server, 638 struct list_head *layout_list) 639 { 640 struct pnfs_layout_hdr *lo, *next; 641 struct inode *inode; 642 643 list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) { 644 inode = igrab(lo->plh_inode); 645 if (inode == NULL) 646 continue; 647 list_del_init(&lo->plh_layouts); 648 if (pnfs_layout_add_bulk_destroy_list(inode, layout_list)) 649 continue; 650 rcu_read_unlock(); 651 spin_unlock(&clp->cl_lock); 652 iput(inode); 653 spin_lock(&clp->cl_lock); 654 rcu_read_lock(); 655 return -EAGAIN; 656 } 657 return 0; 658 } 659 660 static int 661 pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, 662 bool is_bulk_recall) 663 { 664 struct pnfs_layout_hdr *lo; 665 struct inode *inode; 666 LIST_HEAD(lseg_list); 667 int ret = 0; 668 669 while (!list_empty(layout_list)) { 670 lo = list_entry(layout_list->next, struct pnfs_layout_hdr, 671 plh_bulk_destroy); 672 dprintk("%s freeing layout for inode %lu\n", __func__, 673 lo->plh_inode->i_ino); 674 inode = lo->plh_inode; 675 676 pnfs_layoutcommit_inode(inode, false); 677 678 spin_lock(&inode->i_lock); 679 list_del_init(&lo->plh_bulk_destroy); 680 if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) { 681 if (is_bulk_recall) 682 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); 683 ret = -EAGAIN; 684 } 685 spin_unlock(&inode->i_lock); 686 pnfs_free_lseg_list(&lseg_list); 687 /* Free all lsegs that are attached to commit buckets */ 688 nfs_commit_inode(inode, 0); 689 pnfs_put_layout_hdr(lo); 690 iput(inode); 691 } 692 return ret; 693 } 694 695 int 696 pnfs_destroy_layouts_byfsid(struct nfs_client *clp, 697 struct nfs_fsid *fsid, 698 bool is_recall) 699 { 700 struct nfs_server *server; 701 LIST_HEAD(layout_list); 702 703 spin_lock(&clp->cl_lock); 704 rcu_read_lock(); 705 restart: 706 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 707 if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0) 708 continue; 709 if (pnfs_layout_bulk_destroy_byserver_locked(clp, 710 server, 711 &layout_list) != 0) 712 goto restart; 713 } 714 rcu_read_unlock(); 715 spin_unlock(&clp->cl_lock); 716 717 if (list_empty(&layout_list)) 718 return 0; 719 return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); 720 } 721 722 int 723 pnfs_destroy_layouts_byclid(struct nfs_client *clp, 724 bool is_recall) 725 { 726 struct nfs_server *server; 727 LIST_HEAD(layout_list); 728 729 spin_lock(&clp->cl_lock); 730 rcu_read_lock(); 731 restart: 732 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 733 if (pnfs_layout_bulk_destroy_byserver_locked(clp, 734 server, 735 &layout_list) != 0) 736 goto restart; 737 } 738 rcu_read_unlock(); 739 spin_unlock(&clp->cl_lock); 740 741 if (list_empty(&layout_list)) 742 return 0; 743 return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); 744 } 745 746 /* 747 * Called by the state manger to remove all layouts established under an 748 * expired lease. 749 */ 750 void 751 pnfs_destroy_all_layouts(struct nfs_client *clp) 752 { 753 nfs4_deviceid_mark_client_invalid(clp); 754 nfs4_deviceid_purge_client(clp); 755 756 pnfs_destroy_layouts_byclid(clp, false); 757 } 758 759 /* update lo->plh_stateid with new if is more recent */ 760 void 761 pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, 762 bool update_barrier) 763 { 764 u32 oldseq, newseq, new_barrier; 765 int empty = list_empty(&lo->plh_segs); 766 767 oldseq = be32_to_cpu(lo->plh_stateid.seqid); 768 newseq = be32_to_cpu(new->seqid); 769 if (empty || pnfs_seqid_is_newer(newseq, oldseq)) { 770 nfs4_stateid_copy(&lo->plh_stateid, new); 771 if (update_barrier) { 772 new_barrier = be32_to_cpu(new->seqid); 773 } else { 774 /* Because of wraparound, we want to keep the barrier 775 * "close" to the current seqids. 776 */ 777 new_barrier = newseq - atomic_read(&lo->plh_outstanding); 778 } 779 if (empty || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) 780 lo->plh_barrier = new_barrier; 781 } 782 } 783 784 static bool 785 pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo, 786 const nfs4_stateid *stateid) 787 { 788 u32 seqid = be32_to_cpu(stateid->seqid); 789 790 return !pnfs_seqid_is_newer(seqid, lo->plh_barrier); 791 } 792 793 /* lget is set to 1 if called from inside send_layoutget call chain */ 794 static bool 795 pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo) 796 { 797 return lo->plh_block_lgets || 798 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); 799 } 800 801 /* 802 * Get layout from server. 803 * for now, assume that whole file layouts are requested. 804 * arg->offset: 0 805 * arg->length: all ones 806 */ 807 static struct pnfs_layout_segment * 808 send_layoutget(struct pnfs_layout_hdr *lo, 809 struct nfs_open_context *ctx, 810 nfs4_stateid *stateid, 811 const struct pnfs_layout_range *range, 812 long *timeout, gfp_t gfp_flags) 813 { 814 struct inode *ino = lo->plh_inode; 815 struct nfs_server *server = NFS_SERVER(ino); 816 struct nfs4_layoutget *lgp; 817 loff_t i_size; 818 819 dprintk("--> %s\n", __func__); 820 821 /* 822 * Synchronously retrieve layout information from server and 823 * store in lseg. If we race with a concurrent seqid morphing 824 * op, then re-send the LAYOUTGET. 825 */ 826 lgp = kzalloc(sizeof(*lgp), gfp_flags); 827 if (lgp == NULL) 828 return ERR_PTR(-ENOMEM); 829 830 i_size = i_size_read(ino); 831 832 lgp->args.minlength = PAGE_SIZE; 833 if (lgp->args.minlength > range->length) 834 lgp->args.minlength = range->length; 835 if (range->iomode == IOMODE_READ) { 836 if (range->offset >= i_size) 837 lgp->args.minlength = 0; 838 else if (i_size - range->offset < lgp->args.minlength) 839 lgp->args.minlength = i_size - range->offset; 840 } 841 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; 842 pnfs_copy_range(&lgp->args.range, range); 843 lgp->args.type = server->pnfs_curr_ld->id; 844 lgp->args.inode = ino; 845 lgp->args.ctx = get_nfs_open_context(ctx); 846 nfs4_stateid_copy(&lgp->args.stateid, stateid); 847 lgp->gfp_flags = gfp_flags; 848 lgp->cred = lo->plh_lc_cred; 849 850 return nfs4_proc_layoutget(lgp, timeout, gfp_flags); 851 } 852 853 static void pnfs_clear_layoutcommit(struct inode *inode, 854 struct list_head *head) 855 { 856 struct nfs_inode *nfsi = NFS_I(inode); 857 struct pnfs_layout_segment *lseg, *tmp; 858 859 if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) 860 return; 861 list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) { 862 if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) 863 continue; 864 pnfs_lseg_dec_and_remove_zero(lseg, head); 865 } 866 } 867 868 void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) 869 { 870 clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); 871 smp_mb__after_atomic(); 872 wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); 873 rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); 874 } 875 876 static bool 877 pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo) 878 { 879 if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) 880 return false; 881 lo->plh_return_iomode = 0; 882 lo->plh_return_seq = 0; 883 pnfs_get_layout_hdr(lo); 884 clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); 885 return true; 886 } 887 888 static int 889 pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, 890 enum pnfs_iomode iomode, bool sync) 891 { 892 struct inode *ino = lo->plh_inode; 893 struct nfs4_layoutreturn *lrp; 894 int status = 0; 895 896 lrp = kzalloc(sizeof(*lrp), GFP_NOFS); 897 if (unlikely(lrp == NULL)) { 898 status = -ENOMEM; 899 spin_lock(&ino->i_lock); 900 pnfs_clear_layoutreturn_waitbit(lo); 901 spin_unlock(&ino->i_lock); 902 pnfs_put_layout_hdr(lo); 903 goto out; 904 } 905 906 nfs4_stateid_copy(&lrp->args.stateid, stateid); 907 lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; 908 lrp->args.inode = ino; 909 lrp->args.range.iomode = iomode; 910 lrp->args.range.offset = 0; 911 lrp->args.range.length = NFS4_MAX_UINT64; 912 lrp->args.layout = lo; 913 lrp->clp = NFS_SERVER(ino)->nfs_client; 914 lrp->cred = lo->plh_lc_cred; 915 916 status = nfs4_proc_layoutreturn(lrp, sync); 917 out: 918 dprintk("<-- %s status: %d\n", __func__, status); 919 return status; 920 } 921 922 /* Return true if layoutreturn is needed */ 923 static bool 924 pnfs_layout_need_return(struct pnfs_layout_hdr *lo) 925 { 926 struct pnfs_layout_segment *s; 927 928 if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) 929 return false; 930 931 /* Defer layoutreturn until all lsegs are done */ 932 list_for_each_entry(s, &lo->plh_segs, pls_list) { 933 if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags)) 934 return false; 935 } 936 937 return true; 938 } 939 940 static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo) 941 { 942 struct inode *inode= lo->plh_inode; 943 944 if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) 945 return; 946 spin_lock(&inode->i_lock); 947 if (pnfs_layout_need_return(lo)) { 948 nfs4_stateid stateid; 949 enum pnfs_iomode iomode; 950 bool send; 951 952 nfs4_stateid_copy(&stateid, &lo->plh_stateid); 953 stateid.seqid = cpu_to_be32(lo->plh_return_seq); 954 iomode = lo->plh_return_iomode; 955 send = pnfs_prepare_layoutreturn(lo); 956 spin_unlock(&inode->i_lock); 957 if (send) { 958 /* Send an async layoutreturn so we dont deadlock */ 959 pnfs_send_layoutreturn(lo, &stateid, iomode, false); 960 } 961 } else 962 spin_unlock(&inode->i_lock); 963 } 964 965 /* 966 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr 967 * when the layout segment list is empty. 968 * 969 * Note that a pnfs_layout_hdr can exist with an empty layout segment 970 * list when LAYOUTGET has failed, or when LAYOUTGET succeeded, but the 971 * deviceid is marked invalid. 972 */ 973 int 974 _pnfs_return_layout(struct inode *ino) 975 { 976 struct pnfs_layout_hdr *lo = NULL; 977 struct nfs_inode *nfsi = NFS_I(ino); 978 LIST_HEAD(tmp_list); 979 nfs4_stateid stateid; 980 int status = 0, empty; 981 bool send; 982 983 dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); 984 985 spin_lock(&ino->i_lock); 986 lo = nfsi->layout; 987 if (!lo) { 988 spin_unlock(&ino->i_lock); 989 dprintk("NFS: %s no layout to return\n", __func__); 990 goto out; 991 } 992 nfs4_stateid_copy(&stateid, &nfsi->layout->plh_stateid); 993 /* Reference matched in nfs4_layoutreturn_release */ 994 pnfs_get_layout_hdr(lo); 995 empty = list_empty(&lo->plh_segs); 996 pnfs_clear_layoutcommit(ino, &tmp_list); 997 pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0); 998 999 if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { 1000 struct pnfs_layout_range range = { 1001 .iomode = IOMODE_ANY, 1002 .offset = 0, 1003 .length = NFS4_MAX_UINT64, 1004 }; 1005 NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range); 1006 } 1007 1008 /* Don't send a LAYOUTRETURN if list was initially empty */ 1009 if (empty) { 1010 spin_unlock(&ino->i_lock); 1011 dprintk("NFS: %s no layout segments to return\n", __func__); 1012 goto out_put_layout_hdr; 1013 } 1014 1015 set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); 1016 send = pnfs_prepare_layoutreturn(lo); 1017 spin_unlock(&ino->i_lock); 1018 pnfs_free_lseg_list(&tmp_list); 1019 if (send) 1020 status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); 1021 out_put_layout_hdr: 1022 pnfs_put_layout_hdr(lo); 1023 out: 1024 dprintk("<-- %s status: %d\n", __func__, status); 1025 return status; 1026 } 1027 EXPORT_SYMBOL_GPL(_pnfs_return_layout); 1028 1029 int 1030 pnfs_commit_and_return_layout(struct inode *inode) 1031 { 1032 struct pnfs_layout_hdr *lo; 1033 int ret; 1034 1035 spin_lock(&inode->i_lock); 1036 lo = NFS_I(inode)->layout; 1037 if (lo == NULL) { 1038 spin_unlock(&inode->i_lock); 1039 return 0; 1040 } 1041 pnfs_get_layout_hdr(lo); 1042 /* Block new layoutgets and read/write to ds */ 1043 lo->plh_block_lgets++; 1044 spin_unlock(&inode->i_lock); 1045 filemap_fdatawait(inode->i_mapping); 1046 ret = pnfs_layoutcommit_inode(inode, true); 1047 if (ret == 0) 1048 ret = _pnfs_return_layout(inode); 1049 spin_lock(&inode->i_lock); 1050 lo->plh_block_lgets--; 1051 spin_unlock(&inode->i_lock); 1052 pnfs_put_layout_hdr(lo); 1053 return ret; 1054 } 1055 1056 bool pnfs_roc(struct inode *ino) 1057 { 1058 struct nfs_inode *nfsi = NFS_I(ino); 1059 struct nfs_open_context *ctx; 1060 struct nfs4_state *state; 1061 struct pnfs_layout_hdr *lo; 1062 struct pnfs_layout_segment *lseg, *tmp; 1063 nfs4_stateid stateid; 1064 LIST_HEAD(tmp_list); 1065 bool found = false, layoutreturn = false, roc = false; 1066 1067 spin_lock(&ino->i_lock); 1068 lo = nfsi->layout; 1069 if (!lo || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) 1070 goto out_noroc; 1071 1072 /* no roc if we hold a delegation */ 1073 if (nfs4_check_delegation(ino, FMODE_READ)) 1074 goto out_noroc; 1075 1076 list_for_each_entry(ctx, &nfsi->open_files, list) { 1077 state = ctx->state; 1078 /* Don't return layout if there is open file state */ 1079 if (state != NULL && state->state != 0) 1080 goto out_noroc; 1081 } 1082 1083 nfs4_stateid_copy(&stateid, &lo->plh_stateid); 1084 /* always send layoutreturn if being marked so */ 1085 if (test_and_clear_bit(NFS_LAYOUT_RETURN_REQUESTED, 1086 &lo->plh_flags)) 1087 layoutreturn = pnfs_prepare_layoutreturn(lo); 1088 1089 list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) 1090 /* If we are sending layoutreturn, invalidate all valid lsegs */ 1091 if (layoutreturn || test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { 1092 mark_lseg_invalid(lseg, &tmp_list); 1093 found = true; 1094 } 1095 /* ROC in two conditions: 1096 * 1. there are ROC lsegs 1097 * 2. we don't send layoutreturn 1098 */ 1099 if (found && !layoutreturn) { 1100 /* lo ref dropped in pnfs_roc_release() */ 1101 pnfs_get_layout_hdr(lo); 1102 roc = true; 1103 } 1104 1105 out_noroc: 1106 spin_unlock(&ino->i_lock); 1107 pnfs_free_lseg_list(&tmp_list); 1108 pnfs_layoutcommit_inode(ino, true); 1109 if (layoutreturn) 1110 pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); 1111 return roc; 1112 } 1113 1114 void pnfs_roc_release(struct inode *ino) 1115 { 1116 struct pnfs_layout_hdr *lo; 1117 1118 spin_lock(&ino->i_lock); 1119 lo = NFS_I(ino)->layout; 1120 pnfs_clear_layoutreturn_waitbit(lo); 1121 if (atomic_dec_and_test(&lo->plh_refcount)) { 1122 pnfs_detach_layout_hdr(lo); 1123 spin_unlock(&ino->i_lock); 1124 pnfs_free_layout_hdr(lo); 1125 } else 1126 spin_unlock(&ino->i_lock); 1127 } 1128 1129 void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) 1130 { 1131 struct pnfs_layout_hdr *lo; 1132 1133 spin_lock(&ino->i_lock); 1134 lo = NFS_I(ino)->layout; 1135 pnfs_mark_layout_returned_if_empty(lo); 1136 if (pnfs_seqid_is_newer(barrier, lo->plh_barrier)) 1137 lo->plh_barrier = barrier; 1138 spin_unlock(&ino->i_lock); 1139 trace_nfs4_layoutreturn_on_close(ino, 0); 1140 } 1141 1142 void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier) 1143 { 1144 struct nfs_inode *nfsi = NFS_I(ino); 1145 struct pnfs_layout_hdr *lo; 1146 u32 current_seqid; 1147 1148 spin_lock(&ino->i_lock); 1149 lo = nfsi->layout; 1150 current_seqid = be32_to_cpu(lo->plh_stateid.seqid); 1151 1152 /* Since close does not return a layout stateid for use as 1153 * a barrier, we choose the worst-case barrier. 1154 */ 1155 *barrier = current_seqid + atomic_read(&lo->plh_outstanding); 1156 spin_unlock(&ino->i_lock); 1157 } 1158 1159 bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) 1160 { 1161 struct nfs_inode *nfsi = NFS_I(ino); 1162 struct pnfs_layout_hdr *lo; 1163 bool sleep = false; 1164 1165 /* we might not have grabbed lo reference. so need to check under 1166 * i_lock */ 1167 spin_lock(&ino->i_lock); 1168 lo = nfsi->layout; 1169 if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) 1170 sleep = true; 1171 spin_unlock(&ino->i_lock); 1172 1173 if (sleep) 1174 rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); 1175 1176 return sleep; 1177 } 1178 1179 /* 1180 * Compare two layout segments for sorting into layout cache. 1181 * We want to preferentially return RW over RO layouts, so ensure those 1182 * are seen first. 1183 */ 1184 static s64 1185 pnfs_lseg_range_cmp(const struct pnfs_layout_range *l1, 1186 const struct pnfs_layout_range *l2) 1187 { 1188 s64 d; 1189 1190 /* high offset > low offset */ 1191 d = l1->offset - l2->offset; 1192 if (d) 1193 return d; 1194 1195 /* short length > long length */ 1196 d = l2->length - l1->length; 1197 if (d) 1198 return d; 1199 1200 /* read > read/write */ 1201 return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ); 1202 } 1203 1204 static bool 1205 pnfs_lseg_range_is_after(const struct pnfs_layout_range *l1, 1206 const struct pnfs_layout_range *l2) 1207 { 1208 return pnfs_lseg_range_cmp(l1, l2) > 0; 1209 } 1210 1211 static bool 1212 pnfs_lseg_no_merge(struct pnfs_layout_segment *lseg, 1213 struct pnfs_layout_segment *old) 1214 { 1215 return false; 1216 } 1217 1218 void 1219 pnfs_generic_layout_insert_lseg(struct pnfs_layout_hdr *lo, 1220 struct pnfs_layout_segment *lseg, 1221 bool (*is_after)(const struct pnfs_layout_range *, 1222 const struct pnfs_layout_range *), 1223 bool (*do_merge)(struct pnfs_layout_segment *, 1224 struct pnfs_layout_segment *), 1225 struct list_head *free_me) 1226 { 1227 struct pnfs_layout_segment *lp, *tmp; 1228 1229 dprintk("%s:Begin\n", __func__); 1230 1231 list_for_each_entry_safe(lp, tmp, &lo->plh_segs, pls_list) { 1232 if (test_bit(NFS_LSEG_VALID, &lp->pls_flags) == 0) 1233 continue; 1234 if (do_merge(lseg, lp)) { 1235 mark_lseg_invalid(lp, free_me); 1236 continue; 1237 } 1238 if (is_after(&lseg->pls_range, &lp->pls_range)) 1239 continue; 1240 list_add_tail(&lseg->pls_list, &lp->pls_list); 1241 dprintk("%s: inserted lseg %p " 1242 "iomode %d offset %llu length %llu before " 1243 "lp %p iomode %d offset %llu length %llu\n", 1244 __func__, lseg, lseg->pls_range.iomode, 1245 lseg->pls_range.offset, lseg->pls_range.length, 1246 lp, lp->pls_range.iomode, lp->pls_range.offset, 1247 lp->pls_range.length); 1248 goto out; 1249 } 1250 list_add_tail(&lseg->pls_list, &lo->plh_segs); 1251 dprintk("%s: inserted lseg %p " 1252 "iomode %d offset %llu length %llu at tail\n", 1253 __func__, lseg, lseg->pls_range.iomode, 1254 lseg->pls_range.offset, lseg->pls_range.length); 1255 out: 1256 pnfs_get_layout_hdr(lo); 1257 1258 dprintk("%s:Return\n", __func__); 1259 } 1260 EXPORT_SYMBOL_GPL(pnfs_generic_layout_insert_lseg); 1261 1262 static void 1263 pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo, 1264 struct pnfs_layout_segment *lseg, 1265 struct list_head *free_me) 1266 { 1267 struct inode *inode = lo->plh_inode; 1268 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 1269 1270 if (ld->add_lseg != NULL) 1271 ld->add_lseg(lo, lseg, free_me); 1272 else 1273 pnfs_generic_layout_insert_lseg(lo, lseg, 1274 pnfs_lseg_range_is_after, 1275 pnfs_lseg_no_merge, 1276 free_me); 1277 } 1278 1279 static struct pnfs_layout_hdr * 1280 alloc_init_layout_hdr(struct inode *ino, 1281 struct nfs_open_context *ctx, 1282 gfp_t gfp_flags) 1283 { 1284 struct pnfs_layout_hdr *lo; 1285 1286 lo = pnfs_alloc_layout_hdr(ino, gfp_flags); 1287 if (!lo) 1288 return NULL; 1289 atomic_set(&lo->plh_refcount, 1); 1290 INIT_LIST_HEAD(&lo->plh_layouts); 1291 INIT_LIST_HEAD(&lo->plh_segs); 1292 INIT_LIST_HEAD(&lo->plh_bulk_destroy); 1293 lo->plh_inode = ino; 1294 lo->plh_lc_cred = get_rpccred(ctx->cred); 1295 lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID; 1296 return lo; 1297 } 1298 1299 static struct pnfs_layout_hdr * 1300 pnfs_find_alloc_layout(struct inode *ino, 1301 struct nfs_open_context *ctx, 1302 gfp_t gfp_flags) 1303 __releases(&ino->i_lock) 1304 __acquires(&ino->i_lock) 1305 { 1306 struct nfs_inode *nfsi = NFS_I(ino); 1307 struct pnfs_layout_hdr *new = NULL; 1308 1309 dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); 1310 1311 if (nfsi->layout != NULL) 1312 goto out_existing; 1313 spin_unlock(&ino->i_lock); 1314 new = alloc_init_layout_hdr(ino, ctx, gfp_flags); 1315 spin_lock(&ino->i_lock); 1316 1317 if (likely(nfsi->layout == NULL)) { /* Won the race? */ 1318 nfsi->layout = new; 1319 return new; 1320 } else if (new != NULL) 1321 pnfs_free_layout_hdr(new); 1322 out_existing: 1323 pnfs_get_layout_hdr(nfsi->layout); 1324 return nfsi->layout; 1325 } 1326 1327 /* 1328 * iomode matching rules: 1329 * iomode lseg strict match 1330 * iomode 1331 * ----- ----- ------ ----- 1332 * ANY READ N/A true 1333 * ANY RW N/A true 1334 * RW READ N/A false 1335 * RW RW N/A true 1336 * READ READ N/A true 1337 * READ RW true false 1338 * READ RW false true 1339 */ 1340 static bool 1341 pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range, 1342 const struct pnfs_layout_range *range, 1343 bool strict_iomode) 1344 { 1345 struct pnfs_layout_range range1; 1346 1347 if ((range->iomode == IOMODE_RW && 1348 ls_range->iomode != IOMODE_RW) || 1349 (range->iomode != ls_range->iomode && 1350 strict_iomode == true) || 1351 !pnfs_lseg_range_intersecting(ls_range, range)) 1352 return 0; 1353 1354 /* range1 covers only the first byte in the range */ 1355 range1 = *range; 1356 range1.length = 1; 1357 return pnfs_lseg_range_contained(ls_range, &range1); 1358 } 1359 1360 /* 1361 * lookup range in layout 1362 */ 1363 static struct pnfs_layout_segment * 1364 pnfs_find_lseg(struct pnfs_layout_hdr *lo, 1365 struct pnfs_layout_range *range, 1366 bool strict_iomode) 1367 { 1368 struct pnfs_layout_segment *lseg, *ret = NULL; 1369 1370 dprintk("%s:Begin\n", __func__); 1371 1372 list_for_each_entry(lseg, &lo->plh_segs, pls_list) { 1373 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && 1374 !test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) && 1375 pnfs_lseg_range_match(&lseg->pls_range, range, 1376 strict_iomode)) { 1377 ret = pnfs_get_lseg(lseg); 1378 break; 1379 } 1380 } 1381 1382 dprintk("%s:Return lseg %p ref %d\n", 1383 __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0); 1384 return ret; 1385 } 1386 1387 /* 1388 * Use mdsthreshold hints set at each OPEN to determine if I/O should go 1389 * to the MDS or over pNFS 1390 * 1391 * The nfs_inode read_io and write_io fields are cumulative counters reset 1392 * when there are no layout segments. Note that in pnfs_update_layout iomode 1393 * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a 1394 * WRITE request. 1395 * 1396 * A return of true means use MDS I/O. 1397 * 1398 * From rfc 5661: 1399 * If a file's size is smaller than the file size threshold, data accesses 1400 * SHOULD be sent to the metadata server. If an I/O request has a length that 1401 * is below the I/O size threshold, the I/O SHOULD be sent to the metadata 1402 * server. If both file size and I/O size are provided, the client SHOULD 1403 * reach or exceed both thresholds before sending its read or write 1404 * requests to the data server. 1405 */ 1406 static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx, 1407 struct inode *ino, int iomode) 1408 { 1409 struct nfs4_threshold *t = ctx->mdsthreshold; 1410 struct nfs_inode *nfsi = NFS_I(ino); 1411 loff_t fsize = i_size_read(ino); 1412 bool size = false, size_set = false, io = false, io_set = false, ret = false; 1413 1414 if (t == NULL) 1415 return ret; 1416 1417 dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n", 1418 __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz); 1419 1420 switch (iomode) { 1421 case IOMODE_READ: 1422 if (t->bm & THRESHOLD_RD) { 1423 dprintk("%s fsize %llu\n", __func__, fsize); 1424 size_set = true; 1425 if (fsize < t->rd_sz) 1426 size = true; 1427 } 1428 if (t->bm & THRESHOLD_RD_IO) { 1429 dprintk("%s nfsi->read_io %llu\n", __func__, 1430 nfsi->read_io); 1431 io_set = true; 1432 if (nfsi->read_io < t->rd_io_sz) 1433 io = true; 1434 } 1435 break; 1436 case IOMODE_RW: 1437 if (t->bm & THRESHOLD_WR) { 1438 dprintk("%s fsize %llu\n", __func__, fsize); 1439 size_set = true; 1440 if (fsize < t->wr_sz) 1441 size = true; 1442 } 1443 if (t->bm & THRESHOLD_WR_IO) { 1444 dprintk("%s nfsi->write_io %llu\n", __func__, 1445 nfsi->write_io); 1446 io_set = true; 1447 if (nfsi->write_io < t->wr_io_sz) 1448 io = true; 1449 } 1450 break; 1451 } 1452 if (size_set && io_set) { 1453 if (size && io) 1454 ret = true; 1455 } else if (size || io) 1456 ret = true; 1457 1458 dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret); 1459 return ret; 1460 } 1461 1462 static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) 1463 { 1464 /* 1465 * send layoutcommit as it can hold up layoutreturn due to lseg 1466 * reference 1467 */ 1468 pnfs_layoutcommit_inode(lo->plh_inode, false); 1469 return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN, 1470 nfs_wait_bit_killable, 1471 TASK_UNINTERRUPTIBLE); 1472 } 1473 1474 static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo) 1475 { 1476 unsigned long *bitlock = &lo->plh_flags; 1477 1478 clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock); 1479 smp_mb__after_atomic(); 1480 wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); 1481 } 1482 1483 /* 1484 * Layout segment is retreived from the server if not cached. 1485 * The appropriate layout segment is referenced and returned to the caller. 1486 */ 1487 struct pnfs_layout_segment * 1488 pnfs_update_layout(struct inode *ino, 1489 struct nfs_open_context *ctx, 1490 loff_t pos, 1491 u64 count, 1492 enum pnfs_iomode iomode, 1493 bool strict_iomode, 1494 gfp_t gfp_flags) 1495 { 1496 struct pnfs_layout_range arg = { 1497 .iomode = iomode, 1498 .offset = pos, 1499 .length = count, 1500 }; 1501 unsigned pg_offset, seq; 1502 struct nfs_server *server = NFS_SERVER(ino); 1503 struct nfs_client *clp = server->nfs_client; 1504 struct pnfs_layout_hdr *lo = NULL; 1505 struct pnfs_layout_segment *lseg = NULL; 1506 nfs4_stateid stateid; 1507 long timeout = 0; 1508 unsigned long giveup = jiffies + rpc_get_timeout(server->client); 1509 bool first; 1510 1511 if (!pnfs_enabled_sb(NFS_SERVER(ino))) { 1512 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1513 PNFS_UPDATE_LAYOUT_NO_PNFS); 1514 goto out; 1515 } 1516 1517 if (iomode == IOMODE_READ && i_size_read(ino) == 0) { 1518 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1519 PNFS_UPDATE_LAYOUT_RD_ZEROLEN); 1520 goto out; 1521 } 1522 1523 if (pnfs_within_mdsthreshold(ctx, ino, iomode)) { 1524 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1525 PNFS_UPDATE_LAYOUT_MDSTHRESH); 1526 goto out; 1527 } 1528 1529 lookup_again: 1530 first = false; 1531 spin_lock(&ino->i_lock); 1532 lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); 1533 if (lo == NULL) { 1534 spin_unlock(&ino->i_lock); 1535 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1536 PNFS_UPDATE_LAYOUT_NOMEM); 1537 goto out; 1538 } 1539 1540 /* Do we even need to bother with this? */ 1541 if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { 1542 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1543 PNFS_UPDATE_LAYOUT_BULK_RECALL); 1544 dprintk("%s matches recall, use MDS\n", __func__); 1545 goto out_unlock; 1546 } 1547 1548 /* if LAYOUTGET already failed once we don't try again */ 1549 if (pnfs_layout_io_test_failed(lo, iomode)) { 1550 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1551 PNFS_UPDATE_LAYOUT_IO_TEST_FAIL); 1552 goto out_unlock; 1553 } 1554 1555 lseg = pnfs_find_lseg(lo, &arg, strict_iomode); 1556 if (lseg) { 1557 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1558 PNFS_UPDATE_LAYOUT_FOUND_CACHED); 1559 goto out_unlock; 1560 } 1561 1562 if (!nfs4_valid_open_stateid(ctx->state)) { 1563 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1564 PNFS_UPDATE_LAYOUT_INVALID_OPEN); 1565 goto out_unlock; 1566 } 1567 1568 /* 1569 * Choose a stateid for the LAYOUTGET. If we don't have a layout 1570 * stateid, or it has been invalidated, then we must use the open 1571 * stateid. 1572 */ 1573 if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { 1574 1575 /* 1576 * The first layoutget for the file. Need to serialize per 1577 * RFC 5661 Errata 3208. 1578 */ 1579 if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, 1580 &lo->plh_flags)) { 1581 spin_unlock(&ino->i_lock); 1582 wait_on_bit(&lo->plh_flags, NFS_LAYOUT_FIRST_LAYOUTGET, 1583 TASK_UNINTERRUPTIBLE); 1584 pnfs_put_layout_hdr(lo); 1585 dprintk("%s retrying\n", __func__); 1586 goto lookup_again; 1587 } 1588 1589 first = true; 1590 do { 1591 seq = read_seqbegin(&ctx->state->seqlock); 1592 nfs4_stateid_copy(&stateid, &ctx->state->stateid); 1593 } while (read_seqretry(&ctx->state->seqlock, seq)); 1594 } else { 1595 nfs4_stateid_copy(&stateid, &lo->plh_stateid); 1596 } 1597 1598 /* 1599 * Because we free lsegs before sending LAYOUTRETURN, we need to wait 1600 * for LAYOUTRETURN even if first is true. 1601 */ 1602 if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { 1603 spin_unlock(&ino->i_lock); 1604 dprintk("%s wait for layoutreturn\n", __func__); 1605 if (pnfs_prepare_to_retry_layoutget(lo)) { 1606 if (first) 1607 pnfs_clear_first_layoutget(lo); 1608 pnfs_put_layout_hdr(lo); 1609 dprintk("%s retrying\n", __func__); 1610 trace_pnfs_update_layout(ino, pos, count, iomode, lo, 1611 lseg, PNFS_UPDATE_LAYOUT_RETRY); 1612 goto lookup_again; 1613 } 1614 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1615 PNFS_UPDATE_LAYOUT_RETURN); 1616 goto out_put_layout_hdr; 1617 } 1618 1619 if (pnfs_layoutgets_blocked(lo)) { 1620 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1621 PNFS_UPDATE_LAYOUT_BLOCKED); 1622 goto out_unlock; 1623 } 1624 atomic_inc(&lo->plh_outstanding); 1625 spin_unlock(&ino->i_lock); 1626 1627 if (list_empty(&lo->plh_layouts)) { 1628 /* The lo must be on the clp list if there is any 1629 * chance of a CB_LAYOUTRECALL(FILE) coming in. 1630 */ 1631 spin_lock(&clp->cl_lock); 1632 if (list_empty(&lo->plh_layouts)) 1633 list_add_tail(&lo->plh_layouts, &server->layouts); 1634 spin_unlock(&clp->cl_lock); 1635 } 1636 1637 pg_offset = arg.offset & ~PAGE_MASK; 1638 if (pg_offset) { 1639 arg.offset -= pg_offset; 1640 arg.length += pg_offset; 1641 } 1642 if (arg.length != NFS4_MAX_UINT64) 1643 arg.length = PAGE_ALIGN(arg.length); 1644 1645 lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags); 1646 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1647 PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); 1648 if (IS_ERR(lseg)) { 1649 switch(PTR_ERR(lseg)) { 1650 case -ERECALLCONFLICT: 1651 if (time_after(jiffies, giveup)) 1652 lseg = NULL; 1653 /* Fallthrough */ 1654 case -EAGAIN: 1655 pnfs_put_layout_hdr(lo); 1656 if (first) 1657 pnfs_clear_first_layoutget(lo); 1658 if (lseg) { 1659 trace_pnfs_update_layout(ino, pos, count, 1660 iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY); 1661 goto lookup_again; 1662 } 1663 /* Fallthrough */ 1664 default: 1665 if (!nfs_error_is_fatal(PTR_ERR(lseg))) { 1666 pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); 1667 lseg = NULL; 1668 } 1669 } 1670 } else { 1671 pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); 1672 } 1673 1674 atomic_dec(&lo->plh_outstanding); 1675 out_put_layout_hdr: 1676 if (first) 1677 pnfs_clear_first_layoutget(lo); 1678 pnfs_put_layout_hdr(lo); 1679 out: 1680 dprintk("%s: inode %s/%llu pNFS layout segment %s for " 1681 "(%s, offset: %llu, length: %llu)\n", 1682 __func__, ino->i_sb->s_id, 1683 (unsigned long long)NFS_FILEID(ino), 1684 IS_ERR_OR_NULL(lseg) ? "not found" : "found", 1685 iomode==IOMODE_RW ? "read/write" : "read-only", 1686 (unsigned long long)pos, 1687 (unsigned long long)count); 1688 return lseg; 1689 out_unlock: 1690 spin_unlock(&ino->i_lock); 1691 goto out_put_layout_hdr; 1692 } 1693 EXPORT_SYMBOL_GPL(pnfs_update_layout); 1694 1695 static bool 1696 pnfs_sanity_check_layout_range(struct pnfs_layout_range *range) 1697 { 1698 switch (range->iomode) { 1699 case IOMODE_READ: 1700 case IOMODE_RW: 1701 break; 1702 default: 1703 return false; 1704 } 1705 if (range->offset == NFS4_MAX_UINT64) 1706 return false; 1707 if (range->length == 0) 1708 return false; 1709 if (range->length != NFS4_MAX_UINT64 && 1710 range->length > NFS4_MAX_UINT64 - range->offset) 1711 return false; 1712 return true; 1713 } 1714 1715 struct pnfs_layout_segment * 1716 pnfs_layout_process(struct nfs4_layoutget *lgp) 1717 { 1718 struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; 1719 struct nfs4_layoutget_res *res = &lgp->res; 1720 struct pnfs_layout_segment *lseg; 1721 struct inode *ino = lo->plh_inode; 1722 LIST_HEAD(free_me); 1723 1724 if (!pnfs_sanity_check_layout_range(&res->range)) 1725 return ERR_PTR(-EINVAL); 1726 1727 /* Inject layout blob into I/O device driver */ 1728 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); 1729 if (IS_ERR_OR_NULL(lseg)) { 1730 if (!lseg) 1731 lseg = ERR_PTR(-ENOMEM); 1732 1733 dprintk("%s: Could not allocate layout: error %ld\n", 1734 __func__, PTR_ERR(lseg)); 1735 return lseg; 1736 } 1737 1738 init_lseg(lo, lseg); 1739 lseg->pls_range = res->range; 1740 lseg->pls_seq = be32_to_cpu(res->stateid.seqid); 1741 1742 spin_lock(&ino->i_lock); 1743 if (pnfs_layoutgets_blocked(lo)) { 1744 dprintk("%s forget reply due to state\n", __func__); 1745 goto out_forget; 1746 } 1747 1748 if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { 1749 /* existing state ID, make sure the sequence number matches. */ 1750 if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { 1751 dprintk("%s forget reply due to sequence\n", __func__); 1752 goto out_forget; 1753 } 1754 pnfs_set_layout_stateid(lo, &res->stateid, false); 1755 } else { 1756 /* 1757 * We got an entirely new state ID. Mark all segments for the 1758 * inode invalid, and don't bother validating the stateid 1759 * sequence number. 1760 */ 1761 pnfs_mark_matching_lsegs_invalid(lo, &free_me, NULL, 0); 1762 1763 nfs4_stateid_copy(&lo->plh_stateid, &res->stateid); 1764 lo->plh_barrier = be32_to_cpu(res->stateid.seqid); 1765 } 1766 1767 clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); 1768 1769 pnfs_get_lseg(lseg); 1770 pnfs_layout_insert_lseg(lo, lseg, &free_me); 1771 1772 if (res->return_on_close) 1773 set_bit(NFS_LSEG_ROC, &lseg->pls_flags); 1774 1775 spin_unlock(&ino->i_lock); 1776 pnfs_free_lseg_list(&free_me); 1777 return lseg; 1778 1779 out_forget: 1780 spin_unlock(&ino->i_lock); 1781 lseg->pls_layout = lo; 1782 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); 1783 return ERR_PTR(-EAGAIN); 1784 } 1785 1786 static void 1787 pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, 1788 u32 seq) 1789 { 1790 if (lo->plh_return_iomode == iomode) 1791 return; 1792 if (lo->plh_return_iomode != 0) 1793 iomode = IOMODE_ANY; 1794 lo->plh_return_iomode = iomode; 1795 set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); 1796 if (!lo->plh_return_seq || pnfs_seqid_is_newer(seq, lo->plh_return_seq)) 1797 lo->plh_return_seq = seq; 1798 } 1799 1800 /** 1801 * pnfs_mark_matching_lsegs_return - Free or return matching layout segments 1802 * @lo: pointer to layout header 1803 * @tmp_list: list header to be used with pnfs_free_lseg_list() 1804 * @return_range: describe layout segment ranges to be returned 1805 * 1806 * This function is mainly intended for use by layoutrecall. It attempts 1807 * to free the layout segment immediately, or else to mark it for return 1808 * as soon as its reference count drops to zero. 1809 */ 1810 int 1811 pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, 1812 struct list_head *tmp_list, 1813 const struct pnfs_layout_range *return_range, 1814 u32 seq) 1815 { 1816 struct pnfs_layout_segment *lseg, *next; 1817 int remaining = 0; 1818 1819 dprintk("%s:Begin lo %p\n", __func__, lo); 1820 1821 if (list_empty(&lo->plh_segs)) 1822 return 0; 1823 1824 assert_spin_locked(&lo->plh_inode->i_lock); 1825 1826 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 1827 if (should_free_lseg(&lseg->pls_range, return_range)) { 1828 dprintk("%s: marking lseg %p iomode %d " 1829 "offset %llu length %llu\n", __func__, 1830 lseg, lseg->pls_range.iomode, 1831 lseg->pls_range.offset, 1832 lseg->pls_range.length); 1833 if (mark_lseg_invalid(lseg, tmp_list)) 1834 continue; 1835 remaining++; 1836 set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); 1837 } 1838 1839 if (remaining) 1840 pnfs_set_plh_return_info(lo, return_range->iomode, seq); 1841 1842 return remaining; 1843 } 1844 1845 void pnfs_error_mark_layout_for_return(struct inode *inode, 1846 struct pnfs_layout_segment *lseg) 1847 { 1848 struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; 1849 struct pnfs_layout_range range = { 1850 .iomode = lseg->pls_range.iomode, 1851 .offset = 0, 1852 .length = NFS4_MAX_UINT64, 1853 }; 1854 LIST_HEAD(free_me); 1855 bool return_now = false; 1856 1857 spin_lock(&inode->i_lock); 1858 pnfs_set_plh_return_info(lo, range.iomode, lseg->pls_seq); 1859 /* 1860 * mark all matching lsegs so that we are sure to have no live 1861 * segments at hand when sending layoutreturn. See pnfs_put_lseg() 1862 * for how it works. 1863 */ 1864 if (!pnfs_mark_matching_lsegs_return(lo, &free_me, 1865 &range, lseg->pls_seq)) { 1866 nfs4_stateid stateid; 1867 enum pnfs_iomode iomode = lo->plh_return_iomode; 1868 1869 nfs4_stateid_copy(&stateid, &lo->plh_stateid); 1870 return_now = pnfs_prepare_layoutreturn(lo); 1871 spin_unlock(&inode->i_lock); 1872 if (return_now) 1873 pnfs_send_layoutreturn(lo, &stateid, iomode, false); 1874 } else { 1875 spin_unlock(&inode->i_lock); 1876 nfs_commit_inode(inode, 0); 1877 } 1878 pnfs_free_lseg_list(&free_me); 1879 } 1880 EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); 1881 1882 void 1883 pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 1884 { 1885 u64 rd_size = req->wb_bytes; 1886 1887 if (pgio->pg_lseg == NULL) { 1888 if (pgio->pg_dreq == NULL) 1889 rd_size = i_size_read(pgio->pg_inode) - req_offset(req); 1890 else 1891 rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); 1892 1893 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1894 req->wb_context, 1895 req_offset(req), 1896 rd_size, 1897 IOMODE_READ, 1898 false, 1899 GFP_KERNEL); 1900 if (IS_ERR(pgio->pg_lseg)) { 1901 pgio->pg_error = PTR_ERR(pgio->pg_lseg); 1902 pgio->pg_lseg = NULL; 1903 return; 1904 } 1905 } 1906 /* If no lseg, fall back to read through mds */ 1907 if (pgio->pg_lseg == NULL) 1908 nfs_pageio_reset_read_mds(pgio); 1909 1910 } 1911 EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); 1912 1913 void 1914 pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, 1915 struct nfs_page *req, u64 wb_size) 1916 { 1917 if (pgio->pg_lseg == NULL) { 1918 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1919 req->wb_context, 1920 req_offset(req), 1921 wb_size, 1922 IOMODE_RW, 1923 false, 1924 GFP_NOFS); 1925 if (IS_ERR(pgio->pg_lseg)) { 1926 pgio->pg_error = PTR_ERR(pgio->pg_lseg); 1927 pgio->pg_lseg = NULL; 1928 return; 1929 } 1930 } 1931 /* If no lseg, fall back to write through mds */ 1932 if (pgio->pg_lseg == NULL) 1933 nfs_pageio_reset_write_mds(pgio); 1934 } 1935 EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); 1936 1937 void 1938 pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc) 1939 { 1940 if (desc->pg_lseg) { 1941 pnfs_put_lseg(desc->pg_lseg); 1942 desc->pg_lseg = NULL; 1943 } 1944 } 1945 EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup); 1946 1947 /* 1948 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number 1949 * of bytes (maximum @req->wb_bytes) that can be coalesced. 1950 */ 1951 size_t 1952 pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, 1953 struct nfs_page *prev, struct nfs_page *req) 1954 { 1955 unsigned int size; 1956 u64 seg_end, req_start, seg_left; 1957 1958 size = nfs_generic_pg_test(pgio, prev, req); 1959 if (!size) 1960 return 0; 1961 1962 /* 1963 * 'size' contains the number of bytes left in the current page (up 1964 * to the original size asked for in @req->wb_bytes). 1965 * 1966 * Calculate how many bytes are left in the layout segment 1967 * and if there are less bytes than 'size', return that instead. 1968 * 1969 * Please also note that 'end_offset' is actually the offset of the 1970 * first byte that lies outside the pnfs_layout_range. FIXME? 1971 * 1972 */ 1973 if (pgio->pg_lseg) { 1974 seg_end = end_offset(pgio->pg_lseg->pls_range.offset, 1975 pgio->pg_lseg->pls_range.length); 1976 req_start = req_offset(req); 1977 WARN_ON_ONCE(req_start >= seg_end); 1978 /* start of request is past the last byte of this segment */ 1979 if (req_start >= seg_end) { 1980 /* reference the new lseg */ 1981 if (pgio->pg_ops->pg_cleanup) 1982 pgio->pg_ops->pg_cleanup(pgio); 1983 if (pgio->pg_ops->pg_init) 1984 pgio->pg_ops->pg_init(pgio, req); 1985 return 0; 1986 } 1987 1988 /* adjust 'size' iff there are fewer bytes left in the 1989 * segment than what nfs_generic_pg_test returned */ 1990 seg_left = seg_end - req_start; 1991 if (seg_left < size) 1992 size = (unsigned int)seg_left; 1993 } 1994 1995 return size; 1996 } 1997 EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); 1998 1999 int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr) 2000 { 2001 struct nfs_pageio_descriptor pgio; 2002 2003 /* Resend all requests through the MDS */ 2004 nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, 2005 hdr->completion_ops); 2006 set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags); 2007 return nfs_pageio_resend(&pgio, hdr); 2008 } 2009 EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); 2010 2011 static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr) 2012 { 2013 2014 dprintk("pnfs write error = %d\n", hdr->pnfs_error); 2015 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & 2016 PNFS_LAYOUTRET_ON_ERROR) { 2017 pnfs_return_layout(hdr->inode); 2018 } 2019 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) 2020 hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr); 2021 } 2022 2023 /* 2024 * Called by non rpc-based layout drivers 2025 */ 2026 void pnfs_ld_write_done(struct nfs_pgio_header *hdr) 2027 { 2028 if (likely(!hdr->pnfs_error)) { 2029 pnfs_set_layoutcommit(hdr->inode, hdr->lseg, 2030 hdr->mds_offset + hdr->res.count); 2031 hdr->mds_ops->rpc_call_done(&hdr->task, hdr); 2032 } 2033 trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); 2034 if (unlikely(hdr->pnfs_error)) 2035 pnfs_ld_handle_write_error(hdr); 2036 hdr->mds_ops->rpc_release(hdr); 2037 } 2038 EXPORT_SYMBOL_GPL(pnfs_ld_write_done); 2039 2040 static void 2041 pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, 2042 struct nfs_pgio_header *hdr) 2043 { 2044 struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); 2045 2046 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 2047 list_splice_tail_init(&hdr->pages, &mirror->pg_list); 2048 nfs_pageio_reset_write_mds(desc); 2049 mirror->pg_recoalesce = 1; 2050 } 2051 nfs_pgio_data_destroy(hdr); 2052 hdr->release(hdr); 2053 } 2054 2055 static enum pnfs_try_status 2056 pnfs_try_to_write_data(struct nfs_pgio_header *hdr, 2057 const struct rpc_call_ops *call_ops, 2058 struct pnfs_layout_segment *lseg, 2059 int how) 2060 { 2061 struct inode *inode = hdr->inode; 2062 enum pnfs_try_status trypnfs; 2063 struct nfs_server *nfss = NFS_SERVER(inode); 2064 2065 hdr->mds_ops = call_ops; 2066 2067 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, 2068 inode->i_ino, hdr->args.count, hdr->args.offset, how); 2069 trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how); 2070 if (trypnfs != PNFS_NOT_ATTEMPTED) 2071 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); 2072 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 2073 return trypnfs; 2074 } 2075 2076 static void 2077 pnfs_do_write(struct nfs_pageio_descriptor *desc, 2078 struct nfs_pgio_header *hdr, int how) 2079 { 2080 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; 2081 struct pnfs_layout_segment *lseg = desc->pg_lseg; 2082 enum pnfs_try_status trypnfs; 2083 2084 trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how); 2085 if (trypnfs == PNFS_NOT_ATTEMPTED) 2086 pnfs_write_through_mds(desc, hdr); 2087 } 2088 2089 static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) 2090 { 2091 pnfs_put_lseg(hdr->lseg); 2092 nfs_pgio_header_free(hdr); 2093 } 2094 2095 int 2096 pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 2097 { 2098 struct nfs_pgio_header *hdr; 2099 int ret; 2100 2101 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); 2102 if (!hdr) { 2103 desc->pg_error = -ENOMEM; 2104 return desc->pg_error; 2105 } 2106 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); 2107 2108 hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 2109 ret = nfs_generic_pgio(desc, hdr); 2110 if (!ret) 2111 pnfs_do_write(desc, hdr, desc->pg_ioflags); 2112 2113 return ret; 2114 } 2115 EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); 2116 2117 int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr) 2118 { 2119 struct nfs_pageio_descriptor pgio; 2120 2121 /* Resend all requests through the MDS */ 2122 nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops); 2123 return nfs_pageio_resend(&pgio, hdr); 2124 } 2125 EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); 2126 2127 static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr) 2128 { 2129 dprintk("pnfs read error = %d\n", hdr->pnfs_error); 2130 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & 2131 PNFS_LAYOUTRET_ON_ERROR) { 2132 pnfs_return_layout(hdr->inode); 2133 } 2134 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) 2135 hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr); 2136 } 2137 2138 /* 2139 * Called by non rpc-based layout drivers 2140 */ 2141 void pnfs_ld_read_done(struct nfs_pgio_header *hdr) 2142 { 2143 if (likely(!hdr->pnfs_error)) { 2144 __nfs4_read_done_cb(hdr); 2145 hdr->mds_ops->rpc_call_done(&hdr->task, hdr); 2146 } 2147 trace_nfs4_pnfs_read(hdr, hdr->pnfs_error); 2148 if (unlikely(hdr->pnfs_error)) 2149 pnfs_ld_handle_read_error(hdr); 2150 hdr->mds_ops->rpc_release(hdr); 2151 } 2152 EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 2153 2154 static void 2155 pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, 2156 struct nfs_pgio_header *hdr) 2157 { 2158 struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); 2159 2160 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 2161 list_splice_tail_init(&hdr->pages, &mirror->pg_list); 2162 nfs_pageio_reset_read_mds(desc); 2163 mirror->pg_recoalesce = 1; 2164 } 2165 nfs_pgio_data_destroy(hdr); 2166 hdr->release(hdr); 2167 } 2168 2169 /* 2170 * Call the appropriate parallel I/O subsystem read function. 2171 */ 2172 static enum pnfs_try_status 2173 pnfs_try_to_read_data(struct nfs_pgio_header *hdr, 2174 const struct rpc_call_ops *call_ops, 2175 struct pnfs_layout_segment *lseg) 2176 { 2177 struct inode *inode = hdr->inode; 2178 struct nfs_server *nfss = NFS_SERVER(inode); 2179 enum pnfs_try_status trypnfs; 2180 2181 hdr->mds_ops = call_ops; 2182 2183 dprintk("%s: Reading ino:%lu %u@%llu\n", 2184 __func__, inode->i_ino, hdr->args.count, hdr->args.offset); 2185 2186 trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr); 2187 if (trypnfs != PNFS_NOT_ATTEMPTED) 2188 nfs_inc_stats(inode, NFSIOS_PNFS_READ); 2189 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 2190 return trypnfs; 2191 } 2192 2193 /* Resend all requests through pnfs. */ 2194 void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) 2195 { 2196 struct nfs_pageio_descriptor pgio; 2197 2198 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 2199 nfs_pageio_init_read(&pgio, hdr->inode, false, 2200 hdr->completion_ops); 2201 hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr); 2202 } 2203 } 2204 EXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs); 2205 2206 static void 2207 pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) 2208 { 2209 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; 2210 struct pnfs_layout_segment *lseg = desc->pg_lseg; 2211 enum pnfs_try_status trypnfs; 2212 2213 trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); 2214 if (trypnfs == PNFS_TRY_AGAIN) 2215 pnfs_read_resend_pnfs(hdr); 2216 if (trypnfs == PNFS_NOT_ATTEMPTED || hdr->task.tk_status) 2217 pnfs_read_through_mds(desc, hdr); 2218 } 2219 2220 static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) 2221 { 2222 pnfs_put_lseg(hdr->lseg); 2223 nfs_pgio_header_free(hdr); 2224 } 2225 2226 int 2227 pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 2228 { 2229 struct nfs_pgio_header *hdr; 2230 int ret; 2231 2232 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); 2233 if (!hdr) { 2234 desc->pg_error = -ENOMEM; 2235 return desc->pg_error; 2236 } 2237 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); 2238 hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 2239 ret = nfs_generic_pgio(desc, hdr); 2240 if (!ret) 2241 pnfs_do_read(desc, hdr); 2242 return ret; 2243 } 2244 EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); 2245 2246 static void pnfs_clear_layoutcommitting(struct inode *inode) 2247 { 2248 unsigned long *bitlock = &NFS_I(inode)->flags; 2249 2250 clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); 2251 smp_mb__after_atomic(); 2252 wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); 2253 } 2254 2255 /* 2256 * There can be multiple RW segments. 2257 */ 2258 static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) 2259 { 2260 struct pnfs_layout_segment *lseg; 2261 2262 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { 2263 if (lseg->pls_range.iomode == IOMODE_RW && 2264 test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) 2265 list_add(&lseg->pls_lc_list, listp); 2266 } 2267 } 2268 2269 static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp) 2270 { 2271 struct pnfs_layout_segment *lseg, *tmp; 2272 2273 /* Matched by references in pnfs_set_layoutcommit */ 2274 list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) { 2275 list_del_init(&lseg->pls_lc_list); 2276 pnfs_put_lseg(lseg); 2277 } 2278 2279 pnfs_clear_layoutcommitting(inode); 2280 } 2281 2282 void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) 2283 { 2284 pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode); 2285 } 2286 EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); 2287 2288 void 2289 pnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg, 2290 loff_t end_pos) 2291 { 2292 struct nfs_inode *nfsi = NFS_I(inode); 2293 bool mark_as_dirty = false; 2294 2295 spin_lock(&inode->i_lock); 2296 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { 2297 nfsi->layout->plh_lwb = end_pos; 2298 mark_as_dirty = true; 2299 dprintk("%s: Set layoutcommit for inode %lu ", 2300 __func__, inode->i_ino); 2301 } else if (end_pos > nfsi->layout->plh_lwb) 2302 nfsi->layout->plh_lwb = end_pos; 2303 if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) { 2304 /* references matched in nfs4_layoutcommit_release */ 2305 pnfs_get_lseg(lseg); 2306 } 2307 spin_unlock(&inode->i_lock); 2308 dprintk("%s: lseg %p end_pos %llu\n", 2309 __func__, lseg, nfsi->layout->plh_lwb); 2310 2311 /* if pnfs_layoutcommit_inode() runs between inode locks, the next one 2312 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ 2313 if (mark_as_dirty) 2314 mark_inode_dirty_sync(inode); 2315 } 2316 EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); 2317 2318 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) 2319 { 2320 struct nfs_server *nfss = NFS_SERVER(data->args.inode); 2321 2322 if (nfss->pnfs_curr_ld->cleanup_layoutcommit) 2323 nfss->pnfs_curr_ld->cleanup_layoutcommit(data); 2324 pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list); 2325 } 2326 2327 /* 2328 * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and 2329 * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough 2330 * data to disk to allow the server to recover the data if it crashes. 2331 * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag 2332 * is off, and a COMMIT is sent to a data server, or 2333 * if WRITEs to a data server return NFS_DATA_SYNC. 2334 */ 2335 int 2336 pnfs_layoutcommit_inode(struct inode *inode, bool sync) 2337 { 2338 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 2339 struct nfs4_layoutcommit_data *data; 2340 struct nfs_inode *nfsi = NFS_I(inode); 2341 loff_t end_pos; 2342 int status; 2343 2344 if (!pnfs_layoutcommit_outstanding(inode)) 2345 return 0; 2346 2347 dprintk("--> %s inode %lu\n", __func__, inode->i_ino); 2348 2349 status = -EAGAIN; 2350 if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) { 2351 if (!sync) 2352 goto out; 2353 status = wait_on_bit_lock_action(&nfsi->flags, 2354 NFS_INO_LAYOUTCOMMITTING, 2355 nfs_wait_bit_killable, 2356 TASK_KILLABLE); 2357 if (status) 2358 goto out; 2359 } 2360 2361 status = -ENOMEM; 2362 /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */ 2363 data = kzalloc(sizeof(*data), GFP_NOFS); 2364 if (!data) 2365 goto clear_layoutcommitting; 2366 2367 status = 0; 2368 spin_lock(&inode->i_lock); 2369 if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) 2370 goto out_unlock; 2371 2372 INIT_LIST_HEAD(&data->lseg_list); 2373 pnfs_list_write_lseg(inode, &data->lseg_list); 2374 2375 end_pos = nfsi->layout->plh_lwb; 2376 2377 nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid); 2378 spin_unlock(&inode->i_lock); 2379 2380 data->args.inode = inode; 2381 data->cred = get_rpccred(nfsi->layout->plh_lc_cred); 2382 nfs_fattr_init(&data->fattr); 2383 data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; 2384 data->res.fattr = &data->fattr; 2385 data->args.lastbytewritten = end_pos - 1; 2386 data->res.server = NFS_SERVER(inode); 2387 2388 if (ld->prepare_layoutcommit) { 2389 status = ld->prepare_layoutcommit(&data->args); 2390 if (status) { 2391 put_rpccred(data->cred); 2392 spin_lock(&inode->i_lock); 2393 set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); 2394 if (end_pos > nfsi->layout->plh_lwb) 2395 nfsi->layout->plh_lwb = end_pos; 2396 goto out_unlock; 2397 } 2398 } 2399 2400 2401 status = nfs4_proc_layoutcommit(data, sync); 2402 out: 2403 if (status) 2404 mark_inode_dirty_sync(inode); 2405 dprintk("<-- %s status %d\n", __func__, status); 2406 return status; 2407 out_unlock: 2408 spin_unlock(&inode->i_lock); 2409 kfree(data); 2410 clear_layoutcommitting: 2411 pnfs_clear_layoutcommitting(inode); 2412 goto out; 2413 } 2414 EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode); 2415 2416 int 2417 pnfs_generic_sync(struct inode *inode, bool datasync) 2418 { 2419 return pnfs_layoutcommit_inode(inode, true); 2420 } 2421 EXPORT_SYMBOL_GPL(pnfs_generic_sync); 2422 2423 struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) 2424 { 2425 struct nfs4_threshold *thp; 2426 2427 thp = kzalloc(sizeof(*thp), GFP_NOFS); 2428 if (!thp) { 2429 dprintk("%s mdsthreshold allocation failed\n", __func__); 2430 return NULL; 2431 } 2432 return thp; 2433 } 2434 2435 #if IS_ENABLED(CONFIG_NFS_V4_2) 2436 int 2437 pnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags) 2438 { 2439 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 2440 struct nfs_server *server = NFS_SERVER(inode); 2441 struct nfs_inode *nfsi = NFS_I(inode); 2442 struct nfs42_layoutstat_data *data; 2443 struct pnfs_layout_hdr *hdr; 2444 int status = 0; 2445 2446 if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats) 2447 goto out; 2448 2449 if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS)) 2450 goto out; 2451 2452 if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags)) 2453 goto out; 2454 2455 spin_lock(&inode->i_lock); 2456 if (!NFS_I(inode)->layout) { 2457 spin_unlock(&inode->i_lock); 2458 goto out_clear_layoutstats; 2459 } 2460 hdr = NFS_I(inode)->layout; 2461 pnfs_get_layout_hdr(hdr); 2462 spin_unlock(&inode->i_lock); 2463 2464 data = kzalloc(sizeof(*data), gfp_flags); 2465 if (!data) { 2466 status = -ENOMEM; 2467 goto out_put; 2468 } 2469 2470 data->args.fh = NFS_FH(inode); 2471 data->args.inode = inode; 2472 nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid); 2473 status = ld->prepare_layoutstats(&data->args); 2474 if (status) 2475 goto out_free; 2476 2477 status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data); 2478 2479 out: 2480 dprintk("%s returns %d\n", __func__, status); 2481 return status; 2482 2483 out_free: 2484 kfree(data); 2485 out_put: 2486 pnfs_put_layout_hdr(hdr); 2487 out_clear_layoutstats: 2488 smp_mb__before_atomic(); 2489 clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags); 2490 smp_mb__after_atomic(); 2491 goto out; 2492 } 2493 EXPORT_SYMBOL_GPL(pnfs_report_layoutstat); 2494 #endif 2495 2496 unsigned int layoutstats_timer; 2497 module_param(layoutstats_timer, uint, 0644); 2498 EXPORT_SYMBOL_GPL(layoutstats_timer); 2499