/*
 *  pNFS functions to call and manage layout drivers.
 *
 *  Copyright (c) 2002 [year of first publication]
 *  The Regents of the University of Michigan
 *  All Rights Reserved
 *
 *  Dean Hildebrand <dhildebz@umich.edu>
 *
 *  Permission is granted to use, copy, create derivative works, and
 *  redistribute this software and such derivative works for any purpose,
 *  so long as the name of the University of Michigan is not used in
 *  any advertising or publicity pertaining to the use or distribution
 *  of this software without specific, written prior authorization. If
 *  the above copyright notice or any other identification of the
 *  University of Michigan is included in any copy of any portion of
 *  this software, then the disclaimer below must also be included.
 *
 *  This software is provided as is, without representation or warranty
 *  of any kind either express or implied, including without limitation
 *  the implied warranties of merchantability, fitness for a particular
 *  purpose, or noninfringement.  The Regents of the University of
 *  Michigan shall not be liable for any damages, including special,
 *  indirect, incidental, or consequential damages, with respect to any
 *  claim arising out of or in connection with the use of the software,
 *  even if it has been or is hereafter advised of the possibility of
 *  such damages.
 */

#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
#include "internal.h"
#include "pnfs.h"
#include "iostat.h"
#include "nfs4trace.h"
#include "delegation.h"
#include "nfs42.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS
#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)

/* Locking:
 *
 * pnfs_spinlock:
 *	protects pnfs_modules_tbl.
 */
static DEFINE_SPINLOCK(pnfs_spinlock);

/*
 * pnfs_modules_tbl holds all pnfs modules
 */
static LIST_HEAD(pnfs_modules_tbl);

static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo);

/* Return the registered pnfs layout driver module matching given id */
static struct pnfs_layoutdriver_type *
find_pnfs_driver_locked(u32 id)
{
	struct pnfs_layoutdriver_type *local;

	list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
		if (local->id == id)
			goto out;
	local = NULL;
out:
	dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
	return local;
}

static struct pnfs_layoutdriver_type *
find_pnfs_driver(u32 id)
{
	struct pnfs_layoutdriver_type *local;

	spin_lock(&pnfs_spinlock);
	local = find_pnfs_driver_locked(id);
	if (local != NULL && !try_module_get(local->owner)) {
		dprintk("%s: Could not grab reference on module\n", __func__);
		local = NULL;
	}
	spin_unlock(&pnfs_spinlock);
	return local;
}
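/*
 * Illustrative note: a successful find_pnfs_driver() holds a reference on
 * the driver module via try_module_get(), so every caller that keeps the
 * returned pointer must eventually drop it with module_put(), e.g.
 *
 *	ld_type = find_pnfs_driver(id);
 *	if (ld_type) {
 *		...
 *		module_put(ld_type->owner);	// pairs with find_pnfs_driver()
 *	}
 *
 * unset_pnfs_layoutdriver() below performs exactly this release for the
 * driver selected at mount time.
 */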
void
unset_pnfs_layoutdriver(struct nfs_server *nfss)
{
	if (nfss->pnfs_curr_ld) {
		if (nfss->pnfs_curr_ld->clear_layoutdriver)
			nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
		/* Decrement the MDS count. Purge the deviceid cache if zero */
		if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count))
			nfs4_deviceid_purge_client(nfss->nfs_client);
		module_put(nfss->pnfs_curr_ld->owner);
	}
	nfss->pnfs_curr_ld = NULL;
}

/*
 * Try to set the server's pnfs module to the pnfs layout type specified by id.
 * Currently only one pNFS layout driver per filesystem is supported.
 *
 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
 */
void
set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
		      u32 id)
{
	struct pnfs_layoutdriver_type *ld_type = NULL;

	if (id == 0)
		goto out_no_driver;
	if (!(server->nfs_client->cl_exchange_flags &
		 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
		printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n",
			__func__, id, server->nfs_client->cl_exchange_flags);
		goto out_no_driver;
	}
	ld_type = find_pnfs_driver(id);
	if (!ld_type) {
		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
		ld_type = find_pnfs_driver(id);
		if (!ld_type) {
			dprintk("%s: No pNFS module found for %u.\n",
				__func__, id);
			goto out_no_driver;
		}
	}
	server->pnfs_curr_ld = ld_type;
	if (ld_type->set_layoutdriver
	    && ld_type->set_layoutdriver(server, mntfh)) {
		printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
			"driver %u.\n", __func__, id);
		module_put(ld_type->owner);
		goto out_no_driver;
	}
	/* Bump the MDS count */
	atomic_inc(&server->nfs_client->cl_mds_count);

	dprintk("%s: pNFS module for %u set\n", __func__, id);
	return;

out_no_driver:
	dprintk("%s: Using NFSv4 I/O\n", __func__);
	server->pnfs_curr_ld = NULL;
}

int
pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	int status = -EINVAL;
	struct pnfs_layoutdriver_type *tmp;

	if (ld_type->id == 0) {
		printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__);
		return status;
	}
	if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
		printk(KERN_ERR "NFS: %s Layout driver must provide "
		       "alloc_lseg and free_lseg.\n", __func__);
		return status;
	}

	spin_lock(&pnfs_spinlock);
	tmp = find_pnfs_driver_locked(ld_type->id);
	if (!tmp) {
		list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
		status = 0;
		dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
			ld_type->name);
	} else {
		printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n",
			__func__, ld_type->id);
	}
	spin_unlock(&pnfs_spinlock);

	return status;
}
EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);

void
pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
	spin_lock(&pnfs_spinlock);
	list_del(&ld_type->pnfs_tblid);
	spin_unlock(&pnfs_spinlock);
}
EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);

/*
 * pNFS client layout cache
 */

/* Need to hold i_lock if caller does not already hold reference */
void
pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo)
{
	atomic_inc(&lo->plh_refcount);
}

static struct pnfs_layout_hdr *
pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
{
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
	return ld->alloc_layout_hdr(ino, gfp_flags);
}

static void
pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct nfs_server *server = NFS_SERVER(lo->plh_inode);
	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;

	if (!list_empty(&lo->plh_layouts)) {
		struct nfs_client *clp = server->nfs_client;

		spin_lock(&clp->cl_lock);
		list_del_init(&lo->plh_layouts);
		spin_unlock(&clp->cl_lock);
	}
	put_rpccred(lo->plh_lc_cred);
	return ld->free_layout_hdr(lo);
}
static void
pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct nfs_inode *nfsi = NFS_I(lo->plh_inode);
	dprintk("%s: freeing layout cache %p\n", __func__, lo);
	nfsi->layout = NULL;
	/* Reset MDS Threshold I/O counters */
	nfsi->write_io = 0;
	nfsi->read_io = 0;
}

void
pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct inode *inode = lo->plh_inode;

	pnfs_layoutreturn_before_put_layout_hdr(lo);

	if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
		if (!list_empty(&lo->plh_segs))
			WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
		pnfs_detach_layout_hdr(lo);
		spin_unlock(&inode->i_lock);
		pnfs_free_layout_hdr(lo);
	}
}

/*
 * Mark a pnfs_layout_hdr and all associated layout segments as invalid
 *
 * In order to continue using the pnfs_layout_hdr, a full recovery
 * is required.
 * Note that caller must hold inode->i_lock.
 */
static int
pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
		struct list_head *lseg_list)
{
	struct pnfs_layout_range range = {
		.iomode = IOMODE_ANY,
		.offset = 0,
		.length = NFS4_MAX_UINT64,
	};

	set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
	return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range, 0);
}

static int
pnfs_iomode_to_fail_bit(u32 iomode)
{
	return iomode == IOMODE_RW ?
		NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
}

static void
pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
{
	lo->plh_retry_timestamp = jiffies;
	if (!test_and_set_bit(fail_bit, &lo->plh_flags))
		atomic_inc(&lo->plh_refcount);
}

static void
pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
{
	if (test_and_clear_bit(fail_bit, &lo->plh_flags))
		atomic_dec(&lo->plh_refcount);
}
"RW" : "READ"); 316 } 317 318 static bool 319 pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode) 320 { 321 unsigned long start, end; 322 int fail_bit = pnfs_iomode_to_fail_bit(iomode); 323 324 if (test_bit(fail_bit, &lo->plh_flags) == 0) 325 return false; 326 end = jiffies; 327 start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT; 328 if (!time_in_range(lo->plh_retry_timestamp, start, end)) { 329 /* It is time to retry the failed layoutgets */ 330 pnfs_layout_clear_fail_bit(lo, fail_bit); 331 return false; 332 } 333 return true; 334 } 335 336 static void 337 init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) 338 { 339 INIT_LIST_HEAD(&lseg->pls_list); 340 INIT_LIST_HEAD(&lseg->pls_lc_list); 341 atomic_set(&lseg->pls_refcount, 1); 342 smp_mb(); 343 set_bit(NFS_LSEG_VALID, &lseg->pls_flags); 344 lseg->pls_layout = lo; 345 } 346 347 static void pnfs_free_lseg(struct pnfs_layout_segment *lseg) 348 { 349 struct inode *ino = lseg->pls_layout->plh_inode; 350 351 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); 352 } 353 354 static void 355 pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, 356 struct pnfs_layout_segment *lseg) 357 { 358 struct inode *inode = lo->plh_inode; 359 360 WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 361 list_del_init(&lseg->pls_list); 362 /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ 363 atomic_dec(&lo->plh_refcount); 364 if (list_empty(&lo->plh_segs)) 365 clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); 366 rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); 367 } 368 369 void 370 pnfs_put_lseg(struct pnfs_layout_segment *lseg) 371 { 372 struct pnfs_layout_hdr *lo; 373 struct inode *inode; 374 375 if (!lseg) 376 return; 377 378 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, 379 atomic_read(&lseg->pls_refcount), 380 test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 381 382 lo = lseg->pls_layout; 383 inode = lo->plh_inode; 384 385 if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { 386 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { 387 spin_unlock(&inode->i_lock); 388 return; 389 } 390 pnfs_get_layout_hdr(lo); 391 pnfs_layout_remove_lseg(lo, lseg); 392 spin_unlock(&inode->i_lock); 393 pnfs_free_lseg(lseg); 394 pnfs_put_layout_hdr(lo); 395 } 396 } 397 EXPORT_SYMBOL_GPL(pnfs_put_lseg); 398 399 static void pnfs_free_lseg_async_work(struct work_struct *work) 400 { 401 struct pnfs_layout_segment *lseg; 402 struct pnfs_layout_hdr *lo; 403 404 lseg = container_of(work, struct pnfs_layout_segment, pls_work); 405 lo = lseg->pls_layout; 406 407 pnfs_free_lseg(lseg); 408 pnfs_put_layout_hdr(lo); 409 } 410 411 static void pnfs_free_lseg_async(struct pnfs_layout_segment *lseg) 412 { 413 INIT_WORK(&lseg->pls_work, pnfs_free_lseg_async_work); 414 schedule_work(&lseg->pls_work); 415 } 416 417 void 418 pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg) 419 { 420 if (!lseg) 421 return; 422 423 assert_spin_locked(&lseg->pls_layout->plh_inode->i_lock); 424 425 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, 426 atomic_read(&lseg->pls_refcount), 427 test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 428 if (atomic_dec_and_test(&lseg->pls_refcount)) { 429 struct pnfs_layout_hdr *lo = lseg->pls_layout; 430 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) 431 return; 432 pnfs_get_layout_hdr(lo); 433 pnfs_layout_remove_lseg(lo, lseg); 434 pnfs_free_lseg_async(lseg); 435 } 436 } 437 EXPORT_SYMBOL_GPL(pnfs_put_lseg_locked); 438 439 static u64 440 end_offset(u64 start, u64 len) 441 { 442 u64 end; 443 444 end = start + len; 
/*
 * is l2 fully contained in l1?
 *   start1                             end1
 *   [----------------------------------)
 *           start2           end2
 *           [----------------)
 */
static bool
pnfs_lseg_range_contained(const struct pnfs_layout_range *l1,
		 const struct pnfs_layout_range *l2)
{
	u64 start1 = l1->offset;
	u64 end1 = end_offset(start1, l1->length);
	u64 start2 = l2->offset;
	u64 end2 = end_offset(start2, l2->length);

	return (start1 <= start2) && (end1 >= end2);
}

/*
 * are l1 and l2 intersecting?
 *   start1                             end1
 *   [----------------------------------)
 *                          start2           end2
 *                          [----------------)
 */
static bool
pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1,
		    const struct pnfs_layout_range *l2)
{
	u64 start1 = l1->offset;
	u64 end1 = end_offset(start1, l1->length);
	u64 start2 = l2->offset;
	u64 end2 = end_offset(start2, l2->length);

	return (end1 == NFS4_MAX_UINT64 || end1 > start2) &&
	       (end2 == NFS4_MAX_UINT64 || end2 > start1);
}

static bool
should_free_lseg(const struct pnfs_layout_range *lseg_range,
		 const struct pnfs_layout_range *recall_range)
{
	return (recall_range->iomode == IOMODE_ANY ||
		lseg_range->iomode == recall_range->iomode) &&
	       pnfs_lseg_range_intersecting(lseg_range, recall_range);
}

static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
		struct list_head *tmp_list)
{
	if (!atomic_dec_and_test(&lseg->pls_refcount))
		return false;
	pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
	list_add(&lseg->pls_list, tmp_list);
	return true;
}

/* Returns 1 if lseg is removed from list, 0 otherwise */
static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
		struct list_head *tmp_list)
{
	int rv = 0;

	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
		/* Remove the reference keeping the lseg in the
		 * list.  It will now be removed when all
		 * outstanding io is finished.
		 */
		dprintk("%s: lseg %p ref %d\n", __func__, lseg,
			atomic_read(&lseg->pls_refcount));
		if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
			rv = 1;
	}
	return rv;
}

/*
 * Compare 2 layout stateid sequence ids, to see which is newer,
 * taking into account wraparound issues.
 */
static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
{
	return (s32)(s1 - s2) > 0;
}
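/*
 * Illustrative note: the signed-difference trick above handles seqid
 * wraparound. For example:
 *
 *	pnfs_seqid_is_newer(2, 1)          -> true  (2 - 1 = 1 > 0)
 *	pnfs_seqid_is_newer(1, 0xffffffff) -> true  (the u32 difference is 2,
 *	                                    so seqid 1 is "newer" than a
 *	                                    seqid that just wrapped)
 *	pnfs_seqid_is_newer(1, 2)          -> false
 *
 * This works as long as the two compared seqids are less than 2^31 apart.
 */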
/**
 * pnfs_mark_matching_lsegs_invalid - tear down lsegs or mark them for later
 * @lo: layout header containing the lsegs
 * @tmp_list: list head where doomed lsegs should go
 * @recall_range: optional recall range argument to match (may be NULL)
 * @seq: only invalidate lsegs obtained prior to this sequence (may be 0)
 *
 * Walk the list of lsegs in the layout header, and tear down any that should
 * be destroyed. If "recall_range" is specified then the segment must match
 * that range. If "seq" is non-zero, then only match segments that were handed
 * out at or before that sequence.
 *
 * Returns number of matching invalid lsegs remaining in list after scanning
 * it and purging them.
 */
int
pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
			    struct list_head *tmp_list,
			    const struct pnfs_layout_range *recall_range,
			    u32 seq)
{
	struct pnfs_layout_segment *lseg, *next;
	int remaining = 0;

	dprintk("%s:Begin lo %p\n", __func__, lo);

	if (list_empty(&lo->plh_segs))
		return 0;
	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
		if (!recall_range ||
		    should_free_lseg(&lseg->pls_range, recall_range)) {
			if (seq && pnfs_seqid_is_newer(lseg->pls_seq, seq))
				continue;
			dprintk("%s: freeing lseg %p iomode %d seq %u "
				"offset %llu length %llu\n", __func__,
				lseg, lseg->pls_range.iomode, lseg->pls_seq,
				lseg->pls_range.offset, lseg->pls_range.length);
			if (!mark_lseg_invalid(lseg, tmp_list))
				remaining++;
		}
	dprintk("%s:Return %i\n", __func__, remaining);
	return remaining;
}

/* note free_me must contain lsegs from a single layout_hdr */
void
pnfs_free_lseg_list(struct list_head *free_me)
{
	struct pnfs_layout_segment *lseg, *tmp;

	if (list_empty(free_me))
		return;

	list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
		list_del(&lseg->pls_list);
		pnfs_free_lseg(lseg);
	}
}

void
pnfs_destroy_layout(struct nfs_inode *nfsi)
{
	struct pnfs_layout_hdr *lo;
	LIST_HEAD(tmp_list);

	spin_lock(&nfsi->vfs_inode.i_lock);
	lo = nfsi->layout;
	if (lo) {
		pnfs_get_layout_hdr(lo);
		pnfs_mark_layout_stateid_invalid(lo, &tmp_list);
		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
		spin_unlock(&nfsi->vfs_inode.i_lock);
		pnfs_free_lseg_list(&tmp_list);
		pnfs_put_layout_hdr(lo);
	} else
		spin_unlock(&nfsi->vfs_inode.i_lock);
}
EXPORT_SYMBOL_GPL(pnfs_destroy_layout);

static bool
pnfs_layout_add_bulk_destroy_list(struct inode *inode,
		struct list_head *layout_list)
{
	struct pnfs_layout_hdr *lo;
	bool ret = false;

	spin_lock(&inode->i_lock);
	lo = NFS_I(inode)->layout;
	if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
		pnfs_get_layout_hdr(lo);
		list_add(&lo->plh_bulk_destroy, layout_list);
		ret = true;
	}
	spin_unlock(&inode->i_lock);
	return ret;
}

/* Caller must hold rcu_read_lock and clp->cl_lock */
static int
pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
		struct nfs_server *server,
		struct list_head *layout_list)
{
	struct pnfs_layout_hdr *lo, *next;
	struct inode *inode;

	list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
		inode = igrab(lo->plh_inode);
		if (inode == NULL)
			continue;
		list_del_init(&lo->plh_layouts);
		if (pnfs_layout_add_bulk_destroy_list(inode, layout_list))
			continue;
		rcu_read_unlock();
		spin_unlock(&clp->cl_lock);
		iput(inode);
		spin_lock(&clp->cl_lock);
		rcu_read_lock();
		return -EAGAIN;
	}
	return 0;
}
static int
pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
		bool is_bulk_recall)
{
	struct pnfs_layout_hdr *lo;
	struct inode *inode;
	LIST_HEAD(lseg_list);
	int ret = 0;

	while (!list_empty(layout_list)) {
		lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
				plh_bulk_destroy);
		dprintk("%s freeing layout for inode %lu\n", __func__,
			lo->plh_inode->i_ino);
		inode = lo->plh_inode;

		pnfs_layoutcommit_inode(inode, false);

		spin_lock(&inode->i_lock);
		list_del_init(&lo->plh_bulk_destroy);
		if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
			if (is_bulk_recall)
				set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
			ret = -EAGAIN;
		}
		spin_unlock(&inode->i_lock);
		pnfs_free_lseg_list(&lseg_list);
		/* Free all lsegs that are attached to commit buckets */
		nfs_commit_inode(inode, 0);
		pnfs_put_layout_hdr(lo);
		iput(inode);
	}
	return ret;
}

int
pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
		struct nfs_fsid *fsid,
		bool is_recall)
{
	struct nfs_server *server;
	LIST_HEAD(layout_list);

	spin_lock(&clp->cl_lock);
	rcu_read_lock();
restart:
	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
		if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
			continue;
		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
				server,
				&layout_list) != 0)
			goto restart;
	}
	rcu_read_unlock();
	spin_unlock(&clp->cl_lock);

	if (list_empty(&layout_list))
		return 0;
	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
}

int
pnfs_destroy_layouts_byclid(struct nfs_client *clp,
		bool is_recall)
{
	struct nfs_server *server;
	LIST_HEAD(layout_list);

	spin_lock(&clp->cl_lock);
	rcu_read_lock();
restart:
	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
					server,
					&layout_list) != 0)
			goto restart;
	}
	rcu_read_unlock();
	spin_unlock(&clp->cl_lock);

	if (list_empty(&layout_list))
		return 0;
	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
}

/*
 * Called by the state manager to remove all layouts established under an
 * expired lease.
 */
void
pnfs_destroy_all_layouts(struct nfs_client *clp)
{
	nfs4_deviceid_mark_client_invalid(clp);
	nfs4_deviceid_purge_client(clp);

	pnfs_destroy_layouts_byclid(clp, false);
}

/* update lo->plh_stateid with new if is more recent */
void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
			bool update_barrier)
{
	u32 oldseq, newseq, new_barrier;
	int empty = list_empty(&lo->plh_segs);

	oldseq = be32_to_cpu(lo->plh_stateid.seqid);
	newseq = be32_to_cpu(new->seqid);
	if (empty || pnfs_seqid_is_newer(newseq, oldseq)) {
		nfs4_stateid_copy(&lo->plh_stateid, new);
		if (update_barrier) {
			new_barrier = be32_to_cpu(new->seqid);
		} else {
			/* Because of wraparound, we want to keep the barrier
			 * "close" to the current seqids.
			 */
			new_barrier = newseq - atomic_read(&lo->plh_outstanding);
		}
		if (empty || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
			lo->plh_barrier = new_barrier;
	}
}
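/*
 * Illustrative note: when update_barrier is false, the barrier is kept
 * "close" to the current seqid by backing off by the number of LAYOUTGETs
 * still in flight. For example, with newseq == 10 and plh_outstanding == 2,
 * the new barrier is 8: replies carrying seqid 9 or 10 are still acceptable
 * to pnfs_layout_stateid_blocked() below, while anything at or below 8 is
 * treated as stale.
 */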
static bool
pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo,
		const nfs4_stateid *stateid)
{
	u32 seqid = be32_to_cpu(stateid->seqid);

	return !pnfs_seqid_is_newer(seqid, lo->plh_barrier);
}

static bool
pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo)
{
	return lo->plh_block_lgets ||
		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
}

/*
 * Get layout from server.
 *    for now, assume that whole file layouts are requested.
 *    arg->offset: 0
 *    arg->length: all ones
 */
static struct pnfs_layout_segment *
send_layoutget(struct pnfs_layout_hdr *lo,
	   struct nfs_open_context *ctx,
	   nfs4_stateid *stateid,
	   const struct pnfs_layout_range *range,
	   long *timeout, gfp_t gfp_flags)
{
	struct inode *ino = lo->plh_inode;
	struct nfs_server *server = NFS_SERVER(ino);
	struct nfs4_layoutget *lgp;
	loff_t i_size;

	dprintk("--> %s\n", __func__);

	/*
	 * Synchronously retrieve layout information from server and
	 * store in lseg. If we race with a concurrent seqid morphing
	 * op, then re-send the LAYOUTGET.
	 */
	lgp = kzalloc(sizeof(*lgp), gfp_flags);
	if (lgp == NULL)
		return ERR_PTR(-ENOMEM);

	i_size = i_size_read(ino);

	lgp->args.minlength = PAGE_SIZE;
	if (lgp->args.minlength > range->length)
		lgp->args.minlength = range->length;
	if (range->iomode == IOMODE_READ) {
		if (range->offset >= i_size)
			lgp->args.minlength = 0;
		else if (i_size - range->offset < lgp->args.minlength)
			lgp->args.minlength = i_size - range->offset;
	}
	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
	pnfs_copy_range(&lgp->args.range, range);
	lgp->args.type = server->pnfs_curr_ld->id;
	lgp->args.inode = ino;
	lgp->args.ctx = get_nfs_open_context(ctx);
	nfs4_stateid_copy(&lgp->args.stateid, stateid);
	lgp->gfp_flags = gfp_flags;
	lgp->cred = lo->plh_lc_cred;

	return nfs4_proc_layoutget(lgp, timeout, gfp_flags);
}

static void pnfs_clear_layoutcommit(struct inode *inode,
		struct list_head *head)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	struct pnfs_layout_segment *lseg, *tmp;

	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
		return;
	list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
		if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
			continue;
		pnfs_lseg_dec_and_remove_zero(lseg, head);
	}
}

void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
{
	clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags);
	smp_mb__after_atomic();
	wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
	rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
}

static bool
pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
{
	if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
		return false;
	lo->plh_return_iomode = 0;
	lo->plh_return_seq = 0;
	pnfs_get_layout_hdr(lo);
	clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
	return true;
}

static int
pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
		       enum pnfs_iomode iomode, bool sync)
{
	struct inode *ino = lo->plh_inode;
	struct nfs4_layoutreturn *lrp;
	int status = 0;

	lrp = kzalloc(sizeof(*lrp), GFP_NOFS);
	if (unlikely(lrp == NULL)) {
		status = -ENOMEM;
		spin_lock(&ino->i_lock);
		pnfs_clear_layoutreturn_waitbit(lo);
		spin_unlock(&ino->i_lock);
		pnfs_put_layout_hdr(lo);
		goto out;
	}

	nfs4_stateid_copy(&lrp->args.stateid, stateid);
	lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
	lrp->args.inode = ino;
	lrp->args.range.iomode = iomode;
	lrp->args.range.offset = 0;
	lrp->args.range.length = NFS4_MAX_UINT64;
	lrp->args.layout = lo;
	lrp->clp = NFS_SERVER(ino)->nfs_client;
	lrp->cred = lo->plh_lc_cred;

	status = nfs4_proc_layoutreturn(lrp, sync);
out:
	dprintk("<-- %s status: %d\n", __func__, status);
	return status;
}
dprintk("<-- %s status: %d\n", __func__, status); 917 return status; 918 } 919 920 /* Return true if layoutreturn is needed */ 921 static bool 922 pnfs_layout_need_return(struct pnfs_layout_hdr *lo) 923 { 924 struct pnfs_layout_segment *s; 925 926 if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) 927 return false; 928 929 /* Defer layoutreturn until all lsegs are done */ 930 list_for_each_entry(s, &lo->plh_segs, pls_list) { 931 if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags)) 932 return false; 933 } 934 935 return true; 936 } 937 938 static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo) 939 { 940 struct inode *inode= lo->plh_inode; 941 942 if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) 943 return; 944 spin_lock(&inode->i_lock); 945 if (pnfs_layout_need_return(lo)) { 946 nfs4_stateid stateid; 947 enum pnfs_iomode iomode; 948 bool send; 949 950 nfs4_stateid_copy(&stateid, &lo->plh_stateid); 951 stateid.seqid = cpu_to_be32(lo->plh_return_seq); 952 iomode = lo->plh_return_iomode; 953 send = pnfs_prepare_layoutreturn(lo); 954 spin_unlock(&inode->i_lock); 955 if (send) { 956 /* Send an async layoutreturn so we dont deadlock */ 957 pnfs_send_layoutreturn(lo, &stateid, iomode, false); 958 } 959 } else 960 spin_unlock(&inode->i_lock); 961 } 962 963 /* 964 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr 965 * when the layout segment list is empty. 966 * 967 * Note that a pnfs_layout_hdr can exist with an empty layout segment 968 * list when LAYOUTGET has failed, or when LAYOUTGET succeeded, but the 969 * deviceid is marked invalid. 970 */ 971 int 972 _pnfs_return_layout(struct inode *ino) 973 { 974 struct pnfs_layout_hdr *lo = NULL; 975 struct nfs_inode *nfsi = NFS_I(ino); 976 LIST_HEAD(tmp_list); 977 nfs4_stateid stateid; 978 int status = 0, empty; 979 bool send; 980 981 dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); 982 983 spin_lock(&ino->i_lock); 984 lo = nfsi->layout; 985 if (!lo) { 986 spin_unlock(&ino->i_lock); 987 dprintk("NFS: %s no layout to return\n", __func__); 988 goto out; 989 } 990 nfs4_stateid_copy(&stateid, &nfsi->layout->plh_stateid); 991 /* Reference matched in nfs4_layoutreturn_release */ 992 pnfs_get_layout_hdr(lo); 993 empty = list_empty(&lo->plh_segs); 994 pnfs_clear_layoutcommit(ino, &tmp_list); 995 pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0); 996 997 if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { 998 struct pnfs_layout_range range = { 999 .iomode = IOMODE_ANY, 1000 .offset = 0, 1001 .length = NFS4_MAX_UINT64, 1002 }; 1003 NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range); 1004 } 1005 1006 /* Don't send a LAYOUTRETURN if list was initially empty */ 1007 if (empty) { 1008 spin_unlock(&ino->i_lock); 1009 dprintk("NFS: %s no layout segments to return\n", __func__); 1010 goto out_put_layout_hdr; 1011 } 1012 1013 set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); 1014 send = pnfs_prepare_layoutreturn(lo); 1015 spin_unlock(&ino->i_lock); 1016 pnfs_free_lseg_list(&tmp_list); 1017 if (send) 1018 status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); 1019 out_put_layout_hdr: 1020 pnfs_put_layout_hdr(lo); 1021 out: 1022 dprintk("<-- %s status: %d\n", __func__, status); 1023 return status; 1024 } 1025 EXPORT_SYMBOL_GPL(_pnfs_return_layout); 1026 1027 int 1028 pnfs_commit_and_return_layout(struct inode *inode) 1029 { 1030 struct pnfs_layout_hdr *lo; 1031 int ret; 1032 1033 spin_lock(&inode->i_lock); 1034 lo = NFS_I(inode)->layout; 1035 if (lo == NULL) { 1036 
int
pnfs_commit_and_return_layout(struct inode *inode)
{
	struct pnfs_layout_hdr *lo;
	int ret;

	spin_lock(&inode->i_lock);
	lo = NFS_I(inode)->layout;
	if (lo == NULL) {
		spin_unlock(&inode->i_lock);
		return 0;
	}
	pnfs_get_layout_hdr(lo);
	/* Block new layoutgets and read/write to ds */
	lo->plh_block_lgets++;
	spin_unlock(&inode->i_lock);
	filemap_fdatawait(inode->i_mapping);
	ret = pnfs_layoutcommit_inode(inode, true);
	if (ret == 0)
		ret = _pnfs_return_layout(inode);
	spin_lock(&inode->i_lock);
	lo->plh_block_lgets--;
	spin_unlock(&inode->i_lock);
	pnfs_put_layout_hdr(lo);
	return ret;
}

bool pnfs_roc(struct inode *ino)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct nfs_open_context *ctx;
	struct nfs4_state *state;
	struct pnfs_layout_hdr *lo;
	struct pnfs_layout_segment *lseg, *tmp;
	nfs4_stateid stateid;
	LIST_HEAD(tmp_list);
	bool found = false, layoutreturn = false, roc = false;

	spin_lock(&ino->i_lock);
	lo = nfsi->layout;
	if (!lo || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
		goto out_noroc;

	/* no roc if we hold a delegation */
	if (nfs4_check_delegation(ino, FMODE_READ))
		goto out_noroc;

	list_for_each_entry(ctx, &nfsi->open_files, list) {
		state = ctx->state;
		/* Don't return layout if there is open file state */
		if (state != NULL && state->state != 0)
			goto out_noroc;
	}

	nfs4_stateid_copy(&stateid, &lo->plh_stateid);
	/* always send layoutreturn if being marked so */
	if (test_and_clear_bit(NFS_LAYOUT_RETURN_REQUESTED,
				   &lo->plh_flags))
		layoutreturn = pnfs_prepare_layoutreturn(lo);

	list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
		/* If we are sending layoutreturn, invalidate all valid lsegs */
		if (layoutreturn || test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
			mark_lseg_invalid(lseg, &tmp_list);
			found = true;
		}
	/* ROC in two conditions:
	 * 1. there are ROC lsegs
	 * 2. we don't send layoutreturn
	 */
	if (found && !layoutreturn) {
		/* lo ref dropped in pnfs_roc_release() */
		pnfs_get_layout_hdr(lo);
		roc = true;
	}

out_noroc:
	spin_unlock(&ino->i_lock);
	pnfs_free_lseg_list(&tmp_list);
	pnfs_layoutcommit_inode(ino, true);
	if (layoutreturn)
		pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
	return roc;
}

void pnfs_roc_release(struct inode *ino)
{
	struct pnfs_layout_hdr *lo;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	pnfs_clear_layoutreturn_waitbit(lo);
	if (atomic_dec_and_test(&lo->plh_refcount)) {
		pnfs_detach_layout_hdr(lo);
		spin_unlock(&ino->i_lock);
		pnfs_free_layout_hdr(lo);
	} else
		spin_unlock(&ino->i_lock);
}

void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
{
	struct pnfs_layout_hdr *lo;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	pnfs_mark_layout_returned_if_empty(lo);
	if (pnfs_seqid_is_newer(barrier, lo->plh_barrier))
		lo->plh_barrier = barrier;
	spin_unlock(&ino->i_lock);
	trace_nfs4_layoutreturn_on_close(ino, 0);
}

void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_hdr *lo;
	u32 current_seqid;

	spin_lock(&ino->i_lock);
	lo = nfsi->layout;
	current_seqid = be32_to_cpu(lo->plh_stateid.seqid);

	/* Since close does not return a layout stateid for use as
	 * a barrier, we choose the worst-case barrier.
	 */
	*barrier = current_seqid + atomic_read(&lo->plh_outstanding);
	spin_unlock(&ino->i_lock);
}
bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_hdr *lo;
	bool sleep = false;

	/* we might not have grabbed lo reference. so need to check under
	 * i_lock */
	spin_lock(&ino->i_lock);
	lo = nfsi->layout;
	if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
		sleep = true;
	spin_unlock(&ino->i_lock);

	if (sleep)
		rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);

	return sleep;
}

/*
 * Compare two layout segments for sorting into layout cache.
 * We want to preferentially return RW over RO layouts, so ensure those
 * are seen first.
 */
static s64
pnfs_lseg_range_cmp(const struct pnfs_layout_range *l1,
	   const struct pnfs_layout_range *l2)
{
	s64 d;

	/* high offset > low offset */
	d = l1->offset - l2->offset;
	if (d)
		return d;

	/* short length > long length */
	d = l2->length - l1->length;
	if (d)
		return d;

	/* read > read/write */
	return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ);
}

static bool
pnfs_lseg_range_is_after(const struct pnfs_layout_range *l1,
		const struct pnfs_layout_range *l2)
{
	return pnfs_lseg_range_cmp(l1, l2) > 0;
}
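/*
 * Illustrative note: with the comparator above, the layout cache is kept
 * sorted by ascending offset; for equal offsets longer segments sort first,
 * and for identical ranges IOMODE_RW sorts before IOMODE_READ, e.g.
 *
 *	{off 0,  len 100, RW}   comes before  {off 0,  len 100, READ}
 *	{off 0,  len 200, RW}   comes before  {off 0,  len 100, RW}
 *	{off 0,  len 100, READ} comes before  {off 50, len 10,  RW}
 */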
static bool
pnfs_lseg_no_merge(struct pnfs_layout_segment *lseg,
		struct pnfs_layout_segment *old)
{
	return false;
}

void
pnfs_generic_layout_insert_lseg(struct pnfs_layout_hdr *lo,
		   struct pnfs_layout_segment *lseg,
		   bool (*is_after)(const struct pnfs_layout_range *,
			   const struct pnfs_layout_range *),
		   bool (*do_merge)(struct pnfs_layout_segment *,
			   struct pnfs_layout_segment *),
		   struct list_head *free_me)
{
	struct pnfs_layout_segment *lp, *tmp;

	dprintk("%s:Begin\n", __func__);

	list_for_each_entry_safe(lp, tmp, &lo->plh_segs, pls_list) {
		if (test_bit(NFS_LSEG_VALID, &lp->pls_flags) == 0)
			continue;
		if (do_merge(lseg, lp)) {
			mark_lseg_invalid(lp, free_me);
			continue;
		}
		if (is_after(&lseg->pls_range, &lp->pls_range))
			continue;
		list_add_tail(&lseg->pls_list, &lp->pls_list);
		dprintk("%s: inserted lseg %p "
			"iomode %d offset %llu length %llu before "
			"lp %p iomode %d offset %llu length %llu\n",
			__func__, lseg, lseg->pls_range.iomode,
			lseg->pls_range.offset, lseg->pls_range.length,
			lp, lp->pls_range.iomode, lp->pls_range.offset,
			lp->pls_range.length);
		goto out;
	}
	list_add_tail(&lseg->pls_list, &lo->plh_segs);
	dprintk("%s: inserted lseg %p "
		"iomode %d offset %llu length %llu at tail\n",
		__func__, lseg, lseg->pls_range.iomode,
		lseg->pls_range.offset, lseg->pls_range.length);
out:
	pnfs_get_layout_hdr(lo);

	dprintk("%s:Return\n", __func__);
}
EXPORT_SYMBOL_GPL(pnfs_generic_layout_insert_lseg);

static void
pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo,
		struct pnfs_layout_segment *lseg,
		struct list_head *free_me)
{
	struct inode *inode = lo->plh_inode;
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;

	if (ld->add_lseg != NULL)
		ld->add_lseg(lo, lseg, free_me);
	else
		pnfs_generic_layout_insert_lseg(lo, lseg,
				pnfs_lseg_range_is_after,
				pnfs_lseg_no_merge,
				free_me);
}

static struct pnfs_layout_hdr *
alloc_init_layout_hdr(struct inode *ino,
		      struct nfs_open_context *ctx,
		      gfp_t gfp_flags)
{
	struct pnfs_layout_hdr *lo;

	lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
	if (!lo)
		return NULL;
	atomic_set(&lo->plh_refcount, 1);
	INIT_LIST_HEAD(&lo->plh_layouts);
	INIT_LIST_HEAD(&lo->plh_segs);
	INIT_LIST_HEAD(&lo->plh_bulk_destroy);
	lo->plh_inode = ino;
	lo->plh_lc_cred = get_rpccred(ctx->cred);
	return lo;
}

static struct pnfs_layout_hdr *
pnfs_find_alloc_layout(struct inode *ino,
		       struct nfs_open_context *ctx,
		       gfp_t gfp_flags)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_hdr *new = NULL;

	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);

	if (nfsi->layout != NULL)
		goto out_existing;
	spin_unlock(&ino->i_lock);
	new = alloc_init_layout_hdr(ino, ctx, gfp_flags);
	spin_lock(&ino->i_lock);

	if (likely(nfsi->layout == NULL)) {	/* Won the race? */
		nfsi->layout = new;
		return new;
	} else if (new != NULL)
		pnfs_free_layout_hdr(new);
out_existing:
	pnfs_get_layout_hdr(nfsi->layout);
	return nfsi->layout;
}

/*
 * iomode matching rules:
 * iomode	lseg	match
 * -----	-----	-----
 * ANY		READ	true
 * ANY		RW	true
 * RW		READ	false
 * RW		RW	true
 * READ		READ	true
 * READ		RW	true
 */
static bool
pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range,
		 const struct pnfs_layout_range *range)
{
	struct pnfs_layout_range range1;

	if ((range->iomode == IOMODE_RW &&
	     ls_range->iomode != IOMODE_RW) ||
	    !pnfs_lseg_range_intersecting(ls_range, range))
		return false;

	/* range1 covers only the first byte in the range */
	range1 = *range;
	range1.length = 1;
	return pnfs_lseg_range_contained(ls_range, &range1);
}
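/*
 * Illustrative note: only the first byte of the requested range needs to
 * fall inside the cached lseg. For example, a cached READ lseg covering
 * [0, 4096) satisfies a READ request for [1024, 8192), because the match
 * only tests containment of byte 1024; the caller is expected to obtain
 * further lsegs (or trim I/O in pnfs_generic_pg_test()) for the remainder.
 * An IOMODE_RW request, by contrast, never matches a READ lseg.
 */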
/*
 * lookup range in layout
 */
static struct pnfs_layout_segment *
pnfs_find_lseg(struct pnfs_layout_hdr *lo,
		struct pnfs_layout_range *range)
{
	struct pnfs_layout_segment *lseg, *ret = NULL;

	dprintk("%s:Begin\n", __func__);

	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
		    !test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) &&
		    pnfs_lseg_range_match(&lseg->pls_range, range)) {
			ret = pnfs_get_lseg(lseg);
			break;
		}
	}

	dprintk("%s:Return lseg %p ref %d\n",
		__func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
	return ret;
}

/*
 * Use mdsthreshold hints set at each OPEN to determine if I/O should go
 * to the MDS or over pNFS
 *
 * The nfs_inode read_io and write_io fields are cumulative counters reset
 * when there are no layout segments. Note that in pnfs_update_layout iomode
 * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
 * WRITE request.
 *
 * A return of true means use MDS I/O.
 *
 * From rfc 5661:
 * If a file's size is smaller than the file size threshold, data accesses
 * SHOULD be sent to the metadata server.  If an I/O request has a length that
 * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
 * server.  If both file size and I/O size are provided, the client SHOULD
 * reach or exceed both thresholds before sending its read or write
 * requests to the data server.
 */
static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
				     struct inode *ino, int iomode)
{
	struct nfs4_threshold *t = ctx->mdsthreshold;
	struct nfs_inode *nfsi = NFS_I(ino);
	loff_t fsize = i_size_read(ino);
	bool size = false, size_set = false, io = false, io_set = false, ret = false;

	if (t == NULL)
		return ret;

	dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
		__func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);

	switch (iomode) {
	case IOMODE_READ:
		if (t->bm & THRESHOLD_RD) {
			dprintk("%s fsize %llu\n", __func__, fsize);
			size_set = true;
			if (fsize < t->rd_sz)
				size = true;
		}
		if (t->bm & THRESHOLD_RD_IO) {
			dprintk("%s nfsi->read_io %llu\n", __func__,
				nfsi->read_io);
			io_set = true;
			if (nfsi->read_io < t->rd_io_sz)
				io = true;
		}
		break;
	case IOMODE_RW:
		if (t->bm & THRESHOLD_WR) {
			dprintk("%s fsize %llu\n", __func__, fsize);
			size_set = true;
			if (fsize < t->wr_sz)
				size = true;
		}
		if (t->bm & THRESHOLD_WR_IO) {
			dprintk("%s nfsi->write_io %llu\n", __func__,
				nfsi->write_io);
			io_set = true;
			if (nfsi->write_io < t->wr_io_sz)
				io = true;
		}
		break;
	}
	if (size_set && io_set) {
		if (size && io)
			ret = true;
	} else if (size || io)
		ret = true;

	dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
	return ret;
}
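/*
 * Illustrative example (hypothetical threshold values): if the server's
 * mdsthreshold hint sets rd_sz to 1 MB and the file is only 4 KB, then
 * fsize < t->rd_sz and a READ SHOULD go to the MDS, so this function
 * returns true. When both THRESHOLD_RD and THRESHOLD_RD_IO are set, both
 * the file-size and cumulative-I/O tests must pass before MDS I/O is
 * chosen; otherwise passing either one is enough.
 */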
static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
{
	/*
	 * send layoutcommit as it can hold up layoutreturn due to lseg
	 * reference
	 */
	pnfs_layoutcommit_inode(lo->plh_inode, false);
	return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
				   nfs_wait_bit_killable,
				   TASK_UNINTERRUPTIBLE);
}

static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo)
{
	unsigned long *bitlock = &lo->plh_flags;

	clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock);
	smp_mb__after_atomic();
	wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET);
}

/*
 * Layout segment is retrieved from the server if not cached.
 * The appropriate layout segment is referenced and returned to the caller.
 */
struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino,
		   struct nfs_open_context *ctx,
		   loff_t pos,
		   u64 count,
		   enum pnfs_iomode iomode,
		   gfp_t gfp_flags)
{
	struct pnfs_layout_range arg = {
		.iomode = iomode,
		.offset = pos,
		.length = count,
	};
	unsigned pg_offset, seq;
	struct nfs_server *server = NFS_SERVER(ino);
	struct nfs_client *clp = server->nfs_client;
	struct pnfs_layout_hdr *lo = NULL;
	struct pnfs_layout_segment *lseg = NULL;
	nfs4_stateid stateid;
	long timeout = 0;
	unsigned long giveup = jiffies + rpc_get_timeout(server->client);
	bool first;

	if (!pnfs_enabled_sb(NFS_SERVER(ino))) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_NO_PNFS);
		goto out;
	}

	if (iomode == IOMODE_READ && i_size_read(ino) == 0) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_RD_ZEROLEN);
		goto out;
	}

	if (pnfs_within_mdsthreshold(ctx, ino, iomode)) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_MDSTHRESH);
		goto out;
	}

lookup_again:
	first = false;
	spin_lock(&ino->i_lock);
	lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
	if (lo == NULL) {
		spin_unlock(&ino->i_lock);
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_NOMEM);
		goto out;
	}

	/* Do we even need to bother with this? */
	if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_BULK_RECALL);
		dprintk("%s matches recall, use MDS\n", __func__);
		goto out_unlock;
	}

	/* if LAYOUTGET already failed once we don't try again */
	if (pnfs_layout_io_test_failed(lo, iomode)) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_IO_TEST_FAIL);
		goto out_unlock;
	}

	lseg = pnfs_find_lseg(lo, &arg);
	if (lseg) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_FOUND_CACHED);
		goto out_unlock;
	}

	if (!nfs4_valid_open_stateid(ctx->state)) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_INVALID_OPEN);
		goto out_unlock;
	}

	/*
	 * Choose a stateid for the LAYOUTGET. If we don't have a layout
	 * stateid, or it has been invalidated, then we must use the open
	 * stateid.
	 */
	if (lo->plh_stateid.seqid == 0 ||
	    test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) {

		/*
		 * The first layoutget for the file. Need to serialize per
		 * RFC 5661 Errata 3208.
		 */
		if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET,
				     &lo->plh_flags)) {
			spin_unlock(&ino->i_lock);
			wait_on_bit(&lo->plh_flags, NFS_LAYOUT_FIRST_LAYOUTGET,
				    TASK_UNINTERRUPTIBLE);
			pnfs_put_layout_hdr(lo);
			dprintk("%s retrying\n", __func__);
			goto lookup_again;
		}

		first = true;
		do {
			seq = read_seqbegin(&ctx->state->seqlock);
			nfs4_stateid_copy(&stateid, &ctx->state->stateid);
		} while (read_seqretry(&ctx->state->seqlock, seq));
	} else {
		nfs4_stateid_copy(&stateid, &lo->plh_stateid);
	}

	/*
	 * Because we free lsegs before sending LAYOUTRETURN, we need to wait
	 * for LAYOUTRETURN even if first is true.
	 */
	if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
		spin_unlock(&ino->i_lock);
		dprintk("%s wait for layoutreturn\n", __func__);
		if (pnfs_prepare_to_retry_layoutget(lo)) {
			if (first)
				pnfs_clear_first_layoutget(lo);
			pnfs_put_layout_hdr(lo);
			dprintk("%s retrying\n", __func__);
			trace_pnfs_update_layout(ino, pos, count, iomode, lo,
					lseg, PNFS_UPDATE_LAYOUT_RETRY);
			goto lookup_again;
		}
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				PNFS_UPDATE_LAYOUT_RETURN);
		goto out_put_layout_hdr;
	}

	if (pnfs_layoutgets_blocked(lo)) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				PNFS_UPDATE_LAYOUT_BLOCKED);
		goto out_unlock;
	}
	atomic_inc(&lo->plh_outstanding);
	spin_unlock(&ino->i_lock);

	if (list_empty(&lo->plh_layouts)) {
		/* The lo must be on the clp list if there is any
		 * chance of a CB_LAYOUTRECALL(FILE) coming in.
		 */
		spin_lock(&clp->cl_lock);
		if (list_empty(&lo->plh_layouts))
			list_add_tail(&lo->plh_layouts, &server->layouts);
		spin_unlock(&clp->cl_lock);
	}

	pg_offset = arg.offset & ~PAGE_MASK;
	if (pg_offset) {
		arg.offset -= pg_offset;
		arg.length += pg_offset;
	}
	if (arg.length != NFS4_MAX_UINT64)
		arg.length = PAGE_ALIGN(arg.length);

	lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags);
	trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
	if (IS_ERR(lseg)) {
		switch (PTR_ERR(lseg)) {
		case -ERECALLCONFLICT:
			if (time_after(jiffies, giveup))
				lseg = NULL;
			/* Fallthrough */
		case -EAGAIN:
			pnfs_put_layout_hdr(lo);
			if (first)
				pnfs_clear_first_layoutget(lo);
			if (lseg) {
				trace_pnfs_update_layout(ino, pos, count,
					iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
				goto lookup_again;
			}
			/* Fallthrough */
		default:
			if (!nfs_error_is_fatal(PTR_ERR(lseg))) {
				pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
				lseg = NULL;
			}
		}
	} else {
		pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
	}

	atomic_dec(&lo->plh_outstanding);
out_put_layout_hdr:
	if (first)
		pnfs_clear_first_layoutget(lo);
	pnfs_put_layout_hdr(lo);
out:
	dprintk("%s: inode %s/%llu pNFS layout segment %s for "
			"(%s, offset: %llu, length: %llu)\n",
			__func__, ino->i_sb->s_id,
			(unsigned long long)NFS_FILEID(ino),
			IS_ERR_OR_NULL(lseg) ? "not found" : "found",
			iomode == IOMODE_RW ? "read/write" : "read-only",
			(unsigned long long)pos,
			(unsigned long long)count);
"read/write" : "read-only", 1674 (unsigned long long)pos, 1675 (unsigned long long)count); 1676 return lseg; 1677 out_unlock: 1678 spin_unlock(&ino->i_lock); 1679 goto out_put_layout_hdr; 1680 } 1681 EXPORT_SYMBOL_GPL(pnfs_update_layout); 1682 1683 static bool 1684 pnfs_sanity_check_layout_range(struct pnfs_layout_range *range) 1685 { 1686 switch (range->iomode) { 1687 case IOMODE_READ: 1688 case IOMODE_RW: 1689 break; 1690 default: 1691 return false; 1692 } 1693 if (range->offset == NFS4_MAX_UINT64) 1694 return false; 1695 if (range->length == 0) 1696 return false; 1697 if (range->length != NFS4_MAX_UINT64 && 1698 range->length > NFS4_MAX_UINT64 - range->offset) 1699 return false; 1700 return true; 1701 } 1702 1703 struct pnfs_layout_segment * 1704 pnfs_layout_process(struct nfs4_layoutget *lgp) 1705 { 1706 struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; 1707 struct nfs4_layoutget_res *res = &lgp->res; 1708 struct pnfs_layout_segment *lseg; 1709 struct inode *ino = lo->plh_inode; 1710 LIST_HEAD(free_me); 1711 int status = -EINVAL; 1712 1713 if (!pnfs_sanity_check_layout_range(&res->range)) 1714 goto out; 1715 1716 /* Inject layout blob into I/O device driver */ 1717 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); 1718 if (!lseg || IS_ERR(lseg)) { 1719 if (!lseg) 1720 status = -ENOMEM; 1721 else 1722 status = PTR_ERR(lseg); 1723 dprintk("%s: Could not allocate layout: error %d\n", 1724 __func__, status); 1725 goto out; 1726 } 1727 1728 init_lseg(lo, lseg); 1729 lseg->pls_range = res->range; 1730 lseg->pls_seq = be32_to_cpu(res->stateid.seqid); 1731 1732 spin_lock(&ino->i_lock); 1733 if (pnfs_layoutgets_blocked(lo)) { 1734 dprintk("%s forget reply due to state\n", __func__); 1735 goto out_forget_reply; 1736 } 1737 1738 if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { 1739 /* existing state ID, make sure the sequence number matches. */ 1740 if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { 1741 dprintk("%s forget reply due to sequence\n", __func__); 1742 status = -EAGAIN; 1743 goto out_forget_reply; 1744 } 1745 pnfs_set_layout_stateid(lo, &res->stateid, false); 1746 } else { 1747 /* 1748 * We got an entirely new state ID. Mark all segments for the 1749 * inode invalid, and don't bother validating the stateid 1750 * sequence number. 
		pnfs_mark_matching_lsegs_invalid(lo, &free_me, NULL, 0);

		nfs4_stateid_copy(&lo->plh_stateid, &res->stateid);
		lo->plh_barrier = be32_to_cpu(res->stateid.seqid);
	}

	clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);

	pnfs_get_lseg(lseg);
	pnfs_layout_insert_lseg(lo, lseg, &free_me);

	if (res->return_on_close)
		set_bit(NFS_LSEG_ROC, &lseg->pls_flags);

	spin_unlock(&ino->i_lock);
	pnfs_free_lseg_list(&free_me);
	return lseg;
out:
	return ERR_PTR(status);

out_forget_reply:
	spin_unlock(&ino->i_lock);
	lseg->pls_layout = lo;
	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
	goto out;
}

static void
pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
			 u32 seq)
{
	if (lo->plh_return_iomode == iomode)
		return;
	if (lo->plh_return_iomode != 0)
		iomode = IOMODE_ANY;
	lo->plh_return_iomode = iomode;
	set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
	if (!lo->plh_return_seq || pnfs_seqid_is_newer(seq, lo->plh_return_seq))
		lo->plh_return_seq = seq;
}

/**
 * pnfs_mark_matching_lsegs_return - Free or return matching layout segments
 * @lo: pointer to layout header
 * @tmp_list: list header to be used with pnfs_free_lseg_list()
 * @return_range: describe layout segment ranges to be returned
 *
 * This function is mainly intended for use by layoutrecall. It attempts
 * to free the layout segment immediately, or else to mark it for return
 * as soon as its reference count drops to zero.
 */
int
pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
				struct list_head *tmp_list,
				const struct pnfs_layout_range *return_range,
				u32 seq)
{
	struct pnfs_layout_segment *lseg, *next;
	int remaining = 0;

	dprintk("%s:Begin lo %p\n", __func__, lo);

	if (list_empty(&lo->plh_segs))
		return 0;

	assert_spin_locked(&lo->plh_inode->i_lock);

	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
		if (should_free_lseg(&lseg->pls_range, return_range)) {
			dprintk("%s: marking lseg %p iomode %d "
				"offset %llu length %llu\n", __func__,
				lseg, lseg->pls_range.iomode,
				lseg->pls_range.offset,
				lseg->pls_range.length);
			if (mark_lseg_invalid(lseg, tmp_list))
				continue;
			remaining++;
			set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
		}

	if (remaining)
		pnfs_set_plh_return_info(lo, return_range->iomode, seq);

	return remaining;
}

void pnfs_error_mark_layout_for_return(struct inode *inode,
				       struct pnfs_layout_segment *lseg)
{
	struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
	struct pnfs_layout_range range = {
		.iomode = lseg->pls_range.iomode,
		.offset = 0,
		.length = NFS4_MAX_UINT64,
	};
	LIST_HEAD(free_me);
	bool return_now = false;

	spin_lock(&inode->i_lock);
	pnfs_set_plh_return_info(lo, range.iomode, lseg->pls_seq);
	/*
	 * mark all matching lsegs so that we are sure to have no live
	 * segments at hand when sending layoutreturn. See pnfs_put_lseg()
	 * for how it works.
	 */
	if (!pnfs_mark_matching_lsegs_return(lo, &free_me,
						&range, lseg->pls_seq)) {
		nfs4_stateid stateid;
		enum pnfs_iomode iomode = lo->plh_return_iomode;

		nfs4_stateid_copy(&stateid, &lo->plh_stateid);
		return_now = pnfs_prepare_layoutreturn(lo);
		spin_unlock(&inode->i_lock);
		if (return_now)
			pnfs_send_layoutreturn(lo, &stateid, iomode, false);
	} else {
		spin_unlock(&inode->i_lock);
		nfs_commit_inode(inode, 0);
	}
	pnfs_free_lseg_list(&free_me);
}
EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);

void
pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
	u64 rd_size = req->wb_bytes;

	if (pgio->pg_lseg == NULL) {
		if (pgio->pg_dreq == NULL)
			rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
		else
			rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);

		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   req->wb_context,
						   req_offset(req),
						   rd_size,
						   IOMODE_READ,
						   GFP_KERNEL);
		if (IS_ERR(pgio->pg_lseg)) {
			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
			pgio->pg_lseg = NULL;
			return;
		}
	}
	/* If no lseg, fall back to read through mds */
	if (pgio->pg_lseg == NULL)
		nfs_pageio_reset_read_mds(pgio);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);

void
pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
			   struct nfs_page *req, u64 wb_size)
{
	if (pgio->pg_lseg == NULL) {
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   req->wb_context,
						   req_offset(req),
						   wb_size,
						   IOMODE_RW,
						   GFP_NOFS);
		if (IS_ERR(pgio->pg_lseg)) {
			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
			pgio->pg_lseg = NULL;
			return;
		}
	}
	/* If no lseg, fall back to write through mds */
	if (pgio->pg_lseg == NULL)
		nfs_pageio_reset_write_mds(pgio);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);

void
pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc)
{
	if (desc->pg_lseg) {
		pnfs_put_lseg(desc->pg_lseg);
		desc->pg_lseg = NULL;
	}
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup);
void
pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
			   struct nfs_page *req, u64 wb_size)
{
	if (pgio->pg_lseg == NULL) {
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   req->wb_context,
						   req_offset(req),
						   wb_size,
						   IOMODE_RW,
						   GFP_NOFS);
		if (IS_ERR(pgio->pg_lseg)) {
			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
			pgio->pg_lseg = NULL;
			return;
		}
	}
	/* If no lseg, fall back to write through mds */
	if (pgio->pg_lseg == NULL)
		nfs_pageio_reset_write_mds(pgio);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);

void
pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc)
{
	if (desc->pg_lseg) {
		pnfs_put_lseg(desc->pg_lseg);
		desc->pg_lseg = NULL;
	}
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup);

/*
 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
 * of bytes (maximum @req->wb_bytes) that can be coalesced.
 */
size_t
pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
		     struct nfs_page *prev, struct nfs_page *req)
{
	unsigned int size;
	u64 seg_end, req_start, seg_left;

	size = nfs_generic_pg_test(pgio, prev, req);
	if (!size)
		return 0;

	/*
	 * 'size' contains the number of bytes left in the current page (up
	 * to the original size asked for in @req->wb_bytes).
	 *
	 * Calculate how many bytes are left in the layout segment
	 * and if there are fewer bytes than 'size', return that instead.
	 *
	 * Please also note that 'end_offset' is actually the offset of the
	 * first byte that lies outside the pnfs_layout_range. FIXME?
	 */
	if (pgio->pg_lseg) {
		seg_end = end_offset(pgio->pg_lseg->pls_range.offset,
				     pgio->pg_lseg->pls_range.length);
		req_start = req_offset(req);
		WARN_ON_ONCE(req_start >= seg_end);
		/* start of request is past the last byte of this segment */
		if (req_start >= seg_end) {
			/* reference the new lseg */
			if (pgio->pg_ops->pg_cleanup)
				pgio->pg_ops->pg_cleanup(pgio);
			if (pgio->pg_ops->pg_init)
				pgio->pg_ops->pg_init(pgio, req);
			return 0;
		}

		/* adjust 'size' iff there are fewer bytes left in the
		 * segment than what nfs_generic_pg_test returned */
		seg_left = seg_end - req_start;
		if (seg_left < size)
			size = (unsigned int)seg_left;
	}

	return size;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
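/*
 * Worked example (illustrative numbers): with a layout segment covering
 * [0, 65536) and a request starting at offset 61440, seg_end == 65536
 * and seg_left == 4096, so at most 4096 bytes are coalesced even if
 * nfs_generic_pg_test() would have allowed a larger 'size'.
 */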
int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
{
	struct nfs_pageio_descriptor pgio;

	/* Resend all requests through the MDS */
	nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
			      hdr->completion_ops);
	set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags);
	return nfs_pageio_resend(&pgio, hdr);
}
EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);

static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr)
{
	dprintk("pnfs write error = %d\n", hdr->pnfs_error);
	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
	    PNFS_LAYOUTRET_ON_ERROR) {
		pnfs_return_layout(hdr->inode);
	}
	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
		hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr);
}

/*
 * Called by non rpc-based layout drivers
 */
void pnfs_ld_write_done(struct nfs_pgio_header *hdr)
{
	if (likely(!hdr->pnfs_error)) {
		pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
				      hdr->mds_offset + hdr->res.count);
		hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
	}
	trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
	if (unlikely(hdr->pnfs_error))
		pnfs_ld_handle_write_error(hdr);
	hdr->mds_ops->rpc_release(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);

static void
pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
		       struct nfs_pgio_header *hdr)
{
	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		list_splice_tail_init(&hdr->pages, &mirror->pg_list);
		nfs_pageio_reset_write_mds(desc);
		mirror->pg_recoalesce = 1;
	}
	nfs_pgio_data_destroy(hdr);
	hdr->release(hdr);
}

static enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_pgio_header *hdr,
		       const struct rpc_call_ops *call_ops,
		       struct pnfs_layout_segment *lseg,
		       int how)
{
	struct inode *inode = hdr->inode;
	enum pnfs_try_status trypnfs;
	struct nfs_server *nfss = NFS_SERVER(inode);

	hdr->mds_ops = call_ops;

	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
		inode->i_ino, hdr->args.count, hdr->args.offset, how);
	trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how);
	if (trypnfs != PNFS_NOT_ATTEMPTED)
		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
	return trypnfs;
}

static void
pnfs_do_write(struct nfs_pageio_descriptor *desc,
	      struct nfs_pgio_header *hdr, int how)
{
	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
	struct pnfs_layout_segment *lseg = desc->pg_lseg;
	enum pnfs_try_status trypnfs;

	trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
	if (trypnfs == PNFS_NOT_ATTEMPTED)
		pnfs_write_through_mds(desc, hdr);
}
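/*
 * Illustrative sketch (hypothetical driver code): a layout driver's
 * ->write_pagelist() signals that it could not issue the I/O by
 * returning PNFS_NOT_ATTEMPTED, e.g.
 *
 *	static enum pnfs_try_status
 *	example_write_pagelist(struct nfs_pgio_header *hdr, int how)
 *	{
 *		if (!example_ds_connected(hdr->lseg))	// hypothetical helper
 *			return PNFS_NOT_ATTEMPTED;
 *		// ... issue the write to the data server ...
 *		return PNFS_ATTEMPTED;
 *	}
 *
 * in which case pnfs_do_write() above reroutes the requests through
 * pnfs_write_through_mds() instead of failing them.
 */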
static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
{
	pnfs_put_lseg(hdr->lseg);
	nfs_pgio_header_free(hdr);
}

int
pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
	struct nfs_pgio_header *hdr;
	int ret;

	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
	if (!hdr) {
		desc->pg_error = -ENOMEM;
		return desc->pg_error;
	}
	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);

	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
	ret = nfs_generic_pgio(desc, hdr);
	if (!ret)
		pnfs_do_write(desc, hdr, desc->pg_ioflags);

	return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);

int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr)
{
	struct nfs_pageio_descriptor pgio;

	/* Resend all requests through the MDS */
	nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops);
	return nfs_pageio_resend(&pgio, hdr);
}
EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);

static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr)
{
	dprintk("pnfs read error = %d\n", hdr->pnfs_error);
	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
	    PNFS_LAYOUTRET_ON_ERROR) {
		pnfs_return_layout(hdr->inode);
	}
	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
		hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr);
}

/*
 * Called by non rpc-based layout drivers
 */
void pnfs_ld_read_done(struct nfs_pgio_header *hdr)
{
	if (likely(!hdr->pnfs_error)) {
		__nfs4_read_done_cb(hdr);
		hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
	}
	trace_nfs4_pnfs_read(hdr, hdr->pnfs_error);
	if (unlikely(hdr->pnfs_error))
		pnfs_ld_handle_read_error(hdr);
	hdr->mds_ops->rpc_release(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_ld_read_done);

static void
pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
		      struct nfs_pgio_header *hdr)
{
	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		list_splice_tail_init(&hdr->pages, &mirror->pg_list);
		nfs_pageio_reset_read_mds(desc);
		mirror->pg_recoalesce = 1;
	}
	nfs_pgio_data_destroy(hdr);
	hdr->release(hdr);
}

/*
 * Call the appropriate parallel I/O subsystem read function.
 */
static enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_pgio_header *hdr,
		      const struct rpc_call_ops *call_ops,
		      struct pnfs_layout_segment *lseg)
{
	struct inode *inode = hdr->inode;
	struct nfs_server *nfss = NFS_SERVER(inode);
	enum pnfs_try_status trypnfs;

	hdr->mds_ops = call_ops;

	dprintk("%s: Reading ino:%lu %u@%llu\n",
		__func__, inode->i_ino, hdr->args.count, hdr->args.offset);

	trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr);
	if (trypnfs != PNFS_NOT_ATTEMPTED)
		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
	return trypnfs;
}

/* Resend all requests through pnfs. */
void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr)
{
	struct nfs_pageio_descriptor pgio;

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		nfs_pageio_init_read(&pgio, hdr->inode, false,
				     hdr->completion_ops);
		hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr);
	}
}
EXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs);

static void
pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
{
	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
	struct pnfs_layout_segment *lseg = desc->pg_lseg;
	enum pnfs_try_status trypnfs;

	trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
	if (trypnfs == PNFS_TRY_AGAIN)
		pnfs_read_resend_pnfs(hdr);
	if (trypnfs == PNFS_NOT_ATTEMPTED || hdr->task.tk_status)
		pnfs_read_through_mds(desc, hdr);
}

static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
{
	pnfs_put_lseg(hdr->lseg);
	nfs_pgio_header_free(hdr);
}

int
pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
	struct nfs_pgio_header *hdr;
	int ret;

	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
	if (!hdr) {
		desc->pg_error = -ENOMEM;
		return desc->pg_error;
	}
	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
	ret = nfs_generic_pgio(desc, hdr);
	if (!ret)
		pnfs_do_read(desc, hdr);
	return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
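/*
 * Illustrative sketch: how a layout driver typically plugs the generic
 * helpers above into its read-side nfs_pageio_ops. The ops instance
 * below is hypothetical; in-tree drivers often wrap pg_init/pg_test in
 * their own callbacks before deferring to the generic versions:
 *
 *	static const struct nfs_pageio_ops example_pg_read_ops = {
 *		.pg_init	= pnfs_generic_pg_init_read,
 *		.pg_test	= pnfs_generic_pg_test,
 *		.pg_doio	= pnfs_generic_pg_readpages,
 *		.pg_cleanup	= pnfs_generic_pg_cleanup,
 *	};
 */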
static void pnfs_clear_layoutcommitting(struct inode *inode)
{
	unsigned long *bitlock = &NFS_I(inode)->flags;

	clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
	smp_mb__after_atomic();
	wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
}

/*
 * There can be multiple RW segments.
 */
static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
{
	struct pnfs_layout_segment *lseg;

	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
		if (lseg->pls_range.iomode == IOMODE_RW &&
		    test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
			list_add(&lseg->pls_lc_list, listp);
	}
}

static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
{
	struct pnfs_layout_segment *lseg, *tmp;

	/* Matched by references in pnfs_set_layoutcommit */
	list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
		list_del_init(&lseg->pls_lc_list);
		pnfs_put_lseg(lseg);
	}

	pnfs_clear_layoutcommitting(inode);
}

void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
{
	pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
}
EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);

void
pnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg,
		      loff_t end_pos)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	bool mark_as_dirty = false;

	spin_lock(&inode->i_lock);
	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
		nfsi->layout->plh_lwb = end_pos;
		mark_as_dirty = true;
		dprintk("%s: Set layoutcommit for inode %lu\n",
			__func__, inode->i_ino);
	} else if (end_pos > nfsi->layout->plh_lwb)
		nfsi->layout->plh_lwb = end_pos;
	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) {
		/* references matched in nfs4_layoutcommit_release */
		pnfs_get_lseg(lseg);
	}
	spin_unlock(&inode->i_lock);
	dprintk("%s: lseg %p end_pos %llu\n",
		__func__, lseg, nfsi->layout->plh_lwb);

	/* if pnfs_layoutcommit_inode() runs between inode locks, the next one
	 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
	if (mark_as_dirty)
		mark_inode_dirty_sync(inode);
}
EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
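/*
 * Worked example (illustrative): two completed RW writes report their
 * end positions in turn:
 *
 *	pnfs_set_layoutcommit(inode, lseg, 4096);  // plh_lwb = 4096,
 *						   // inode marked dirty
 *	pnfs_set_layoutcommit(inode, lseg, 8192);  // plh_lwb grows to 8192
 *
 * Only the first call marks the inode dirty; the second merely extends
 * the last-write-byte, so a single LAYOUTCOMMIT covers both writes.
 */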
void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
{
	struct nfs_server *nfss = NFS_SERVER(data->args.inode);

	if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
		nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
	pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
}

/*
 * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
 * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
 * data to disk to allow the server to recover the data if it crashes.
 * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
 * is off, and a COMMIT is sent to a data server, or
 * if WRITEs to a data server return NFS_DATA_SYNC.
 */
int
pnfs_layoutcommit_inode(struct inode *inode, bool sync)
{
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
	struct nfs4_layoutcommit_data *data;
	struct nfs_inode *nfsi = NFS_I(inode);
	loff_t end_pos;
	int status;

	if (!pnfs_layoutcommit_outstanding(inode))
		return 0;

	dprintk("--> %s inode %lu\n", __func__, inode->i_ino);

	status = -EAGAIN;
	if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
		if (!sync)
			goto out;
		status = wait_on_bit_lock_action(&nfsi->flags,
						 NFS_INO_LAYOUTCOMMITTING,
						 nfs_wait_bit_killable,
						 TASK_KILLABLE);
		if (status)
			goto out;
	}

	status = -ENOMEM;
	/* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
	data = kzalloc(sizeof(*data), GFP_NOFS);
	if (!data)
		goto clear_layoutcommitting;

	status = 0;
	spin_lock(&inode->i_lock);
	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
		goto out_unlock;

	INIT_LIST_HEAD(&data->lseg_list);
	pnfs_list_write_lseg(inode, &data->lseg_list);

	end_pos = nfsi->layout->plh_lwb;

	nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid);
	spin_unlock(&inode->i_lock);

	data->args.inode = inode;
	data->cred = get_rpccred(nfsi->layout->plh_lc_cred);
	nfs_fattr_init(&data->fattr);
	data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
	data->res.fattr = &data->fattr;
	data->args.lastbytewritten = end_pos - 1;
	data->res.server = NFS_SERVER(inode);

	if (ld->prepare_layoutcommit) {
		status = ld->prepare_layoutcommit(&data->args);
		if (status) {
			put_rpccred(data->cred);
			spin_lock(&inode->i_lock);
			set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
			if (end_pos > nfsi->layout->plh_lwb)
				nfsi->layout->plh_lwb = end_pos;
			goto out_unlock;
		}
	}

	status = nfs4_proc_layoutcommit(data, sync);
out:
	if (status)
		mark_inode_dirty_sync(inode);
	dprintk("<-- %s status %d\n", __func__, status);
	return status;
out_unlock:
	spin_unlock(&inode->i_lock);
	kfree(data);
clear_layoutcommitting:
	pnfs_clear_layoutcommitting(inode);
	goto out;
}
EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode);

int
pnfs_generic_sync(struct inode *inode, bool datasync)
{
	return pnfs_layoutcommit_inode(inode, true);
}
EXPORT_SYMBOL_GPL(pnfs_generic_sync);
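/*
 * Illustrative sketch (hypothetical driver): a layout driver with no
 * driver-specific flush work can point its pnfs_layoutdriver_type
 * ->sync at the helper above, e.g.
 *
 *	static struct pnfs_layoutdriver_type example_layoutdriver_type = {
 *		...
 *		.sync = pnfs_generic_sync,
 *	};
 *
 * so that the fsync path drives any outstanding LAYOUTCOMMIT
 * synchronously before returning to userspace.
 */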
struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
{
	struct nfs4_threshold *thp;

	thp = kzalloc(sizeof(*thp), GFP_NOFS);
	if (!thp) {
		dprintk("%s mdsthreshold allocation failed\n", __func__);
		return NULL;
	}
	return thp;
}

#if IS_ENABLED(CONFIG_NFS_V4_2)
int
pnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags)
{
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
	struct nfs_server *server = NFS_SERVER(inode);
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs42_layoutstat_data *data;
	struct pnfs_layout_hdr *hdr;
	int status = 0;

	if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats)
		goto out;

	if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS))
		goto out;

	if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags))
		goto out;

	spin_lock(&inode->i_lock);
	if (!NFS_I(inode)->layout) {
		spin_unlock(&inode->i_lock);
		goto out_clear_layoutstats;
	}
	hdr = NFS_I(inode)->layout;
	pnfs_get_layout_hdr(hdr);
	spin_unlock(&inode->i_lock);

	data = kzalloc(sizeof(*data), gfp_flags);
	if (!data) {
		status = -ENOMEM;
		goto out_put;
	}

	data->args.fh = NFS_FH(inode);
	data->args.inode = inode;
	nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid);
	status = ld->prepare_layoutstats(&data->args);
	if (status)
		goto out_free;

	status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data);

out:
	dprintk("%s returns %d\n", __func__, status);
	return status;

out_free:
	kfree(data);
out_put:
	pnfs_put_layout_hdr(hdr);
out_clear_layoutstats:
	smp_mb__before_atomic();
	clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags);
	smp_mb__after_atomic();
	goto out;
}
EXPORT_SYMBOL_GPL(pnfs_report_layoutstat);
#endif

unsigned int layoutstats_timer;
module_param(layoutstats_timer, uint, 0644);
EXPORT_SYMBOL_GPL(layoutstats_timer);
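/*
 * Usage note (illustrative; assumes this file is built into the nfsv4
 * module): because the parameter is 0644, the layoutstats reporting
 * interval can be set at module load time or adjusted through sysfs,
 * e.g.
 *
 *	modprobe nfsv4 layoutstats_timer=600
 *	echo 600 > /sys/module/nfsv4/parameters/layoutstats_timer
 *
 * A value of 0 leaves the layout driver's built-in default in effect.
 */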