/*
 * pNFS functions to call and manage layout drivers.
 *
 * Copyright (c) 2002 [year of first publication]
 * The Regents of the University of Michigan
 * All Rights Reserved
 *
 * Dean Hildebrand <dhildebz@umich.edu>
 *
 * Permission is granted to use, copy, create derivative works, and
 * redistribute this software and such derivative works for any purpose,
 * so long as the name of the University of Michigan is not used in
 * any advertising or publicity pertaining to the use or distribution
 * of this software without specific, written prior authorization. If
 * the above copyright notice or any other identification of the
 * University of Michigan is included in any copy of any portion of
 * this software, then the disclaimer below must also be included.
 *
 * This software is provided as is, without representation or warranty
 * of any kind either express or implied, including without limitation
 * the implied warranties of merchantability, fitness for a particular
 * purpose, or noninfringement. The Regents of the University of
 * Michigan shall not be liable for any damages, including special,
 * indirect, incidental, or consequential damages, with respect to any
 * claim arising out of or in connection with the use of the software,
 * even if it has been or is hereafter advised of the possibility of
 * such damages.
 */

#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
#include "internal.h"
#include "pnfs.h"
#include "iostat.h"
#include "nfs4trace.h"
#include "delegation.h"
#include "nfs42.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS
#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)

/* Locking:
 *
 * pnfs_spinlock:
 *      protects pnfs_modules_tbl.
 */
static DEFINE_SPINLOCK(pnfs_spinlock);

/*
 * pnfs_modules_tbl holds all pnfs modules
 */
static LIST_HEAD(pnfs_modules_tbl);

static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo);

/* Return the registered pnfs layout driver module matching given id */
static struct pnfs_layoutdriver_type *
find_pnfs_driver_locked(u32 id)
{
	struct pnfs_layoutdriver_type *local;

	list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
		if (local->id == id)
			goto out;
	local = NULL;
out:
	dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
	return local;
}

static struct pnfs_layoutdriver_type *
find_pnfs_driver(u32 id)
{
	struct pnfs_layoutdriver_type *local;

	spin_lock(&pnfs_spinlock);
	local = find_pnfs_driver_locked(id);
	if (local != NULL && !try_module_get(local->owner)) {
		dprintk("%s: Could not grab reference on module\n", __func__);
		local = NULL;
	}
	spin_unlock(&pnfs_spinlock);
	return local;
}

void
unset_pnfs_layoutdriver(struct nfs_server *nfss)
{
	if (nfss->pnfs_curr_ld) {
		if (nfss->pnfs_curr_ld->clear_layoutdriver)
			nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
		/* Decrement the MDS count. Purge the deviceid cache if zero */
		if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count))
			nfs4_deviceid_purge_client(nfss->nfs_client);
		module_put(nfss->pnfs_curr_ld->owner);
	}
	nfss->pnfs_curr_ld = NULL;
}

/*
 * Try to set the server's pnfs module to the pnfs layout type specified by id.
 * Currently only one pNFS layout driver per filesystem is supported.
 *
 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
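 *
 * The driver is looked up in pnfs_modules_tbl; if it is not yet loaded,
 * request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id) is used to
 * autoload it before retrying the lookup. A layout driver makes itself
 * findable here by registering a pnfs_layoutdriver_type, roughly as in
 * this illustrative sketch (not any particular in-tree driver; real
 * drivers also fill in alloc_layout_hdr/free_layout_hdr and the I/O
 * entry points):
 *
 *	static struct pnfs_layoutdriver_type mydriver_type = {
 *		.id		= LAYOUT_NFSV4_1_FILES,
 *		.name		= "LAYOUT_NFSV4_1_FILES",
 *		.owner		= THIS_MODULE,
 *		.alloc_lseg	= mydriver_alloc_lseg,
 *		.free_lseg	= mydriver_free_lseg,
 *	};
 *	pnfs_register_layoutdriver(&mydriver_type);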
 */
void
set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
		      u32 id)
{
	struct pnfs_layoutdriver_type *ld_type = NULL;

	if (id == 0)
		goto out_no_driver;
	if (!(server->nfs_client->cl_exchange_flags &
		 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
		printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n",
			__func__, id, server->nfs_client->cl_exchange_flags);
		goto out_no_driver;
	}
	ld_type = find_pnfs_driver(id);
	if (!ld_type) {
		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
		ld_type = find_pnfs_driver(id);
		if (!ld_type) {
			dprintk("%s: No pNFS module found for %u.\n",
				__func__, id);
			goto out_no_driver;
		}
	}
	server->pnfs_curr_ld = ld_type;
	if (ld_type->set_layoutdriver
	    && ld_type->set_layoutdriver(server, mntfh)) {
		printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
			"driver %u.\n", __func__, id);
		module_put(ld_type->owner);
		goto out_no_driver;
	}
	/* Bump the MDS count */
	atomic_inc(&server->nfs_client->cl_mds_count);

	dprintk("%s: pNFS module for %u set\n", __func__, id);
	return;

out_no_driver:
	dprintk("%s: Using NFSv4 I/O\n", __func__);
	server->pnfs_curr_ld = NULL;
}

int
pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	int status = -EINVAL;
	struct pnfs_layoutdriver_type *tmp;

	if (ld_type->id == 0) {
		printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__);
		return status;
	}
	if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
		printk(KERN_ERR "NFS: %s Layout driver must provide "
		       "alloc_lseg and free_lseg.\n", __func__);
		return status;
	}

	spin_lock(&pnfs_spinlock);
	tmp = find_pnfs_driver_locked(ld_type->id);
	if (!tmp) {
		list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
		status = 0;
		dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
			ld_type->name);
	} else {
		printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n",
			__func__, ld_type->id);
	}
	spin_unlock(&pnfs_spinlock);

	return status;
}
EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);

void
pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
	spin_lock(&pnfs_spinlock);
	list_del(&ld_type->pnfs_tblid);
	spin_unlock(&pnfs_spinlock);
}
EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);

/*
 * pNFS client layout cache
 */

/* Need to hold i_lock if caller does not already hold reference */
void
pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo)
{
	atomic_inc(&lo->plh_refcount);
}

static struct pnfs_layout_hdr *
pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
{
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
	return ld->alloc_layout_hdr(ino, gfp_flags);
}

static void
pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct nfs_server *server = NFS_SERVER(lo->plh_inode);
	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;

	if (!list_empty(&lo->plh_layouts)) {
		struct nfs_client *clp = server->nfs_client;

		spin_lock(&clp->cl_lock);
		list_del_init(&lo->plh_layouts);
		spin_unlock(&clp->cl_lock);
	}
	put_rpccred(lo->plh_lc_cred);
	return ld->free_layout_hdr(lo);
}

static void
pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct nfs_inode *nfsi = NFS_I(lo->plh_inode);
	dprintk("%s: freeing layout cache %p\n", __func__, lo);
	nfsi->layout = NULL;
	/* Reset MDS Threshold I/O counters */
	nfsi->write_io = 0;
	nfsi->read_io = 0;
}

void
pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct inode *inode = lo->plh_inode;

	pnfs_layoutreturn_before_put_layout_hdr(lo);

	if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
		if (!list_empty(&lo->plh_segs))
			WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
		pnfs_detach_layout_hdr(lo);
		spin_unlock(&inode->i_lock);
		pnfs_free_layout_hdr(lo);
	}
}

/*
 * Mark a pnfs_layout_hdr and all associated layout segments as invalid
 *
 * In order to continue using the pnfs_layout_hdr, a full recovery
 * is required.
 * Note that caller must hold inode->i_lock.
 */
int
pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
		struct list_head *lseg_list)
{
	struct pnfs_layout_range range = {
		.iomode = IOMODE_ANY,
		.offset = 0,
		.length = NFS4_MAX_UINT64,
	};

	set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
	return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range, 0);
}

static int
pnfs_iomode_to_fail_bit(u32 iomode)
{
	return iomode == IOMODE_RW ?
		NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
}

static void
pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
{
	lo->plh_retry_timestamp = jiffies;
	if (!test_and_set_bit(fail_bit, &lo->plh_flags))
		atomic_inc(&lo->plh_refcount);
}

static void
pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
{
	if (test_and_clear_bit(fail_bit, &lo->plh_flags))
		atomic_dec(&lo->plh_refcount);
}

static void
pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode)
{
	struct inode *inode = lo->plh_inode;
	struct pnfs_layout_range range = {
		.iomode = iomode,
		.offset = 0,
		.length = NFS4_MAX_UINT64,
	};
	LIST_HEAD(head);

	spin_lock(&inode->i_lock);
	pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
	pnfs_mark_matching_lsegs_invalid(lo, &head, &range, 0);
	spin_unlock(&inode->i_lock);
	pnfs_free_lseg_list(&head);
	dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
		iomode == IOMODE_RW ? "RW" : "READ");
}

static bool
pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode)
{
	unsigned long start, end;
	int fail_bit = pnfs_iomode_to_fail_bit(iomode);

	if (test_bit(fail_bit, &lo->plh_flags) == 0)
		return false;
	end = jiffies;
	start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT;
	if (!time_in_range(lo->plh_retry_timestamp, start, end)) {
		/* It is time to retry the failed layoutgets */
		pnfs_layout_clear_fail_bit(lo, fail_bit);
		return false;
	}
	return true;
}

static void
pnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg,
		const struct pnfs_layout_range *range,
		const nfs4_stateid *stateid)
{
	INIT_LIST_HEAD(&lseg->pls_list);
	INIT_LIST_HEAD(&lseg->pls_lc_list);
	atomic_set(&lseg->pls_refcount, 1);
	set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
	lseg->pls_layout = lo;
	lseg->pls_range = *range;
	lseg->pls_seq = be32_to_cpu(stateid->seqid);
}

static void pnfs_free_lseg(struct pnfs_layout_segment *lseg)
{
	struct inode *ino = lseg->pls_layout->plh_inode;

	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
}

static void
pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
		struct pnfs_layout_segment *lseg)
{
	struct inode *inode = lo->plh_inode;

	WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
	list_del_init(&lseg->pls_list);
	/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
	atomic_dec(&lo->plh_refcount);
	if (list_empty(&lo->plh_segs)) {
		set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
		clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
	}
	rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
}

void
pnfs_put_lseg(struct pnfs_layout_segment *lseg)
{
	struct pnfs_layout_hdr *lo;
	struct inode *inode;

	if (!lseg)
		return;

	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
		atomic_read(&lseg->pls_refcount),
		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));

	lo = lseg->pls_layout;
	inode = lo->plh_inode;

	if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
			spin_unlock(&inode->i_lock);
			return;
		}
		pnfs_get_layout_hdr(lo);
		pnfs_layout_remove_lseg(lo, lseg);
		spin_unlock(&inode->i_lock);
		pnfs_free_lseg(lseg);
		pnfs_put_layout_hdr(lo);
	}
}
EXPORT_SYMBOL_GPL(pnfs_put_lseg);

static void pnfs_free_lseg_async_work(struct work_struct *work)
{
	struct pnfs_layout_segment *lseg;
	struct pnfs_layout_hdr *lo;

	lseg = container_of(work, struct pnfs_layout_segment, pls_work);
	lo = lseg->pls_layout;

	pnfs_free_lseg(lseg);
	pnfs_put_layout_hdr(lo);
}

static void pnfs_free_lseg_async(struct pnfs_layout_segment *lseg)
{
	INIT_WORK(&lseg->pls_work, pnfs_free_lseg_async_work);
	schedule_work(&lseg->pls_work);
}

void
pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg)
{
	if (!lseg)
		return;

	assert_spin_locked(&lseg->pls_layout->plh_inode->i_lock);

	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
		atomic_read(&lseg->pls_refcount),
		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
	if (atomic_dec_and_test(&lseg->pls_refcount)) {
		struct pnfs_layout_hdr *lo = lseg->pls_layout;
		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags))
			return;
		pnfs_get_layout_hdr(lo);
		pnfs_layout_remove_lseg(lo, lseg);
		pnfs_free_lseg_async(lseg);
	}
}
EXPORT_SYMBOL_GPL(pnfs_put_lseg_locked);

static u64
end_offset(u64 start, u64 len)
{
	u64 end;

	end = start + len;
	return end >= start ? end : NFS4_MAX_UINT64;
}

/*
 * is l2 fully contained in l1?
 *   start1                             end1
 *   [----------------------------------)
 *           start2           end2
 *           [----------------)
 */
static bool
pnfs_lseg_range_contained(const struct pnfs_layout_range *l1,
		 const struct pnfs_layout_range *l2)
{
	u64 start1 = l1->offset;
	u64 end1 = end_offset(start1, l1->length);
	u64 start2 = l2->offset;
	u64 end2 = end_offset(start2, l2->length);

	return (start1 <= start2) && (end1 >= end2);
}

/*
 * are l1 and l2 intersecting?
 *   start1                             end1
 *   [----------------------------------)
 *                              start2           end2
 *                              [----------------)
 */
static bool
pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1,
		    const struct pnfs_layout_range *l2)
{
	u64 start1 = l1->offset;
	u64 end1 = end_offset(start1, l1->length);
	u64 start2 = l2->offset;
	u64 end2 = end_offset(start2, l2->length);

	return (end1 == NFS4_MAX_UINT64 || end1 > start2) &&
	       (end2 == NFS4_MAX_UINT64 || end2 > start1);
}

static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
		struct list_head *tmp_list)
{
	if (!atomic_dec_and_test(&lseg->pls_refcount))
		return false;
	pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
	list_add(&lseg->pls_list, tmp_list);
	return true;
}

/* Returns 1 if lseg is removed from list, 0 otherwise */
static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
			     struct list_head *tmp_list)
{
	int rv = 0;

	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
		/* Remove the reference keeping the lseg in the
		 * list.  It will now be removed when all
		 * outstanding io is finished.
		 */
		dprintk("%s: lseg %p ref %d\n", __func__, lseg,
			atomic_read(&lseg->pls_refcount));
		if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
			rv = 1;
	}
	return rv;
}

/*
 * Compare 2 layout stateid sequence ids, to see which is newer,
 * taking into account wraparound issues.
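 *
 * The signed difference makes the comparison wrap-safe: for example,
 * with s1 = 2 and s2 = 0xfffffffe, (s32)(s1 - s2) == 4 > 0, so s1 is
 * treated as newer even though it is numerically smaller.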
 */
static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
{
	return (s32)(s1 - s2) > 0;
}

static bool
pnfs_should_free_range(const struct pnfs_layout_range *lseg_range,
		 const struct pnfs_layout_range *recall_range)
{
	return (recall_range->iomode == IOMODE_ANY ||
		lseg_range->iomode == recall_range->iomode) &&
	       pnfs_lseg_range_intersecting(lseg_range, recall_range);
}

static bool
pnfs_match_lseg_recall(const struct pnfs_layout_segment *lseg,
		const struct pnfs_layout_range *recall_range,
		u32 seq)
{
	if (seq != 0 && pnfs_seqid_is_newer(lseg->pls_seq, seq))
		return false;
	if (recall_range == NULL)
		return true;
	return pnfs_should_free_range(&lseg->pls_range, recall_range);
}

/**
 * pnfs_mark_matching_lsegs_invalid - tear down lsegs or mark them for later
 * @lo: layout header containing the lsegs
 * @tmp_list: list head where doomed lsegs should go
 * @recall_range: optional recall range argument to match (may be NULL)
 * @seq: only invalidate lsegs obtained prior to this sequence (may be 0)
 *
 * Walk the list of lsegs in the layout header, and tear down any that should
 * be destroyed. If "recall_range" is specified then the segment must match
 * that range. If "seq" is non-zero, then only match segments that were handed
 * out at or before that sequence.
 *
 * Returns number of matching invalid lsegs remaining in list after scanning
 * it and purging them.
 */
int
pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
			    struct list_head *tmp_list,
			    const struct pnfs_layout_range *recall_range,
			    u32 seq)
{
	struct pnfs_layout_segment *lseg, *next;
	int remaining = 0;

	dprintk("%s:Begin lo %p\n", __func__, lo);

	if (list_empty(&lo->plh_segs))
		return 0;
	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
		if (pnfs_match_lseg_recall(lseg, recall_range, seq)) {
			dprintk("%s: freeing lseg %p iomode %d seq %u "
				"offset %llu length %llu\n", __func__,
				lseg, lseg->pls_range.iomode, lseg->pls_seq,
				lseg->pls_range.offset, lseg->pls_range.length);
			if (!mark_lseg_invalid(lseg, tmp_list))
				remaining++;
		}
	dprintk("%s:Return %i\n", __func__, remaining);
	return remaining;
}

/* note free_me must contain lsegs from a single layout_hdr */
void
pnfs_free_lseg_list(struct list_head *free_me)
{
	struct pnfs_layout_segment *lseg, *tmp;

	if (list_empty(free_me))
		return;

	list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
		list_del(&lseg->pls_list);
		pnfs_free_lseg(lseg);
	}
}

void
pnfs_destroy_layout(struct nfs_inode *nfsi)
{
	struct pnfs_layout_hdr *lo;
	LIST_HEAD(tmp_list);

	spin_lock(&nfsi->vfs_inode.i_lock);
	lo = nfsi->layout;
	if (lo) {
		pnfs_get_layout_hdr(lo);
		pnfs_mark_layout_stateid_invalid(lo, &tmp_list);
		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
		spin_unlock(&nfsi->vfs_inode.i_lock);
		pnfs_free_lseg_list(&tmp_list);
		pnfs_put_layout_hdr(lo);
	} else
		spin_unlock(&nfsi->vfs_inode.i_lock);
}
EXPORT_SYMBOL_GPL(pnfs_destroy_layout);

static bool
pnfs_layout_add_bulk_destroy_list(struct inode *inode,
		struct list_head *layout_list)
{
	struct pnfs_layout_hdr *lo;
	bool ret = false;

	spin_lock(&inode->i_lock);
	lo = NFS_I(inode)->layout;
	if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
		pnfs_get_layout_hdr(lo);
		list_add(&lo->plh_bulk_destroy, layout_list);
		ret = true;
	}
	spin_unlock(&inode->i_lock);
	return ret;
}

/* Caller must hold rcu_read_lock and clp->cl_lock */
static int
pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
		struct nfs_server *server,
		struct list_head *layout_list)
{
	struct pnfs_layout_hdr *lo, *next;
	struct inode *inode;

	list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
		inode = igrab(lo->plh_inode);
		if (inode == NULL)
			continue;
		list_del_init(&lo->plh_layouts);
		if (pnfs_layout_add_bulk_destroy_list(inode, layout_list))
			continue;
		rcu_read_unlock();
		spin_unlock(&clp->cl_lock);
		iput(inode);
		spin_lock(&clp->cl_lock);
		rcu_read_lock();
		return -EAGAIN;
	}
	return 0;
}

static int
pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
		bool is_bulk_recall)
{
	struct pnfs_layout_hdr *lo;
	struct inode *inode;
	LIST_HEAD(lseg_list);
	int ret = 0;

	while (!list_empty(layout_list)) {
		lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
				plh_bulk_destroy);
		dprintk("%s freeing layout for inode %lu\n", __func__,
			lo->plh_inode->i_ino);
		inode = lo->plh_inode;

		pnfs_layoutcommit_inode(inode, false);

		spin_lock(&inode->i_lock);
		list_del_init(&lo->plh_bulk_destroy);
		if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
			if (is_bulk_recall)
				set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
			ret = -EAGAIN;
		}
		spin_unlock(&inode->i_lock);
		pnfs_free_lseg_list(&lseg_list);
		/* Free all lsegs that are attached to commit buckets */
		nfs_commit_inode(inode, 0);
		pnfs_put_layout_hdr(lo);
		iput(inode);
	}
	return ret;
}

int
pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
		struct nfs_fsid *fsid,
		bool is_recall)
{
	struct nfs_server *server;
	LIST_HEAD(layout_list);

	spin_lock(&clp->cl_lock);
	rcu_read_lock();
restart:
	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
		if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
			continue;
		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
				server,
				&layout_list) != 0)
			goto restart;
	}
	rcu_read_unlock();
	spin_unlock(&clp->cl_lock);

	if (list_empty(&layout_list))
		return 0;
	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
}

int
pnfs_destroy_layouts_byclid(struct nfs_client *clp,
		bool is_recall)
{
	struct nfs_server *server;
	LIST_HEAD(layout_list);

	spin_lock(&clp->cl_lock);
	rcu_read_lock();
restart:
	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
					server,
					&layout_list) != 0)
			goto restart;
	}
	rcu_read_unlock();
	spin_unlock(&clp->cl_lock);

	if (list_empty(&layout_list))
		return 0;
	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
}

/*
 * Called by the state manager to remove all layouts established under an
 * expired lease.
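 *
 * Device IDs are marked invalid and purged first so no new I/O can be
 * set up against the old devices; the layouts themselves are then torn
 * down via pnfs_destroy_layouts_byclid() with is_recall == false, i.e.
 * without flagging them as bulk-recalled.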
 */
void
pnfs_destroy_all_layouts(struct nfs_client *clp)
{
	nfs4_deviceid_mark_client_invalid(clp);
	nfs4_deviceid_purge_client(clp);

	pnfs_destroy_layouts_byclid(clp, false);
}

/* update lo->plh_stateid with new if is more recent */
void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
			bool update_barrier)
{
	u32 oldseq, newseq, new_barrier = 0;
	bool invalid = !pnfs_layout_is_valid(lo);

	oldseq = be32_to_cpu(lo->plh_stateid.seqid);
	newseq = be32_to_cpu(new->seqid);
	if (invalid || pnfs_seqid_is_newer(newseq, oldseq)) {
		nfs4_stateid_copy(&lo->plh_stateid, new);
		/*
		 * Because of wraparound, we want to keep the barrier
		 * "close" to the current seqids.
		 */
		new_barrier = newseq - atomic_read(&lo->plh_outstanding);
	}
	if (update_barrier)
		new_barrier = be32_to_cpu(new->seqid);
	else if (new_barrier == 0)
		return;
	if (invalid || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
		lo->plh_barrier = new_barrier;
}

static bool
pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo,
		const nfs4_stateid *stateid)
{
	u32 seqid = be32_to_cpu(stateid->seqid);

	return !pnfs_seqid_is_newer(seqid, lo->plh_barrier);
}

/* Return true if new layoutgets are currently blocked for this layout */
static bool
pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo)
{
	return lo->plh_block_lgets ||
		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
}

/*
 * Get layout from server.
 *    for now, assume that whole file layouts are requested.
 *    arg->offset: 0
 *    arg->length: all ones
 */
static struct pnfs_layout_segment *
send_layoutget(struct pnfs_layout_hdr *lo,
	   struct nfs_open_context *ctx,
	   nfs4_stateid *stateid,
	   const struct pnfs_layout_range *range,
	   long *timeout, gfp_t gfp_flags)
{
	struct inode *ino = lo->plh_inode;
	struct nfs_server *server = NFS_SERVER(ino);
	struct nfs4_layoutget *lgp;
	loff_t i_size;

	dprintk("--> %s\n", __func__);

	/*
	 * Synchronously retrieve layout information from server and
	 * store in lseg. If we race with a concurrent seqid morphing
	 * op, then re-send the LAYOUTGET.
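	 *
	 * The LAYOUTGET below asks for at least lgp->args.minlength bytes:
	 * one page by default, clamped down to the requested range length,
	 * and for reads clamped further so it never extends past EOF.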
	 */
	lgp = kzalloc(sizeof(*lgp), gfp_flags);
	if (lgp == NULL)
		return ERR_PTR(-ENOMEM);

	i_size = i_size_read(ino);

	lgp->args.minlength = PAGE_SIZE;
	if (lgp->args.minlength > range->length)
		lgp->args.minlength = range->length;
	if (range->iomode == IOMODE_READ) {
		if (range->offset >= i_size)
			lgp->args.minlength = 0;
		else if (i_size - range->offset < lgp->args.minlength)
			lgp->args.minlength = i_size - range->offset;
	}
	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
	pnfs_copy_range(&lgp->args.range, range);
	lgp->args.type = server->pnfs_curr_ld->id;
	lgp->args.inode = ino;
	lgp->args.ctx = get_nfs_open_context(ctx);
	nfs4_stateid_copy(&lgp->args.stateid, stateid);
	lgp->gfp_flags = gfp_flags;
	lgp->cred = lo->plh_lc_cred;

	return nfs4_proc_layoutget(lgp, timeout, gfp_flags);
}

static void pnfs_clear_layoutcommit(struct inode *inode,
		struct list_head *head)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	struct pnfs_layout_segment *lseg, *tmp;

	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
		return;
	list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
		if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
			continue;
		pnfs_lseg_dec_and_remove_zero(lseg, head);
	}
}

void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
{
	clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags);
	smp_mb__after_atomic();
	wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
	rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
}

static void
pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
{
	lo->plh_return_iomode = 0;
	lo->plh_return_seq = 0;
	clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
}

static bool
pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
		nfs4_stateid *stateid,
		enum pnfs_iomode *iomode)
{
	if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
		return false;
	pnfs_get_layout_hdr(lo);
	if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) {
		if (stateid != NULL) {
			nfs4_stateid_copy(stateid, &lo->plh_stateid);
			if (lo->plh_return_seq != 0)
				stateid->seqid = cpu_to_be32(lo->plh_return_seq);
		}
		if (iomode != NULL)
			*iomode = lo->plh_return_iomode;
		pnfs_clear_layoutreturn_info(lo);
		return true;
	}
	if (stateid != NULL)
		nfs4_stateid_copy(stateid, &lo->plh_stateid);
	if (iomode != NULL)
		*iomode = IOMODE_ANY;
	return true;
}

static int
pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
		       enum pnfs_iomode iomode, bool sync)
{
	struct inode *ino = lo->plh_inode;
	struct nfs4_layoutreturn *lrp;
	int status = 0;

	lrp = kzalloc(sizeof(*lrp), GFP_NOFS);
	if (unlikely(lrp == NULL)) {
		status = -ENOMEM;
		spin_lock(&ino->i_lock);
		pnfs_clear_layoutreturn_waitbit(lo);
		spin_unlock(&ino->i_lock);
		pnfs_put_layout_hdr(lo);
		goto out;
	}

	nfs4_stateid_copy(&lrp->args.stateid, stateid);
	lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
	lrp->args.inode = ino;
	lrp->args.range.iomode = iomode;
	lrp->args.range.offset = 0;
	lrp->args.range.length = NFS4_MAX_UINT64;
	lrp->args.layout = lo;
	lrp->clp = NFS_SERVER(ino)->nfs_client;
	lrp->cred = lo->plh_lc_cred;

	status = nfs4_proc_layoutreturn(lrp, sync);
out:
	dprintk("<-- %s "
		"status: %d\n", __func__, status);
	return status;
}

/* Return true if layoutreturn is needed */
static bool
pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
{
	struct pnfs_layout_segment *s;

	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
		return false;

	/* Defer layoutreturn until all lsegs are done */
	list_for_each_entry(s, &lo->plh_segs, pls_list) {
		if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
			return false;
	}

	return true;
}

static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct inode *inode = lo->plh_inode;

	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
		return;
	spin_lock(&inode->i_lock);
	if (pnfs_layout_need_return(lo)) {
		nfs4_stateid stateid;
		enum pnfs_iomode iomode;
		bool send;

		send = pnfs_prepare_layoutreturn(lo, &stateid, &iomode);
		spin_unlock(&inode->i_lock);
		if (send) {
			/* Send an async layoutreturn so we don't deadlock */
			pnfs_send_layoutreturn(lo, &stateid, iomode, false);
		}
	} else
		spin_unlock(&inode->i_lock);
}

/*
 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
 * when the layout segment list is empty.
 *
 * Note that a pnfs_layout_hdr can exist with an empty layout segment
 * list when LAYOUTGET has failed, or when LAYOUTGET succeeded, but the
 * deviceid is marked invalid.
 */
int
_pnfs_return_layout(struct inode *ino)
{
	struct pnfs_layout_hdr *lo = NULL;
	struct nfs_inode *nfsi = NFS_I(ino);
	LIST_HEAD(tmp_list);
	nfs4_stateid stateid;
	int status = 0, empty;
	bool send;

	dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);

	spin_lock(&ino->i_lock);
	lo = nfsi->layout;
	if (!lo) {
		spin_unlock(&ino->i_lock);
		dprintk("NFS: %s no layout to return\n", __func__);
		goto out;
	}
	/* Reference matched in nfs4_layoutreturn_release */
	pnfs_get_layout_hdr(lo);
	empty = list_empty(&lo->plh_segs);
	pnfs_clear_layoutcommit(ino, &tmp_list);
	pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0);

	if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
		struct pnfs_layout_range range = {
			.iomode		= IOMODE_ANY,
			.offset		= 0,
			.length		= NFS4_MAX_UINT64,
		};
		NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range);
	}

	/* Don't send a LAYOUTRETURN if list was initially empty */
	if (empty) {
		spin_unlock(&ino->i_lock);
		dprintk("NFS: %s no layout segments to return\n", __func__);
		goto out_put_layout_hdr;
	}

	send = pnfs_prepare_layoutreturn(lo, &stateid, NULL);
	spin_unlock(&ino->i_lock);
	pnfs_free_lseg_list(&tmp_list);
	if (send)
		status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
out_put_layout_hdr:
	pnfs_put_layout_hdr(lo);
out:
	dprintk("<-- %s status: %d\n", __func__, status);
	return status;
}
EXPORT_SYMBOL_GPL(_pnfs_return_layout);

int
pnfs_commit_and_return_layout(struct inode *inode)
{
	struct pnfs_layout_hdr *lo;
	int ret;

	spin_lock(&inode->i_lock);
	lo = NFS_I(inode)->layout;
	if (lo == NULL) {
		spin_unlock(&inode->i_lock);
		return 0;
	}
	pnfs_get_layout_hdr(lo);
	/* Block new layoutgets and read/write to ds */
	lo->plh_block_lgets++;
	spin_unlock(&inode->i_lock);
	filemap_fdatawait(inode->i_mapping);
	ret = pnfs_layoutcommit_inode(inode, true);
	if (ret == 0)
		ret = _pnfs_return_layout(inode);
	spin_lock(&inode->i_lock);
	lo->plh_block_lgets--;
	spin_unlock(&inode->i_lock);
	pnfs_put_layout_hdr(lo);
	return ret;
}

bool pnfs_roc(struct inode *ino)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct nfs_open_context *ctx;
	struct nfs4_state *state;
	struct pnfs_layout_hdr *lo;
	struct pnfs_layout_segment *lseg, *tmp;
	nfs4_stateid stateid;
	LIST_HEAD(tmp_list);
	bool found = false, layoutreturn = false, roc = false;

	spin_lock(&ino->i_lock);
	lo = nfsi->layout;
	if (!lo || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
		goto out_noroc;

	/* no roc if we hold a delegation */
	if (nfs4_check_delegation(ino, FMODE_READ))
		goto out_noroc;

	list_for_each_entry(ctx, &nfsi->open_files, list) {
		state = ctx->state;
		/* Don't return layout if there is open file state */
		if (state != NULL && state->state != 0)
			goto out_noroc;
	}

	/* always send layoutreturn if being marked so */
	if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
		layoutreturn = pnfs_prepare_layoutreturn(lo,
				&stateid, NULL);

	list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
		/* If we are sending layoutreturn, invalidate all valid lsegs */
		if (layoutreturn || test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
			mark_lseg_invalid(lseg, &tmp_list);
			found = true;
		}
	/* ROC in two conditions:
	 * 1. there are ROC lsegs
	 * 2. we don't send layoutreturn
	 */
	if (found && !layoutreturn) {
		/* lo ref dropped in pnfs_roc_release() */
		pnfs_get_layout_hdr(lo);
		roc = true;
	}

out_noroc:
	spin_unlock(&ino->i_lock);
	pnfs_free_lseg_list(&tmp_list);
	pnfs_layoutcommit_inode(ino, true);
	if (layoutreturn)
		pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
	return roc;
}

void pnfs_roc_release(struct inode *ino)
{
	struct pnfs_layout_hdr *lo;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	pnfs_clear_layoutreturn_waitbit(lo);
	if (atomic_dec_and_test(&lo->plh_refcount)) {
		pnfs_detach_layout_hdr(lo);
		spin_unlock(&ino->i_lock);
		pnfs_free_layout_hdr(lo);
	} else
		spin_unlock(&ino->i_lock);
}

void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
{
	struct pnfs_layout_hdr *lo;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	if (pnfs_seqid_is_newer(barrier, lo->plh_barrier))
		lo->plh_barrier = barrier;
	spin_unlock(&ino->i_lock);
	trace_nfs4_layoutreturn_on_close(ino, 0);
}

void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_hdr *lo;
	u32 current_seqid;

	spin_lock(&ino->i_lock);
	lo = nfsi->layout;
	current_seqid = be32_to_cpu(lo->plh_stateid.seqid);

	/* Since close does not return a layout stateid for use as
	 * a barrier, we choose the worst-case barrier.
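	 * For example, with plh_stateid.seqid == 7 and two LAYOUTGETs still
	 * outstanding, the barrier is reported as 9, covering any seqid the
	 * server may already have assigned to those in-flight operations.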
	 */
	*barrier = current_seqid + atomic_read(&lo->plh_outstanding);
	spin_unlock(&ino->i_lock);
}

bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_hdr *lo;
	bool sleep = false;

	/* we might not have grabbed lo reference. so need to check under
	 * i_lock */
	spin_lock(&ino->i_lock);
	lo = nfsi->layout;
	if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
		sleep = true;
	spin_unlock(&ino->i_lock);

	if (sleep)
		rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);

	return sleep;
}

/*
 * Compare two layout segments for sorting into layout cache.
 * We want to preferentially return RW over RO layouts, so ensure those
 * are seen first.
 */
static s64
pnfs_lseg_range_cmp(const struct pnfs_layout_range *l1,
	   const struct pnfs_layout_range *l2)
{
	s64 d;

	/* high offset > low offset */
	d = l1->offset - l2->offset;
	if (d)
		return d;

	/* short length > long length */
	d = l2->length - l1->length;
	if (d)
		return d;

	/* read > read/write */
	return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ);
}

static bool
pnfs_lseg_range_is_after(const struct pnfs_layout_range *l1,
		const struct pnfs_layout_range *l2)
{
	return pnfs_lseg_range_cmp(l1, l2) > 0;
}

static bool
pnfs_lseg_no_merge(struct pnfs_layout_segment *lseg,
		struct pnfs_layout_segment *old)
{
	return false;
}

void
pnfs_generic_layout_insert_lseg(struct pnfs_layout_hdr *lo,
		   struct pnfs_layout_segment *lseg,
		   bool (*is_after)(const struct pnfs_layout_range *,
			   const struct pnfs_layout_range *),
		   bool (*do_merge)(struct pnfs_layout_segment *,
			   struct pnfs_layout_segment *),
		   struct list_head *free_me)
{
	struct pnfs_layout_segment *lp, *tmp;

	dprintk("%s:Begin\n", __func__);

	list_for_each_entry_safe(lp, tmp, &lo->plh_segs, pls_list) {
		if (test_bit(NFS_LSEG_VALID, &lp->pls_flags) == 0)
			continue;
		if (do_merge(lseg, lp)) {
			mark_lseg_invalid(lp, free_me);
			continue;
		}
		if (is_after(&lseg->pls_range, &lp->pls_range))
			continue;
		list_add_tail(&lseg->pls_list, &lp->pls_list);
		dprintk("%s: inserted lseg %p "
			"iomode %d offset %llu length %llu before "
			"lp %p iomode %d offset %llu length %llu\n",
			__func__, lseg, lseg->pls_range.iomode,
			lseg->pls_range.offset, lseg->pls_range.length,
			lp, lp->pls_range.iomode, lp->pls_range.offset,
			lp->pls_range.length);
		goto out;
	}
	list_add_tail(&lseg->pls_list, &lo->plh_segs);
	dprintk("%s: inserted lseg %p "
		"iomode %d offset %llu length %llu at tail\n",
		__func__, lseg, lseg->pls_range.iomode,
		lseg->pls_range.offset, lseg->pls_range.length);
out:
	pnfs_get_layout_hdr(lo);

	dprintk("%s:Return\n", __func__);
}
EXPORT_SYMBOL_GPL(pnfs_generic_layout_insert_lseg);

static void
pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo,
		   struct pnfs_layout_segment *lseg,
		   struct list_head *free_me)
{
	struct inode *inode = lo->plh_inode;
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;

	if (ld->add_lseg != NULL)
		ld->add_lseg(lo, lseg, free_me);
	else
		pnfs_generic_layout_insert_lseg(lo, lseg,
				pnfs_lseg_range_is_after,
				pnfs_lseg_no_merge,
				free_me);
}

static struct pnfs_layout_hdr *
alloc_init_layout_hdr(struct inode *ino,
		      struct nfs_open_context *ctx,
		      gfp_t gfp_flags)
{
	struct pnfs_layout_hdr *lo;

	lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
	if (!lo)
		return NULL;
	atomic_set(&lo->plh_refcount, 1);
	INIT_LIST_HEAD(&lo->plh_layouts);
	INIT_LIST_HEAD(&lo->plh_segs);
	INIT_LIST_HEAD(&lo->plh_bulk_destroy);
	lo->plh_inode = ino;
	lo->plh_lc_cred = get_rpccred(ctx->cred);
	lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID;
	return lo;
}

static struct pnfs_layout_hdr *
pnfs_find_alloc_layout(struct inode *ino,
		       struct nfs_open_context *ctx,
		       gfp_t gfp_flags)
	__releases(&ino->i_lock)
	__acquires(&ino->i_lock)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_hdr *new = NULL;

	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);

	if (nfsi->layout != NULL)
		goto out_existing;
	spin_unlock(&ino->i_lock);
	new = alloc_init_layout_hdr(ino, ctx, gfp_flags);
	spin_lock(&ino->i_lock);

	if (likely(nfsi->layout == NULL)) {	/* Won the race? */
		nfsi->layout = new;
		return new;
	} else if (new != NULL)
		pnfs_free_layout_hdr(new);
out_existing:
	pnfs_get_layout_hdr(nfsi->layout);
	return nfsi->layout;
}

/*
 * iomode matching rules:
 * range iomode	lseg iomode	strict match	matches
 * ------------	-----------	------------	-------
 * ANY		READ		N/A		true
 * ANY		RW		N/A		true
 * RW		READ		N/A		false
 * RW		RW		N/A		true
 * READ		READ		N/A		true
 * READ		RW		true		false
 * READ		RW		false		true
 */
static bool
pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range,
		 const struct pnfs_layout_range *range,
		 bool strict_iomode)
{
	struct pnfs_layout_range range1;

	if ((range->iomode == IOMODE_RW &&
	     ls_range->iomode != IOMODE_RW) ||
	    (range->iomode != ls_range->iomode &&
	     strict_iomode == true) ||
	    !pnfs_lseg_range_intersecting(ls_range, range))
		return 0;

	/* range1 covers only the first byte in the range */
	range1 = *range;
	range1.length = 1;
	return pnfs_lseg_range_contained(ls_range, &range1);
}

/*
 * lookup range in layout
 */
static struct pnfs_layout_segment *
pnfs_find_lseg(struct pnfs_layout_hdr *lo,
		struct pnfs_layout_range *range,
		bool strict_iomode)
{
	struct pnfs_layout_segment *lseg, *ret = NULL;

	dprintk("%s:Begin\n", __func__);

	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
		    !test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) &&
		    pnfs_lseg_range_match(&lseg->pls_range, range,
					  strict_iomode)) {
			ret = pnfs_get_lseg(lseg);
			break;
		}
	}

	dprintk("%s:Return lseg %p ref %d\n",
		__func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
	return ret;
}

/*
 * Use mdsthreshold hints set at each OPEN to determine if I/O should go
 * to the MDS or over pNFS
 *
 * The nfs_inode read_io and write_io fields are cumulative counters reset
 * when there are no layout segments. Note that in pnfs_update_layout iomode
 * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
 * WRITE request.
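 *
 * For example, if the server supplies only a read file-size threshold
 * (THRESHOLD_RD) of 64k, a READ of a 4k file falls below it and is sent
 * to the MDS, while a READ of a 1M file goes over pNFS.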
 *
 * A return of true means use MDS I/O.
 *
 * From rfc 5661:
 * If a file's size is smaller than the file size threshold, data accesses
 * SHOULD be sent to the metadata server. If an I/O request has a length that
 * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
 * server. If both file size and I/O size are provided, the client SHOULD
 * reach or exceed both thresholds before sending its read or write
 * requests to the data server.
 */
static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
				     struct inode *ino, int iomode)
{
	struct nfs4_threshold *t = ctx->mdsthreshold;
	struct nfs_inode *nfsi = NFS_I(ino);
	loff_t fsize = i_size_read(ino);
	bool size = false, size_set = false, io = false, io_set = false, ret = false;

	if (t == NULL)
		return ret;

	dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
		__func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);

	switch (iomode) {
	case IOMODE_READ:
		if (t->bm & THRESHOLD_RD) {
			dprintk("%s fsize %llu\n", __func__, fsize);
			size_set = true;
			if (fsize < t->rd_sz)
				size = true;
		}
		if (t->bm & THRESHOLD_RD_IO) {
			dprintk("%s nfsi->read_io %llu\n", __func__,
				nfsi->read_io);
			io_set = true;
			if (nfsi->read_io < t->rd_io_sz)
				io = true;
		}
		break;
	case IOMODE_RW:
		if (t->bm & THRESHOLD_WR) {
			dprintk("%s fsize %llu\n", __func__, fsize);
			size_set = true;
			if (fsize < t->wr_sz)
				size = true;
		}
		if (t->bm & THRESHOLD_WR_IO) {
			dprintk("%s nfsi->write_io %llu\n", __func__,
				nfsi->write_io);
			io_set = true;
			if (nfsi->write_io < t->wr_io_sz)
				io = true;
		}
		break;
	}
	if (size_set && io_set) {
		if (size && io)
			ret = true;
	} else if (size || io)
		ret = true;

	dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
	return ret;
}

static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
{
	/*
	 * send layoutcommit as it can hold up layoutreturn due to lseg
	 * reference
	 */
	pnfs_layoutcommit_inode(lo->plh_inode, false);
	return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
				   nfs_wait_bit_killable,
				   TASK_UNINTERRUPTIBLE);
}

static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo)
{
	unsigned long *bitlock = &lo->plh_flags;

	clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock);
	smp_mb__after_atomic();
	wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET);
}

/*
 * Layout segment is retrieved from the server if not cached.
 * The appropriate layout segment is referenced and returned to the caller.
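 *
 * A typical caller (cf. pnfs_generic_pg_init_read() below) does roughly:
 *
 *	lseg = pnfs_update_layout(inode, ctx, req_offset(req), rd_size,
 *				  IOMODE_READ, false, GFP_KERNEL);
 *
 * where an ERR_PTR() return is an error and NULL means "fall back to
 * I/O through the MDS".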
 */
struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino,
		   struct nfs_open_context *ctx,
		   loff_t pos,
		   u64 count,
		   enum pnfs_iomode iomode,
		   bool strict_iomode,
		   gfp_t gfp_flags)
{
	struct pnfs_layout_range arg = {
		.iomode = iomode,
		.offset = pos,
		.length = count,
	};
	unsigned pg_offset, seq;
	struct nfs_server *server = NFS_SERVER(ino);
	struct nfs_client *clp = server->nfs_client;
	struct pnfs_layout_hdr *lo = NULL;
	struct pnfs_layout_segment *lseg = NULL;
	nfs4_stateid stateid;
	long timeout = 0;
	unsigned long giveup = jiffies + (clp->cl_lease_time << 1);
	bool first;

	if (!pnfs_enabled_sb(NFS_SERVER(ino))) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_NO_PNFS);
		goto out;
	}

	if (iomode == IOMODE_READ && i_size_read(ino) == 0) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_RD_ZEROLEN);
		goto out;
	}

	if (pnfs_within_mdsthreshold(ctx, ino, iomode)) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_MDSTHRESH);
		goto out;
	}

lookup_again:
	first = false;
	spin_lock(&ino->i_lock);
	lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
	if (lo == NULL) {
		spin_unlock(&ino->i_lock);
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_NOMEM);
		goto out;
	}

	/* Do we even need to bother with this? */
	if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_BULK_RECALL);
		dprintk("%s matches recall, use MDS\n", __func__);
		goto out_unlock;
	}

	/* if LAYOUTGET already failed once we don't try again */
	if (pnfs_layout_io_test_failed(lo, iomode)) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_IO_TEST_FAIL);
		goto out_unlock;
	}

	lseg = pnfs_find_lseg(lo, &arg, strict_iomode);
	if (lseg) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				PNFS_UPDATE_LAYOUT_FOUND_CACHED);
		goto out_unlock;
	}

	if (!nfs4_valid_open_stateid(ctx->state)) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				PNFS_UPDATE_LAYOUT_INVALID_OPEN);
		goto out_unlock;
	}

	/*
	 * Choose a stateid for the LAYOUTGET. If we don't have a layout
	 * stateid, or it has been invalidated, then we must use the open
	 * stateid.
	 */
	if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) {

		/*
		 * The first layoutget for the file. Need to serialize per
		 * RFC 5661 Errata 3208.
		 */
		if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET,
				     &lo->plh_flags)) {
			spin_unlock(&ino->i_lock);
			wait_on_bit(&lo->plh_flags, NFS_LAYOUT_FIRST_LAYOUTGET,
				    TASK_UNINTERRUPTIBLE);
			pnfs_put_layout_hdr(lo);
			dprintk("%s retrying\n", __func__);
			goto lookup_again;
		}

		first = true;
		do {
			seq = read_seqbegin(&ctx->state->seqlock);
			nfs4_stateid_copy(&stateid, &ctx->state->stateid);
		} while (read_seqretry(&ctx->state->seqlock, seq));
	} else {
		nfs4_stateid_copy(&stateid, &lo->plh_stateid);
	}

	/*
	 * Because we free lsegs before sending LAYOUTRETURN, we need to wait
	 * for LAYOUTRETURN even if first is true.
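	 * pnfs_prepare_to_retry_layoutget() flushes a layoutcommit (which
	 * may be pinning an lseg) and then sleeps until NFS_LAYOUT_RETURN
	 * clears before the lookup is retried.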
	 */
	if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
		spin_unlock(&ino->i_lock);
		dprintk("%s wait for layoutreturn\n", __func__);
		if (pnfs_prepare_to_retry_layoutget(lo)) {
			if (first)
				pnfs_clear_first_layoutget(lo);
			pnfs_put_layout_hdr(lo);
			dprintk("%s retrying\n", __func__);
			trace_pnfs_update_layout(ino, pos, count, iomode, lo,
					lseg, PNFS_UPDATE_LAYOUT_RETRY);
			goto lookup_again;
		}
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				PNFS_UPDATE_LAYOUT_RETURN);
		goto out_put_layout_hdr;
	}

	if (pnfs_layoutgets_blocked(lo)) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				PNFS_UPDATE_LAYOUT_BLOCKED);
		goto out_unlock;
	}
	atomic_inc(&lo->plh_outstanding);
	spin_unlock(&ino->i_lock);

	if (list_empty(&lo->plh_layouts)) {
		/* The lo must be on the clp list if there is any
		 * chance of a CB_LAYOUTRECALL(FILE) coming in.
		 */
		spin_lock(&clp->cl_lock);
		if (list_empty(&lo->plh_layouts))
			list_add_tail(&lo->plh_layouts, &server->layouts);
		spin_unlock(&clp->cl_lock);
	}

	pg_offset = arg.offset & ~PAGE_MASK;
	if (pg_offset) {
		arg.offset -= pg_offset;
		arg.length += pg_offset;
	}
	if (arg.length != NFS4_MAX_UINT64)
		arg.length = PAGE_ALIGN(arg.length);

	lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags);
	trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
	atomic_dec(&lo->plh_outstanding);
	if (IS_ERR(lseg)) {
		switch (PTR_ERR(lseg)) {
		case -EBUSY:
			if (time_after(jiffies, giveup))
				lseg = NULL;
			break;
		case -ERECALLCONFLICT:
			/* Huh? We hold no layouts, how is there a recall? */
			if (first) {
				lseg = NULL;
				break;
			}
			/* Destroy the existing layout and start over */
			if (time_after(jiffies, giveup))
				pnfs_destroy_layout(NFS_I(ino));
			/* Fallthrough */
		case -EAGAIN:
			break;
		default:
			if (!nfs_error_is_fatal(PTR_ERR(lseg))) {
				pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
				lseg = NULL;
			}
			goto out_put_layout_hdr;
		}
		if (lseg) {
			if (first)
				pnfs_clear_first_layoutget(lo);
			trace_pnfs_update_layout(ino, pos, count,
				iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
			pnfs_put_layout_hdr(lo);
			goto lookup_again;
		}
	} else {
		pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
	}

out_put_layout_hdr:
	if (first)
		pnfs_clear_first_layoutget(lo);
	pnfs_put_layout_hdr(lo);
out:
	dprintk("%s: inode %s/%llu pNFS layout segment %s for "
			"(%s, offset: %llu, length: %llu)\n",
			__func__, ino->i_sb->s_id,
			(unsigned long long)NFS_FILEID(ino),
			IS_ERR_OR_NULL(lseg) ? "not found" : "found",
			iomode == IOMODE_RW ? "read/write" : "read-only",
			(unsigned long long)pos,
			(unsigned long long)count);
	return lseg;
out_unlock:
	spin_unlock(&ino->i_lock);
	goto out_put_layout_hdr;
}
EXPORT_SYMBOL_GPL(pnfs_update_layout);

static bool
pnfs_sanity_check_layout_range(struct pnfs_layout_range *range)
{
	switch (range->iomode) {
	case IOMODE_READ:
	case IOMODE_RW:
		break;
	default:
		return false;
	}
	if (range->offset == NFS4_MAX_UINT64)
		return false;
	if (range->length == 0)
		return false;
	if (range->length != NFS4_MAX_UINT64 &&
	    range->length > NFS4_MAX_UINT64 - range->offset)
		return false;
	return true;
}

struct pnfs_layout_segment *
pnfs_layout_process(struct nfs4_layoutget *lgp)
{
	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
	struct nfs4_layoutget_res *res = &lgp->res;
	struct pnfs_layout_segment *lseg;
	struct inode *ino = lo->plh_inode;
	LIST_HEAD(free_me);

	if (!pnfs_sanity_check_layout_range(&res->range))
		return ERR_PTR(-EINVAL);

	/* Inject layout blob into I/O device driver */
	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
	if (IS_ERR_OR_NULL(lseg)) {
		if (!lseg)
			lseg = ERR_PTR(-ENOMEM);

		dprintk("%s: Could not allocate layout: error %ld\n",
		       __func__, PTR_ERR(lseg));
		return lseg;
	}

	pnfs_init_lseg(lo, lseg, &res->range, &res->stateid);

	spin_lock(&ino->i_lock);
	if (pnfs_layoutgets_blocked(lo)) {
		dprintk("%s forget reply due to state\n", __func__);
		goto out_forget;
	}

	if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) {
		/* existing state ID, make sure the sequence number matches. */
		if (pnfs_layout_stateid_blocked(lo, &res->stateid)) {
			dprintk("%s forget reply due to sequence\n", __func__);
			goto out_forget;
		}
		pnfs_set_layout_stateid(lo, &res->stateid, false);
	} else {
		/*
		 * We got an entirely new state ID.  Mark all segments for the
		 * inode invalid, and don't bother validating the stateid
		 * sequence number.
		 */
		pnfs_mark_layout_stateid_invalid(lo, &free_me);

		nfs4_stateid_copy(&lo->plh_stateid, &res->stateid);
		lo->plh_barrier = be32_to_cpu(res->stateid.seqid);
	}

	pnfs_get_lseg(lseg);
	pnfs_layout_insert_lseg(lo, lseg, &free_me);
	if (!pnfs_layout_is_valid(lo)) {
		pnfs_clear_layoutreturn_info(lo);
		clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
	}

	if (res->return_on_close)
		set_bit(NFS_LSEG_ROC, &lseg->pls_flags);

	spin_unlock(&ino->i_lock);
	pnfs_free_lseg_list(&free_me);
	return lseg;

out_forget:
	spin_unlock(&ino->i_lock);
	lseg->pls_layout = lo;
	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
	return ERR_PTR(-EAGAIN);
}

static void
pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
			 u32 seq)
{
	if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode)
		iomode = IOMODE_ANY;
	lo->plh_return_iomode = iomode;
	set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
	if (seq != 0) {
		WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq);
		lo->plh_return_seq = seq;
	}
}

/**
 * pnfs_mark_matching_lsegs_return - Free or return matching layout segments
 * @lo: pointer to layout header
 * @tmp_list: list header to be used with pnfs_free_lseg_list()
 * @return_range: describe layout segment ranges to be returned
 * @seq: only return lsegs obtained prior to this sequence (may be 0)
 *
 * This function is mainly intended for use by layoutrecall. It attempts
 * to free the layout segment immediately, or else to mark it for return
 * as soon as its reference count drops to zero.
 */
int
pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
				struct list_head *tmp_list,
				const struct pnfs_layout_range *return_range,
				u32 seq)
{
	struct pnfs_layout_segment *lseg, *next;
	int remaining = 0;

	dprintk("%s:Begin lo %p\n", __func__, lo);

	if (list_empty(&lo->plh_segs))
		return 0;

	assert_spin_locked(&lo->plh_inode->i_lock);

	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
		if (pnfs_match_lseg_recall(lseg, return_range, seq)) {
			dprintk("%s: marking lseg %p iomode %d "
				"offset %llu length %llu\n", __func__,
				lseg, lseg->pls_range.iomode,
				lseg->pls_range.offset,
				lseg->pls_range.length);
			if (mark_lseg_invalid(lseg, tmp_list))
				continue;
			remaining++;
			set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
		}

	if (remaining)
		pnfs_set_plh_return_info(lo, return_range->iomode, seq);

	return remaining;
}

void pnfs_error_mark_layout_for_return(struct inode *inode,
				       struct pnfs_layout_segment *lseg)
{
	struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
	struct pnfs_layout_range range = {
		.iomode = lseg->pls_range.iomode,
		.offset = 0,
		.length = NFS4_MAX_UINT64,
	};
	LIST_HEAD(free_me);
	bool return_now = false;

	spin_lock(&inode->i_lock);
	pnfs_set_plh_return_info(lo, range.iomode, 0);
	/*
	 * mark all matching lsegs so that we are sure to have no live
	 * segments at hand when sending layoutreturn. See pnfs_put_lseg()
	 * for how it works.
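	 * If nothing is left to wait for, the LAYOUTRETURN is prepared and
	 * sent immediately below; otherwise outstanding commits are flushed
	 * so the remaining segment references can drop and trigger the
	 * deferred return later.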
void pnfs_error_mark_layout_for_return(struct inode *inode,
				       struct pnfs_layout_segment *lseg)
{
	struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
	struct pnfs_layout_range range = {
		.iomode = lseg->pls_range.iomode,
		.offset = 0,
		.length = NFS4_MAX_UINT64,
	};
	LIST_HEAD(free_me);
	bool return_now = false;

	spin_lock(&inode->i_lock);
	pnfs_set_plh_return_info(lo, range.iomode, 0);
	/*
	 * mark all matching lsegs so that we are sure to have no live
	 * segments at hand when sending layoutreturn. See pnfs_put_lseg()
	 * for how it works.
	 */
	if (!pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0)) {
		nfs4_stateid stateid;
		enum pnfs_iomode iomode;

		return_now = pnfs_prepare_layoutreturn(lo, &stateid, &iomode);
		spin_unlock(&inode->i_lock);
		if (return_now)
			pnfs_send_layoutreturn(lo, &stateid, iomode, false);
	} else {
		spin_unlock(&inode->i_lock);
		nfs_commit_inode(inode, 0);
	}
	pnfs_free_lseg_list(&free_me);
}
EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);

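/*
 * pnfs_generic_pg_init_read - pg_init hook for pNFS reads
 *
 * Attach a layout segment covering the read to the pageio descriptor,
 * asking the server for one (via pnfs_update_layout) if none is cached.
 * If no usable segment can be obtained, the descriptor is reset so that
 * the read is sent through the MDS instead.
 */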
void
pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
	u64 rd_size = req->wb_bytes;

	if (pgio->pg_lseg == NULL) {
		if (pgio->pg_dreq == NULL)
			rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
		else
			rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);

		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   req->wb_context,
						   req_offset(req),
						   rd_size,
						   IOMODE_READ,
						   false,
						   GFP_KERNEL);
		if (IS_ERR(pgio->pg_lseg)) {
			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
			pgio->pg_lseg = NULL;
			return;
		}
	}
	/* If no lseg, fall back to read through mds */
	if (pgio->pg_lseg == NULL)
		nfs_pageio_reset_read_mds(pgio);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);

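/*
 * pnfs_generic_pg_init_write - pg_init hook for pNFS writes
 *
 * Like pnfs_generic_pg_init_read(), but requests an IOMODE_RW segment of
 * @wb_size bytes and falls back to writing through the MDS when no layout
 * segment is available.
 */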
void
pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
			   struct nfs_page *req, u64 wb_size)
{
	if (pgio->pg_lseg == NULL) {
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   req->wb_context,
						   req_offset(req),
						   wb_size,
						   IOMODE_RW,
						   false,
						   GFP_NOFS);
		if (IS_ERR(pgio->pg_lseg)) {
			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
			pgio->pg_lseg = NULL;
			return;
		}
	}
	/* If no lseg, fall back to write through mds */
	if (pgio->pg_lseg == NULL)
		nfs_pageio_reset_write_mds(pgio);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);

void
pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc)
{
	if (desc->pg_lseg) {
		pnfs_put_lseg(desc->pg_lseg);
		desc->pg_lseg = NULL;
	}
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup);

/*
 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
 * of bytes (maximum @req->wb_bytes) that can be coalesced.
 */
size_t
pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
		     struct nfs_page *prev, struct nfs_page *req)
{
	unsigned int size;
	u64 seg_end, req_start, seg_left;

	size = nfs_generic_pg_test(pgio, prev, req);
	if (!size)
		return 0;

	/*
	 * 'size' contains the number of bytes left in the current page (up
	 * to the original size asked for in @req->wb_bytes).
	 *
	 * Calculate how many bytes are left in the layout segment and, if
	 * there are fewer bytes than 'size', return that instead.
	 *
	 * Please also note that 'end_offset' is actually the offset of the
	 * first byte that lies outside the pnfs_layout_range. FIXME?
	 */
	if (pgio->pg_lseg) {
		seg_end = end_offset(pgio->pg_lseg->pls_range.offset,
				     pgio->pg_lseg->pls_range.length);
		req_start = req_offset(req);
		WARN_ON_ONCE(req_start >= seg_end);
		/* start of request is past the last byte of this segment */
		if (req_start >= seg_end) {
			/* reference the new lseg */
			if (pgio->pg_ops->pg_cleanup)
				pgio->pg_ops->pg_cleanup(pgio);
			if (pgio->pg_ops->pg_init)
				pgio->pg_ops->pg_init(pgio, req);
			return 0;
		}

		/* adjust 'size' iff there are fewer bytes left in the
		 * segment than what nfs_generic_pg_test returned */
		seg_left = seg_end - req_start;
		if (seg_left < size)
			size = (unsigned int)seg_left;
	}

	return size;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);

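/*
 * pnfs_write_done_resend_to_mds - redrive a failed pNFS write through the MDS
 *
 * Re-queue every request attached to @hdr on a fresh pageio descriptor
 * that bypasses the layout driver, so the data is written via the MDS.
 */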
int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
{
	struct nfs_pageio_descriptor pgio;

	/* Resend all requests through the MDS */
	nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
			      hdr->completion_ops);
	set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags);
	return nfs_pageio_resend(&pgio, hdr);
}
EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);

static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr)
{
	dprintk("pnfs write error = %d\n", hdr->pnfs_error);
	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
	    PNFS_LAYOUTRET_ON_ERROR) {
		pnfs_return_layout(hdr->inode);
	}
	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
		hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr);
}

/*
 * Called by non rpc-based layout drivers
 */
void pnfs_ld_write_done(struct nfs_pgio_header *hdr)
{
	if (likely(!hdr->pnfs_error)) {
		pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
				      hdr->mds_offset + hdr->res.count);
		hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
	}
	trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
	if (unlikely(hdr->pnfs_error))
		pnfs_ld_handle_write_error(hdr);
	hdr->mds_ops->rpc_release(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);

static void
pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
		       struct nfs_pgio_header *hdr)
{
	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		list_splice_tail_init(&hdr->pages, &mirror->pg_list);
		nfs_pageio_reset_write_mds(desc);
		mirror->pg_recoalesce = 1;
	}
	nfs_pgio_data_destroy(hdr);
	hdr->release(hdr);
}

static enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_pgio_header *hdr,
		       const struct rpc_call_ops *call_ops,
		       struct pnfs_layout_segment *lseg,
		       int how)
{
	struct inode *inode = hdr->inode;
	enum pnfs_try_status trypnfs;
	struct nfs_server *nfss = NFS_SERVER(inode);

	hdr->mds_ops = call_ops;

	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
		inode->i_ino, hdr->args.count, hdr->args.offset, how);
	trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how);
	if (trypnfs != PNFS_NOT_ATTEMPTED)
		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
	return trypnfs;
}

static void
pnfs_do_write(struct nfs_pageio_descriptor *desc,
	      struct nfs_pgio_header *hdr, int how)
{
	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
	struct pnfs_layout_segment *lseg = desc->pg_lseg;
	enum pnfs_try_status trypnfs;

	trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
	if (trypnfs == PNFS_NOT_ATTEMPTED)
		pnfs_write_through_mds(desc, hdr);
}

static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
{
	pnfs_put_lseg(hdr->lseg);
	nfs_pgio_header_free(hdr);
}

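/*
 * pnfs_generic_pg_writepages - submit the writes queued on a pageio descriptor
 *
 * Generic pg_doio implementation for pNFS writes: build an nfs_pgio_header
 * for the coalesced requests and hand it to the layout driver, falling
 * back to the MDS if the driver does not attempt the I/O.
 */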
int
pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
	struct nfs_pgio_header *hdr;
	int ret;

	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
	if (!hdr) {
		desc->pg_error = -ENOMEM;
		return desc->pg_error;
	}
	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);

	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
	ret = nfs_generic_pgio(desc, hdr);
	if (!ret)
		pnfs_do_write(desc, hdr, desc->pg_ioflags);

	return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);

int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr)
{
	struct nfs_pageio_descriptor pgio;

	/* Resend all requests through the MDS */
	nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops);
	return nfs_pageio_resend(&pgio, hdr);
}
EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);

static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr)
{
	dprintk("pnfs read error = %d\n", hdr->pnfs_error);
	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
	    PNFS_LAYOUTRET_ON_ERROR) {
		pnfs_return_layout(hdr->inode);
	}
	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
		hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr);
}

/*
 * Called by non rpc-based layout drivers
 */
void pnfs_ld_read_done(struct nfs_pgio_header *hdr)
{
	if (likely(!hdr->pnfs_error)) {
		__nfs4_read_done_cb(hdr);
		hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
	}
	trace_nfs4_pnfs_read(hdr, hdr->pnfs_error);
	if (unlikely(hdr->pnfs_error))
		pnfs_ld_handle_read_error(hdr);
	hdr->mds_ops->rpc_release(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_ld_read_done);

static void
pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
		      struct nfs_pgio_header *hdr)
{
	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		list_splice_tail_init(&hdr->pages, &mirror->pg_list);
		nfs_pageio_reset_read_mds(desc);
		mirror->pg_recoalesce = 1;
	}
	nfs_pgio_data_destroy(hdr);
	hdr->release(hdr);
}

/*
 * Call the appropriate parallel I/O subsystem read function.
 */
static enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_pgio_header *hdr,
		      const struct rpc_call_ops *call_ops,
		      struct pnfs_layout_segment *lseg)
{
	struct inode *inode = hdr->inode;
	struct nfs_server *nfss = NFS_SERVER(inode);
	enum pnfs_try_status trypnfs;

	hdr->mds_ops = call_ops;

	dprintk("%s: Reading ino:%lu %u@%llu\n",
		__func__, inode->i_ino, hdr->args.count, hdr->args.offset);

	trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr);
	if (trypnfs != PNFS_NOT_ATTEMPTED)
		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
	return trypnfs;
}

/* Resend all requests through pnfs. */
void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr)
{
	struct nfs_pageio_descriptor pgio;

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		nfs_pageio_init_read(&pgio, hdr->inode, false,
				     hdr->completion_ops);
		hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr);
	}
}
EXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs);

static void
pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
{
	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
	struct pnfs_layout_segment *lseg = desc->pg_lseg;
	enum pnfs_try_status trypnfs;

	trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
	if (trypnfs == PNFS_TRY_AGAIN)
		pnfs_read_resend_pnfs(hdr);
	if (trypnfs == PNFS_NOT_ATTEMPTED || hdr->task.tk_status)
		pnfs_read_through_mds(desc, hdr);
}

static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
{
	pnfs_put_lseg(hdr->lseg);
	nfs_pgio_header_free(hdr);
}

int
pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
	struct nfs_pgio_header *hdr;
	int ret;

	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
	if (!hdr) {
		desc->pg_error = -ENOMEM;
		return desc->pg_error;
	}
	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
	ret = nfs_generic_pgio(desc, hdr);
	if (!ret)
		pnfs_do_read(desc, hdr);
	return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);

static void pnfs_clear_layoutcommitting(struct inode *inode)
{
	unsigned long *bitlock = &NFS_I(inode)->flags;

	clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
	smp_mb__after_atomic();
	wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
}

/*
 * There can be multiple RW segments.
 */
static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
{
	struct pnfs_layout_segment *lseg;

	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
		if (lseg->pls_range.iomode == IOMODE_RW &&
		    test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
			list_add(&lseg->pls_lc_list, listp);
	}
}

static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
{
	struct pnfs_layout_segment *lseg, *tmp;

	/* Matched by references in pnfs_set_layoutcommit */
	list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
		list_del_init(&lseg->pls_lc_list);
		pnfs_put_lseg(lseg);
	}

	pnfs_clear_layoutcommitting(inode);
}

void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
{
	pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
}
EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);

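/*
 * pnfs_set_layoutcommit - note that a LAYOUTCOMMIT is needed for @inode
 *
 * Record the new last byte written and take a reference on the layout
 * segment so that the range it covers can be committed to the MDS by a
 * later call to pnfs_layoutcommit_inode().
 */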
void
pnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg,
		      loff_t end_pos)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	bool mark_as_dirty = false;

	spin_lock(&inode->i_lock);
	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
		nfsi->layout->plh_lwb = end_pos;
		mark_as_dirty = true;
		dprintk("%s: Set layoutcommit for inode %lu\n",
			__func__, inode->i_ino);
	} else if (end_pos > nfsi->layout->plh_lwb)
		nfsi->layout->plh_lwb = end_pos;
	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) {
		/* references matched in nfs4_layoutcommit_release */
		pnfs_get_lseg(lseg);
	}
	spin_unlock(&inode->i_lock);
	dprintk("%s: lseg %p end_pos %llu\n",
		__func__, lseg, nfsi->layout->plh_lwb);

	/* if pnfs_layoutcommit_inode() runs between inode locks, the next one
	 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
	if (mark_as_dirty)
		mark_inode_dirty_sync(inode);
}
EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);

void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
{
	struct nfs_server *nfss = NFS_SERVER(data->args.inode);

	if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
		nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
	pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
}

/*
 * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
 * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
 * data to disk to allow the server to recover the data if it crashes.
 * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
 * is off, and a COMMIT is sent to a data server, or
 * if WRITEs to a data server return NFS_DATA_SYNC.
 */
int
pnfs_layoutcommit_inode(struct inode *inode, bool sync)
{
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
	struct nfs4_layoutcommit_data *data;
	struct nfs_inode *nfsi = NFS_I(inode);
	loff_t end_pos;
	int status;

	if (!pnfs_layoutcommit_outstanding(inode))
		return 0;

	dprintk("--> %s inode %lu\n", __func__, inode->i_ino);

	status = -EAGAIN;
	if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
		if (!sync)
			goto out;
		status = wait_on_bit_lock_action(&nfsi->flags,
				NFS_INO_LAYOUTCOMMITTING,
				nfs_wait_bit_killable,
				TASK_KILLABLE);
		if (status)
			goto out;
	}

	status = -ENOMEM;
	/* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
	data = kzalloc(sizeof(*data), GFP_NOFS);
	if (!data)
		goto clear_layoutcommitting;

	status = 0;
	spin_lock(&inode->i_lock);
	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
		goto out_unlock;

	INIT_LIST_HEAD(&data->lseg_list);
	pnfs_list_write_lseg(inode, &data->lseg_list);

	end_pos = nfsi->layout->plh_lwb;

	nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid);
	spin_unlock(&inode->i_lock);

	data->args.inode = inode;
	data->cred = get_rpccred(nfsi->layout->plh_lc_cred);
	nfs_fattr_init(&data->fattr);
	data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
	data->res.fattr = &data->fattr;
	if (end_pos != 0)
		data->args.lastbytewritten = end_pos - 1;
	else
		data->args.lastbytewritten = U64_MAX;
	data->res.server = NFS_SERVER(inode);

	if (ld->prepare_layoutcommit) {
		status = ld->prepare_layoutcommit(&data->args);
		if (status) {
			put_rpccred(data->cred);
			spin_lock(&inode->i_lock);
			set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
			if (end_pos > nfsi->layout->plh_lwb)
				nfsi->layout->plh_lwb = end_pos;
			goto out_unlock;
		}
	}

	status = nfs4_proc_layoutcommit(data, sync);
out:
	if (status)
		mark_inode_dirty_sync(inode);
	dprintk("<-- %s status %d\n", __func__, status);
	return status;
out_unlock:
	spin_unlock(&inode->i_lock);
	kfree(data);
clear_layoutcommitting:
	pnfs_clear_layoutcommitting(inode);
	goto out;
}
EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode);

int
pnfs_generic_sync(struct inode *inode, bool datasync)
{
	return pnfs_layoutcommit_inode(inode, true);
}
EXPORT_SYMBOL_GPL(pnfs_generic_sync);

struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
{
	struct nfs4_threshold *thp;

	thp = kzalloc(sizeof(*thp), GFP_NOFS);
	if (!thp) {
		dprintk("%s mdsthreshold allocation failed\n", __func__);
		return NULL;
	}
	return thp;
}

#if IS_ENABLED(CONFIG_NFS_V4_2)
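/*
 * pnfs_report_layoutstat - send an NFSv4.2 LAYOUTSTATS report for @inode
 *
 * Ask the layout driver to prepare its statistics payload and transmit it
 * with the LAYOUTSTATS operation.  Only one report per inode is in flight
 * at a time, and the call is a no-op when the server or the layout driver
 * does not support layoutstats.
 */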
int
pnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags)
{
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
	struct nfs_server *server = NFS_SERVER(inode);
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs42_layoutstat_data *data;
	struct pnfs_layout_hdr *hdr;
	int status = 0;

	if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats)
		goto out;

	if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS))
		goto out;

	if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags))
		goto out;

	spin_lock(&inode->i_lock);
	if (!NFS_I(inode)->layout) {
		spin_unlock(&inode->i_lock);
		goto out_clear_layoutstats;
	}
	hdr = NFS_I(inode)->layout;
	pnfs_get_layout_hdr(hdr);
	spin_unlock(&inode->i_lock);

	data = kzalloc(sizeof(*data), gfp_flags);
	if (!data) {
		status = -ENOMEM;
		goto out_put;
	}

	data->args.fh = NFS_FH(inode);
	data->args.inode = inode;
	nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid);
	status = ld->prepare_layoutstats(&data->args);
	if (status)
		goto out_free;

	status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data);

out:
	dprintk("%s returns %d\n", __func__, status);
	return status;

out_free:
	kfree(data);
out_put:
	pnfs_put_layout_hdr(hdr);
out_clear_layoutstats:
	smp_mb__before_atomic();
	clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags);
	smp_mb__after_atomic();
	goto out;
}
EXPORT_SYMBOL_GPL(pnfs_report_layoutstat);
#endif

unsigned int layoutstats_timer;
module_param(layoutstats_timer, uint, 0644);
EXPORT_SYMBOL_GPL(layoutstats_timer);