1 /* 2 * pNFS functions to call and manage layout drivers. 3 * 4 * Copyright (c) 2002 [year of first publication] 5 * The Regents of the University of Michigan 6 * All Rights Reserved 7 * 8 * Dean Hildebrand <dhildebz@umich.edu> 9 * 10 * Permission is granted to use, copy, create derivative works, and 11 * redistribute this software and such derivative works for any purpose, 12 * so long as the name of the University of Michigan is not used in 13 * any advertising or publicity pertaining to the use or distribution 14 * of this software without specific, written prior authorization. If 15 * the above copyright notice or any other identification of the 16 * University of Michigan is included in any copy of any portion of 17 * this software, then the disclaimer below must also be included. 18 * 19 * This software is provided as is, without representation or warranty 20 * of any kind either express or implied, including without limitation 21 * the implied warranties of merchantability, fitness for a particular 22 * purpose, or noninfringement. The Regents of the University of 23 * Michigan shall not be liable for any damages, including special, 24 * indirect, incidental, or consequential damages, with respect to any 25 * claim arising out of or in connection with the use of the software, 26 * even if it has been or is hereafter advised of the possibility of 27 * such damages. 28 */ 29 30 #include <linux/nfs_fs.h> 31 #include <linux/nfs_page.h> 32 #include <linux/module.h> 33 #include "internal.h" 34 #include "pnfs.h" 35 #include "iostat.h" 36 #include "nfs4trace.h" 37 #include "delegation.h" 38 #include "nfs42.h" 39 40 #define NFSDBG_FACILITY NFSDBG_PNFS 41 #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) 42 43 /* Locking: 44 * 45 * pnfs_spinlock: 46 * protects pnfs_modules_tbl. 47 */ 48 static DEFINE_SPINLOCK(pnfs_spinlock); 49 50 /* 51 * pnfs_modules_tbl holds all pnfs modules 52 */ 53 static LIST_HEAD(pnfs_modules_tbl); 54 55 static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo); 56 57 /* Return the registered pnfs layout driver module matching given id */ 58 static struct pnfs_layoutdriver_type * 59 find_pnfs_driver_locked(u32 id) 60 { 61 struct pnfs_layoutdriver_type *local; 62 63 list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid) 64 if (local->id == id) 65 goto out; 66 local = NULL; 67 out: 68 dprintk("%s: Searching for id %u, found %p\n", __func__, id, local); 69 return local; 70 } 71 72 static struct pnfs_layoutdriver_type * 73 find_pnfs_driver(u32 id) 74 { 75 struct pnfs_layoutdriver_type *local; 76 77 spin_lock(&pnfs_spinlock); 78 local = find_pnfs_driver_locked(id); 79 if (local != NULL && !try_module_get(local->owner)) { 80 dprintk("%s: Could not grab reference on module\n", __func__); 81 local = NULL; 82 } 83 spin_unlock(&pnfs_spinlock); 84 return local; 85 } 86 87 void 88 unset_pnfs_layoutdriver(struct nfs_server *nfss) 89 { 90 if (nfss->pnfs_curr_ld) { 91 if (nfss->pnfs_curr_ld->clear_layoutdriver) 92 nfss->pnfs_curr_ld->clear_layoutdriver(nfss); 93 /* Decrement the MDS count. Purge the deviceid cache if zero */ 94 if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count)) 95 nfs4_deviceid_purge_client(nfss->nfs_client); 96 module_put(nfss->pnfs_curr_ld->owner); 97 } 98 nfss->pnfs_curr_ld = NULL; 99 } 100 101 /* 102 * Try to set the server's pnfs module to the pnfs layout type specified by id. 103 * Currently only one pNFS layout driver per filesystem is supported. 104 * 105 * @id layout type. Zero (illegal layout type) indicates pNFS not in use. 106 */ 107 void 108 set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, 109 u32 id) 110 { 111 struct pnfs_layoutdriver_type *ld_type = NULL; 112 113 if (id == 0) 114 goto out_no_driver; 115 if (!(server->nfs_client->cl_exchange_flags & 116 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { 117 printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n", 118 __func__, id, server->nfs_client->cl_exchange_flags); 119 goto out_no_driver; 120 } 121 ld_type = find_pnfs_driver(id); 122 if (!ld_type) { 123 request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id); 124 ld_type = find_pnfs_driver(id); 125 if (!ld_type) { 126 dprintk("%s: No pNFS module found for %u.\n", 127 __func__, id); 128 goto out_no_driver; 129 } 130 } 131 server->pnfs_curr_ld = ld_type; 132 if (ld_type->set_layoutdriver 133 && ld_type->set_layoutdriver(server, mntfh)) { 134 printk(KERN_ERR "NFS: %s: Error initializing pNFS layout " 135 "driver %u.\n", __func__, id); 136 module_put(ld_type->owner); 137 goto out_no_driver; 138 } 139 /* Bump the MDS count */ 140 atomic_inc(&server->nfs_client->cl_mds_count); 141 142 dprintk("%s: pNFS module for %u set\n", __func__, id); 143 return; 144 145 out_no_driver: 146 dprintk("%s: Using NFSv4 I/O\n", __func__); 147 server->pnfs_curr_ld = NULL; 148 } 149 150 int 151 pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) 152 { 153 int status = -EINVAL; 154 struct pnfs_layoutdriver_type *tmp; 155 156 if (ld_type->id == 0) { 157 printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__); 158 return status; 159 } 160 if (!ld_type->alloc_lseg || !ld_type->free_lseg) { 161 printk(KERN_ERR "NFS: %s Layout driver must provide " 162 "alloc_lseg and free_lseg.\n", __func__); 163 return status; 164 } 165 166 spin_lock(&pnfs_spinlock); 167 tmp = find_pnfs_driver_locked(ld_type->id); 168 if (!tmp) { 169 list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl); 170 status = 0; 171 dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id, 172 ld_type->name); 173 } else { 174 printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n", 175 __func__, ld_type->id); 176 } 177 spin_unlock(&pnfs_spinlock); 178 179 return status; 180 } 181 EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver); 182 183 void 184 pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type) 185 { 186 dprintk("%s Deregistering id:%u\n", __func__, ld_type->id); 187 spin_lock(&pnfs_spinlock); 188 list_del(&ld_type->pnfs_tblid); 189 spin_unlock(&pnfs_spinlock); 190 } 191 EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); 192 193 /* 194 * pNFS client layout cache 195 */ 196 197 /* Need to hold i_lock if caller does not already hold reference */ 198 void 199 pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo) 200 { 201 atomic_inc(&lo->plh_refcount); 202 } 203 204 static struct pnfs_layout_hdr * 205 pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags) 206 { 207 struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; 208 return ld->alloc_layout_hdr(ino, gfp_flags); 209 } 210 211 static void 212 pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) 213 { 214 struct nfs_server *server = NFS_SERVER(lo->plh_inode); 215 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; 216 217 if (!list_empty(&lo->plh_layouts)) { 218 struct nfs_client *clp = server->nfs_client; 219 220 spin_lock(&clp->cl_lock); 221 list_del_init(&lo->plh_layouts); 222 spin_unlock(&clp->cl_lock); 223 } 224 put_rpccred(lo->plh_lc_cred); 225 return ld->free_layout_hdr(lo); 226 } 227 228 static void 229 pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo) 230 { 231 struct nfs_inode *nfsi = NFS_I(lo->plh_inode); 232 dprintk("%s: freeing layout cache %p\n", __func__, lo); 233 nfsi->layout = NULL; 234 /* Reset MDS Threshold I/O counters */ 235 nfsi->write_io = 0; 236 nfsi->read_io = 0; 237 } 238 239 void 240 pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) 241 { 242 struct inode *inode = lo->plh_inode; 243 244 pnfs_layoutreturn_before_put_layout_hdr(lo); 245 246 if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { 247 if (!list_empty(&lo->plh_segs)) 248 WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); 249 pnfs_detach_layout_hdr(lo); 250 spin_unlock(&inode->i_lock); 251 pnfs_free_layout_hdr(lo); 252 } 253 } 254 255 /* 256 * Mark a pnfs_layout_hdr and all associated layout segments as invalid 257 * 258 * In order to continue using the pnfs_layout_hdr, a full recovery 259 * is required. 260 * Note that caller must hold inode->i_lock. 261 */ 262 int 263 pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, 264 struct list_head *lseg_list) 265 { 266 struct pnfs_layout_range range = { 267 .iomode = IOMODE_ANY, 268 .offset = 0, 269 .length = NFS4_MAX_UINT64, 270 }; 271 272 set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); 273 return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range, 0); 274 } 275 276 static int 277 pnfs_iomode_to_fail_bit(u32 iomode) 278 { 279 return iomode == IOMODE_RW ? 280 NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; 281 } 282 283 static void 284 pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) 285 { 286 lo->plh_retry_timestamp = jiffies; 287 if (!test_and_set_bit(fail_bit, &lo->plh_flags)) 288 atomic_inc(&lo->plh_refcount); 289 } 290 291 static void 292 pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) 293 { 294 if (test_and_clear_bit(fail_bit, &lo->plh_flags)) 295 atomic_dec(&lo->plh_refcount); 296 } 297 298 static void 299 pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode) 300 { 301 struct inode *inode = lo->plh_inode; 302 struct pnfs_layout_range range = { 303 .iomode = iomode, 304 .offset = 0, 305 .length = NFS4_MAX_UINT64, 306 }; 307 LIST_HEAD(head); 308 309 spin_lock(&inode->i_lock); 310 pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); 311 pnfs_mark_matching_lsegs_invalid(lo, &head, &range, 0); 312 spin_unlock(&inode->i_lock); 313 pnfs_free_lseg_list(&head); 314 dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__, 315 iomode == IOMODE_RW ? "RW" : "READ"); 316 } 317 318 static bool 319 pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode) 320 { 321 unsigned long start, end; 322 int fail_bit = pnfs_iomode_to_fail_bit(iomode); 323 324 if (test_bit(fail_bit, &lo->plh_flags) == 0) 325 return false; 326 end = jiffies; 327 start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT; 328 if (!time_in_range(lo->plh_retry_timestamp, start, end)) { 329 /* It is time to retry the failed layoutgets */ 330 pnfs_layout_clear_fail_bit(lo, fail_bit); 331 return false; 332 } 333 return true; 334 } 335 336 static void 337 pnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg, 338 const struct pnfs_layout_range *range, 339 const nfs4_stateid *stateid) 340 { 341 INIT_LIST_HEAD(&lseg->pls_list); 342 INIT_LIST_HEAD(&lseg->pls_lc_list); 343 atomic_set(&lseg->pls_refcount, 1); 344 set_bit(NFS_LSEG_VALID, &lseg->pls_flags); 345 lseg->pls_layout = lo; 346 lseg->pls_range = *range; 347 lseg->pls_seq = be32_to_cpu(stateid->seqid); 348 } 349 350 static void pnfs_free_lseg(struct pnfs_layout_segment *lseg) 351 { 352 struct inode *ino = lseg->pls_layout->plh_inode; 353 354 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); 355 } 356 357 static void 358 pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, 359 struct pnfs_layout_segment *lseg) 360 { 361 struct inode *inode = lo->plh_inode; 362 363 WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 364 list_del_init(&lseg->pls_list); 365 /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ 366 atomic_dec(&lo->plh_refcount); 367 if (list_empty(&lo->plh_segs)) { 368 if (atomic_read(&lo->plh_outstanding) == 0) 369 set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); 370 clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); 371 } 372 rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); 373 } 374 375 void 376 pnfs_put_lseg(struct pnfs_layout_segment *lseg) 377 { 378 struct pnfs_layout_hdr *lo; 379 struct inode *inode; 380 381 if (!lseg) 382 return; 383 384 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, 385 atomic_read(&lseg->pls_refcount), 386 test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 387 388 lo = lseg->pls_layout; 389 inode = lo->plh_inode; 390 391 if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { 392 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { 393 spin_unlock(&inode->i_lock); 394 return; 395 } 396 pnfs_get_layout_hdr(lo); 397 pnfs_layout_remove_lseg(lo, lseg); 398 spin_unlock(&inode->i_lock); 399 pnfs_free_lseg(lseg); 400 pnfs_put_layout_hdr(lo); 401 } 402 } 403 EXPORT_SYMBOL_GPL(pnfs_put_lseg); 404 405 static void pnfs_free_lseg_async_work(struct work_struct *work) 406 { 407 struct pnfs_layout_segment *lseg; 408 struct pnfs_layout_hdr *lo; 409 410 lseg = container_of(work, struct pnfs_layout_segment, pls_work); 411 lo = lseg->pls_layout; 412 413 pnfs_free_lseg(lseg); 414 pnfs_put_layout_hdr(lo); 415 } 416 417 static void pnfs_free_lseg_async(struct pnfs_layout_segment *lseg) 418 { 419 INIT_WORK(&lseg->pls_work, pnfs_free_lseg_async_work); 420 schedule_work(&lseg->pls_work); 421 } 422 423 void 424 pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg) 425 { 426 if (!lseg) 427 return; 428 429 assert_spin_locked(&lseg->pls_layout->plh_inode->i_lock); 430 431 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, 432 atomic_read(&lseg->pls_refcount), 433 test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 434 if (atomic_dec_and_test(&lseg->pls_refcount)) { 435 struct pnfs_layout_hdr *lo = lseg->pls_layout; 436 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) 437 return; 438 pnfs_get_layout_hdr(lo); 439 pnfs_layout_remove_lseg(lo, lseg); 440 pnfs_free_lseg_async(lseg); 441 } 442 } 443 EXPORT_SYMBOL_GPL(pnfs_put_lseg_locked); 444 445 static u64 446 end_offset(u64 start, u64 len) 447 { 448 u64 end; 449 450 end = start + len; 451 return end >= start ? end : NFS4_MAX_UINT64; 452 } 453 454 /* 455 * is l2 fully contained in l1? 456 * start1 end1 457 * [----------------------------------) 458 * start2 end2 459 * [----------------) 460 */ 461 static bool 462 pnfs_lseg_range_contained(const struct pnfs_layout_range *l1, 463 const struct pnfs_layout_range *l2) 464 { 465 u64 start1 = l1->offset; 466 u64 end1 = end_offset(start1, l1->length); 467 u64 start2 = l2->offset; 468 u64 end2 = end_offset(start2, l2->length); 469 470 return (start1 <= start2) && (end1 >= end2); 471 } 472 473 /* 474 * is l1 and l2 intersecting? 475 * start1 end1 476 * [----------------------------------) 477 * start2 end2 478 * [----------------) 479 */ 480 static bool 481 pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1, 482 const struct pnfs_layout_range *l2) 483 { 484 u64 start1 = l1->offset; 485 u64 end1 = end_offset(start1, l1->length); 486 u64 start2 = l2->offset; 487 u64 end2 = end_offset(start2, l2->length); 488 489 return (end1 == NFS4_MAX_UINT64 || end1 > start2) && 490 (end2 == NFS4_MAX_UINT64 || end2 > start1); 491 } 492 493 static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, 494 struct list_head *tmp_list) 495 { 496 if (!atomic_dec_and_test(&lseg->pls_refcount)) 497 return false; 498 pnfs_layout_remove_lseg(lseg->pls_layout, lseg); 499 list_add(&lseg->pls_list, tmp_list); 500 return true; 501 } 502 503 /* Returns 1 if lseg is removed from list, 0 otherwise */ 504 static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, 505 struct list_head *tmp_list) 506 { 507 int rv = 0; 508 509 if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { 510 /* Remove the reference keeping the lseg in the 511 * list. It will now be removed when all 512 * outstanding io is finished. 513 */ 514 dprintk("%s: lseg %p ref %d\n", __func__, lseg, 515 atomic_read(&lseg->pls_refcount)); 516 if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list)) 517 rv = 1; 518 } 519 return rv; 520 } 521 522 /* 523 * Compare 2 layout stateid sequence ids, to see which is newer, 524 * taking into account wraparound issues. 525 */ 526 static bool pnfs_seqid_is_newer(u32 s1, u32 s2) 527 { 528 return (s32)(s1 - s2) > 0; 529 } 530 531 static bool 532 pnfs_should_free_range(const struct pnfs_layout_range *lseg_range, 533 const struct pnfs_layout_range *recall_range) 534 { 535 return (recall_range->iomode == IOMODE_ANY || 536 lseg_range->iomode == recall_range->iomode) && 537 pnfs_lseg_range_intersecting(lseg_range, recall_range); 538 } 539 540 static bool 541 pnfs_match_lseg_recall(const struct pnfs_layout_segment *lseg, 542 const struct pnfs_layout_range *recall_range, 543 u32 seq) 544 { 545 if (seq != 0 && pnfs_seqid_is_newer(lseg->pls_seq, seq)) 546 return false; 547 if (recall_range == NULL) 548 return true; 549 return pnfs_should_free_range(&lseg->pls_range, recall_range); 550 } 551 552 /** 553 * pnfs_mark_matching_lsegs_invalid - tear down lsegs or mark them for later 554 * @lo: layout header containing the lsegs 555 * @tmp_list: list head where doomed lsegs should go 556 * @recall_range: optional recall range argument to match (may be NULL) 557 * @seq: only invalidate lsegs obtained prior to this sequence (may be 0) 558 * 559 * Walk the list of lsegs in the layout header, and tear down any that should 560 * be destroyed. If "recall_range" is specified then the segment must match 561 * that range. If "seq" is non-zero, then only match segments that were handed 562 * out at or before that sequence. 563 * 564 * Returns number of matching invalid lsegs remaining in list after scanning 565 * it and purging them. 566 */ 567 int 568 pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, 569 struct list_head *tmp_list, 570 const struct pnfs_layout_range *recall_range, 571 u32 seq) 572 { 573 struct pnfs_layout_segment *lseg, *next; 574 int remaining = 0; 575 576 dprintk("%s:Begin lo %p\n", __func__, lo); 577 578 if (list_empty(&lo->plh_segs)) 579 return 0; 580 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 581 if (pnfs_match_lseg_recall(lseg, recall_range, seq)) { 582 dprintk("%s: freeing lseg %p iomode %d seq %u" 583 "offset %llu length %llu\n", __func__, 584 lseg, lseg->pls_range.iomode, lseg->pls_seq, 585 lseg->pls_range.offset, lseg->pls_range.length); 586 if (!mark_lseg_invalid(lseg, tmp_list)) 587 remaining++; 588 } 589 dprintk("%s:Return %i\n", __func__, remaining); 590 return remaining; 591 } 592 593 /* note free_me must contain lsegs from a single layout_hdr */ 594 void 595 pnfs_free_lseg_list(struct list_head *free_me) 596 { 597 struct pnfs_layout_segment *lseg, *tmp; 598 599 if (list_empty(free_me)) 600 return; 601 602 list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { 603 list_del(&lseg->pls_list); 604 pnfs_free_lseg(lseg); 605 } 606 } 607 608 void 609 pnfs_destroy_layout(struct nfs_inode *nfsi) 610 { 611 struct pnfs_layout_hdr *lo; 612 LIST_HEAD(tmp_list); 613 614 spin_lock(&nfsi->vfs_inode.i_lock); 615 lo = nfsi->layout; 616 if (lo) { 617 pnfs_get_layout_hdr(lo); 618 pnfs_mark_layout_stateid_invalid(lo, &tmp_list); 619 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); 620 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); 621 spin_unlock(&nfsi->vfs_inode.i_lock); 622 pnfs_free_lseg_list(&tmp_list); 623 pnfs_put_layout_hdr(lo); 624 } else 625 spin_unlock(&nfsi->vfs_inode.i_lock); 626 } 627 EXPORT_SYMBOL_GPL(pnfs_destroy_layout); 628 629 static bool 630 pnfs_layout_add_bulk_destroy_list(struct inode *inode, 631 struct list_head *layout_list) 632 { 633 struct pnfs_layout_hdr *lo; 634 bool ret = false; 635 636 spin_lock(&inode->i_lock); 637 lo = NFS_I(inode)->layout; 638 if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) { 639 pnfs_get_layout_hdr(lo); 640 list_add(&lo->plh_bulk_destroy, layout_list); 641 ret = true; 642 } 643 spin_unlock(&inode->i_lock); 644 return ret; 645 } 646 647 /* Caller must hold rcu_read_lock and clp->cl_lock */ 648 static int 649 pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, 650 struct nfs_server *server, 651 struct list_head *layout_list) 652 { 653 struct pnfs_layout_hdr *lo, *next; 654 struct inode *inode; 655 656 list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) { 657 inode = igrab(lo->plh_inode); 658 if (inode == NULL) 659 continue; 660 list_del_init(&lo->plh_layouts); 661 if (pnfs_layout_add_bulk_destroy_list(inode, layout_list)) 662 continue; 663 rcu_read_unlock(); 664 spin_unlock(&clp->cl_lock); 665 iput(inode); 666 spin_lock(&clp->cl_lock); 667 rcu_read_lock(); 668 return -EAGAIN; 669 } 670 return 0; 671 } 672 673 static int 674 pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, 675 bool is_bulk_recall) 676 { 677 struct pnfs_layout_hdr *lo; 678 struct inode *inode; 679 LIST_HEAD(lseg_list); 680 int ret = 0; 681 682 while (!list_empty(layout_list)) { 683 lo = list_entry(layout_list->next, struct pnfs_layout_hdr, 684 plh_bulk_destroy); 685 dprintk("%s freeing layout for inode %lu\n", __func__, 686 lo->plh_inode->i_ino); 687 inode = lo->plh_inode; 688 689 pnfs_layoutcommit_inode(inode, false); 690 691 spin_lock(&inode->i_lock); 692 list_del_init(&lo->plh_bulk_destroy); 693 if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) { 694 if (is_bulk_recall) 695 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); 696 ret = -EAGAIN; 697 } 698 spin_unlock(&inode->i_lock); 699 pnfs_free_lseg_list(&lseg_list); 700 /* Free all lsegs that are attached to commit buckets */ 701 nfs_commit_inode(inode, 0); 702 pnfs_put_layout_hdr(lo); 703 iput(inode); 704 } 705 return ret; 706 } 707 708 int 709 pnfs_destroy_layouts_byfsid(struct nfs_client *clp, 710 struct nfs_fsid *fsid, 711 bool is_recall) 712 { 713 struct nfs_server *server; 714 LIST_HEAD(layout_list); 715 716 spin_lock(&clp->cl_lock); 717 rcu_read_lock(); 718 restart: 719 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 720 if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0) 721 continue; 722 if (pnfs_layout_bulk_destroy_byserver_locked(clp, 723 server, 724 &layout_list) != 0) 725 goto restart; 726 } 727 rcu_read_unlock(); 728 spin_unlock(&clp->cl_lock); 729 730 if (list_empty(&layout_list)) 731 return 0; 732 return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); 733 } 734 735 int 736 pnfs_destroy_layouts_byclid(struct nfs_client *clp, 737 bool is_recall) 738 { 739 struct nfs_server *server; 740 LIST_HEAD(layout_list); 741 742 spin_lock(&clp->cl_lock); 743 rcu_read_lock(); 744 restart: 745 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 746 if (pnfs_layout_bulk_destroy_byserver_locked(clp, 747 server, 748 &layout_list) != 0) 749 goto restart; 750 } 751 rcu_read_unlock(); 752 spin_unlock(&clp->cl_lock); 753 754 if (list_empty(&layout_list)) 755 return 0; 756 return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); 757 } 758 759 /* 760 * Called by the state manger to remove all layouts established under an 761 * expired lease. 762 */ 763 void 764 pnfs_destroy_all_layouts(struct nfs_client *clp) 765 { 766 nfs4_deviceid_mark_client_invalid(clp); 767 nfs4_deviceid_purge_client(clp); 768 769 pnfs_destroy_layouts_byclid(clp, false); 770 } 771 772 static void 773 pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) 774 { 775 lo->plh_return_iomode = 0; 776 lo->plh_return_seq = 0; 777 clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); 778 } 779 780 /* update lo->plh_stateid with new if is more recent */ 781 void 782 pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, 783 bool update_barrier) 784 { 785 u32 oldseq, newseq, new_barrier = 0; 786 787 oldseq = be32_to_cpu(lo->plh_stateid.seqid); 788 newseq = be32_to_cpu(new->seqid); 789 790 if (!pnfs_layout_is_valid(lo)) { 791 nfs4_stateid_copy(&lo->plh_stateid, new); 792 lo->plh_barrier = newseq; 793 pnfs_clear_layoutreturn_info(lo); 794 clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); 795 return; 796 } 797 if (pnfs_seqid_is_newer(newseq, oldseq)) { 798 nfs4_stateid_copy(&lo->plh_stateid, new); 799 /* 800 * Because of wraparound, we want to keep the barrier 801 * "close" to the current seqids. 802 */ 803 new_barrier = newseq - atomic_read(&lo->plh_outstanding); 804 } 805 if (update_barrier) 806 new_barrier = be32_to_cpu(new->seqid); 807 else if (new_barrier == 0) 808 return; 809 if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) 810 lo->plh_barrier = new_barrier; 811 } 812 813 static bool 814 pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo, 815 const nfs4_stateid *stateid) 816 { 817 u32 seqid = be32_to_cpu(stateid->seqid); 818 819 return !pnfs_seqid_is_newer(seqid, lo->plh_barrier); 820 } 821 822 /* lget is set to 1 if called from inside send_layoutget call chain */ 823 static bool 824 pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo) 825 { 826 return lo->plh_block_lgets || 827 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); 828 } 829 830 /* 831 * Get layout from server. 832 * for now, assume that whole file layouts are requested. 833 * arg->offset: 0 834 * arg->length: all ones 835 */ 836 static struct pnfs_layout_segment * 837 send_layoutget(struct pnfs_layout_hdr *lo, 838 struct nfs_open_context *ctx, 839 nfs4_stateid *stateid, 840 const struct pnfs_layout_range *range, 841 long *timeout, gfp_t gfp_flags) 842 { 843 struct inode *ino = lo->plh_inode; 844 struct nfs_server *server = NFS_SERVER(ino); 845 struct nfs4_layoutget *lgp; 846 loff_t i_size; 847 848 dprintk("--> %s\n", __func__); 849 850 /* 851 * Synchronously retrieve layout information from server and 852 * store in lseg. If we race with a concurrent seqid morphing 853 * op, then re-send the LAYOUTGET. 854 */ 855 lgp = kzalloc(sizeof(*lgp), gfp_flags); 856 if (lgp == NULL) 857 return ERR_PTR(-ENOMEM); 858 859 i_size = i_size_read(ino); 860 861 lgp->args.minlength = PAGE_SIZE; 862 if (lgp->args.minlength > range->length) 863 lgp->args.minlength = range->length; 864 if (range->iomode == IOMODE_READ) { 865 if (range->offset >= i_size) 866 lgp->args.minlength = 0; 867 else if (i_size - range->offset < lgp->args.minlength) 868 lgp->args.minlength = i_size - range->offset; 869 } 870 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; 871 pnfs_copy_range(&lgp->args.range, range); 872 lgp->args.type = server->pnfs_curr_ld->id; 873 lgp->args.inode = ino; 874 lgp->args.ctx = get_nfs_open_context(ctx); 875 nfs4_stateid_copy(&lgp->args.stateid, stateid); 876 lgp->gfp_flags = gfp_flags; 877 lgp->cred = lo->plh_lc_cred; 878 879 return nfs4_proc_layoutget(lgp, timeout, gfp_flags); 880 } 881 882 static void pnfs_clear_layoutcommit(struct inode *inode, 883 struct list_head *head) 884 { 885 struct nfs_inode *nfsi = NFS_I(inode); 886 struct pnfs_layout_segment *lseg, *tmp; 887 888 if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) 889 return; 890 list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) { 891 if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) 892 continue; 893 pnfs_lseg_dec_and_remove_zero(lseg, head); 894 } 895 } 896 897 void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) 898 { 899 clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); 900 smp_mb__after_atomic(); 901 wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); 902 rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); 903 } 904 905 static bool 906 pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, 907 nfs4_stateid *stateid, 908 enum pnfs_iomode *iomode) 909 { 910 /* Serialise LAYOUTGET/LAYOUTRETURN */ 911 if (atomic_read(&lo->plh_outstanding) != 0) 912 return false; 913 if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) 914 return false; 915 pnfs_get_layout_hdr(lo); 916 if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { 917 if (stateid != NULL) { 918 nfs4_stateid_copy(stateid, &lo->plh_stateid); 919 if (lo->plh_return_seq != 0) 920 stateid->seqid = cpu_to_be32(lo->plh_return_seq); 921 } 922 if (iomode != NULL) 923 *iomode = lo->plh_return_iomode; 924 pnfs_clear_layoutreturn_info(lo); 925 return true; 926 } 927 if (stateid != NULL) 928 nfs4_stateid_copy(stateid, &lo->plh_stateid); 929 if (iomode != NULL) 930 *iomode = IOMODE_ANY; 931 return true; 932 } 933 934 static int 935 pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, 936 enum pnfs_iomode iomode, bool sync) 937 { 938 struct inode *ino = lo->plh_inode; 939 struct nfs4_layoutreturn *lrp; 940 int status = 0; 941 942 lrp = kzalloc(sizeof(*lrp), GFP_NOFS); 943 if (unlikely(lrp == NULL)) { 944 status = -ENOMEM; 945 spin_lock(&ino->i_lock); 946 pnfs_clear_layoutreturn_waitbit(lo); 947 spin_unlock(&ino->i_lock); 948 pnfs_put_layout_hdr(lo); 949 goto out; 950 } 951 952 nfs4_stateid_copy(&lrp->args.stateid, stateid); 953 lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; 954 lrp->args.inode = ino; 955 lrp->args.range.iomode = iomode; 956 lrp->args.range.offset = 0; 957 lrp->args.range.length = NFS4_MAX_UINT64; 958 lrp->args.layout = lo; 959 lrp->clp = NFS_SERVER(ino)->nfs_client; 960 lrp->cred = lo->plh_lc_cred; 961 962 status = nfs4_proc_layoutreturn(lrp, sync); 963 out: 964 dprintk("<-- %s status: %d\n", __func__, status); 965 return status; 966 } 967 968 /* Return true if layoutreturn is needed */ 969 static bool 970 pnfs_layout_need_return(struct pnfs_layout_hdr *lo) 971 { 972 struct pnfs_layout_segment *s; 973 974 if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) 975 return false; 976 977 /* Defer layoutreturn until all lsegs are done */ 978 list_for_each_entry(s, &lo->plh_segs, pls_list) { 979 if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags)) 980 return false; 981 } 982 983 return true; 984 } 985 986 static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo) 987 { 988 struct inode *inode= lo->plh_inode; 989 990 if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) 991 return; 992 spin_lock(&inode->i_lock); 993 if (pnfs_layout_need_return(lo)) { 994 nfs4_stateid stateid; 995 enum pnfs_iomode iomode; 996 bool send; 997 998 send = pnfs_prepare_layoutreturn(lo, &stateid, &iomode); 999 spin_unlock(&inode->i_lock); 1000 if (send) { 1001 /* Send an async layoutreturn so we dont deadlock */ 1002 pnfs_send_layoutreturn(lo, &stateid, iomode, false); 1003 } 1004 } else 1005 spin_unlock(&inode->i_lock); 1006 } 1007 1008 /* 1009 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr 1010 * when the layout segment list is empty. 1011 * 1012 * Note that a pnfs_layout_hdr can exist with an empty layout segment 1013 * list when LAYOUTGET has failed, or when LAYOUTGET succeeded, but the 1014 * deviceid is marked invalid. 1015 */ 1016 int 1017 _pnfs_return_layout(struct inode *ino) 1018 { 1019 struct pnfs_layout_hdr *lo = NULL; 1020 struct nfs_inode *nfsi = NFS_I(ino); 1021 LIST_HEAD(tmp_list); 1022 nfs4_stateid stateid; 1023 int status = 0, empty; 1024 bool send; 1025 1026 dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); 1027 1028 spin_lock(&ino->i_lock); 1029 lo = nfsi->layout; 1030 if (!lo) { 1031 spin_unlock(&ino->i_lock); 1032 dprintk("NFS: %s no layout to return\n", __func__); 1033 goto out; 1034 } 1035 /* Reference matched in nfs4_layoutreturn_release */ 1036 pnfs_get_layout_hdr(lo); 1037 empty = list_empty(&lo->plh_segs); 1038 pnfs_clear_layoutcommit(ino, &tmp_list); 1039 pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0); 1040 1041 if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { 1042 struct pnfs_layout_range range = { 1043 .iomode = IOMODE_ANY, 1044 .offset = 0, 1045 .length = NFS4_MAX_UINT64, 1046 }; 1047 NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range); 1048 } 1049 1050 /* Don't send a LAYOUTRETURN if list was initially empty */ 1051 if (empty) { 1052 spin_unlock(&ino->i_lock); 1053 dprintk("NFS: %s no layout segments to return\n", __func__); 1054 goto out_put_layout_hdr; 1055 } 1056 1057 send = pnfs_prepare_layoutreturn(lo, &stateid, NULL); 1058 spin_unlock(&ino->i_lock); 1059 pnfs_free_lseg_list(&tmp_list); 1060 if (send) 1061 status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); 1062 out_put_layout_hdr: 1063 pnfs_put_layout_hdr(lo); 1064 out: 1065 dprintk("<-- %s status: %d\n", __func__, status); 1066 return status; 1067 } 1068 EXPORT_SYMBOL_GPL(_pnfs_return_layout); 1069 1070 int 1071 pnfs_commit_and_return_layout(struct inode *inode) 1072 { 1073 struct pnfs_layout_hdr *lo; 1074 int ret; 1075 1076 spin_lock(&inode->i_lock); 1077 lo = NFS_I(inode)->layout; 1078 if (lo == NULL) { 1079 spin_unlock(&inode->i_lock); 1080 return 0; 1081 } 1082 pnfs_get_layout_hdr(lo); 1083 /* Block new layoutgets and read/write to ds */ 1084 lo->plh_block_lgets++; 1085 spin_unlock(&inode->i_lock); 1086 filemap_fdatawait(inode->i_mapping); 1087 ret = pnfs_layoutcommit_inode(inode, true); 1088 if (ret == 0) 1089 ret = _pnfs_return_layout(inode); 1090 spin_lock(&inode->i_lock); 1091 lo->plh_block_lgets--; 1092 spin_unlock(&inode->i_lock); 1093 pnfs_put_layout_hdr(lo); 1094 return ret; 1095 } 1096 1097 bool pnfs_roc(struct inode *ino) 1098 { 1099 struct nfs_inode *nfsi = NFS_I(ino); 1100 struct nfs_open_context *ctx; 1101 struct nfs4_state *state; 1102 struct pnfs_layout_hdr *lo; 1103 struct pnfs_layout_segment *lseg, *tmp; 1104 nfs4_stateid stateid; 1105 LIST_HEAD(tmp_list); 1106 bool found = false, layoutreturn = false, roc = false; 1107 1108 spin_lock(&ino->i_lock); 1109 lo = nfsi->layout; 1110 if (!lo || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) 1111 goto out_noroc; 1112 1113 /* no roc if we hold a delegation */ 1114 if (nfs4_check_delegation(ino, FMODE_READ)) 1115 goto out_noroc; 1116 1117 list_for_each_entry(ctx, &nfsi->open_files, list) { 1118 state = ctx->state; 1119 /* Don't return layout if there is open file state */ 1120 if (state != NULL && state->state != 0) 1121 goto out_noroc; 1122 } 1123 1124 /* always send layoutreturn if being marked so */ 1125 if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) 1126 layoutreturn = pnfs_prepare_layoutreturn(lo, 1127 &stateid, NULL); 1128 1129 list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) 1130 /* If we are sending layoutreturn, invalidate all valid lsegs */ 1131 if (layoutreturn || test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { 1132 mark_lseg_invalid(lseg, &tmp_list); 1133 found = true; 1134 } 1135 /* ROC in two conditions: 1136 * 1. there are ROC lsegs 1137 * 2. we don't send layoutreturn 1138 */ 1139 if (found && !layoutreturn) { 1140 /* lo ref dropped in pnfs_roc_release() */ 1141 pnfs_get_layout_hdr(lo); 1142 roc = true; 1143 } 1144 1145 out_noroc: 1146 spin_unlock(&ino->i_lock); 1147 pnfs_free_lseg_list(&tmp_list); 1148 pnfs_layoutcommit_inode(ino, true); 1149 if (layoutreturn) 1150 pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); 1151 return roc; 1152 } 1153 1154 void pnfs_roc_release(struct inode *ino) 1155 { 1156 struct pnfs_layout_hdr *lo; 1157 1158 spin_lock(&ino->i_lock); 1159 lo = NFS_I(ino)->layout; 1160 pnfs_clear_layoutreturn_waitbit(lo); 1161 if (atomic_dec_and_test(&lo->plh_refcount)) { 1162 pnfs_detach_layout_hdr(lo); 1163 spin_unlock(&ino->i_lock); 1164 pnfs_free_layout_hdr(lo); 1165 } else 1166 spin_unlock(&ino->i_lock); 1167 } 1168 1169 void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) 1170 { 1171 struct pnfs_layout_hdr *lo; 1172 1173 spin_lock(&ino->i_lock); 1174 lo = NFS_I(ino)->layout; 1175 if (pnfs_seqid_is_newer(barrier, lo->plh_barrier)) 1176 lo->plh_barrier = barrier; 1177 spin_unlock(&ino->i_lock); 1178 trace_nfs4_layoutreturn_on_close(ino, 0); 1179 } 1180 1181 void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier) 1182 { 1183 struct nfs_inode *nfsi = NFS_I(ino); 1184 struct pnfs_layout_hdr *lo; 1185 u32 current_seqid; 1186 1187 spin_lock(&ino->i_lock); 1188 lo = nfsi->layout; 1189 current_seqid = be32_to_cpu(lo->plh_stateid.seqid); 1190 1191 /* Since close does not return a layout stateid for use as 1192 * a barrier, we choose the worst-case barrier. 1193 */ 1194 *barrier = current_seqid + atomic_read(&lo->plh_outstanding); 1195 spin_unlock(&ino->i_lock); 1196 } 1197 1198 bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) 1199 { 1200 struct nfs_inode *nfsi = NFS_I(ino); 1201 struct pnfs_layout_hdr *lo; 1202 bool sleep = false; 1203 1204 /* we might not have grabbed lo reference. so need to check under 1205 * i_lock */ 1206 spin_lock(&ino->i_lock); 1207 lo = nfsi->layout; 1208 if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) 1209 sleep = true; 1210 spin_unlock(&ino->i_lock); 1211 1212 if (sleep) 1213 rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); 1214 1215 return sleep; 1216 } 1217 1218 /* 1219 * Compare two layout segments for sorting into layout cache. 1220 * We want to preferentially return RW over RO layouts, so ensure those 1221 * are seen first. 1222 */ 1223 static s64 1224 pnfs_lseg_range_cmp(const struct pnfs_layout_range *l1, 1225 const struct pnfs_layout_range *l2) 1226 { 1227 s64 d; 1228 1229 /* high offset > low offset */ 1230 d = l1->offset - l2->offset; 1231 if (d) 1232 return d; 1233 1234 /* short length > long length */ 1235 d = l2->length - l1->length; 1236 if (d) 1237 return d; 1238 1239 /* read > read/write */ 1240 return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ); 1241 } 1242 1243 static bool 1244 pnfs_lseg_range_is_after(const struct pnfs_layout_range *l1, 1245 const struct pnfs_layout_range *l2) 1246 { 1247 return pnfs_lseg_range_cmp(l1, l2) > 0; 1248 } 1249 1250 static bool 1251 pnfs_lseg_no_merge(struct pnfs_layout_segment *lseg, 1252 struct pnfs_layout_segment *old) 1253 { 1254 return false; 1255 } 1256 1257 void 1258 pnfs_generic_layout_insert_lseg(struct pnfs_layout_hdr *lo, 1259 struct pnfs_layout_segment *lseg, 1260 bool (*is_after)(const struct pnfs_layout_range *, 1261 const struct pnfs_layout_range *), 1262 bool (*do_merge)(struct pnfs_layout_segment *, 1263 struct pnfs_layout_segment *), 1264 struct list_head *free_me) 1265 { 1266 struct pnfs_layout_segment *lp, *tmp; 1267 1268 dprintk("%s:Begin\n", __func__); 1269 1270 list_for_each_entry_safe(lp, tmp, &lo->plh_segs, pls_list) { 1271 if (test_bit(NFS_LSEG_VALID, &lp->pls_flags) == 0) 1272 continue; 1273 if (do_merge(lseg, lp)) { 1274 mark_lseg_invalid(lp, free_me); 1275 continue; 1276 } 1277 if (is_after(&lseg->pls_range, &lp->pls_range)) 1278 continue; 1279 list_add_tail(&lseg->pls_list, &lp->pls_list); 1280 dprintk("%s: inserted lseg %p " 1281 "iomode %d offset %llu length %llu before " 1282 "lp %p iomode %d offset %llu length %llu\n", 1283 __func__, lseg, lseg->pls_range.iomode, 1284 lseg->pls_range.offset, lseg->pls_range.length, 1285 lp, lp->pls_range.iomode, lp->pls_range.offset, 1286 lp->pls_range.length); 1287 goto out; 1288 } 1289 list_add_tail(&lseg->pls_list, &lo->plh_segs); 1290 dprintk("%s: inserted lseg %p " 1291 "iomode %d offset %llu length %llu at tail\n", 1292 __func__, lseg, lseg->pls_range.iomode, 1293 lseg->pls_range.offset, lseg->pls_range.length); 1294 out: 1295 pnfs_get_layout_hdr(lo); 1296 1297 dprintk("%s:Return\n", __func__); 1298 } 1299 EXPORT_SYMBOL_GPL(pnfs_generic_layout_insert_lseg); 1300 1301 static void 1302 pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo, 1303 struct pnfs_layout_segment *lseg, 1304 struct list_head *free_me) 1305 { 1306 struct inode *inode = lo->plh_inode; 1307 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 1308 1309 if (ld->add_lseg != NULL) 1310 ld->add_lseg(lo, lseg, free_me); 1311 else 1312 pnfs_generic_layout_insert_lseg(lo, lseg, 1313 pnfs_lseg_range_is_after, 1314 pnfs_lseg_no_merge, 1315 free_me); 1316 } 1317 1318 static struct pnfs_layout_hdr * 1319 alloc_init_layout_hdr(struct inode *ino, 1320 struct nfs_open_context *ctx, 1321 gfp_t gfp_flags) 1322 { 1323 struct pnfs_layout_hdr *lo; 1324 1325 lo = pnfs_alloc_layout_hdr(ino, gfp_flags); 1326 if (!lo) 1327 return NULL; 1328 atomic_set(&lo->plh_refcount, 1); 1329 INIT_LIST_HEAD(&lo->plh_layouts); 1330 INIT_LIST_HEAD(&lo->plh_segs); 1331 INIT_LIST_HEAD(&lo->plh_bulk_destroy); 1332 lo->plh_inode = ino; 1333 lo->plh_lc_cred = get_rpccred(ctx->cred); 1334 lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID; 1335 return lo; 1336 } 1337 1338 static struct pnfs_layout_hdr * 1339 pnfs_find_alloc_layout(struct inode *ino, 1340 struct nfs_open_context *ctx, 1341 gfp_t gfp_flags) 1342 __releases(&ino->i_lock) 1343 __acquires(&ino->i_lock) 1344 { 1345 struct nfs_inode *nfsi = NFS_I(ino); 1346 struct pnfs_layout_hdr *new = NULL; 1347 1348 dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); 1349 1350 if (nfsi->layout != NULL) 1351 goto out_existing; 1352 spin_unlock(&ino->i_lock); 1353 new = alloc_init_layout_hdr(ino, ctx, gfp_flags); 1354 spin_lock(&ino->i_lock); 1355 1356 if (likely(nfsi->layout == NULL)) { /* Won the race? */ 1357 nfsi->layout = new; 1358 return new; 1359 } else if (new != NULL) 1360 pnfs_free_layout_hdr(new); 1361 out_existing: 1362 pnfs_get_layout_hdr(nfsi->layout); 1363 return nfsi->layout; 1364 } 1365 1366 /* 1367 * iomode matching rules: 1368 * iomode lseg strict match 1369 * iomode 1370 * ----- ----- ------ ----- 1371 * ANY READ N/A true 1372 * ANY RW N/A true 1373 * RW READ N/A false 1374 * RW RW N/A true 1375 * READ READ N/A true 1376 * READ RW true false 1377 * READ RW false true 1378 */ 1379 static bool 1380 pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range, 1381 const struct pnfs_layout_range *range, 1382 bool strict_iomode) 1383 { 1384 struct pnfs_layout_range range1; 1385 1386 if ((range->iomode == IOMODE_RW && 1387 ls_range->iomode != IOMODE_RW) || 1388 (range->iomode != ls_range->iomode && 1389 strict_iomode == true) || 1390 !pnfs_lseg_range_intersecting(ls_range, range)) 1391 return 0; 1392 1393 /* range1 covers only the first byte in the range */ 1394 range1 = *range; 1395 range1.length = 1; 1396 return pnfs_lseg_range_contained(ls_range, &range1); 1397 } 1398 1399 /* 1400 * lookup range in layout 1401 */ 1402 static struct pnfs_layout_segment * 1403 pnfs_find_lseg(struct pnfs_layout_hdr *lo, 1404 struct pnfs_layout_range *range, 1405 bool strict_iomode) 1406 { 1407 struct pnfs_layout_segment *lseg, *ret = NULL; 1408 1409 dprintk("%s:Begin\n", __func__); 1410 1411 list_for_each_entry(lseg, &lo->plh_segs, pls_list) { 1412 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && 1413 !test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) && 1414 pnfs_lseg_range_match(&lseg->pls_range, range, 1415 strict_iomode)) { 1416 ret = pnfs_get_lseg(lseg); 1417 break; 1418 } 1419 } 1420 1421 dprintk("%s:Return lseg %p ref %d\n", 1422 __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0); 1423 return ret; 1424 } 1425 1426 /* 1427 * Use mdsthreshold hints set at each OPEN to determine if I/O should go 1428 * to the MDS or over pNFS 1429 * 1430 * The nfs_inode read_io and write_io fields are cumulative counters reset 1431 * when there are no layout segments. Note that in pnfs_update_layout iomode 1432 * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a 1433 * WRITE request. 1434 * 1435 * A return of true means use MDS I/O. 1436 * 1437 * From rfc 5661: 1438 * If a file's size is smaller than the file size threshold, data accesses 1439 * SHOULD be sent to the metadata server. If an I/O request has a length that 1440 * is below the I/O size threshold, the I/O SHOULD be sent to the metadata 1441 * server. If both file size and I/O size are provided, the client SHOULD 1442 * reach or exceed both thresholds before sending its read or write 1443 * requests to the data server. 1444 */ 1445 static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx, 1446 struct inode *ino, int iomode) 1447 { 1448 struct nfs4_threshold *t = ctx->mdsthreshold; 1449 struct nfs_inode *nfsi = NFS_I(ino); 1450 loff_t fsize = i_size_read(ino); 1451 bool size = false, size_set = false, io = false, io_set = false, ret = false; 1452 1453 if (t == NULL) 1454 return ret; 1455 1456 dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n", 1457 __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz); 1458 1459 switch (iomode) { 1460 case IOMODE_READ: 1461 if (t->bm & THRESHOLD_RD) { 1462 dprintk("%s fsize %llu\n", __func__, fsize); 1463 size_set = true; 1464 if (fsize < t->rd_sz) 1465 size = true; 1466 } 1467 if (t->bm & THRESHOLD_RD_IO) { 1468 dprintk("%s nfsi->read_io %llu\n", __func__, 1469 nfsi->read_io); 1470 io_set = true; 1471 if (nfsi->read_io < t->rd_io_sz) 1472 io = true; 1473 } 1474 break; 1475 case IOMODE_RW: 1476 if (t->bm & THRESHOLD_WR) { 1477 dprintk("%s fsize %llu\n", __func__, fsize); 1478 size_set = true; 1479 if (fsize < t->wr_sz) 1480 size = true; 1481 } 1482 if (t->bm & THRESHOLD_WR_IO) { 1483 dprintk("%s nfsi->write_io %llu\n", __func__, 1484 nfsi->write_io); 1485 io_set = true; 1486 if (nfsi->write_io < t->wr_io_sz) 1487 io = true; 1488 } 1489 break; 1490 } 1491 if (size_set && io_set) { 1492 if (size && io) 1493 ret = true; 1494 } else if (size || io) 1495 ret = true; 1496 1497 dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret); 1498 return ret; 1499 } 1500 1501 static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) 1502 { 1503 /* 1504 * send layoutcommit as it can hold up layoutreturn due to lseg 1505 * reference 1506 */ 1507 pnfs_layoutcommit_inode(lo->plh_inode, false); 1508 return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN, 1509 nfs_wait_bit_killable, 1510 TASK_UNINTERRUPTIBLE); 1511 } 1512 1513 static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo) 1514 { 1515 unsigned long *bitlock = &lo->plh_flags; 1516 1517 clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock); 1518 smp_mb__after_atomic(); 1519 wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); 1520 } 1521 1522 /* 1523 * Layout segment is retreived from the server if not cached. 1524 * The appropriate layout segment is referenced and returned to the caller. 1525 */ 1526 struct pnfs_layout_segment * 1527 pnfs_update_layout(struct inode *ino, 1528 struct nfs_open_context *ctx, 1529 loff_t pos, 1530 u64 count, 1531 enum pnfs_iomode iomode, 1532 bool strict_iomode, 1533 gfp_t gfp_flags) 1534 { 1535 struct pnfs_layout_range arg = { 1536 .iomode = iomode, 1537 .offset = pos, 1538 .length = count, 1539 }; 1540 unsigned pg_offset, seq; 1541 struct nfs_server *server = NFS_SERVER(ino); 1542 struct nfs_client *clp = server->nfs_client; 1543 struct pnfs_layout_hdr *lo = NULL; 1544 struct pnfs_layout_segment *lseg = NULL; 1545 nfs4_stateid stateid; 1546 long timeout = 0; 1547 unsigned long giveup = jiffies + (clp->cl_lease_time << 1); 1548 bool first; 1549 1550 if (!pnfs_enabled_sb(NFS_SERVER(ino))) { 1551 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1552 PNFS_UPDATE_LAYOUT_NO_PNFS); 1553 goto out; 1554 } 1555 1556 if (iomode == IOMODE_READ && i_size_read(ino) == 0) { 1557 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1558 PNFS_UPDATE_LAYOUT_RD_ZEROLEN); 1559 goto out; 1560 } 1561 1562 if (pnfs_within_mdsthreshold(ctx, ino, iomode)) { 1563 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1564 PNFS_UPDATE_LAYOUT_MDSTHRESH); 1565 goto out; 1566 } 1567 1568 lookup_again: 1569 nfs4_client_recover_expired_lease(clp); 1570 first = false; 1571 spin_lock(&ino->i_lock); 1572 lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); 1573 if (lo == NULL) { 1574 spin_unlock(&ino->i_lock); 1575 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1576 PNFS_UPDATE_LAYOUT_NOMEM); 1577 goto out; 1578 } 1579 1580 /* Do we even need to bother with this? */ 1581 if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { 1582 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1583 PNFS_UPDATE_LAYOUT_BULK_RECALL); 1584 dprintk("%s matches recall, use MDS\n", __func__); 1585 goto out_unlock; 1586 } 1587 1588 /* if LAYOUTGET already failed once we don't try again */ 1589 if (pnfs_layout_io_test_failed(lo, iomode)) { 1590 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1591 PNFS_UPDATE_LAYOUT_IO_TEST_FAIL); 1592 goto out_unlock; 1593 } 1594 1595 lseg = pnfs_find_lseg(lo, &arg, strict_iomode); 1596 if (lseg) { 1597 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1598 PNFS_UPDATE_LAYOUT_FOUND_CACHED); 1599 goto out_unlock; 1600 } 1601 1602 if (!nfs4_valid_open_stateid(ctx->state)) { 1603 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1604 PNFS_UPDATE_LAYOUT_INVALID_OPEN); 1605 goto out_unlock; 1606 } 1607 1608 /* 1609 * Choose a stateid for the LAYOUTGET. If we don't have a layout 1610 * stateid, or it has been invalidated, then we must use the open 1611 * stateid. 1612 */ 1613 if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { 1614 1615 /* 1616 * The first layoutget for the file. Need to serialize per 1617 * RFC 5661 Errata 3208. 1618 */ 1619 if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, 1620 &lo->plh_flags)) { 1621 spin_unlock(&ino->i_lock); 1622 wait_on_bit(&lo->plh_flags, NFS_LAYOUT_FIRST_LAYOUTGET, 1623 TASK_UNINTERRUPTIBLE); 1624 pnfs_put_layout_hdr(lo); 1625 dprintk("%s retrying\n", __func__); 1626 goto lookup_again; 1627 } 1628 1629 first = true; 1630 do { 1631 seq = read_seqbegin(&ctx->state->seqlock); 1632 nfs4_stateid_copy(&stateid, &ctx->state->stateid); 1633 } while (read_seqretry(&ctx->state->seqlock, seq)); 1634 } else { 1635 nfs4_stateid_copy(&stateid, &lo->plh_stateid); 1636 } 1637 1638 /* 1639 * Because we free lsegs before sending LAYOUTRETURN, we need to wait 1640 * for LAYOUTRETURN even if first is true. 1641 */ 1642 if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { 1643 spin_unlock(&ino->i_lock); 1644 dprintk("%s wait for layoutreturn\n", __func__); 1645 if (pnfs_prepare_to_retry_layoutget(lo)) { 1646 if (first) 1647 pnfs_clear_first_layoutget(lo); 1648 pnfs_put_layout_hdr(lo); 1649 dprintk("%s retrying\n", __func__); 1650 trace_pnfs_update_layout(ino, pos, count, iomode, lo, 1651 lseg, PNFS_UPDATE_LAYOUT_RETRY); 1652 goto lookup_again; 1653 } 1654 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1655 PNFS_UPDATE_LAYOUT_RETURN); 1656 goto out_put_layout_hdr; 1657 } 1658 1659 if (pnfs_layoutgets_blocked(lo)) { 1660 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1661 PNFS_UPDATE_LAYOUT_BLOCKED); 1662 goto out_unlock; 1663 } 1664 atomic_inc(&lo->plh_outstanding); 1665 spin_unlock(&ino->i_lock); 1666 1667 if (list_empty(&lo->plh_layouts)) { 1668 /* The lo must be on the clp list if there is any 1669 * chance of a CB_LAYOUTRECALL(FILE) coming in. 1670 */ 1671 spin_lock(&clp->cl_lock); 1672 if (list_empty(&lo->plh_layouts)) 1673 list_add_tail(&lo->plh_layouts, &server->layouts); 1674 spin_unlock(&clp->cl_lock); 1675 } 1676 1677 pg_offset = arg.offset & ~PAGE_MASK; 1678 if (pg_offset) { 1679 arg.offset -= pg_offset; 1680 arg.length += pg_offset; 1681 } 1682 if (arg.length != NFS4_MAX_UINT64) 1683 arg.length = PAGE_ALIGN(arg.length); 1684 1685 lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags); 1686 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1687 PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); 1688 atomic_dec(&lo->plh_outstanding); 1689 if (IS_ERR(lseg)) { 1690 switch(PTR_ERR(lseg)) { 1691 case -EBUSY: 1692 if (time_after(jiffies, giveup)) 1693 lseg = NULL; 1694 break; 1695 case -ERECALLCONFLICT: 1696 /* Huh? We hold no layouts, how is there a recall? */ 1697 if (first) { 1698 lseg = NULL; 1699 break; 1700 } 1701 /* Destroy the existing layout and start over */ 1702 if (time_after(jiffies, giveup)) 1703 pnfs_destroy_layout(NFS_I(ino)); 1704 /* Fallthrough */ 1705 case -EAGAIN: 1706 break; 1707 default: 1708 if (!nfs_error_is_fatal(PTR_ERR(lseg))) { 1709 pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); 1710 lseg = NULL; 1711 } 1712 goto out_put_layout_hdr; 1713 } 1714 if (lseg) { 1715 if (first) 1716 pnfs_clear_first_layoutget(lo); 1717 trace_pnfs_update_layout(ino, pos, count, 1718 iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY); 1719 pnfs_put_layout_hdr(lo); 1720 goto lookup_again; 1721 } 1722 } else { 1723 pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); 1724 } 1725 1726 out_put_layout_hdr: 1727 if (first) 1728 pnfs_clear_first_layoutget(lo); 1729 pnfs_put_layout_hdr(lo); 1730 out: 1731 dprintk("%s: inode %s/%llu pNFS layout segment %s for " 1732 "(%s, offset: %llu, length: %llu)\n", 1733 __func__, ino->i_sb->s_id, 1734 (unsigned long long)NFS_FILEID(ino), 1735 IS_ERR_OR_NULL(lseg) ? "not found" : "found", 1736 iomode==IOMODE_RW ? "read/write" : "read-only", 1737 (unsigned long long)pos, 1738 (unsigned long long)count); 1739 return lseg; 1740 out_unlock: 1741 spin_unlock(&ino->i_lock); 1742 goto out_put_layout_hdr; 1743 } 1744 EXPORT_SYMBOL_GPL(pnfs_update_layout); 1745 1746 static bool 1747 pnfs_sanity_check_layout_range(struct pnfs_layout_range *range) 1748 { 1749 switch (range->iomode) { 1750 case IOMODE_READ: 1751 case IOMODE_RW: 1752 break; 1753 default: 1754 return false; 1755 } 1756 if (range->offset == NFS4_MAX_UINT64) 1757 return false; 1758 if (range->length == 0) 1759 return false; 1760 if (range->length != NFS4_MAX_UINT64 && 1761 range->length > NFS4_MAX_UINT64 - range->offset) 1762 return false; 1763 return true; 1764 } 1765 1766 struct pnfs_layout_segment * 1767 pnfs_layout_process(struct nfs4_layoutget *lgp) 1768 { 1769 struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; 1770 struct nfs4_layoutget_res *res = &lgp->res; 1771 struct pnfs_layout_segment *lseg; 1772 struct inode *ino = lo->plh_inode; 1773 LIST_HEAD(free_me); 1774 1775 if (!pnfs_sanity_check_layout_range(&res->range)) 1776 return ERR_PTR(-EINVAL); 1777 1778 /* Inject layout blob into I/O device driver */ 1779 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); 1780 if (IS_ERR_OR_NULL(lseg)) { 1781 if (!lseg) 1782 lseg = ERR_PTR(-ENOMEM); 1783 1784 dprintk("%s: Could not allocate layout: error %ld\n", 1785 __func__, PTR_ERR(lseg)); 1786 return lseg; 1787 } 1788 1789 pnfs_init_lseg(lo, lseg, &res->range, &res->stateid); 1790 1791 spin_lock(&ino->i_lock); 1792 if (pnfs_layoutgets_blocked(lo)) { 1793 dprintk("%s forget reply due to state\n", __func__); 1794 goto out_forget; 1795 } 1796 1797 if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { 1798 /* existing state ID, make sure the sequence number matches. */ 1799 if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { 1800 dprintk("%s forget reply due to sequence\n", __func__); 1801 goto out_forget; 1802 } 1803 pnfs_set_layout_stateid(lo, &res->stateid, false); 1804 } else { 1805 /* 1806 * We got an entirely new state ID. Mark all segments for the 1807 * inode invalid, and don't bother validating the stateid 1808 * sequence number. 1809 */ 1810 pnfs_mark_layout_stateid_invalid(lo, &free_me); 1811 1812 pnfs_set_layout_stateid(lo, &res->stateid, true); 1813 } 1814 1815 pnfs_get_lseg(lseg); 1816 pnfs_layout_insert_lseg(lo, lseg, &free_me); 1817 1818 1819 if (res->return_on_close) 1820 set_bit(NFS_LSEG_ROC, &lseg->pls_flags); 1821 1822 spin_unlock(&ino->i_lock); 1823 pnfs_free_lseg_list(&free_me); 1824 return lseg; 1825 1826 out_forget: 1827 spin_unlock(&ino->i_lock); 1828 lseg->pls_layout = lo; 1829 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); 1830 return ERR_PTR(-EAGAIN); 1831 } 1832 1833 static void 1834 pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, 1835 u32 seq) 1836 { 1837 if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode) 1838 iomode = IOMODE_ANY; 1839 lo->plh_return_iomode = iomode; 1840 set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); 1841 if (seq != 0) { 1842 WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq); 1843 lo->plh_return_seq = seq; 1844 } 1845 } 1846 1847 /** 1848 * pnfs_mark_matching_lsegs_return - Free or return matching layout segments 1849 * @lo: pointer to layout header 1850 * @tmp_list: list header to be used with pnfs_free_lseg_list() 1851 * @return_range: describe layout segment ranges to be returned 1852 * 1853 * This function is mainly intended for use by layoutrecall. It attempts 1854 * to free the layout segment immediately, or else to mark it for return 1855 * as soon as its reference count drops to zero. 1856 */ 1857 int 1858 pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, 1859 struct list_head *tmp_list, 1860 const struct pnfs_layout_range *return_range, 1861 u32 seq) 1862 { 1863 struct pnfs_layout_segment *lseg, *next; 1864 int remaining = 0; 1865 1866 dprintk("%s:Begin lo %p\n", __func__, lo); 1867 1868 if (list_empty(&lo->plh_segs)) 1869 return 0; 1870 1871 assert_spin_locked(&lo->plh_inode->i_lock); 1872 1873 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 1874 if (pnfs_match_lseg_recall(lseg, return_range, seq)) { 1875 dprintk("%s: marking lseg %p iomode %d " 1876 "offset %llu length %llu\n", __func__, 1877 lseg, lseg->pls_range.iomode, 1878 lseg->pls_range.offset, 1879 lseg->pls_range.length); 1880 if (mark_lseg_invalid(lseg, tmp_list)) 1881 continue; 1882 remaining++; 1883 set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); 1884 } 1885 1886 if (remaining) 1887 pnfs_set_plh_return_info(lo, return_range->iomode, seq); 1888 1889 return remaining; 1890 } 1891 1892 void pnfs_error_mark_layout_for_return(struct inode *inode, 1893 struct pnfs_layout_segment *lseg) 1894 { 1895 struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; 1896 struct pnfs_layout_range range = { 1897 .iomode = lseg->pls_range.iomode, 1898 .offset = 0, 1899 .length = NFS4_MAX_UINT64, 1900 }; 1901 LIST_HEAD(free_me); 1902 bool return_now = false; 1903 1904 spin_lock(&inode->i_lock); 1905 pnfs_set_plh_return_info(lo, range.iomode, 0); 1906 /* 1907 * mark all matching lsegs so that we are sure to have no live 1908 * segments at hand when sending layoutreturn. See pnfs_put_lseg() 1909 * for how it works. 1910 */ 1911 if (!pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0)) { 1912 nfs4_stateid stateid; 1913 enum pnfs_iomode iomode; 1914 1915 return_now = pnfs_prepare_layoutreturn(lo, &stateid, &iomode); 1916 spin_unlock(&inode->i_lock); 1917 if (return_now) 1918 pnfs_send_layoutreturn(lo, &stateid, iomode, false); 1919 } else { 1920 spin_unlock(&inode->i_lock); 1921 nfs_commit_inode(inode, 0); 1922 } 1923 pnfs_free_lseg_list(&free_me); 1924 } 1925 EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); 1926 1927 void 1928 pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 1929 { 1930 u64 rd_size = req->wb_bytes; 1931 1932 if (pgio->pg_lseg == NULL) { 1933 if (pgio->pg_dreq == NULL) 1934 rd_size = i_size_read(pgio->pg_inode) - req_offset(req); 1935 else 1936 rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); 1937 1938 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1939 req->wb_context, 1940 req_offset(req), 1941 rd_size, 1942 IOMODE_READ, 1943 false, 1944 GFP_KERNEL); 1945 if (IS_ERR(pgio->pg_lseg)) { 1946 pgio->pg_error = PTR_ERR(pgio->pg_lseg); 1947 pgio->pg_lseg = NULL; 1948 return; 1949 } 1950 } 1951 /* If no lseg, fall back to read through mds */ 1952 if (pgio->pg_lseg == NULL) 1953 nfs_pageio_reset_read_mds(pgio); 1954 1955 } 1956 EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); 1957 1958 void 1959 pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, 1960 struct nfs_page *req, u64 wb_size) 1961 { 1962 if (pgio->pg_lseg == NULL) { 1963 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1964 req->wb_context, 1965 req_offset(req), 1966 wb_size, 1967 IOMODE_RW, 1968 false, 1969 GFP_NOFS); 1970 if (IS_ERR(pgio->pg_lseg)) { 1971 pgio->pg_error = PTR_ERR(pgio->pg_lseg); 1972 pgio->pg_lseg = NULL; 1973 return; 1974 } 1975 } 1976 /* If no lseg, fall back to write through mds */ 1977 if (pgio->pg_lseg == NULL) 1978 nfs_pageio_reset_write_mds(pgio); 1979 } 1980 EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); 1981 1982 void 1983 pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc) 1984 { 1985 if (desc->pg_lseg) { 1986 pnfs_put_lseg(desc->pg_lseg); 1987 desc->pg_lseg = NULL; 1988 } 1989 } 1990 EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup); 1991 1992 /* 1993 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number 1994 * of bytes (maximum @req->wb_bytes) that can be coalesced. 1995 */ 1996 size_t 1997 pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, 1998 struct nfs_page *prev, struct nfs_page *req) 1999 { 2000 unsigned int size; 2001 u64 seg_end, req_start, seg_left; 2002 2003 size = nfs_generic_pg_test(pgio, prev, req); 2004 if (!size) 2005 return 0; 2006 2007 /* 2008 * 'size' contains the number of bytes left in the current page (up 2009 * to the original size asked for in @req->wb_bytes). 2010 * 2011 * Calculate how many bytes are left in the layout segment 2012 * and if there are less bytes than 'size', return that instead. 2013 * 2014 * Please also note that 'end_offset' is actually the offset of the 2015 * first byte that lies outside the pnfs_layout_range. FIXME? 2016 * 2017 */ 2018 if (pgio->pg_lseg) { 2019 seg_end = end_offset(pgio->pg_lseg->pls_range.offset, 2020 pgio->pg_lseg->pls_range.length); 2021 req_start = req_offset(req); 2022 WARN_ON_ONCE(req_start >= seg_end); 2023 /* start of request is past the last byte of this segment */ 2024 if (req_start >= seg_end) { 2025 /* reference the new lseg */ 2026 if (pgio->pg_ops->pg_cleanup) 2027 pgio->pg_ops->pg_cleanup(pgio); 2028 if (pgio->pg_ops->pg_init) 2029 pgio->pg_ops->pg_init(pgio, req); 2030 return 0; 2031 } 2032 2033 /* adjust 'size' iff there are fewer bytes left in the 2034 * segment than what nfs_generic_pg_test returned */ 2035 seg_left = seg_end - req_start; 2036 if (seg_left < size) 2037 size = (unsigned int)seg_left; 2038 } 2039 2040 return size; 2041 } 2042 EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); 2043 2044 int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr) 2045 { 2046 struct nfs_pageio_descriptor pgio; 2047 2048 /* Resend all requests through the MDS */ 2049 nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, 2050 hdr->completion_ops); 2051 set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags); 2052 return nfs_pageio_resend(&pgio, hdr); 2053 } 2054 EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); 2055 2056 static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr) 2057 { 2058 2059 dprintk("pnfs write error = %d\n", hdr->pnfs_error); 2060 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & 2061 PNFS_LAYOUTRET_ON_ERROR) { 2062 pnfs_return_layout(hdr->inode); 2063 } 2064 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) 2065 hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr); 2066 } 2067 2068 /* 2069 * Called by non rpc-based layout drivers 2070 */ 2071 void pnfs_ld_write_done(struct nfs_pgio_header *hdr) 2072 { 2073 if (likely(!hdr->pnfs_error)) { 2074 pnfs_set_layoutcommit(hdr->inode, hdr->lseg, 2075 hdr->mds_offset + hdr->res.count); 2076 hdr->mds_ops->rpc_call_done(&hdr->task, hdr); 2077 } 2078 trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); 2079 if (unlikely(hdr->pnfs_error)) 2080 pnfs_ld_handle_write_error(hdr); 2081 hdr->mds_ops->rpc_release(hdr); 2082 } 2083 EXPORT_SYMBOL_GPL(pnfs_ld_write_done); 2084 2085 static void 2086 pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, 2087 struct nfs_pgio_header *hdr) 2088 { 2089 struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); 2090 2091 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 2092 list_splice_tail_init(&hdr->pages, &mirror->pg_list); 2093 nfs_pageio_reset_write_mds(desc); 2094 mirror->pg_recoalesce = 1; 2095 } 2096 nfs_pgio_data_destroy(hdr); 2097 hdr->release(hdr); 2098 } 2099 2100 static enum pnfs_try_status 2101 pnfs_try_to_write_data(struct nfs_pgio_header *hdr, 2102 const struct rpc_call_ops *call_ops, 2103 struct pnfs_layout_segment *lseg, 2104 int how) 2105 { 2106 struct inode *inode = hdr->inode; 2107 enum pnfs_try_status trypnfs; 2108 struct nfs_server *nfss = NFS_SERVER(inode); 2109 2110 hdr->mds_ops = call_ops; 2111 2112 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, 2113 inode->i_ino, hdr->args.count, hdr->args.offset, how); 2114 trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how); 2115 if (trypnfs != PNFS_NOT_ATTEMPTED) 2116 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); 2117 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 2118 return trypnfs; 2119 } 2120 2121 static void 2122 pnfs_do_write(struct nfs_pageio_descriptor *desc, 2123 struct nfs_pgio_header *hdr, int how) 2124 { 2125 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; 2126 struct pnfs_layout_segment *lseg = desc->pg_lseg; 2127 enum pnfs_try_status trypnfs; 2128 2129 trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how); 2130 if (trypnfs == PNFS_NOT_ATTEMPTED) 2131 pnfs_write_through_mds(desc, hdr); 2132 } 2133 2134 static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) 2135 { 2136 pnfs_put_lseg(hdr->lseg); 2137 nfs_pgio_header_free(hdr); 2138 } 2139 2140 int 2141 pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 2142 { 2143 struct nfs_pgio_header *hdr; 2144 int ret; 2145 2146 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); 2147 if (!hdr) { 2148 desc->pg_error = -ENOMEM; 2149 return desc->pg_error; 2150 } 2151 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); 2152 2153 hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 2154 ret = nfs_generic_pgio(desc, hdr); 2155 if (!ret) 2156 pnfs_do_write(desc, hdr, desc->pg_ioflags); 2157 2158 return ret; 2159 } 2160 EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); 2161 2162 int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr) 2163 { 2164 struct nfs_pageio_descriptor pgio; 2165 2166 /* Resend all requests through the MDS */ 2167 nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops); 2168 return nfs_pageio_resend(&pgio, hdr); 2169 } 2170 EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); 2171 2172 static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr) 2173 { 2174 dprintk("pnfs read error = %d\n", hdr->pnfs_error); 2175 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & 2176 PNFS_LAYOUTRET_ON_ERROR) { 2177 pnfs_return_layout(hdr->inode); 2178 } 2179 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) 2180 hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr); 2181 } 2182 2183 /* 2184 * Called by non rpc-based layout drivers 2185 */ 2186 void pnfs_ld_read_done(struct nfs_pgio_header *hdr) 2187 { 2188 if (likely(!hdr->pnfs_error)) { 2189 __nfs4_read_done_cb(hdr); 2190 hdr->mds_ops->rpc_call_done(&hdr->task, hdr); 2191 } 2192 trace_nfs4_pnfs_read(hdr, hdr->pnfs_error); 2193 if (unlikely(hdr->pnfs_error)) 2194 pnfs_ld_handle_read_error(hdr); 2195 hdr->mds_ops->rpc_release(hdr); 2196 } 2197 EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 2198 2199 static void 2200 pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, 2201 struct nfs_pgio_header *hdr) 2202 { 2203 struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); 2204 2205 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 2206 list_splice_tail_init(&hdr->pages, &mirror->pg_list); 2207 nfs_pageio_reset_read_mds(desc); 2208 mirror->pg_recoalesce = 1; 2209 } 2210 nfs_pgio_data_destroy(hdr); 2211 hdr->release(hdr); 2212 } 2213 2214 /* 2215 * Call the appropriate parallel I/O subsystem read function. 2216 */ 2217 static enum pnfs_try_status 2218 pnfs_try_to_read_data(struct nfs_pgio_header *hdr, 2219 const struct rpc_call_ops *call_ops, 2220 struct pnfs_layout_segment *lseg) 2221 { 2222 struct inode *inode = hdr->inode; 2223 struct nfs_server *nfss = NFS_SERVER(inode); 2224 enum pnfs_try_status trypnfs; 2225 2226 hdr->mds_ops = call_ops; 2227 2228 dprintk("%s: Reading ino:%lu %u@%llu\n", 2229 __func__, inode->i_ino, hdr->args.count, hdr->args.offset); 2230 2231 trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr); 2232 if (trypnfs != PNFS_NOT_ATTEMPTED) 2233 nfs_inc_stats(inode, NFSIOS_PNFS_READ); 2234 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 2235 return trypnfs; 2236 } 2237 2238 /* Resend all requests through pnfs. */ 2239 void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) 2240 { 2241 struct nfs_pageio_descriptor pgio; 2242 2243 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 2244 nfs_pageio_init_read(&pgio, hdr->inode, false, 2245 hdr->completion_ops); 2246 hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr); 2247 } 2248 } 2249 EXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs); 2250 2251 static void 2252 pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) 2253 { 2254 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; 2255 struct pnfs_layout_segment *lseg = desc->pg_lseg; 2256 enum pnfs_try_status trypnfs; 2257 2258 trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); 2259 if (trypnfs == PNFS_TRY_AGAIN) 2260 pnfs_read_resend_pnfs(hdr); 2261 if (trypnfs == PNFS_NOT_ATTEMPTED || hdr->task.tk_status) 2262 pnfs_read_through_mds(desc, hdr); 2263 } 2264 2265 static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) 2266 { 2267 pnfs_put_lseg(hdr->lseg); 2268 nfs_pgio_header_free(hdr); 2269 } 2270 2271 int 2272 pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 2273 { 2274 struct nfs_pgio_header *hdr; 2275 int ret; 2276 2277 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); 2278 if (!hdr) { 2279 desc->pg_error = -ENOMEM; 2280 return desc->pg_error; 2281 } 2282 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); 2283 hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 2284 ret = nfs_generic_pgio(desc, hdr); 2285 if (!ret) 2286 pnfs_do_read(desc, hdr); 2287 return ret; 2288 } 2289 EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); 2290 2291 static void pnfs_clear_layoutcommitting(struct inode *inode) 2292 { 2293 unsigned long *bitlock = &NFS_I(inode)->flags; 2294 2295 clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); 2296 smp_mb__after_atomic(); 2297 wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); 2298 } 2299 2300 /* 2301 * There can be multiple RW segments. 2302 */ 2303 static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) 2304 { 2305 struct pnfs_layout_segment *lseg; 2306 2307 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { 2308 if (lseg->pls_range.iomode == IOMODE_RW && 2309 test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) 2310 list_add(&lseg->pls_lc_list, listp); 2311 } 2312 } 2313 2314 static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp) 2315 { 2316 struct pnfs_layout_segment *lseg, *tmp; 2317 2318 /* Matched by references in pnfs_set_layoutcommit */ 2319 list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) { 2320 list_del_init(&lseg->pls_lc_list); 2321 pnfs_put_lseg(lseg); 2322 } 2323 2324 pnfs_clear_layoutcommitting(inode); 2325 } 2326 2327 void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) 2328 { 2329 pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode); 2330 } 2331 EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); 2332 2333 void 2334 pnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg, 2335 loff_t end_pos) 2336 { 2337 struct nfs_inode *nfsi = NFS_I(inode); 2338 bool mark_as_dirty = false; 2339 2340 spin_lock(&inode->i_lock); 2341 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { 2342 nfsi->layout->plh_lwb = end_pos; 2343 mark_as_dirty = true; 2344 dprintk("%s: Set layoutcommit for inode %lu ", 2345 __func__, inode->i_ino); 2346 } else if (end_pos > nfsi->layout->plh_lwb) 2347 nfsi->layout->plh_lwb = end_pos; 2348 if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) { 2349 /* references matched in nfs4_layoutcommit_release */ 2350 pnfs_get_lseg(lseg); 2351 } 2352 spin_unlock(&inode->i_lock); 2353 dprintk("%s: lseg %p end_pos %llu\n", 2354 __func__, lseg, nfsi->layout->plh_lwb); 2355 2356 /* if pnfs_layoutcommit_inode() runs between inode locks, the next one 2357 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ 2358 if (mark_as_dirty) 2359 mark_inode_dirty_sync(inode); 2360 } 2361 EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); 2362 2363 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) 2364 { 2365 struct nfs_server *nfss = NFS_SERVER(data->args.inode); 2366 2367 if (nfss->pnfs_curr_ld->cleanup_layoutcommit) 2368 nfss->pnfs_curr_ld->cleanup_layoutcommit(data); 2369 pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list); 2370 } 2371 2372 /* 2373 * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and 2374 * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough 2375 * data to disk to allow the server to recover the data if it crashes. 2376 * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag 2377 * is off, and a COMMIT is sent to a data server, or 2378 * if WRITEs to a data server return NFS_DATA_SYNC. 2379 */ 2380 int 2381 pnfs_layoutcommit_inode(struct inode *inode, bool sync) 2382 { 2383 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 2384 struct nfs4_layoutcommit_data *data; 2385 struct nfs_inode *nfsi = NFS_I(inode); 2386 loff_t end_pos; 2387 int status; 2388 2389 if (!pnfs_layoutcommit_outstanding(inode)) 2390 return 0; 2391 2392 dprintk("--> %s inode %lu\n", __func__, inode->i_ino); 2393 2394 status = -EAGAIN; 2395 if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) { 2396 if (!sync) 2397 goto out; 2398 status = wait_on_bit_lock_action(&nfsi->flags, 2399 NFS_INO_LAYOUTCOMMITTING, 2400 nfs_wait_bit_killable, 2401 TASK_KILLABLE); 2402 if (status) 2403 goto out; 2404 } 2405 2406 status = -ENOMEM; 2407 /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */ 2408 data = kzalloc(sizeof(*data), GFP_NOFS); 2409 if (!data) 2410 goto clear_layoutcommitting; 2411 2412 status = 0; 2413 spin_lock(&inode->i_lock); 2414 if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) 2415 goto out_unlock; 2416 2417 INIT_LIST_HEAD(&data->lseg_list); 2418 pnfs_list_write_lseg(inode, &data->lseg_list); 2419 2420 end_pos = nfsi->layout->plh_lwb; 2421 2422 nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid); 2423 spin_unlock(&inode->i_lock); 2424 2425 data->args.inode = inode; 2426 data->cred = get_rpccred(nfsi->layout->plh_lc_cred); 2427 nfs_fattr_init(&data->fattr); 2428 data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; 2429 data->res.fattr = &data->fattr; 2430 if (end_pos != 0) 2431 data->args.lastbytewritten = end_pos - 1; 2432 else 2433 data->args.lastbytewritten = U64_MAX; 2434 data->res.server = NFS_SERVER(inode); 2435 2436 if (ld->prepare_layoutcommit) { 2437 status = ld->prepare_layoutcommit(&data->args); 2438 if (status) { 2439 put_rpccred(data->cred); 2440 spin_lock(&inode->i_lock); 2441 set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); 2442 if (end_pos > nfsi->layout->plh_lwb) 2443 nfsi->layout->plh_lwb = end_pos; 2444 goto out_unlock; 2445 } 2446 } 2447 2448 2449 status = nfs4_proc_layoutcommit(data, sync); 2450 out: 2451 if (status) 2452 mark_inode_dirty_sync(inode); 2453 dprintk("<-- %s status %d\n", __func__, status); 2454 return status; 2455 out_unlock: 2456 spin_unlock(&inode->i_lock); 2457 kfree(data); 2458 clear_layoutcommitting: 2459 pnfs_clear_layoutcommitting(inode); 2460 goto out; 2461 } 2462 EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode); 2463 2464 int 2465 pnfs_generic_sync(struct inode *inode, bool datasync) 2466 { 2467 return pnfs_layoutcommit_inode(inode, true); 2468 } 2469 EXPORT_SYMBOL_GPL(pnfs_generic_sync); 2470 2471 struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) 2472 { 2473 struct nfs4_threshold *thp; 2474 2475 thp = kzalloc(sizeof(*thp), GFP_NOFS); 2476 if (!thp) { 2477 dprintk("%s mdsthreshold allocation failed\n", __func__); 2478 return NULL; 2479 } 2480 return thp; 2481 } 2482 2483 #if IS_ENABLED(CONFIG_NFS_V4_2) 2484 int 2485 pnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags) 2486 { 2487 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 2488 struct nfs_server *server = NFS_SERVER(inode); 2489 struct nfs_inode *nfsi = NFS_I(inode); 2490 struct nfs42_layoutstat_data *data; 2491 struct pnfs_layout_hdr *hdr; 2492 int status = 0; 2493 2494 if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats) 2495 goto out; 2496 2497 if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS)) 2498 goto out; 2499 2500 if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags)) 2501 goto out; 2502 2503 spin_lock(&inode->i_lock); 2504 if (!NFS_I(inode)->layout) { 2505 spin_unlock(&inode->i_lock); 2506 goto out_clear_layoutstats; 2507 } 2508 hdr = NFS_I(inode)->layout; 2509 pnfs_get_layout_hdr(hdr); 2510 spin_unlock(&inode->i_lock); 2511 2512 data = kzalloc(sizeof(*data), gfp_flags); 2513 if (!data) { 2514 status = -ENOMEM; 2515 goto out_put; 2516 } 2517 2518 data->args.fh = NFS_FH(inode); 2519 data->args.inode = inode; 2520 status = ld->prepare_layoutstats(&data->args); 2521 if (status) 2522 goto out_free; 2523 2524 status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data); 2525 2526 out: 2527 dprintk("%s returns %d\n", __func__, status); 2528 return status; 2529 2530 out_free: 2531 kfree(data); 2532 out_put: 2533 pnfs_put_layout_hdr(hdr); 2534 out_clear_layoutstats: 2535 smp_mb__before_atomic(); 2536 clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags); 2537 smp_mb__after_atomic(); 2538 goto out; 2539 } 2540 EXPORT_SYMBOL_GPL(pnfs_report_layoutstat); 2541 #endif 2542 2543 unsigned int layoutstats_timer; 2544 module_param(layoutstats_timer, uint, 0644); 2545 EXPORT_SYMBOL_GPL(layoutstats_timer); 2546