1 /* 2 * pNFS functions to call and manage layout drivers. 3 * 4 * Copyright (c) 2002 [year of first publication] 5 * The Regents of the University of Michigan 6 * All Rights Reserved 7 * 8 * Dean Hildebrand <dhildebz@umich.edu> 9 * 10 * Permission is granted to use, copy, create derivative works, and 11 * redistribute this software and such derivative works for any purpose, 12 * so long as the name of the University of Michigan is not used in 13 * any advertising or publicity pertaining to the use or distribution 14 * of this software without specific, written prior authorization. If 15 * the above copyright notice or any other identification of the 16 * University of Michigan is included in any copy of any portion of 17 * this software, then the disclaimer below must also be included. 18 * 19 * This software is provided as is, without representation or warranty 20 * of any kind either express or implied, including without limitation 21 * the implied warranties of merchantability, fitness for a particular 22 * purpose, or noninfringement. The Regents of the University of 23 * Michigan shall not be liable for any damages, including special, 24 * indirect, incidental, or consequential damages, with respect to any 25 * claim arising out of or in connection with the use of the software, 26 * even if it has been or is hereafter advised of the possibility of 27 * such damages. 28 */ 29 30 #include <linux/nfs_fs.h> 31 #include <linux/nfs_page.h> 32 #include <linux/module.h> 33 #include <linux/sort.h> 34 #include "internal.h" 35 #include "pnfs.h" 36 #include "iostat.h" 37 #include "nfs4trace.h" 38 #include "delegation.h" 39 #include "nfs42.h" 40 41 #define NFSDBG_FACILITY NFSDBG_PNFS 42 #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) 43 44 /* Locking: 45 * 46 * pnfs_spinlock: 47 * protects pnfs_modules_tbl. 48 */ 49 static DEFINE_SPINLOCK(pnfs_spinlock); 50 51 /* 52 * pnfs_modules_tbl holds all pnfs modules 53 */ 54 static LIST_HEAD(pnfs_modules_tbl); 55 56 static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo); 57 58 /* Return the registered pnfs layout driver module matching given id */ 59 static struct pnfs_layoutdriver_type * 60 find_pnfs_driver_locked(u32 id) 61 { 62 struct pnfs_layoutdriver_type *local; 63 64 list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid) 65 if (local->id == id) 66 goto out; 67 local = NULL; 68 out: 69 dprintk("%s: Searching for id %u, found %p\n", __func__, id, local); 70 return local; 71 } 72 73 static struct pnfs_layoutdriver_type * 74 find_pnfs_driver(u32 id) 75 { 76 struct pnfs_layoutdriver_type *local; 77 78 spin_lock(&pnfs_spinlock); 79 local = find_pnfs_driver_locked(id); 80 if (local != NULL && !try_module_get(local->owner)) { 81 dprintk("%s: Could not grab reference on module\n", __func__); 82 local = NULL; 83 } 84 spin_unlock(&pnfs_spinlock); 85 return local; 86 } 87 88 void 89 unset_pnfs_layoutdriver(struct nfs_server *nfss) 90 { 91 if (nfss->pnfs_curr_ld) { 92 if (nfss->pnfs_curr_ld->clear_layoutdriver) 93 nfss->pnfs_curr_ld->clear_layoutdriver(nfss); 94 /* Decrement the MDS count. Purge the deviceid cache if zero */ 95 if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count)) 96 nfs4_deviceid_purge_client(nfss->nfs_client); 97 module_put(nfss->pnfs_curr_ld->owner); 98 } 99 nfss->pnfs_curr_ld = NULL; 100 } 101 102 /* 103 * When the server sends a list of layout types, we choose one in the order 104 * given in the list below. 105 * 106 * FIXME: should this list be configurable in some fashion? module param? 107 * mount option? 
something else? 108 */ 109 static const u32 ld_prefs[] = { 110 LAYOUT_SCSI, 111 LAYOUT_BLOCK_VOLUME, 112 LAYOUT_OSD2_OBJECTS, 113 LAYOUT_FLEX_FILES, 114 LAYOUT_NFSV4_1_FILES, 115 0 116 }; 117 118 static int 119 ld_cmp(const void *e1, const void *e2) 120 { 121 u32 ld1 = *((u32 *)e1); 122 u32 ld2 = *((u32 *)e2); 123 int i; 124 125 for (i = 0; ld_prefs[i] != 0; i++) { 126 if (ld1 == ld_prefs[i]) 127 return -1; 128 129 if (ld2 == ld_prefs[i]) 130 return 1; 131 } 132 return 0; 133 } 134 135 /* 136 * Try to set the server's pnfs module to the pnfs layout type specified by id. 137 * Currently only one pNFS layout driver per filesystem is supported. 138 * 139 * @ids array of layout types supported by MDS. 140 */ 141 void 142 set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, 143 struct nfs_fsinfo *fsinfo) 144 { 145 struct pnfs_layoutdriver_type *ld_type = NULL; 146 u32 id; 147 int i; 148 149 if (!(server->nfs_client->cl_exchange_flags & 150 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { 151 printk(KERN_ERR "NFS: %s: cl_exchange_flags 0x%x\n", 152 __func__, server->nfs_client->cl_exchange_flags); 153 goto out_no_driver; 154 } 155 156 sort(fsinfo->layouttype, fsinfo->nlayouttypes, 157 sizeof(*fsinfo->layouttype), ld_cmp, NULL); 158 159 for (i = 0; i < fsinfo->nlayouttypes; i++) { 160 id = fsinfo->layouttype[i]; 161 ld_type = find_pnfs_driver(id); 162 if (!ld_type) { 163 request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, 164 id); 165 ld_type = find_pnfs_driver(id); 166 } 167 if (ld_type) 168 break; 169 } 170 171 if (!ld_type) { 172 dprintk("%s: No pNFS module found!\n", __func__); 173 goto out_no_driver; 174 } 175 176 server->pnfs_curr_ld = ld_type; 177 if (ld_type->set_layoutdriver 178 && ld_type->set_layoutdriver(server, mntfh)) { 179 printk(KERN_ERR "NFS: %s: Error initializing pNFS layout " 180 "driver %u.\n", __func__, id); 181 module_put(ld_type->owner); 182 goto out_no_driver; 183 } 184 /* Bump the MDS count */ 185 atomic_inc(&server->nfs_client->cl_mds_count); 186 187 dprintk("%s: pNFS module for %u set\n", __func__, id); 188 return; 189 190 out_no_driver: 191 dprintk("%s: Using NFSv4 I/O\n", __func__); 192 server->pnfs_curr_ld = NULL; 193 } 194 195 int 196 pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) 197 { 198 int status = -EINVAL; 199 struct pnfs_layoutdriver_type *tmp; 200 201 if (ld_type->id == 0) { 202 printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__); 203 return status; 204 } 205 if (!ld_type->alloc_lseg || !ld_type->free_lseg) { 206 printk(KERN_ERR "NFS: %s Layout driver must provide " 207 "alloc_lseg and free_lseg.\n", __func__); 208 return status; 209 } 210 211 spin_lock(&pnfs_spinlock); 212 tmp = find_pnfs_driver_locked(ld_type->id); 213 if (!tmp) { 214 list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl); 215 status = 0; 216 dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id, 217 ld_type->name); 218 } else { 219 printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n", 220 __func__, ld_type->id); 221 } 222 spin_unlock(&pnfs_spinlock); 223 224 return status; 225 } 226 EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver); 227 228 void 229 pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type) 230 { 231 dprintk("%s Deregistering id:%u\n", __func__, ld_type->id); 232 spin_lock(&pnfs_spinlock); 233 list_del(&ld_type->pnfs_tblid); 234 spin_unlock(&pnfs_spinlock); 235 } 236 EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); 237 238 /* 239 * pNFS client layout cache 240 */ 241 242 /* Need to hold 
i_lock if caller does not already hold reference */ 243 void 244 pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo) 245 { 246 atomic_inc(&lo->plh_refcount); 247 } 248 249 static struct pnfs_layout_hdr * 250 pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags) 251 { 252 struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; 253 return ld->alloc_layout_hdr(ino, gfp_flags); 254 } 255 256 static void 257 pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) 258 { 259 struct nfs_server *server = NFS_SERVER(lo->plh_inode); 260 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; 261 262 if (!list_empty(&lo->plh_layouts)) { 263 struct nfs_client *clp = server->nfs_client; 264 265 spin_lock(&clp->cl_lock); 266 list_del_init(&lo->plh_layouts); 267 spin_unlock(&clp->cl_lock); 268 } 269 put_rpccred(lo->plh_lc_cred); 270 return ld->free_layout_hdr(lo); 271 } 272 273 static void 274 pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo) 275 { 276 struct nfs_inode *nfsi = NFS_I(lo->plh_inode); 277 dprintk("%s: freeing layout cache %p\n", __func__, lo); 278 nfsi->layout = NULL; 279 /* Reset MDS Threshold I/O counters */ 280 nfsi->write_io = 0; 281 nfsi->read_io = 0; 282 } 283 284 void 285 pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) 286 { 287 struct inode *inode = lo->plh_inode; 288 289 pnfs_layoutreturn_before_put_layout_hdr(lo); 290 291 if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { 292 if (!list_empty(&lo->plh_segs)) 293 WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); 294 pnfs_detach_layout_hdr(lo); 295 spin_unlock(&inode->i_lock); 296 pnfs_free_layout_hdr(lo); 297 } 298 } 299 300 /* 301 * Mark a pnfs_layout_hdr and all associated layout segments as invalid 302 * 303 * In order to continue using the pnfs_layout_hdr, a full recovery 304 * is required. 305 * Note that caller must hold inode->i_lock. 306 */ 307 int 308 pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, 309 struct list_head *lseg_list) 310 { 311 struct pnfs_layout_range range = { 312 .iomode = IOMODE_ANY, 313 .offset = 0, 314 .length = NFS4_MAX_UINT64, 315 }; 316 317 set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); 318 return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range, 0); 319 } 320 321 static int 322 pnfs_iomode_to_fail_bit(u32 iomode) 323 { 324 return iomode == IOMODE_RW ? 325 NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; 326 } 327 328 static void 329 pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) 330 { 331 lo->plh_retry_timestamp = jiffies; 332 if (!test_and_set_bit(fail_bit, &lo->plh_flags)) 333 atomic_inc(&lo->plh_refcount); 334 } 335 336 static void 337 pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) 338 { 339 if (test_and_clear_bit(fail_bit, &lo->plh_flags)) 340 atomic_dec(&lo->plh_refcount); 341 } 342 343 static void 344 pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode) 345 { 346 struct inode *inode = lo->plh_inode; 347 struct pnfs_layout_range range = { 348 .iomode = iomode, 349 .offset = 0, 350 .length = NFS4_MAX_UINT64, 351 }; 352 LIST_HEAD(head); 353 354 spin_lock(&inode->i_lock); 355 pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); 356 pnfs_mark_matching_lsegs_invalid(lo, &head, &range, 0); 357 spin_unlock(&inode->i_lock); 358 pnfs_free_lseg_list(&head); 359 dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__, 360 iomode == IOMODE_RW ? 
"RW" : "READ"); 361 } 362 363 static bool 364 pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode) 365 { 366 unsigned long start, end; 367 int fail_bit = pnfs_iomode_to_fail_bit(iomode); 368 369 if (test_bit(fail_bit, &lo->plh_flags) == 0) 370 return false; 371 end = jiffies; 372 start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT; 373 if (!time_in_range(lo->plh_retry_timestamp, start, end)) { 374 /* It is time to retry the failed layoutgets */ 375 pnfs_layout_clear_fail_bit(lo, fail_bit); 376 return false; 377 } 378 return true; 379 } 380 381 static void 382 pnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg, 383 const struct pnfs_layout_range *range, 384 const nfs4_stateid *stateid) 385 { 386 INIT_LIST_HEAD(&lseg->pls_list); 387 INIT_LIST_HEAD(&lseg->pls_lc_list); 388 atomic_set(&lseg->pls_refcount, 1); 389 set_bit(NFS_LSEG_VALID, &lseg->pls_flags); 390 lseg->pls_layout = lo; 391 lseg->pls_range = *range; 392 lseg->pls_seq = be32_to_cpu(stateid->seqid); 393 } 394 395 static void pnfs_free_lseg(struct pnfs_layout_segment *lseg) 396 { 397 struct inode *ino = lseg->pls_layout->plh_inode; 398 399 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); 400 } 401 402 static void 403 pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, 404 struct pnfs_layout_segment *lseg) 405 { 406 struct inode *inode = lo->plh_inode; 407 408 WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 409 list_del_init(&lseg->pls_list); 410 /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ 411 atomic_dec(&lo->plh_refcount); 412 if (list_empty(&lo->plh_segs)) { 413 if (atomic_read(&lo->plh_outstanding) == 0) 414 set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); 415 clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); 416 } 417 rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); 418 } 419 420 void 421 pnfs_put_lseg(struct pnfs_layout_segment *lseg) 422 { 423 struct pnfs_layout_hdr *lo; 424 struct inode *inode; 425 426 if (!lseg) 427 return; 428 429 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, 430 atomic_read(&lseg->pls_refcount), 431 test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 432 433 lo = lseg->pls_layout; 434 inode = lo->plh_inode; 435 436 if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { 437 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { 438 spin_unlock(&inode->i_lock); 439 return; 440 } 441 pnfs_get_layout_hdr(lo); 442 pnfs_layout_remove_lseg(lo, lseg); 443 spin_unlock(&inode->i_lock); 444 pnfs_free_lseg(lseg); 445 pnfs_put_layout_hdr(lo); 446 } 447 } 448 EXPORT_SYMBOL_GPL(pnfs_put_lseg); 449 450 static void pnfs_free_lseg_async_work(struct work_struct *work) 451 { 452 struct pnfs_layout_segment *lseg; 453 struct pnfs_layout_hdr *lo; 454 455 lseg = container_of(work, struct pnfs_layout_segment, pls_work); 456 lo = lseg->pls_layout; 457 458 pnfs_free_lseg(lseg); 459 pnfs_put_layout_hdr(lo); 460 } 461 462 static void pnfs_free_lseg_async(struct pnfs_layout_segment *lseg) 463 { 464 INIT_WORK(&lseg->pls_work, pnfs_free_lseg_async_work); 465 schedule_work(&lseg->pls_work); 466 } 467 468 void 469 pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg) 470 { 471 if (!lseg) 472 return; 473 474 assert_spin_locked(&lseg->pls_layout->plh_inode->i_lock); 475 476 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, 477 atomic_read(&lseg->pls_refcount), 478 test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 479 if (atomic_dec_and_test(&lseg->pls_refcount)) { 480 struct pnfs_layout_hdr *lo = lseg->pls_layout; 481 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) 482 return; 483 
pnfs_get_layout_hdr(lo); 484 pnfs_layout_remove_lseg(lo, lseg); 485 pnfs_free_lseg_async(lseg); 486 } 487 } 488 EXPORT_SYMBOL_GPL(pnfs_put_lseg_locked); 489 490 static u64 491 end_offset(u64 start, u64 len) 492 { 493 u64 end; 494 495 end = start + len; 496 return end >= start ? end : NFS4_MAX_UINT64; 497 } 498 499 /* 500 * is l2 fully contained in l1? 501 * start1 end1 502 * [----------------------------------) 503 * start2 end2 504 * [----------------) 505 */ 506 static bool 507 pnfs_lseg_range_contained(const struct pnfs_layout_range *l1, 508 const struct pnfs_layout_range *l2) 509 { 510 u64 start1 = l1->offset; 511 u64 end1 = end_offset(start1, l1->length); 512 u64 start2 = l2->offset; 513 u64 end2 = end_offset(start2, l2->length); 514 515 return (start1 <= start2) && (end1 >= end2); 516 } 517 518 /* 519 * is l1 and l2 intersecting? 520 * start1 end1 521 * [----------------------------------) 522 * start2 end2 523 * [----------------) 524 */ 525 static bool 526 pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1, 527 const struct pnfs_layout_range *l2) 528 { 529 u64 start1 = l1->offset; 530 u64 end1 = end_offset(start1, l1->length); 531 u64 start2 = l2->offset; 532 u64 end2 = end_offset(start2, l2->length); 533 534 return (end1 == NFS4_MAX_UINT64 || end1 > start2) && 535 (end2 == NFS4_MAX_UINT64 || end2 > start1); 536 } 537 538 static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, 539 struct list_head *tmp_list) 540 { 541 if (!atomic_dec_and_test(&lseg->pls_refcount)) 542 return false; 543 pnfs_layout_remove_lseg(lseg->pls_layout, lseg); 544 list_add(&lseg->pls_list, tmp_list); 545 return true; 546 } 547 548 /* Returns 1 if lseg is removed from list, 0 otherwise */ 549 static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, 550 struct list_head *tmp_list) 551 { 552 int rv = 0; 553 554 if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { 555 /* Remove the reference keeping the lseg in the 556 * list. It will now be removed when all 557 * outstanding io is finished. 558 */ 559 dprintk("%s: lseg %p ref %d\n", __func__, lseg, 560 atomic_read(&lseg->pls_refcount)); 561 if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list)) 562 rv = 1; 563 } 564 return rv; 565 } 566 567 /* 568 * Compare 2 layout stateid sequence ids, to see which is newer, 569 * taking into account wraparound issues. 
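 *
 * The signed-difference test treats the 32-bit sequence space as circular
 * (serial number arithmetic), so e.g. pnfs_seqid_is_newer(2, 0xffffffff)
 * is true even though 2 is numerically smaller.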
570 */ 571 static bool pnfs_seqid_is_newer(u32 s1, u32 s2) 572 { 573 return (s32)(s1 - s2) > 0; 574 } 575 576 static bool 577 pnfs_should_free_range(const struct pnfs_layout_range *lseg_range, 578 const struct pnfs_layout_range *recall_range) 579 { 580 return (recall_range->iomode == IOMODE_ANY || 581 lseg_range->iomode == recall_range->iomode) && 582 pnfs_lseg_range_intersecting(lseg_range, recall_range); 583 } 584 585 static bool 586 pnfs_match_lseg_recall(const struct pnfs_layout_segment *lseg, 587 const struct pnfs_layout_range *recall_range, 588 u32 seq) 589 { 590 if (seq != 0 && pnfs_seqid_is_newer(lseg->pls_seq, seq)) 591 return false; 592 if (recall_range == NULL) 593 return true; 594 return pnfs_should_free_range(&lseg->pls_range, recall_range); 595 } 596 597 /** 598 * pnfs_mark_matching_lsegs_invalid - tear down lsegs or mark them for later 599 * @lo: layout header containing the lsegs 600 * @tmp_list: list head where doomed lsegs should go 601 * @recall_range: optional recall range argument to match (may be NULL) 602 * @seq: only invalidate lsegs obtained prior to this sequence (may be 0) 603 * 604 * Walk the list of lsegs in the layout header, and tear down any that should 605 * be destroyed. If "recall_range" is specified then the segment must match 606 * that range. If "seq" is non-zero, then only match segments that were handed 607 * out at or before that sequence. 608 * 609 * Returns number of matching invalid lsegs remaining in list after scanning 610 * it and purging them. 611 */ 612 int 613 pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, 614 struct list_head *tmp_list, 615 const struct pnfs_layout_range *recall_range, 616 u32 seq) 617 { 618 struct pnfs_layout_segment *lseg, *next; 619 int remaining = 0; 620 621 dprintk("%s:Begin lo %p\n", __func__, lo); 622 623 if (list_empty(&lo->plh_segs)) 624 return 0; 625 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 626 if (pnfs_match_lseg_recall(lseg, recall_range, seq)) { 627 dprintk("%s: freeing lseg %p iomode %d seq %u" 628 "offset %llu length %llu\n", __func__, 629 lseg, lseg->pls_range.iomode, lseg->pls_seq, 630 lseg->pls_range.offset, lseg->pls_range.length); 631 if (!mark_lseg_invalid(lseg, tmp_list)) 632 remaining++; 633 } 634 dprintk("%s:Return %i\n", __func__, remaining); 635 return remaining; 636 } 637 638 /* note free_me must contain lsegs from a single layout_hdr */ 639 void 640 pnfs_free_lseg_list(struct list_head *free_me) 641 { 642 struct pnfs_layout_segment *lseg, *tmp; 643 644 if (list_empty(free_me)) 645 return; 646 647 list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { 648 list_del(&lseg->pls_list); 649 pnfs_free_lseg(lseg); 650 } 651 } 652 653 void 654 pnfs_destroy_layout(struct nfs_inode *nfsi) 655 { 656 struct pnfs_layout_hdr *lo; 657 LIST_HEAD(tmp_list); 658 659 spin_lock(&nfsi->vfs_inode.i_lock); 660 lo = nfsi->layout; 661 if (lo) { 662 pnfs_get_layout_hdr(lo); 663 pnfs_mark_layout_stateid_invalid(lo, &tmp_list); 664 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); 665 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); 666 spin_unlock(&nfsi->vfs_inode.i_lock); 667 pnfs_free_lseg_list(&tmp_list); 668 pnfs_put_layout_hdr(lo); 669 } else 670 spin_unlock(&nfsi->vfs_inode.i_lock); 671 } 672 EXPORT_SYMBOL_GPL(pnfs_destroy_layout); 673 674 static bool 675 pnfs_layout_add_bulk_destroy_list(struct inode *inode, 676 struct list_head *layout_list) 677 { 678 struct pnfs_layout_hdr *lo; 679 bool ret = false; 680 681 spin_lock(&inode->i_lock); 682 lo = NFS_I(inode)->layout; 683 
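	/* Add each layout header at most once: plh_bulk_destroy is
	 * non-empty while it is already queued on a bulk-destroy list. */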
if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) { 684 pnfs_get_layout_hdr(lo); 685 list_add(&lo->plh_bulk_destroy, layout_list); 686 ret = true; 687 } 688 spin_unlock(&inode->i_lock); 689 return ret; 690 } 691 692 /* Caller must hold rcu_read_lock and clp->cl_lock */ 693 static int 694 pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, 695 struct nfs_server *server, 696 struct list_head *layout_list) 697 { 698 struct pnfs_layout_hdr *lo, *next; 699 struct inode *inode; 700 701 list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) { 702 inode = igrab(lo->plh_inode); 703 if (inode == NULL) 704 continue; 705 list_del_init(&lo->plh_layouts); 706 if (pnfs_layout_add_bulk_destroy_list(inode, layout_list)) 707 continue; 708 rcu_read_unlock(); 709 spin_unlock(&clp->cl_lock); 710 iput(inode); 711 spin_lock(&clp->cl_lock); 712 rcu_read_lock(); 713 return -EAGAIN; 714 } 715 return 0; 716 } 717 718 static int 719 pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, 720 bool is_bulk_recall) 721 { 722 struct pnfs_layout_hdr *lo; 723 struct inode *inode; 724 LIST_HEAD(lseg_list); 725 int ret = 0; 726 727 while (!list_empty(layout_list)) { 728 lo = list_entry(layout_list->next, struct pnfs_layout_hdr, 729 plh_bulk_destroy); 730 dprintk("%s freeing layout for inode %lu\n", __func__, 731 lo->plh_inode->i_ino); 732 inode = lo->plh_inode; 733 734 pnfs_layoutcommit_inode(inode, false); 735 736 spin_lock(&inode->i_lock); 737 list_del_init(&lo->plh_bulk_destroy); 738 if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) { 739 if (is_bulk_recall) 740 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); 741 ret = -EAGAIN; 742 } 743 spin_unlock(&inode->i_lock); 744 pnfs_free_lseg_list(&lseg_list); 745 /* Free all lsegs that are attached to commit buckets */ 746 nfs_commit_inode(inode, 0); 747 pnfs_put_layout_hdr(lo); 748 iput(inode); 749 } 750 return ret; 751 } 752 753 int 754 pnfs_destroy_layouts_byfsid(struct nfs_client *clp, 755 struct nfs_fsid *fsid, 756 bool is_recall) 757 { 758 struct nfs_server *server; 759 LIST_HEAD(layout_list); 760 761 spin_lock(&clp->cl_lock); 762 rcu_read_lock(); 763 restart: 764 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 765 if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0) 766 continue; 767 if (pnfs_layout_bulk_destroy_byserver_locked(clp, 768 server, 769 &layout_list) != 0) 770 goto restart; 771 } 772 rcu_read_unlock(); 773 spin_unlock(&clp->cl_lock); 774 775 if (list_empty(&layout_list)) 776 return 0; 777 return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); 778 } 779 780 int 781 pnfs_destroy_layouts_byclid(struct nfs_client *clp, 782 bool is_recall) 783 { 784 struct nfs_server *server; 785 LIST_HEAD(layout_list); 786 787 spin_lock(&clp->cl_lock); 788 rcu_read_lock(); 789 restart: 790 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 791 if (pnfs_layout_bulk_destroy_byserver_locked(clp, 792 server, 793 &layout_list) != 0) 794 goto restart; 795 } 796 rcu_read_unlock(); 797 spin_unlock(&clp->cl_lock); 798 799 if (list_empty(&layout_list)) 800 return 0; 801 return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); 802 } 803 804 /* 805 * Called by the state manger to remove all layouts established under an 806 * expired lease. 
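 *
 * Device IDs are marked invalid and purged before the layouts themselves
 * are torn down, so no new I/O can be set up against data servers under
 * the stale lease.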
807 */ 808 void 809 pnfs_destroy_all_layouts(struct nfs_client *clp) 810 { 811 nfs4_deviceid_mark_client_invalid(clp); 812 nfs4_deviceid_purge_client(clp); 813 814 pnfs_destroy_layouts_byclid(clp, false); 815 } 816 817 static void 818 pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) 819 { 820 lo->plh_return_iomode = 0; 821 lo->plh_return_seq = 0; 822 clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); 823 } 824 825 /* update lo->plh_stateid with new if is more recent */ 826 void 827 pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, 828 bool update_barrier) 829 { 830 u32 oldseq, newseq, new_barrier = 0; 831 832 oldseq = be32_to_cpu(lo->plh_stateid.seqid); 833 newseq = be32_to_cpu(new->seqid); 834 835 if (!pnfs_layout_is_valid(lo)) { 836 nfs4_stateid_copy(&lo->plh_stateid, new); 837 lo->plh_barrier = newseq; 838 pnfs_clear_layoutreturn_info(lo); 839 clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); 840 return; 841 } 842 if (pnfs_seqid_is_newer(newseq, oldseq)) { 843 nfs4_stateid_copy(&lo->plh_stateid, new); 844 /* 845 * Because of wraparound, we want to keep the barrier 846 * "close" to the current seqids. 847 */ 848 new_barrier = newseq - atomic_read(&lo->plh_outstanding); 849 } 850 if (update_barrier) 851 new_barrier = be32_to_cpu(new->seqid); 852 else if (new_barrier == 0) 853 return; 854 if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) 855 lo->plh_barrier = new_barrier; 856 } 857 858 static bool 859 pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo, 860 const nfs4_stateid *stateid) 861 { 862 u32 seqid = be32_to_cpu(stateid->seqid); 863 864 return !pnfs_seqid_is_newer(seqid, lo->plh_barrier); 865 } 866 867 /* lget is set to 1 if called from inside send_layoutget call chain */ 868 static bool 869 pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo) 870 { 871 return lo->plh_block_lgets || 872 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); 873 } 874 875 /* 876 * Get layout from server. 877 * for now, assume that whole file layouts are requested. 878 * arg->offset: 0 879 * arg->length: all ones 880 */ 881 static struct pnfs_layout_segment * 882 send_layoutget(struct pnfs_layout_hdr *lo, 883 struct nfs_open_context *ctx, 884 nfs4_stateid *stateid, 885 const struct pnfs_layout_range *range, 886 long *timeout, gfp_t gfp_flags) 887 { 888 struct inode *ino = lo->plh_inode; 889 struct nfs_server *server = NFS_SERVER(ino); 890 struct nfs4_layoutget *lgp; 891 loff_t i_size; 892 893 dprintk("--> %s\n", __func__); 894 895 /* 896 * Synchronously retrieve layout information from server and 897 * store in lseg. If we race with a concurrent seqid morphing 898 * op, then re-send the LAYOUTGET. 
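 *
 * args.minlength is clamped to one page, to the requested range and, for
 * reads, to the bytes remaining below i_size, while args.maxcount
 * (PNFS_LAYOUT_MAXSIZE) bounds how large a layout reply we will accept.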
899 */ 900 lgp = kzalloc(sizeof(*lgp), gfp_flags); 901 if (lgp == NULL) 902 return ERR_PTR(-ENOMEM); 903 904 i_size = i_size_read(ino); 905 906 lgp->args.minlength = PAGE_SIZE; 907 if (lgp->args.minlength > range->length) 908 lgp->args.minlength = range->length; 909 if (range->iomode == IOMODE_READ) { 910 if (range->offset >= i_size) 911 lgp->args.minlength = 0; 912 else if (i_size - range->offset < lgp->args.minlength) 913 lgp->args.minlength = i_size - range->offset; 914 } 915 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; 916 pnfs_copy_range(&lgp->args.range, range); 917 lgp->args.type = server->pnfs_curr_ld->id; 918 lgp->args.inode = ino; 919 lgp->args.ctx = get_nfs_open_context(ctx); 920 nfs4_stateid_copy(&lgp->args.stateid, stateid); 921 lgp->gfp_flags = gfp_flags; 922 lgp->cred = lo->plh_lc_cred; 923 924 return nfs4_proc_layoutget(lgp, timeout, gfp_flags); 925 } 926 927 static void pnfs_clear_layoutcommit(struct inode *inode, 928 struct list_head *head) 929 { 930 struct nfs_inode *nfsi = NFS_I(inode); 931 struct pnfs_layout_segment *lseg, *tmp; 932 933 if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) 934 return; 935 list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) { 936 if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) 937 continue; 938 pnfs_lseg_dec_and_remove_zero(lseg, head); 939 } 940 } 941 942 void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) 943 { 944 clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); 945 smp_mb__after_atomic(); 946 wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); 947 rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); 948 } 949 950 static bool 951 pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, 952 nfs4_stateid *stateid, 953 enum pnfs_iomode *iomode) 954 { 955 /* Serialise LAYOUTGET/LAYOUTRETURN */ 956 if (atomic_read(&lo->plh_outstanding) != 0) 957 return false; 958 if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) 959 return false; 960 pnfs_get_layout_hdr(lo); 961 if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { 962 if (stateid != NULL) { 963 nfs4_stateid_copy(stateid, &lo->plh_stateid); 964 if (lo->plh_return_seq != 0) 965 stateid->seqid = cpu_to_be32(lo->plh_return_seq); 966 } 967 if (iomode != NULL) 968 *iomode = lo->plh_return_iomode; 969 pnfs_clear_layoutreturn_info(lo); 970 return true; 971 } 972 if (stateid != NULL) 973 nfs4_stateid_copy(stateid, &lo->plh_stateid); 974 if (iomode != NULL) 975 *iomode = IOMODE_ANY; 976 return true; 977 } 978 979 static int 980 pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, 981 enum pnfs_iomode iomode, bool sync) 982 { 983 struct inode *ino = lo->plh_inode; 984 struct nfs4_layoutreturn *lrp; 985 int status = 0; 986 987 lrp = kzalloc(sizeof(*lrp), GFP_NOFS); 988 if (unlikely(lrp == NULL)) { 989 status = -ENOMEM; 990 spin_lock(&ino->i_lock); 991 pnfs_clear_layoutreturn_waitbit(lo); 992 spin_unlock(&ino->i_lock); 993 pnfs_put_layout_hdr(lo); 994 goto out; 995 } 996 997 nfs4_stateid_copy(&lrp->args.stateid, stateid); 998 lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; 999 lrp->args.inode = ino; 1000 lrp->args.range.iomode = iomode; 1001 lrp->args.range.offset = 0; 1002 lrp->args.range.length = NFS4_MAX_UINT64; 1003 lrp->args.layout = lo; 1004 lrp->clp = NFS_SERVER(ino)->nfs_client; 1005 lrp->cred = lo->plh_lc_cred; 1006 1007 status = nfs4_proc_layoutreturn(lrp, sync); 1008 out: 1009 dprintk("<-- %s status: %d\n", __func__, status); 1010 return status; 1011 } 1012 1013 /* Return true if 
layoutreturn is needed */ 1014 static bool 1015 pnfs_layout_need_return(struct pnfs_layout_hdr *lo) 1016 { 1017 struct pnfs_layout_segment *s; 1018 1019 if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) 1020 return false; 1021 1022 /* Defer layoutreturn until all lsegs are done */ 1023 list_for_each_entry(s, &lo->plh_segs, pls_list) { 1024 if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags)) 1025 return false; 1026 } 1027 1028 return true; 1029 } 1030 1031 static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo) 1032 { 1033 struct inode *inode= lo->plh_inode; 1034 1035 if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) 1036 return; 1037 spin_lock(&inode->i_lock); 1038 if (pnfs_layout_need_return(lo)) { 1039 nfs4_stateid stateid; 1040 enum pnfs_iomode iomode; 1041 bool send; 1042 1043 send = pnfs_prepare_layoutreturn(lo, &stateid, &iomode); 1044 spin_unlock(&inode->i_lock); 1045 if (send) { 1046 /* Send an async layoutreturn so we dont deadlock */ 1047 pnfs_send_layoutreturn(lo, &stateid, iomode, false); 1048 } 1049 } else 1050 spin_unlock(&inode->i_lock); 1051 } 1052 1053 /* 1054 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr 1055 * when the layout segment list is empty. 1056 * 1057 * Note that a pnfs_layout_hdr can exist with an empty layout segment 1058 * list when LAYOUTGET has failed, or when LAYOUTGET succeeded, but the 1059 * deviceid is marked invalid. 1060 */ 1061 int 1062 _pnfs_return_layout(struct inode *ino) 1063 { 1064 struct pnfs_layout_hdr *lo = NULL; 1065 struct nfs_inode *nfsi = NFS_I(ino); 1066 LIST_HEAD(tmp_list); 1067 nfs4_stateid stateid; 1068 int status = 0, empty; 1069 bool send; 1070 1071 dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); 1072 1073 spin_lock(&ino->i_lock); 1074 lo = nfsi->layout; 1075 if (!lo) { 1076 spin_unlock(&ino->i_lock); 1077 dprintk("NFS: %s no layout to return\n", __func__); 1078 goto out; 1079 } 1080 /* Reference matched in nfs4_layoutreturn_release */ 1081 pnfs_get_layout_hdr(lo); 1082 empty = list_empty(&lo->plh_segs); 1083 pnfs_clear_layoutcommit(ino, &tmp_list); 1084 pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0); 1085 1086 if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { 1087 struct pnfs_layout_range range = { 1088 .iomode = IOMODE_ANY, 1089 .offset = 0, 1090 .length = NFS4_MAX_UINT64, 1091 }; 1092 NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range); 1093 } 1094 1095 /* Don't send a LAYOUTRETURN if list was initially empty */ 1096 if (empty) { 1097 spin_unlock(&ino->i_lock); 1098 dprintk("NFS: %s no layout segments to return\n", __func__); 1099 goto out_put_layout_hdr; 1100 } 1101 1102 send = pnfs_prepare_layoutreturn(lo, &stateid, NULL); 1103 spin_unlock(&ino->i_lock); 1104 pnfs_free_lseg_list(&tmp_list); 1105 if (send) 1106 status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); 1107 out_put_layout_hdr: 1108 pnfs_put_layout_hdr(lo); 1109 out: 1110 dprintk("<-- %s status: %d\n", __func__, status); 1111 return status; 1112 } 1113 EXPORT_SYMBOL_GPL(_pnfs_return_layout); 1114 1115 int 1116 pnfs_commit_and_return_layout(struct inode *inode) 1117 { 1118 struct pnfs_layout_hdr *lo; 1119 int ret; 1120 1121 spin_lock(&inode->i_lock); 1122 lo = NFS_I(inode)->layout; 1123 if (lo == NULL) { 1124 spin_unlock(&inode->i_lock); 1125 return 0; 1126 } 1127 pnfs_get_layout_hdr(lo); 1128 /* Block new layoutgets and read/write to ds */ 1129 lo->plh_block_lgets++; 1130 spin_unlock(&inode->i_lock); 1131 filemap_fdatawait(inode->i_mapping); 1132 ret = 
pnfs_layoutcommit_inode(inode, true); 1133 if (ret == 0) 1134 ret = _pnfs_return_layout(inode); 1135 spin_lock(&inode->i_lock); 1136 lo->plh_block_lgets--; 1137 spin_unlock(&inode->i_lock); 1138 pnfs_put_layout_hdr(lo); 1139 return ret; 1140 } 1141 1142 bool pnfs_roc(struct inode *ino) 1143 { 1144 struct nfs_inode *nfsi = NFS_I(ino); 1145 struct nfs_open_context *ctx; 1146 struct nfs4_state *state; 1147 struct pnfs_layout_hdr *lo; 1148 struct pnfs_layout_segment *lseg, *tmp; 1149 nfs4_stateid stateid; 1150 LIST_HEAD(tmp_list); 1151 bool found = false, layoutreturn = false, roc = false; 1152 1153 spin_lock(&ino->i_lock); 1154 lo = nfsi->layout; 1155 if (!lo || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) 1156 goto out_noroc; 1157 1158 /* no roc if we hold a delegation */ 1159 if (nfs4_check_delegation(ino, FMODE_READ)) 1160 goto out_noroc; 1161 1162 list_for_each_entry(ctx, &nfsi->open_files, list) { 1163 state = ctx->state; 1164 /* Don't return layout if there is open file state */ 1165 if (state != NULL && state->state != 0) 1166 goto out_noroc; 1167 } 1168 1169 /* always send layoutreturn if being marked so */ 1170 if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) 1171 layoutreturn = pnfs_prepare_layoutreturn(lo, 1172 &stateid, NULL); 1173 1174 list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) 1175 /* If we are sending layoutreturn, invalidate all valid lsegs */ 1176 if (layoutreturn || test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { 1177 mark_lseg_invalid(lseg, &tmp_list); 1178 found = true; 1179 } 1180 /* ROC in two conditions: 1181 * 1. there are ROC lsegs 1182 * 2. we don't send layoutreturn 1183 */ 1184 if (found && !layoutreturn) { 1185 /* lo ref dropped in pnfs_roc_release() */ 1186 pnfs_get_layout_hdr(lo); 1187 roc = true; 1188 } 1189 1190 out_noroc: 1191 spin_unlock(&ino->i_lock); 1192 pnfs_free_lseg_list(&tmp_list); 1193 pnfs_layoutcommit_inode(ino, true); 1194 if (layoutreturn) 1195 pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); 1196 return roc; 1197 } 1198 1199 void pnfs_roc_release(struct inode *ino) 1200 { 1201 struct pnfs_layout_hdr *lo; 1202 1203 spin_lock(&ino->i_lock); 1204 lo = NFS_I(ino)->layout; 1205 pnfs_clear_layoutreturn_waitbit(lo); 1206 if (atomic_dec_and_test(&lo->plh_refcount)) { 1207 pnfs_detach_layout_hdr(lo); 1208 spin_unlock(&ino->i_lock); 1209 pnfs_free_layout_hdr(lo); 1210 } else 1211 spin_unlock(&ino->i_lock); 1212 } 1213 1214 void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) 1215 { 1216 struct pnfs_layout_hdr *lo; 1217 1218 spin_lock(&ino->i_lock); 1219 lo = NFS_I(ino)->layout; 1220 if (pnfs_seqid_is_newer(barrier, lo->plh_barrier)) 1221 lo->plh_barrier = barrier; 1222 spin_unlock(&ino->i_lock); 1223 trace_nfs4_layoutreturn_on_close(ino, 0); 1224 } 1225 1226 void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier) 1227 { 1228 struct nfs_inode *nfsi = NFS_I(ino); 1229 struct pnfs_layout_hdr *lo; 1230 u32 current_seqid; 1231 1232 spin_lock(&ino->i_lock); 1233 lo = nfsi->layout; 1234 current_seqid = be32_to_cpu(lo->plh_stateid.seqid); 1235 1236 /* Since close does not return a layout stateid for use as 1237 * a barrier, we choose the worst-case barrier. 1238 */ 1239 *barrier = current_seqid + atomic_read(&lo->plh_outstanding); 1240 spin_unlock(&ino->i_lock); 1241 } 1242 1243 bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) 1244 { 1245 struct nfs_inode *nfsi = NFS_I(ino); 1246 struct pnfs_layout_hdr *lo; 1247 bool sleep = false; 1248 1249 /* we might not have grabbed lo reference. 
so need to check under 1250 * i_lock */ 1251 spin_lock(&ino->i_lock); 1252 lo = nfsi->layout; 1253 if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) 1254 sleep = true; 1255 spin_unlock(&ino->i_lock); 1256 1257 if (sleep) 1258 rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); 1259 1260 return sleep; 1261 } 1262 1263 /* 1264 * Compare two layout segments for sorting into layout cache. 1265 * We want to preferentially return RW over RO layouts, so ensure those 1266 * are seen first. 1267 */ 1268 static s64 1269 pnfs_lseg_range_cmp(const struct pnfs_layout_range *l1, 1270 const struct pnfs_layout_range *l2) 1271 { 1272 s64 d; 1273 1274 /* high offset > low offset */ 1275 d = l1->offset - l2->offset; 1276 if (d) 1277 return d; 1278 1279 /* short length > long length */ 1280 d = l2->length - l1->length; 1281 if (d) 1282 return d; 1283 1284 /* read > read/write */ 1285 return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ); 1286 } 1287 1288 static bool 1289 pnfs_lseg_range_is_after(const struct pnfs_layout_range *l1, 1290 const struct pnfs_layout_range *l2) 1291 { 1292 return pnfs_lseg_range_cmp(l1, l2) > 0; 1293 } 1294 1295 static bool 1296 pnfs_lseg_no_merge(struct pnfs_layout_segment *lseg, 1297 struct pnfs_layout_segment *old) 1298 { 1299 return false; 1300 } 1301 1302 void 1303 pnfs_generic_layout_insert_lseg(struct pnfs_layout_hdr *lo, 1304 struct pnfs_layout_segment *lseg, 1305 bool (*is_after)(const struct pnfs_layout_range *, 1306 const struct pnfs_layout_range *), 1307 bool (*do_merge)(struct pnfs_layout_segment *, 1308 struct pnfs_layout_segment *), 1309 struct list_head *free_me) 1310 { 1311 struct pnfs_layout_segment *lp, *tmp; 1312 1313 dprintk("%s:Begin\n", __func__); 1314 1315 list_for_each_entry_safe(lp, tmp, &lo->plh_segs, pls_list) { 1316 if (test_bit(NFS_LSEG_VALID, &lp->pls_flags) == 0) 1317 continue; 1318 if (do_merge(lseg, lp)) { 1319 mark_lseg_invalid(lp, free_me); 1320 continue; 1321 } 1322 if (is_after(&lseg->pls_range, &lp->pls_range)) 1323 continue; 1324 list_add_tail(&lseg->pls_list, &lp->pls_list); 1325 dprintk("%s: inserted lseg %p " 1326 "iomode %d offset %llu length %llu before " 1327 "lp %p iomode %d offset %llu length %llu\n", 1328 __func__, lseg, lseg->pls_range.iomode, 1329 lseg->pls_range.offset, lseg->pls_range.length, 1330 lp, lp->pls_range.iomode, lp->pls_range.offset, 1331 lp->pls_range.length); 1332 goto out; 1333 } 1334 list_add_tail(&lseg->pls_list, &lo->plh_segs); 1335 dprintk("%s: inserted lseg %p " 1336 "iomode %d offset %llu length %llu at tail\n", 1337 __func__, lseg, lseg->pls_range.iomode, 1338 lseg->pls_range.offset, lseg->pls_range.length); 1339 out: 1340 pnfs_get_layout_hdr(lo); 1341 1342 dprintk("%s:Return\n", __func__); 1343 } 1344 EXPORT_SYMBOL_GPL(pnfs_generic_layout_insert_lseg); 1345 1346 static void 1347 pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo, 1348 struct pnfs_layout_segment *lseg, 1349 struct list_head *free_me) 1350 { 1351 struct inode *inode = lo->plh_inode; 1352 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 1353 1354 if (ld->add_lseg != NULL) 1355 ld->add_lseg(lo, lseg, free_me); 1356 else 1357 pnfs_generic_layout_insert_lseg(lo, lseg, 1358 pnfs_lseg_range_is_after, 1359 pnfs_lseg_no_merge, 1360 free_me); 1361 } 1362 1363 static struct pnfs_layout_hdr * 1364 alloc_init_layout_hdr(struct inode *ino, 1365 struct nfs_open_context *ctx, 1366 gfp_t gfp_flags) 1367 { 1368 struct pnfs_layout_hdr *lo; 1369 1370 lo = pnfs_alloc_layout_hdr(ino, gfp_flags); 1371 if (!lo) 1372 
return NULL; 1373 atomic_set(&lo->plh_refcount, 1); 1374 INIT_LIST_HEAD(&lo->plh_layouts); 1375 INIT_LIST_HEAD(&lo->plh_segs); 1376 INIT_LIST_HEAD(&lo->plh_bulk_destroy); 1377 lo->plh_inode = ino; 1378 lo->plh_lc_cred = get_rpccred(ctx->cred); 1379 lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID; 1380 return lo; 1381 } 1382 1383 static struct pnfs_layout_hdr * 1384 pnfs_find_alloc_layout(struct inode *ino, 1385 struct nfs_open_context *ctx, 1386 gfp_t gfp_flags) 1387 __releases(&ino->i_lock) 1388 __acquires(&ino->i_lock) 1389 { 1390 struct nfs_inode *nfsi = NFS_I(ino); 1391 struct pnfs_layout_hdr *new = NULL; 1392 1393 dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); 1394 1395 if (nfsi->layout != NULL) 1396 goto out_existing; 1397 spin_unlock(&ino->i_lock); 1398 new = alloc_init_layout_hdr(ino, ctx, gfp_flags); 1399 spin_lock(&ino->i_lock); 1400 1401 if (likely(nfsi->layout == NULL)) { /* Won the race? */ 1402 nfsi->layout = new; 1403 return new; 1404 } else if (new != NULL) 1405 pnfs_free_layout_hdr(new); 1406 out_existing: 1407 pnfs_get_layout_hdr(nfsi->layout); 1408 return nfsi->layout; 1409 } 1410 1411 /* 1412 * iomode matching rules: 1413 * iomode lseg strict match 1414 * iomode 1415 * ----- ----- ------ ----- 1416 * ANY READ N/A true 1417 * ANY RW N/A true 1418 * RW READ N/A false 1419 * RW RW N/A true 1420 * READ READ N/A true 1421 * READ RW true false 1422 * READ RW false true 1423 */ 1424 static bool 1425 pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range, 1426 const struct pnfs_layout_range *range, 1427 bool strict_iomode) 1428 { 1429 struct pnfs_layout_range range1; 1430 1431 if ((range->iomode == IOMODE_RW && 1432 ls_range->iomode != IOMODE_RW) || 1433 (range->iomode != ls_range->iomode && 1434 strict_iomode == true) || 1435 !pnfs_lseg_range_intersecting(ls_range, range)) 1436 return 0; 1437 1438 /* range1 covers only the first byte in the range */ 1439 range1 = *range; 1440 range1.length = 1; 1441 return pnfs_lseg_range_contained(ls_range, &range1); 1442 } 1443 1444 /* 1445 * lookup range in layout 1446 */ 1447 static struct pnfs_layout_segment * 1448 pnfs_find_lseg(struct pnfs_layout_hdr *lo, 1449 struct pnfs_layout_range *range, 1450 bool strict_iomode) 1451 { 1452 struct pnfs_layout_segment *lseg, *ret = NULL; 1453 1454 dprintk("%s:Begin\n", __func__); 1455 1456 list_for_each_entry(lseg, &lo->plh_segs, pls_list) { 1457 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && 1458 !test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) && 1459 pnfs_lseg_range_match(&lseg->pls_range, range, 1460 strict_iomode)) { 1461 ret = pnfs_get_lseg(lseg); 1462 break; 1463 } 1464 } 1465 1466 dprintk("%s:Return lseg %p ref %d\n", 1467 __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0); 1468 return ret; 1469 } 1470 1471 /* 1472 * Use mdsthreshold hints set at each OPEN to determine if I/O should go 1473 * to the MDS or over pNFS 1474 * 1475 * The nfs_inode read_io and write_io fields are cumulative counters reset 1476 * when there are no layout segments. Note that in pnfs_update_layout iomode 1477 * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a 1478 * WRITE request. 1479 * 1480 * A return of true means use MDS I/O. 1481 * 1482 * From rfc 5661: 1483 * If a file's size is smaller than the file size threshold, data accesses 1484 * SHOULD be sent to the metadata server. If an I/O request has a length that 1485 * is below the I/O size threshold, the I/O SHOULD be sent to the metadata 1486 * server. 
If both file size and I/O size are provided, the client SHOULD 1487 * reach or exceed both thresholds before sending its read or write 1488 * requests to the data server. 1489 */ 1490 static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx, 1491 struct inode *ino, int iomode) 1492 { 1493 struct nfs4_threshold *t = ctx->mdsthreshold; 1494 struct nfs_inode *nfsi = NFS_I(ino); 1495 loff_t fsize = i_size_read(ino); 1496 bool size = false, size_set = false, io = false, io_set = false, ret = false; 1497 1498 if (t == NULL) 1499 return ret; 1500 1501 dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n", 1502 __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz); 1503 1504 switch (iomode) { 1505 case IOMODE_READ: 1506 if (t->bm & THRESHOLD_RD) { 1507 dprintk("%s fsize %llu\n", __func__, fsize); 1508 size_set = true; 1509 if (fsize < t->rd_sz) 1510 size = true; 1511 } 1512 if (t->bm & THRESHOLD_RD_IO) { 1513 dprintk("%s nfsi->read_io %llu\n", __func__, 1514 nfsi->read_io); 1515 io_set = true; 1516 if (nfsi->read_io < t->rd_io_sz) 1517 io = true; 1518 } 1519 break; 1520 case IOMODE_RW: 1521 if (t->bm & THRESHOLD_WR) { 1522 dprintk("%s fsize %llu\n", __func__, fsize); 1523 size_set = true; 1524 if (fsize < t->wr_sz) 1525 size = true; 1526 } 1527 if (t->bm & THRESHOLD_WR_IO) { 1528 dprintk("%s nfsi->write_io %llu\n", __func__, 1529 nfsi->write_io); 1530 io_set = true; 1531 if (nfsi->write_io < t->wr_io_sz) 1532 io = true; 1533 } 1534 break; 1535 } 1536 if (size_set && io_set) { 1537 if (size && io) 1538 ret = true; 1539 } else if (size || io) 1540 ret = true; 1541 1542 dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret); 1543 return ret; 1544 } 1545 1546 static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) 1547 { 1548 /* 1549 * send layoutcommit as it can hold up layoutreturn due to lseg 1550 * reference 1551 */ 1552 pnfs_layoutcommit_inode(lo->plh_inode, false); 1553 return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN, 1554 nfs_wait_bit_killable, 1555 TASK_UNINTERRUPTIBLE); 1556 } 1557 1558 static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo) 1559 { 1560 unsigned long *bitlock = &lo->plh_flags; 1561 1562 clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock); 1563 smp_mb__after_atomic(); 1564 wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); 1565 } 1566 1567 /* 1568 * Layout segment is retreived from the server if not cached. 1569 * The appropriate layout segment is referenced and returned to the caller. 
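 *
 * The caller owns the returned reference and must drop it with
 * pnfs_put_lseg() once the I/O is done. A minimal sketch of the usual
 * calling pattern (cf. pnfs_generic_pg_init_read() below):
 *
 *	lseg = pnfs_update_layout(inode, ctx, pos, count,
 *				  IOMODE_READ, false, GFP_KERNEL);
 *	if (IS_ERR(lseg))
 *		handle the error and bail out;
 *	else if (lseg == NULL)
 *		fall back to I/O through the MDS;
 *	...
 *	pnfs_put_lseg(lseg);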
1570 */ 1571 struct pnfs_layout_segment * 1572 pnfs_update_layout(struct inode *ino, 1573 struct nfs_open_context *ctx, 1574 loff_t pos, 1575 u64 count, 1576 enum pnfs_iomode iomode, 1577 bool strict_iomode, 1578 gfp_t gfp_flags) 1579 { 1580 struct pnfs_layout_range arg = { 1581 .iomode = iomode, 1582 .offset = pos, 1583 .length = count, 1584 }; 1585 unsigned pg_offset, seq; 1586 struct nfs_server *server = NFS_SERVER(ino); 1587 struct nfs_client *clp = server->nfs_client; 1588 struct pnfs_layout_hdr *lo = NULL; 1589 struct pnfs_layout_segment *lseg = NULL; 1590 nfs4_stateid stateid; 1591 long timeout = 0; 1592 unsigned long giveup = jiffies + (clp->cl_lease_time << 1); 1593 bool first; 1594 1595 if (!pnfs_enabled_sb(NFS_SERVER(ino))) { 1596 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1597 PNFS_UPDATE_LAYOUT_NO_PNFS); 1598 goto out; 1599 } 1600 1601 if (iomode == IOMODE_READ && i_size_read(ino) == 0) { 1602 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1603 PNFS_UPDATE_LAYOUT_RD_ZEROLEN); 1604 goto out; 1605 } 1606 1607 if (pnfs_within_mdsthreshold(ctx, ino, iomode)) { 1608 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1609 PNFS_UPDATE_LAYOUT_MDSTHRESH); 1610 goto out; 1611 } 1612 1613 lookup_again: 1614 nfs4_client_recover_expired_lease(clp); 1615 first = false; 1616 spin_lock(&ino->i_lock); 1617 lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); 1618 if (lo == NULL) { 1619 spin_unlock(&ino->i_lock); 1620 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1621 PNFS_UPDATE_LAYOUT_NOMEM); 1622 goto out; 1623 } 1624 1625 /* Do we even need to bother with this? */ 1626 if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { 1627 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1628 PNFS_UPDATE_LAYOUT_BULK_RECALL); 1629 dprintk("%s matches recall, use MDS\n", __func__); 1630 goto out_unlock; 1631 } 1632 1633 /* if LAYOUTGET already failed once we don't try again */ 1634 if (pnfs_layout_io_test_failed(lo, iomode)) { 1635 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1636 PNFS_UPDATE_LAYOUT_IO_TEST_FAIL); 1637 goto out_unlock; 1638 } 1639 1640 lseg = pnfs_find_lseg(lo, &arg, strict_iomode); 1641 if (lseg) { 1642 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1643 PNFS_UPDATE_LAYOUT_FOUND_CACHED); 1644 goto out_unlock; 1645 } 1646 1647 if (!nfs4_valid_open_stateid(ctx->state)) { 1648 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1649 PNFS_UPDATE_LAYOUT_INVALID_OPEN); 1650 goto out_unlock; 1651 } 1652 1653 /* 1654 * Choose a stateid for the LAYOUTGET. If we don't have a layout 1655 * stateid, or it has been invalidated, then we must use the open 1656 * stateid. 1657 */ 1658 if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { 1659 1660 /* 1661 * The first layoutget for the file. Need to serialize per 1662 * RFC 5661 Errata 3208. 
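 *
 * Whoever loses the race on NFS_LAYOUT_FIRST_LAYOUTGET drops i_lock,
 * waits for the bit to clear and retries the lookup, so only a single
 * initial LAYOUTGET carries the open stateid.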
1663 */ 1664 if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, 1665 &lo->plh_flags)) { 1666 spin_unlock(&ino->i_lock); 1667 wait_on_bit(&lo->plh_flags, NFS_LAYOUT_FIRST_LAYOUTGET, 1668 TASK_UNINTERRUPTIBLE); 1669 pnfs_put_layout_hdr(lo); 1670 dprintk("%s retrying\n", __func__); 1671 goto lookup_again; 1672 } 1673 1674 first = true; 1675 do { 1676 seq = read_seqbegin(&ctx->state->seqlock); 1677 nfs4_stateid_copy(&stateid, &ctx->state->stateid); 1678 } while (read_seqretry(&ctx->state->seqlock, seq)); 1679 } else { 1680 nfs4_stateid_copy(&stateid, &lo->plh_stateid); 1681 } 1682 1683 /* 1684 * Because we free lsegs before sending LAYOUTRETURN, we need to wait 1685 * for LAYOUTRETURN even if first is true. 1686 */ 1687 if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { 1688 spin_unlock(&ino->i_lock); 1689 dprintk("%s wait for layoutreturn\n", __func__); 1690 if (pnfs_prepare_to_retry_layoutget(lo)) { 1691 if (first) 1692 pnfs_clear_first_layoutget(lo); 1693 pnfs_put_layout_hdr(lo); 1694 dprintk("%s retrying\n", __func__); 1695 trace_pnfs_update_layout(ino, pos, count, iomode, lo, 1696 lseg, PNFS_UPDATE_LAYOUT_RETRY); 1697 goto lookup_again; 1698 } 1699 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1700 PNFS_UPDATE_LAYOUT_RETURN); 1701 goto out_put_layout_hdr; 1702 } 1703 1704 if (pnfs_layoutgets_blocked(lo)) { 1705 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1706 PNFS_UPDATE_LAYOUT_BLOCKED); 1707 goto out_unlock; 1708 } 1709 atomic_inc(&lo->plh_outstanding); 1710 spin_unlock(&ino->i_lock); 1711 1712 if (list_empty(&lo->plh_layouts)) { 1713 /* The lo must be on the clp list if there is any 1714 * chance of a CB_LAYOUTRECALL(FILE) coming in. 1715 */ 1716 spin_lock(&clp->cl_lock); 1717 if (list_empty(&lo->plh_layouts)) 1718 list_add_tail(&lo->plh_layouts, &server->layouts); 1719 spin_unlock(&clp->cl_lock); 1720 } 1721 1722 pg_offset = arg.offset & ~PAGE_MASK; 1723 if (pg_offset) { 1724 arg.offset -= pg_offset; 1725 arg.length += pg_offset; 1726 } 1727 if (arg.length != NFS4_MAX_UINT64) 1728 arg.length = PAGE_ALIGN(arg.length); 1729 1730 lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags); 1731 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1732 PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); 1733 atomic_dec(&lo->plh_outstanding); 1734 if (IS_ERR(lseg)) { 1735 switch(PTR_ERR(lseg)) { 1736 case -EBUSY: 1737 if (time_after(jiffies, giveup)) 1738 lseg = NULL; 1739 break; 1740 case -ERECALLCONFLICT: 1741 /* Huh? We hold no layouts, how is there a recall? 
*/ 1742 if (first) { 1743 lseg = NULL; 1744 break; 1745 } 1746 /* Destroy the existing layout and start over */ 1747 if (time_after(jiffies, giveup)) 1748 pnfs_destroy_layout(NFS_I(ino)); 1749 /* Fallthrough */ 1750 case -EAGAIN: 1751 break; 1752 default: 1753 if (!nfs_error_is_fatal(PTR_ERR(lseg))) { 1754 pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); 1755 lseg = NULL; 1756 } 1757 goto out_put_layout_hdr; 1758 } 1759 if (lseg) { 1760 if (first) 1761 pnfs_clear_first_layoutget(lo); 1762 trace_pnfs_update_layout(ino, pos, count, 1763 iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY); 1764 pnfs_put_layout_hdr(lo); 1765 goto lookup_again; 1766 } 1767 } else { 1768 pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); 1769 } 1770 1771 out_put_layout_hdr: 1772 if (first) 1773 pnfs_clear_first_layoutget(lo); 1774 pnfs_put_layout_hdr(lo); 1775 out: 1776 dprintk("%s: inode %s/%llu pNFS layout segment %s for " 1777 "(%s, offset: %llu, length: %llu)\n", 1778 __func__, ino->i_sb->s_id, 1779 (unsigned long long)NFS_FILEID(ino), 1780 IS_ERR_OR_NULL(lseg) ? "not found" : "found", 1781 iomode==IOMODE_RW ? "read/write" : "read-only", 1782 (unsigned long long)pos, 1783 (unsigned long long)count); 1784 return lseg; 1785 out_unlock: 1786 spin_unlock(&ino->i_lock); 1787 goto out_put_layout_hdr; 1788 } 1789 EXPORT_SYMBOL_GPL(pnfs_update_layout); 1790 1791 static bool 1792 pnfs_sanity_check_layout_range(struct pnfs_layout_range *range) 1793 { 1794 switch (range->iomode) { 1795 case IOMODE_READ: 1796 case IOMODE_RW: 1797 break; 1798 default: 1799 return false; 1800 } 1801 if (range->offset == NFS4_MAX_UINT64) 1802 return false; 1803 if (range->length == 0) 1804 return false; 1805 if (range->length != NFS4_MAX_UINT64 && 1806 range->length > NFS4_MAX_UINT64 - range->offset) 1807 return false; 1808 return true; 1809 } 1810 1811 struct pnfs_layout_segment * 1812 pnfs_layout_process(struct nfs4_layoutget *lgp) 1813 { 1814 struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; 1815 struct nfs4_layoutget_res *res = &lgp->res; 1816 struct pnfs_layout_segment *lseg; 1817 struct inode *ino = lo->plh_inode; 1818 LIST_HEAD(free_me); 1819 1820 if (!pnfs_sanity_check_layout_range(&res->range)) 1821 return ERR_PTR(-EINVAL); 1822 1823 /* Inject layout blob into I/O device driver */ 1824 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); 1825 if (IS_ERR_OR_NULL(lseg)) { 1826 if (!lseg) 1827 lseg = ERR_PTR(-ENOMEM); 1828 1829 dprintk("%s: Could not allocate layout: error %ld\n", 1830 __func__, PTR_ERR(lseg)); 1831 return lseg; 1832 } 1833 1834 pnfs_init_lseg(lo, lseg, &res->range, &res->stateid); 1835 1836 spin_lock(&ino->i_lock); 1837 if (pnfs_layoutgets_blocked(lo)) { 1838 dprintk("%s forget reply due to state\n", __func__); 1839 goto out_forget; 1840 } 1841 1842 if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { 1843 /* existing state ID, make sure the sequence number matches. */ 1844 if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { 1845 dprintk("%s forget reply due to sequence\n", __func__); 1846 goto out_forget; 1847 } 1848 pnfs_set_layout_stateid(lo, &res->stateid, false); 1849 } else { 1850 /* 1851 * We got an entirely new state ID. Mark all segments for the 1852 * inode invalid, and don't bother validating the stateid 1853 * sequence number. 
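 *
 * As the layout was just marked invalid, pnfs_set_layout_stateid() below
 * installs the new stateid and resets plh_barrier to its seqid.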
1854 */ 1855 pnfs_mark_layout_stateid_invalid(lo, &free_me); 1856 1857 pnfs_set_layout_stateid(lo, &res->stateid, true); 1858 } 1859 1860 pnfs_get_lseg(lseg); 1861 pnfs_layout_insert_lseg(lo, lseg, &free_me); 1862 1863 1864 if (res->return_on_close) 1865 set_bit(NFS_LSEG_ROC, &lseg->pls_flags); 1866 1867 spin_unlock(&ino->i_lock); 1868 pnfs_free_lseg_list(&free_me); 1869 return lseg; 1870 1871 out_forget: 1872 spin_unlock(&ino->i_lock); 1873 lseg->pls_layout = lo; 1874 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); 1875 return ERR_PTR(-EAGAIN); 1876 } 1877 1878 static void 1879 pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, 1880 u32 seq) 1881 { 1882 if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode) 1883 iomode = IOMODE_ANY; 1884 lo->plh_return_iomode = iomode; 1885 set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); 1886 if (seq != 0) { 1887 WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq); 1888 lo->plh_return_seq = seq; 1889 } 1890 } 1891 1892 /** 1893 * pnfs_mark_matching_lsegs_return - Free or return matching layout segments 1894 * @lo: pointer to layout header 1895 * @tmp_list: list header to be used with pnfs_free_lseg_list() 1896 * @return_range: describe layout segment ranges to be returned 1897 * 1898 * This function is mainly intended for use by layoutrecall. It attempts 1899 * to free the layout segment immediately, or else to mark it for return 1900 * as soon as its reference count drops to zero. 1901 */ 1902 int 1903 pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, 1904 struct list_head *tmp_list, 1905 const struct pnfs_layout_range *return_range, 1906 u32 seq) 1907 { 1908 struct pnfs_layout_segment *lseg, *next; 1909 int remaining = 0; 1910 1911 dprintk("%s:Begin lo %p\n", __func__, lo); 1912 1913 if (list_empty(&lo->plh_segs)) 1914 return 0; 1915 1916 assert_spin_locked(&lo->plh_inode->i_lock); 1917 1918 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 1919 if (pnfs_match_lseg_recall(lseg, return_range, seq)) { 1920 dprintk("%s: marking lseg %p iomode %d " 1921 "offset %llu length %llu\n", __func__, 1922 lseg, lseg->pls_range.iomode, 1923 lseg->pls_range.offset, 1924 lseg->pls_range.length); 1925 if (mark_lseg_invalid(lseg, tmp_list)) 1926 continue; 1927 remaining++; 1928 set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); 1929 } 1930 1931 if (remaining) 1932 pnfs_set_plh_return_info(lo, return_range->iomode, seq); 1933 1934 return remaining; 1935 } 1936 1937 void pnfs_error_mark_layout_for_return(struct inode *inode, 1938 struct pnfs_layout_segment *lseg) 1939 { 1940 struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; 1941 struct pnfs_layout_range range = { 1942 .iomode = lseg->pls_range.iomode, 1943 .offset = 0, 1944 .length = NFS4_MAX_UINT64, 1945 }; 1946 LIST_HEAD(free_me); 1947 bool return_now = false; 1948 1949 spin_lock(&inode->i_lock); 1950 pnfs_set_plh_return_info(lo, range.iomode, 0); 1951 /* 1952 * mark all matching lsegs so that we are sure to have no live 1953 * segments at hand when sending layoutreturn. See pnfs_put_lseg() 1954 * for how it works. 
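 *
 * If no matching segment is still in use, the LAYOUTRETURN can be sent
 * immediately; otherwise a commit is kicked off so that the outstanding
 * references (e.g. held by commit buckets) get dropped first.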
	 */
	if (!pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0)) {
		nfs4_stateid stateid;
		enum pnfs_iomode iomode;

		return_now = pnfs_prepare_layoutreturn(lo, &stateid, &iomode);
		spin_unlock(&inode->i_lock);
		if (return_now)
			pnfs_send_layoutreturn(lo, &stateid, iomode, false);
	} else {
		spin_unlock(&inode->i_lock);
		nfs_commit_inode(inode, 0);
	}
	pnfs_free_lseg_list(&free_me);
}
EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);

void
pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
	u64 rd_size = req->wb_bytes;

	if (pgio->pg_lseg == NULL) {
		if (pgio->pg_dreq == NULL)
			rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
		else
			rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);

		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   req->wb_context,
						   req_offset(req),
						   rd_size,
						   IOMODE_READ,
						   false,
						   GFP_KERNEL);
		if (IS_ERR(pgio->pg_lseg)) {
			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
			pgio->pg_lseg = NULL;
			return;
		}
	}
	/* If no lseg, fall back to read through mds */
	if (pgio->pg_lseg == NULL)
		nfs_pageio_reset_read_mds(pgio);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);

void
pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
			   struct nfs_page *req, u64 wb_size)
{
	if (pgio->pg_lseg == NULL) {
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   req->wb_context,
						   req_offset(req),
						   wb_size,
						   IOMODE_RW,
						   false,
						   GFP_NOFS);
		if (IS_ERR(pgio->pg_lseg)) {
			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
			pgio->pg_lseg = NULL;
			return;
		}
	}
	/* If no lseg, fall back to write through mds */
	if (pgio->pg_lseg == NULL)
		nfs_pageio_reset_write_mds(pgio);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);

void
pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc)
{
	if (desc->pg_lseg) {
		pnfs_put_lseg(desc->pg_lseg);
		desc->pg_lseg = NULL;
	}
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup);

/*
 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
 * of bytes (maximum @req->wb_bytes) that can be coalesced.
 */
size_t
pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
		     struct nfs_page *prev, struct nfs_page *req)
{
	unsigned int size;
	u64 seg_end, req_start, seg_left;

	size = nfs_generic_pg_test(pgio, prev, req);
	if (!size)
		return 0;

	/*
	 * 'size' contains the number of bytes left in the current page (up
	 * to the original size asked for in @req->wb_bytes).
	 *
	 * Calculate how many bytes are left in the layout segment
	 * and if there are fewer bytes than 'size', return that instead.
	 *
	 * Please also note that 'end_offset' is actually the offset of the
	 * first byte that lies outside the pnfs_layout_range. FIXME?
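	 * In other words, 'seg_end' below is exclusive: a request that starts
	 * at or beyond it no longer fits in this layout segment.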
	 *
	 */
	if (pgio->pg_lseg) {
		seg_end = end_offset(pgio->pg_lseg->pls_range.offset,
				     pgio->pg_lseg->pls_range.length);
		req_start = req_offset(req);
		WARN_ON_ONCE(req_start >= seg_end);
		/* start of request is past the last byte of this segment */
		if (req_start >= seg_end) {
			/* reference the new lseg */
			if (pgio->pg_ops->pg_cleanup)
				pgio->pg_ops->pg_cleanup(pgio);
			if (pgio->pg_ops->pg_init)
				pgio->pg_ops->pg_init(pgio, req);
			return 0;
		}

		/* adjust 'size' iff there are fewer bytes left in the
		 * segment than what nfs_generic_pg_test returned */
		seg_left = seg_end - req_start;
		if (seg_left < size)
			size = (unsigned int)seg_left;
	}

	return size;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);

int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
{
	struct nfs_pageio_descriptor pgio;

	/* Resend all requests through the MDS */
	nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
			      hdr->completion_ops);
	set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags);
	return nfs_pageio_resend(&pgio, hdr);
}
EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);

static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr)
{
	dprintk("pnfs write error = %d\n", hdr->pnfs_error);
	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
	    PNFS_LAYOUTRET_ON_ERROR) {
		pnfs_return_layout(hdr->inode);
	}
	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
		hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr);
}

/*
 * Called by non rpc-based layout drivers
 */
void pnfs_ld_write_done(struct nfs_pgio_header *hdr)
{
	if (likely(!hdr->pnfs_error)) {
		pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
				hdr->mds_offset + hdr->res.count);
		hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
	}
	trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
	if (unlikely(hdr->pnfs_error))
		pnfs_ld_handle_write_error(hdr);
	hdr->mds_ops->rpc_release(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);

static void
pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
		struct nfs_pgio_header *hdr)
{
	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		list_splice_tail_init(&hdr->pages, &mirror->pg_list);
		nfs_pageio_reset_write_mds(desc);
		mirror->pg_recoalesce = 1;
	}
	nfs_pgio_data_destroy(hdr);
	hdr->release(hdr);
}

static enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_pgio_header *hdr,
			const struct rpc_call_ops *call_ops,
			struct pnfs_layout_segment *lseg,
			int how)
{
	struct inode *inode = hdr->inode;
	enum pnfs_try_status trypnfs;
	struct nfs_server *nfss = NFS_SERVER(inode);

	hdr->mds_ops = call_ops;

	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
		inode->i_ino, hdr->args.count, hdr->args.offset, how);
	trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how);
	if (trypnfs != PNFS_NOT_ATTEMPTED)
		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
	return trypnfs;
}

static void
pnfs_do_write(struct nfs_pageio_descriptor *desc,
	      struct nfs_pgio_header *hdr, int how)
{
	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
	struct pnfs_layout_segment *lseg = desc->pg_lseg;
	enum pnfs_try_status trypnfs;

	trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
	if (trypnfs == PNFS_NOT_ATTEMPTED)
		pnfs_write_through_mds(desc, hdr);
}

static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
{
	pnfs_put_lseg(hdr->lseg);
	nfs_pgio_header_free(hdr);
}

int
pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
	struct nfs_pgio_header *hdr;
	int ret;

	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
	if (!hdr) {
		desc->pg_error = -ENOMEM;
		return desc->pg_error;
	}
	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);

	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
	ret = nfs_generic_pgio(desc, hdr);
	if (!ret)
		pnfs_do_write(desc, hdr, desc->pg_ioflags);

	return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);

int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr)
{
	struct nfs_pageio_descriptor pgio;

	/* Resend all requests through the MDS */
	nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops);
	return nfs_pageio_resend(&pgio, hdr);
}
EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);

static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr)
{
	dprintk("pnfs read error = %d\n", hdr->pnfs_error);
	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
	    PNFS_LAYOUTRET_ON_ERROR) {
		pnfs_return_layout(hdr->inode);
	}
	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
		hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr);
}

/*
 * Called by non rpc-based layout drivers
 */
void pnfs_ld_read_done(struct nfs_pgio_header *hdr)
{
	if (likely(!hdr->pnfs_error))
		hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
	trace_nfs4_pnfs_read(hdr, hdr->pnfs_error);
	if (unlikely(hdr->pnfs_error))
		pnfs_ld_handle_read_error(hdr);
	hdr->mds_ops->rpc_release(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_ld_read_done);

static void
pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
		struct nfs_pgio_header *hdr)
{
	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		list_splice_tail_init(&hdr->pages, &mirror->pg_list);
		nfs_pageio_reset_read_mds(desc);
		mirror->pg_recoalesce = 1;
	}
	nfs_pgio_data_destroy(hdr);
	hdr->release(hdr);
}

/*
 * Call the appropriate parallel I/O subsystem read function.
 */
static enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_pgio_header *hdr,
		       const struct rpc_call_ops *call_ops,
		       struct pnfs_layout_segment *lseg)
{
	struct inode *inode = hdr->inode;
	struct nfs_server *nfss = NFS_SERVER(inode);
	enum pnfs_try_status trypnfs;

	hdr->mds_ops = call_ops;

	dprintk("%s: Reading ino:%lu %u@%llu\n",
		__func__, inode->i_ino, hdr->args.count, hdr->args.offset);

	trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr);
	if (trypnfs != PNFS_NOT_ATTEMPTED)
		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
	return trypnfs;
}

/* Resend all requests through pnfs.
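 * Used e.g. when the layout driver returns PNFS_TRY_AGAIN for a read;
 * see pnfs_do_read() below.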
 */
void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr)
{
	struct nfs_pageio_descriptor pgio;

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		nfs_pageio_init_read(&pgio, hdr->inode, false,
					hdr->completion_ops);
		hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr);
	}
}
EXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs);

static void
pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
{
	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
	struct pnfs_layout_segment *lseg = desc->pg_lseg;
	enum pnfs_try_status trypnfs;

	trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
	if (trypnfs == PNFS_TRY_AGAIN)
		pnfs_read_resend_pnfs(hdr);
	if (trypnfs == PNFS_NOT_ATTEMPTED || hdr->task.tk_status)
		pnfs_read_through_mds(desc, hdr);
}

static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
{
	pnfs_put_lseg(hdr->lseg);
	nfs_pgio_header_free(hdr);
}

int
pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
	struct nfs_pgio_header *hdr;
	int ret;

	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
	if (!hdr) {
		desc->pg_error = -ENOMEM;
		return desc->pg_error;
	}
	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
	ret = nfs_generic_pgio(desc, hdr);
	if (!ret)
		pnfs_do_read(desc, hdr);
	return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);

static void pnfs_clear_layoutcommitting(struct inode *inode)
{
	unsigned long *bitlock = &NFS_I(inode)->flags;

	clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
	smp_mb__after_atomic();
	wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
}

/*
 * There can be multiple RW segments.
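 * Collect every RW segment that is still flagged NFS_LSEG_LAYOUTCOMMIT
 * onto @listp so that a single LAYOUTCOMMIT can cover them all.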
 */
static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
{
	struct pnfs_layout_segment *lseg;

	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
		if (lseg->pls_range.iomode == IOMODE_RW &&
		    test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
			list_add(&lseg->pls_lc_list, listp);
	}
}

static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
{
	struct pnfs_layout_segment *lseg, *tmp;

	/* Matched by references in pnfs_set_layoutcommit */
	list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
		list_del_init(&lseg->pls_lc_list);
		pnfs_put_lseg(lseg);
	}

	pnfs_clear_layoutcommitting(inode);
}

void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
{
	pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
}
EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);

void
pnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg,
		loff_t end_pos)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	bool mark_as_dirty = false;

	spin_lock(&inode->i_lock);
	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
		nfsi->layout->plh_lwb = end_pos;
		mark_as_dirty = true;
		dprintk("%s: Set layoutcommit for inode %lu ",
			__func__, inode->i_ino);
	} else if (end_pos > nfsi->layout->plh_lwb)
		nfsi->layout->plh_lwb = end_pos;
	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) {
		/* references matched in nfs4_layoutcommit_release */
		pnfs_get_lseg(lseg);
	}
	spin_unlock(&inode->i_lock);
	dprintk("%s: lseg %p end_pos %llu\n",
		__func__, lseg, nfsi->layout->plh_lwb);

	/* if pnfs_layoutcommit_inode() runs between inode locks, the next one
	 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
	if (mark_as_dirty)
		mark_inode_dirty_sync(inode);
}
EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);

void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
{
	struct nfs_server *nfss = NFS_SERVER(data->args.inode);

	if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
		nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
	pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
}

/*
 * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
 * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
 * data to disk to allow the server to recover the data if it crashes.
 * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
 * is off, and a COMMIT is sent to a data server, or
 * if WRITEs to a data server return NFS_DATA_SYNC.
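 * The lastbytewritten value sent below is derived from plh_lwb, which
 * pnfs_set_layoutcommit() maintains as the end of the furthest write
 * seen so far.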
 */
int
pnfs_layoutcommit_inode(struct inode *inode, bool sync)
{
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
	struct nfs4_layoutcommit_data *data;
	struct nfs_inode *nfsi = NFS_I(inode);
	loff_t end_pos;
	int status;

	if (!pnfs_layoutcommit_outstanding(inode))
		return 0;

	dprintk("--> %s inode %lu\n", __func__, inode->i_ino);

	status = -EAGAIN;
	if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
		if (!sync)
			goto out;
		status = wait_on_bit_lock_action(&nfsi->flags,
				NFS_INO_LAYOUTCOMMITTING,
				nfs_wait_bit_killable,
				TASK_KILLABLE);
		if (status)
			goto out;
	}

	status = -ENOMEM;
	/* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
	data = kzalloc(sizeof(*data), GFP_NOFS);
	if (!data)
		goto clear_layoutcommitting;

	status = 0;
	spin_lock(&inode->i_lock);
	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
		goto out_unlock;

	INIT_LIST_HEAD(&data->lseg_list);
	pnfs_list_write_lseg(inode, &data->lseg_list);

	end_pos = nfsi->layout->plh_lwb;

	nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid);
	spin_unlock(&inode->i_lock);

	data->args.inode = inode;
	data->cred = get_rpccred(nfsi->layout->plh_lc_cred);
	nfs_fattr_init(&data->fattr);
	data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
	data->res.fattr = &data->fattr;
	if (end_pos != 0)
		data->args.lastbytewritten = end_pos - 1;
	else
		data->args.lastbytewritten = U64_MAX;
	data->res.server = NFS_SERVER(inode);

	if (ld->prepare_layoutcommit) {
		status = ld->prepare_layoutcommit(&data->args);
		if (status) {
			put_rpccred(data->cred);
			spin_lock(&inode->i_lock);
			set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
			if (end_pos > nfsi->layout->plh_lwb)
				nfsi->layout->plh_lwb = end_pos;
			goto out_unlock;
		}
	}

	status = nfs4_proc_layoutcommit(data, sync);
out:
	if (status)
		mark_inode_dirty_sync(inode);
	dprintk("<-- %s status %d\n", __func__, status);
	return status;
out_unlock:
	spin_unlock(&inode->i_lock);
	kfree(data);
clear_layoutcommitting:
	pnfs_clear_layoutcommitting(inode);
	goto out;
}
EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode);

int
pnfs_generic_sync(struct inode *inode, bool datasync)
{
	return pnfs_layoutcommit_inode(inode, true);
}
EXPORT_SYMBOL_GPL(pnfs_generic_sync);

struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
{
	struct nfs4_threshold *thp;

	thp = kzalloc(sizeof(*thp), GFP_NOFS);
	if (!thp) {
		dprintk("%s mdsthreshold allocation failed\n", __func__);
		return NULL;
	}
	return thp;
}

#if IS_ENABLED(CONFIG_NFS_V4_2)
int
pnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags)
{
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
	struct nfs_server *server = NFS_SERVER(inode);
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs42_layoutstat_data *data;
	struct pnfs_layout_hdr *hdr;
	int status = 0;

	if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats)
		goto out;

	if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS))
		goto out;

	if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags))
		goto out;

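	/*
	 * Pin the layout header so that it cannot be freed while the
	 * LAYOUTSTATS call is being prepared and sent.
	 */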
	spin_lock(&inode->i_lock);
	if (!NFS_I(inode)->layout) {
		spin_unlock(&inode->i_lock);
		goto out_clear_layoutstats;
	}
	hdr = NFS_I(inode)->layout;
	pnfs_get_layout_hdr(hdr);
	spin_unlock(&inode->i_lock);

	data = kzalloc(sizeof(*data), gfp_flags);
	if (!data) {
		status = -ENOMEM;
		goto out_put;
	}

	data->args.fh = NFS_FH(inode);
	data->args.inode = inode;
	status = ld->prepare_layoutstats(&data->args);
	if (status)
		goto out_free;

	status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data);

out:
	dprintk("%s returns %d\n", __func__, status);
	return status;

out_free:
	kfree(data);
out_put:
	pnfs_put_layout_hdr(hdr);
out_clear_layoutstats:
	smp_mb__before_atomic();
	clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags);
	smp_mb__after_atomic();
	goto out;
}
EXPORT_SYMBOL_GPL(pnfs_report_layoutstat);
#endif

unsigned int layoutstats_timer;
module_param(layoutstats_timer, uint, 0644);
EXPORT_SYMBOL_GPL(layoutstats_timer);