/*
 * Module for pnfs flexfile layout driver.
 *
 * Copyright (c) 2014, Primary Data, Inc. All rights reserved.
 *
 * Tao Peng <bergwolf@primarydata.com>
 */

#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
#include <linux/sched/mm.h>

#include <linux/sunrpc/metrics.h>

#include "flexfilelayout.h"
#include "../nfs4session.h"
#include "../nfs4idmap.h"
#include "../internal.h"
#include "../delegation.h"
#include "../nfs4trace.h"
#include "../iostat.h"
#include "../nfs.h"
#include "../nfs42.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS_LD

#define FF_LAYOUT_POLL_RETRY_MAX	(15*HZ)
#define FF_LAYOUTRETURN_MAXERR		20

static unsigned short io_maxretrans;

static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
		struct nfs_pgio_header *hdr);
static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
		struct nfs42_layoutstat_devinfo *devinfo,
		int dev_limit);
static void ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr,
		const struct nfs42_layoutstat_devinfo *devinfo,
		struct nfs4_ff_layout_mirror *mirror);

static struct pnfs_layout_hdr *
ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
{
	struct nfs4_flexfile_layout *ffl;

	ffl = kzalloc(sizeof(*ffl), gfp_flags);
	if (ffl) {
		INIT_LIST_HEAD(&ffl->error_list);
		INIT_LIST_HEAD(&ffl->mirrors);
		ffl->last_report_time = ktime_get();
		return &ffl->generic_hdr;
	} else
		return NULL;
}

static void
ff_layout_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct nfs4_ff_layout_ds_err *err, *n;

	list_for_each_entry_safe(err, n, &FF_LAYOUT_FROM_HDR(lo)->error_list,
				 list) {
		list_del(&err->list);
		kfree(err);
	}
	kfree(FF_LAYOUT_FROM_HDR(lo));
}

static int decode_pnfs_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, NFS4_STATEID_SIZE);
	if (unlikely(p == NULL))
		return -ENOBUFS;
	stateid->type = NFS4_PNFS_DS_STATEID_TYPE;
	memcpy(stateid->data, p, NFS4_STATEID_SIZE);
	dprintk("%s: stateid id= [%x%x%x%x]\n", __func__,
		p[0], p[1], p[2], p[3]);
	return 0;
}

static int decode_deviceid(struct xdr_stream *xdr, struct nfs4_deviceid *devid)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, NFS4_DEVICEID4_SIZE);
	if (unlikely(!p))
		return -ENOBUFS;
	memcpy(devid, p, NFS4_DEVICEID4_SIZE);
	nfs4_print_deviceid(devid);
	return 0;
}

static int decode_nfs_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, 4);
	if (unlikely(!p))
		return -ENOBUFS;
	fh->size = be32_to_cpup(p++);
	if (fh->size > sizeof(struct nfs_fh)) {
		printk(KERN_ERR "NFS flexfiles: Too big fh received %d\n",
		       fh->size);
		return -EOVERFLOW;
	}
	/* fh.data */
	p = xdr_inline_decode(xdr, fh->size);
	if (unlikely(!p))
		return -ENOBUFS;
	memcpy(&fh->data, p, fh->size);
	dprintk("%s: fh len %d\n", __func__, fh->size);

	return 0;
}

/*
 * Currently only stringified uids and gids are accepted.
 * I.e., Kerberos is not supported to the DSes, so no principals.
 *
 * That means that one common function will suffice, but when
 * principals are added, this should be split to accommodate
 * calls to both nfs_map_name_to_uid() and nfs_map_group_to_gid().
 */
static int
decode_name(struct xdr_stream *xdr, u32 *id)
{
	__be32 *p;
	int len;

	/* opaque_length(4) */
	p = xdr_inline_decode(xdr, 4);
	if (unlikely(!p))
		return -ENOBUFS;
	len = be32_to_cpup(p++);
	if (len < 0)
		return -EINVAL;

	dprintk("%s: len %u\n", __func__, len);

	/* opaque body */
	p = xdr_inline_decode(xdr, len);
	if (unlikely(!p))
		return -ENOBUFS;

	if (!nfs_map_string_to_numeric((char *)p, len, id))
		return -EINVAL;

	return 0;
}

static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
		const struct nfs4_ff_layout_mirror *m2)
{
	int i, j;

	if (m1->fh_versions_cnt != m2->fh_versions_cnt)
		return false;
	for (i = 0; i < m1->fh_versions_cnt; i++) {
		bool found_fh = false;
		for (j = 0; j < m2->fh_versions_cnt; j++) {
			if (nfs_compare_fh(&m1->fh_versions[i],
					&m2->fh_versions[j]) == 0) {
				found_fh = true;
				break;
			}
		}
		if (!found_fh)
			return false;
	}
	return true;
}

static struct nfs4_ff_layout_mirror *
ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
		struct nfs4_ff_layout_mirror *mirror)
{
	struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(lo);
	struct nfs4_ff_layout_mirror *pos;
	struct inode *inode = lo->plh_inode;

	spin_lock(&inode->i_lock);
	list_for_each_entry(pos, &ff_layout->mirrors, mirrors) {
		if (memcmp(&mirror->devid, &pos->devid, sizeof(pos->devid)) != 0)
			continue;
		if (!ff_mirror_match_fh(mirror, pos))
			continue;
		if (refcount_inc_not_zero(&pos->ref)) {
			spin_unlock(&inode->i_lock);
			return pos;
		}
	}
	list_add(&mirror->mirrors, &ff_layout->mirrors);
	mirror->layout = lo;
	spin_unlock(&inode->i_lock);
	return mirror;
}

static void
ff_layout_remove_mirror(struct nfs4_ff_layout_mirror *mirror)
{
	struct inode *inode;

	if (mirror->layout == NULL)
		return;
	inode = mirror->layout->plh_inode;
	spin_lock(&inode->i_lock);
	list_del(&mirror->mirrors);
	spin_unlock(&inode->i_lock);
	mirror->layout = NULL;
}

static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
{
	struct nfs4_ff_layout_mirror *mirror;

	mirror = kzalloc(sizeof(*mirror), gfp_flags);
	if (mirror != NULL) {
		spin_lock_init(&mirror->lock);
		refcount_set(&mirror->ref, 1);
		INIT_LIST_HEAD(&mirror->mirrors);
	}
	return mirror;
}

static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
{
	const struct cred *cred;

	ff_layout_remove_mirror(mirror);
	kfree(mirror->fh_versions);
	cred = rcu_access_pointer(mirror->ro_cred);
	put_cred(cred);
	cred = rcu_access_pointer(mirror->rw_cred);
	put_cred(cred);
	nfs4_ff_layout_put_deviceid(mirror->mirror_ds);
	kfree(mirror);
}

static void ff_layout_put_mirror(struct nfs4_ff_layout_mirror *mirror)
{
	if (mirror != NULL && refcount_dec_and_test(&mirror->ref))
		ff_layout_free_mirror(mirror);
}

static void ff_layout_free_mirror_array(struct nfs4_ff_layout_segment *fls)
{
	int i;

	if (fls->mirror_array) {
		for (i = 0; i < fls->mirror_array_cnt; i++) {
			/* normally mirror_ds is freed in
			 * .free_deviceid_node but we still do it here
			 * for .alloc_lseg error path */
			ff_layout_put_mirror(fls->mirror_array[i]);
		}
		kfree(fls->mirror_array);
		fls->mirror_array = NULL;
	}
}
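/*
 * A note on mirror lifetime, summarizing the helpers above: mirrors are
 * created with a refcount of 1 by ff_layout_alloc_mirror(), and
 * ff_layout_add_mirror() either links the new mirror into the layout's
 * mirror list or, when an entry with the same deviceid and file handles
 * already exists, takes a reference on the existing one instead, so a
 * single mirror can be shared by several layout segments. The final
 * ff_layout_put_mirror() unlinks the mirror and releases its file
 * handles, credentials and deviceid node.
 */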
static int ff_layout_check_layout(struct nfs4_layoutget_res *lgr)
{
	int ret = 0;

	dprintk("--> %s\n", __func__);

	/* FIXME: remove this check when layout segment support is added */
	if (lgr->range.offset != 0 ||
	    lgr->range.length != NFS4_MAX_UINT64) {
		dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
			__func__);
		ret = -EINVAL;
	}

	dprintk("--> %s returns %d\n", __func__, ret);
	return ret;
}

static void _ff_layout_free_lseg(struct nfs4_ff_layout_segment *fls)
{
	if (fls) {
		ff_layout_free_mirror_array(fls);
		kfree(fls);
	}
}

static bool
ff_lseg_range_is_after(const struct pnfs_layout_range *l1,
		const struct pnfs_layout_range *l2)
{
	u64 end1, end2;

	if (l1->iomode != l2->iomode)
		return l1->iomode != IOMODE_READ;
	end1 = pnfs_calc_offset_end(l1->offset, l1->length);
	end2 = pnfs_calc_offset_end(l2->offset, l2->length);
	if (end1 < l2->offset)
		return false;
	if (end2 < l1->offset)
		return true;
	return l2->offset <= l1->offset;
}

static bool
ff_lseg_merge(struct pnfs_layout_segment *new,
		struct pnfs_layout_segment *old)
{
	u64 new_end, old_end;

	if (test_bit(NFS_LSEG_LAYOUTRETURN, &old->pls_flags))
		return false;
	if (new->pls_range.iomode != old->pls_range.iomode)
		return false;
	old_end = pnfs_calc_offset_end(old->pls_range.offset,
			old->pls_range.length);
	if (old_end < new->pls_range.offset)
		return false;
	new_end = pnfs_calc_offset_end(new->pls_range.offset,
			new->pls_range.length);
	if (new_end < old->pls_range.offset)
		return false;

	/* Mergeable: copy info from 'old' to 'new' */
	if (new_end < old_end)
		new_end = old_end;
	if (new->pls_range.offset < old->pls_range.offset)
		new->pls_range.offset = old->pls_range.offset;
	new->pls_range.length = pnfs_calc_offset_length(new->pls_range.offset,
			new_end);
	if (test_bit(NFS_LSEG_ROC, &old->pls_flags))
		set_bit(NFS_LSEG_ROC, &new->pls_flags);
	return true;
}

static void
ff_layout_add_lseg(struct pnfs_layout_hdr *lo,
		struct pnfs_layout_segment *lseg,
		struct list_head *free_me)
{
	pnfs_generic_layout_insert_lseg(lo, lseg,
			ff_lseg_range_is_after,
			ff_lseg_merge,
			free_me);
}

static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)
{
	int i, j;

	for (i = 0; i < fls->mirror_array_cnt - 1; i++) {
		for (j = i + 1; j < fls->mirror_array_cnt; j++)
			if (fls->mirror_array[i]->efficiency <
			    fls->mirror_array[j]->efficiency)
				swap(fls->mirror_array[i],
				     fls->mirror_array[j]);
	}
}
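/*
 * Rough sketch of the flexfiles layout body as decoded below (see the
 * flexfiles layout specification, RFC 8435, for the authoritative XDR):
 *
 *	stripe unit			(8 bytes)
 *	mirror count			(4 bytes)
 *	per mirror:
 *		data server count	(4 bytes, must be 1: no striping yet)
 *		deviceid
 *		efficiency		(4 bytes)
 *		stateid
 *		file handle array
 *		user and group		(stringified uid/gid, see decode_name())
 *	flags				(4 bytes, optional)
 *	stats collection hint		(4 bytes, optional, per-mirror
 *					 report_interval)
 */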
static struct pnfs_layout_segment *
ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
		     struct nfs4_layoutget_res *lgr,
		     gfp_t gfp_flags)
{
	struct pnfs_layout_segment *ret;
	struct nfs4_ff_layout_segment *fls = NULL;
	struct xdr_stream stream;
	struct xdr_buf buf;
	struct page *scratch;
	u64 stripe_unit;
	u32 mirror_array_cnt;
	__be32 *p;
	int i, rc;

	dprintk("--> %s\n", __func__);
	scratch = alloc_page(gfp_flags);
	if (!scratch)
		return ERR_PTR(-ENOMEM);

	xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages,
			      lgr->layoutp->len);
	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);

	/* stripe unit and mirror_array_cnt */
	rc = -EIO;
	p = xdr_inline_decode(&stream, 8 + 4);
	if (!p)
		goto out_err_free;

	p = xdr_decode_hyper(p, &stripe_unit);
	mirror_array_cnt = be32_to_cpup(p++);
	dprintk("%s: stripe_unit=%llu mirror_array_cnt=%u\n", __func__,
		stripe_unit, mirror_array_cnt);

	if (mirror_array_cnt > NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT ||
	    mirror_array_cnt == 0)
		goto out_err_free;

	rc = -ENOMEM;
	fls = kzalloc(sizeof(*fls), gfp_flags);
	if (!fls)
		goto out_err_free;

	fls->mirror_array_cnt = mirror_array_cnt;
	fls->stripe_unit = stripe_unit;
	fls->mirror_array = kcalloc(fls->mirror_array_cnt,
				    sizeof(fls->mirror_array[0]), gfp_flags);
	if (fls->mirror_array == NULL)
		goto out_err_free;

	for (i = 0; i < fls->mirror_array_cnt; i++) {
		struct nfs4_ff_layout_mirror *mirror;
		struct cred *kcred;
		const struct cred __rcu *cred;
		kuid_t uid;
		kgid_t gid;
		u32 ds_count, fh_count, id;
		int j;

		rc = -EIO;
		p = xdr_inline_decode(&stream, 4);
		if (!p)
			goto out_err_free;
		ds_count = be32_to_cpup(p);

		/* FIXME: allow for striping? */
		if (ds_count != 1)
			goto out_err_free;

		fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags);
		if (fls->mirror_array[i] == NULL) {
			rc = -ENOMEM;
			goto out_err_free;
		}

		fls->mirror_array[i]->ds_count = ds_count;

		/* deviceid */
		rc = decode_deviceid(&stream, &fls->mirror_array[i]->devid);
		if (rc)
			goto out_err_free;

		/* efficiency */
		rc = -EIO;
		p = xdr_inline_decode(&stream, 4);
		if (!p)
			goto out_err_free;
		fls->mirror_array[i]->efficiency = be32_to_cpup(p);

		/* stateid */
		rc = decode_pnfs_stateid(&stream, &fls->mirror_array[i]->stateid);
		if (rc)
			goto out_err_free;

		/* fh */
		rc = -EIO;
		p = xdr_inline_decode(&stream, 4);
		if (!p)
			goto out_err_free;
		fh_count = be32_to_cpup(p);

		fls->mirror_array[i]->fh_versions =
			kcalloc(fh_count, sizeof(struct nfs_fh),
				gfp_flags);
		if (fls->mirror_array[i]->fh_versions == NULL) {
			rc = -ENOMEM;
			goto out_err_free;
		}

		for (j = 0; j < fh_count; j++) {
			rc = decode_nfs_fh(&stream,
					   &fls->mirror_array[i]->fh_versions[j]);
			if (rc)
				goto out_err_free;
		}

		fls->mirror_array[i]->fh_versions_cnt = fh_count;

		/* user */
		rc = decode_name(&stream, &id);
		if (rc)
			goto out_err_free;

		uid = make_kuid(&init_user_ns, id);

		/* group */
		rc = decode_name(&stream, &id);
		if (rc)
			goto out_err_free;

		gid = make_kgid(&init_user_ns, id);

		if (gfp_flags & __GFP_FS)
			kcred = prepare_kernel_cred(NULL);
		else {
			unsigned int nofs_flags = memalloc_nofs_save();

			kcred = prepare_kernel_cred(NULL);
			memalloc_nofs_restore(nofs_flags);
		}
		rc = -ENOMEM;
		if (!kcred)
			goto out_err_free;
		kcred->fsuid = uid;
		kcred->fsgid = gid;
		cred = RCU_INITIALIZER(kcred);

		if (lgr->range.iomode == IOMODE_READ)
			rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred);
		else
			rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred);

		mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]);
		if (mirror != fls->mirror_array[i]) {
			/* swap cred ptrs so free_mirror will clean up old */
			if (lgr->range.iomode == IOMODE_READ) {
				cred = xchg(&mirror->ro_cred, cred);
				rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred);
			} else {
				cred = xchg(&mirror->rw_cred, cred);
				rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred);
			}
			ff_layout_free_mirror(fls->mirror_array[i]);
			fls->mirror_array[i] = mirror;
		}

		dprintk("%s: iomode %s uid %u gid %u\n", __func__,
			lgr->range.iomode == IOMODE_READ ? "READ" : "RW",
			from_kuid(&init_user_ns, uid),
			from_kgid(&init_user_ns, gid));
	}

	p = xdr_inline_decode(&stream, 4);
	if (!p)
		goto out_sort_mirrors;
	fls->flags = be32_to_cpup(p);

	p = xdr_inline_decode(&stream, 4);
	if (!p)
		goto out_sort_mirrors;
	for (i = 0; i < fls->mirror_array_cnt; i++)
		fls->mirror_array[i]->report_interval = be32_to_cpup(p);

out_sort_mirrors:
	ff_layout_sort_mirrors(fls);
	rc = ff_layout_check_layout(lgr);
	if (rc)
		goto out_err_free;
	ret = &fls->generic_hdr;
	dprintk("<-- %s (success)\n", __func__);
out_free_page:
	__free_page(scratch);
	return ret;
out_err_free:
	_ff_layout_free_lseg(fls);
	ret = ERR_PTR(rc);
	dprintk("<-- %s (%d)\n", __func__, rc);
	goto out_free_page;
}

static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout)
{
	struct pnfs_layout_segment *lseg;

	list_for_each_entry(lseg, &layout->plh_segs, pls_list)
		if (lseg->pls_range.iomode == IOMODE_RW)
			return true;

	return false;
}

static void
ff_layout_free_lseg(struct pnfs_layout_segment *lseg)
{
	struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);

	dprintk("--> %s\n", __func__);

	if (lseg->pls_range.iomode == IOMODE_RW) {
		struct nfs4_flexfile_layout *ffl;
		struct inode *inode;

		ffl = FF_LAYOUT_FROM_HDR(lseg->pls_layout);
		inode = ffl->generic_hdr.plh_inode;
		spin_lock(&inode->i_lock);
		if (!ff_layout_has_rw_segments(lseg->pls_layout)) {
			ffl->commit_info.nbuckets = 0;
			kfree(ffl->commit_info.buckets);
			ffl->commit_info.buckets = NULL;
		}
		spin_unlock(&inode->i_lock);
	}
	_ff_layout_free_lseg(fls);
}

/* Return 1 until we have multiple lsegs support */
static int
ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls)
{
	return 1;
}
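/*
 * Layoutstats busy-time accounting, as implemented by the timer helpers
 * below: the 0 -> 1 transition of n_ops records a start time, and every
 * completion adds the time elapsed since the last recorded start to
 * total_busy_time and re-arms the start time. The per-mirror read and
 * write stats built this way are reported to the MDS via LAYOUTSTATS
 * once the report interval has expired.
 */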
static void
nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer, ktime_t now)
{
	/* first IO request? */
	if (atomic_inc_return(&timer->n_ops) == 1) {
		timer->start_time = now;
	}
}

static ktime_t
nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer, ktime_t now)
{
	ktime_t start;

	if (atomic_dec_return(&timer->n_ops) < 0)
		WARN_ON_ONCE(1);

	start = timer->start_time;
	timer->start_time = now;
	return ktime_sub(now, start);
}

static bool
nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
			    struct nfs4_ff_layoutstat *layoutstat,
			    ktime_t now)
{
	s64 report_interval = FF_LAYOUTSTATS_REPORT_INTERVAL;
	struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(mirror->layout);

	nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now);
	if (!mirror->start_time)
		mirror->start_time = now;
	if (mirror->report_interval != 0)
		report_interval = (s64)mirror->report_interval * 1000LL;
	else if (layoutstats_timer != 0)
		report_interval = (s64)layoutstats_timer * 1000LL;
	if (ktime_to_ms(ktime_sub(now, ffl->last_report_time)) >=
			report_interval) {
		ffl->last_report_time = now;
		return true;
	}

	return false;
}

static void
nfs4_ff_layout_stat_io_update_requested(struct nfs4_ff_layoutstat *layoutstat,
		__u64 requested)
{
	struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat;

	iostat->ops_requested++;
	iostat->bytes_requested += requested;
}

static void
nfs4_ff_layout_stat_io_update_completed(struct nfs4_ff_layoutstat *layoutstat,
		__u64 requested,
		__u64 completed,
		ktime_t time_completed,
		ktime_t time_started)
{
	struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat;
	ktime_t completion_time = ktime_sub(time_completed, time_started);
	ktime_t timer;

	iostat->ops_completed++;
	iostat->bytes_completed += completed;
	iostat->bytes_not_delivered += requested - completed;

	timer = nfs4_ff_end_busy_timer(&layoutstat->busy_timer, time_completed);
	iostat->total_busy_time =
			ktime_add(iostat->total_busy_time, timer);
	iostat->aggregate_completion_time =
			ktime_add(iostat->aggregate_completion_time,
					completion_time);
}

static void
nfs4_ff_layout_stat_io_start_read(struct inode *inode,
		struct nfs4_ff_layout_mirror *mirror,
		__u64 requested, ktime_t now)
{
	bool report;

	spin_lock(&mirror->lock);
	report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat, now);
	nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested);
	set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);
	spin_unlock(&mirror->lock);

	if (report)
		pnfs_report_layoutstat(inode, GFP_KERNEL);
}

static void
nfs4_ff_layout_stat_io_end_read(struct rpc_task *task,
		struct nfs4_ff_layout_mirror *mirror,
		__u64 requested,
		__u64 completed)
{
	spin_lock(&mirror->lock);
	nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat,
			requested, completed,
			ktime_get(), task->tk_start);
	set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);
	spin_unlock(&mirror->lock);
}
static void
nfs4_ff_layout_stat_io_start_write(struct inode *inode,
		struct nfs4_ff_layout_mirror *mirror,
		__u64 requested, ktime_t now)
{
	bool report;

	spin_lock(&mirror->lock);
	report = nfs4_ff_layoutstat_start_io(mirror, &mirror->write_stat, now);
	nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested);
	set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);
	spin_unlock(&mirror->lock);

	if (report)
		pnfs_report_layoutstat(inode, GFP_NOIO);
}

static void
nfs4_ff_layout_stat_io_end_write(struct rpc_task *task,
		struct nfs4_ff_layout_mirror *mirror,
		__u64 requested,
		__u64 completed,
		enum nfs3_stable_how committed)
{
	if (committed == NFS_UNSTABLE)
		requested = completed = 0;

	spin_lock(&mirror->lock);
	nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat,
			requested, completed, ktime_get(), task->tk_start);
	set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);
	spin_unlock(&mirror->lock);
}

static int
ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,
			    struct nfs_commit_info *cinfo,
			    gfp_t gfp_flags)
{
	struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
	struct pnfs_commit_bucket *buckets;
	int size;

	if (cinfo->ds->nbuckets != 0) {
		/* This assumes there is only one RW lseg per file.
		 * To support multiple lseg per file, we need to
		 * change struct pnfs_commit_bucket to allow dynamic
		 * increasing nbuckets.
		 */
		return 0;
	}

	size = ff_layout_get_lseg_count(fls) * FF_LAYOUT_MIRROR_COUNT(lseg);

	buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
			  gfp_flags);
	if (!buckets)
		return -ENOMEM;
	else {
		int i;

		spin_lock(&cinfo->inode->i_lock);
		if (cinfo->ds->nbuckets != 0)
			kfree(buckets);
		else {
			cinfo->ds->buckets = buckets;
			cinfo->ds->nbuckets = size;
			for (i = 0; i < size; i++) {
				INIT_LIST_HEAD(&buckets[i].written);
				INIT_LIST_HEAD(&buckets[i].committing);
				/* mark direct verifier as unset */
				buckets[i].direct_verf.committed =
					NFS_INVALID_STABLE_HOW;
			}
		}
		spin_unlock(&cinfo->inode->i_lock);
		return 0;
	}
}

static void
ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx)
{
	struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);

	if (devid)
		nfs4_mark_deviceid_unavailable(devid);
}

static void
ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, int idx)
{
	struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);

	if (devid)
		nfs4_mark_deviceid_available(devid);
}

static struct nfs4_pnfs_ds *
ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
			     int start_idx, int *best_idx,
			     bool check_device)
{
	struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
	struct nfs4_ff_layout_mirror *mirror;
	struct nfs4_pnfs_ds *ds;
	bool fail_return = false;
	int idx;

	/* mirrors are initially sorted by efficiency */
	for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
		if (idx+1 == fls->mirror_array_cnt)
			fail_return = !check_device;

		mirror = FF_LAYOUT_COMP(lseg, idx);
		ds = nfs4_ff_layout_prepare_ds(lseg, mirror, fail_return);
		if (!ds)
			continue;

		if (check_device &&
		    nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node))
			continue;

		*best_idx = idx;
		return ds;
	}

	return NULL;
}

static struct nfs4_pnfs_ds *
ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg,
				 int start_idx, int *best_idx)
{
	return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false);
}
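/*
 * ff_layout_choose_best_ds_for_read() below makes two passes over the
 * mirror array, which is kept sorted by efficiency: the first pass skips
 * any data server whose deviceid has been marked unavailable, and only
 * if that yields nothing is an "any DS" pass attempted.
 */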
static struct nfs4_pnfs_ds *
ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg,
				   int start_idx, int *best_idx)
{
	return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true);
}

static struct nfs4_pnfs_ds *
ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
				  int start_idx, int *best_idx)
{
	struct nfs4_pnfs_ds *ds;

	ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx);
	if (ds)
		return ds;
	return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx);
}

static void
ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
		      struct nfs_page *req,
		      bool strict_iomode)
{
	pnfs_put_lseg(pgio->pg_lseg);
	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
					   nfs_req_openctx(req),
					   0,
					   NFS4_MAX_UINT64,
					   IOMODE_READ,
					   strict_iomode,
					   GFP_KERNEL);
	if (IS_ERR(pgio->pg_lseg)) {
		pgio->pg_error = PTR_ERR(pgio->pg_lseg);
		pgio->pg_lseg = NULL;
	}
}

static void
ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
			struct nfs_page *req)
{
	struct nfs_pgio_mirror *pgm;
	struct nfs4_ff_layout_mirror *mirror;
	struct nfs4_pnfs_ds *ds;
	int ds_idx;

retry:
	pnfs_generic_pg_check_layout(pgio);
	/* Use full layout for now */
	if (!pgio->pg_lseg) {
		ff_layout_pg_get_read(pgio, req, false);
		if (!pgio->pg_lseg)
			goto out_nolseg;
	}
	if (ff_layout_avoid_read_on_rw(pgio->pg_lseg)) {
		ff_layout_pg_get_read(pgio, req, true);
		if (!pgio->pg_lseg)
			goto out_nolseg;
	}

	ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx);
	if (!ds) {
		if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
			goto out_mds;
		pnfs_put_lseg(pgio->pg_lseg);
		pgio->pg_lseg = NULL;
		/* Sleep for 1 second before retrying */
		ssleep(1);
		goto retry;
	}

	mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);

	pgio->pg_mirror_idx = ds_idx;

	/* read always uses only one mirror - idx 0 for pgio layer */
	pgm = &pgio->pg_mirrors[0];
	pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;

	pgio->pg_maxretrans = io_maxretrans;
	return;
out_nolseg:
	if (pgio->pg_error < 0)
		return;
out_mds:
	pnfs_put_lseg(pgio->pg_lseg);
	pgio->pg_lseg = NULL;
	nfs_pageio_reset_read_mds(pgio);
}
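/*
 * The write path below follows the same fallback pattern as
 * ff_layout_pg_init_read() above: if no usable data server can be
 * prepared and the layout permits it, I/O is redirected through the
 * MDS; if the layout forbids falling back to the MDS (see
 * ff_layout_no_fallback_to_mds()), the layout segment is dropped and
 * the whole pg_init is retried after a one second sleep.
 */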
static void
ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
			struct nfs_page *req)
{
	struct nfs4_ff_layout_mirror *mirror;
	struct nfs_pgio_mirror *pgm;
	struct nfs_commit_info cinfo;
	struct nfs4_pnfs_ds *ds;
	int i;
	int status;

retry:
	pnfs_generic_pg_check_layout(pgio);
	if (!pgio->pg_lseg) {
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   nfs_req_openctx(req),
						   0,
						   NFS4_MAX_UINT64,
						   IOMODE_RW,
						   false,
						   GFP_NOFS);
		if (IS_ERR(pgio->pg_lseg)) {
			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
			pgio->pg_lseg = NULL;
			return;
		}
	}
	/* If no lseg, fall back to write through mds */
	if (pgio->pg_lseg == NULL)
		goto out_mds;

	nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
	status = ff_layout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
	if (status < 0)
		goto out_mds;

	/* Use a direct mapping of ds_idx to pgio mirror_idx */
	if (WARN_ON_ONCE(pgio->pg_mirror_count !=
	    FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg)))
		goto out_mds;

	for (i = 0; i < pgio->pg_mirror_count; i++) {
		mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
		ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, true);
		if (!ds) {
			if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
				goto out_mds;
			pnfs_put_lseg(pgio->pg_lseg);
			pgio->pg_lseg = NULL;
			/* Sleep for 1 second before retrying */
			ssleep(1);
			goto retry;
		}
		pgm = &pgio->pg_mirrors[i];
		pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize;
	}

	pgio->pg_maxretrans = io_maxretrans;
	return;

out_mds:
	pnfs_put_lseg(pgio->pg_lseg);
	pgio->pg_lseg = NULL;
	nfs_pageio_reset_write_mds(pgio);
}

static unsigned int
ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,
				    struct nfs_page *req)
{
	if (!pgio->pg_lseg) {
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   nfs_req_openctx(req),
						   0,
						   NFS4_MAX_UINT64,
						   IOMODE_RW,
						   false,
						   GFP_NOFS);
		if (IS_ERR(pgio->pg_lseg)) {
			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
			pgio->pg_lseg = NULL;
			goto out;
		}
	}
	if (pgio->pg_lseg)
		return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg);

	/* no lseg means that pnfs is not in use, so no mirroring here */
	nfs_pageio_reset_write_mds(pgio);
out:
	return 1;
}

static const struct nfs_pageio_ops ff_layout_pg_read_ops = {
	.pg_init = ff_layout_pg_init_read,
	.pg_test = pnfs_generic_pg_test,
	.pg_doio = pnfs_generic_pg_readpages,
	.pg_cleanup = pnfs_generic_pg_cleanup,
};

static const struct nfs_pageio_ops ff_layout_pg_write_ops = {
	.pg_init = ff_layout_pg_init_write,
	.pg_test = pnfs_generic_pg_test,
	.pg_doio = pnfs_generic_pg_writepages,
	.pg_get_mirror_count = ff_layout_pg_get_mirror_count_write,
	.pg_cleanup = pnfs_generic_pg_cleanup,
};

static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
{
	struct rpc_task *task = &hdr->task;

	pnfs_layoutcommit_inode(hdr->inode, false);

	if (retry_pnfs) {
		dprintk("%s Reset task %5u for i/o through pNFS "
			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
			hdr->task.tk_pid,
			hdr->inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(hdr->inode),
			hdr->args.count,
			(unsigned long long)hdr->args.offset);

		hdr->completion_ops->reschedule_io(hdr);
		return;
	}

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		dprintk("%s Reset task %5u for i/o through MDS "
			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
			hdr->task.tk_pid,
			hdr->inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(hdr->inode),
			hdr->args.count,
			(unsigned long long)hdr->args.offset);

		task->tk_status = pnfs_write_done_resend_to_mds(hdr);
	}
}

static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
{
	struct rpc_task *task = &hdr->task;

	pnfs_layoutcommit_inode(hdr->inode, false);

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		dprintk("%s Reset task %5u for i/o through MDS "
			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
			hdr->task.tk_pid,
			hdr->inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(hdr->inode),
			hdr->args.count,
			(unsigned long long)hdr->args.offset);

		task->tk_status = pnfs_read_done_resend_to_mds(hdr);
	}
}
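/*
 * The error handlers below translate a failed task status into one of
 * three verdicts: -NFS4ERR_RESET_TO_PNFS (retry the I/O through pNFS,
 * e.g. via another mirror), -NFS4ERR_RESET_TO_MDS (resend the I/O
 * through the MDS), or -EAGAIN (restart the RPC call itself).
 */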
static int ff_layout_async_handle_error_v4(struct rpc_task *task,
					   struct nfs4_state *state,
					   struct nfs_client *clp,
					   struct pnfs_layout_segment *lseg,
					   int idx)
{
	struct pnfs_layout_hdr *lo = lseg->pls_layout;
	struct inode *inode = lo->plh_inode;
	struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
	struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;

	switch (task->tk_status) {
	case -NFS4ERR_BADSESSION:
	case -NFS4ERR_BADSLOT:
	case -NFS4ERR_BAD_HIGH_SLOT:
	case -NFS4ERR_DEADSESSION:
	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
	case -NFS4ERR_SEQ_FALSE_RETRY:
	case -NFS4ERR_SEQ_MISORDERED:
		dprintk("%s ERROR %d, Reset session. Exchangeid "
			"flags 0x%x\n", __func__, task->tk_status,
			clp->cl_exchange_flags);
		nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
		break;
	case -NFS4ERR_DELAY:
	case -NFS4ERR_GRACE:
		rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
		break;
	case -NFS4ERR_RETRY_UNCACHED_REP:
		break;
	case -EAGAIN:
		return -NFS4ERR_RESET_TO_PNFS;
	/* Invalidate Layout errors */
	case -NFS4ERR_PNFS_NO_LAYOUT:
	case -ESTALE:           /* mapped NFS4ERR_STALE */
	case -EBADHANDLE:       /* mapped NFS4ERR_BADHANDLE */
	case -EISDIR:           /* mapped NFS4ERR_ISDIR */
	case -NFS4ERR_FHEXPIRED:
	case -NFS4ERR_WRONG_TYPE:
		dprintk("%s Invalid layout error %d\n", __func__,
			task->tk_status);
		/*
		 * Destroy layout so new i/o will get a new layout.
		 * Layout will not be destroyed until all current lseg
		 * references are put. Mark layout as invalid to resend failed
		 * i/o and all i/o waiting on the slot table to the MDS until
		 * layout is destroyed and a new valid layout is obtained.
		 */
		pnfs_destroy_layout(NFS_I(inode));
		rpc_wake_up(&tbl->slot_tbl_waitq);
		goto reset;
	/* RPC connection errors */
	case -ECONNREFUSED:
	case -EHOSTDOWN:
	case -EHOSTUNREACH:
	case -ENETUNREACH:
	case -EIO:
	case -ETIMEDOUT:
	case -EPIPE:
		dprintk("%s DS connection error %d\n", __func__,
			task->tk_status);
		nfs4_delete_deviceid(devid->ld, devid->nfs_client,
				&devid->deviceid);
		rpc_wake_up(&tbl->slot_tbl_waitq);
		/* fall through */
	default:
		if (ff_layout_avoid_mds_available_ds(lseg))
			return -NFS4ERR_RESET_TO_PNFS;
reset:
		dprintk("%s Retry through MDS. Error %d\n", __func__,
			task->tk_status);
		return -NFS4ERR_RESET_TO_MDS;
	}
	task->tk_status = 0;
	return -EAGAIN;
}
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
					   struct pnfs_layout_segment *lseg,
					   int idx)
{
	struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);

	switch (task->tk_status) {
	/* File access problems. Don't mark the device as unavailable */
	case -EACCES:
	case -ESTALE:
	case -EISDIR:
	case -EBADHANDLE:
	case -ELOOP:
	case -ENOSPC:
	case -EAGAIN:
		break;
	case -EJUKEBOX:
		nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
		goto out_retry;
	default:
		dprintk("%s DS connection error %d\n", __func__,
			task->tk_status);
		nfs4_delete_deviceid(devid->ld, devid->nfs_client,
				&devid->deviceid);
	}
	/* FIXME: Need to prevent infinite looping here. */
	return -NFS4ERR_RESET_TO_PNFS;
out_retry:
	task->tk_status = 0;
	rpc_restart_call_prepare(task);
	rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
	return -EAGAIN;
}

static int ff_layout_async_handle_error(struct rpc_task *task,
					struct nfs4_state *state,
					struct nfs_client *clp,
					struct pnfs_layout_segment *lseg,
					int idx)
{
	int vers = clp->cl_nfs_mod->rpc_vers->number;

	if (task->tk_status >= 0) {
		ff_layout_mark_ds_reachable(lseg, idx);
		return 0;
	}

	/* Handle the case of an invalid layout segment */
	if (!pnfs_is_valid_lseg(lseg))
		return -NFS4ERR_RESET_TO_PNFS;

	switch (vers) {
	case 3:
		return ff_layout_async_handle_error_v3(task, lseg, idx);
	case 4:
		return ff_layout_async_handle_error_v4(task, state, clp,
						       lseg, idx);
	default:
		/* should never happen */
		WARN_ON_ONCE(1);
		return 0;
	}
}

static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
					int idx, u64 offset, u64 length,
					u32 status, int opnum, int error)
{
	struct nfs4_ff_layout_mirror *mirror;
	int err;

	if (status == 0) {
		switch (error) {
		case -ETIMEDOUT:
		case -EPFNOSUPPORT:
		case -EPROTONOSUPPORT:
		case -EOPNOTSUPP:
		case -ECONNREFUSED:
		case -ECONNRESET:
		case -EHOSTDOWN:
		case -EHOSTUNREACH:
		case -ENETUNREACH:
		case -EADDRINUSE:
		case -ENOBUFS:
		case -EPIPE:
		case -EPERM:
			status = NFS4ERR_NXIO;
			break;
		case -EACCES:
			status = NFS4ERR_ACCESS;
			break;
		default:
			return;
		}
	}

	switch (status) {
	case NFS4ERR_DELAY:
	case NFS4ERR_GRACE:
		return;
	default:
		break;
	}

	mirror = FF_LAYOUT_COMP(lseg, idx);
	err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
				       mirror, offset, length, status, opnum,
				       GFP_NOIO);
	if (status == NFS4ERR_NXIO)
		ff_layout_mark_ds_unreachable(lseg, idx);
	pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg);
	dprintk("%s: err %d op %d status %u\n", __func__, err, opnum, status);
}
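/*
 * Errors recorded by ff_layout_io_track_ds_error() are queued on the
 * layout's error list and reported back to the MDS when the layout is
 * returned (see ff_layout_encode_ioerr() below); NXIO-class errors also
 * mark the deviceid unavailable so that the data server is avoided
 * until it recovers.
 */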
/* NFS_PROTO call done callback routines */
static int ff_layout_read_done_cb(struct rpc_task *task,
				struct nfs_pgio_header *hdr)
{
	int new_idx = hdr->pgio_mirror_idx;
	int err;

	trace_nfs4_pnfs_read(hdr, task->tk_status);
	if (task->tk_status < 0)
		ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
					    hdr->args.offset, hdr->args.count,
					    hdr->res.op_status, OP_READ,
					    task->tk_status);
	err = ff_layout_async_handle_error(task, hdr->args.context->state,
					   hdr->ds_clp, hdr->lseg,
					   hdr->pgio_mirror_idx);

	clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
	clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
	switch (err) {
	case -NFS4ERR_RESET_TO_PNFS:
		if (ff_layout_choose_best_ds_for_read(hdr->lseg,
					hdr->pgio_mirror_idx + 1,
					&new_idx))
			goto out_layouterror;
		set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
		return task->tk_status;
	case -NFS4ERR_RESET_TO_MDS:
		set_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
		return task->tk_status;
	case -EAGAIN:
		goto out_eagain;
	}

	return 0;
out_layouterror:
	ff_layout_read_record_layoutstats_done(task, hdr);
	ff_layout_send_layouterror(hdr->lseg);
	hdr->pgio_mirror_idx = new_idx;
out_eagain:
	rpc_restart_call_prepare(task);
	return -EAGAIN;
}

static bool
ff_layout_need_layoutcommit(struct pnfs_layout_segment *lseg)
{
	return !(FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_LAYOUTCOMMIT);
}

/*
 * We reference the rpc_cred of the first WRITE that triggers the need for
 * a LAYOUTCOMMIT, and use it to send the layoutcommit compound.
 * rfc5661 is not clear about which credential should be used.
 *
 * Flexlayout client should treat DS replied FILE_SYNC as DATA_SYNC, so
 * to follow http://www.rfc-editor.org/errata_search.php?rfc=5661&eid=2751
 * we always send layoutcommit after DS writes.
 */
static void
ff_layout_set_layoutcommit(struct inode *inode,
		struct pnfs_layout_segment *lseg,
		loff_t end_offset)
{
	if (!ff_layout_need_layoutcommit(lseg))
		return;

	pnfs_set_layoutcommit(inode, lseg, end_offset);
	dprintk("%s inode %lu pls_end_pos %llu\n", __func__, inode->i_ino,
		(unsigned long long) NFS_I(inode)->layout->plh_lwb);
}

static void ff_layout_read_record_layoutstats_start(struct rpc_task *task,
		struct nfs_pgio_header *hdr)
{
	if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
		return;
	nfs4_ff_layout_stat_io_start_read(hdr->inode,
			FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
			hdr->args.count,
			task->tk_start);
}

static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
		struct nfs_pgio_header *hdr)
{
	if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
		return;
	nfs4_ff_layout_stat_io_end_read(task,
			FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
			hdr->args.count,
			hdr->res.count);
	set_bit(NFS_LSEG_LAYOUTRETURN, &hdr->lseg->pls_flags);
}

static int ff_layout_read_prepare_common(struct rpc_task *task,
					 struct nfs_pgio_header *hdr)
{
	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
		rpc_exit(task, -EIO);
		return -EIO;
	}

	ff_layout_read_record_layoutstats_start(task, hdr);
	return 0;
}
/*
 * Call ops for the async read/write cases
 * In the case of dense layouts, the offset needs to be reset to its
 * original value.
 */
static void ff_layout_read_prepare_v3(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_read_prepare_common(task, hdr))
		return;

	rpc_call_start(task);
}

static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (nfs4_setup_sequence(hdr->ds_clp,
				&hdr->args.seq_args,
				&hdr->res.seq_res,
				task))
		return;

	ff_layout_read_prepare_common(task, hdr);
}

static void
ff_layout_io_prepare_transmit(struct rpc_task *task,
		void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (!pnfs_is_valid_lseg(hdr->lseg))
		rpc_exit(task, -EAGAIN);
}

static void ff_layout_read_call_done(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
	    task->tk_status == 0) {
		nfs4_sequence_done(task, &hdr->res.seq_res);
		return;
	}

	/* Note this may cause RPC to be resent */
	hdr->mds_ops->rpc_call_done(task, hdr);
}

static void ff_layout_read_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	ff_layout_read_record_layoutstats_done(task, hdr);
	rpc_count_iostats_metrics(task,
	    &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]);
}

static void ff_layout_read_release(void *data)
{
	struct nfs_pgio_header *hdr = data;

	ff_layout_read_record_layoutstats_done(&hdr->task, hdr);
	if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) {
		ff_layout_send_layouterror(hdr->lseg);
		pnfs_read_resend_pnfs(hdr);
	} else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
		ff_layout_reset_read(hdr);
	pnfs_generic_rw_release(data);
}

static int ff_layout_write_done_cb(struct rpc_task *task,
				struct nfs_pgio_header *hdr)
{
	loff_t end_offs = 0;
	int err;

	trace_nfs4_pnfs_write(hdr, task->tk_status);
	if (task->tk_status < 0)
		ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
					    hdr->args.offset, hdr->args.count,
					    hdr->res.op_status, OP_WRITE,
					    task->tk_status);
	err = ff_layout_async_handle_error(task, hdr->args.context->state,
					   hdr->ds_clp, hdr->lseg,
					   hdr->pgio_mirror_idx);

	clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
	clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
	switch (err) {
	case -NFS4ERR_RESET_TO_PNFS:
		set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
		return task->tk_status;
	case -NFS4ERR_RESET_TO_MDS:
		set_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
		return task->tk_status;
	case -EAGAIN:
		return -EAGAIN;
	}

	if (hdr->res.verf->committed == NFS_FILE_SYNC ||
	    hdr->res.verf->committed == NFS_DATA_SYNC)
		end_offs = hdr->mds_offset + (loff_t)hdr->res.count;

	/* Note: if the write is unstable, don't set end_offs until commit */
	ff_layout_set_layoutcommit(hdr->inode, hdr->lseg, end_offs);

	/* zero out fattr since we don't care about DS attrs at all */
	hdr->fattr.valid = 0;
	if (task->tk_status >= 0)
		nfs_writeback_update_inode(hdr);

	return 0;
}
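/*
 * COMMIT to the DS completes the unstable-write case left open in
 * ff_layout_write_done_cb() above: only once the commit succeeds is the
 * last write byte (data->lwb) handed to ff_layout_set_layoutcommit().
 */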
static int ff_layout_commit_done_cb(struct rpc_task *task,
				     struct nfs_commit_data *data)
{
	int err;

	trace_nfs4_pnfs_commit_ds(data, task->tk_status);
	if (task->tk_status < 0)
		ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index,
					    data->args.offset, data->args.count,
					    data->res.op_status, OP_COMMIT,
					    task->tk_status);
	err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
					   data->lseg, data->ds_commit_index);

	switch (err) {
	case -NFS4ERR_RESET_TO_PNFS:
		pnfs_generic_prepare_to_resend_writes(data);
		return -EAGAIN;
	case -NFS4ERR_RESET_TO_MDS:
		pnfs_generic_prepare_to_resend_writes(data);
		return -EAGAIN;
	case -EAGAIN:
		rpc_restart_call_prepare(task);
		return -EAGAIN;
	}

	ff_layout_set_layoutcommit(data->inode, data->lseg, data->lwb);

	return 0;
}

static void ff_layout_write_record_layoutstats_start(struct rpc_task *task,
		struct nfs_pgio_header *hdr)
{
	if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
		return;
	nfs4_ff_layout_stat_io_start_write(hdr->inode,
			FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
			hdr->args.count,
			task->tk_start);
}

static void ff_layout_write_record_layoutstats_done(struct rpc_task *task,
		struct nfs_pgio_header *hdr)
{
	if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
		return;
	nfs4_ff_layout_stat_io_end_write(task,
			FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
			hdr->args.count, hdr->res.count,
			hdr->res.verf->committed);
	set_bit(NFS_LSEG_LAYOUTRETURN, &hdr->lseg->pls_flags);
}

static int ff_layout_write_prepare_common(struct rpc_task *task,
					  struct nfs_pgio_header *hdr)
{
	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
		rpc_exit(task, -EIO);
		return -EIO;
	}

	ff_layout_write_record_layoutstats_start(task, hdr);
	return 0;
}

static void ff_layout_write_prepare_v3(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_write_prepare_common(task, hdr))
		return;

	rpc_call_start(task);
}

static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (nfs4_setup_sequence(hdr->ds_clp,
				&hdr->args.seq_args,
				&hdr->res.seq_res,
				task))
		return;

	ff_layout_write_prepare_common(task, hdr);
}

static void ff_layout_write_call_done(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
	    task->tk_status == 0) {
		nfs4_sequence_done(task, &hdr->res.seq_res);
		return;
	}

	/* Note this may cause RPC to be resent */
	hdr->mds_ops->rpc_call_done(task, hdr);
}

static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	ff_layout_write_record_layoutstats_done(task, hdr);
	rpc_count_iostats_metrics(task,
	    &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
}
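/*
 * ff_layout_write_release() below acts on the resend strategy chosen by
 * ff_layout_write_done_cb(): NFS_IOHDR_RESEND_PNFS sends a LAYOUTERROR
 * and reschedules the I/O through pNFS, while NFS_IOHDR_RESEND_MDS
 * resets the request to go through the MDS.
 */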
static void ff_layout_write_release(void *data)
{
	struct nfs_pgio_header *hdr = data;

	ff_layout_write_record_layoutstats_done(&hdr->task, hdr);
	if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) {
		ff_layout_send_layouterror(hdr->lseg);
		ff_layout_reset_write(hdr, true);
	} else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
		ff_layout_reset_write(hdr, false);
	pnfs_generic_rw_release(data);
}

static void ff_layout_commit_record_layoutstats_start(struct rpc_task *task,
		struct nfs_commit_data *cdata)
{
	if (test_and_set_bit(NFS_IOHDR_STAT, &cdata->flags))
		return;
	nfs4_ff_layout_stat_io_start_write(cdata->inode,
			FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
			0, task->tk_start);
}

static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task,
		struct nfs_commit_data *cdata)
{
	struct nfs_page *req;
	__u64 count = 0;

	if (!test_and_clear_bit(NFS_IOHDR_STAT, &cdata->flags))
		return;

	if (task->tk_status == 0) {
		list_for_each_entry(req, &cdata->pages, wb_list)
			count += req->wb_bytes;
	}
	nfs4_ff_layout_stat_io_end_write(task,
			FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
			count, count, NFS_FILE_SYNC);
	set_bit(NFS_LSEG_LAYOUTRETURN, &cdata->lseg->pls_flags);
}

static void ff_layout_commit_prepare_common(struct rpc_task *task,
		struct nfs_commit_data *cdata)
{
	ff_layout_commit_record_layoutstats_start(task, cdata);
}

static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
{
	ff_layout_commit_prepare_common(task, data);
	rpc_call_start(task);
}

static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data)
{
	struct nfs_commit_data *wdata = data;

	if (nfs4_setup_sequence(wdata->ds_clp,
				&wdata->args.seq_args,
				&wdata->res.seq_res,
				task))
		return;
	ff_layout_commit_prepare_common(task, data);
}

static void ff_layout_commit_done(struct rpc_task *task, void *data)
{
	pnfs_generic_write_commit_done(task, data);
}

static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_commit_data *cdata = data;

	ff_layout_commit_record_layoutstats_done(task, cdata);
	rpc_count_iostats_metrics(task,
	    &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]);
}

static void ff_layout_commit_release(void *data)
{
	struct nfs_commit_data *cdata = data;

	ff_layout_commit_record_layoutstats_done(&cdata->task, cdata);
	pnfs_generic_commit_release(data);
}

static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
	.rpc_call_prepare = ff_layout_read_prepare_v3,
	.rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
	.rpc_call_done = ff_layout_read_call_done,
	.rpc_count_stats = ff_layout_read_count_stats,
	.rpc_release = ff_layout_read_release,
};

static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
	.rpc_call_prepare = ff_layout_read_prepare_v4,
	.rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
	.rpc_call_done = ff_layout_read_call_done,
	.rpc_count_stats = ff_layout_read_count_stats,
	.rpc_release = ff_layout_read_release,
};

static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
	.rpc_call_prepare = ff_layout_write_prepare_v3,
	.rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
	.rpc_call_done = ff_layout_write_call_done,
	.rpc_count_stats = ff_layout_write_count_stats,
	.rpc_release = ff_layout_write_release,
};
static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
	.rpc_call_prepare = ff_layout_write_prepare_v4,
	.rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
	.rpc_call_done = ff_layout_write_call_done,
	.rpc_count_stats = ff_layout_write_count_stats,
	.rpc_release = ff_layout_write_release,
};

static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = {
	.rpc_call_prepare = ff_layout_commit_prepare_v3,
	.rpc_call_done = ff_layout_commit_done,
	.rpc_count_stats = ff_layout_commit_count_stats,
	.rpc_release = ff_layout_commit_release,
};

static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = {
	.rpc_call_prepare = ff_layout_commit_prepare_v4,
	.rpc_call_done = ff_layout_commit_done,
	.rpc_count_stats = ff_layout_commit_count_stats,
	.rpc_release = ff_layout_commit_release,
};

static enum pnfs_try_status
ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
{
	struct pnfs_layout_segment *lseg = hdr->lseg;
	struct nfs4_pnfs_ds *ds;
	struct rpc_clnt *ds_clnt;
	struct nfs4_ff_layout_mirror *mirror;
	const struct cred *ds_cred;
	loff_t offset = hdr->args.offset;
	u32 idx = hdr->pgio_mirror_idx;
	int vers;
	struct nfs_fh *fh;

	dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
		__func__, hdr->inode->i_ino,
		hdr->args.pgbase, (size_t)hdr->args.count, offset);

	mirror = FF_LAYOUT_COMP(lseg, idx);
	ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
	if (!ds)
		goto out_failed;

	ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
						   hdr->inode);
	if (IS_ERR(ds_clnt))
		goto out_failed;

	ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred);
	if (!ds_cred)
		goto out_failed;

	vers = nfs4_ff_layout_ds_version(mirror);

	dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__,
		ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers);

	hdr->pgio_done_cb = ff_layout_read_done_cb;
	refcount_inc(&ds->ds_clp->cl_count);
	hdr->ds_clp = ds->ds_clp;
	fh = nfs4_ff_layout_select_ds_fh(mirror);
	if (fh)
		hdr->args.fh = fh;

	nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid);

	/*
	 * Note that if we ever decide to split across DSes,
	 * then we may need to handle dense-like offsets.
	 */
	hdr->args.offset = offset;
	hdr->mds_offset = offset;

	/* Perform an asynchronous read to ds */
	nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
			  vers == 3 ? &ff_layout_read_call_ops_v3 :
				      &ff_layout_read_call_ops_v4,
			  0, RPC_TASK_SOFTCONN);
	put_cred(ds_cred);
	return PNFS_ATTEMPTED;

out_failed:
	if (ff_layout_avoid_mds_available_ds(lseg))
		return PNFS_TRY_AGAIN;
	return PNFS_NOT_ATTEMPTED;
}
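/*
 * Like ff_layout_read_pagelist() above, the write path below returns
 * PNFS_ATTEMPTED once the RPC has been handed to the data server,
 * PNFS_TRY_AGAIN when the MDS should still be avoided (see
 * ff_layout_avoid_mds_available_ds()), and PNFS_NOT_ATTEMPTED to let
 * the caller fall back to I/O through the MDS.
 */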
/* Perform async writes. */
static enum pnfs_try_status
ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
{
	struct pnfs_layout_segment *lseg = hdr->lseg;
	struct nfs4_pnfs_ds *ds;
	struct rpc_clnt *ds_clnt;
	struct nfs4_ff_layout_mirror *mirror;
	const struct cred *ds_cred;
	loff_t offset = hdr->args.offset;
	int vers;
	struct nfs_fh *fh;
	int idx = hdr->pgio_mirror_idx;

	mirror = FF_LAYOUT_COMP(lseg, idx);
	ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
	if (!ds)
		goto out_failed;

	ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
						   hdr->inode);
	if (IS_ERR(ds_clnt))
		goto out_failed;

	ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred);
	if (!ds_cred)
		goto out_failed;

	vers = nfs4_ff_layout_ds_version(mirror);

	dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n",
		__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
		offset, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count),
		vers);

	hdr->pgio_done_cb = ff_layout_write_done_cb;
	refcount_inc(&ds->ds_clp->cl_count);
	hdr->ds_clp = ds->ds_clp;
	hdr->ds_commit_idx = idx;
	fh = nfs4_ff_layout_select_ds_fh(mirror);
	if (fh)
		hdr->args.fh = fh;

	nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid);

	/*
	 * Note that if we ever decide to split across DSes,
	 * then we may need to handle dense-like offsets.
	 */
	hdr->args.offset = offset;

	/* Perform an asynchronous write */
	nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
			  vers == 3 ? &ff_layout_write_call_ops_v3 :
				      &ff_layout_write_call_ops_v4,
			  sync, RPC_TASK_SOFTCONN);
	put_cred(ds_cred);
	return PNFS_ATTEMPTED;

out_failed:
	if (ff_layout_avoid_mds_available_ds(lseg))
		return PNFS_TRY_AGAIN;
	return PNFS_NOT_ATTEMPTED;
}

static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
{
	return i;
}
static struct nfs_fh *
select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
{
	struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg);

	/* FIXME: Assume that there is only one NFS version available
	 * for the DS.
	 */
	return &flseg->mirror_array[i]->fh_versions[0];
}

static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
{
	struct pnfs_layout_segment *lseg = data->lseg;
	struct nfs4_pnfs_ds *ds;
	struct rpc_clnt *ds_clnt;
	struct nfs4_ff_layout_mirror *mirror;
	const struct cred *ds_cred;
	u32 idx;
	int vers, ret;
	struct nfs_fh *fh;

	if (!lseg || !(pnfs_is_valid_lseg(lseg) ||
	    test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)))
		goto out_err;

	idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
	mirror = FF_LAYOUT_COMP(lseg, idx);
	ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
	if (!ds)
		goto out_err;

	ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
						   data->inode);
	if (IS_ERR(ds_clnt))
		goto out_err;

	ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, data->cred);
	if (!ds_cred)
		goto out_err;

	vers = nfs4_ff_layout_ds_version(mirror);

	dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__,
		data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count),
		vers);
	data->commit_done_cb = ff_layout_commit_done_cb;
	data->cred = ds_cred;
	refcount_inc(&ds->ds_clp->cl_count);
	data->ds_clp = ds->ds_clp;
	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
	if (fh)
		data->args.fh = fh;

	ret = nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
				   vers == 3 ? &ff_layout_commit_call_ops_v3 :
					       &ff_layout_commit_call_ops_v4,
				   how, RPC_TASK_SOFTCONN);
	put_cred(ds_cred);
	return ret;
out_err:
	pnfs_generic_prepare_to_resend_writes(data);
	pnfs_generic_commit_release(data);
	return -EAGAIN;
}

static int
ff_layout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
			   int how, struct nfs_commit_info *cinfo)
{
	return pnfs_generic_commit_pagelist(inode, mds_pages, how, cinfo,
					    ff_layout_initiate_commit);
}

static struct pnfs_ds_commit_info *
ff_layout_get_ds_info(struct inode *inode)
{
	struct pnfs_layout_hdr *layout = NFS_I(inode)->layout;

	if (layout == NULL)
		return NULL;

	return &FF_LAYOUT_FROM_HDR(layout)->commit_info;
}

static void
ff_layout_free_deviceid_node(struct nfs4_deviceid_node *d)
{
	nfs4_ff_layout_free_deviceid(container_of(d, struct nfs4_ff_layout_ds,
						  id_node));
}

static int ff_layout_encode_ioerr(struct xdr_stream *xdr,
				  const struct nfs4_layoutreturn_args *args,
				  const struct nfs4_flexfile_layoutreturn_args *ff_args)
{
	__be32 *start;

	start = xdr_reserve_space(xdr, 4);
	if (unlikely(!start))
		return -E2BIG;

	*start = cpu_to_be32(ff_args->num_errors);
	/* This assumes we always return _ALL_ layouts */
	return ff_layout_encode_ds_ioerr(xdr, &ff_args->errors);
}

static void
encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len)
{
	WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0);
}
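/*
 * The LAYOUTSTATS device entry encoded below consists of the byte range
 * being reported (offset, length), the layout stateid, four counters
 * (read and write request and byte totals) and the deviceid of the
 * mirror the stats belong to; the layout-type-specific body is appended
 * by ff_layout_encode_ff_layoutupdate().
 */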
static void
ff_layout_encode_ff_iostat_head(struct xdr_stream *xdr,
				const nfs4_stateid *stateid,
				const struct nfs42_layoutstat_devinfo *devinfo)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 8 + 8);
	p = xdr_encode_hyper(p, devinfo->offset);
	p = xdr_encode_hyper(p, devinfo->length);
	encode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
	p = xdr_reserve_space(xdr, 4*8);
	p = xdr_encode_hyper(p, devinfo->read_count);
	p = xdr_encode_hyper(p, devinfo->read_bytes);
	p = xdr_encode_hyper(p, devinfo->write_count);
	p = xdr_encode_hyper(p, devinfo->write_bytes);
	encode_opaque_fixed(xdr, devinfo->dev_id.data, NFS4_DEVICEID4_SIZE);
}

static void
ff_layout_encode_ff_iostat(struct xdr_stream *xdr,
			   const nfs4_stateid *stateid,
			   const struct nfs42_layoutstat_devinfo *devinfo)
{
	ff_layout_encode_ff_iostat_head(xdr, stateid, devinfo);
	ff_layout_encode_ff_layoutupdate(xdr, devinfo,
					 devinfo->ld_private.data);
}

/* Encode the array of per-device iostats gathered for this layoutreturn */
static void ff_layout_encode_iostats_array(struct xdr_stream *xdr,
					   const struct nfs4_layoutreturn_args *args,
					   struct nfs4_flexfile_layoutreturn_args *ff_args)
{
	__be32 *p;
	int i;

	p = xdr_reserve_space(xdr, 4);
	*p = cpu_to_be32(ff_args->num_dev);
	for (i = 0; i < ff_args->num_dev; i++)
		ff_layout_encode_ff_iostat(xdr,
					   &args->layout->plh_stateid,
					   &ff_args->devinfo[i]);
}

static void
ff_layout_free_iostats_array(struct nfs42_layoutstat_devinfo *devinfo,
			     unsigned int num_entries)
{
	unsigned int i;

	for (i = 0; i < num_entries; i++) {
		if (!devinfo[i].ld_private.ops)
			continue;
		if (!devinfo[i].ld_private.ops->free)
			continue;
		devinfo[i].ld_private.ops->free(&devinfo[i].ld_private);
	}
}

static struct nfs4_deviceid_node *
ff_layout_alloc_deviceid_node(struct nfs_server *server,
			      struct pnfs_device *pdev, gfp_t gfp_flags)
{
	struct nfs4_ff_layout_ds *dsaddr;

	dsaddr = nfs4_ff_alloc_deviceid_node(server, pdev, gfp_flags);
	if (!dsaddr)
		return NULL;
	return &dsaddr->id_node;
}

static void
ff_layout_encode_layoutreturn(struct xdr_stream *xdr,
			      const void *voidargs,
			      const struct nfs4_xdr_opaque_data *ff_opaque)
{
	const struct nfs4_layoutreturn_args *args = voidargs;
	struct nfs4_flexfile_layoutreturn_args *ff_args = ff_opaque->data;
	struct xdr_buf tmp_buf = {
		.head = {
			[0] = {
				.iov_base = page_address(ff_args->pages[0]),
			},
		},
		.buflen = PAGE_SIZE,
	};
	struct xdr_stream tmp_xdr;
	__be32 *start;

	dprintk("%s: Begin\n", __func__);

	/*
	 * Encode into a scratch page first so that the length of the
	 * opaque body is known before it is copied into the stream.
	 */
	xdr_init_encode(&tmp_xdr, &tmp_buf, NULL, NULL);

	ff_layout_encode_ioerr(&tmp_xdr, args, ff_args);
	ff_layout_encode_iostats_array(&tmp_xdr, args, ff_args);

	start = xdr_reserve_space(xdr, 4);
	*start = cpu_to_be32(tmp_buf.len);
	xdr_write_pages(xdr, ff_args->pages, 0, tmp_buf.len);

	dprintk("%s: Return\n", __func__);
}

static void
ff_layout_free_layoutreturn(struct nfs4_xdr_opaque_data *args)
{
	struct nfs4_flexfile_layoutreturn_args *ff_args;

	if (!args->data)
		return;
	ff_args = args->data;
	args->data = NULL;

	ff_layout_free_ds_ioerr(&ff_args->errors);
	ff_layout_free_iostats_array(ff_args->devinfo, ff_args->num_dev);

	put_page(ff_args->pages[0]);
	kfree(ff_args);
}

static const struct nfs4_xdr_opaque_ops layoutreturn_ops = {
	.encode = ff_layout_encode_layoutreturn,
	.free = ff_layout_free_layoutreturn,
};
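
/*
 * Assemble the driver-private payload for LAYOUTRETURN: any DS I/O
 * errors recorded against the layout (capped at FF_LAYOUTRETURN_MAXERR)
 * plus a snapshot of the per-mirror statistics, taken under the
 * inode's i_lock.
 */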
static int
ff_layout_prepare_layoutreturn(struct nfs4_layoutreturn_args *args)
{
	struct nfs4_flexfile_layoutreturn_args *ff_args;
	struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(args->layout);

	ff_args = kmalloc(sizeof(*ff_args), GFP_KERNEL);
	if (!ff_args)
		goto out_nomem;
	ff_args->pages[0] = alloc_page(GFP_KERNEL);
	if (!ff_args->pages[0])
		goto out_nomem_free;

	INIT_LIST_HEAD(&ff_args->errors);
	ff_args->num_errors = ff_layout_fetch_ds_ioerr(args->layout,
			&args->range, &ff_args->errors,
			FF_LAYOUTRETURN_MAXERR);

	spin_lock(&args->inode->i_lock);
	ff_args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr,
			&ff_args->devinfo[0], ARRAY_SIZE(ff_args->devinfo));
	spin_unlock(&args->inode->i_lock);

	args->ld_private->ops = &layoutreturn_ops;
	args->ld_private->data = ff_args;
	return 0;
out_nomem_free:
	kfree(ff_args);
out_nomem:
	return -ENOMEM;
}

#ifdef CONFIG_NFS_V4_2
void
ff_layout_send_layouterror(struct pnfs_layout_segment *lseg)
{
	struct pnfs_layout_hdr *lo = lseg->pls_layout;
	struct nfs42_layout_error *errors;
	LIST_HEAD(head);

	if (!nfs_server_capable(lo->plh_inode, NFS_CAP_LAYOUTERROR))
		return;
	ff_layout_fetch_ds_ioerr(lo, &lseg->pls_range, &head, -1);
	if (list_empty(&head))
		return;

	errors = kmalloc_array(NFS42_LAYOUTERROR_MAX,
			       sizeof(*errors), GFP_NOFS);
	if (errors != NULL) {
		const struct nfs4_ff_layout_ds_err *pos;
		size_t n = 0;

		list_for_each_entry(pos, &head, list) {
			errors[n].offset = pos->offset;
			errors[n].length = pos->length;
			nfs4_stateid_copy(&errors[n].stateid, &pos->stateid);
			errors[n].errors[0].dev_id = pos->deviceid;
			errors[n].errors[0].status = pos->status;
			errors[n].errors[0].opnum = pos->opnum;
			n++;
			if (!list_is_last(&pos->list, &head) &&
			    n < NFS42_LAYOUTERROR_MAX)
				continue;
			/* Flush a full batch, or the final partial one */
			if (nfs42_proc_layouterror(lseg, errors, n) < 0)
				break;
			n = 0;
		}
		kfree(errors);
	}
	ff_layout_free_ds_ioerr(&head);
}
#else
void
ff_layout_send_layouterror(struct pnfs_layout_segment *lseg)
{
}
#endif
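
/*
 * Helpers for rendering a DS address in the universal address format
 * used by netaddr4 (RFC 5665): the presentation address followed by
 * the port as two dotted octets, e.g. 192.0.2.1 port 2049 becomes
 * "192.0.2.1.8.1".
 */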
2255 */ 2256 if (ipv6_addr_v4mapped(addr)) 2257 return snprintf(buf, buflen, "::ffff:%pI4", 2258 &addr->s6_addr32[3]); 2259 2260 /* 2261 * RFC 4291, Section 2.2.1 2262 */ 2263 return snprintf(buf, buflen, "%pI6c", addr); 2264 } 2265 2266 /* Derived from rpc_sockaddr2uaddr */ 2267 static void 2268 ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da) 2269 { 2270 struct sockaddr *sap = (struct sockaddr *)&da->da_addr; 2271 char portbuf[RPCBIND_MAXUADDRPLEN]; 2272 char addrbuf[RPCBIND_MAXUADDRLEN]; 2273 char *netid; 2274 unsigned short port; 2275 int len, netid_len; 2276 __be32 *p; 2277 2278 switch (sap->sa_family) { 2279 case AF_INET: 2280 if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0) 2281 return; 2282 port = ntohs(((struct sockaddr_in *)sap)->sin_port); 2283 netid = "tcp"; 2284 netid_len = 3; 2285 break; 2286 case AF_INET6: 2287 if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0) 2288 return; 2289 port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port); 2290 netid = "tcp6"; 2291 netid_len = 4; 2292 break; 2293 default: 2294 /* we only support tcp and tcp6 */ 2295 WARN_ON_ONCE(1); 2296 return; 2297 } 2298 2299 snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff); 2300 len = strlcat(addrbuf, portbuf, sizeof(addrbuf)); 2301 2302 p = xdr_reserve_space(xdr, 4 + netid_len); 2303 xdr_encode_opaque(p, netid, netid_len); 2304 2305 p = xdr_reserve_space(xdr, 4 + len); 2306 xdr_encode_opaque(p, addrbuf, len); 2307 } 2308 2309 static void 2310 ff_layout_encode_nfstime(struct xdr_stream *xdr, 2311 ktime_t t) 2312 { 2313 struct timespec64 ts; 2314 __be32 *p; 2315 2316 p = xdr_reserve_space(xdr, 12); 2317 ts = ktime_to_timespec64(t); 2318 p = xdr_encode_hyper(p, ts.tv_sec); 2319 *p++ = cpu_to_be32(ts.tv_nsec); 2320 } 2321 2322 static void 2323 ff_layout_encode_io_latency(struct xdr_stream *xdr, 2324 struct nfs4_ff_io_stat *stat) 2325 { 2326 __be32 *p; 2327 2328 p = xdr_reserve_space(xdr, 5 * 8); 2329 p = xdr_encode_hyper(p, stat->ops_requested); 2330 p = xdr_encode_hyper(p, stat->bytes_requested); 2331 p = xdr_encode_hyper(p, stat->ops_completed); 2332 p = xdr_encode_hyper(p, stat->bytes_completed); 2333 p = xdr_encode_hyper(p, stat->bytes_not_delivered); 2334 ff_layout_encode_nfstime(xdr, stat->total_busy_time); 2335 ff_layout_encode_nfstime(xdr, stat->aggregate_completion_time); 2336 } 2337 2338 static void 2339 ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr, 2340 const struct nfs42_layoutstat_devinfo *devinfo, 2341 struct nfs4_ff_layout_mirror *mirror) 2342 { 2343 struct nfs4_pnfs_ds_addr *da; 2344 struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds; 2345 struct nfs_fh *fh = &mirror->fh_versions[0]; 2346 __be32 *p; 2347 2348 da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node); 2349 dprintk("%s: DS %s: encoding address %s\n", 2350 __func__, ds->ds_remotestr, da->da_remotestr); 2351 /* netaddr4 */ 2352 ff_layout_encode_netaddr(xdr, da); 2353 /* nfs_fh4 */ 2354 p = xdr_reserve_space(xdr, 4 + fh->size); 2355 xdr_encode_opaque(p, fh->data, fh->size); 2356 /* ff_io_latency4 read */ 2357 spin_lock(&mirror->lock); 2358 ff_layout_encode_io_latency(xdr, &mirror->read_stat.io_stat); 2359 /* ff_io_latency4 write */ 2360 ff_layout_encode_io_latency(xdr, &mirror->write_stat.io_stat); 2361 spin_unlock(&mirror->lock); 2362 /* nfstime4 */ 2363 ff_layout_encode_nfstime(xdr, ktime_sub(ktime_get(), mirror->start_time)); 2364 /* bool */ 2365 p = xdr_reserve_space(xdr, 4); 2366 *p = cpu_to_be32(false); 2367 } 2368 2369 static void 2370 
static void
ff_layout_encode_layoutstats(struct xdr_stream *xdr, const void *args,
			     const struct nfs4_xdr_opaque_data *opaque)
{
	struct nfs42_layoutstat_devinfo *devinfo = container_of(opaque,
			struct nfs42_layoutstat_devinfo, ld_private);
	__be32 *start;

	/* layoutupdate length */
	start = xdr_reserve_space(xdr, 4);
	ff_layout_encode_ff_layoutupdate(xdr, devinfo, opaque->data);

	*start = cpu_to_be32((xdr->p - start - 1) * 4);
}

static void
ff_layout_free_layoutstats(struct nfs4_xdr_opaque_data *opaque)
{
	struct nfs4_ff_layout_mirror *mirror = opaque->data;

	ff_layout_put_mirror(mirror);
}

static const struct nfs4_xdr_opaque_ops layoutstat_ops = {
	.encode = ff_layout_encode_layoutstats,
	.free = ff_layout_free_layoutstats,
};

static int
ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
			       struct nfs42_layoutstat_devinfo *devinfo,
			       int dev_limit)
{
	struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(lo);
	struct nfs4_ff_layout_mirror *mirror;
	struct nfs4_deviceid_node *dev;
	int i = 0;

	list_for_each_entry(mirror, &ff_layout->mirrors, mirrors) {
		if (i >= dev_limit)
			break;
		if (IS_ERR_OR_NULL(mirror->mirror_ds))
			continue;
		if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags))
			continue;
		/* mirror refcount is put in ff_layout_free_layoutstats */
		if (!refcount_inc_not_zero(&mirror->ref))
			continue;
		dev = &mirror->mirror_ds->id_node;
		memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE);
		devinfo->offset = 0;
		devinfo->length = NFS4_MAX_UINT64;
		spin_lock(&mirror->lock);
		devinfo->read_count = mirror->read_stat.io_stat.ops_completed;
		devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed;
		devinfo->write_count = mirror->write_stat.io_stat.ops_completed;
		devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed;
		spin_unlock(&mirror->lock);
		devinfo->layout_type = LAYOUT_FLEX_FILES;
		devinfo->ld_private.ops = &layoutstat_ops;
		devinfo->ld_private.data = mirror;

		devinfo++;
		i++;
	}
	return i;
}

static int
ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
{
	struct nfs4_flexfile_layout *ff_layout;
	const int dev_count = PNFS_LAYOUTSTATS_MAXDEV;

	/* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */
	args->devinfo = kmalloc_array(dev_count, sizeof(*args->devinfo), GFP_NOIO);
	if (!args->devinfo)
		return -ENOMEM;

	spin_lock(&args->inode->i_lock);
	ff_layout = FF_LAYOUT_FROM_HDR(NFS_I(args->inode)->layout);
	args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr,
			&args->devinfo[0], dev_count);
	spin_unlock(&args->inode->i_lock);
	if (!args->num_dev) {
		kfree(args->devinfo);
		args->devinfo = NULL;
		return -ENOENT;
	}

	return 0;
}

static int
ff_layout_set_layoutdriver(struct nfs_server *server,
			   const struct nfs_fh *dummy)
{
#if IS_ENABLED(CONFIG_NFS_V4_2)
	server->caps |= NFS_CAP_LAYOUTSTATS;
#endif
	return 0;
}
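
/*
 * Layout driver operations registered with the generic pNFS layer for
 * LAYOUT_FLEX_FILES. Commit tracking reuses the generic pNFS/NFS
 * helpers, while I/O, device handling and layoutstats are
 * flexfiles-specific.
 */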
static struct pnfs_layoutdriver_type flexfilelayout_type = {
	.id			= LAYOUT_FLEX_FILES,
	.name			= "LAYOUT_FLEX_FILES",
	.owner			= THIS_MODULE,
	.flags			= PNFS_LAYOUTGET_ON_OPEN,
	.max_layoutget_response	= 4096, /* 1 page or so... */
	.set_layoutdriver	= ff_layout_set_layoutdriver,
	.alloc_layout_hdr	= ff_layout_alloc_layout_hdr,
	.free_layout_hdr	= ff_layout_free_layout_hdr,
	.alloc_lseg		= ff_layout_alloc_lseg,
	.free_lseg		= ff_layout_free_lseg,
	.add_lseg		= ff_layout_add_lseg,
	.pg_read_ops		= &ff_layout_pg_read_ops,
	.pg_write_ops		= &ff_layout_pg_write_ops,
	.get_ds_info		= ff_layout_get_ds_info,
	.free_deviceid_node	= ff_layout_free_deviceid_node,
	.mark_request_commit	= pnfs_layout_mark_request_commit,
	.clear_request_commit	= pnfs_generic_clear_request_commit,
	.scan_commit_lists	= pnfs_generic_scan_commit_lists,
	.recover_commit_reqs	= pnfs_generic_recover_commit_reqs,
	.commit_pagelist	= ff_layout_commit_pagelist,
	.read_pagelist		= ff_layout_read_pagelist,
	.write_pagelist		= ff_layout_write_pagelist,
	.alloc_deviceid_node	= ff_layout_alloc_deviceid_node,
	.prepare_layoutreturn	= ff_layout_prepare_layoutreturn,
	.sync			= pnfs_nfs_generic_sync,
	.prepare_layoutstats	= ff_layout_prepare_layoutstats,
};

static int __init nfs4flexfilelayout_init(void)
{
	printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Registering...\n",
	       __func__);
	return pnfs_register_layoutdriver(&flexfilelayout_type);
}

static void __exit nfs4flexfilelayout_exit(void)
{
	printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Unregistering...\n",
	       __func__);
	pnfs_unregister_layoutdriver(&flexfilelayout_type);
}

MODULE_ALIAS("nfs-layouttype4-4");

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("The NFSv4 flexfile layout driver");

module_init(nfs4flexfilelayout_init);
module_exit(nfs4flexfilelayout_exit);

module_param(io_maxretrans, ushort, 0644);
MODULE_PARM_DESC(io_maxretrans, "Number of times the NFSv4.1 client "
		 "retries an I/O request before returning an error");