/*
 * Module for pnfs flexfile layout driver.
 *
 * Copyright (c) 2014, Primary Data, Inc. All rights reserved.
 *
 * Tao Peng <bergwolf@primarydata.com>
 */

#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>

#include <linux/sunrpc/metrics.h>

#include "flexfilelayout.h"
#include "../nfs4session.h"
#include "../nfs4idmap.h"
#include "../internal.h"
#include "../delegation.h"
#include "../nfs4trace.h"
#include "../iostat.h"
#include "../nfs.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS_LD

#define FF_LAYOUT_POLL_RETRY_MAX	(15*HZ)

static struct pnfs_layout_hdr *
ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
{
	struct nfs4_flexfile_layout *ffl;

	ffl = kzalloc(sizeof(*ffl), gfp_flags);
	if (ffl) {
		INIT_LIST_HEAD(&ffl->error_list);
		return &ffl->generic_hdr;
	} else
		return NULL;
}

static void
ff_layout_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct nfs4_ff_layout_ds_err *err, *n;

	list_for_each_entry_safe(err, n, &FF_LAYOUT_FROM_HDR(lo)->error_list,
				 list) {
		list_del(&err->list);
		kfree(err);
	}
	kfree(FF_LAYOUT_FROM_HDR(lo));
}

static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, NFS4_STATEID_SIZE);
	if (unlikely(p == NULL))
		return -ENOBUFS;
	memcpy(stateid, p, NFS4_STATEID_SIZE);
	dprintk("%s: stateid id= [%x%x%x%x]\n", __func__,
		p[0], p[1], p[2], p[3]);
	return 0;
}

static int decode_deviceid(struct xdr_stream *xdr, struct nfs4_deviceid *devid)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, NFS4_DEVICEID4_SIZE);
	if (unlikely(!p))
		return -ENOBUFS;
	memcpy(devid, p, NFS4_DEVICEID4_SIZE);
	nfs4_print_deviceid(devid);
	return 0;
}

static int decode_nfs_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, 4);
	if (unlikely(!p))
		return -ENOBUFS;
	fh->size = be32_to_cpup(p++);
	if (fh->size > sizeof(struct nfs_fh)) {
		printk(KERN_ERR "NFS flexfiles: Too big fh received %d\n",
		       fh->size);
		return -EOVERFLOW;
	}
	/* fh.data */
	p = xdr_inline_decode(xdr, fh->size);
	if (unlikely(!p))
		return -ENOBUFS;
	memcpy(&fh->data, p, fh->size);
	dprintk("%s: fh len %d\n", __func__, fh->size);

	return 0;
}

/*
 * Currently only stringified uids and gids are accepted.
 * I.e., kerberos is not supported to the DSes, so no principals.
 *
 * That means that one common function will suffice, but when
 * principals are added, this should be split to accommodate
 * calls to both nfs_map_name_to_uid() and nfs_map_group_to_gid().
 */
static int
decode_name(struct xdr_stream *xdr, u32 *id)
{
	__be32 *p;
	int len;

	/* opaque_length(4) */
	p = xdr_inline_decode(xdr, 4);
	if (unlikely(!p))
		return -ENOBUFS;
	len = be32_to_cpup(p++);
	if (len < 0)
		return -EINVAL;

	dprintk("%s: len %u\n", __func__, len);

	/* opaque body */
	p = xdr_inline_decode(xdr, len);
	if (unlikely(!p))
		return -ENOBUFS;

	if (!nfs_map_string_to_numeric((char *)p, len, id))
		return -EINVAL;

	return 0;
}

static void ff_layout_free_mirror_array(struct nfs4_ff_layout_segment *fls)
{
	int i;

	if (fls->mirror_array) {
		for (i = 0; i < fls->mirror_array_cnt; i++) {
			/* normally mirror_ds is freed in
			 * .free_deviceid_node but we still do it here
			 * for .alloc_lseg error path */
			if (fls->mirror_array[i]) {
				kfree(fls->mirror_array[i]->fh_versions);
				nfs4_ff_layout_put_deviceid(fls->mirror_array[i]->mirror_ds);
				kfree(fls->mirror_array[i]);
			}
		}
		kfree(fls->mirror_array);
		fls->mirror_array = NULL;
	}
}

static int ff_layout_check_layout(struct nfs4_layoutget_res *lgr)
{
	int ret = 0;

	dprintk("--> %s\n", __func__);

	/* FIXME: remove this check when layout segment support is added */
	if (lgr->range.offset != 0 ||
	    lgr->range.length != NFS4_MAX_UINT64) {
		dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
			__func__);
		ret = -EINVAL;
	}

	dprintk("--> %s returns %d\n", __func__, ret);
	return ret;
}

static void _ff_layout_free_lseg(struct nfs4_ff_layout_segment *fls)
{
	if (fls) {
		ff_layout_free_mirror_array(fls);
		kfree(fls);
	}
}

static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)
{
	struct nfs4_ff_layout_mirror *tmp;
	int i, j;

	for (i = 0; i < fls->mirror_array_cnt - 1; i++) {
		for (j = i + 1; j < fls->mirror_array_cnt; j++)
			if (fls->mirror_array[i]->efficiency <
			    fls->mirror_array[j]->efficiency) {
				tmp = fls->mirror_array[i];
				fls->mirror_array[i] = fls->mirror_array[j];
				fls->mirror_array[j] = tmp;
			}
	}
}
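
/*
 * Rough sketch of the layout body decoded by ff_layout_alloc_lseg()
 * below. This documents what the decode calls actually consume, not
 * the flex files spec itself:
 *
 *	stripe_unit		(8 bytes)
 *	mirror_array_cnt	(4 bytes)
 *	per mirror:
 *		ds_count	(4 bytes; must be 1 until striping is added)
 *		deviceid	(NFS4_DEVICEID4_SIZE)
 *		efficiency	(4 bytes)
 *		stateid		(NFS4_STATEID_SIZE)
 *		fh_count	(4 bytes), then fh_count opaque filehandles
 *		uid, gid	(opaque strings, mapped via decode_name())
 */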

static struct pnfs_layout_segment *
ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
		     struct nfs4_layoutget_res *lgr,
		     gfp_t gfp_flags)
{
	struct pnfs_layout_segment *ret;
	struct nfs4_ff_layout_segment *fls = NULL;
	struct xdr_stream stream;
	struct xdr_buf buf;
	struct page *scratch;
	u64 stripe_unit;
	u32 mirror_array_cnt;
	__be32 *p;
	int i, rc;

	dprintk("--> %s\n", __func__);
	scratch = alloc_page(gfp_flags);
	if (!scratch)
		return ERR_PTR(-ENOMEM);

	xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages,
			      lgr->layoutp->len);
	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);

	/* stripe unit and mirror_array_cnt */
	rc = -EIO;
	p = xdr_inline_decode(&stream, 8 + 4);
	if (!p)
		goto out_err_free;

	p = xdr_decode_hyper(p, &stripe_unit);
	mirror_array_cnt = be32_to_cpup(p++);
	dprintk("%s: stripe_unit=%llu mirror_array_cnt=%u\n", __func__,
		stripe_unit, mirror_array_cnt);

	if (mirror_array_cnt > NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT ||
	    mirror_array_cnt == 0)
		goto out_err_free;

	rc = -ENOMEM;
	fls = kzalloc(sizeof(*fls), gfp_flags);
	if (!fls)
		goto out_err_free;

	fls->mirror_array_cnt = mirror_array_cnt;
	fls->stripe_unit = stripe_unit;
	fls->mirror_array = kcalloc(fls->mirror_array_cnt,
				    sizeof(fls->mirror_array[0]), gfp_flags);
	if (fls->mirror_array == NULL)
		goto out_err_free;

	for (i = 0; i < fls->mirror_array_cnt; i++) {
		struct nfs4_deviceid devid;
		struct nfs4_deviceid_node *idnode;
		u32 ds_count;
		u32 fh_count;
		int j;

		rc = -EIO;
		p = xdr_inline_decode(&stream, 4);
		if (!p)
			goto out_err_free;
		ds_count = be32_to_cpup(p);

		/* FIXME: allow for striping? */
		if (ds_count != 1)
			goto out_err_free;

		fls->mirror_array[i] =
			kzalloc(sizeof(struct nfs4_ff_layout_mirror),
				gfp_flags);
		if (fls->mirror_array[i] == NULL) {
			rc = -ENOMEM;
			goto out_err_free;
		}

		spin_lock_init(&fls->mirror_array[i]->lock);
		fls->mirror_array[i]->ds_count = ds_count;

		/* deviceid */
		rc = decode_deviceid(&stream, &devid);
		if (rc)
			goto out_err_free;

		idnode = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode),
						&devid, lh->plh_lc_cred,
						gfp_flags);
		/*
		 * upon success, mirror_ds is allocated by previous
		 * getdeviceinfo, or newly by .alloc_deviceid_node;
		 * an nfs4_find_get_deviceid() failure is indeed a
		 * getdeviceinfo failure
		 */
		if (idnode)
			fls->mirror_array[i]->mirror_ds =
				FF_LAYOUT_MIRROR_DS(idnode);
		else
			goto out_err_free;

		/* efficiency */
		rc = -EIO;
		p = xdr_inline_decode(&stream, 4);
		if (!p)
			goto out_err_free;
		fls->mirror_array[i]->efficiency = be32_to_cpup(p);

		/* stateid */
		rc = decode_stateid(&stream, &fls->mirror_array[i]->stateid);
		if (rc)
			goto out_err_free;

		/* fh */
		rc = -EIO;
		p = xdr_inline_decode(&stream, 4);
		if (!p)
			goto out_err_free;
		fh_count = be32_to_cpup(p);

		fls->mirror_array[i]->fh_versions =
			kcalloc(fh_count, sizeof(struct nfs_fh),
				gfp_flags);
		if (fls->mirror_array[i]->fh_versions == NULL) {
			rc = -ENOMEM;
			goto out_err_free;
		}

		for (j = 0; j < fh_count; j++) {
			rc = decode_nfs_fh(&stream,
					   &fls->mirror_array[i]->fh_versions[j]);
			if (rc)
				goto out_err_free;
		}

		fls->mirror_array[i]->fh_versions_cnt = fh_count;

		/* user */
		rc = decode_name(&stream, &fls->mirror_array[i]->uid);
		if (rc)
			goto out_err_free;

		/* group */
		rc = decode_name(&stream, &fls->mirror_array[i]->gid);
		if (rc)
			goto out_err_free;

		dprintk("%s: uid %d gid %d\n", __func__,
			fls->mirror_array[i]->uid,
			fls->mirror_array[i]->gid);
	}

	ff_layout_sort_mirrors(fls);
	rc = ff_layout_check_layout(lgr);
	if (rc)
		goto out_err_free;

	ret = &fls->generic_hdr;
	dprintk("<-- %s (success)\n", __func__);
out_free_page:
	__free_page(scratch);
	return ret;
out_err_free:
	_ff_layout_free_lseg(fls);
	ret = ERR_PTR(rc);
	dprintk("<-- %s (%d)\n", __func__, rc);
	goto out_free_page;
}

static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout)
{
	struct pnfs_layout_segment *lseg;

	list_for_each_entry(lseg, &layout->plh_segs, pls_list)
		if (lseg->pls_range.iomode == IOMODE_RW)
			return true;

	return false;
}

static void
ff_layout_free_lseg(struct pnfs_layout_segment *lseg)
{
	struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
	int i;

	dprintk("--> %s\n", __func__);

	for (i = 0; i < fls->mirror_array_cnt; i++) {
		if (fls->mirror_array[i]) {
			nfs4_ff_layout_put_deviceid(fls->mirror_array[i]->mirror_ds);
			fls->mirror_array[i]->mirror_ds = NULL;
			if (fls->mirror_array[i]->cred) {
				put_rpccred(fls->mirror_array[i]->cred);
				fls->mirror_array[i]->cred = NULL;
			}
		}
	}

	if (lseg->pls_range.iomode == IOMODE_RW) {
		struct nfs4_flexfile_layout *ffl;
		struct inode *inode;

		ffl = FF_LAYOUT_FROM_HDR(lseg->pls_layout);
		inode = ffl->generic_hdr.plh_inode;
		spin_lock(&inode->i_lock);
		if (!ff_layout_has_rw_segments(lseg->pls_layout)) {
			ffl->commit_info.nbuckets = 0;
			kfree(ffl->commit_info.buckets);
			ffl->commit_info.buckets = NULL;
		}
		spin_unlock(&inode->i_lock);
	}
	_ff_layout_free_lseg(fls);
}

/* Return 1 until we have multiple lsegs support */
static int
ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls)
{
	return 1;
}

static int
ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,
			    struct nfs_commit_info *cinfo,
			    gfp_t gfp_flags)
{
	struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
	struct pnfs_commit_bucket *buckets;
	int size;

	if (cinfo->ds->nbuckets != 0) {
		/* This assumes there is only one RW lseg per file.
		 * To support multiple lseg per file, we need to
		 * change struct pnfs_commit_bucket to allow dynamic
		 * increasing nbuckets.
		 */
		return 0;
	}

	size = ff_layout_get_lseg_count(fls) * FF_LAYOUT_MIRROR_COUNT(lseg);

	buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
			  gfp_flags);
	if (!buckets)
		return -ENOMEM;
	else {
		int i;

		spin_lock(cinfo->lock);
		if (cinfo->ds->nbuckets != 0)
			kfree(buckets);
		else {
			cinfo->ds->buckets = buckets;
			cinfo->ds->nbuckets = size;
			for (i = 0; i < size; i++) {
				INIT_LIST_HEAD(&buckets[i].written);
				INIT_LIST_HEAD(&buckets[i].committing);
				/* mark direct verifier as unset */
				buckets[i].direct_verf.committed =
					NFS_INVALID_STABLE_HOW;
			}
		}
		spin_unlock(cinfo->lock);
		return 0;
	}
}

static struct nfs4_pnfs_ds *
ff_layout_choose_best_ds_for_read(struct nfs_pageio_descriptor *pgio,
				  int *best_idx)
{
	struct nfs4_ff_layout_segment *fls;
	struct nfs4_pnfs_ds *ds;
	int idx;

	fls = FF_LAYOUT_LSEG(pgio->pg_lseg);
	/* mirrors are sorted by efficiency */
	for (idx = 0; idx < fls->mirror_array_cnt; idx++) {
		ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, idx, false);
		if (ds) {
			*best_idx = idx;
			return ds;
		}
	}

	return NULL;
}

static void
ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
		       struct nfs_page *req)
{
	struct nfs_pgio_mirror *pgm;
	struct nfs4_ff_layout_mirror *mirror;
	struct nfs4_pnfs_ds *ds;
	int ds_idx;

	/* Use full layout for now */
	if (!pgio->pg_lseg)
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   req->wb_context,
						   0,
						   NFS4_MAX_UINT64,
						   IOMODE_READ,
						   GFP_KERNEL);
	/* If no lseg, fall back to read through mds */
	if (pgio->pg_lseg == NULL)
		goto out_mds;

	ds = ff_layout_choose_best_ds_for_read(pgio, &ds_idx);
	if (!ds)
		goto out_mds;
	mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);

	pgio->pg_mirror_idx = ds_idx;

	/* read always uses only one mirror - idx 0 for pgio layer */
	pgm = &pgio->pg_mirrors[0];
	pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;

	return;
out_mds:
	pnfs_put_lseg(pgio->pg_lseg);
	pgio->pg_lseg = NULL;
	nfs_pageio_reset_read_mds(pgio);
}
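
/*
 * Unlike the read path above, which picks the single most efficient
 * usable mirror, the write path fans each request out to every mirror:
 * pgio mirror index i maps directly to DS index i, and if any mirror
 * lacks a usable DS the whole request falls back to the MDS.
 */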

static void
ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
			struct nfs_page *req)
{
	struct nfs4_ff_layout_mirror *mirror;
	struct nfs_pgio_mirror *pgm;
	struct nfs_commit_info cinfo;
	struct nfs4_pnfs_ds *ds;
	int i;
	int status;

	if (!pgio->pg_lseg)
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   req->wb_context,
						   0,
						   NFS4_MAX_UINT64,
						   IOMODE_RW,
						   GFP_NOFS);
	/* If no lseg, fall back to write through mds */
	if (pgio->pg_lseg == NULL)
		goto out_mds;

	nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
	status = ff_layout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
	if (status < 0)
		goto out_mds;

	/* Use a direct mapping of ds_idx to pgio mirror_idx */
	if (WARN_ON_ONCE(pgio->pg_mirror_count !=
	    FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg)))
		goto out_mds;

	for (i = 0; i < pgio->pg_mirror_count; i++) {
		ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true);
		if (!ds)
			goto out_mds;
		pgm = &pgio->pg_mirrors[i];
		mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
		pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize;
	}

	return;

out_mds:
	pnfs_put_lseg(pgio->pg_lseg);
	pgio->pg_lseg = NULL;
	nfs_pageio_reset_write_mds(pgio);
}

static unsigned int
ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,
				    struct nfs_page *req)
{
	if (!pgio->pg_lseg)
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   req->wb_context,
						   0,
						   NFS4_MAX_UINT64,
						   IOMODE_RW,
						   GFP_NOFS);
	if (pgio->pg_lseg)
		return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg);

	/* no lseg means that pnfs is not in use, so no mirroring here */
	pnfs_put_lseg(pgio->pg_lseg);
	pgio->pg_lseg = NULL;
	nfs_pageio_reset_write_mds(pgio);
	return 1;
}

static const struct nfs_pageio_ops ff_layout_pg_read_ops = {
	.pg_init = ff_layout_pg_init_read,
	.pg_test = pnfs_generic_pg_test,
	.pg_doio = pnfs_generic_pg_readpages,
	.pg_cleanup = pnfs_generic_pg_cleanup,
};

static const struct nfs_pageio_ops ff_layout_pg_write_ops = {
	.pg_init = ff_layout_pg_init_write,
	.pg_test = pnfs_generic_pg_test,
	.pg_doio = pnfs_generic_pg_writepages,
	.pg_get_mirror_count = ff_layout_pg_get_mirror_count_write,
	.pg_cleanup = pnfs_generic_pg_cleanup,
};
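
/*
 * Requeue the I/O described by @hdr after a layoutcommit. With
 * @retry_pnfs the pages are queued for another pNFS attempt (for
 * direct I/O this is faked as an unstable write so the common NFS
 * code resends the pages); otherwise the request is redriven through
 * the MDS.
 */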

static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
{
	struct rpc_task *task = &hdr->task;

	pnfs_layoutcommit_inode(hdr->inode, false);

	if (retry_pnfs) {
		dprintk("%s Reset task %5u for i/o through pNFS "
			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
			hdr->task.tk_pid,
			hdr->inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(hdr->inode),
			hdr->args.count,
			(unsigned long long)hdr->args.offset);

		if (!hdr->dreq) {
			struct nfs_open_context *ctx;

			ctx = nfs_list_entry(hdr->pages.next)->wb_context;
			set_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
			hdr->completion_ops->error_cleanup(&hdr->pages);
		} else {
			nfs_direct_set_resched_writes(hdr->dreq);
			/* fake unstable write to let common nfs resend pages */
			hdr->verf.committed = NFS_UNSTABLE;
			hdr->good_bytes = 0;
		}
		return;
	}

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		dprintk("%s Reset task %5u for i/o through MDS "
			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
			hdr->task.tk_pid,
			hdr->inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(hdr->inode),
			hdr->args.count,
			(unsigned long long)hdr->args.offset);

		task->tk_status = pnfs_write_done_resend_to_mds(hdr);
	}
}

static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
{
	struct rpc_task *task = &hdr->task;

	pnfs_layoutcommit_inode(hdr->inode, false);

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		dprintk("%s Reset task %5u for i/o through MDS "
			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
			hdr->task.tk_pid,
			hdr->inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(hdr->inode),
			hdr->args.count,
			(unsigned long long)hdr->args.offset);

		task->tk_status = pnfs_read_done_resend_to_mds(hdr);
	}
}

static int ff_layout_async_handle_error_v4(struct rpc_task *task,
					   struct nfs4_state *state,
					   struct nfs_client *clp,
					   struct pnfs_layout_segment *lseg,
					   int idx)
{
	struct pnfs_layout_hdr *lo = lseg->pls_layout;
	struct inode *inode = lo->plh_inode;
	struct nfs_server *mds_server = NFS_SERVER(inode);

	struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
	struct nfs_client *mds_client = mds_server->nfs_client;
	struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;

	if (task->tk_status >= 0)
		return 0;

	switch (task->tk_status) {
	/* MDS state errors */
	case -NFS4ERR_DELEG_REVOKED:
	case -NFS4ERR_ADMIN_REVOKED:
	case -NFS4ERR_BAD_STATEID:
		if (state == NULL)
			break;
		nfs_remove_bad_delegation(state->inode);
	case -NFS4ERR_OPENMODE:
		if (state == NULL)
			break;
		if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
			goto out_bad_stateid;
		goto wait_on_recovery;
	case -NFS4ERR_EXPIRED:
		if (state != NULL) {
			if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
				goto out_bad_stateid;
		}
		nfs4_schedule_lease_recovery(mds_client);
		goto wait_on_recovery;
	/* DS session errors */
	case -NFS4ERR_BADSESSION:
	case -NFS4ERR_BADSLOT:
	case -NFS4ERR_BAD_HIGH_SLOT:
	case -NFS4ERR_DEADSESSION:
	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
	case -NFS4ERR_SEQ_FALSE_RETRY:
	case -NFS4ERR_SEQ_MISORDERED:
		dprintk("%s ERROR %d, Reset session. Exchangeid "
			"flags 0x%x\n", __func__, task->tk_status,
			clp->cl_exchange_flags);
		nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
		break;
	case -NFS4ERR_DELAY:
	case -NFS4ERR_GRACE:
		rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
		break;
	case -NFS4ERR_RETRY_UNCACHED_REP:
		break;
	/* Invalidate Layout errors */
	case -NFS4ERR_PNFS_NO_LAYOUT:
	case -ESTALE:           /* mapped NFS4ERR_STALE */
	case -EBADHANDLE:       /* mapped NFS4ERR_BADHANDLE */
	case -EISDIR:           /* mapped NFS4ERR_ISDIR */
	case -NFS4ERR_FHEXPIRED:
	case -NFS4ERR_WRONG_TYPE:
		dprintk("%s Invalid layout error %d\n", __func__,
			task->tk_status);
		/*
		 * Destroy layout so new i/o will get a new layout.
		 * Layout will not be destroyed until all current lseg
		 * references are put. Mark layout as invalid to resend failed
		 * i/o and all i/o waiting on the slot table to the MDS until
		 * layout is destroyed and a new valid layout is obtained.
		 */
		pnfs_destroy_layout(NFS_I(inode));
		rpc_wake_up(&tbl->slot_tbl_waitq);
		goto reset;
	/* RPC connection errors */
	case -ECONNREFUSED:
	case -EHOSTDOWN:
	case -EHOSTUNREACH:
	case -ENETUNREACH:
	case -EIO:
	case -ETIMEDOUT:
	case -EPIPE:
		dprintk("%s DS connection error %d\n", __func__,
			task->tk_status);
		nfs4_mark_deviceid_unavailable(devid);
		rpc_wake_up(&tbl->slot_tbl_waitq);
		/* fall through */
	default:
		if (ff_layout_has_available_ds(lseg))
			return -NFS4ERR_RESET_TO_PNFS;
reset:
		dprintk("%s Retry through MDS. Error %d\n", __func__,
			task->tk_status);
		return -NFS4ERR_RESET_TO_MDS;
	}
out:
	task->tk_status = 0;
	return -EAGAIN;
out_bad_stateid:
	task->tk_status = -EIO;
	return 0;
wait_on_recovery:
	rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
	if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
		rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
	goto out;
}

/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
					   struct pnfs_layout_segment *lseg,
					   int idx)
{
	struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);

	if (task->tk_status >= 0)
		return 0;

	if (task->tk_status != -EJUKEBOX) {
		dprintk("%s DS connection error %d\n", __func__,
			task->tk_status);
		nfs4_mark_deviceid_unavailable(devid);
		if (ff_layout_has_available_ds(lseg))
			return -NFS4ERR_RESET_TO_PNFS;
		else
			return -NFS4ERR_RESET_TO_MDS;
	}

	if (task->tk_status == -EJUKEBOX)
		nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
	task->tk_status = 0;
	rpc_restart_call(task);
	rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
	return -EAGAIN;
}

static int ff_layout_async_handle_error(struct rpc_task *task,
					struct nfs4_state *state,
					struct nfs_client *clp,
					struct pnfs_layout_segment *lseg,
					int idx)
{
	int vers = clp->cl_nfs_mod->rpc_vers->number;

	switch (vers) {
	case 3:
		return ff_layout_async_handle_error_v3(task, lseg, idx);
	case 4:
		return ff_layout_async_handle_error_v4(task, state, clp,
						       lseg, idx);
	default:
		/* should never happen */
		WARN_ON_ONCE(1);
		return 0;
	}
}

static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
					int idx, u64 offset, u64 length,
					u32 status, int opnum)
{
	struct nfs4_ff_layout_mirror *mirror;
	int err;

	mirror = FF_LAYOUT_COMP(lseg, idx);
	err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
				       mirror, offset, length, status, opnum,
				       GFP_NOIO);
	dprintk("%s: err %d op %d status %u\n", __func__, err, opnum, status);
}

/* NFS_PROTO call done callback routines */
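
/*
 * The error handlers above return a small contract to the done
 * callbacks below: 0 means the result stands, -EAGAIN means restart
 * the RPC, -NFS4ERR_RESET_TO_PNFS means retry the I/O through another
 * mirror, and -NFS4ERR_RESET_TO_MDS means give up on pNFS for this
 * layout and redrive the I/O through the MDS.
 */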

static int ff_layout_read_done_cb(struct rpc_task *task,
				  struct nfs_pgio_header *hdr)
{
	struct inode *inode;
	int err;

	trace_nfs4_pnfs_read(hdr, task->tk_status);
	if (task->tk_status == -ETIMEDOUT && !hdr->res.op_status)
		hdr->res.op_status = NFS4ERR_NXIO;
	if (task->tk_status < 0 && hdr->res.op_status)
		ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
					    hdr->args.offset, hdr->args.count,
					    hdr->res.op_status, OP_READ);
	err = ff_layout_async_handle_error(task, hdr->args.context->state,
					   hdr->ds_clp, hdr->lseg,
					   hdr->pgio_mirror_idx);

	switch (err) {
	case -NFS4ERR_RESET_TO_PNFS:
		set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
			&hdr->lseg->pls_layout->plh_flags);
		pnfs_read_resend_pnfs(hdr);
		return task->tk_status;
	case -NFS4ERR_RESET_TO_MDS:
		inode = hdr->lseg->pls_layout->plh_inode;
		pnfs_error_mark_layout_for_return(inode, hdr->lseg);
		ff_layout_reset_read(hdr);
		return task->tk_status;
	case -EAGAIN:
		rpc_restart_call_prepare(task);
		return -EAGAIN;
	}

	return 0;
}

/*
 * We reference the rpc_cred of the first WRITE that triggers the need for
 * a LAYOUTCOMMIT, and use it to send the layoutcommit compound.
 * rfc5661 is not clear about which credential should be used.
 *
 * The flexfile client should treat a FILE_SYNC reply from a DS as DATA_SYNC,
 * so to follow http://www.rfc-editor.org/errata_search.php?rfc=5661&eid=2751
 * we always send layoutcommit after DS writes.
 */
static void
ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr)
{
	pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
			      hdr->mds_offset + hdr->res.count);
	dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
		(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
}

static bool
ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx)
{
	/* No mirroring for now */
	struct nfs4_deviceid_node *node = FF_LAYOUT_DEVID_NODE(lseg, idx);

	return ff_layout_test_devid_unavailable(node);
}

static int ff_layout_read_prepare_common(struct rpc_task *task,
					 struct nfs_pgio_header *hdr)
{
	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
		rpc_exit(task, -EIO);
		return -EIO;
	}
	if (ff_layout_reset_to_mds(hdr->lseg, hdr->pgio_mirror_idx)) {
		dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
		if (ff_layout_has_available_ds(hdr->lseg))
			pnfs_read_resend_pnfs(hdr);
		else
			ff_layout_reset_read(hdr);
		rpc_exit(task, 0);
		return -EAGAIN;
	}
	hdr->pgio_done_cb = ff_layout_read_done_cb;

	return 0;
}

/*
 * Call ops for the async read/write cases
 * In the case of dense layouts, the offset needs to be reset to its
 * original value.
 */

static void ff_layout_read_prepare_v3(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_read_prepare_common(task, hdr))
		return;

	rpc_call_start(task);
}

static int ff_layout_setup_sequence(struct nfs_client *ds_clp,
				    struct nfs4_sequence_args *args,
				    struct nfs4_sequence_res *res,
				    struct rpc_task *task)
{
	if (ds_clp->cl_session)
		return nfs41_setup_sequence(ds_clp->cl_session,
					    args,
					    res,
					    task);
	return nfs40_setup_sequence(ds_clp->cl_slot_tbl,
				    args,
				    res,
				    task);
}

static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_read_prepare_common(task, hdr))
		return;

	if (ff_layout_setup_sequence(hdr->ds_clp,
				     &hdr->args.seq_args,
				     &hdr->res.seq_res,
				     task))
		return;

	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
				hdr->args.lock_context, FMODE_READ) == -EIO)
		rpc_exit(task, -EIO); /* lost lock, terminate I/O */
}

static void ff_layout_read_call_done(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
	    task->tk_status == 0) {
		nfs4_sequence_done(task, &hdr->res.seq_res);
		return;
	}

	/* Note this may cause RPC to be resent */
	hdr->mds_ops->rpc_call_done(task, hdr);
}

static void ff_layout_read_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	rpc_count_iostats_metrics(task,
	    &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]);
}

static int ff_layout_write_done_cb(struct rpc_task *task,
				   struct nfs_pgio_header *hdr)
{
	struct inode *inode;
	int err;

	trace_nfs4_pnfs_write(hdr, task->tk_status);
	if (task->tk_status == -ETIMEDOUT && !hdr->res.op_status)
		hdr->res.op_status = NFS4ERR_NXIO;
	if (task->tk_status < 0 && hdr->res.op_status)
		ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
					    hdr->args.offset, hdr->args.count,
					    hdr->res.op_status, OP_WRITE);
	err = ff_layout_async_handle_error(task, hdr->args.context->state,
					   hdr->ds_clp, hdr->lseg,
					   hdr->pgio_mirror_idx);

	switch (err) {
	case -NFS4ERR_RESET_TO_PNFS:
	case -NFS4ERR_RESET_TO_MDS:
		inode = hdr->lseg->pls_layout->plh_inode;
		pnfs_error_mark_layout_for_return(inode, hdr->lseg);
		if (err == -NFS4ERR_RESET_TO_PNFS) {
			pnfs_set_retry_layoutget(hdr->lseg->pls_layout);
			ff_layout_reset_write(hdr, true);
		} else {
			pnfs_clear_retry_layoutget(hdr->lseg->pls_layout);
			ff_layout_reset_write(hdr, false);
		}
		return task->tk_status;
	case -EAGAIN:
		rpc_restart_call_prepare(task);
		return -EAGAIN;
	}

	if (hdr->res.verf->committed == NFS_FILE_SYNC ||
	    hdr->res.verf->committed == NFS_DATA_SYNC)
		ff_layout_set_layoutcommit(hdr);

	return 0;
}
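
/*
 * Commit completion mirrors the write path: on a RESET error the
 * outstanding writes are requeued via
 * pnfs_generic_prepare_to_resend_writes(), and a COMMIT that answers
 * an unstable write triggers a layoutcommit.
 */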

static int ff_layout_commit_done_cb(struct rpc_task *task,
				    struct nfs_commit_data *data)
{
	struct inode *inode;
	int err;

	trace_nfs4_pnfs_commit_ds(data, task->tk_status);
	if (task->tk_status == -ETIMEDOUT && !data->res.op_status)
		data->res.op_status = NFS4ERR_NXIO;
	if (task->tk_status < 0 && data->res.op_status)
		ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index,
					    data->args.offset, data->args.count,
					    data->res.op_status, OP_COMMIT);
	err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
					   data->lseg, data->ds_commit_index);

	switch (err) {
	case -NFS4ERR_RESET_TO_PNFS:
	case -NFS4ERR_RESET_TO_MDS:
		inode = data->lseg->pls_layout->plh_inode;
		pnfs_error_mark_layout_for_return(inode, data->lseg);
		if (err == -NFS4ERR_RESET_TO_PNFS)
			pnfs_set_retry_layoutget(data->lseg->pls_layout);
		else
			pnfs_clear_retry_layoutget(data->lseg->pls_layout);
		pnfs_generic_prepare_to_resend_writes(data);
		return -EAGAIN;
	case -EAGAIN:
		rpc_restart_call_prepare(task);
		return -EAGAIN;
	}

	if (data->verf.committed == NFS_UNSTABLE)
		pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);

	return 0;
}

static int ff_layout_write_prepare_common(struct rpc_task *task,
					  struct nfs_pgio_header *hdr)
{
	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
		rpc_exit(task, -EIO);
		return -EIO;
	}

	if (ff_layout_reset_to_mds(hdr->lseg, hdr->pgio_mirror_idx)) {
		bool retry_pnfs;

		retry_pnfs = ff_layout_has_available_ds(hdr->lseg);
		dprintk("%s task %u reset io to %s\n", __func__,
			task->tk_pid, retry_pnfs ? "pNFS" : "MDS");
		ff_layout_reset_write(hdr, retry_pnfs);
		rpc_exit(task, 0);
		return -EAGAIN;
	}

	return 0;
}

static void ff_layout_write_prepare_v3(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_write_prepare_common(task, hdr))
		return;

	rpc_call_start(task);
}

static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_write_prepare_common(task, hdr))
		return;

	if (ff_layout_setup_sequence(hdr->ds_clp,
				     &hdr->args.seq_args,
				     &hdr->res.seq_res,
				     task))
		return;

	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
				hdr->args.lock_context, FMODE_WRITE) == -EIO)
		rpc_exit(task, -EIO); /* lost lock, terminate I/O */
}

static void ff_layout_write_call_done(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
	    task->tk_status == 0) {
		nfs4_sequence_done(task, &hdr->res.seq_res);
		return;
	}

	/* Note this may cause RPC to be resent */
	hdr->mds_ops->rpc_call_done(task, hdr);
}

static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	rpc_count_iostats_metrics(task,
	    &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
}

static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
{
	rpc_call_start(task);
}

static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data)
{
	struct nfs_commit_data *wdata = data;

	ff_layout_setup_sequence(wdata->ds_clp,
				 &wdata->args.seq_args,
				 &wdata->res.seq_res,
				 task);
}

static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_commit_data *cdata = data;

	rpc_count_iostats_metrics(task,
	    &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]);
}
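
/*
 * RPC call ops tables, one pair per operation: the v3 variants start
 * the call directly, while the v4 variants must first set up NFSv4
 * session/sequence state via ff_layout_setup_sequence().
 */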

static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
	.rpc_call_prepare = ff_layout_read_prepare_v3,
	.rpc_call_done = ff_layout_read_call_done,
	.rpc_count_stats = ff_layout_read_count_stats,
	.rpc_release = pnfs_generic_rw_release,
};

static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
	.rpc_call_prepare = ff_layout_read_prepare_v4,
	.rpc_call_done = ff_layout_read_call_done,
	.rpc_count_stats = ff_layout_read_count_stats,
	.rpc_release = pnfs_generic_rw_release,
};

static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
	.rpc_call_prepare = ff_layout_write_prepare_v3,
	.rpc_call_done = ff_layout_write_call_done,
	.rpc_count_stats = ff_layout_write_count_stats,
	.rpc_release = pnfs_generic_rw_release,
};

static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
	.rpc_call_prepare = ff_layout_write_prepare_v4,
	.rpc_call_done = ff_layout_write_call_done,
	.rpc_count_stats = ff_layout_write_count_stats,
	.rpc_release = pnfs_generic_rw_release,
};

static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = {
	.rpc_call_prepare = ff_layout_commit_prepare_v3,
	.rpc_call_done = pnfs_generic_write_commit_done,
	.rpc_count_stats = ff_layout_commit_count_stats,
	.rpc_release = pnfs_generic_commit_release,
};

static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = {
	.rpc_call_prepare = ff_layout_commit_prepare_v4,
	.rpc_call_done = pnfs_generic_write_commit_done,
	.rpc_count_stats = ff_layout_commit_count_stats,
	.rpc_release = pnfs_generic_commit_release,
};

static enum pnfs_try_status
ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
{
	struct pnfs_layout_segment *lseg = hdr->lseg;
	struct nfs4_pnfs_ds *ds;
	struct rpc_clnt *ds_clnt;
	struct rpc_cred *ds_cred;
	loff_t offset = hdr->args.offset;
	u32 idx = hdr->pgio_mirror_idx;
	int vers;
	struct nfs_fh *fh;

	dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
		__func__, hdr->inode->i_ino,
		hdr->args.pgbase, (size_t)hdr->args.count, offset);

	ds = nfs4_ff_layout_prepare_ds(lseg, idx, false);
	if (!ds)
		goto out_failed;

	ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
						   hdr->inode);
	if (IS_ERR(ds_clnt))
		goto out_failed;

	ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
	if (IS_ERR(ds_cred))
		goto out_failed;

	vers = nfs4_ff_layout_ds_version(lseg, idx);

	dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__,
		ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), vers);

	atomic_inc(&ds->ds_clp->cl_count);
	hdr->ds_clp = ds->ds_clp;
	fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
	if (fh)
		hdr->args.fh = fh;

	/*
	 * Note that if we ever decide to split across DSes,
	 * then we may need to handle dense-like offsets.
	 */
	hdr->args.offset = offset;
	hdr->mds_offset = offset;

	/* Perform an asynchronous read to ds */
	nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
			  vers == 3 ? &ff_layout_read_call_ops_v3 :
				      &ff_layout_read_call_ops_v4,
			  0, RPC_TASK_SOFTCONN);

	return PNFS_ATTEMPTED;

out_failed:
	if (ff_layout_has_available_ds(lseg))
		return PNFS_TRY_AGAIN;
	return PNFS_NOT_ATTEMPTED;
}
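
/*
 * Note the asymmetry in the failure handling: the read path above
 * returns PNFS_TRY_AGAIN while other mirrors remain usable, so the
 * pageio layer can retry via pNFS, whereas the write path below
 * returns PNFS_NOT_ATTEMPTED and lets the caller fall back to the MDS.
 */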

/* Perform async writes. */
static enum pnfs_try_status
ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
{
	struct pnfs_layout_segment *lseg = hdr->lseg;
	struct nfs4_pnfs_ds *ds;
	struct rpc_clnt *ds_clnt;
	struct rpc_cred *ds_cred;
	loff_t offset = hdr->args.offset;
	int vers;
	struct nfs_fh *fh;
	int idx = hdr->pgio_mirror_idx;

	ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
	if (!ds)
		return PNFS_NOT_ATTEMPTED;

	ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
						   hdr->inode);
	if (IS_ERR(ds_clnt))
		return PNFS_NOT_ATTEMPTED;

	ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
	if (IS_ERR(ds_cred))
		return PNFS_NOT_ATTEMPTED;

	vers = nfs4_ff_layout_ds_version(lseg, idx);

	dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d vers %d\n",
		__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
		offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count),
		vers);

	hdr->pgio_done_cb = ff_layout_write_done_cb;
	atomic_inc(&ds->ds_clp->cl_count);
	hdr->ds_clp = ds->ds_clp;
	hdr->ds_commit_idx = idx;
	fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
	if (fh)
		hdr->args.fh = fh;

	/*
	 * Note that if we ever decide to split across DSes,
	 * then we may need to handle dense-like offsets.
	 */
	hdr->args.offset = offset;

	/* Perform an asynchronous write */
	nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
			  vers == 3 ? &ff_layout_write_call_ops_v3 :
				      &ff_layout_write_call_ops_v4,
			  sync, RPC_TASK_SOFTCONN);
	return PNFS_ATTEMPTED;
}

static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
{
	return i;
}

static struct nfs_fh *
select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
{
	struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg);

	/* FIXME: Assume that there is only one NFS version available
	 * for the DS.
	 */
	return &flseg->mirror_array[i]->fh_versions[0];
}

static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
{
	struct pnfs_layout_segment *lseg = data->lseg;
	struct nfs4_pnfs_ds *ds;
	struct rpc_clnt *ds_clnt;
	struct rpc_cred *ds_cred;
	u32 idx;
	int vers;
	struct nfs_fh *fh;

	idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
	ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
	if (!ds)
		goto out_err;

	ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
						   data->inode);
	if (IS_ERR(ds_clnt))
		goto out_err;

	ds_cred = ff_layout_get_ds_cred(lseg, idx, data->cred);
	if (IS_ERR(ds_cred))
		goto out_err;

	vers = nfs4_ff_layout_ds_version(lseg, idx);

	dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__,
		data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count),
		vers);
	data->commit_done_cb = ff_layout_commit_done_cb;
	data->cred = ds_cred;
	atomic_inc(&ds->ds_clp->cl_count);
	data->ds_clp = ds->ds_clp;
	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
	if (fh)
		data->args.fh = fh;
	return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
				   vers == 3 ? &ff_layout_commit_call_ops_v3 :
					       &ff_layout_commit_call_ops_v4,
				   how, RPC_TASK_SOFTCONN);
out_err:
	pnfs_generic_prepare_to_resend_writes(data);
	pnfs_generic_commit_release(data);
	return -EAGAIN;
}

static int
ff_layout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
			  int how, struct nfs_commit_info *cinfo)
{
	return pnfs_generic_commit_pagelist(inode, mds_pages, how, cinfo,
					    ff_layout_initiate_commit);
}

static struct pnfs_ds_commit_info *
ff_layout_get_ds_info(struct inode *inode)
{
	struct pnfs_layout_hdr *layout = NFS_I(inode)->layout;

	if (layout == NULL)
		return NULL;

	return &FF_LAYOUT_FROM_HDR(layout)->commit_info;
}

static void
ff_layout_free_deviceid_node(struct nfs4_deviceid_node *d)
{
	nfs4_ff_layout_free_deviceid(container_of(d, struct nfs4_ff_layout_ds,
						  id_node));
}

static int ff_layout_encode_ioerr(struct nfs4_flexfile_layout *flo,
				  struct xdr_stream *xdr,
				  const struct nfs4_layoutreturn_args *args)
{
	struct pnfs_layout_hdr *hdr = &flo->generic_hdr;
	__be32 *start;
	int count = 0, ret = 0;

	start = xdr_reserve_space(xdr, 4);
	if (unlikely(!start))
		return -E2BIG;

	/* This assumes we always return _ALL_ layouts */
	spin_lock(&hdr->plh_inode->i_lock);
	ret = ff_layout_encode_ds_ioerr(flo, xdr, &count, &args->range);
	spin_unlock(&hdr->plh_inode->i_lock);

	*start = cpu_to_be32(count);

	return ret;
}

/* report nothing for now */
static void ff_layout_encode_iostats(struct nfs4_flexfile_layout *flo,
				     struct xdr_stream *xdr,
				     const struct nfs4_layoutreturn_args *args)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 4);
	if (likely(p))
		*p = cpu_to_be32(0);
}

static struct nfs4_deviceid_node *
ff_layout_alloc_deviceid_node(struct nfs_server *server,
			      struct pnfs_device *pdev, gfp_t gfp_flags)
{
	struct nfs4_ff_layout_ds *dsaddr;

	dsaddr = nfs4_ff_alloc_deviceid_node(server, pdev, gfp_flags);
	if (!dsaddr)
		return NULL;
	return &dsaddr->id_node;
}
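
/*
 * The LAYOUTRETURN body is an opaque whose length is only known once
 * the ioerr and iostats arrays have been encoded, so a 4-byte length
 * slot is reserved up front and backfilled from the final stream
 * position on the way out.
 */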

static void
ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo,
			      struct xdr_stream *xdr,
			      const struct nfs4_layoutreturn_args *args)
{
	struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo);
	__be32 *start;

	dprintk("%s: Begin\n", __func__);
	start = xdr_reserve_space(xdr, 4);
	BUG_ON(!start);

	if (ff_layout_encode_ioerr(flo, xdr, args))
		goto out;

	ff_layout_encode_iostats(flo, xdr, args);
out:
	*start = cpu_to_be32((xdr->p - start - 1) * 4);
	dprintk("%s: Return\n", __func__);
}

static struct pnfs_layoutdriver_type flexfilelayout_type = {
	.id			= LAYOUT_FLEX_FILES,
	.name			= "LAYOUT_FLEX_FILES",
	.owner			= THIS_MODULE,
	.alloc_layout_hdr	= ff_layout_alloc_layout_hdr,
	.free_layout_hdr	= ff_layout_free_layout_hdr,
	.alloc_lseg		= ff_layout_alloc_lseg,
	.free_lseg		= ff_layout_free_lseg,
	.pg_read_ops		= &ff_layout_pg_read_ops,
	.pg_write_ops		= &ff_layout_pg_write_ops,
	.get_ds_info		= ff_layout_get_ds_info,
	.free_deviceid_node	= ff_layout_free_deviceid_node,
	.mark_request_commit	= pnfs_layout_mark_request_commit,
	.clear_request_commit	= pnfs_generic_clear_request_commit,
	.scan_commit_lists	= pnfs_generic_scan_commit_lists,
	.recover_commit_reqs	= pnfs_generic_recover_commit_reqs,
	.commit_pagelist	= ff_layout_commit_pagelist,
	.read_pagelist		= ff_layout_read_pagelist,
	.write_pagelist		= ff_layout_write_pagelist,
	.alloc_deviceid_node	= ff_layout_alloc_deviceid_node,
	.encode_layoutreturn	= ff_layout_encode_layoutreturn,
	.sync			= pnfs_nfs_generic_sync,
};

static int __init nfs4flexfilelayout_init(void)
{
	printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Registering...\n",
	       __func__);
	return pnfs_register_layoutdriver(&flexfilelayout_type);
}

static void __exit nfs4flexfilelayout_exit(void)
{
	printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Unregistering...\n",
	       __func__);
	pnfs_unregister_layoutdriver(&flexfilelayout_type);
}

MODULE_ALIAS("nfs-layouttype4-4");

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("The NFSv4 flexfile layout driver");

module_init(nfs4flexfilelayout_init);
module_exit(nfs4flexfilelayout_exit);