/*
 *  linux/fs/nfs/blocklayout/blocklayout.c
 *
 *  Module for the NFSv4.1 pNFS block layout driver.
 *
 *  Copyright (c) 2006 The Regents of the University of Michigan.
 *  All rights reserved.
 *
 *  Andy Adamson <andros@citi.umich.edu>
 *  Fred Isaman <iisaman@umich.edu>
 *
 * permission is granted to use, copy, create derivative works and
 * redistribute this software and such derivative works for any purpose,
 * so long as the name of the university of michigan is not used in
 * any advertising or publicity pertaining to the use or distribution
 * of this software without specific, written prior authorization.  if
 * the above copyright notice or any other identification of the
 * university of michigan is included in any copy of any portion of
 * this software, then the disclaimer below must also be included.
 *
 * this software is provided as is, without representation from the
 * university of michigan as to its fitness for any purpose, and without
 * warranty by the university of michigan of any kind, either express
 * or implied, including without limitation the implied warranties of
 * merchantability and fitness for a particular purpose.  the regents
 * of the university of michigan shall not be liable for any damages,
 * including special, indirect, incidental, or consequential damages,
 * with respect to any claim arising out or in connection with the use
 * of the software, even if it has been or is hereafter advised of the
 * possibility of such damages.
31 */ 32 33 #include <linux/module.h> 34 #include <linux/init.h> 35 #include <linux/mount.h> 36 #include <linux/namei.h> 37 #include <linux/bio.h> /* struct bio */ 38 #include <linux/buffer_head.h> /* various write calls */ 39 #include <linux/prefetch.h> 40 #include <linux/pagevec.h> 41 42 #include "../pnfs.h" 43 #include "../internal.h" 44 #include "blocklayout.h" 45 46 #define NFSDBG_FACILITY NFSDBG_PNFS_LD 47 48 MODULE_LICENSE("GPL"); 49 MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>"); 50 MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); 51 52 static void print_page(struct page *page) 53 { 54 dprintk("PRINTPAGE page %p\n", page); 55 dprintk(" PagePrivate %d\n", PagePrivate(page)); 56 dprintk(" PageUptodate %d\n", PageUptodate(page)); 57 dprintk(" PageError %d\n", PageError(page)); 58 dprintk(" PageDirty %d\n", PageDirty(page)); 59 dprintk(" PageReferenced %d\n", PageReferenced(page)); 60 dprintk(" PageLocked %d\n", PageLocked(page)); 61 dprintk(" PageWriteback %d\n", PageWriteback(page)); 62 dprintk(" PageMappedToDisk %d\n", PageMappedToDisk(page)); 63 dprintk("\n"); 64 } 65 66 /* Given the be associated with isect, determine if page data needs to be 67 * initialized. 68 */ 69 static int is_hole(struct pnfs_block_extent *be, sector_t isect) 70 { 71 if (be->be_state == PNFS_BLOCK_NONE_DATA) 72 return 1; 73 else if (be->be_state != PNFS_BLOCK_INVALID_DATA) 74 return 0; 75 else 76 return !bl_is_sector_init(be->be_inval, isect); 77 } 78 79 /* Given the be associated with isect, determine if page data can be 80 * written to disk. 81 */ 82 static int is_writable(struct pnfs_block_extent *be, sector_t isect) 83 { 84 return (be->be_state == PNFS_BLOCK_READWRITE_DATA || 85 be->be_state == PNFS_BLOCK_INVALID_DATA); 86 } 87 88 /* The data we are handed might be spread across several bios. We need 89 * to track when the last one is finished. 
90 */ 91 struct parallel_io { 92 struct kref refcnt; 93 void (*pnfs_callback) (void *data, int num_se); 94 void *data; 95 int bse_count; 96 }; 97 98 static inline struct parallel_io *alloc_parallel(void *data) 99 { 100 struct parallel_io *rv; 101 102 rv = kmalloc(sizeof(*rv), GFP_NOFS); 103 if (rv) { 104 rv->data = data; 105 kref_init(&rv->refcnt); 106 rv->bse_count = 0; 107 } 108 return rv; 109 } 110 111 static inline void get_parallel(struct parallel_io *p) 112 { 113 kref_get(&p->refcnt); 114 } 115 116 static void destroy_parallel(struct kref *kref) 117 { 118 struct parallel_io *p = container_of(kref, struct parallel_io, refcnt); 119 120 dprintk("%s enter\n", __func__); 121 p->pnfs_callback(p->data, p->bse_count); 122 kfree(p); 123 } 124 125 static inline void put_parallel(struct parallel_io *p) 126 { 127 kref_put(&p->refcnt, destroy_parallel); 128 } 129 130 static struct bio * 131 bl_submit_bio(int rw, struct bio *bio) 132 { 133 if (bio) { 134 get_parallel(bio->bi_private); 135 dprintk("%s submitting %s bio %u@%llu\n", __func__, 136 rw == READ ? 
"read" : "write", 137 bio->bi_size, (unsigned long long)bio->bi_sector); 138 submit_bio(rw, bio); 139 } 140 return NULL; 141 } 142 143 static struct bio *bl_alloc_init_bio(int npg, sector_t isect, 144 struct pnfs_block_extent *be, 145 void (*end_io)(struct bio *, int err), 146 struct parallel_io *par) 147 { 148 struct bio *bio; 149 150 npg = min(npg, BIO_MAX_PAGES); 151 bio = bio_alloc(GFP_NOIO, npg); 152 if (!bio && (current->flags & PF_MEMALLOC)) { 153 while (!bio && (npg /= 2)) 154 bio = bio_alloc(GFP_NOIO, npg); 155 } 156 157 if (bio) { 158 bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; 159 bio->bi_bdev = be->be_mdev; 160 bio->bi_end_io = end_io; 161 bio->bi_private = par; 162 } 163 return bio; 164 } 165 166 static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw, 167 sector_t isect, struct page *page, 168 struct pnfs_block_extent *be, 169 void (*end_io)(struct bio *, int err), 170 struct parallel_io *par, 171 unsigned int offset, int len) 172 { 173 isect = isect + (offset >> SECTOR_SHIFT); 174 dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, 175 npg, rw, (unsigned long long)isect, offset, len); 176 retry: 177 if (!bio) { 178 bio = bl_alloc_init_bio(npg, isect, be, end_io, par); 179 if (!bio) 180 return ERR_PTR(-ENOMEM); 181 } 182 if (bio_add_page(bio, page, len, offset) < len) { 183 bio = bl_submit_bio(rw, bio); 184 goto retry; 185 } 186 return bio; 187 } 188 189 static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, 190 sector_t isect, struct page *page, 191 struct pnfs_block_extent *be, 192 void (*end_io)(struct bio *, int err), 193 struct parallel_io *par) 194 { 195 return do_add_page_to_bio(bio, npg, rw, isect, page, be, 196 end_io, par, 0, PAGE_CACHE_SIZE); 197 } 198 199 /* This is basically copied from mpage_end_io_read */ 200 static void bl_end_io_read(struct bio *bio, int err) 201 { 202 struct parallel_io *par = bio->bi_private; 203 const int uptodate = test_bit(BIO_UPTODATE, 
&bio->bi_flags); 204 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 205 206 do { 207 struct page *page = bvec->bv_page; 208 209 if (--bvec >= bio->bi_io_vec) 210 prefetchw(&bvec->bv_page->flags); 211 if (uptodate) 212 SetPageUptodate(page); 213 } while (bvec >= bio->bi_io_vec); 214 if (!uptodate) { 215 struct nfs_read_data *rdata = par->data; 216 struct nfs_pgio_header *header = rdata->header; 217 218 if (!header->pnfs_error) 219 header->pnfs_error = -EIO; 220 pnfs_set_lo_fail(header->lseg); 221 } 222 bio_put(bio); 223 put_parallel(par); 224 } 225 226 static void bl_read_cleanup(struct work_struct *work) 227 { 228 struct rpc_task *task; 229 struct nfs_read_data *rdata; 230 dprintk("%s enter\n", __func__); 231 task = container_of(work, struct rpc_task, u.tk_work); 232 rdata = container_of(task, struct nfs_read_data, task); 233 pnfs_ld_read_done(rdata); 234 } 235 236 static void 237 bl_end_par_io_read(void *data, int unused) 238 { 239 struct nfs_read_data *rdata = data; 240 241 rdata->task.tk_status = rdata->header->pnfs_error; 242 INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); 243 schedule_work(&rdata->task.u.tk_work); 244 } 245 246 static enum pnfs_try_status 247 bl_read_pagelist(struct nfs_read_data *rdata) 248 { 249 struct nfs_pgio_header *header = rdata->header; 250 int i, hole; 251 struct bio *bio = NULL; 252 struct pnfs_block_extent *be = NULL, *cow_read = NULL; 253 sector_t isect, extent_length = 0; 254 struct parallel_io *par; 255 loff_t f_offset = rdata->args.offset; 256 size_t bytes_left = rdata->args.count; 257 unsigned int pg_offset, pg_len; 258 struct page **pages = rdata->args.pages; 259 int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; 260 const bool is_dio = (header->dreq != NULL); 261 262 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, 263 rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); 264 265 par = alloc_parallel(rdata); 266 if (!par) 267 goto use_mds; 268 par->pnfs_callback = 
bl_end_par_io_read; 269 /* At this point, we can no longer jump to use_mds */ 270 271 isect = (sector_t) (f_offset >> SECTOR_SHIFT); 272 /* Code assumes extents are page-aligned */ 273 for (i = pg_index; i < rdata->pages.npages; i++) { 274 if (!extent_length) { 275 /* We've used up the previous extent */ 276 bl_put_extent(be); 277 bl_put_extent(cow_read); 278 bio = bl_submit_bio(READ, bio); 279 /* Get the next one */ 280 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), 281 isect, &cow_read); 282 if (!be) { 283 header->pnfs_error = -EIO; 284 goto out; 285 } 286 extent_length = be->be_length - 287 (isect - be->be_f_offset); 288 if (cow_read) { 289 sector_t cow_length = cow_read->be_length - 290 (isect - cow_read->be_f_offset); 291 extent_length = min(extent_length, cow_length); 292 } 293 } 294 295 if (is_dio) { 296 pg_offset = f_offset & ~PAGE_CACHE_MASK; 297 if (pg_offset + bytes_left > PAGE_CACHE_SIZE) 298 pg_len = PAGE_CACHE_SIZE - pg_offset; 299 else 300 pg_len = bytes_left; 301 302 f_offset += pg_len; 303 bytes_left -= pg_len; 304 isect += (pg_offset >> SECTOR_SHIFT); 305 } else { 306 pg_offset = 0; 307 pg_len = PAGE_CACHE_SIZE; 308 } 309 310 hole = is_hole(be, isect); 311 if (hole && !cow_read) { 312 bio = bl_submit_bio(READ, bio); 313 /* Fill hole w/ zeroes w/o accessing device */ 314 dprintk("%s Zeroing page for hole\n", __func__); 315 zero_user_segment(pages[i], pg_offset, pg_len); 316 print_page(pages[i]); 317 SetPageUptodate(pages[i]); 318 } else { 319 struct pnfs_block_extent *be_read; 320 321 be_read = (hole && cow_read) ? 
cow_read : be; 322 bio = do_add_page_to_bio(bio, rdata->pages.npages - i, 323 READ, 324 isect, pages[i], be_read, 325 bl_end_io_read, par, 326 pg_offset, pg_len); 327 if (IS_ERR(bio)) { 328 header->pnfs_error = PTR_ERR(bio); 329 bio = NULL; 330 goto out; 331 } 332 } 333 isect += (pg_len >> SECTOR_SHIFT); 334 extent_length -= PAGE_CACHE_SECTORS; 335 } 336 if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { 337 rdata->res.eof = 1; 338 rdata->res.count = header->inode->i_size - rdata->args.offset; 339 } else { 340 rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset; 341 } 342 out: 343 bl_put_extent(be); 344 bl_put_extent(cow_read); 345 bl_submit_bio(READ, bio); 346 put_parallel(par); 347 return PNFS_ATTEMPTED; 348 349 use_mds: 350 dprintk("Giving up and using normal NFS\n"); 351 return PNFS_NOT_ATTEMPTED; 352 } 353 354 static void mark_extents_written(struct pnfs_block_layout *bl, 355 __u64 offset, __u32 count) 356 { 357 sector_t isect, end; 358 struct pnfs_block_extent *be; 359 struct pnfs_block_short_extent *se; 360 361 dprintk("%s(%llu, %u)\n", __func__, offset, count); 362 if (count == 0) 363 return; 364 isect = (offset & (long)(PAGE_CACHE_MASK)) >> SECTOR_SHIFT; 365 end = (offset + count + PAGE_CACHE_SIZE - 1) & (long)(PAGE_CACHE_MASK); 366 end >>= SECTOR_SHIFT; 367 while (isect < end) { 368 sector_t len; 369 be = bl_find_get_extent(bl, isect, NULL); 370 BUG_ON(!be); /* FIXME */ 371 len = min(end, be->be_f_offset + be->be_length) - isect; 372 if (be->be_state == PNFS_BLOCK_INVALID_DATA) { 373 se = bl_pop_one_short_extent(be->be_inval); 374 BUG_ON(!se); 375 bl_mark_for_commit(be, isect, len, se); 376 } 377 isect += len; 378 bl_put_extent(be); 379 } 380 } 381 382 static void bl_end_io_write_zero(struct bio *bio, int err) 383 { 384 struct parallel_io *par = bio->bi_private; 385 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 386 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 387 388 do { 389 struct page *page = bvec->bv_page; 
390 391 if (--bvec >= bio->bi_io_vec) 392 prefetchw(&bvec->bv_page->flags); 393 /* This is the zeroing page we added */ 394 end_page_writeback(page); 395 page_cache_release(page); 396 } while (bvec >= bio->bi_io_vec); 397 398 if (unlikely(!uptodate)) { 399 struct nfs_write_data *data = par->data; 400 struct nfs_pgio_header *header = data->header; 401 402 if (!header->pnfs_error) 403 header->pnfs_error = -EIO; 404 pnfs_set_lo_fail(header->lseg); 405 } 406 bio_put(bio); 407 put_parallel(par); 408 } 409 410 static void bl_end_io_write(struct bio *bio, int err) 411 { 412 struct parallel_io *par = bio->bi_private; 413 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 414 struct nfs_write_data *data = par->data; 415 struct nfs_pgio_header *header = data->header; 416 417 if (!uptodate) { 418 if (!header->pnfs_error) 419 header->pnfs_error = -EIO; 420 pnfs_set_lo_fail(header->lseg); 421 } 422 bio_put(bio); 423 put_parallel(par); 424 } 425 426 /* Function scheduled for call during bl_end_par_io_write, 427 * it marks sectors as written and extends the commitlist. 
428 */ 429 static void bl_write_cleanup(struct work_struct *work) 430 { 431 struct rpc_task *task; 432 struct nfs_write_data *wdata; 433 dprintk("%s enter\n", __func__); 434 task = container_of(work, struct rpc_task, u.tk_work); 435 wdata = container_of(task, struct nfs_write_data, task); 436 if (likely(!wdata->header->pnfs_error)) { 437 /* Marks for LAYOUTCOMMIT */ 438 mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg), 439 wdata->args.offset, wdata->args.count); 440 } 441 pnfs_ld_write_done(wdata); 442 } 443 444 /* Called when last of bios associated with a bl_write_pagelist call finishes */ 445 static void bl_end_par_io_write(void *data, int num_se) 446 { 447 struct nfs_write_data *wdata = data; 448 449 if (unlikely(wdata->header->pnfs_error)) { 450 bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval, 451 num_se); 452 } 453 454 wdata->task.tk_status = wdata->header->pnfs_error; 455 wdata->verf.committed = NFS_FILE_SYNC; 456 INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); 457 schedule_work(&wdata->task.u.tk_work); 458 } 459 460 /* FIXME STUB - mark intersection of layout and page as bad, so is not 461 * used again. 
462 */ 463 static void mark_bad_read(void) 464 { 465 return; 466 } 467 468 /* 469 * map_block: map a requested I/0 block (isect) into an offset in the LVM 470 * block_device 471 */ 472 static void 473 map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be) 474 { 475 dprintk("%s enter be=%p\n", __func__, be); 476 477 set_buffer_mapped(bh); 478 bh->b_bdev = be->be_mdev; 479 bh->b_blocknr = (isect - be->be_f_offset + be->be_v_offset) >> 480 (be->be_mdev->bd_inode->i_blkbits - SECTOR_SHIFT); 481 482 dprintk("%s isect %llu, bh->b_blocknr %ld, using bsize %Zd\n", 483 __func__, (unsigned long long)isect, (long)bh->b_blocknr, 484 bh->b_size); 485 return; 486 } 487 488 static void 489 bl_read_single_end_io(struct bio *bio, int error) 490 { 491 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 492 struct page *page = bvec->bv_page; 493 494 /* Only one page in bvec */ 495 unlock_page(page); 496 } 497 498 static int 499 bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be, 500 unsigned int offset, unsigned int len) 501 { 502 struct bio *bio; 503 struct page *shadow_page; 504 sector_t isect; 505 char *kaddr, *kshadow_addr; 506 int ret = 0; 507 508 dprintk("%s: offset %u len %u\n", __func__, offset, len); 509 510 shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 511 if (shadow_page == NULL) 512 return -ENOMEM; 513 514 bio = bio_alloc(GFP_NOIO, 1); 515 if (bio == NULL) 516 return -ENOMEM; 517 518 isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) + 519 (offset / SECTOR_SIZE); 520 521 bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; 522 bio->bi_bdev = be->be_mdev; 523 bio->bi_end_io = bl_read_single_end_io; 524 525 lock_page(shadow_page); 526 if (bio_add_page(bio, shadow_page, 527 SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) { 528 unlock_page(shadow_page); 529 bio_put(bio); 530 return -EIO; 531 } 532 533 submit_bio(READ, bio); 534 wait_on_page_locked(shadow_page); 535 if (unlikely(!test_bit(BIO_UPTODATE, 
&bio->bi_flags))) { 536 ret = -EIO; 537 } else { 538 kaddr = kmap_atomic(page); 539 kshadow_addr = kmap_atomic(shadow_page); 540 memcpy(kaddr + offset, kshadow_addr + offset, len); 541 kunmap_atomic(kshadow_addr); 542 kunmap_atomic(kaddr); 543 } 544 __free_page(shadow_page); 545 bio_put(bio); 546 547 return ret; 548 } 549 550 static int 551 bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be, 552 unsigned int dirty_offset, unsigned int dirty_len, 553 bool full_page) 554 { 555 int ret = 0; 556 unsigned int start, end; 557 558 if (full_page) { 559 start = 0; 560 end = PAGE_CACHE_SIZE; 561 } else { 562 start = round_down(dirty_offset, SECTOR_SIZE); 563 end = round_up(dirty_offset + dirty_len, SECTOR_SIZE); 564 } 565 566 dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len); 567 if (!be) { 568 zero_user_segments(page, start, dirty_offset, 569 dirty_offset + dirty_len, end); 570 if (start == 0 && end == PAGE_CACHE_SIZE && 571 trylock_page(page)) { 572 SetPageUptodate(page); 573 unlock_page(page); 574 } 575 return ret; 576 } 577 578 if (start != dirty_offset) 579 ret = bl_do_readpage_sync(page, be, start, dirty_offset - start); 580 581 if (!ret && (dirty_offset + dirty_len < end)) 582 ret = bl_do_readpage_sync(page, be, dirty_offset + dirty_len, 583 end - dirty_offset - dirty_len); 584 585 return ret; 586 } 587 588 /* Given an unmapped page, zero it or read in page for COW, page is locked 589 * by caller. 
590 */ 591 static int 592 init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read) 593 { 594 struct buffer_head *bh = NULL; 595 int ret = 0; 596 sector_t isect; 597 598 dprintk("%s enter, %p\n", __func__, page); 599 BUG_ON(PageUptodate(page)); 600 if (!cow_read) { 601 zero_user_segment(page, 0, PAGE_SIZE); 602 SetPageUptodate(page); 603 goto cleanup; 604 } 605 606 bh = alloc_page_buffers(page, PAGE_CACHE_SIZE, 0); 607 if (!bh) { 608 ret = -ENOMEM; 609 goto cleanup; 610 } 611 612 isect = (sector_t) page->index << PAGE_CACHE_SECTOR_SHIFT; 613 map_block(bh, isect, cow_read); 614 if (!bh_uptodate_or_lock(bh)) 615 ret = bh_submit_read(bh); 616 if (ret) 617 goto cleanup; 618 SetPageUptodate(page); 619 620 cleanup: 621 if (bh) 622 free_buffer_head(bh); 623 if (ret) { 624 /* Need to mark layout with bad read...should now 625 * just use nfs4 for reads and writes. 626 */ 627 mark_bad_read(); 628 } 629 return ret; 630 } 631 632 /* Find or create a zeroing page marked being writeback. 633 * Return ERR_PTR on error, NULL to indicate skip this page and page itself 634 * to indicate write out. 
635 */ 636 static struct page * 637 bl_find_get_zeroing_page(struct inode *inode, pgoff_t index, 638 struct pnfs_block_extent *cow_read) 639 { 640 struct page *page; 641 int locked = 0; 642 page = find_get_page(inode->i_mapping, index); 643 if (page) 644 goto check_page; 645 646 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); 647 if (unlikely(!page)) { 648 dprintk("%s oom\n", __func__); 649 return ERR_PTR(-ENOMEM); 650 } 651 locked = 1; 652 653 check_page: 654 /* PageDirty: Other will write this out 655 * PageWriteback: Other is writing this out 656 * PageUptodate: It was read before 657 */ 658 if (PageDirty(page) || PageWriteback(page)) { 659 print_page(page); 660 if (locked) 661 unlock_page(page); 662 page_cache_release(page); 663 return NULL; 664 } 665 666 if (!locked) { 667 lock_page(page); 668 locked = 1; 669 goto check_page; 670 } 671 if (!PageUptodate(page)) { 672 /* New page, readin or zero it */ 673 init_page_for_write(page, cow_read); 674 } 675 set_page_writeback(page); 676 unlock_page(page); 677 678 return page; 679 } 680 681 static enum pnfs_try_status 682 bl_write_pagelist(struct nfs_write_data *wdata, int sync) 683 { 684 struct nfs_pgio_header *header = wdata->header; 685 int i, ret, npg_zero, pg_index, last = 0; 686 struct bio *bio = NULL; 687 struct pnfs_block_extent *be = NULL, *cow_read = NULL; 688 sector_t isect, last_isect = 0, extent_length = 0; 689 struct parallel_io *par = NULL; 690 loff_t offset = wdata->args.offset; 691 size_t count = wdata->args.count; 692 unsigned int pg_offset, pg_len, saved_len; 693 struct page **pages = wdata->args.pages; 694 struct page *page; 695 pgoff_t index; 696 u64 temp; 697 int npg_per_block = 698 NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; 699 700 dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); 701 702 if (header->dreq != NULL && 703 (!IS_ALIGNED(offset, NFS_SERVER(header->inode)->pnfs_blksize) || 704 !IS_ALIGNED(count, NFS_SERVER(header->inode)->pnfs_blksize))) { 705 
dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n"); 706 goto out_mds; 707 } 708 /* At this point, wdata->pages is a (sequential) list of nfs_pages. 709 * We want to write each, and if there is an error set pnfs_error 710 * to have it redone using nfs. 711 */ 712 par = alloc_parallel(wdata); 713 if (!par) 714 goto out_mds; 715 par->pnfs_callback = bl_end_par_io_write; 716 /* At this point, have to be more careful with error handling */ 717 718 isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT); 719 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read); 720 if (!be || !is_writable(be, isect)) { 721 dprintk("%s no matching extents!\n", __func__); 722 goto out_mds; 723 } 724 725 /* First page inside INVALID extent */ 726 if (be->be_state == PNFS_BLOCK_INVALID_DATA) { 727 if (likely(!bl_push_one_short_extent(be->be_inval))) 728 par->bse_count++; 729 else 730 goto out_mds; 731 temp = offset >> PAGE_CACHE_SHIFT; 732 npg_zero = do_div(temp, npg_per_block); 733 isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) & 734 (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT); 735 extent_length = be->be_length - (isect - be->be_f_offset); 736 737 fill_invalid_ext: 738 dprintk("%s need to zero %d pages\n", __func__, npg_zero); 739 for (;npg_zero > 0; npg_zero--) { 740 if (bl_is_sector_init(be->be_inval, isect)) { 741 dprintk("isect %llu already init\n", 742 (unsigned long long)isect); 743 goto next_page; 744 } 745 /* page ref released in bl_end_io_write_zero */ 746 index = isect >> PAGE_CACHE_SECTOR_SHIFT; 747 dprintk("%s zero %dth page: index %lu isect %llu\n", 748 __func__, npg_zero, index, 749 (unsigned long long)isect); 750 page = bl_find_get_zeroing_page(header->inode, index, 751 cow_read); 752 if (unlikely(IS_ERR(page))) { 753 header->pnfs_error = PTR_ERR(page); 754 goto out; 755 } else if (page == NULL) 756 goto next_page; 757 758 ret = bl_mark_sectors_init(be->be_inval, isect, 759 PAGE_CACHE_SECTORS); 760 if (unlikely(ret)) { 761 
dprintk("%s bl_mark_sectors_init fail %d\n", 762 __func__, ret); 763 end_page_writeback(page); 764 page_cache_release(page); 765 header->pnfs_error = ret; 766 goto out; 767 } 768 if (likely(!bl_push_one_short_extent(be->be_inval))) 769 par->bse_count++; 770 else { 771 end_page_writeback(page); 772 page_cache_release(page); 773 header->pnfs_error = -ENOMEM; 774 goto out; 775 } 776 /* FIXME: This should be done in bi_end_io */ 777 mark_extents_written(BLK_LSEG2EXT(header->lseg), 778 page->index << PAGE_CACHE_SHIFT, 779 PAGE_CACHE_SIZE); 780 781 bio = bl_add_page_to_bio(bio, npg_zero, WRITE, 782 isect, page, be, 783 bl_end_io_write_zero, par); 784 if (IS_ERR(bio)) { 785 header->pnfs_error = PTR_ERR(bio); 786 bio = NULL; 787 goto out; 788 } 789 next_page: 790 isect += PAGE_CACHE_SECTORS; 791 extent_length -= PAGE_CACHE_SECTORS; 792 } 793 if (last) 794 goto write_done; 795 } 796 bio = bl_submit_bio(WRITE, bio); 797 798 /* Middle pages */ 799 pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; 800 for (i = pg_index; i < wdata->pages.npages; i++) { 801 if (!extent_length) { 802 /* We've used up the previous extent */ 803 bl_put_extent(be); 804 bl_put_extent(cow_read); 805 bio = bl_submit_bio(WRITE, bio); 806 /* Get the next one */ 807 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), 808 isect, &cow_read); 809 if (!be || !is_writable(be, isect)) { 810 header->pnfs_error = -EINVAL; 811 goto out; 812 } 813 if (be->be_state == PNFS_BLOCK_INVALID_DATA) { 814 if (likely(!bl_push_one_short_extent( 815 be->be_inval))) 816 par->bse_count++; 817 else { 818 header->pnfs_error = -ENOMEM; 819 goto out; 820 } 821 } 822 extent_length = be->be_length - 823 (isect - be->be_f_offset); 824 } 825 826 dprintk("%s offset %lld count %Zu\n", __func__, offset, count); 827 pg_offset = offset & ~PAGE_CACHE_MASK; 828 if (pg_offset + count > PAGE_CACHE_SIZE) 829 pg_len = PAGE_CACHE_SIZE - pg_offset; 830 else 831 pg_len = count; 832 833 saved_len = pg_len; 834 if (be->be_state == 
PNFS_BLOCK_INVALID_DATA && 835 !bl_is_sector_init(be->be_inval, isect)) { 836 ret = bl_read_partial_page_sync(pages[i], cow_read, 837 pg_offset, pg_len, true); 838 if (ret) { 839 dprintk("%s bl_read_partial_page_sync fail %d\n", 840 __func__, ret); 841 header->pnfs_error = ret; 842 goto out; 843 } 844 845 ret = bl_mark_sectors_init(be->be_inval, isect, 846 PAGE_CACHE_SECTORS); 847 if (unlikely(ret)) { 848 dprintk("%s bl_mark_sectors_init fail %d\n", 849 __func__, ret); 850 header->pnfs_error = ret; 851 goto out; 852 } 853 854 /* Expand to full page write */ 855 pg_offset = 0; 856 pg_len = PAGE_CACHE_SIZE; 857 } else if ((pg_offset & (SECTOR_SIZE - 1)) || 858 (pg_len & (SECTOR_SIZE - 1))){ 859 /* ahh, nasty case. We have to do sync full sector 860 * read-modify-write cycles. 861 */ 862 unsigned int saved_offset = pg_offset; 863 ret = bl_read_partial_page_sync(pages[i], be, pg_offset, 864 pg_len, false); 865 pg_offset = round_down(pg_offset, SECTOR_SIZE); 866 pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE) 867 - pg_offset; 868 } 869 870 871 bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, 872 isect, pages[i], be, 873 bl_end_io_write, par, 874 pg_offset, pg_len); 875 if (IS_ERR(bio)) { 876 header->pnfs_error = PTR_ERR(bio); 877 bio = NULL; 878 goto out; 879 } 880 offset += saved_len; 881 count -= saved_len; 882 isect += PAGE_CACHE_SECTORS; 883 last_isect = isect; 884 extent_length -= PAGE_CACHE_SECTORS; 885 } 886 887 /* Last page inside INVALID extent */ 888 if (be->be_state == PNFS_BLOCK_INVALID_DATA) { 889 bio = bl_submit_bio(WRITE, bio); 890 temp = last_isect >> PAGE_CACHE_SECTOR_SHIFT; 891 npg_zero = npg_per_block - do_div(temp, npg_per_block); 892 if (npg_zero < npg_per_block) { 893 last = 1; 894 goto fill_invalid_ext; 895 } 896 } 897 898 write_done: 899 wdata->res.count = wdata->args.count; 900 out: 901 bl_put_extent(be); 902 bl_put_extent(cow_read); 903 bl_submit_bio(WRITE, bio); 904 put_parallel(par); 905 return PNFS_ATTEMPTED; 906 out_mds: 
907 bl_put_extent(be); 908 bl_put_extent(cow_read); 909 kfree(par); 910 return PNFS_NOT_ATTEMPTED; 911 } 912 913 /* FIXME - range ignored */ 914 static void 915 release_extents(struct pnfs_block_layout *bl, struct pnfs_layout_range *range) 916 { 917 int i; 918 struct pnfs_block_extent *be; 919 920 spin_lock(&bl->bl_ext_lock); 921 for (i = 0; i < EXTENT_LISTS; i++) { 922 while (!list_empty(&bl->bl_extents[i])) { 923 be = list_first_entry(&bl->bl_extents[i], 924 struct pnfs_block_extent, 925 be_node); 926 list_del(&be->be_node); 927 bl_put_extent(be); 928 } 929 } 930 spin_unlock(&bl->bl_ext_lock); 931 } 932 933 static void 934 release_inval_marks(struct pnfs_inval_markings *marks) 935 { 936 struct pnfs_inval_tracking *pos, *temp; 937 struct pnfs_block_short_extent *se, *stemp; 938 939 list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) { 940 list_del(&pos->it_link); 941 kfree(pos); 942 } 943 944 list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) { 945 list_del(&se->bse_node); 946 kfree(se); 947 } 948 return; 949 } 950 951 static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo) 952 { 953 struct pnfs_block_layout *bl = BLK_LO2EXT(lo); 954 955 dprintk("%s enter\n", __func__); 956 release_extents(bl, NULL); 957 release_inval_marks(&bl->bl_inval); 958 kfree(bl); 959 } 960 961 static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode, 962 gfp_t gfp_flags) 963 { 964 struct pnfs_block_layout *bl; 965 966 dprintk("%s enter\n", __func__); 967 bl = kzalloc(sizeof(*bl), gfp_flags); 968 if (!bl) 969 return NULL; 970 spin_lock_init(&bl->bl_ext_lock); 971 INIT_LIST_HEAD(&bl->bl_extents[0]); 972 INIT_LIST_HEAD(&bl->bl_extents[1]); 973 INIT_LIST_HEAD(&bl->bl_commit); 974 INIT_LIST_HEAD(&bl->bl_committing); 975 bl->bl_count = 0; 976 bl->bl_blocksize = NFS_SERVER(inode)->pnfs_blksize >> SECTOR_SHIFT; 977 BL_INIT_INVAL_MARKS(&bl->bl_inval, bl->bl_blocksize); 978 return &bl->bl_layout; 979 } 980 981 static void bl_free_lseg(struct 
pnfs_layout_segment *lseg) 982 { 983 dprintk("%s enter\n", __func__); 984 kfree(lseg); 985 } 986 987 /* We pretty much ignore lseg, and store all data layout wide, so we 988 * can correctly merge. 989 */ 990 static struct pnfs_layout_segment *bl_alloc_lseg(struct pnfs_layout_hdr *lo, 991 struct nfs4_layoutget_res *lgr, 992 gfp_t gfp_flags) 993 { 994 struct pnfs_layout_segment *lseg; 995 int status; 996 997 dprintk("%s enter\n", __func__); 998 lseg = kzalloc(sizeof(*lseg), gfp_flags); 999 if (!lseg) 1000 return ERR_PTR(-ENOMEM); 1001 status = nfs4_blk_process_layoutget(lo, lgr, gfp_flags); 1002 if (status) { 1003 /* We don't want to call the full-blown bl_free_lseg, 1004 * since on error extents were not touched. 1005 */ 1006 kfree(lseg); 1007 return ERR_PTR(status); 1008 } 1009 return lseg; 1010 } 1011 1012 static void 1013 bl_encode_layoutcommit(struct pnfs_layout_hdr *lo, struct xdr_stream *xdr, 1014 const struct nfs4_layoutcommit_args *arg) 1015 { 1016 dprintk("%s enter\n", __func__); 1017 encode_pnfs_block_layoutupdate(BLK_LO2EXT(lo), xdr, arg); 1018 } 1019 1020 static void 1021 bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) 1022 { 1023 struct pnfs_layout_hdr *lo = NFS_I(lcdata->args.inode)->layout; 1024 1025 dprintk("%s enter\n", __func__); 1026 clean_pnfs_block_layoutupdate(BLK_LO2EXT(lo), &lcdata->args, lcdata->res.status); 1027 } 1028 1029 static void free_blk_mountid(struct block_mount_id *mid) 1030 { 1031 if (mid) { 1032 struct pnfs_block_dev *dev, *tmp; 1033 1034 /* No need to take bm_lock as we are last user freeing bm_devlist */ 1035 list_for_each_entry_safe(dev, tmp, &mid->bm_devlist, bm_node) { 1036 list_del(&dev->bm_node); 1037 bl_free_block_dev(dev); 1038 } 1039 kfree(mid); 1040 } 1041 } 1042 1043 /* This is mostly copied from the filelayout_get_device_info function. 1044 * It seems much of this should be at the generic pnfs level. 
1045 */ 1046 static struct pnfs_block_dev * 1047 nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, 1048 struct nfs4_deviceid *d_id) 1049 { 1050 struct pnfs_device *dev; 1051 struct pnfs_block_dev *rv; 1052 u32 max_resp_sz; 1053 int max_pages; 1054 struct page **pages = NULL; 1055 int i, rc; 1056 1057 /* 1058 * Use the session max response size as the basis for setting 1059 * GETDEVICEINFO's maxcount 1060 */ 1061 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; 1062 max_pages = nfs_page_array_len(0, max_resp_sz); 1063 dprintk("%s max_resp_sz %u max_pages %d\n", 1064 __func__, max_resp_sz, max_pages); 1065 1066 dev = kmalloc(sizeof(*dev), GFP_NOFS); 1067 if (!dev) { 1068 dprintk("%s kmalloc failed\n", __func__); 1069 return ERR_PTR(-ENOMEM); 1070 } 1071 1072 pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS); 1073 if (pages == NULL) { 1074 kfree(dev); 1075 return ERR_PTR(-ENOMEM); 1076 } 1077 for (i = 0; i < max_pages; i++) { 1078 pages[i] = alloc_page(GFP_NOFS); 1079 if (!pages[i]) { 1080 rv = ERR_PTR(-ENOMEM); 1081 goto out_free; 1082 } 1083 } 1084 1085 memcpy(&dev->dev_id, d_id, sizeof(*d_id)); 1086 dev->layout_type = LAYOUT_BLOCK_VOLUME; 1087 dev->pages = pages; 1088 dev->pgbase = 0; 1089 dev->pglen = PAGE_SIZE * max_pages; 1090 dev->mincount = 0; 1091 1092 dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data); 1093 rc = nfs4_proc_getdeviceinfo(server, dev); 1094 dprintk("%s getdevice info returns %d\n", __func__, rc); 1095 if (rc) { 1096 rv = ERR_PTR(rc); 1097 goto out_free; 1098 } 1099 1100 rv = nfs4_blk_decode_device(server, dev); 1101 out_free: 1102 for (i = 0; i < max_pages; i++) 1103 __free_page(pages[i]); 1104 kfree(pages); 1105 kfree(dev); 1106 return rv; 1107 } 1108 1109 static int 1110 bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh) 1111 { 1112 struct block_mount_id *b_mt_id = NULL; 1113 struct pnfs_devicelist *dlist = NULL; 1114 struct pnfs_block_dev *bdev; 1115 
LIST_HEAD(block_disklist); 1116 int status, i; 1117 1118 dprintk("%s enter\n", __func__); 1119 1120 if (server->pnfs_blksize == 0) { 1121 dprintk("%s Server did not return blksize\n", __func__); 1122 return -EINVAL; 1123 } 1124 b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_NOFS); 1125 if (!b_mt_id) { 1126 status = -ENOMEM; 1127 goto out_error; 1128 } 1129 /* Initialize nfs4 block layout mount id */ 1130 spin_lock_init(&b_mt_id->bm_lock); 1131 INIT_LIST_HEAD(&b_mt_id->bm_devlist); 1132 1133 dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_NOFS); 1134 if (!dlist) { 1135 status = -ENOMEM; 1136 goto out_error; 1137 } 1138 dlist->eof = 0; 1139 while (!dlist->eof) { 1140 status = nfs4_proc_getdevicelist(server, fh, dlist); 1141 if (status) 1142 goto out_error; 1143 dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n", 1144 __func__, dlist->num_devs, dlist->eof); 1145 for (i = 0; i < dlist->num_devs; i++) { 1146 bdev = nfs4_blk_get_deviceinfo(server, fh, 1147 &dlist->dev_id[i]); 1148 if (IS_ERR(bdev)) { 1149 status = PTR_ERR(bdev); 1150 goto out_error; 1151 } 1152 spin_lock(&b_mt_id->bm_lock); 1153 list_add(&bdev->bm_node, &b_mt_id->bm_devlist); 1154 spin_unlock(&b_mt_id->bm_lock); 1155 } 1156 } 1157 dprintk("%s SUCCESS\n", __func__); 1158 server->pnfs_ld_data = b_mt_id; 1159 1160 out_return: 1161 kfree(dlist); 1162 return status; 1163 1164 out_error: 1165 free_blk_mountid(b_mt_id); 1166 goto out_return; 1167 } 1168 1169 static int 1170 bl_clear_layoutdriver(struct nfs_server *server) 1171 { 1172 struct block_mount_id *b_mt_id = server->pnfs_ld_data; 1173 1174 dprintk("%s enter\n", __func__); 1175 free_blk_mountid(b_mt_id); 1176 dprintk("%s RETURNS\n", __func__); 1177 return 0; 1178 } 1179 1180 static bool 1181 is_aligned_req(struct nfs_page *req, unsigned int alignment) 1182 { 1183 return IS_ALIGNED(req->wb_offset, alignment) && 1184 IS_ALIGNED(req->wb_bytes, alignment); 1185 } 1186 1187 static void 1188 bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct 
nfs_page *req) 1189 { 1190 if (pgio->pg_dreq != NULL && 1191 !is_aligned_req(req, SECTOR_SIZE)) 1192 nfs_pageio_reset_read_mds(pgio); 1193 else 1194 pnfs_generic_pg_init_read(pgio, req); 1195 } 1196 1197 static bool 1198 bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 1199 struct nfs_page *req) 1200 { 1201 if (pgio->pg_dreq != NULL && 1202 !is_aligned_req(req, SECTOR_SIZE)) 1203 return false; 1204 1205 return pnfs_generic_pg_test(pgio, prev, req); 1206 } 1207 1208 /* 1209 * Return the number of contiguous bytes for a given inode 1210 * starting at page frame idx. 1211 */ 1212 static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) 1213 { 1214 struct address_space *mapping = inode->i_mapping; 1215 pgoff_t end; 1216 1217 /* Optimize common case that writes from 0 to end of file */ 1218 end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); 1219 if (end != NFS_I(inode)->npages) { 1220 rcu_read_lock(); 1221 end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX); 1222 rcu_read_unlock(); 1223 } 1224 1225 if (!end) 1226 return i_size_read(inode) - (idx << PAGE_CACHE_SHIFT); 1227 else 1228 return (end - idx) << PAGE_CACHE_SHIFT; 1229 } 1230 1231 static void 1232 bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 1233 { 1234 if (pgio->pg_dreq != NULL && 1235 !is_aligned_req(req, PAGE_CACHE_SIZE)) { 1236 nfs_pageio_reset_write_mds(pgio); 1237 } else { 1238 u64 wb_size; 1239 if (pgio->pg_dreq == NULL) 1240 wb_size = pnfs_num_cont_bytes(pgio->pg_inode, 1241 req->wb_index); 1242 else 1243 wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); 1244 1245 pnfs_generic_pg_init_write(pgio, req, wb_size); 1246 } 1247 } 1248 1249 static bool 1250 bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 1251 struct nfs_page *req) 1252 { 1253 if (pgio->pg_dreq != NULL && 1254 !is_aligned_req(req, PAGE_CACHE_SIZE)) 1255 return false; 1256 1257 return pnfs_generic_pg_test(pgio, prev, req); 1258 } 1259 
1260 static const struct nfs_pageio_ops bl_pg_read_ops = { 1261 .pg_init = bl_pg_init_read, 1262 .pg_test = bl_pg_test_read, 1263 .pg_doio = pnfs_generic_pg_readpages, 1264 }; 1265 1266 static const struct nfs_pageio_ops bl_pg_write_ops = { 1267 .pg_init = bl_pg_init_write, 1268 .pg_test = bl_pg_test_write, 1269 .pg_doio = pnfs_generic_pg_writepages, 1270 }; 1271 1272 static struct pnfs_layoutdriver_type blocklayout_type = { 1273 .id = LAYOUT_BLOCK_VOLUME, 1274 .name = "LAYOUT_BLOCK_VOLUME", 1275 .read_pagelist = bl_read_pagelist, 1276 .write_pagelist = bl_write_pagelist, 1277 .alloc_layout_hdr = bl_alloc_layout_hdr, 1278 .free_layout_hdr = bl_free_layout_hdr, 1279 .alloc_lseg = bl_alloc_lseg, 1280 .free_lseg = bl_free_lseg, 1281 .encode_layoutcommit = bl_encode_layoutcommit, 1282 .cleanup_layoutcommit = bl_cleanup_layoutcommit, 1283 .set_layoutdriver = bl_set_layoutdriver, 1284 .clear_layoutdriver = bl_clear_layoutdriver, 1285 .pg_read_ops = &bl_pg_read_ops, 1286 .pg_write_ops = &bl_pg_write_ops, 1287 }; 1288 1289 static const struct rpc_pipe_ops bl_upcall_ops = { 1290 .upcall = rpc_pipe_generic_upcall, 1291 .downcall = bl_pipe_downcall, 1292 .destroy_msg = bl_pipe_destroy_msg, 1293 }; 1294 1295 static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb, 1296 struct rpc_pipe *pipe) 1297 { 1298 struct dentry *dir, *dentry; 1299 1300 dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME); 1301 if (dir == NULL) 1302 return ERR_PTR(-ENOENT); 1303 dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe); 1304 dput(dir); 1305 return dentry; 1306 } 1307 1308 static void nfs4blocklayout_unregister_sb(struct super_block *sb, 1309 struct rpc_pipe *pipe) 1310 { 1311 if (pipe->dentry) 1312 rpc_unlink(pipe->dentry); 1313 } 1314 1315 static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, 1316 void *ptr) 1317 { 1318 struct super_block *sb = ptr; 1319 struct net *net = sb->s_fs_info; 1320 struct nfs_net *nn = net_generic(net, nfs_net_id); 1321 struct 
dentry *dentry; 1322 int ret = 0; 1323 1324 if (!try_module_get(THIS_MODULE)) 1325 return 0; 1326 1327 if (nn->bl_device_pipe == NULL) { 1328 module_put(THIS_MODULE); 1329 return 0; 1330 } 1331 1332 switch (event) { 1333 case RPC_PIPEFS_MOUNT: 1334 dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe); 1335 if (IS_ERR(dentry)) { 1336 ret = PTR_ERR(dentry); 1337 break; 1338 } 1339 nn->bl_device_pipe->dentry = dentry; 1340 break; 1341 case RPC_PIPEFS_UMOUNT: 1342 if (nn->bl_device_pipe->dentry) 1343 nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe); 1344 break; 1345 default: 1346 ret = -ENOTSUPP; 1347 break; 1348 } 1349 module_put(THIS_MODULE); 1350 return ret; 1351 } 1352 1353 static struct notifier_block nfs4blocklayout_block = { 1354 .notifier_call = rpc_pipefs_event, 1355 }; 1356 1357 static struct dentry *nfs4blocklayout_register_net(struct net *net, 1358 struct rpc_pipe *pipe) 1359 { 1360 struct super_block *pipefs_sb; 1361 struct dentry *dentry; 1362 1363 pipefs_sb = rpc_get_sb_net(net); 1364 if (!pipefs_sb) 1365 return NULL; 1366 dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe); 1367 rpc_put_sb_net(net); 1368 return dentry; 1369 } 1370 1371 static void nfs4blocklayout_unregister_net(struct net *net, 1372 struct rpc_pipe *pipe) 1373 { 1374 struct super_block *pipefs_sb; 1375 1376 pipefs_sb = rpc_get_sb_net(net); 1377 if (pipefs_sb) { 1378 nfs4blocklayout_unregister_sb(pipefs_sb, pipe); 1379 rpc_put_sb_net(net); 1380 } 1381 } 1382 1383 static int nfs4blocklayout_net_init(struct net *net) 1384 { 1385 struct nfs_net *nn = net_generic(net, nfs_net_id); 1386 struct dentry *dentry; 1387 1388 init_waitqueue_head(&nn->bl_wq); 1389 nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); 1390 if (IS_ERR(nn->bl_device_pipe)) 1391 return PTR_ERR(nn->bl_device_pipe); 1392 dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe); 1393 if (IS_ERR(dentry)) { 1394 rpc_destroy_pipe_data(nn->bl_device_pipe); 1395 return PTR_ERR(dentry); 1396 } 1397 
nn->bl_device_pipe->dentry = dentry; 1398 return 0; 1399 } 1400 1401 static void nfs4blocklayout_net_exit(struct net *net) 1402 { 1403 struct nfs_net *nn = net_generic(net, nfs_net_id); 1404 1405 nfs4blocklayout_unregister_net(net, nn->bl_device_pipe); 1406 rpc_destroy_pipe_data(nn->bl_device_pipe); 1407 nn->bl_device_pipe = NULL; 1408 } 1409 1410 static struct pernet_operations nfs4blocklayout_net_ops = { 1411 .init = nfs4blocklayout_net_init, 1412 .exit = nfs4blocklayout_net_exit, 1413 }; 1414 1415 static int __init nfs4blocklayout_init(void) 1416 { 1417 int ret; 1418 1419 dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__); 1420 1421 ret = pnfs_register_layoutdriver(&blocklayout_type); 1422 if (ret) 1423 goto out; 1424 1425 ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block); 1426 if (ret) 1427 goto out_remove; 1428 ret = register_pernet_subsys(&nfs4blocklayout_net_ops); 1429 if (ret) 1430 goto out_notifier; 1431 out: 1432 return ret; 1433 1434 out_notifier: 1435 rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); 1436 out_remove: 1437 pnfs_unregister_layoutdriver(&blocklayout_type); 1438 return ret; 1439 } 1440 1441 static void __exit nfs4blocklayout_exit(void) 1442 { 1443 dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", 1444 __func__); 1445 1446 rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); 1447 unregister_pernet_subsys(&nfs4blocklayout_net_ops); 1448 pnfs_unregister_layoutdriver(&blocklayout_type); 1449 } 1450 1451 MODULE_ALIAS("nfs-layouttype4-3"); 1452 1453 module_init(nfs4blocklayout_init); 1454 module_exit(nfs4blocklayout_exit); 1455