// SPDX-License-Identifier: GPL-2.0
/*
 * (C) 2001 Clemson University and The University of Chicago
 * Copyright 2018 Omnibond Systems, L.L.C.
 *
 * See COPYING in top-level directory.
 */

/*
 * Linux VFS inode operations.
 */

#include <linux/bvec.h>
#include "protocol.h"
#include "orangefs-kernel.h"
#include "orangefs-bufmap.h"

/*
 * Write one locked page to the server via the shared-memory direct-I/O
 * path.  If the page carries a struct orangefs_write_range (PagePrivate),
 * only that byte range is written; otherwise the whole page is written
 * (which should not happen — hence the WARN_ON).  The caller holds the
 * page lock; writeback state is set here and must be ended by the caller.
 */
static int orangefs_writepage_locked(struct page *page,
    struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct orangefs_write_range *wr = NULL;
	struct iov_iter iter;
	struct bio_vec bv;
	size_t len, wlen;
	ssize_t ret;
	loff_t off;

	set_page_writeback(page);

	/* Clamp the write length so we never write past i_size. */
	len = i_size_read(inode);
	if (PagePrivate(page)) {
		wr = (struct orangefs_write_range *)page_private(page);
		WARN_ON(wr->pos >= len);
		off = wr->pos;
		if (off + wr->len > len)
			wlen = len - off;
		else
			wlen = wr->len;
	} else {
		/*
		 * No write range attached: fall back to writing the whole
		 * page.  This path should be unreachable.
		 */
		WARN_ON(1);
		off = page_offset(page);
		if (off + PAGE_SIZE > len)
			wlen = len - off;
		else
			wlen = PAGE_SIZE;
	}
	/* Should've been handled in orangefs_invalidatepage. */
	WARN_ON(off == len || off + wlen > len);
	/*
	 * NOTE(review): if wr->pos >= len (the WARN_ON case above), the
	 * unsigned subtraction len - off underflows wlen — confirm callers
	 * can never leave a write range entirely beyond i_size.
	 */

	bv.bv_page = page;
	bv.bv_len = wlen;
	bv.bv_offset = off % PAGE_SIZE;
	WARN_ON(wlen == 0);
	iov_iter_bvec(&iter, WRITE, &bv, 1, wlen);

	ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, wlen,
	    len, wr, NULL, NULL);
	if (ret < 0) {
		SetPageError(page);
		mapping_set_error(page->mapping, ret);
	} else {
		ret = 0;
	}
	/* The write range's page reference (taken at attach time) is dropped. */
	if (wr) {
		kfree(wr);
		set_page_private(page, 0);
		ClearPagePrivate(page);
		put_page(page);
	}
	return ret;
}

/* ->writepage: write a single page and release lock/writeback state. */
static int orangefs_writepage(struct page *page, struct writeback_control *wbc)
{
	int ret;
	ret = orangefs_writepage_locked(page, wbc);
	unlock_page(page);
	end_page_writeback(page);
	return ret;
}

/*
 * Accumulator used by ->writepages to batch contiguous dirty ranges
 * belonging to the same fsuid/fsgid into one server I/O.
 */
struct orangefs_writepages {
	loff_t off;		/* file offset of the batched range */
	size_t len;		/* length of the batched range */
	kuid_t uid;		/* credentials the batch is written under */
	kgid_t gid;
	int maxpages;		/* capacity (bufmap size / PAGE_SIZE) */
	int npages;		/* pages collected so far */
	struct page **pages;
	struct bio_vec *bv;
};

/*
 * Flush the currently batched pages in one direct I/O.  On success or
 * failure, every page's write range is freed and its lock/writeback
 * state released.
 */
static int orangefs_writepages_work(struct orangefs_writepages *ow,
    struct writeback_control *wbc)
{
	struct inode *inode = ow->pages[0]->mapping->host;
	struct orangefs_write_range *wrp, wr;
	struct iov_iter iter;
	ssize_t ret;
	size_t len;
	loff_t off;
	int i;

	len = i_size_read(inode);

	/*
	 * Build one bio_vec per page; only the first page may start at a
	 * non-zero offset, and each vec covers the intersection of the page
	 * with the batched range.
	 */
	for (i = 0; i < ow->npages; i++) {
		set_page_writeback(ow->pages[i]);
		ow->bv[i].bv_page = ow->pages[i];
		ow->bv[i].bv_len = min(page_offset(ow->pages[i]) + PAGE_SIZE,
		    ow->off + ow->len) -
		    max(ow->off, page_offset(ow->pages[i]));
		if (i == 0)
			ow->bv[i].bv_offset = ow->off -
			    page_offset(ow->pages[i]);
		else
			ow->bv[i].bv_offset = 0;
	}
	iov_iter_bvec(&iter, WRITE, ow->bv, ow->npages, ow->len);

	/*
	 * NOTE(review): ow->len is clamped to i_size only AFTER the iterator
	 * above was initialized with the unclamped length — confirm
	 * wait_for_direct_io honors the shorter ow->len argument rather than
	 * the iterator's count.
	 */
	WARN_ON(ow->off >= len);
	if (ow->off + ow->len > len)
		ow->len = len - ow->off;

	off = ow->off;
	wr.uid = ow->uid;
	wr.gid = ow->gid;
	ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, ow->len,
	    0, &wr, NULL, NULL);
	if (ret < 0) {
		/* Mark every page in the batch failed, then release it. */
		for (i = 0; i < ow->npages; i++) {
			SetPageError(ow->pages[i]);
			mapping_set_error(ow->pages[i]->mapping, ret);
			if (PagePrivate(ow->pages[i])) {
				wrp = (struct orangefs_write_range *)
				    page_private(ow->pages[i]);
				ClearPagePrivate(ow->pages[i]);
				put_page(ow->pages[i]);
				kfree(wrp);
			}
			end_page_writeback(ow->pages[i]);
			unlock_page(ow->pages[i]);
		}
	} else {
		ret = 0;
		for (i = 0; i < ow->npages; i++) {
			if (PagePrivate(ow->pages[i])) {
				wrp = (struct orangefs_write_range *)
				    page_private(ow->pages[i]);
				ClearPagePrivate(ow->pages[i]);
				put_page(ow->pages[i]);
				kfree(wrp);
			}
			end_page_writeback(ow->pages[i]);
			unlock_page(ow->pages[i]);
		}
	}
	return ret;
}

/*
 * write_cache_pages callback: try to append the page's write range to the
 * current batch.  A page that cannot be batched (different credentials,
 * non-contiguous offset) flushes the batch and is then written alone via
 * orangefs_writepage_locked.  ret == -1 is used internally to mean
 * "could not batch"; it is never returned to write_cache_pages.
 */
static int orangefs_writepages_callback(struct page *page,
    struct writeback_control *wbc, void *data)
{
	struct orangefs_writepages *ow = data;
	struct orangefs_write_range *wr;
	int ret;

	if (!PagePrivate(page)) {
		unlock_page(page);
		/* It's not private so there's nothing to write, right? */
		printk("writepages_callback not private!\n");
		BUG();
		return 0;
	}
	wr = (struct orangefs_write_range *)page_private(page);

	ret = -1;
	if (ow->npages == 0) {
		/* First page starts a new batch. */
		ow->off = wr->pos;
		ow->len = wr->len;
		ow->uid = wr->uid;
		ow->gid = wr->gid;
		ow->pages[ow->npages++] = page;
		ret = 0;
		goto done;
	}
	if (!uid_eq(ow->uid, wr->uid) || !gid_eq(ow->gid, wr->gid)) {
		/* Credentials changed: flush and fall back to solo write. */
		orangefs_writepages_work(ow, wbc);
		ow->npages = 0;
		ret = -1;
		goto done;
	}
	if (ow->off + ow->len == wr->pos) {
		/* Contiguous with the batch: extend it. */
		ow->len += wr->len;
		ow->pages[ow->npages++] = page;
		ret = 0;
		goto done;
	}
done:
	if (ret == -1) {
		if (ow->npages) {
			orangefs_writepages_work(ow, wbc);
			ow->npages = 0;
		}
		ret = orangefs_writepage_locked(page, wbc);
		mapping_set_error(page->mapping, ret);
		unlock_page(page);
		end_page_writeback(page);
	} else {
		/* Batch full: flush it now. */
		if (ow->npages == ow->maxpages) {
			orangefs_writepages_work(ow, wbc);
			ow->npages = 0;
		}
	}
	return ret;
}

/* ->writepages: batch dirty pages up to the bufmap slot size. */
static int orangefs_writepages(struct address_space *mapping,
    struct writeback_control *wbc)
{
	struct orangefs_writepages *ow;
	struct blk_plug plug;
	int ret;
	ow = kzalloc(sizeof(struct orangefs_writepages), GFP_KERNEL);
	if (!ow)
		return -ENOMEM;
	ow->maxpages = orangefs_bufmap_size_query()/PAGE_SIZE;
	ow->pages = kcalloc(ow->maxpages, sizeof(struct page *), GFP_KERNEL);
	if (!ow->pages) {
		kfree(ow);
		return -ENOMEM;
	}
	ow->bv = kcalloc(ow->maxpages, sizeof(struct bio_vec), GFP_KERNEL);
	if (!ow->bv) {
		kfree(ow->pages);
		kfree(ow);
		return -ENOMEM;
	}
	blk_start_plug(&plug);
	ret = write_cache_pages(mapping, wbc, orangefs_writepages_callback, ow);
	/* Flush whatever is left in the final batch. */
	if (ow->npages)
		ret = orangefs_writepages_work(ow, wbc);
	blk_finish_plug(&plug);
	kfree(ow->pages);
	kfree(ow->bv);
	kfree(ow);
	return ret;
}

static int orangefs_launder_page(struct page *);

/*
 * ->readpage: read the page via the shared-memory buffer, then
 * opportunistically fill following pages from the same (larger) server
 * read while the data is still in the bufmap slot.
 */
static int orangefs_readpage(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct iov_iter iter;
	struct bio_vec bv;
	ssize_t ret;
	loff_t off; /* offset into this page */
	pgoff_t index; /* which page */
	struct page *next_page;
	char *kaddr;
	loff_t read_size;
	int buffer_index = -1; /* orangefs shared memory slot */
	int slot_index; /* index into slot */
	int remaining;

	/*
	 * Get up to this many bytes from Orangefs at a time and try
	 * to fill them into the page cache at once. Tests with dd made
	 * this seem like a reasonable static number, if there was
	 * interest perhaps this number could be made setable through
	 * sysfs...
	 */
	read_size = 524288;

	/* A dirty page must be written out first so we read fresh data. */
	if (PageDirty(page))
		orangefs_launder_page(page);

	off = page_offset(page);
	index = off >> PAGE_SHIFT;
	bv.bv_page = page;
	bv.bv_len = PAGE_SIZE;
	bv.bv_offset = 0;
	iov_iter_bvec(&iter, READ, &bv, 1, PAGE_SIZE);

	ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, &off, &iter,
	    read_size, inode->i_size, NULL, &buffer_index, file);
	remaining = ret;
	/* this will only zero remaining unread portions of the page data */
	iov_iter_zero(~0U, &iter);
	/* takes care of potential aliasing */
	flush_dcache_page(page);
	if (ret < 0) {
		SetPageError(page);
		unlock_page(page);
		goto out;
	} else {
		SetPageUptodate(page);
		if (PageError(page))
			ClearPageError(page);
		ret = 0;
	}
	/* unlock the page after the ->readpage() routine completes */
	unlock_page(page);

	/* Copy any extra full pages from the bufmap slot into the cache. */
	if (remaining > PAGE_SIZE) {
		slot_index = 0;
		while ((remaining - PAGE_SIZE) >= PAGE_SIZE) {
			remaining -= PAGE_SIZE;
			/*
			 * It is an optimization to try and fill more than one
			 * page... by now we've already gotten the single
			 * page we were after, if stuff doesn't seem to
			 * be going our way at this point just return
			 * and hope for the best.
			 *
			 * If we look for pages and they're already there is
			 * one reason to give up, and if they're not there
			 * and we can't create them is another reason.
			 */

			index++;
			slot_index++;
			next_page = find_get_page(inode->i_mapping, index);
			if (next_page) {
				gossip_debug(GOSSIP_FILE_DEBUG,
				    "%s: found next page, quitting\n",
				    __func__);
				put_page(next_page);
				goto out;
			}
			next_page = find_or_create_page(inode->i_mapping,
			    index,
			    GFP_KERNEL);
			/*
			 * I've never hit this, leave it as a printk for
			 * now so it will be obvious.
			 */
			if (!next_page) {
				printk("%s: can't create next page, quitting\n",
				    __func__);
				goto out;
			}
			kaddr = kmap_atomic(next_page);
			orangefs_bufmap_page_fill(kaddr,
			    buffer_index,
			    slot_index);
			kunmap_atomic(kaddr);
			SetPageUptodate(next_page);
			unlock_page(next_page);
			put_page(next_page);
		}
	}

out:
	/* Release the shared-memory slot kept open by wait_for_direct_io. */
	if (buffer_index != -1)
		orangefs_bufmap_put(buffer_index);
	return ret;
}

/*
 * ->write_begin: attach (or extend) a struct orangefs_write_range on the
 * page describing the bytes about to be written, laundering first when the
 * existing range cannot be extended (different credentials or
 * non-contiguous position).
 */
static int orangefs_write_begin(struct file *file,
    struct address_space *mapping,
    loff_t pos, unsigned len, unsigned flags, struct page **pagep,
    void **fsdata)
{
	struct orangefs_write_range *wr;
	struct page *page;
	pgoff_t index;
	int ret;

	index = pos >> PAGE_SHIFT;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;

	*pagep = page;

	if (PageDirty(page) && !PagePrivate(page)) {
		/*
		 * Should be impossible. If it happens, launder the page
		 * since we don't know what's dirty. This will WARN in
		 * orangefs_writepage_locked.
		 */
		ret = orangefs_launder_page(page);
		if (ret)
			return ret;
		/*
		 * NOTE(review): the error returns above and below leave the
		 * page locked and referenced (grab_cache_page_write_begin
		 * returned it locked) — verify against generic
		 * ->write_begin error handling expectations.
		 */
	}
	if (PagePrivate(page)) {
		struct orangefs_write_range *wr;
		wr = (struct orangefs_write_range *)page_private(page);
		if (wr->pos + wr->len == pos &&
		    uid_eq(wr->uid, current_fsuid()) &&
		    gid_eq(wr->gid, current_fsgid())) {
			/* Contiguous write by the same user: extend range. */
			wr->len += len;
			goto okay;
		} else {
			ret = orangefs_launder_page(page);
			if (ret)
				return ret;
		}
	}

	wr = kmalloc(sizeof *wr, GFP_KERNEL);
	if (!wr)
		return -ENOMEM;

	wr->pos = pos;
	wr->len = len;
	wr->uid = current_fsuid();
	wr->gid = current_fsgid();
	SetPagePrivate(page);
	set_page_private(page, (unsigned long)wr);
	get_page(page);
okay:
	return 0;
}

/*
 * ->write_end: update i_size, zero any stale tail on a short copy, mark
 * the page dirty and release it.
 */
static int orangefs_write_end(struct file *file, struct address_space *mapping,
    loff_t pos, unsigned copied, unsigned len, struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;
	loff_t last_pos = pos + copied;

	/*
	 * No need to use i_size_read() here, the i_size
	 * cannot change under us because we hold the i_mutex.
	 */
	if (last_pos > inode->i_size)
		i_size_write(inode, last_pos);

	/* zero the stale part of the page if we did a short copy */
	if (!PageUptodate(page)) {
		unsigned from = pos & (PAGE_SIZE - 1);
		if (copied < len) {
			zero_user(page, from + copied, len - copied);
		}
		/* Set fully written pages uptodate. */
		if (pos == page_offset(page) &&
		    (len == PAGE_SIZE || pos + len == inode->i_size)) {
			zero_user_segment(page, from + copied, PAGE_SIZE);
			SetPageUptodate(page);
		}
	}

	set_page_dirty(page);
	unlock_page(page);
	put_page(page);

	mark_inode_dirty_sync(file_inode(file));
	return copied;
}

/*
 * ->invalidatepage: reconcile the page's attached write range with the
 * invalidated byte range.  The range may be dropped entirely, trimmed at
 * either end, or — when the invalidation punches a hole in the middle —
 * left alone with a WARN.  If the range was merely modified, the page is
 * laundered so the surviving bytes reach the server.
 */
static void orangefs_invalidatepage(struct page *page,
    unsigned int offset,
    unsigned int length)
{
	struct orangefs_write_range *wr;
	wr = (struct orangefs_write_range *)page_private(page);

	if (offset == 0 && length == PAGE_SIZE) {
		/* Whole page invalidated: discard the write range. */
		kfree((struct orangefs_write_range *)page_private(page));
		set_page_private(page, 0);
		ClearPagePrivate(page);
		put_page(page);
		return;
	/* write range entirely within invalidate range (or equal) */
	} else if (page_offset(page) + offset <= wr->pos &&
	    wr->pos + wr->len <= page_offset(page) + offset + length) {
		kfree((struct orangefs_write_range *)page_private(page));
		set_page_private(page, 0);
		ClearPagePrivate(page);
		put_page(page);
		/* XXX is this right? only caller in fs */
		cancel_dirty_page(page);
		return;
	/* invalidate range chops off end of write range */
	} else if (wr->pos < page_offset(page) + offset &&
	    wr->pos + wr->len <= page_offset(page) + offset + length &&
	     page_offset(page) + offset < wr->pos + wr->len) {
		size_t x;
		x = wr->pos + wr->len - (page_offset(page) + offset);
		WARN_ON(x > wr->len);
		wr->len -= x;
		wr->uid = current_fsuid();
		wr->gid = current_fsgid();
	/* invalidate range chops off beginning of write range */
	} else if (page_offset(page) + offset <= wr->pos &&
	    page_offset(page) + offset + length < wr->pos + wr->len &&
	     wr->pos < page_offset(page) + offset + length) {
		size_t x;
		x = page_offset(page) + offset + length - wr->pos;
		WARN_ON(x > wr->len);
		wr->pos += x;
		wr->len -= x;
		wr->uid = current_fsuid();
		wr->gid = current_fsgid();
	/* invalidate range entirely within write range (punch hole) */
	} else if (wr->pos < page_offset(page) + offset &&
	    page_offset(page) + offset + length < wr->pos + wr->len) {
		/* XXX what do we do here... should not WARN_ON */
		WARN_ON(1);
		/* punch hole */
		/*
		 * should we just ignore this and write it out anyway?
		 * it hardly makes sense
		 */
		return;
	/* non-overlapping ranges */
	} else {
		/* WARN if they do overlap */
		if (!((page_offset(page) + offset + length <= wr->pos) ^
		    (wr->pos + wr->len <= page_offset(page) + offset))) {
			WARN_ON(1);
			printk("invalidate range offset %llu length %u\n",
			    page_offset(page) + offset, length);
			printk("write range offset %llu length %zu\n",
			    wr->pos, wr->len);
		}
		return;
	}

	/*
	 * Above there are returns where wr is freed or where we WARN.
	 * Thus the following runs if wr was modified above.
	 */

	orangefs_launder_page(page);
}

/* ->releasepage: a page with an attached write range cannot be released. */
static int orangefs_releasepage(struct page *page, gfp_t foo)
{
	return !PagePrivate(page);
}

/* ->freepage: drop any write range still attached when the page is freed. */
static void orangefs_freepage(struct page *page)
{
	if (PagePrivate(page)) {
		kfree((struct orangefs_write_range *)page_private(page));
		set_page_private(page, 0);
		ClearPagePrivate(page);
		put_page(page);
	}
}

/*
 * ->launder_page: synchronously write a dirty locked page before it is
 * invalidated; the caller keeps the page lock.
 */
static int orangefs_launder_page(struct page *page)
{
	int r = 0;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
	};
	wait_on_page_writeback(page);
	if (clear_page_dirty_for_io(page)) {
		r = orangefs_writepage_locked(page, &wbc);
		end_page_writeback(page);
	}
	return r;
}

/*
 * ->direct_IO: transfer the iterator to/from the server in chunks of at
 * most one bufmap slot, returning the total bytes moved (or the first
 * error if nothing was transferred).
 */
static ssize_t orangefs_direct_IO(struct kiocb *iocb,
    struct iov_iter *iter)
{
	/*
	 * Comment from original do_readv_writev:
	 * Common entry point for read/write/readv/writev
	 * This function will dispatch it to either the direct I/O
	 * or buffered I/O path depending on the mount options and/or
	 * augmented/extended metadata attached to the file.
	 * Note: File extended attributes override any mount options.
	 */
	struct file *file = iocb->ki_filp;
	loff_t pos = iocb->ki_pos;
	enum ORANGEFS_io_type type = iov_iter_rw(iter) == WRITE ?
	    ORANGEFS_IO_WRITE : ORANGEFS_IO_READ;
	loff_t *offset = &pos;
	struct inode *inode = file->f_mapping->host;
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
	size_t count = iov_iter_count(iter);
	ssize_t total_count = 0;
	ssize_t ret = -EINVAL;
	int i = 0;

	gossip_debug(GOSSIP_FILE_DEBUG,
	    "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n",
	    __func__,
	    handle,
	    (int)count);

	if (type == ORANGEFS_IO_WRITE) {
		gossip_debug(GOSSIP_FILE_DEBUG,
		    "%s(%pU): proceeding with offset : %llu, "
		    "size %d\n",
		    __func__,
		    handle,
		    llu(*offset),
		    (int)count);
	}

	if (count == 0) {
		ret = 0;
		goto out;
	}

	while (iov_iter_count(iter)) {
		size_t each_count = iov_iter_count(iter);
		size_t amt_complete;
		i++;

		/* how much to transfer in this loop iteration */
		if (each_count > orangefs_bufmap_size_query())
			each_count = orangefs_bufmap_size_query();

		gossip_debug(GOSSIP_FILE_DEBUG,
		    "%s(%pU): size of each_count(%d)\n",
		    __func__,
		    handle,
		    (int)each_count);
		gossip_debug(GOSSIP_FILE_DEBUG,
		    "%s(%pU): BEFORE wait_for_io: offset is %d\n",
		    __func__,
		    handle,
		    (int)*offset);

		ret = wait_for_direct_io(type, inode, offset, iter,
		    each_count, 0, NULL, NULL, file);
		gossip_debug(GOSSIP_FILE_DEBUG,
		    "%s(%pU): return from wait_for_io:%d\n",
		    __func__,
		    handle,
		    (int)ret);

		if (ret < 0)
			goto out;

		*offset += ret;
		total_count += ret;
		amt_complete = ret;

		gossip_debug(GOSSIP_FILE_DEBUG,
		    "%s(%pU): AFTER wait_for_io: offset is %d\n",
		    __func__,
		    handle,
		    (int)*offset);

		/*
		 * if we got a short I/O operations,
		 * fall out and return what we got so far
		 */
		if (amt_complete < each_count)
			break;
	} /*end while */

out:
	/* Partial success beats a trailing error. */
	if (total_count > 0)
		ret = total_count;
	if (ret > 0) {
		if (type == ORANGEFS_IO_READ) {
			file_accessed(file);
		} else {
			file_update_time(file);
			if (*offset > i_size_read(inode))
				i_size_write(inode, *offset);
		}
	}

	gossip_debug(GOSSIP_FILE_DEBUG,
	    "%s(%pU): Value(%d) returned.\n",
	    __func__,
	    handle,
	    (int)ret);

	return ret;
}

/** ORANGEFS2 implementation of address space operations */
static const struct address_space_operations orangefs_address_operations = {
	.writepage = orangefs_writepage,
	.readpage = orangefs_readpage,
	.writepages = orangefs_writepages,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.write_begin = orangefs_write_begin,
	.write_end = orangefs_write_end,
	.invalidatepage = orangefs_invalidatepage,
	.releasepage = orangefs_releasepage,
	.freepage = orangefs_freepage,
	.launder_page = orangefs_launder_page,
	.direct_IO = orangefs_direct_IO,
};

/*
 * mmap write fault: attach or refresh a full-page write range on the
 * faulted page under the page lock, mirroring orangefs_write_begin for
 * the mmap path.
 */
vm_fault_t orangefs_page_mkwrite(struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	unsigned long *bitlock = &orangefs_inode->bitlock;
	vm_fault_t ret;
	struct orangefs_write_range *wr;

	sb_start_pagefault(inode->i_sb);

	if (wait_on_bit(bitlock, 1, TASK_KILLABLE)) {
		ret = VM_FAULT_RETRY;
		goto out;
	}

	lock_page(page);
	if (PageDirty(page) && !PagePrivate(page)) {
		/*
		 * Should be impossible. If it happens, launder the page
		 * since we don't know what's dirty. This will WARN in
		 * orangefs_writepage_locked.
		 */
		if (orangefs_launder_page(page)) {
			ret = VM_FAULT_LOCKED|VM_FAULT_RETRY;
			goto out;
		}
	}
	if (PagePrivate(page)) {
		wr = (struct orangefs_write_range *)page_private(page);
		if (uid_eq(wr->uid, current_fsuid()) &&
		    gid_eq(wr->gid, current_fsgid())) {
			/* Same user: widen the range to the whole page. */
			wr->pos = page_offset(page);
			wr->len = PAGE_SIZE;
			goto okay;
		} else {
			if (orangefs_launder_page(page)) {
				ret = VM_FAULT_LOCKED|VM_FAULT_RETRY;
				goto out;
			}
		}
	}
	wr = kmalloc(sizeof *wr, GFP_KERNEL);
	if (!wr) {
		ret = VM_FAULT_LOCKED|VM_FAULT_RETRY;
		goto out;
	}
	wr->pos = page_offset(page);
	wr->len = PAGE_SIZE;
	wr->uid = current_fsuid();
	wr->gid = current_fsgid();
	SetPagePrivate(page);
	set_page_private(page, (unsigned long)wr);
	get_page(page);
okay:

	file_update_time(vmf->vma->vm_file);
	if (page->mapping != inode->i_mapping) {
		unlock_page(page);
		ret = VM_FAULT_LOCKED|VM_FAULT_NOPAGE;
		goto out;
	}

	/*
	 * We mark the page dirty already here so that when freeze is in
	 * progress, we are guaranteed that writeback during freezing will
	 * see the dirty page and writeprotect it again.
	 */
	set_page_dirty(page);
	wait_for_stable_page(page);
	ret = VM_FAULT_LOCKED;
out:
	sb_end_pagefault(inode->i_sb);
	return ret;
}

/*
 * Truncate: shrink/extend the page cache locally, then send a TRUNCATE
 * upcall to the server.  ATTR_CTIME/ATTR_MTIME are added when the size
 * actually changed.
 */
static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr)
{
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	struct orangefs_kernel_op_s *new_op;
	loff_t orig_size;
	int ret = -EINVAL;

	gossip_debug(GOSSIP_INODE_DEBUG,
	    "%s: %pU: Handle is %pU | fs_id %d | size is %llu\n",
	    __func__,
	    get_khandle_from_ino(inode),
	    &orangefs_inode->refn.khandle,
	    orangefs_inode->refn.fs_id,
	    iattr->ia_size);

	/* Ensure that we have a up to date size, so we know if it changed. */
	ret = orangefs_inode_getattr(inode, ORANGEFS_GETATTR_SIZE);
	if (ret == -ESTALE)
		ret = -EIO;
	if (ret) {
		gossip_err("%s: orangefs_inode_getattr failed, ret:%d:.\n",
		    __func__, ret);
		return ret;
	}
	orig_size = i_size_read(inode);

	/* This is truncate_setsize in a different order. */
	truncate_pagecache(inode, iattr->ia_size);
	i_size_write(inode, iattr->ia_size);
	if (iattr->ia_size > orig_size)
		pagecache_isize_extended(inode, orig_size, iattr->ia_size);

	new_op = op_alloc(ORANGEFS_VFS_OP_TRUNCATE);
	if (!new_op)
		return -ENOMEM;

	new_op->upcall.req.truncate.refn = orangefs_inode->refn;
	new_op->upcall.req.truncate.size = (__s64) iattr->ia_size;

	ret = service_operation(new_op,
	    __func__,
	    get_interruptible_flag(inode));

	/*
	 * the truncate has no downcall members to retrieve, but
	 * the status value tells us if it went through ok or not
	 */
	gossip_debug(GOSSIP_INODE_DEBUG, "%s: ret:%d:\n", __func__, ret);

	op_release(new_op);

	if (ret != 0)
		return ret;

	if (orig_size != i_size_read(inode))
		iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;

	return ret;
}

/*
 * Core setattr: validate mode-bit restrictions (no setuid; sticky bit
 * only on root), apply size changes, then record the pending attribute
 * change on the in-core inode (attr_valid/attr_uid/attr_gid) to be
 * written back later.  If another user's change is already pending, the
 * inode is written out first.
 */
int __orangefs_setattr(struct inode *inode, struct iattr *iattr)
{
	int ret;

	if (iattr->ia_valid & ATTR_MODE) {
		if (iattr->ia_mode & (S_ISVTX)) {
			if (is_root_handle(inode)) {
				/*
				 * allow sticky bit to be set on root (since
				 * it shows up that way by default anyhow),
				 * but don't show it to the server
				 */
				iattr->ia_mode -= S_ISVTX;
			} else {
				gossip_debug(GOSSIP_UTILS_DEBUG,
				    "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
				ret = -EINVAL;
				goto out;
			}
		}
		if (iattr->ia_mode & (S_ISUID)) {
			gossip_debug(GOSSIP_UTILS_DEBUG,
			    "Attempting to set setuid bit (not supported); returning EINVAL.\n");
			ret = -EINVAL;
			goto out;
		}
	}

	if (iattr->ia_valid & ATTR_SIZE) {
		ret = orangefs_setattr_size(inode, iattr);
		if (ret)
			goto out;
	}

again:
	spin_lock(&inode->i_lock);
	if (ORANGEFS_I(inode)->attr_valid) {
		if (uid_eq(ORANGEFS_I(inode)->attr_uid, current_fsuid()) &&
		    gid_eq(ORANGEFS_I(inode)->attr_gid, current_fsgid())) {
			ORANGEFS_I(inode)->attr_valid = iattr->ia_valid;
		} else {
			/*
			 * Pending change belongs to a different user: flush
			 * it before recording ours.
			 */
			spin_unlock(&inode->i_lock);
			write_inode_now(inode, 1);
			goto again;
		}
	} else {
		ORANGEFS_I(inode)->attr_valid = iattr->ia_valid;
		ORANGEFS_I(inode)->attr_uid = current_fsuid();
		ORANGEFS_I(inode)->attr_gid = current_fsgid();
	}
	setattr_copy(inode, iattr);
	spin_unlock(&inode->i_lock);
	mark_inode_dirty(inode);

	if (iattr->ia_valid & ATTR_MODE)
		/* change mod on a file that has ACLs */
		ret = posix_acl_chmod(inode, inode->i_mode);

	/*
	 * NOTE(review): the posix_acl_chmod() result above is immediately
	 * overwritten by ret = 0 — confirm whether ACL-chmod failures are
	 * meant to be ignored here.
	 */
	ret = 0;
out:
	return ret;
}

/*
 * Change attributes of an object referenced by dentry.
 */
int orangefs_setattr(struct dentry *dentry, struct iattr *iattr)
{
	int ret;
	gossip_debug(GOSSIP_INODE_DEBUG, "__orangefs_setattr: called on %pd\n",
	    dentry);
	ret = setattr_prepare(dentry, iattr);
	if (ret)
		goto out;
	ret = __orangefs_setattr(d_inode(dentry), iattr);
	sync_inode_metadata(d_inode(dentry), 1);
out:
	gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_setattr: returning %d\n",
	    ret);
	return ret;
}

/*
 * Obtain attributes of an object given a dentry
 */
int orangefs_getattr(const struct path *path, struct kstat *stat,
    u32 request_mask, unsigned int flags)
{
	int ret;
	struct inode *inode = path->dentry->d_inode;

	gossip_debug(GOSSIP_INODE_DEBUG,
	    "orangefs_getattr: called on %pd mask %u\n",
	    path->dentry, request_mask);

	/* Only fetch the (expensive) size from the server when asked for. */
	ret = orangefs_inode_getattr(inode,
	    request_mask & STATX_SIZE ? ORANGEFS_GETATTR_SIZE : 0);
	if (ret == 0) {
		generic_fillattr(inode, stat);

		/* override block size reported to stat */
		if (!(request_mask & STATX_SIZE))
			stat->result_mask &= ~STATX_SIZE;

		stat->attributes_mask = STATX_ATTR_IMMUTABLE |
		    STATX_ATTR_APPEND;
		if (inode->i_flags & S_IMMUTABLE)
			stat->attributes |= STATX_ATTR_IMMUTABLE;
		if (inode->i_flags & S_APPEND)
			stat->attributes |= STATX_ATTR_APPEND;
	}
	return ret;
}

/*
 * ->permission: refresh attributes from the server (cannot block in RCU
 * walk), then defer to the generic permission check.
 */
int orangefs_permission(struct inode *inode, int mask)
{
	int ret;

	if (mask & MAY_NOT_BLOCK)
		return -ECHILD;

	gossip_debug(GOSSIP_INODE_DEBUG, "%s: refreshing\n", __func__);

	/* Make sure the permission (and other common attrs) are up to date. */
	ret = orangefs_inode_getattr(inode, 0);
	if (ret < 0)
		return ret;

	return generic_permission(inode, mask);
}

/*
 * ->update_time: update timestamps locally, then queue them as a pending
 * attribute change via __orangefs_setattr.
 */
int orangefs_update_time(struct inode *inode, struct timespec64 *time, int flags)
{
	struct iattr iattr;
	gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_update_time: %pU\n",
	    get_khandle_from_ino(inode));
	generic_update_time(inode, time, flags);
	memset(&iattr, 0, sizeof iattr);
	if (flags & S_ATIME)
		iattr.ia_valid |= ATTR_ATIME;
	if (flags & S_CTIME)
		iattr.ia_valid |= ATTR_CTIME;
	if (flags & S_MTIME)
		iattr.ia_valid |= ATTR_MTIME;
	return __orangefs_setattr(inode, &iattr);
}

/* ORANGEFS2 implementation of VFS inode operations for files */
static const struct inode_operations orangefs_file_inode_operations = {
	.get_acl = orangefs_get_acl,
	.set_acl = orangefs_set_acl,
	.setattr = orangefs_setattr,
	.getattr = orangefs_getattr,
	.listxattr = orangefs_listxattr,
	.permission = orangefs_permission,
	.update_time = orangefs_update_time,
};

/*
 * Wire up i_op/i_fop/a_ops for an inode based on its file type.
 * Only regular files, symlinks and directories are supported.
 */
static int orangefs_init_iops(struct inode *inode)
{
	inode->i_mapping->a_ops = &orangefs_address_operations;

	switch (inode->i_mode & S_IFMT) {
	case S_IFREG:
		inode->i_op = &orangefs_file_inode_operations;
		inode->i_fop = &orangefs_file_operations;
		break;
	case S_IFLNK:
		inode->i_op = &orangefs_symlink_inode_operations;
		break;
	case S_IFDIR:
		inode->i_op = &orangefs_dir_inode_operations;
		inode->i_fop = &orangefs_dir_operations;
		break;
	default:
		gossip_debug(GOSSIP_INODE_DEBUG,
		    "%s: unsupported mode\n",
		    __func__);
		return -EINVAL;
	}

	return 0;
}

/*
 * Given an ORANGEFS object identifier (fsid, handle), convert it into
 * a ino_t type that will be used as a hash-index from where the handle will
 * be searched for in the VFS hash table of inodes.
 */
static inline ino_t orangefs_handle_hash(struct orangefs_object_kref *ref)
{
	if (!ref)
		return 0;
	return orangefs_khandle_to_ino(&(ref->khandle));
}

/*
 * Called to set up an inode from iget5_locked.
 */
static int orangefs_set_inode(struct inode *inode, void *data)
{
	struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data;
	ORANGEFS_I(inode)->refn.fs_id = ref->fs_id;
	ORANGEFS_I(inode)->refn.khandle = ref->khandle;
	ORANGEFS_I(inode)->attr_valid = 0;
	hash_init(ORANGEFS_I(inode)->xattr_cache);
	/* jiffies - 1 guarantees the mapping is treated as stale initially. */
	ORANGEFS_I(inode)->mapping_time = jiffies - 1;
	ORANGEFS_I(inode)->bitlock = 0;
	return 0;
}

/*
 * Called to determine if handles match.
 */
static int orangefs_test_inode(struct inode *inode, void *data)
{
	struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data;
	struct orangefs_inode_s *orangefs_inode = NULL;

	orangefs_inode = ORANGEFS_I(inode);
	/* test handles and fs_ids... */
	return (!ORANGEFS_khandle_cmp(&(orangefs_inode->refn.khandle),
	    &(ref->khandle)) &&
	    orangefs_inode->refn.fs_id == ref->fs_id);
}

/*
 * Front-end to lookup the inode-cache maintained by the VFS using the ORANGEFS
 * file handle.
 *
 * @sb: the file system super block instance.
 * @ref: The ORANGEFS object for which we are trying to locate an inode.
 */
struct inode *orangefs_iget(struct super_block *sb,
    struct orangefs_object_kref *ref)
{
	struct inode *inode = NULL;
	unsigned long hash;
	int error;

	hash = orangefs_handle_hash(ref);
	inode = iget5_locked(sb,
	    hash,
	    orangefs_test_inode,
	    orangefs_set_inode,
	    ref);

	if (!inode)
		return ERR_PTR(-ENOMEM);

	if (!(inode->i_state & I_NEW))
		return inode;

	/* Fresh inode: populate all attributes from the server. */
	error = orangefs_inode_getattr(inode, ORANGEFS_GETATTR_NEW);
	if (error) {
		iget_failed(inode);
		return ERR_PTR(error);
	}

	inode->i_ino = hash; /* needed for stat etc */
	orangefs_init_iops(inode);
	unlock_new_inode(inode);

	gossip_debug(GOSSIP_INODE_DEBUG,
	    "iget handle %pU, fsid %d hash %ld i_ino %lu\n",
	    &ref->khandle,
	    ref->fs_id,
	    hash,
	    inode->i_ino);

	return inode;
}

/*
 * Allocate an inode for a newly created file and insert it into the inode hash.
 */
struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir,
    int mode, dev_t dev, struct orangefs_object_kref *ref)
{
	unsigned long hash = orangefs_handle_hash(ref);
	struct inode *inode;
	int error;

	gossip_debug(GOSSIP_INODE_DEBUG,
	    "%s:(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n",
	    __func__,
	    sb,
	    MAJOR(dev),
	    MINOR(dev),
	    mode);

	inode = new_inode(sb);
	if (!inode)
		return ERR_PTR(-ENOMEM);

	orangefs_set_inode(inode, ref);
	inode->i_ino = hash; /* needed for stat etc */

	error = orangefs_inode_getattr(inode, ORANGEFS_GETATTR_NEW);
	if (error)
		goto out_iput;

	orangefs_init_iops(inode);
	inode->i_rdev = dev;

	error = insert_inode_locked4(inode, hash, orangefs_test_inode, ref);
	if (error < 0)
		goto out_iput;

	gossip_debug(GOSSIP_INODE_DEBUG,
	    "Initializing ACL's for inode %pU\n",
	    get_khandle_from_ino(inode));
	/* Inherit/initialize ACLs from the parent directory. */
	orangefs_init_acl(inode, dir);
	return inode;

out_iput:
	iput(inode);
	return ERR_PTR(error);
}