1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * inode.c - NILFS inode operations. 4 * 5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. 6 * 7 * Written by Ryusuke Konishi. 8 * 9 */ 10 11 #include <linux/buffer_head.h> 12 #include <linux/gfp.h> 13 #include <linux/mpage.h> 14 #include <linux/pagemap.h> 15 #include <linux/writeback.h> 16 #include <linux/uio.h> 17 #include "nilfs.h" 18 #include "btnode.h" 19 #include "segment.h" 20 #include "page.h" 21 #include "mdt.h" 22 #include "cpfile.h" 23 #include "ifile.h" 24 25 /** 26 * struct nilfs_iget_args - arguments used during comparison between inodes 27 * @ino: inode number 28 * @cno: checkpoint number 29 * @root: pointer on NILFS root object (mounted checkpoint) 30 * @for_gc: inode for GC flag 31 */ 32 struct nilfs_iget_args { 33 u64 ino; 34 __u64 cno; 35 struct nilfs_root *root; 36 int for_gc; 37 }; 38 39 static int nilfs_iget_test(struct inode *inode, void *opaque); 40 41 void nilfs_inode_add_blocks(struct inode *inode, int n) 42 { 43 struct nilfs_root *root = NILFS_I(inode)->i_root; 44 45 inode_add_bytes(inode, i_blocksize(inode) * n); 46 if (root) 47 atomic64_add(n, &root->blocks_count); 48 } 49 50 void nilfs_inode_sub_blocks(struct inode *inode, int n) 51 { 52 struct nilfs_root *root = NILFS_I(inode)->i_root; 53 54 inode_sub_bytes(inode, i_blocksize(inode) * n); 55 if (root) 56 atomic64_sub(n, &root->blocks_count); 57 } 58 59 /** 60 * nilfs_get_block() - get a file block on the filesystem (callback function) 61 * @inode - inode struct of the target file 62 * @blkoff - file block number 63 * @bh_result - buffer head to be mapped on 64 * @create - indicate whether allocating the block or not when it has not 65 * been allocated yet. 66 * 67 * This function does not issue actual read request of the specified data 68 * block. It is done by VFS. 69 */ 70 int nilfs_get_block(struct inode *inode, sector_t blkoff, 71 struct buffer_head *bh_result, int create) 72 { 73 struct nilfs_inode_info *ii = NILFS_I(inode); 74 struct the_nilfs *nilfs = inode->i_sb->s_fs_info; 75 __u64 blknum = 0; 76 int err = 0, ret; 77 unsigned int maxblocks = bh_result->b_size >> inode->i_blkbits; 78 79 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); 80 ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks); 81 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); 82 if (ret >= 0) { /* found */ 83 map_bh(bh_result, inode->i_sb, blknum); 84 if (ret > 0) 85 bh_result->b_size = (ret << inode->i_blkbits); 86 goto out; 87 } 88 /* data block was not found */ 89 if (ret == -ENOENT && create) { 90 struct nilfs_transaction_info ti; 91 92 bh_result->b_blocknr = 0; 93 err = nilfs_transaction_begin(inode->i_sb, &ti, 1); 94 if (unlikely(err)) 95 goto out; 96 err = nilfs_bmap_insert(ii->i_bmap, blkoff, 97 (unsigned long)bh_result); 98 if (unlikely(err != 0)) { 99 if (err == -EEXIST) { 100 /* 101 * The get_block() function could be called 102 * from multiple callers for an inode. 103 * However, the page having this block must 104 * be locked in this case. 105 */ 106 nilfs_msg(inode->i_sb, KERN_WARNING, 107 "%s (ino=%lu): a race condition while inserting a data block at offset=%llu", 108 __func__, inode->i_ino, 109 (unsigned long long)blkoff); 110 err = 0; 111 } 112 nilfs_transaction_abort(inode->i_sb); 113 goto out; 114 } 115 nilfs_mark_inode_dirty_sync(inode); 116 nilfs_transaction_commit(inode->i_sb); /* never fails */ 117 /* Error handling should be detailed */ 118 set_buffer_new(bh_result); 119 set_buffer_delay(bh_result); 120 map_bh(bh_result, inode->i_sb, 0); 121 /* Disk block number must be changed to proper value */ 122 123 } else if (ret == -ENOENT) { 124 /* 125 * not found is not error (e.g. hole); must return without 126 * the mapped state flag. 127 */ 128 ; 129 } else { 130 err = ret; 131 } 132 133 out: 134 return err; 135 } 136 137 /** 138 * nilfs_readpage() - implement readpage() method of nilfs_aops {} 139 * address_space_operations. 140 * @file - file struct of the file to be read 141 * @page - the page to be read 142 */ 143 static int nilfs_readpage(struct file *file, struct page *page) 144 { 145 return mpage_readpage(page, nilfs_get_block); 146 } 147 148 /** 149 * nilfs_readpages() - implement readpages() method of nilfs_aops {} 150 * address_space_operations. 151 * @file - file struct of the file to be read 152 * @mapping - address_space struct used for reading multiple pages 153 * @pages - the pages to be read 154 * @nr_pages - number of pages to be read 155 */ 156 static int nilfs_readpages(struct file *file, struct address_space *mapping, 157 struct list_head *pages, unsigned int nr_pages) 158 { 159 return mpage_readpages(mapping, pages, nr_pages, nilfs_get_block); 160 } 161 162 static int nilfs_writepages(struct address_space *mapping, 163 struct writeback_control *wbc) 164 { 165 struct inode *inode = mapping->host; 166 int err = 0; 167 168 if (sb_rdonly(inode->i_sb)) { 169 nilfs_clear_dirty_pages(mapping, false); 170 return -EROFS; 171 } 172 173 if (wbc->sync_mode == WB_SYNC_ALL) 174 err = nilfs_construct_dsync_segment(inode->i_sb, inode, 175 wbc->range_start, 176 wbc->range_end); 177 return err; 178 } 179 180 static int nilfs_writepage(struct page *page, struct writeback_control *wbc) 181 { 182 struct inode *inode = page->mapping->host; 183 int err; 184 185 if (sb_rdonly(inode->i_sb)) { 186 /* 187 * It means that filesystem was remounted in read-only 188 * mode because of error or metadata corruption. But we 189 * have dirty pages that try to be flushed in background. 190 * So, here we simply discard this dirty page. 191 */ 192 nilfs_clear_dirty_page(page, false); 193 unlock_page(page); 194 return -EROFS; 195 } 196 197 redirty_page_for_writepage(wbc, page); 198 unlock_page(page); 199 200 if (wbc->sync_mode == WB_SYNC_ALL) { 201 err = nilfs_construct_segment(inode->i_sb); 202 if (unlikely(err)) 203 return err; 204 } else if (wbc->for_reclaim) 205 nilfs_flush_segment(inode->i_sb, inode->i_ino); 206 207 return 0; 208 } 209 210 static int nilfs_set_page_dirty(struct page *page) 211 { 212 struct inode *inode = page->mapping->host; 213 int ret = __set_page_dirty_nobuffers(page); 214 215 if (page_has_buffers(page)) { 216 unsigned int nr_dirty = 0; 217 struct buffer_head *bh, *head; 218 219 /* 220 * This page is locked by callers, and no other thread 221 * concurrently marks its buffers dirty since they are 222 * only dirtied through routines in fs/buffer.c in 223 * which call sites of mark_buffer_dirty are protected 224 * by page lock. 225 */ 226 bh = head = page_buffers(page); 227 do { 228 /* Do not mark hole blocks dirty */ 229 if (buffer_dirty(bh) || !buffer_mapped(bh)) 230 continue; 231 232 set_buffer_dirty(bh); 233 nr_dirty++; 234 } while (bh = bh->b_this_page, bh != head); 235 236 if (nr_dirty) 237 nilfs_set_file_dirty(inode, nr_dirty); 238 } else if (ret) { 239 unsigned int nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits); 240 241 nilfs_set_file_dirty(inode, nr_dirty); 242 } 243 return ret; 244 } 245 246 void nilfs_write_failed(struct address_space *mapping, loff_t to) 247 { 248 struct inode *inode = mapping->host; 249 250 if (to > inode->i_size) { 251 truncate_pagecache(inode, inode->i_size); 252 nilfs_truncate(inode); 253 } 254 } 255 256 static int nilfs_write_begin(struct file *file, struct address_space *mapping, 257 loff_t pos, unsigned len, unsigned flags, 258 struct page **pagep, void **fsdata) 259 260 { 261 struct inode *inode = mapping->host; 262 int err = nilfs_transaction_begin(inode->i_sb, NULL, 1); 263 264 if (unlikely(err)) 265 return err; 266 267 err = block_write_begin(mapping, pos, len, flags, pagep, 268 nilfs_get_block); 269 if (unlikely(err)) { 270 nilfs_write_failed(mapping, pos + len); 271 nilfs_transaction_abort(inode->i_sb); 272 } 273 return err; 274 } 275 276 static int nilfs_write_end(struct file *file, struct address_space *mapping, 277 loff_t pos, unsigned len, unsigned copied, 278 struct page *page, void *fsdata) 279 { 280 struct inode *inode = mapping->host; 281 unsigned int start = pos & (PAGE_SIZE - 1); 282 unsigned int nr_dirty; 283 int err; 284 285 nr_dirty = nilfs_page_count_clean_buffers(page, start, 286 start + copied); 287 copied = generic_write_end(file, mapping, pos, len, copied, page, 288 fsdata); 289 nilfs_set_file_dirty(inode, nr_dirty); 290 err = nilfs_transaction_commit(inode->i_sb); 291 return err ? : copied; 292 } 293 294 static ssize_t 295 nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) 296 { 297 struct inode *inode = file_inode(iocb->ki_filp); 298 299 if (iov_iter_rw(iter) == WRITE) 300 return 0; 301 302 /* Needs synchronization with the cleaner */ 303 return blockdev_direct_IO(iocb, inode, iter, nilfs_get_block); 304 } 305 306 const struct address_space_operations nilfs_aops = { 307 .writepage = nilfs_writepage, 308 .readpage = nilfs_readpage, 309 .writepages = nilfs_writepages, 310 .set_page_dirty = nilfs_set_page_dirty, 311 .readpages = nilfs_readpages, 312 .write_begin = nilfs_write_begin, 313 .write_end = nilfs_write_end, 314 /* .releasepage = nilfs_releasepage, */ 315 .invalidatepage = block_invalidatepage, 316 .direct_IO = nilfs_direct_IO, 317 .is_partially_uptodate = block_is_partially_uptodate, 318 }; 319 320 static int nilfs_insert_inode_locked(struct inode *inode, 321 struct nilfs_root *root, 322 unsigned long ino) 323 { 324 struct nilfs_iget_args args = { 325 .ino = ino, .root = root, .cno = 0, .for_gc = 0 326 }; 327 328 return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); 329 } 330 331 struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) 332 { 333 struct super_block *sb = dir->i_sb; 334 struct the_nilfs *nilfs = sb->s_fs_info; 335 struct inode *inode; 336 struct nilfs_inode_info *ii; 337 struct nilfs_root *root; 338 int err = -ENOMEM; 339 ino_t ino; 340 341 inode = new_inode(sb); 342 if (unlikely(!inode)) 343 goto failed; 344 345 mapping_set_gfp_mask(inode->i_mapping, 346 mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS)); 347 348 root = NILFS_I(dir)->i_root; 349 ii = NILFS_I(inode); 350 ii->i_state = BIT(NILFS_I_NEW); 351 ii->i_root = root; 352 353 err = nilfs_ifile_create_inode(root->ifile, &ino, &ii->i_bh); 354 if (unlikely(err)) 355 goto failed_ifile_create_inode; 356 /* reference count of i_bh inherits from nilfs_mdt_read_block() */ 357 358 atomic64_inc(&root->inodes_count); 359 inode_init_owner(inode, dir, mode); 360 inode->i_ino = ino; 361 inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); 362 363 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { 364 err = nilfs_bmap_read(ii->i_bmap, NULL); 365 if (err < 0) 366 goto failed_after_creation; 367 368 set_bit(NILFS_I_BMAP, &ii->i_state); 369 /* No lock is needed; iget() ensures it. */ 370 } 371 372 ii->i_flags = nilfs_mask_flags( 373 mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED); 374 375 /* ii->i_file_acl = 0; */ 376 /* ii->i_dir_acl = 0; */ 377 ii->i_dir_start_lookup = 0; 378 nilfs_set_inode_flags(inode); 379 spin_lock(&nilfs->ns_next_gen_lock); 380 inode->i_generation = nilfs->ns_next_generation++; 381 spin_unlock(&nilfs->ns_next_gen_lock); 382 if (nilfs_insert_inode_locked(inode, root, ino) < 0) { 383 err = -EIO; 384 goto failed_after_creation; 385 } 386 387 err = nilfs_init_acl(inode, dir); 388 if (unlikely(err)) 389 /* 390 * Never occur. When supporting nilfs_init_acl(), 391 * proper cancellation of above jobs should be considered. 392 */ 393 goto failed_after_creation; 394 395 return inode; 396 397 failed_after_creation: 398 clear_nlink(inode); 399 unlock_new_inode(inode); 400 iput(inode); /* 401 * raw_inode will be deleted through 402 * nilfs_evict_inode(). 403 */ 404 goto failed; 405 406 failed_ifile_create_inode: 407 make_bad_inode(inode); 408 iput(inode); 409 failed: 410 return ERR_PTR(err); 411 } 412 413 void nilfs_set_inode_flags(struct inode *inode) 414 { 415 unsigned int flags = NILFS_I(inode)->i_flags; 416 unsigned int new_fl = 0; 417 418 if (flags & FS_SYNC_FL) 419 new_fl |= S_SYNC; 420 if (flags & FS_APPEND_FL) 421 new_fl |= S_APPEND; 422 if (flags & FS_IMMUTABLE_FL) 423 new_fl |= S_IMMUTABLE; 424 if (flags & FS_NOATIME_FL) 425 new_fl |= S_NOATIME; 426 if (flags & FS_DIRSYNC_FL) 427 new_fl |= S_DIRSYNC; 428 inode_set_flags(inode, new_fl, S_SYNC | S_APPEND | S_IMMUTABLE | 429 S_NOATIME | S_DIRSYNC); 430 } 431 432 int nilfs_read_inode_common(struct inode *inode, 433 struct nilfs_inode *raw_inode) 434 { 435 struct nilfs_inode_info *ii = NILFS_I(inode); 436 int err; 437 438 inode->i_mode = le16_to_cpu(raw_inode->i_mode); 439 i_uid_write(inode, le32_to_cpu(raw_inode->i_uid)); 440 i_gid_write(inode, le32_to_cpu(raw_inode->i_gid)); 441 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); 442 inode->i_size = le64_to_cpu(raw_inode->i_size); 443 inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); 444 inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); 445 inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); 446 inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); 447 inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); 448 inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); 449 if (inode->i_nlink == 0) 450 return -ESTALE; /* this inode is deleted */ 451 452 inode->i_blocks = le64_to_cpu(raw_inode->i_blocks); 453 ii->i_flags = le32_to_cpu(raw_inode->i_flags); 454 #if 0 455 ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); 456 ii->i_dir_acl = S_ISREG(inode->i_mode) ? 457 0 : le32_to_cpu(raw_inode->i_dir_acl); 458 #endif 459 ii->i_dir_start_lookup = 0; 460 inode->i_generation = le32_to_cpu(raw_inode->i_generation); 461 462 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 463 S_ISLNK(inode->i_mode)) { 464 err = nilfs_bmap_read(ii->i_bmap, raw_inode); 465 if (err < 0) 466 return err; 467 set_bit(NILFS_I_BMAP, &ii->i_state); 468 /* No lock is needed; iget() ensures it. */ 469 } 470 return 0; 471 } 472 473 static int __nilfs_read_inode(struct super_block *sb, 474 struct nilfs_root *root, unsigned long ino, 475 struct inode *inode) 476 { 477 struct the_nilfs *nilfs = sb->s_fs_info; 478 struct buffer_head *bh; 479 struct nilfs_inode *raw_inode; 480 int err; 481 482 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); 483 err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh); 484 if (unlikely(err)) 485 goto bad_inode; 486 487 raw_inode = nilfs_ifile_map_inode(root->ifile, ino, bh); 488 489 err = nilfs_read_inode_common(inode, raw_inode); 490 if (err) 491 goto failed_unmap; 492 493 if (S_ISREG(inode->i_mode)) { 494 inode->i_op = &nilfs_file_inode_operations; 495 inode->i_fop = &nilfs_file_operations; 496 inode->i_mapping->a_ops = &nilfs_aops; 497 } else if (S_ISDIR(inode->i_mode)) { 498 inode->i_op = &nilfs_dir_inode_operations; 499 inode->i_fop = &nilfs_dir_operations; 500 inode->i_mapping->a_ops = &nilfs_aops; 501 } else if (S_ISLNK(inode->i_mode)) { 502 inode->i_op = &nilfs_symlink_inode_operations; 503 inode_nohighmem(inode); 504 inode->i_mapping->a_ops = &nilfs_aops; 505 } else { 506 inode->i_op = &nilfs_special_inode_operations; 507 init_special_inode( 508 inode, inode->i_mode, 509 huge_decode_dev(le64_to_cpu(raw_inode->i_device_code))); 510 } 511 nilfs_ifile_unmap_inode(root->ifile, ino, bh); 512 brelse(bh); 513 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); 514 nilfs_set_inode_flags(inode); 515 mapping_set_gfp_mask(inode->i_mapping, 516 mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS)); 517 return 0; 518 519 failed_unmap: 520 nilfs_ifile_unmap_inode(root->ifile, ino, bh); 521 brelse(bh); 522 523 bad_inode: 524 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); 525 return err; 526 } 527 528 static int nilfs_iget_test(struct inode *inode, void *opaque) 529 { 530 struct nilfs_iget_args *args = opaque; 531 struct nilfs_inode_info *ii; 532 533 if (args->ino != inode->i_ino || args->root != NILFS_I(inode)->i_root) 534 return 0; 535 536 ii = NILFS_I(inode); 537 if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) 538 return !args->for_gc; 539 540 return args->for_gc && args->cno == ii->i_cno; 541 } 542 543 static int nilfs_iget_set(struct inode *inode, void *opaque) 544 { 545 struct nilfs_iget_args *args = opaque; 546 547 inode->i_ino = args->ino; 548 if (args->for_gc) { 549 NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE); 550 NILFS_I(inode)->i_cno = args->cno; 551 NILFS_I(inode)->i_root = NULL; 552 } else { 553 if (args->root && args->ino == NILFS_ROOT_INO) 554 nilfs_get_root(args->root); 555 NILFS_I(inode)->i_root = args->root; 556 } 557 return 0; 558 } 559 560 struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, 561 unsigned long ino) 562 { 563 struct nilfs_iget_args args = { 564 .ino = ino, .root = root, .cno = 0, .for_gc = 0 565 }; 566 567 return ilookup5(sb, ino, nilfs_iget_test, &args); 568 } 569 570 struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, 571 unsigned long ino) 572 { 573 struct nilfs_iget_args args = { 574 .ino = ino, .root = root, .cno = 0, .for_gc = 0 575 }; 576 577 return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); 578 } 579 580 struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, 581 unsigned long ino) 582 { 583 struct inode *inode; 584 int err; 585 586 inode = nilfs_iget_locked(sb, root, ino); 587 if (unlikely(!inode)) 588 return ERR_PTR(-ENOMEM); 589 if (!(inode->i_state & I_NEW)) 590 return inode; 591 592 err = __nilfs_read_inode(sb, root, ino, inode); 593 if (unlikely(err)) { 594 iget_failed(inode); 595 return ERR_PTR(err); 596 } 597 unlock_new_inode(inode); 598 return inode; 599 } 600 601 struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, 602 __u64 cno) 603 { 604 struct nilfs_iget_args args = { 605 .ino = ino, .root = NULL, .cno = cno, .for_gc = 1 606 }; 607 struct inode *inode; 608 int err; 609 610 inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); 611 if (unlikely(!inode)) 612 return ERR_PTR(-ENOMEM); 613 if (!(inode->i_state & I_NEW)) 614 return inode; 615 616 err = nilfs_init_gcinode(inode); 617 if (unlikely(err)) { 618 iget_failed(inode); 619 return ERR_PTR(err); 620 } 621 unlock_new_inode(inode); 622 return inode; 623 } 624 625 void nilfs_write_inode_common(struct inode *inode, 626 struct nilfs_inode *raw_inode, int has_bmap) 627 { 628 struct nilfs_inode_info *ii = NILFS_I(inode); 629 630 raw_inode->i_mode = cpu_to_le16(inode->i_mode); 631 raw_inode->i_uid = cpu_to_le32(i_uid_read(inode)); 632 raw_inode->i_gid = cpu_to_le32(i_gid_read(inode)); 633 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); 634 raw_inode->i_size = cpu_to_le64(inode->i_size); 635 raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); 636 raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); 637 raw_inode->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 638 raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); 639 raw_inode->i_blocks = cpu_to_le64(inode->i_blocks); 640 641 raw_inode->i_flags = cpu_to_le32(ii->i_flags); 642 raw_inode->i_generation = cpu_to_le32(inode->i_generation); 643 644 if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) { 645 struct the_nilfs *nilfs = inode->i_sb->s_fs_info; 646 647 /* zero-fill unused portion in the case of super root block */ 648 raw_inode->i_xattr = 0; 649 raw_inode->i_pad = 0; 650 memset((void *)raw_inode + sizeof(*raw_inode), 0, 651 nilfs->ns_inode_size - sizeof(*raw_inode)); 652 } 653 654 if (has_bmap) 655 nilfs_bmap_write(ii->i_bmap, raw_inode); 656 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) 657 raw_inode->i_device_code = 658 cpu_to_le64(huge_encode_dev(inode->i_rdev)); 659 /* 660 * When extending inode, nilfs->ns_inode_size should be checked 661 * for substitutions of appended fields. 662 */ 663 } 664 665 void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags) 666 { 667 ino_t ino = inode->i_ino; 668 struct nilfs_inode_info *ii = NILFS_I(inode); 669 struct inode *ifile = ii->i_root->ifile; 670 struct nilfs_inode *raw_inode; 671 672 raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh); 673 674 if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state)) 675 memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size); 676 if (flags & I_DIRTY_DATASYNC) 677 set_bit(NILFS_I_INODE_SYNC, &ii->i_state); 678 679 nilfs_write_inode_common(inode, raw_inode, 0); 680 /* 681 * XXX: call with has_bmap = 0 is a workaround to avoid 682 * deadlock of bmap. This delays update of i_bmap to just 683 * before writing. 684 */ 685 686 nilfs_ifile_unmap_inode(ifile, ino, ibh); 687 } 688 689 #define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */ 690 691 static void nilfs_truncate_bmap(struct nilfs_inode_info *ii, 692 unsigned long from) 693 { 694 __u64 b; 695 int ret; 696 697 if (!test_bit(NILFS_I_BMAP, &ii->i_state)) 698 return; 699 repeat: 700 ret = nilfs_bmap_last_key(ii->i_bmap, &b); 701 if (ret == -ENOENT) 702 return; 703 else if (ret < 0) 704 goto failed; 705 706 if (b < from) 707 return; 708 709 b -= min_t(__u64, NILFS_MAX_TRUNCATE_BLOCKS, b - from); 710 ret = nilfs_bmap_truncate(ii->i_bmap, b); 711 nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb); 712 if (!ret || (ret == -ENOMEM && 713 nilfs_bmap_truncate(ii->i_bmap, b) == 0)) 714 goto repeat; 715 716 failed: 717 nilfs_msg(ii->vfs_inode.i_sb, KERN_WARNING, 718 "error %d truncating bmap (ino=%lu)", ret, 719 ii->vfs_inode.i_ino); 720 } 721 722 void nilfs_truncate(struct inode *inode) 723 { 724 unsigned long blkoff; 725 unsigned int blocksize; 726 struct nilfs_transaction_info ti; 727 struct super_block *sb = inode->i_sb; 728 struct nilfs_inode_info *ii = NILFS_I(inode); 729 730 if (!test_bit(NILFS_I_BMAP, &ii->i_state)) 731 return; 732 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 733 return; 734 735 blocksize = sb->s_blocksize; 736 blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits; 737 nilfs_transaction_begin(sb, &ti, 0); /* never fails */ 738 739 block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block); 740 741 nilfs_truncate_bmap(ii, blkoff); 742 743 inode->i_mtime = inode->i_ctime = current_time(inode); 744 if (IS_SYNC(inode)) 745 nilfs_set_transaction_flag(NILFS_TI_SYNC); 746 747 nilfs_mark_inode_dirty(inode); 748 nilfs_set_file_dirty(inode, 0); 749 nilfs_transaction_commit(sb); 750 /* 751 * May construct a logical segment and may fail in sync mode. 752 * But truncate has no return value. 753 */ 754 } 755 756 static void nilfs_clear_inode(struct inode *inode) 757 { 758 struct nilfs_inode_info *ii = NILFS_I(inode); 759 760 /* 761 * Free resources allocated in nilfs_read_inode(), here. 762 */ 763 BUG_ON(!list_empty(&ii->i_dirty)); 764 brelse(ii->i_bh); 765 ii->i_bh = NULL; 766 767 if (nilfs_is_metadata_file_inode(inode)) 768 nilfs_mdt_clear(inode); 769 770 if (test_bit(NILFS_I_BMAP, &ii->i_state)) 771 nilfs_bmap_clear(ii->i_bmap); 772 773 nilfs_btnode_cache_clear(&ii->i_btnode_cache); 774 775 if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) 776 nilfs_put_root(ii->i_root); 777 } 778 779 void nilfs_evict_inode(struct inode *inode) 780 { 781 struct nilfs_transaction_info ti; 782 struct super_block *sb = inode->i_sb; 783 struct nilfs_inode_info *ii = NILFS_I(inode); 784 int ret; 785 786 if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { 787 truncate_inode_pages_final(&inode->i_data); 788 clear_inode(inode); 789 nilfs_clear_inode(inode); 790 return; 791 } 792 nilfs_transaction_begin(sb, &ti, 0); /* never fails */ 793 794 truncate_inode_pages_final(&inode->i_data); 795 796 /* TODO: some of the following operations may fail. */ 797 nilfs_truncate_bmap(ii, 0); 798 nilfs_mark_inode_dirty(inode); 799 clear_inode(inode); 800 801 ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); 802 if (!ret) 803 atomic64_dec(&ii->i_root->inodes_count); 804 805 nilfs_clear_inode(inode); 806 807 if (IS_SYNC(inode)) 808 nilfs_set_transaction_flag(NILFS_TI_SYNC); 809 nilfs_transaction_commit(sb); 810 /* 811 * May construct a logical segment and may fail in sync mode. 812 * But delete_inode has no return value. 813 */ 814 } 815 816 int nilfs_setattr(struct dentry *dentry, struct iattr *iattr) 817 { 818 struct nilfs_transaction_info ti; 819 struct inode *inode = d_inode(dentry); 820 struct super_block *sb = inode->i_sb; 821 int err; 822 823 err = setattr_prepare(dentry, iattr); 824 if (err) 825 return err; 826 827 err = nilfs_transaction_begin(sb, &ti, 0); 828 if (unlikely(err)) 829 return err; 830 831 if ((iattr->ia_valid & ATTR_SIZE) && 832 iattr->ia_size != i_size_read(inode)) { 833 inode_dio_wait(inode); 834 truncate_setsize(inode, iattr->ia_size); 835 nilfs_truncate(inode); 836 } 837 838 setattr_copy(inode, iattr); 839 mark_inode_dirty(inode); 840 841 if (iattr->ia_valid & ATTR_MODE) { 842 err = nilfs_acl_chmod(inode); 843 if (unlikely(err)) 844 goto out_err; 845 } 846 847 return nilfs_transaction_commit(sb); 848 849 out_err: 850 nilfs_transaction_abort(sb); 851 return err; 852 } 853 854 int nilfs_permission(struct inode *inode, int mask) 855 { 856 struct nilfs_root *root = NILFS_I(inode)->i_root; 857 858 if ((mask & MAY_WRITE) && root && 859 root->cno != NILFS_CPTREE_CURRENT_CNO) 860 return -EROFS; /* snapshot is not writable */ 861 862 return generic_permission(inode, mask); 863 } 864 865 int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh) 866 { 867 struct the_nilfs *nilfs = inode->i_sb->s_fs_info; 868 struct nilfs_inode_info *ii = NILFS_I(inode); 869 int err; 870 871 spin_lock(&nilfs->ns_inode_lock); 872 if (ii->i_bh == NULL) { 873 spin_unlock(&nilfs->ns_inode_lock); 874 err = nilfs_ifile_get_inode_block(ii->i_root->ifile, 875 inode->i_ino, pbh); 876 if (unlikely(err)) 877 return err; 878 spin_lock(&nilfs->ns_inode_lock); 879 if (ii->i_bh == NULL) 880 ii->i_bh = *pbh; 881 else { 882 brelse(*pbh); 883 *pbh = ii->i_bh; 884 } 885 } else 886 *pbh = ii->i_bh; 887 888 get_bh(*pbh); 889 spin_unlock(&nilfs->ns_inode_lock); 890 return 0; 891 } 892 893 int nilfs_inode_dirty(struct inode *inode) 894 { 895 struct nilfs_inode_info *ii = NILFS_I(inode); 896 struct the_nilfs *nilfs = inode->i_sb->s_fs_info; 897 int ret = 0; 898 899 if (!list_empty(&ii->i_dirty)) { 900 spin_lock(&nilfs->ns_inode_lock); 901 ret = test_bit(NILFS_I_DIRTY, &ii->i_state) || 902 test_bit(NILFS_I_BUSY, &ii->i_state); 903 spin_unlock(&nilfs->ns_inode_lock); 904 } 905 return ret; 906 } 907 908 int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty) 909 { 910 struct nilfs_inode_info *ii = NILFS_I(inode); 911 struct the_nilfs *nilfs = inode->i_sb->s_fs_info; 912 913 atomic_add(nr_dirty, &nilfs->ns_ndirtyblks); 914 915 if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state)) 916 return 0; 917 918 spin_lock(&nilfs->ns_inode_lock); 919 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && 920 !test_bit(NILFS_I_BUSY, &ii->i_state)) { 921 /* 922 * Because this routine may race with nilfs_dispose_list(), 923 * we have to check NILFS_I_QUEUED here, too. 924 */ 925 if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) { 926 /* 927 * This will happen when somebody is freeing 928 * this inode. 929 */ 930 nilfs_msg(inode->i_sb, KERN_WARNING, 931 "cannot set file dirty (ino=%lu): the file is being freed", 932 inode->i_ino); 933 spin_unlock(&nilfs->ns_inode_lock); 934 return -EINVAL; /* 935 * NILFS_I_DIRTY may remain for 936 * freeing inode. 937 */ 938 } 939 list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files); 940 set_bit(NILFS_I_QUEUED, &ii->i_state); 941 } 942 spin_unlock(&nilfs->ns_inode_lock); 943 return 0; 944 } 945 946 int __nilfs_mark_inode_dirty(struct inode *inode, int flags) 947 { 948 struct buffer_head *ibh; 949 int err; 950 951 err = nilfs_load_inode_block(inode, &ibh); 952 if (unlikely(err)) { 953 nilfs_msg(inode->i_sb, KERN_WARNING, 954 "cannot mark inode dirty (ino=%lu): error %d loading inode block", 955 inode->i_ino, err); 956 return err; 957 } 958 nilfs_update_inode(inode, ibh, flags); 959 mark_buffer_dirty(ibh); 960 nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile); 961 brelse(ibh); 962 return 0; 963 } 964 965 /** 966 * nilfs_dirty_inode - reflect changes on given inode to an inode block. 967 * @inode: inode of the file to be registered. 968 * 969 * nilfs_dirty_inode() loads a inode block containing the specified 970 * @inode and copies data from a nilfs_inode to a corresponding inode 971 * entry in the inode block. This operation is excluded from the segment 972 * construction. This function can be called both as a single operation 973 * and as a part of indivisible file operations. 974 */ 975 void nilfs_dirty_inode(struct inode *inode, int flags) 976 { 977 struct nilfs_transaction_info ti; 978 struct nilfs_mdt_info *mdi = NILFS_MDT(inode); 979 980 if (is_bad_inode(inode)) { 981 nilfs_msg(inode->i_sb, KERN_WARNING, 982 "tried to mark bad_inode dirty. ignored."); 983 dump_stack(); 984 return; 985 } 986 if (mdi) { 987 nilfs_mdt_mark_dirty(inode); 988 return; 989 } 990 nilfs_transaction_begin(inode->i_sb, &ti, 0); 991 __nilfs_mark_inode_dirty(inode, flags); 992 nilfs_transaction_commit(inode->i_sb); /* never fails */ 993 } 994 995 int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 996 __u64 start, __u64 len) 997 { 998 struct the_nilfs *nilfs = inode->i_sb->s_fs_info; 999 __u64 logical = 0, phys = 0, size = 0; 1000 __u32 flags = 0; 1001 loff_t isize; 1002 sector_t blkoff, end_blkoff; 1003 sector_t delalloc_blkoff; 1004 unsigned long delalloc_blklen; 1005 unsigned int blkbits = inode->i_blkbits; 1006 int ret, n; 1007 1008 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); 1009 if (ret) 1010 return ret; 1011 1012 inode_lock(inode); 1013 1014 isize = i_size_read(inode); 1015 1016 blkoff = start >> blkbits; 1017 end_blkoff = (start + len - 1) >> blkbits; 1018 1019 delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff, 1020 &delalloc_blkoff); 1021 1022 do { 1023 __u64 blkphy; 1024 unsigned int maxblocks; 1025 1026 if (delalloc_blklen && blkoff == delalloc_blkoff) { 1027 if (size) { 1028 /* End of the current extent */ 1029 ret = fiemap_fill_next_extent( 1030 fieinfo, logical, phys, size, flags); 1031 if (ret) 1032 break; 1033 } 1034 if (blkoff > end_blkoff) 1035 break; 1036 1037 flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC; 1038 logical = blkoff << blkbits; 1039 phys = 0; 1040 size = delalloc_blklen << blkbits; 1041 1042 blkoff = delalloc_blkoff + delalloc_blklen; 1043 delalloc_blklen = nilfs_find_uncommitted_extent( 1044 inode, blkoff, &delalloc_blkoff); 1045 continue; 1046 } 1047 1048 /* 1049 * Limit the number of blocks that we look up so as 1050 * not to get into the next delayed allocation extent. 1051 */ 1052 maxblocks = INT_MAX; 1053 if (delalloc_blklen) 1054 maxblocks = min_t(sector_t, delalloc_blkoff - blkoff, 1055 maxblocks); 1056 blkphy = 0; 1057 1058 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); 1059 n = nilfs_bmap_lookup_contig( 1060 NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks); 1061 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); 1062 1063 if (n < 0) { 1064 int past_eof; 1065 1066 if (unlikely(n != -ENOENT)) 1067 break; /* error */ 1068 1069 /* HOLE */ 1070 blkoff++; 1071 past_eof = ((blkoff << blkbits) >= isize); 1072 1073 if (size) { 1074 /* End of the current extent */ 1075 1076 if (past_eof) 1077 flags |= FIEMAP_EXTENT_LAST; 1078 1079 ret = fiemap_fill_next_extent( 1080 fieinfo, logical, phys, size, flags); 1081 if (ret) 1082 break; 1083 size = 0; 1084 } 1085 if (blkoff > end_blkoff || past_eof) 1086 break; 1087 } else { 1088 if (size) { 1089 if (phys && blkphy << blkbits == phys + size) { 1090 /* The current extent goes on */ 1091 size += n << blkbits; 1092 } else { 1093 /* Terminate the current extent */ 1094 ret = fiemap_fill_next_extent( 1095 fieinfo, logical, phys, size, 1096 flags); 1097 if (ret || blkoff > end_blkoff) 1098 break; 1099 1100 /* Start another extent */ 1101 flags = FIEMAP_EXTENT_MERGED; 1102 logical = blkoff << blkbits; 1103 phys = blkphy << blkbits; 1104 size = n << blkbits; 1105 } 1106 } else { 1107 /* Start a new extent */ 1108 flags = FIEMAP_EXTENT_MERGED; 1109 logical = blkoff << blkbits; 1110 phys = blkphy << blkbits; 1111 size = n << blkbits; 1112 } 1113 blkoff += n; 1114 } 1115 cond_resched(); 1116 } while (true); 1117 1118 /* If ret is 1 then we just hit the end of the extent array */ 1119 if (ret == 1) 1120 ret = 0; 1121 1122 inode_unlock(inode); 1123 return ret; 1124 } 1125