1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * NILFS inode operations. 4 * 5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. 6 * 7 * Written by Ryusuke Konishi. 8 * 9 */ 10 11 #include <linux/buffer_head.h> 12 #include <linux/gfp.h> 13 #include <linux/mpage.h> 14 #include <linux/pagemap.h> 15 #include <linux/writeback.h> 16 #include <linux/uio.h> 17 #include <linux/fiemap.h> 18 #include "nilfs.h" 19 #include "btnode.h" 20 #include "segment.h" 21 #include "page.h" 22 #include "mdt.h" 23 #include "cpfile.h" 24 #include "ifile.h" 25 26 /** 27 * struct nilfs_iget_args - arguments used during comparison between inodes 28 * @ino: inode number 29 * @cno: checkpoint number 30 * @root: pointer on NILFS root object (mounted checkpoint) 31 * @for_gc: inode for GC flag 32 */ 33 struct nilfs_iget_args { 34 u64 ino; 35 __u64 cno; 36 struct nilfs_root *root; 37 int for_gc; 38 }; 39 40 static int nilfs_iget_test(struct inode *inode, void *opaque); 41 42 void nilfs_inode_add_blocks(struct inode *inode, int n) 43 { 44 struct nilfs_root *root = NILFS_I(inode)->i_root; 45 46 inode_add_bytes(inode, i_blocksize(inode) * n); 47 if (root) 48 atomic64_add(n, &root->blocks_count); 49 } 50 51 void nilfs_inode_sub_blocks(struct inode *inode, int n) 52 { 53 struct nilfs_root *root = NILFS_I(inode)->i_root; 54 55 inode_sub_bytes(inode, i_blocksize(inode) * n); 56 if (root) 57 atomic64_sub(n, &root->blocks_count); 58 } 59 60 /** 61 * nilfs_get_block() - get a file block on the filesystem (callback function) 62 * @inode - inode struct of the target file 63 * @blkoff - file block number 64 * @bh_result - buffer head to be mapped on 65 * @create - indicate whether allocating the block or not when it has not 66 * been allocated yet. 67 * 68 * This function does not issue actual read request of the specified data 69 * block. It is done by VFS. 70 */ 71 int nilfs_get_block(struct inode *inode, sector_t blkoff, 72 struct buffer_head *bh_result, int create) 73 { 74 struct nilfs_inode_info *ii = NILFS_I(inode); 75 struct the_nilfs *nilfs = inode->i_sb->s_fs_info; 76 __u64 blknum = 0; 77 int err = 0, ret; 78 unsigned int maxblocks = bh_result->b_size >> inode->i_blkbits; 79 80 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); 81 ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks); 82 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); 83 if (ret >= 0) { /* found */ 84 map_bh(bh_result, inode->i_sb, blknum); 85 if (ret > 0) 86 bh_result->b_size = (ret << inode->i_blkbits); 87 goto out; 88 } 89 /* data block was not found */ 90 if (ret == -ENOENT && create) { 91 struct nilfs_transaction_info ti; 92 93 bh_result->b_blocknr = 0; 94 err = nilfs_transaction_begin(inode->i_sb, &ti, 1); 95 if (unlikely(err)) 96 goto out; 97 err = nilfs_bmap_insert(ii->i_bmap, blkoff, 98 (unsigned long)bh_result); 99 if (unlikely(err != 0)) { 100 if (err == -EEXIST) { 101 /* 102 * The get_block() function could be called 103 * from multiple callers for an inode. 104 * However, the page having this block must 105 * be locked in this case. 106 */ 107 nilfs_warn(inode->i_sb, 108 "%s (ino=%lu): a race condition while inserting a data block at offset=%llu", 109 __func__, inode->i_ino, 110 (unsigned long long)blkoff); 111 err = 0; 112 } 113 nilfs_transaction_abort(inode->i_sb); 114 goto out; 115 } 116 nilfs_mark_inode_dirty_sync(inode); 117 nilfs_transaction_commit(inode->i_sb); /* never fails */ 118 /* Error handling should be detailed */ 119 set_buffer_new(bh_result); 120 set_buffer_delay(bh_result); 121 map_bh(bh_result, inode->i_sb, 0); 122 /* Disk block number must be changed to proper value */ 123 124 } else if (ret == -ENOENT) { 125 /* 126 * not found is not error (e.g. hole); must return without 127 * the mapped state flag. 128 */ 129 ; 130 } else { 131 err = ret; 132 } 133 134 out: 135 return err; 136 } 137 138 /** 139 * nilfs_readpage() - implement readpage() method of nilfs_aops {} 140 * address_space_operations. 141 * @file - file struct of the file to be read 142 * @page - the page to be read 143 */ 144 static int nilfs_readpage(struct file *file, struct page *page) 145 { 146 return mpage_readpage(page, nilfs_get_block); 147 } 148 149 static void nilfs_readahead(struct readahead_control *rac) 150 { 151 mpage_readahead(rac, nilfs_get_block); 152 } 153 154 static int nilfs_writepages(struct address_space *mapping, 155 struct writeback_control *wbc) 156 { 157 struct inode *inode = mapping->host; 158 int err = 0; 159 160 if (sb_rdonly(inode->i_sb)) { 161 nilfs_clear_dirty_pages(mapping, false); 162 return -EROFS; 163 } 164 165 if (wbc->sync_mode == WB_SYNC_ALL) 166 err = nilfs_construct_dsync_segment(inode->i_sb, inode, 167 wbc->range_start, 168 wbc->range_end); 169 return err; 170 } 171 172 static int nilfs_writepage(struct page *page, struct writeback_control *wbc) 173 { 174 struct inode *inode = page->mapping->host; 175 int err; 176 177 if (sb_rdonly(inode->i_sb)) { 178 /* 179 * It means that filesystem was remounted in read-only 180 * mode because of error or metadata corruption. But we 181 * have dirty pages that try to be flushed in background. 182 * So, here we simply discard this dirty page. 183 */ 184 nilfs_clear_dirty_page(page, false); 185 unlock_page(page); 186 return -EROFS; 187 } 188 189 redirty_page_for_writepage(wbc, page); 190 unlock_page(page); 191 192 if (wbc->sync_mode == WB_SYNC_ALL) { 193 err = nilfs_construct_segment(inode->i_sb); 194 if (unlikely(err)) 195 return err; 196 } else if (wbc->for_reclaim) 197 nilfs_flush_segment(inode->i_sb, inode->i_ino); 198 199 return 0; 200 } 201 202 static bool nilfs_dirty_folio(struct address_space *mapping, 203 struct folio *folio) 204 { 205 struct inode *inode = mapping->host; 206 struct buffer_head *head; 207 unsigned int nr_dirty = 0; 208 bool ret = filemap_dirty_folio(mapping, folio); 209 210 /* 211 * The page may not be locked, eg if called from try_to_unmap_one() 212 */ 213 spin_lock(&mapping->private_lock); 214 head = folio_buffers(folio); 215 if (head) { 216 struct buffer_head *bh = head; 217 218 do { 219 /* Do not mark hole blocks dirty */ 220 if (buffer_dirty(bh) || !buffer_mapped(bh)) 221 continue; 222 223 set_buffer_dirty(bh); 224 nr_dirty++; 225 } while (bh = bh->b_this_page, bh != head); 226 } else if (ret) { 227 nr_dirty = 1 << (folio_shift(folio) - inode->i_blkbits); 228 } 229 spin_unlock(&mapping->private_lock); 230 231 if (nr_dirty) 232 nilfs_set_file_dirty(inode, nr_dirty); 233 return ret; 234 } 235 236 void nilfs_write_failed(struct address_space *mapping, loff_t to) 237 { 238 struct inode *inode = mapping->host; 239 240 if (to > inode->i_size) { 241 truncate_pagecache(inode, inode->i_size); 242 nilfs_truncate(inode); 243 } 244 } 245 246 static int nilfs_write_begin(struct file *file, struct address_space *mapping, 247 loff_t pos, unsigned len, unsigned flags, 248 struct page **pagep, void **fsdata) 249 250 { 251 struct inode *inode = mapping->host; 252 int err = nilfs_transaction_begin(inode->i_sb, NULL, 1); 253 254 if (unlikely(err)) 255 return err; 256 257 err = block_write_begin(mapping, pos, len, flags, pagep, 258 nilfs_get_block); 259 if (unlikely(err)) { 260 nilfs_write_failed(mapping, pos + len); 261 nilfs_transaction_abort(inode->i_sb); 262 } 263 return err; 264 } 265 266 static int nilfs_write_end(struct file *file, struct address_space *mapping, 267 loff_t pos, unsigned len, unsigned copied, 268 struct page *page, void *fsdata) 269 { 270 struct inode *inode = mapping->host; 271 unsigned int start = pos & (PAGE_SIZE - 1); 272 unsigned int nr_dirty; 273 int err; 274 275 nr_dirty = nilfs_page_count_clean_buffers(page, start, 276 start + copied); 277 copied = generic_write_end(file, mapping, pos, len, copied, page, 278 fsdata); 279 nilfs_set_file_dirty(inode, nr_dirty); 280 err = nilfs_transaction_commit(inode->i_sb); 281 return err ? : copied; 282 } 283 284 static ssize_t 285 nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) 286 { 287 struct inode *inode = file_inode(iocb->ki_filp); 288 289 if (iov_iter_rw(iter) == WRITE) 290 return 0; 291 292 /* Needs synchronization with the cleaner */ 293 return blockdev_direct_IO(iocb, inode, iter, nilfs_get_block); 294 } 295 296 const struct address_space_operations nilfs_aops = { 297 .writepage = nilfs_writepage, 298 .readpage = nilfs_readpage, 299 .writepages = nilfs_writepages, 300 .dirty_folio = nilfs_dirty_folio, 301 .readahead = nilfs_readahead, 302 .write_begin = nilfs_write_begin, 303 .write_end = nilfs_write_end, 304 /* .releasepage = nilfs_releasepage, */ 305 .invalidate_folio = block_invalidate_folio, 306 .direct_IO = nilfs_direct_IO, 307 .is_partially_uptodate = block_is_partially_uptodate, 308 }; 309 310 static int nilfs_insert_inode_locked(struct inode *inode, 311 struct nilfs_root *root, 312 unsigned long ino) 313 { 314 struct nilfs_iget_args args = { 315 .ino = ino, .root = root, .cno = 0, .for_gc = 0 316 }; 317 318 return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); 319 } 320 321 struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) 322 { 323 struct super_block *sb = dir->i_sb; 324 struct the_nilfs *nilfs = sb->s_fs_info; 325 struct inode *inode; 326 struct nilfs_inode_info *ii; 327 struct nilfs_root *root; 328 int err = -ENOMEM; 329 ino_t ino; 330 331 inode = new_inode(sb); 332 if (unlikely(!inode)) 333 goto failed; 334 335 mapping_set_gfp_mask(inode->i_mapping, 336 mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS)); 337 338 root = NILFS_I(dir)->i_root; 339 ii = NILFS_I(inode); 340 ii->i_state = BIT(NILFS_I_NEW); 341 ii->i_root = root; 342 343 err = nilfs_ifile_create_inode(root->ifile, &ino, &ii->i_bh); 344 if (unlikely(err)) 345 goto failed_ifile_create_inode; 346 /* reference count of i_bh inherits from nilfs_mdt_read_block() */ 347 348 atomic64_inc(&root->inodes_count); 349 inode_init_owner(&init_user_ns, inode, dir, mode); 350 inode->i_ino = ino; 351 inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); 352 353 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { 354 err = nilfs_bmap_read(ii->i_bmap, NULL); 355 if (err < 0) 356 goto failed_after_creation; 357 358 set_bit(NILFS_I_BMAP, &ii->i_state); 359 /* No lock is needed; iget() ensures it. */ 360 } 361 362 ii->i_flags = nilfs_mask_flags( 363 mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED); 364 365 /* ii->i_file_acl = 0; */ 366 /* ii->i_dir_acl = 0; */ 367 ii->i_dir_start_lookup = 0; 368 nilfs_set_inode_flags(inode); 369 spin_lock(&nilfs->ns_next_gen_lock); 370 inode->i_generation = nilfs->ns_next_generation++; 371 spin_unlock(&nilfs->ns_next_gen_lock); 372 if (nilfs_insert_inode_locked(inode, root, ino) < 0) { 373 err = -EIO; 374 goto failed_after_creation; 375 } 376 377 err = nilfs_init_acl(inode, dir); 378 if (unlikely(err)) 379 /* 380 * Never occur. When supporting nilfs_init_acl(), 381 * proper cancellation of above jobs should be considered. 382 */ 383 goto failed_after_creation; 384 385 return inode; 386 387 failed_after_creation: 388 clear_nlink(inode); 389 if (inode->i_state & I_NEW) 390 unlock_new_inode(inode); 391 iput(inode); /* 392 * raw_inode will be deleted through 393 * nilfs_evict_inode(). 394 */ 395 goto failed; 396 397 failed_ifile_create_inode: 398 make_bad_inode(inode); 399 iput(inode); 400 failed: 401 return ERR_PTR(err); 402 } 403 404 void nilfs_set_inode_flags(struct inode *inode) 405 { 406 unsigned int flags = NILFS_I(inode)->i_flags; 407 unsigned int new_fl = 0; 408 409 if (flags & FS_SYNC_FL) 410 new_fl |= S_SYNC; 411 if (flags & FS_APPEND_FL) 412 new_fl |= S_APPEND; 413 if (flags & FS_IMMUTABLE_FL) 414 new_fl |= S_IMMUTABLE; 415 if (flags & FS_NOATIME_FL) 416 new_fl |= S_NOATIME; 417 if (flags & FS_DIRSYNC_FL) 418 new_fl |= S_DIRSYNC; 419 inode_set_flags(inode, new_fl, S_SYNC | S_APPEND | S_IMMUTABLE | 420 S_NOATIME | S_DIRSYNC); 421 } 422 423 int nilfs_read_inode_common(struct inode *inode, 424 struct nilfs_inode *raw_inode) 425 { 426 struct nilfs_inode_info *ii = NILFS_I(inode); 427 int err; 428 429 inode->i_mode = le16_to_cpu(raw_inode->i_mode); 430 i_uid_write(inode, le32_to_cpu(raw_inode->i_uid)); 431 i_gid_write(inode, le32_to_cpu(raw_inode->i_gid)); 432 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); 433 inode->i_size = le64_to_cpu(raw_inode->i_size); 434 inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); 435 inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); 436 inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); 437 inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); 438 inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); 439 inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); 440 if (inode->i_nlink == 0) 441 return -ESTALE; /* this inode is deleted */ 442 443 inode->i_blocks = le64_to_cpu(raw_inode->i_blocks); 444 ii->i_flags = le32_to_cpu(raw_inode->i_flags); 445 #if 0 446 ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); 447 ii->i_dir_acl = S_ISREG(inode->i_mode) ? 448 0 : le32_to_cpu(raw_inode->i_dir_acl); 449 #endif 450 ii->i_dir_start_lookup = 0; 451 inode->i_generation = le32_to_cpu(raw_inode->i_generation); 452 453 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 454 S_ISLNK(inode->i_mode)) { 455 err = nilfs_bmap_read(ii->i_bmap, raw_inode); 456 if (err < 0) 457 return err; 458 set_bit(NILFS_I_BMAP, &ii->i_state); 459 /* No lock is needed; iget() ensures it. */ 460 } 461 return 0; 462 } 463 464 static int __nilfs_read_inode(struct super_block *sb, 465 struct nilfs_root *root, unsigned long ino, 466 struct inode *inode) 467 { 468 struct the_nilfs *nilfs = sb->s_fs_info; 469 struct buffer_head *bh; 470 struct nilfs_inode *raw_inode; 471 int err; 472 473 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); 474 err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh); 475 if (unlikely(err)) 476 goto bad_inode; 477 478 raw_inode = nilfs_ifile_map_inode(root->ifile, ino, bh); 479 480 err = nilfs_read_inode_common(inode, raw_inode); 481 if (err) 482 goto failed_unmap; 483 484 if (S_ISREG(inode->i_mode)) { 485 inode->i_op = &nilfs_file_inode_operations; 486 inode->i_fop = &nilfs_file_operations; 487 inode->i_mapping->a_ops = &nilfs_aops; 488 } else if (S_ISDIR(inode->i_mode)) { 489 inode->i_op = &nilfs_dir_inode_operations; 490 inode->i_fop = &nilfs_dir_operations; 491 inode->i_mapping->a_ops = &nilfs_aops; 492 } else if (S_ISLNK(inode->i_mode)) { 493 inode->i_op = &nilfs_symlink_inode_operations; 494 inode_nohighmem(inode); 495 inode->i_mapping->a_ops = &nilfs_aops; 496 } else { 497 inode->i_op = &nilfs_special_inode_operations; 498 init_special_inode( 499 inode, inode->i_mode, 500 huge_decode_dev(le64_to_cpu(raw_inode->i_device_code))); 501 } 502 nilfs_ifile_unmap_inode(root->ifile, ino, bh); 503 brelse(bh); 504 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); 505 nilfs_set_inode_flags(inode); 506 mapping_set_gfp_mask(inode->i_mapping, 507 mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS)); 508 return 0; 509 510 failed_unmap: 511 nilfs_ifile_unmap_inode(root->ifile, ino, bh); 512 brelse(bh); 513 514 bad_inode: 515 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); 516 return err; 517 } 518 519 static int nilfs_iget_test(struct inode *inode, void *opaque) 520 { 521 struct nilfs_iget_args *args = opaque; 522 struct nilfs_inode_info *ii; 523 524 if (args->ino != inode->i_ino || args->root != NILFS_I(inode)->i_root) 525 return 0; 526 527 ii = NILFS_I(inode); 528 if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) 529 return !args->for_gc; 530 531 return args->for_gc && args->cno == ii->i_cno; 532 } 533 534 static int nilfs_iget_set(struct inode *inode, void *opaque) 535 { 536 struct nilfs_iget_args *args = opaque; 537 538 inode->i_ino = args->ino; 539 if (args->for_gc) { 540 NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE); 541 NILFS_I(inode)->i_cno = args->cno; 542 NILFS_I(inode)->i_root = NULL; 543 } else { 544 if (args->root && args->ino == NILFS_ROOT_INO) 545 nilfs_get_root(args->root); 546 NILFS_I(inode)->i_root = args->root; 547 } 548 return 0; 549 } 550 551 struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, 552 unsigned long ino) 553 { 554 struct nilfs_iget_args args = { 555 .ino = ino, .root = root, .cno = 0, .for_gc = 0 556 }; 557 558 return ilookup5(sb, ino, nilfs_iget_test, &args); 559 } 560 561 struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, 562 unsigned long ino) 563 { 564 struct nilfs_iget_args args = { 565 .ino = ino, .root = root, .cno = 0, .for_gc = 0 566 }; 567 568 return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); 569 } 570 571 struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, 572 unsigned long ino) 573 { 574 struct inode *inode; 575 int err; 576 577 inode = nilfs_iget_locked(sb, root, ino); 578 if (unlikely(!inode)) 579 return ERR_PTR(-ENOMEM); 580 if (!(inode->i_state & I_NEW)) 581 return inode; 582 583 err = __nilfs_read_inode(sb, root, ino, inode); 584 if (unlikely(err)) { 585 iget_failed(inode); 586 return ERR_PTR(err); 587 } 588 unlock_new_inode(inode); 589 return inode; 590 } 591 592 struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, 593 __u64 cno) 594 { 595 struct nilfs_iget_args args = { 596 .ino = ino, .root = NULL, .cno = cno, .for_gc = 1 597 }; 598 struct inode *inode; 599 int err; 600 601 inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); 602 if (unlikely(!inode)) 603 return ERR_PTR(-ENOMEM); 604 if (!(inode->i_state & I_NEW)) 605 return inode; 606 607 err = nilfs_init_gcinode(inode); 608 if (unlikely(err)) { 609 iget_failed(inode); 610 return ERR_PTR(err); 611 } 612 unlock_new_inode(inode); 613 return inode; 614 } 615 616 void nilfs_write_inode_common(struct inode *inode, 617 struct nilfs_inode *raw_inode, int has_bmap) 618 { 619 struct nilfs_inode_info *ii = NILFS_I(inode); 620 621 raw_inode->i_mode = cpu_to_le16(inode->i_mode); 622 raw_inode->i_uid = cpu_to_le32(i_uid_read(inode)); 623 raw_inode->i_gid = cpu_to_le32(i_gid_read(inode)); 624 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); 625 raw_inode->i_size = cpu_to_le64(inode->i_size); 626 raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); 627 raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); 628 raw_inode->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 629 raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); 630 raw_inode->i_blocks = cpu_to_le64(inode->i_blocks); 631 632 raw_inode->i_flags = cpu_to_le32(ii->i_flags); 633 raw_inode->i_generation = cpu_to_le32(inode->i_generation); 634 635 if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) { 636 struct the_nilfs *nilfs = inode->i_sb->s_fs_info; 637 638 /* zero-fill unused portion in the case of super root block */ 639 raw_inode->i_xattr = 0; 640 raw_inode->i_pad = 0; 641 memset((void *)raw_inode + sizeof(*raw_inode), 0, 642 nilfs->ns_inode_size - sizeof(*raw_inode)); 643 } 644 645 if (has_bmap) 646 nilfs_bmap_write(ii->i_bmap, raw_inode); 647 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) 648 raw_inode->i_device_code = 649 cpu_to_le64(huge_encode_dev(inode->i_rdev)); 650 /* 651 * When extending inode, nilfs->ns_inode_size should be checked 652 * for substitutions of appended fields. 653 */ 654 } 655 656 void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags) 657 { 658 ino_t ino = inode->i_ino; 659 struct nilfs_inode_info *ii = NILFS_I(inode); 660 struct inode *ifile = ii->i_root->ifile; 661 struct nilfs_inode *raw_inode; 662 663 raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh); 664 665 if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state)) 666 memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size); 667 if (flags & I_DIRTY_DATASYNC) 668 set_bit(NILFS_I_INODE_SYNC, &ii->i_state); 669 670 nilfs_write_inode_common(inode, raw_inode, 0); 671 /* 672 * XXX: call with has_bmap = 0 is a workaround to avoid 673 * deadlock of bmap. This delays update of i_bmap to just 674 * before writing. 675 */ 676 677 nilfs_ifile_unmap_inode(ifile, ino, ibh); 678 } 679 680 #define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */ 681 682 static void nilfs_truncate_bmap(struct nilfs_inode_info *ii, 683 unsigned long from) 684 { 685 __u64 b; 686 int ret; 687 688 if (!test_bit(NILFS_I_BMAP, &ii->i_state)) 689 return; 690 repeat: 691 ret = nilfs_bmap_last_key(ii->i_bmap, &b); 692 if (ret == -ENOENT) 693 return; 694 else if (ret < 0) 695 goto failed; 696 697 if (b < from) 698 return; 699 700 b -= min_t(__u64, NILFS_MAX_TRUNCATE_BLOCKS, b - from); 701 ret = nilfs_bmap_truncate(ii->i_bmap, b); 702 nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb); 703 if (!ret || (ret == -ENOMEM && 704 nilfs_bmap_truncate(ii->i_bmap, b) == 0)) 705 goto repeat; 706 707 failed: 708 nilfs_warn(ii->vfs_inode.i_sb, "error %d truncating bmap (ino=%lu)", 709 ret, ii->vfs_inode.i_ino); 710 } 711 712 void nilfs_truncate(struct inode *inode) 713 { 714 unsigned long blkoff; 715 unsigned int blocksize; 716 struct nilfs_transaction_info ti; 717 struct super_block *sb = inode->i_sb; 718 struct nilfs_inode_info *ii = NILFS_I(inode); 719 720 if (!test_bit(NILFS_I_BMAP, &ii->i_state)) 721 return; 722 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 723 return; 724 725 blocksize = sb->s_blocksize; 726 blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits; 727 nilfs_transaction_begin(sb, &ti, 0); /* never fails */ 728 729 block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block); 730 731 nilfs_truncate_bmap(ii, blkoff); 732 733 inode->i_mtime = inode->i_ctime = current_time(inode); 734 if (IS_SYNC(inode)) 735 nilfs_set_transaction_flag(NILFS_TI_SYNC); 736 737 nilfs_mark_inode_dirty(inode); 738 nilfs_set_file_dirty(inode, 0); 739 nilfs_transaction_commit(sb); 740 /* 741 * May construct a logical segment and may fail in sync mode. 742 * But truncate has no return value. 743 */ 744 } 745 746 static void nilfs_clear_inode(struct inode *inode) 747 { 748 struct nilfs_inode_info *ii = NILFS_I(inode); 749 750 /* 751 * Free resources allocated in nilfs_read_inode(), here. 752 */ 753 BUG_ON(!list_empty(&ii->i_dirty)); 754 brelse(ii->i_bh); 755 ii->i_bh = NULL; 756 757 if (nilfs_is_metadata_file_inode(inode)) 758 nilfs_mdt_clear(inode); 759 760 if (test_bit(NILFS_I_BMAP, &ii->i_state)) 761 nilfs_bmap_clear(ii->i_bmap); 762 763 nilfs_btnode_cache_clear(&ii->i_btnode_cache); 764 765 if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) 766 nilfs_put_root(ii->i_root); 767 } 768 769 void nilfs_evict_inode(struct inode *inode) 770 { 771 struct nilfs_transaction_info ti; 772 struct super_block *sb = inode->i_sb; 773 struct nilfs_inode_info *ii = NILFS_I(inode); 774 int ret; 775 776 if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { 777 truncate_inode_pages_final(&inode->i_data); 778 clear_inode(inode); 779 nilfs_clear_inode(inode); 780 return; 781 } 782 nilfs_transaction_begin(sb, &ti, 0); /* never fails */ 783 784 truncate_inode_pages_final(&inode->i_data); 785 786 /* TODO: some of the following operations may fail. */ 787 nilfs_truncate_bmap(ii, 0); 788 nilfs_mark_inode_dirty(inode); 789 clear_inode(inode); 790 791 ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); 792 if (!ret) 793 atomic64_dec(&ii->i_root->inodes_count); 794 795 nilfs_clear_inode(inode); 796 797 if (IS_SYNC(inode)) 798 nilfs_set_transaction_flag(NILFS_TI_SYNC); 799 nilfs_transaction_commit(sb); 800 /* 801 * May construct a logical segment and may fail in sync mode. 802 * But delete_inode has no return value. 803 */ 804 } 805 806 int nilfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, 807 struct iattr *iattr) 808 { 809 struct nilfs_transaction_info ti; 810 struct inode *inode = d_inode(dentry); 811 struct super_block *sb = inode->i_sb; 812 int err; 813 814 err = setattr_prepare(&init_user_ns, dentry, iattr); 815 if (err) 816 return err; 817 818 err = nilfs_transaction_begin(sb, &ti, 0); 819 if (unlikely(err)) 820 return err; 821 822 if ((iattr->ia_valid & ATTR_SIZE) && 823 iattr->ia_size != i_size_read(inode)) { 824 inode_dio_wait(inode); 825 truncate_setsize(inode, iattr->ia_size); 826 nilfs_truncate(inode); 827 } 828 829 setattr_copy(&init_user_ns, inode, iattr); 830 mark_inode_dirty(inode); 831 832 if (iattr->ia_valid & ATTR_MODE) { 833 err = nilfs_acl_chmod(inode); 834 if (unlikely(err)) 835 goto out_err; 836 } 837 838 return nilfs_transaction_commit(sb); 839 840 out_err: 841 nilfs_transaction_abort(sb); 842 return err; 843 } 844 845 int nilfs_permission(struct user_namespace *mnt_userns, struct inode *inode, 846 int mask) 847 { 848 struct nilfs_root *root = NILFS_I(inode)->i_root; 849 850 if ((mask & MAY_WRITE) && root && 851 root->cno != NILFS_CPTREE_CURRENT_CNO) 852 return -EROFS; /* snapshot is not writable */ 853 854 return generic_permission(&init_user_ns, inode, mask); 855 } 856 857 int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh) 858 { 859 struct the_nilfs *nilfs = inode->i_sb->s_fs_info; 860 struct nilfs_inode_info *ii = NILFS_I(inode); 861 int err; 862 863 spin_lock(&nilfs->ns_inode_lock); 864 if (ii->i_bh == NULL) { 865 spin_unlock(&nilfs->ns_inode_lock); 866 err = nilfs_ifile_get_inode_block(ii->i_root->ifile, 867 inode->i_ino, pbh); 868 if (unlikely(err)) 869 return err; 870 spin_lock(&nilfs->ns_inode_lock); 871 if (ii->i_bh == NULL) 872 ii->i_bh = *pbh; 873 else { 874 brelse(*pbh); 875 *pbh = ii->i_bh; 876 } 877 } else 878 *pbh = ii->i_bh; 879 880 get_bh(*pbh); 881 spin_unlock(&nilfs->ns_inode_lock); 882 return 0; 883 } 884 885 int nilfs_inode_dirty(struct inode *inode) 886 { 887 struct nilfs_inode_info *ii = NILFS_I(inode); 888 struct the_nilfs *nilfs = inode->i_sb->s_fs_info; 889 int ret = 0; 890 891 if (!list_empty(&ii->i_dirty)) { 892 spin_lock(&nilfs->ns_inode_lock); 893 ret = test_bit(NILFS_I_DIRTY, &ii->i_state) || 894 test_bit(NILFS_I_BUSY, &ii->i_state); 895 spin_unlock(&nilfs->ns_inode_lock); 896 } 897 return ret; 898 } 899 900 int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty) 901 { 902 struct nilfs_inode_info *ii = NILFS_I(inode); 903 struct the_nilfs *nilfs = inode->i_sb->s_fs_info; 904 905 atomic_add(nr_dirty, &nilfs->ns_ndirtyblks); 906 907 if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state)) 908 return 0; 909 910 spin_lock(&nilfs->ns_inode_lock); 911 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && 912 !test_bit(NILFS_I_BUSY, &ii->i_state)) { 913 /* 914 * Because this routine may race with nilfs_dispose_list(), 915 * we have to check NILFS_I_QUEUED here, too. 916 */ 917 if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) { 918 /* 919 * This will happen when somebody is freeing 920 * this inode. 921 */ 922 nilfs_warn(inode->i_sb, 923 "cannot set file dirty (ino=%lu): the file is being freed", 924 inode->i_ino); 925 spin_unlock(&nilfs->ns_inode_lock); 926 return -EINVAL; /* 927 * NILFS_I_DIRTY may remain for 928 * freeing inode. 929 */ 930 } 931 list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files); 932 set_bit(NILFS_I_QUEUED, &ii->i_state); 933 } 934 spin_unlock(&nilfs->ns_inode_lock); 935 return 0; 936 } 937 938 int __nilfs_mark_inode_dirty(struct inode *inode, int flags) 939 { 940 struct buffer_head *ibh; 941 int err; 942 943 err = nilfs_load_inode_block(inode, &ibh); 944 if (unlikely(err)) { 945 nilfs_warn(inode->i_sb, 946 "cannot mark inode dirty (ino=%lu): error %d loading inode block", 947 inode->i_ino, err); 948 return err; 949 } 950 nilfs_update_inode(inode, ibh, flags); 951 mark_buffer_dirty(ibh); 952 nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile); 953 brelse(ibh); 954 return 0; 955 } 956 957 /** 958 * nilfs_dirty_inode - reflect changes on given inode to an inode block. 959 * @inode: inode of the file to be registered. 960 * 961 * nilfs_dirty_inode() loads a inode block containing the specified 962 * @inode and copies data from a nilfs_inode to a corresponding inode 963 * entry in the inode block. This operation is excluded from the segment 964 * construction. This function can be called both as a single operation 965 * and as a part of indivisible file operations. 966 */ 967 void nilfs_dirty_inode(struct inode *inode, int flags) 968 { 969 struct nilfs_transaction_info ti; 970 struct nilfs_mdt_info *mdi = NILFS_MDT(inode); 971 972 if (is_bad_inode(inode)) { 973 nilfs_warn(inode->i_sb, 974 "tried to mark bad_inode dirty. ignored."); 975 dump_stack(); 976 return; 977 } 978 if (mdi) { 979 nilfs_mdt_mark_dirty(inode); 980 return; 981 } 982 nilfs_transaction_begin(inode->i_sb, &ti, 0); 983 __nilfs_mark_inode_dirty(inode, flags); 984 nilfs_transaction_commit(inode->i_sb); /* never fails */ 985 } 986 987 int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 988 __u64 start, __u64 len) 989 { 990 struct the_nilfs *nilfs = inode->i_sb->s_fs_info; 991 __u64 logical = 0, phys = 0, size = 0; 992 __u32 flags = 0; 993 loff_t isize; 994 sector_t blkoff, end_blkoff; 995 sector_t delalloc_blkoff; 996 unsigned long delalloc_blklen; 997 unsigned int blkbits = inode->i_blkbits; 998 int ret, n; 999 1000 ret = fiemap_prep(inode, fieinfo, start, &len, 0); 1001 if (ret) 1002 return ret; 1003 1004 inode_lock(inode); 1005 1006 isize = i_size_read(inode); 1007 1008 blkoff = start >> blkbits; 1009 end_blkoff = (start + len - 1) >> blkbits; 1010 1011 delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff, 1012 &delalloc_blkoff); 1013 1014 do { 1015 __u64 blkphy; 1016 unsigned int maxblocks; 1017 1018 if (delalloc_blklen && blkoff == delalloc_blkoff) { 1019 if (size) { 1020 /* End of the current extent */ 1021 ret = fiemap_fill_next_extent( 1022 fieinfo, logical, phys, size, flags); 1023 if (ret) 1024 break; 1025 } 1026 if (blkoff > end_blkoff) 1027 break; 1028 1029 flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC; 1030 logical = blkoff << blkbits; 1031 phys = 0; 1032 size = delalloc_blklen << blkbits; 1033 1034 blkoff = delalloc_blkoff + delalloc_blklen; 1035 delalloc_blklen = nilfs_find_uncommitted_extent( 1036 inode, blkoff, &delalloc_blkoff); 1037 continue; 1038 } 1039 1040 /* 1041 * Limit the number of blocks that we look up so as 1042 * not to get into the next delayed allocation extent. 1043 */ 1044 maxblocks = INT_MAX; 1045 if (delalloc_blklen) 1046 maxblocks = min_t(sector_t, delalloc_blkoff - blkoff, 1047 maxblocks); 1048 blkphy = 0; 1049 1050 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); 1051 n = nilfs_bmap_lookup_contig( 1052 NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks); 1053 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); 1054 1055 if (n < 0) { 1056 int past_eof; 1057 1058 if (unlikely(n != -ENOENT)) 1059 break; /* error */ 1060 1061 /* HOLE */ 1062 blkoff++; 1063 past_eof = ((blkoff << blkbits) >= isize); 1064 1065 if (size) { 1066 /* End of the current extent */ 1067 1068 if (past_eof) 1069 flags |= FIEMAP_EXTENT_LAST; 1070 1071 ret = fiemap_fill_next_extent( 1072 fieinfo, logical, phys, size, flags); 1073 if (ret) 1074 break; 1075 size = 0; 1076 } 1077 if (blkoff > end_blkoff || past_eof) 1078 break; 1079 } else { 1080 if (size) { 1081 if (phys && blkphy << blkbits == phys + size) { 1082 /* The current extent goes on */ 1083 size += n << blkbits; 1084 } else { 1085 /* Terminate the current extent */ 1086 ret = fiemap_fill_next_extent( 1087 fieinfo, logical, phys, size, 1088 flags); 1089 if (ret || blkoff > end_blkoff) 1090 break; 1091 1092 /* Start another extent */ 1093 flags = FIEMAP_EXTENT_MERGED; 1094 logical = blkoff << blkbits; 1095 phys = blkphy << blkbits; 1096 size = n << blkbits; 1097 } 1098 } else { 1099 /* Start a new extent */ 1100 flags = FIEMAP_EXTENT_MERGED; 1101 logical = blkoff << blkbits; 1102 phys = blkphy << blkbits; 1103 size = n << blkbits; 1104 } 1105 blkoff += n; 1106 } 1107 cond_resched(); 1108 } while (true); 1109 1110 /* If ret is 1 then we just hit the end of the extent array */ 1111 if (ret == 1) 1112 ret = 0; 1113 1114 inode_unlock(inode); 1115 return ret; 1116 } 1117