/*
 * inode.c - NILFS inode operations.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Written by Ryusuke Konishi.
 *
 */

#include <linux/buffer_head.h>
#include <linux/gfp.h>
#include <linux/mpage.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/uio.h>
#include "nilfs.h"
#include "btnode.h"
#include "segment.h"
#include "page.h"
#include "mdt.h"
#include "cpfile.h"
#include "ifile.h"

/**
 * struct nilfs_iget_args - arguments used during comparison between inodes
 * @ino: inode number
 * @cno: checkpoint number
 * @root: pointer on NILFS root object (mounted checkpoint)
 * @for_gc: inode for GC flag
 */
struct nilfs_iget_args {
	u64 ino;
	__u64 cno;
	struct nilfs_root *root;
	int for_gc;
};

static int nilfs_iget_test(struct inode *inode, void *opaque);

void nilfs_inode_add_blocks(struct inode *inode, int n)
{
	struct nilfs_root *root = NILFS_I(inode)->i_root;

	inode_add_bytes(inode, (1 << inode->i_blkbits) * n);
	if (root)
		atomic64_add(n, &root->blocks_count);
}

void nilfs_inode_sub_blocks(struct inode *inode, int n)
{
	struct nilfs_root *root = NILFS_I(inode)->i_root;

	inode_sub_bytes(inode, (1 << inode->i_blkbits) * n);
	if (root)
		atomic64_sub(n, &root->blocks_count);
}

/**
 * nilfs_get_block() - get a file block on the filesystem (callback function)
 * @inode: inode struct of the target file
 * @blkoff: file block number
 * @bh_result: buffer head to be mapped on
 * @create: indicate whether allocating the block or not when it has not
 *	been allocated yet.
 *
 * This function does not issue actual read request of the specified data
 * block. It is done by VFS.
 */
int nilfs_get_block(struct inode *inode, sector_t blkoff,
		    struct buffer_head *bh_result, int create)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	__u64 blknum = 0;
	int err = 0, ret;
	unsigned int maxblocks = bh_result->b_size >> inode->i_blkbits;

	down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	if (ret >= 0) {	/* found */
		map_bh(bh_result, inode->i_sb, blknum);
		if (ret > 0)
			bh_result->b_size = (ret << inode->i_blkbits);
		goto out;
	}
	/* data block was not found */
	if (ret == -ENOENT && create) {
		struct nilfs_transaction_info ti;

		bh_result->b_blocknr = 0;
		err = nilfs_transaction_begin(inode->i_sb, &ti, 1);
		if (unlikely(err))
			goto out;
		err = nilfs_bmap_insert(ii->i_bmap, blkoff,
					(unsigned long)bh_result);
		if (unlikely(err != 0)) {
			if (err == -EEXIST) {
				/*
				 * The get_block() function could be called
				 * from multiple callers for an inode.
				 * However, the page having this block must
				 * be locked in this case.
				 */
				printk(KERN_WARNING
				       "nilfs_get_block: a race condition "
				       "while inserting a data block. "
				       "(inode number=%lu, file block "
				       "offset=%llu)\n",
				       inode->i_ino,
				       (unsigned long long)blkoff);
				err = 0;
			}
			nilfs_transaction_abort(inode->i_sb);
			goto out;
		}
		nilfs_mark_inode_dirty_sync(inode);
		nilfs_transaction_commit(inode->i_sb); /* never fails */
		/* Error handling should be detailed */
		set_buffer_new(bh_result);
		set_buffer_delay(bh_result);
		map_bh(bh_result, inode->i_sb, 0);
		/* Disk block number must be changed to proper value */

	} else if (ret == -ENOENT) {
		/*
		 * Not finding a block is not an error here (e.g. a hole);
		 * we must return without setting the mapped state flag.
		 */
		;
	} else {
		err = ret;
	}

 out:
	return err;
}

/**
 * nilfs_readpage() - implement readpage() method of nilfs_aops {}
 * address_space_operations.
 * @file: file struct of the file to be read
 * @page: the page to be read
 */
static int nilfs_readpage(struct file *file, struct page *page)
{
	return mpage_readpage(page, nilfs_get_block);
}

/**
 * nilfs_readpages() - implement readpages() method of nilfs_aops {}
 * address_space_operations.
 * @file: file struct of the file to be read
 * @mapping: address_space struct used for reading multiple pages
 * @pages: the pages to be read
 * @nr_pages: number of pages to be read
 */
static int nilfs_readpages(struct file *file, struct address_space *mapping,
			   struct list_head *pages, unsigned int nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, nilfs_get_block);
}

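/**
 * nilfs_writepages() - implement writepages() method of nilfs_aops {}
 * address_space_operations.
 * @mapping: address_space struct of the file
 * @wbc: writeback_control struct
 *
 * In NILFS, writeback is done through segment construction by the log
 * writer rather than through the generic writeback path; this method
 * therefore only starts a data-sync segment construction for
 * WB_SYNC_ALL requests and leaves everything else to the log writer.
 * On a read-only mount, dirty pages can no longer be written back, so
 * they are simply discarded.
 */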
static int nilfs_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	int err = 0;

	if (inode->i_sb->s_flags & MS_RDONLY) {
		nilfs_clear_dirty_pages(mapping, false);
		return -EROFS;
	}

	if (wbc->sync_mode == WB_SYNC_ALL)
		err = nilfs_construct_dsync_segment(inode->i_sb, inode,
						    wbc->range_start,
						    wbc->range_end);
	return err;
}

static int nilfs_writepage(struct page *page, struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	int err;

	if (inode->i_sb->s_flags & MS_RDONLY) {
		/*
		 * This means the filesystem was remounted read-only
		 * because of an error or metadata corruption, while
		 * dirty pages still try to get flushed in the
		 * background.  Simply discard the dirty page here.
		 */
		nilfs_clear_dirty_page(page, false);
		unlock_page(page);
		return -EROFS;
	}

	redirty_page_for_writepage(wbc, page);
	unlock_page(page);

	if (wbc->sync_mode == WB_SYNC_ALL) {
		err = nilfs_construct_segment(inode->i_sb);
		if (unlikely(err))
			return err;
	} else if (wbc->for_reclaim)
		nilfs_flush_segment(inode->i_sb, inode->i_ino);

	return 0;
}

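/**
 * nilfs_set_page_dirty() - implement set_page_dirty() method of nilfs_aops {}
 * address_space_operations.
 * @page: the page to be marked dirty
 *
 * Besides marking the page dirty, this dirties the mapped buffers of
 * the page (skipping hole blocks) and registers the resulting number of
 * dirty blocks through nilfs_set_file_dirty() so that the segment
 * constructor can account for them.
 */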
static int nilfs_set_page_dirty(struct page *page)
{
	struct inode *inode = page->mapping->host;
	int ret = __set_page_dirty_nobuffers(page);

	if (page_has_buffers(page)) {
		unsigned int nr_dirty = 0;
		struct buffer_head *bh, *head;

		/*
		 * This page is locked by callers, and no other thread
		 * concurrently marks its buffers dirty since they are
		 * only dirtied through routines in fs/buffer.c, where
		 * the call sites of mark_buffer_dirty() are protected
		 * by the page lock.
		 */
		bh = head = page_buffers(page);
		do {
			/* Do not mark hole blocks dirty */
			if (buffer_dirty(bh) || !buffer_mapped(bh))
				continue;

			set_buffer_dirty(bh);
			nr_dirty++;
		} while (bh = bh->b_this_page, bh != head);

		if (nr_dirty)
			nilfs_set_file_dirty(inode, nr_dirty);
	} else if (ret) {
		unsigned int nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits);

		nilfs_set_file_dirty(inode, nr_dirty);
	}
	return ret;
}

void nilfs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;

	if (to > inode->i_size) {
		truncate_pagecache(inode, inode->i_size);
		nilfs_truncate(inode);
	}
}

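/**
 * nilfs_write_begin() - implement write_begin() method of nilfs_aops {}
 * address_space_operations.
 * @file: file struct of the file to be written
 * @mapping: address_space struct of the file
 * @pos: byte offset of the write
 * @len: length of the write in bytes
 * @flags: write_begin flags from the VFS
 * @pagep: pointer to the page prepared for the write
 * @fsdata: private data for write_end (unused here)
 *
 * A buffered write is wrapped in a NILFS transaction: the transaction
 * begun here is committed in nilfs_write_end(), or aborted if
 * block_write_begin() fails.
 */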
static int nilfs_write_begin(struct file *file, struct address_space *mapping,
			     loff_t pos, unsigned len, unsigned flags,
			     struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	int err = nilfs_transaction_begin(inode->i_sb, NULL, 1);

	if (unlikely(err))
		return err;

	err = block_write_begin(mapping, pos, len, flags, pagep,
				nilfs_get_block);
	if (unlikely(err)) {
		nilfs_write_failed(mapping, pos + len);
		nilfs_transaction_abort(inode->i_sb);
	}
	return err;
}

static int nilfs_write_end(struct file *file, struct address_space *mapping,
			   loff_t pos, unsigned len, unsigned copied,
			   struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	unsigned int start = pos & (PAGE_SIZE - 1);
	unsigned int nr_dirty;
	int err;

	nr_dirty = nilfs_page_count_clean_buffers(page, start,
						  start + copied);
	copied = generic_write_end(file, mapping, pos, len, copied, page,
				   fsdata);
	nilfs_set_file_dirty(inode, nr_dirty);
	err = nilfs_transaction_commit(inode->i_sb);
	return err ? : copied;
}

static ssize_t
nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	if (iov_iter_rw(iter) == WRITE)
		return 0;

	/* Needs synchronization with the cleaner */
	return blockdev_direct_IO(iocb, inode, iter, nilfs_get_block);
}

const struct address_space_operations nilfs_aops = {
	.writepage		= nilfs_writepage,
	.readpage		= nilfs_readpage,
	.writepages		= nilfs_writepages,
	.set_page_dirty		= nilfs_set_page_dirty,
	.readpages		= nilfs_readpages,
	.write_begin		= nilfs_write_begin,
	.write_end		= nilfs_write_end,
	/* .releasepage		= nilfs_releasepage, */
	.invalidatepage		= block_invalidatepage,
	.direct_IO		= nilfs_direct_IO,
	.is_partially_uptodate	= block_is_partially_uptodate,
};

static int nilfs_insert_inode_locked(struct inode *inode,
				     struct nilfs_root *root,
				     unsigned long ino)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = root, .cno = 0, .for_gc = 0
	};

	return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
}

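/**
 * nilfs_new_inode() - allocate a new inode on a mounted checkpoint
 * @dir: inode of the parent directory
 * @mode: file type and access mode of the new inode
 *
 * Allocates an entry in the ifile of the root (checkpoint) that @dir
 * belongs to, initializes the in-memory inode from @dir and @mode, and
 * inserts it into the inode hash.  Returns the new inode (still marked
 * I_NEW) on success, or an ERR_PTR() value on failure.
 */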
struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
{
	struct super_block *sb = dir->i_sb;
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct inode *inode;
	struct nilfs_inode_info *ii;
	struct nilfs_root *root;
	int err = -ENOMEM;
	ino_t ino;

	inode = new_inode(sb);
	if (unlikely(!inode))
		goto failed;

	mapping_set_gfp_mask(inode->i_mapping,
			   mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));

	root = NILFS_I(dir)->i_root;
	ii = NILFS_I(inode);
	ii->i_state = 1 << NILFS_I_NEW;
	ii->i_root = root;

	err = nilfs_ifile_create_inode(root->ifile, &ino, &ii->i_bh);
	if (unlikely(err))
		goto failed_ifile_create_inode;
	/* reference count of i_bh inherits from nilfs_mdt_read_block() */

	atomic64_inc(&root->inodes_count);
	inode_init_owner(inode, dir, mode);
	inode->i_ino = ino;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;

	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
		err = nilfs_bmap_read(ii->i_bmap, NULL);
		if (err < 0)
			goto failed_after_creation;

		set_bit(NILFS_I_BMAP, &ii->i_state);
		/* No lock is needed; iget() ensures it. */
	}

	ii->i_flags = nilfs_mask_flags(
		mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED);

	/* ii->i_file_acl = 0; */
	/* ii->i_dir_acl = 0; */
	ii->i_dir_start_lookup = 0;
	nilfs_set_inode_flags(inode);
	spin_lock(&nilfs->ns_next_gen_lock);
	inode->i_generation = nilfs->ns_next_generation++;
	spin_unlock(&nilfs->ns_next_gen_lock);
	if (nilfs_insert_inode_locked(inode, root, ino) < 0) {
		err = -EIO;
		goto failed_after_creation;
	}

	err = nilfs_init_acl(inode, dir);
	if (unlikely(err))
		/*
		 * Never occurs.  Once nilfs_init_acl() is actually
		 * supported, proper cancellation of the jobs above
		 * must be considered.
		 */
		goto failed_after_creation;

	return inode;

 failed_after_creation:
	clear_nlink(inode);
	unlock_new_inode(inode);
	iput(inode);  /*
		       * raw_inode will be deleted through
		       * nilfs_evict_inode().
		       */
	goto failed;

 failed_ifile_create_inode:
	make_bad_inode(inode);
	iput(inode);
 failed:
	return ERR_PTR(err);
}

void nilfs_set_inode_flags(struct inode *inode)
{
	unsigned int flags = NILFS_I(inode)->i_flags;
	unsigned int new_fl = 0;

	if (flags & FS_SYNC_FL)
		new_fl |= S_SYNC;
	if (flags & FS_APPEND_FL)
		new_fl |= S_APPEND;
	if (flags & FS_IMMUTABLE_FL)
		new_fl |= S_IMMUTABLE;
	if (flags & FS_NOATIME_FL)
		new_fl |= S_NOATIME;
	if (flags & FS_DIRSYNC_FL)
		new_fl |= S_DIRSYNC;
	inode_set_flags(inode, new_fl, S_SYNC | S_APPEND | S_IMMUTABLE |
			S_NOATIME | S_DIRSYNC);
}

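/**
 * nilfs_read_inode_common() - copy an on-disk inode into a VFS inode
 * @inode: VFS inode to fill in
 * @raw_inode: on-disk nilfs_inode to read from
 *
 * Note that the on-disk inode carries no atime field, so i_atime is
 * mirrored from the stored mtime.  Returns 0 on success, -ESTALE if
 * the inode has been deleted (link count of zero), or another negative
 * error code if reading the bmap fails.
 */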
int nilfs_read_inode_common(struct inode *inode,
			    struct nilfs_inode *raw_inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	int err;

	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
	i_uid_write(inode, le32_to_cpu(raw_inode->i_uid));
	i_gid_write(inode, le32_to_cpu(raw_inode->i_gid));
	set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
	inode->i_size = le64_to_cpu(raw_inode->i_size);
	inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
	inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
	if (inode->i_nlink == 0)
		return -ESTALE; /* this inode is deleted */

	inode->i_blocks = le64_to_cpu(raw_inode->i_blocks);
	ii->i_flags = le32_to_cpu(raw_inode->i_flags);
#if 0
	ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
	ii->i_dir_acl = S_ISREG(inode->i_mode) ?
		0 : le32_to_cpu(raw_inode->i_dir_acl);
#endif
	ii->i_dir_start_lookup = 0;
	inode->i_generation = le32_to_cpu(raw_inode->i_generation);

	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
	    S_ISLNK(inode->i_mode)) {
		err = nilfs_bmap_read(ii->i_bmap, raw_inode);
		if (err < 0)
			return err;
		set_bit(NILFS_I_BMAP, &ii->i_state);
		/* No lock is needed; iget() ensures it. */
	}
	return 0;
}

static int __nilfs_read_inode(struct super_block *sb,
			      struct nilfs_root *root, unsigned long ino,
			      struct inode *inode)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct buffer_head *bh;
	struct nilfs_inode *raw_inode;
	int err;

	down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh);
	if (unlikely(err))
		goto bad_inode;

	raw_inode = nilfs_ifile_map_inode(root->ifile, ino, bh);

	err = nilfs_read_inode_common(inode, raw_inode);
	if (err)
		goto failed_unmap;

	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &nilfs_file_inode_operations;
		inode->i_fop = &nilfs_file_operations;
		inode->i_mapping->a_ops = &nilfs_aops;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &nilfs_dir_inode_operations;
		inode->i_fop = &nilfs_dir_operations;
		inode->i_mapping->a_ops = &nilfs_aops;
	} else if (S_ISLNK(inode->i_mode)) {
		inode->i_op = &nilfs_symlink_inode_operations;
		inode_nohighmem(inode);
		inode->i_mapping->a_ops = &nilfs_aops;
	} else {
		inode->i_op = &nilfs_special_inode_operations;
		init_special_inode(
			inode, inode->i_mode,
			huge_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
	}
	nilfs_ifile_unmap_inode(root->ifile, ino, bh);
	brelse(bh);
	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	nilfs_set_inode_flags(inode);
	mapping_set_gfp_mask(inode->i_mapping,
			   mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
	return 0;

 failed_unmap:
	nilfs_ifile_unmap_inode(root->ifile, ino, bh);
	brelse(bh);

 bad_inode:
	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	return err;
}

static int nilfs_iget_test(struct inode *inode, void *opaque)
{
	struct nilfs_iget_args *args = opaque;
	struct nilfs_inode_info *ii;

	if (args->ino != inode->i_ino || args->root != NILFS_I(inode)->i_root)
		return 0;

	ii = NILFS_I(inode);
	if (!test_bit(NILFS_I_GCINODE, &ii->i_state))
		return !args->for_gc;

	return args->for_gc && args->cno == ii->i_cno;
}

static int nilfs_iget_set(struct inode *inode, void *opaque)
{
	struct nilfs_iget_args *args = opaque;

	inode->i_ino = args->ino;
	if (args->for_gc) {
		NILFS_I(inode)->i_state = 1 << NILFS_I_GCINODE;
		NILFS_I(inode)->i_cno = args->cno;
		NILFS_I(inode)->i_root = NULL;
	} else {
		if (args->root && args->ino == NILFS_ROOT_INO)
			nilfs_get_root(args->root);
		NILFS_I(inode)->i_root = args->root;
	}
	return 0;
}

struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root,
			    unsigned long ino)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = root, .cno = 0, .for_gc = 0
	};

	return ilookup5(sb, ino, nilfs_iget_test, &args);
}

struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root,
				unsigned long ino)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = root, .cno = 0, .for_gc = 0
	};

	return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
}

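/**
 * nilfs_iget() - obtain an inode of the given checkpoint
 * @sb: super block instance
 * @root: NILFS root (checkpoint) the inode belongs to
 * @ino: inode number
 *
 * Looks the inode up in the inode cache and, on a miss, reads it in
 * from the ifile via __nilfs_read_inode().  Returns the inode on
 * success or an ERR_PTR() value on failure.
 */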
struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root,
			 unsigned long ino)
{
	struct inode *inode;
	int err;

	inode = nilfs_iget_locked(sb, root, ino);
	if (unlikely(!inode))
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;

	err = __nilfs_read_inode(sb, root, ino, inode);
	if (unlikely(err)) {
		iget_failed(inode);
		return ERR_PTR(err);
	}
	unlock_new_inode(inode);
	return inode;
}

struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
				__u64 cno)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = NULL, .cno = cno, .for_gc = 1
	};
	struct inode *inode;
	int err;

	inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
	if (unlikely(!inode))
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;

	err = nilfs_init_gcinode(inode);
	if (unlikely(err)) {
		iget_failed(inode);
		return ERR_PTR(err);
	}
	unlock_new_inode(inode);
	return inode;
}

void nilfs_write_inode_common(struct inode *inode,
			      struct nilfs_inode *raw_inode, int has_bmap)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);

	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
	raw_inode->i_uid = cpu_to_le32(i_uid_read(inode));
	raw_inode->i_gid = cpu_to_le32(i_gid_read(inode));
	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
	raw_inode->i_size = cpu_to_le64(inode->i_size);
	raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
	raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
	raw_inode->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
	raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
	raw_inode->i_blocks = cpu_to_le64(inode->i_blocks);

	raw_inode->i_flags = cpu_to_le32(ii->i_flags);
	raw_inode->i_generation = cpu_to_le32(inode->i_generation);

	if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) {
		struct the_nilfs *nilfs = inode->i_sb->s_fs_info;

		/* zero-fill unused portion in the case of super root block */
		raw_inode->i_xattr = 0;
		raw_inode->i_pad = 0;
		memset((void *)raw_inode + sizeof(*raw_inode), 0,
		       nilfs->ns_inode_size - sizeof(*raw_inode));
	}

	if (has_bmap)
		nilfs_bmap_write(ii->i_bmap, raw_inode);
	else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
		raw_inode->i_device_code =
			cpu_to_le64(huge_encode_dev(inode->i_rdev));
	/*
	 * When extending the on-disk inode, nilfs->ns_inode_size should
	 * be checked for substitutions of appended fields.
	 */
}

void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags)
{
	ino_t ino = inode->i_ino;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct inode *ifile = ii->i_root->ifile;
	struct nilfs_inode *raw_inode;

	raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh);

	if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state))
		memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size);
	if (flags & I_DIRTY_DATASYNC)
		set_bit(NILFS_I_INODE_SYNC, &ii->i_state);

	nilfs_write_inode_common(inode, raw_inode, 0);
	/*
	 * XXX: calling with has_bmap = 0 is a workaround to avoid a
	 * deadlock of bmap.  This delays the update of i_bmap to just
	 * before writing.
	 */

	nilfs_ifile_unmap_inode(ifile, ino, ibh);
}

#define NILFS_MAX_TRUNCATE_BLOCKS	16384  /* 64MB for 4KB block */

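/**
 * nilfs_truncate_bmap() - truncate the block mapping of an inode
 * @ii: nilfs inode information
 * @from: first file block offset to be truncated
 *
 * Truncates the bmap from its last key down to @from in chunks of at
 * most NILFS_MAX_TRUNCATE_BLOCKS blocks, relaxing lock pressure between
 * chunks and retrying once on -ENOMEM.
 */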
static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
				unsigned long from)
{
	__u64 b;
	int ret;

	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
		return;
repeat:
	ret = nilfs_bmap_last_key(ii->i_bmap, &b);
	if (ret == -ENOENT)
		return;
	else if (ret < 0)
		goto failed;

	if (b < from)
		return;

	b -= min_t(__u64, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
	ret = nilfs_bmap_truncate(ii->i_bmap, b);
	nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb);
	if (!ret || (ret == -ENOMEM &&
		     nilfs_bmap_truncate(ii->i_bmap, b) == 0))
		goto repeat;

failed:
	nilfs_warning(ii->vfs_inode.i_sb, __func__,
		      "failed to truncate bmap (ino=%lu, err=%d)",
		      ii->vfs_inode.i_ino, ret);
}

void nilfs_truncate(struct inode *inode)
{
	unsigned long blkoff;
	unsigned int blocksize;
	struct nilfs_transaction_info ti;
	struct super_block *sb = inode->i_sb;
	struct nilfs_inode_info *ii = NILFS_I(inode);

	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
		return;
	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
		return;

	blocksize = sb->s_blocksize;
	blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits;
	nilfs_transaction_begin(sb, &ti, 0); /* never fails */

	block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block);

	nilfs_truncate_bmap(ii, blkoff);

	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	if (IS_SYNC(inode))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);

	nilfs_mark_inode_dirty(inode);
	nilfs_set_file_dirty(inode, 0);
	nilfs_transaction_commit(sb);
	/*
	 * May construct a logical segment and may fail in sync mode.
	 * But truncate has no return value.
	 */
}

static void nilfs_clear_inode(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);

	/*
	 * Free resources allocated in nilfs_read_inode(), here.
	 */
	BUG_ON(!list_empty(&ii->i_dirty));
	brelse(ii->i_bh);
	ii->i_bh = NULL;

	if (nilfs_is_metadata_file_inode(inode))
		nilfs_mdt_clear(inode);

	if (test_bit(NILFS_I_BMAP, &ii->i_state))
		nilfs_bmap_clear(ii->i_bmap);

	nilfs_btnode_cache_clear(&ii->i_btnode_cache);

	if (ii->i_root && inode->i_ino == NILFS_ROOT_INO)
		nilfs_put_root(ii->i_root);
}

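/**
 * nilfs_evict_inode() - evict an inode from memory, deleting it on disk
 * @inode: inode to be evicted
 *
 * For an unlinked inode on a writable checkpoint, this truncates the
 * whole bmap and deletes the entry from the ifile inside a transaction;
 * otherwise it only releases the in-memory resources.
 */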
void nilfs_evict_inode(struct inode *inode)
{
	struct nilfs_transaction_info ti;
	struct super_block *sb = inode->i_sb;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	int ret;

	if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
		truncate_inode_pages_final(&inode->i_data);
		clear_inode(inode);
		nilfs_clear_inode(inode);
		return;
	}
	nilfs_transaction_begin(sb, &ti, 0); /* never fails */

	truncate_inode_pages_final(&inode->i_data);

	/* TODO: some of the following operations may fail.  */
	nilfs_truncate_bmap(ii, 0);
	nilfs_mark_inode_dirty(inode);
	clear_inode(inode);

	ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
	if (!ret)
		atomic64_dec(&ii->i_root->inodes_count);

	nilfs_clear_inode(inode);

	if (IS_SYNC(inode))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);
	nilfs_transaction_commit(sb);
	/*
	 * May construct a logical segment and may fail in sync mode.
	 * But delete_inode has no return value.
	 */
}

int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)
{
	struct nilfs_transaction_info ti;
	struct inode *inode = d_inode(dentry);
	struct super_block *sb = inode->i_sb;
	int err;

	err = inode_change_ok(inode, iattr);
	if (err)
		return err;

	err = nilfs_transaction_begin(sb, &ti, 0);
	if (unlikely(err))
		return err;

	if ((iattr->ia_valid & ATTR_SIZE) &&
	    iattr->ia_size != i_size_read(inode)) {
		inode_dio_wait(inode);
		truncate_setsize(inode, iattr->ia_size);
		nilfs_truncate(inode);
	}

	setattr_copy(inode, iattr);
	mark_inode_dirty(inode);

	if (iattr->ia_valid & ATTR_MODE) {
		err = nilfs_acl_chmod(inode);
		if (unlikely(err))
			goto out_err;
	}

	return nilfs_transaction_commit(sb);

out_err:
	nilfs_transaction_abort(sb);
	return err;
}

int nilfs_permission(struct inode *inode, int mask)
{
	struct nilfs_root *root = NILFS_I(inode)->i_root;

	if ((mask & MAY_WRITE) && root &&
	    root->cno != NILFS_CPTREE_CURRENT_CNO)
		return -EROFS; /* snapshot is not writable */

	return generic_permission(inode, mask);
}

int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
{
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	int err;

	spin_lock(&nilfs->ns_inode_lock);
	if (ii->i_bh == NULL) {
		spin_unlock(&nilfs->ns_inode_lock);
		err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
						  inode->i_ino, pbh);
		if (unlikely(err))
			return err;
		spin_lock(&nilfs->ns_inode_lock);
		if (ii->i_bh == NULL)
			ii->i_bh = *pbh;
		else {
			brelse(*pbh);
			*pbh = ii->i_bh;
		}
	} else
		*pbh = ii->i_bh;

	get_bh(*pbh);
	spin_unlock(&nilfs->ns_inode_lock);
	return 0;
}

int nilfs_inode_dirty(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	int ret = 0;

	if (!list_empty(&ii->i_dirty)) {
		spin_lock(&nilfs->ns_inode_lock);
		ret = test_bit(NILFS_I_DIRTY, &ii->i_state) ||
			test_bit(NILFS_I_BUSY, &ii->i_state);
		spin_unlock(&nilfs->ns_inode_lock);
	}
	return ret;
}

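/**
 * nilfs_set_file_dirty() - register a file inode to the dirty file list
 * @inode: inode of the file
 * @nr_dirty: number of newly dirtied blocks to account for
 *
 * Adds @nr_dirty to the per-filesystem dirty block counter and, unless
 * the inode is already queued or in use by the segment constructor,
 * moves it onto the ns_dirty_files list.  Returns 0 on success, or
 * -EINVAL if the inode is being freed.
 */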
int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;

	atomic_add(nr_dirty, &nilfs->ns_ndirtyblks);

	if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state))
		return 0;

	spin_lock(&nilfs->ns_inode_lock);
	if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
	    !test_bit(NILFS_I_BUSY, &ii->i_state)) {
		/*
		 * Because this routine may race with nilfs_dispose_list(),
		 * we have to check NILFS_I_QUEUED here, too.
		 */
		if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) {
			/*
			 * This will happen when somebody is freeing
			 * this inode.
			 */
			nilfs_warning(inode->i_sb, __func__,
				      "cannot get inode (ino=%lu)",
				      inode->i_ino);
			spin_unlock(&nilfs->ns_inode_lock);
			return -EINVAL; /*
					 * NILFS_I_DIRTY may remain for
					 * freeing inode.
					 */
		}
		list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
		set_bit(NILFS_I_QUEUED, &ii->i_state);
	}
	spin_unlock(&nilfs->ns_inode_lock);
	return 0;
}

int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
{
	struct buffer_head *ibh;
	int err;

	err = nilfs_load_inode_block(inode, &ibh);
	if (unlikely(err)) {
		nilfs_warning(inode->i_sb, __func__,
			      "failed to reget inode block.");
		return err;
	}
	nilfs_update_inode(inode, ibh, flags);
	mark_buffer_dirty(ibh);
	nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile);
	brelse(ibh);
	return 0;
}

/**
 * nilfs_dirty_inode - reflect changes on given inode to an inode block.
 * @inode: inode of the file to be registered.
 * @flags: flags to determine the dirty state of the inode
 *
 * nilfs_dirty_inode() loads an inode block containing the specified
 * @inode and copies data from a nilfs_inode to a corresponding inode
 * entry in the inode block.  This operation is excluded from the segment
 * construction.  This function can be called both as a single operation
 * and as a part of indivisible file operations.
 */
void nilfs_dirty_inode(struct inode *inode, int flags)
{
	struct nilfs_transaction_info ti;
	struct nilfs_mdt_info *mdi = NILFS_MDT(inode);

	if (is_bad_inode(inode)) {
		nilfs_warning(inode->i_sb, __func__,
			      "tried to mark bad_inode dirty. ignored.");
		dump_stack();
		return;
	}
	if (mdi) {
		nilfs_mdt_mark_dirty(inode);
		return;
	}
	nilfs_transaction_begin(inode->i_sb, &ti, 0);
	__nilfs_mark_inode_dirty(inode, flags);
	nilfs_transaction_commit(inode->i_sb); /* never fails */
}

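/**
 * nilfs_fiemap() - implement the fiemap() inode operation
 * @inode: inode of the target file
 * @fieinfo: fiemap extent information
 * @start: byte offset of the range to map
 * @len: length in bytes of the range to map
 *
 * Walks the block mapping of @inode, merging contiguous blocks into
 * extents and reporting uncommitted (delayed allocation) ranges found
 * by nilfs_find_uncommitted_extent() as FIEMAP_EXTENT_DELALLOC.
 */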
int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 __u64 start, __u64 len)
{
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	__u64 logical = 0, phys = 0, size = 0;
	__u32 flags = 0;
	loff_t isize;
	sector_t blkoff, end_blkoff;
	sector_t delalloc_blkoff;
	unsigned long delalloc_blklen;
	unsigned int blkbits = inode->i_blkbits;
	int ret, n;

	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
	if (ret)
		return ret;

	inode_lock(inode);

	isize = i_size_read(inode);

	blkoff = start >> blkbits;
	end_blkoff = (start + len - 1) >> blkbits;

	delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff,
							&delalloc_blkoff);

	do {
		__u64 blkphy;
		unsigned int maxblocks;

		if (delalloc_blklen && blkoff == delalloc_blkoff) {
			if (size) {
				/* End of the current extent */
				ret = fiemap_fill_next_extent(
					fieinfo, logical, phys, size, flags);
				if (ret)
					break;
			}
			if (blkoff > end_blkoff)
				break;

			flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC;
			logical = blkoff << blkbits;
			phys = 0;
			size = delalloc_blklen << blkbits;

			blkoff = delalloc_blkoff + delalloc_blklen;
			delalloc_blklen = nilfs_find_uncommitted_extent(
				inode, blkoff, &delalloc_blkoff);
			continue;
		}

		/*
		 * Limit the number of blocks that we look up so as
		 * not to get into the next delayed allocation extent.
		 */
		maxblocks = INT_MAX;
		if (delalloc_blklen)
			maxblocks = min_t(sector_t, delalloc_blkoff - blkoff,
					  maxblocks);
		blkphy = 0;

		down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
		n = nilfs_bmap_lookup_contig(
			NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks);
		up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);

		if (n < 0) {
			int past_eof;

			if (unlikely(n != -ENOENT))
				break; /* error */

			/* HOLE */
			blkoff++;
			past_eof = ((blkoff << blkbits) >= isize);

			if (size) {
				/* End of the current extent */

				if (past_eof)
					flags |= FIEMAP_EXTENT_LAST;

				ret = fiemap_fill_next_extent(
					fieinfo, logical, phys, size, flags);
				if (ret)
					break;
				size = 0;
			}
			if (blkoff > end_blkoff || past_eof)
				break;
		} else {
			if (size) {
				if (phys && blkphy << blkbits == phys + size) {
					/* The current extent goes on */
					size += n << blkbits;
				} else {
					/* Terminate the current extent */
					ret = fiemap_fill_next_extent(
						fieinfo, logical, phys, size,
						flags);
					if (ret || blkoff > end_blkoff)
						break;

					/* Start another extent */
					flags = FIEMAP_EXTENT_MERGED;
					logical = blkoff << blkbits;
					phys = blkphy << blkbits;
					size = n << blkbits;
				}
			} else {
				/* Start a new extent */
				flags = FIEMAP_EXTENT_MERGED;
				logical = blkoff << blkbits;
				phys = blkphy << blkbits;
				size = n << blkbits;
			}
			blkoff += n;
		}
		cond_resched();
	} while (true);

	/* If ret is 1 then we just hit the end of the extent array */
	if (ret == 1)
		ret = 0;

	inode_unlock(inode);
	return ret;
}