/*
 * linux/fs/inode.c
 *
 * (C) 1997 Linus Torvalds
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/dcache.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/writeback.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/wait.h>
#include <linux/rwsem.h>
#include <linux/hash.h>
#include <linux/swap.h>
#include <linux/security.h>
#include <linux/pagemap.h>
#include <linux/cdev.h>
#include <linux/bootmem.h>
#include <linux/fsnotify.h>
#include <linux/mount.h>
#include <linux/async.h>
#include <linux/posix_acl.h>
#include <linux/ima.h>

/*
 * This is needed for the following functions:
 *  - inode_has_buffers
 *  - invalidate_bdev
 *
 * FIXME: remove all knowledge of the buffer layer from this file
 */
#include <linux/buffer_head.h>

/*
 * New inode.c implementation.
 *
 * This implementation has the basic premise of trying
 * to be extremely low-overhead and SMP-safe, yet be
 * simple enough to be "obviously correct".
 *
 * Famous last words.
 */

/* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */

/* #define INODE_PARANOIA 1 */
/* #define INODE_DEBUG 1 */

/*
 * Inode lookup is no longer as critical as it used to be:
 * most of the lookups are going to be through the dcache.
 */
#define I_HASHBITS	i_hash_shift
#define I_HASHMASK	i_hash_mask

static unsigned int i_hash_mask __read_mostly;
static unsigned int i_hash_shift __read_mostly;

/*
 * Each inode can be on two separate lists. One is
 * the hash list of the inode, used for lookups. The
 * other linked list is the "type" list:
 *  "in_use" - valid inode, i_count > 0, i_nlink > 0
 *  "dirty"  - as "in_use" but also dirty
 *  "unused" - valid inode, i_count = 0
 *
 * A "dirty" list is maintained for each super block,
 * allowing for low-overhead inode sync() operations.
 */

static LIST_HEAD(inode_lru);
static struct hlist_head *inode_hashtable __read_mostly;

/*
 * A simple spinlock to protect the list manipulations.
 *
 * NOTE! You also have to own the lock if you change
 * the i_state of an inode while it is in use..
 */
DEFINE_SPINLOCK(inode_lock);

/*
 * iprune_sem provides exclusion between the kswapd or try_to_free_pages
 * icache shrinking path, and the umount path. Without this exclusion,
 * by the time prune_icache calls iput for the inode whose pages it has
 * been invalidating, or by the time it calls clear_inode & destroy_inode
 * from its final dispose_list, the struct super_block they refer to
 * (for inode->i_sb->s_op) may already have been freed and reused.
 *
 * We make this an rwsem because the fastpath is icache shrinking. In
 * some cases a filesystem may be doing a significant amount of work in
 * its inode reclaim code, so this should improve parallelism.
 */
static DECLARE_RWSEM(iprune_sem);

/*
 * Statistics gathering..
 */
struct inodes_stat_t inodes_stat;

static DEFINE_PER_CPU(unsigned int, nr_inodes);

static struct kmem_cache *inode_cachep __read_mostly;

static int get_nr_inodes(void)
{
	int i;
	int sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_inodes, i);
	return sum < 0 ? 0 : sum;
}

static inline int get_nr_inodes_unused(void)
{
	return inodes_stat.nr_unused;
}

int get_nr_dirty_inodes(void)
{
	/* not actually dirty inodes, but a wild approximation */
	int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
	return nr_dirty > 0 ? nr_dirty : 0;
}

/*
 * Handle nr_inode sysctl
 */
#ifdef CONFIG_SYSCTL
int proc_nr_inodes(ctl_table *table, int write,
		   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	inodes_stat.nr_inodes = get_nr_inodes();
	return proc_dointvec(table, write, buffer, lenp, ppos);
}
#endif

static void wake_up_inode(struct inode *inode)
{
	/*
	 * Prevent speculative execution through spin_unlock(&inode_lock);
	 */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_NEW);
}

/**
 * inode_init_always - perform inode structure initialisation
 * @sb: superblock inode belongs to
 * @inode: inode to initialise
 *
 * These are initializations that need to be done on every inode
 * allocation as the fields are not initialised by slab allocation.
 */
int inode_init_always(struct super_block *sb, struct inode *inode)
{
	static const struct address_space_operations empty_aops;
	static const struct inode_operations empty_iops;
	static const struct file_operations empty_fops;
	struct address_space *const mapping = &inode->i_data;

	inode->i_sb = sb;
	inode->i_blkbits = sb->s_blocksize_bits;
	inode->i_flags = 0;
	atomic_set(&inode->i_count, 1);
	inode->i_op = &empty_iops;
	inode->i_fop = &empty_fops;
	inode->i_nlink = 1;
	inode->i_uid = 0;
	inode->i_gid = 0;
	atomic_set(&inode->i_writecount, 0);
	inode->i_size = 0;
	inode->i_blocks = 0;
	inode->i_bytes = 0;
	inode->i_generation = 0;
#ifdef CONFIG_QUOTA
	memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
#endif
	inode->i_pipe = NULL;
	inode->i_bdev = NULL;
	inode->i_cdev = NULL;
	inode->i_rdev = 0;
	inode->dirtied_when = 0;

	if (security_inode_alloc(inode))
		goto out;
	spin_lock_init(&inode->i_lock);
	lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);

	mutex_init(&inode->i_mutex);
	lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);

	init_rwsem(&inode->i_alloc_sem);
	lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);

	mapping->a_ops = &empty_aops;
	mapping->host = inode;
	mapping->flags = 0;
	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
	mapping->assoc_mapping = NULL;
	mapping->backing_dev_info = &default_backing_dev_info;
	mapping->writeback_index = 0;

	/*
	 * If the block_device provides a backing_dev_info for client
	 * inodes then use that. Otherwise the inode shares the bdev's
	 * backing_dev_info.
	 */
	if (sb->s_bdev) {
		struct backing_dev_info *bdi;

		bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
		mapping->backing_dev_info = bdi;
	}
	inode->i_private = NULL;
	inode->i_mapping = mapping;
#ifdef CONFIG_FS_POSIX_ACL
	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
#endif

#ifdef CONFIG_FSNOTIFY
	inode->i_fsnotify_mask = 0;
#endif

	this_cpu_inc(nr_inodes);

	return 0;
out:
	return -ENOMEM;
}
EXPORT_SYMBOL(inode_init_always);

static struct inode *alloc_inode(struct super_block *sb)
{
	struct inode *inode;

	if (sb->s_op->alloc_inode)
		inode = sb->s_op->alloc_inode(sb);
	else
		inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);

	if (!inode)
		return NULL;

	if (unlikely(inode_init_always(sb, inode))) {
		if (inode->i_sb->s_op->destroy_inode)
			inode->i_sb->s_op->destroy_inode(inode);
		else
			kmem_cache_free(inode_cachep, inode);
		return NULL;
	}

	return inode;
}

void free_inode_nonrcu(struct inode *inode)
{
	kmem_cache_free(inode_cachep, inode);
}
EXPORT_SYMBOL(free_inode_nonrcu);

void __destroy_inode(struct inode *inode)
{
	BUG_ON(inode_has_buffers(inode));
	security_inode_free(inode);
	fsnotify_inode_delete(inode);
#ifdef CONFIG_FS_POSIX_ACL
	if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED)
		posix_acl_release(inode->i_acl);
	if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
		posix_acl_release(inode->i_default_acl);
#endif
	this_cpu_dec(nr_inodes);
}
EXPORT_SYMBOL(__destroy_inode);

static void i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	INIT_LIST_HEAD(&inode->i_dentry);
	kmem_cache_free(inode_cachep, inode);
}

static void destroy_inode(struct inode *inode)
{
	BUG_ON(!list_empty(&inode->i_lru));
	__destroy_inode(inode);
	if (inode->i_sb->s_op->destroy_inode)
		inode->i_sb->s_op->destroy_inode(inode);
	else
		call_rcu(&inode->i_rcu, i_callback);
}

/*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across use
 * of the inode, so let the slab be aware of that.
 */
void inode_init_once(struct inode *inode)
{
	memset(inode, 0, sizeof(*inode));
	INIT_HLIST_NODE(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_dentry);
	INIT_LIST_HEAD(&inode->i_devices);
	INIT_LIST_HEAD(&inode->i_wb_list);
	INIT_LIST_HEAD(&inode->i_lru);
	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
	spin_lock_init(&inode->i_data.tree_lock);
	spin_lock_init(&inode->i_data.i_mmap_lock);
	INIT_LIST_HEAD(&inode->i_data.private_list);
	spin_lock_init(&inode->i_data.private_lock);
	INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
	INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
	i_size_ordered_init(inode);
#ifdef CONFIG_FSNOTIFY
	INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
#endif
}
EXPORT_SYMBOL(inode_init_once);

static void init_once(void *foo)
{
	struct inode *inode = (struct inode *) foo;

	inode_init_once(inode);
}

/*
 * inode_lock must be held
 */
void __iget(struct inode *inode)
{
	atomic_inc(&inode->i_count);
}

/*
 * get additional reference to inode; caller must already hold one.
 */
void ihold(struct inode *inode)
{
	WARN_ON(atomic_inc_return(&inode->i_count) < 2);
}
EXPORT_SYMBOL(ihold);

static void inode_lru_list_add(struct inode *inode)
{
	if (list_empty(&inode->i_lru)) {
		list_add(&inode->i_lru, &inode_lru);
		inodes_stat.nr_unused++;
	}
}

static void inode_lru_list_del(struct inode *inode)
{
	if (!list_empty(&inode->i_lru)) {
		list_del_init(&inode->i_lru);
		inodes_stat.nr_unused--;
	}
}

static inline void __inode_sb_list_add(struct inode *inode)
{
	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
}

/**
 * inode_sb_list_add - add inode to the superblock list of inodes
 * @inode: inode to add
 */
void inode_sb_list_add(struct inode *inode)
{
	spin_lock(&inode_lock);
	__inode_sb_list_add(inode);
	spin_unlock(&inode_lock);
}
EXPORT_SYMBOL_GPL(inode_sb_list_add);

static inline void __inode_sb_list_del(struct inode *inode)
{
	list_del_init(&inode->i_sb_list);
}

static unsigned long hash(struct super_block *sb, unsigned long hashval)
{
	unsigned long tmp;

	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
			L1_CACHE_BYTES;
	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
	return tmp & I_HASHMASK;
}

/**
 * __insert_inode_hash - hash an inode
 * @inode: unhashed inode
 * @hashval: unsigned long value used to locate this object in the
 *	inode_hashtable.
 *
 * Add an inode to the inode hash for this superblock.
 */
void __insert_inode_hash(struct inode *inode, unsigned long hashval)
{
	struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);

	spin_lock(&inode_lock);
	hlist_add_head(&inode->i_hash, b);
	spin_unlock(&inode_lock);
}
EXPORT_SYMBOL(__insert_inode_hash);

/**
 * __remove_inode_hash - remove an inode from the hash
 * @inode: inode to unhash
 *
 * Remove an inode from the superblock.
 */
static void __remove_inode_hash(struct inode *inode)
{
	hlist_del_init(&inode->i_hash);
}

/**
 * remove_inode_hash - remove an inode from the hash
 * @inode: inode to unhash
 *
 * Remove an inode from the superblock.
 */
void remove_inode_hash(struct inode *inode)
{
	spin_lock(&inode_lock);
	hlist_del_init(&inode->i_hash);
	spin_unlock(&inode_lock);
}
EXPORT_SYMBOL(remove_inode_hash);

void end_writeback(struct inode *inode)
{
	might_sleep();
	BUG_ON(inode->i_data.nrpages);
	BUG_ON(!list_empty(&inode->i_data.private_list));
	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(inode->i_state & I_CLEAR);
	inode_sync_wait(inode);
	/* don't need i_lock here, no concurrent mods to i_state */
	inode->i_state = I_FREEING | I_CLEAR;
}
EXPORT_SYMBOL(end_writeback);

static void evict(struct inode *inode)
{
	const struct super_operations *op = inode->i_sb->s_op;

	if (op->evict_inode) {
		op->evict_inode(inode);
	} else {
		if (inode->i_data.nrpages)
			truncate_inode_pages(&inode->i_data, 0);
		end_writeback(inode);
	}
	if (S_ISBLK(inode->i_mode) && inode->i_bdev)
		bd_forget(inode);
	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
		cd_forget(inode);
}

/*
 * dispose_list - dispose of the contents of a local list
 * @head: the head of the list to free
 *
 * Dispose-list gets a local list with local inodes in it, so it doesn't
 * need to worry about list corruption and SMP locks.
 */
static void dispose_list(struct list_head *head)
{
	while (!list_empty(head)) {
		struct inode *inode;

		inode = list_first_entry(head, struct inode, i_lru);
		list_del_init(&inode->i_lru);

		evict(inode);

		spin_lock(&inode_lock);
		__remove_inode_hash(inode);
		__inode_sb_list_del(inode);
		spin_unlock(&inode_lock);

		wake_up_inode(inode);
		destroy_inode(inode);
	}
}

/**
 * evict_inodes - evict all evictable inodes for a superblock
 * @sb: superblock to operate on
 *
 * Make sure that no inodes with zero refcount are retained. This is
 * called by superblock shutdown after having MS_ACTIVE flag removed,
 * so any inode reaching zero refcount during or after that call will
 * be immediately evicted.
 */
void evict_inodes(struct super_block *sb)
{
	struct inode *inode, *next;
	LIST_HEAD(dispose);

	down_write(&iprune_sem);

	spin_lock(&inode_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		if (atomic_read(&inode->i_count))
			continue;

		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			WARN_ON(1);
			continue;
		}

		inode->i_state |= I_FREEING;

		/*
		 * Move the inode off the IO lists and LRU once I_FREEING is
		 * set so that it won't get moved back on there if it is dirty.
		 */
		list_move(&inode->i_lru, &dispose);
		list_del_init(&inode->i_wb_list);
		if (!(inode->i_state & (I_DIRTY | I_SYNC)))
			inodes_stat.nr_unused--;
	}
	spin_unlock(&inode_lock);

	dispose_list(&dispose);
	up_write(&iprune_sem);
}

/**
 * invalidate_inodes - attempt to free all inodes on a superblock
 * @sb: superblock to operate on
 *
 * Attempts to free all inodes for a given superblock. If there were any
 * busy inodes return a non-zero value, else zero.
 */
int invalidate_inodes(struct super_block *sb)
{
	int busy = 0;
	struct inode *inode, *next;
	LIST_HEAD(dispose);

	down_write(&iprune_sem);

	spin_lock(&inode_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
			continue;
		if (atomic_read(&inode->i_count)) {
			busy = 1;
			continue;
		}

		inode->i_state |= I_FREEING;

		/*
		 * Move the inode off the IO lists and LRU once I_FREEING is
		 * set so that it won't get moved back on there if it is dirty.
		 */
		list_move(&inode->i_lru, &dispose);
		list_del_init(&inode->i_wb_list);
		if (!(inode->i_state & (I_DIRTY | I_SYNC)))
			inodes_stat.nr_unused--;
	}
	spin_unlock(&inode_lock);

	dispose_list(&dispose);
	up_write(&iprune_sem);

	return busy;
}

static int can_unuse(struct inode *inode)
{
	if (inode->i_state & ~I_REFERENCED)
		return 0;
	if (inode_has_buffers(inode))
		return 0;
	if (atomic_read(&inode->i_count))
		return 0;
	if (inode->i_data.nrpages)
		return 0;
	return 1;
}

/*
 * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
 * temporary list and then are freed outside inode_lock by dispose_list().
 *
 * Any inodes which are pinned purely because of attached pagecache have their
 * pagecache removed. If the inode has metadata buffers attached to
 * mapping->private_list then try to remove them.
 *
 * If the inode has the I_REFERENCED flag set, then it means that it has been
 * used recently - the flag is set in iput_final(). When we encounter such an
 * inode, clear the flag and move it to the back of the LRU so it gets another
 * pass through the LRU before it gets reclaimed. This is necessary because of
 * the fact we are doing lazy LRU updates to minimise lock contention so the
 * LRU does not have strict ordering. Hence we don't want to reclaim inodes
 * with this flag set because they are the inodes that are out of order.
 */
static void prune_icache(int nr_to_scan)
{
	LIST_HEAD(freeable);
	int nr_scanned;
	unsigned long reap = 0;

	down_read(&iprune_sem);
	spin_lock(&inode_lock);
	for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
		struct inode *inode;

		if (list_empty(&inode_lru))
			break;

		inode = list_entry(inode_lru.prev, struct inode, i_lru);

		/*
		 * Referenced or dirty inodes are still in use. Give them
		 * another pass through the LRU as we cannot reclaim them now.
		 */
		if (atomic_read(&inode->i_count) ||
		    (inode->i_state & ~I_REFERENCED)) {
			list_del_init(&inode->i_lru);
			inodes_stat.nr_unused--;
			continue;
		}

		/* recently referenced inodes get one more pass */
		if (inode->i_state & I_REFERENCED) {
			list_move(&inode->i_lru, &inode_lru);
			inode->i_state &= ~I_REFERENCED;
			continue;
		}
		if (inode_has_buffers(inode) || inode->i_data.nrpages) {
			__iget(inode);
			spin_unlock(&inode_lock);
			if (remove_inode_buffers(inode))
				reap += invalidate_mapping_pages(&inode->i_data,
								0, -1);
			iput(inode);
			spin_lock(&inode_lock);

			if (inode != list_entry(inode_lru.next,
						struct inode, i_lru))
				continue;	/* wrong inode or list_empty */
			if (!can_unuse(inode))
				continue;
		}
		WARN_ON(inode->i_state & I_NEW);
		inode->i_state |= I_FREEING;

		/*
		 * Move the inode off the IO lists and LRU once I_FREEING is
		 * set so that it won't get moved back on there if it is dirty.
		 */
		list_move(&inode->i_lru, &freeable);
		list_del_init(&inode->i_wb_list);
		inodes_stat.nr_unused--;
	}
	if (current_is_kswapd())
		__count_vm_events(KSWAPD_INODESTEAL, reap);
	else
		__count_vm_events(PGINODESTEAL, reap);
	spin_unlock(&inode_lock);

	dispose_list(&freeable);
	up_read(&iprune_sem);
}

/*
 * shrink_icache_memory() will attempt to reclaim some unused inodes. Here,
 * "unused" means that no dentries are referring to the inodes: the files are
 * not open and the dcache references to those inodes have already been
 * reclaimed.
 *
 * This function is passed the number of inodes to scan, and it returns the
 * total number of remaining possibly-reclaimable inodes.
 */
static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
	if (nr) {
		/*
		 * Nasty deadlock avoidance. We may hold various FS locks,
		 * and we don't want to recurse into the FS that called us
		 * in clear_inode() and friends..
		 */
		if (!(gfp_mask & __GFP_FS))
			return -1;
		prune_icache(nr);
	}
	return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
}

static struct shrinker icache_shrinker = {
	.shrink = shrink_icache_memory,
	.seeks = DEFAULT_SEEKS,
};

static void __wait_on_freeing_inode(struct inode *inode);

/*
 * Called with the inode lock held.
 */
static struct inode *find_inode(struct super_block *sb,
				struct hlist_head *head,
				int (*test)(struct inode *, void *),
				void *data)
{
	struct hlist_node *node;
	struct inode *inode = NULL;

repeat:
	hlist_for_each_entry(inode, node, head, i_hash) {
		if (inode->i_sb != sb)
			continue;
		if (!test(inode, data))
			continue;
		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		__iget(inode);
		return inode;
	}
	return NULL;
}

/*
 * find_inode_fast is the fast path version of find_inode, see the comment at
 * iget_locked for details.
 */
static struct inode *find_inode_fast(struct super_block *sb,
				struct hlist_head *head, unsigned long ino)
{
	struct hlist_node *node;
	struct inode *inode = NULL;

repeat:
	hlist_for_each_entry(inode, node, head, i_hash) {
		if (inode->i_ino != ino)
			continue;
		if (inode->i_sb != sb)
			continue;
		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		__iget(inode);
		return inode;
	}
	return NULL;
}

/*
 * Each cpu owns a range of LAST_INO_BATCH numbers.
 * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
 * to renew the exhausted range.
 *
 * This does not significantly increase overflow rate because every CPU can
 * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
 * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
 * 2^32 range, and is a worst-case. Even a 50% wastage would only increase
 * overflow rate by 2x, which does not seem too significant.
 *
 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
 * error if st_ino won't fit in target struct field. Use 32bit counter
 * here to attempt to avoid that.
 */
#define LAST_INO_BATCH 1024
static DEFINE_PER_CPU(unsigned int, last_ino);

unsigned int get_next_ino(void)
{
	unsigned int *p = &get_cpu_var(last_ino);
	unsigned int res = *p;

#ifdef CONFIG_SMP
	if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
		static atomic_t shared_last_ino;
		int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);

		res = next - LAST_INO_BATCH;
	}
#endif

	*p = ++res;
	put_cpu_var(last_ino);
	return res;
}
EXPORT_SYMBOL(get_next_ino);

/**
 * new_inode - obtain an inode
 * @sb: superblock
 *
 * Allocates a new inode for given superblock. The default gfp_mask
 * for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
 * If HIGHMEM pages are unsuitable or it is known that pages allocated
 * for the page cache are not reclaimable or migratable,
 * mapping_set_gfp_mask() must be called with suitable flags on the
 * newly created inode's mapping
 *
 */
struct inode *new_inode(struct super_block *sb)
{
	struct inode *inode;

	spin_lock_prefetch(&inode_lock);

	inode = alloc_inode(sb);
	if (inode) {
		spin_lock(&inode_lock);
		__inode_sb_list_add(inode);
		inode->i_state = 0;
		spin_unlock(&inode_lock);
	}
	return inode;
}
EXPORT_SYMBOL(new_inode);

void unlock_new_inode(struct inode *inode)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (S_ISDIR(inode->i_mode)) {
		struct file_system_type *type = inode->i_sb->s_type;

		/* Set new key only if filesystem hasn't already changed it */
		if (!lockdep_match_class(&inode->i_mutex,
		    &type->i_mutex_key)) {
			/*
			 * ensure nobody is actually holding i_mutex
			 */
			mutex_destroy(&inode->i_mutex);
			mutex_init(&inode->i_mutex);
			lockdep_set_class(&inode->i_mutex,
					  &type->i_mutex_dir_key);
		}
	}
#endif
	/*
	 * This is special! We do not need the spinlock when clearing I_NEW,
	 * because we're guaranteed that nobody else tries to do anything about
	 * the state of the inode when it is locked, as we just created it (so
	 * there can be no old holders that haven't tested I_NEW).
	 * However we must emit the memory barrier so that other CPUs reliably
	 * see the clearing of I_NEW after the other inode initialisation has
	 * completed.
	 */
	smp_mb();
	WARN_ON(!(inode->i_state & I_NEW));
	inode->i_state &= ~I_NEW;
	wake_up_inode(inode);
}
EXPORT_SYMBOL(unlock_new_inode);

/*
 * This is called without the inode lock held.. Be careful.
 *
 * We no longer cache the sb_flags in i_flags - see fs.h
 *	-- rmk@arm.uk.linux.org
 */
static struct inode *get_new_inode(struct super_block *sb,
				struct hlist_head *head,
				int (*test)(struct inode *, void *),
				int (*set)(struct inode *, void *),
				void *data)
{
	struct inode *inode;

	inode = alloc_inode(sb);
	if (inode) {
		struct inode *old;

		spin_lock(&inode_lock);
		/* We released the lock, so.. */
		old = find_inode(sb, head, test, data);
		if (!old) {
			if (set(inode, data))
				goto set_failed;

			hlist_add_head(&inode->i_hash, head);
			__inode_sb_list_add(inode);
			inode->i_state = I_NEW;
			spin_unlock(&inode_lock);

			/* Return the locked inode with I_NEW set, the
			 * caller is responsible for filling in the contents
			 */
			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		spin_unlock(&inode_lock);
		destroy_inode(inode);
		inode = old;
		wait_on_inode(inode);
	}
	return inode;

set_failed:
	spin_unlock(&inode_lock);
	destroy_inode(inode);
	return NULL;
}

/*
 * get_new_inode_fast is the fast path version of get_new_inode, see the
 * comment at iget_locked for details.
 */
static struct inode *get_new_inode_fast(struct super_block *sb,
				struct hlist_head *head, unsigned long ino)
{
	struct inode *inode;

	inode = alloc_inode(sb);
	if (inode) {
		struct inode *old;

		spin_lock(&inode_lock);
		/* We released the lock, so.. */
		old = find_inode_fast(sb, head, ino);
		if (!old) {
			inode->i_ino = ino;
			hlist_add_head(&inode->i_hash, head);
			__inode_sb_list_add(inode);
			inode->i_state = I_NEW;
			spin_unlock(&inode_lock);

			/* Return the locked inode with I_NEW set, the
			 * caller is responsible for filling in the contents
			 */
			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		spin_unlock(&inode_lock);
		destroy_inode(inode);
		inode = old;
		wait_on_inode(inode);
	}
	return inode;
}

/*
 * search the inode cache for a matching inode number.
 * If we find one, then the inode number we are trying to
 * allocate is not unique and so we should not use it.
 *
 * Returns 1 if the inode number is unique, 0 if it is not.
 */
static int test_inode_iunique(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *b = inode_hashtable + hash(sb, ino);
	struct hlist_node *node;
	struct inode *inode;

	hlist_for_each_entry(inode, node, b, i_hash) {
		if (inode->i_ino == ino && inode->i_sb == sb)
			return 0;
	}

	return 1;
}

/**
 * iunique - get a unique inode number
 * @sb: superblock
 * @max_reserved: highest reserved inode number
 *
 * Obtain an inode number that is unique on the system for a given
 * superblock. This is used by file systems that have no natural
 * permanent inode numbering system. An inode number is returned that
 * is higher than the reserved limit but unique.
 *
 * BUGS:
 * With a large number of inodes live on the file system this function
 * currently becomes quite slow.
 */
ino_t iunique(struct super_block *sb, ino_t max_reserved)
{
	/*
	 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
	 * error if st_ino won't fit in target struct field. Use 32bit counter
	 * here to attempt to avoid that.
	 */
	static DEFINE_SPINLOCK(iunique_lock);
	static unsigned int counter;
	ino_t res;

	spin_lock(&inode_lock);
	spin_lock(&iunique_lock);
	do {
		if (counter <= max_reserved)
			counter = max_reserved + 1;
		res = counter++;
	} while (!test_inode_iunique(sb, res));
	spin_unlock(&iunique_lock);
	spin_unlock(&inode_lock);

	return res;
}
EXPORT_SYMBOL(iunique);

struct inode *igrab(struct inode *inode)
{
	spin_lock(&inode_lock);
	if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
		__iget(inode);
	else
		/*
		 * Handle the case where s_op->clear_inode has not been
		 * called yet, and somebody is calling igrab
		 * while the inode is getting freed.
		 */
		inode = NULL;
	spin_unlock(&inode_lock);
	return inode;
}
EXPORT_SYMBOL(igrab);

/**
 * ifind - internal function, you want ilookup5() or iget5().
 * @sb: super block of file system to search
 * @head: the head of the list to search
 * @test: callback used for comparisons between inodes
 * @data: opaque data pointer to pass to @test
 * @wait: if true wait for the inode to be unlocked, if false do not
 *
 * ifind() searches for the inode specified by @data in the inode
 * cache. This is a generalized version of ifind_fast() for file systems where
 * the inode number is not sufficient for unique identification of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Otherwise NULL is returned.
 *
 * Note, @test is called with the inode_lock held, so can't sleep.
 */
static struct inode *ifind(struct super_block *sb,
		struct hlist_head *head, int (*test)(struct inode *, void *),
		void *data, const int wait)
{
	struct inode *inode;

	spin_lock(&inode_lock);
	inode = find_inode(sb, head, test, data);
	if (inode) {
		spin_unlock(&inode_lock);
		if (likely(wait))
			wait_on_inode(inode);
		return inode;
	}
	spin_unlock(&inode_lock);
	return NULL;
}

/**
 * ifind_fast - internal function, you want ilookup() or iget().
 * @sb: super block of file system to search
 * @head: head of the list to search
 * @ino: inode number to search for
 *
 * ifind_fast() searches for the inode @ino in the inode cache. This is for
 * file systems where the inode number is sufficient for unique identification
 * of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Otherwise NULL is returned.
 */
static struct inode *ifind_fast(struct super_block *sb,
		struct hlist_head *head, unsigned long ino)
{
	struct inode *inode;

	spin_lock(&inode_lock);
	inode = find_inode_fast(sb, head, ino);
	if (inode) {
		spin_unlock(&inode_lock);
		wait_on_inode(inode);
		return inode;
	}
	spin_unlock(&inode_lock);
	return NULL;
}

/**
 * ilookup5_nowait - search for an inode in the inode cache
 * @sb: super block of file system to search
 * @hashval: hash value (usually inode number) to search for
 * @test: callback used for comparisons between inodes
 * @data: opaque data pointer to pass to @test
 *
 * ilookup5_nowait() uses ifind() to search for the inode specified by @hashval
 * and @data in the inode cache. This is a generalized version of ilookup() for
 * file systems where the inode number is not sufficient for unique
 * identification of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count. Note, the inode lock is not waited upon so you have to be
 * very careful what you do with the returned inode. You probably should be
 * using ilookup5() instead.
 *
 * Otherwise NULL is returned.
 *
 * Note, @test is called with the inode_lock held, so can't sleep.
 */
struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);

	return ifind(sb, head, test, data, 0);
}
EXPORT_SYMBOL(ilookup5_nowait);

/**
 * ilookup5 - search for an inode in the inode cache
 * @sb: super block of file system to search
 * @hashval: hash value (usually inode number) to search for
 * @test: callback used for comparisons between inodes
 * @data: opaque data pointer to pass to @test
 *
 * ilookup5() uses ifind() to search for the inode specified by @hashval and
 * @data in the inode cache. This is a generalized version of ilookup() for
 * file systems where the inode number is not sufficient for unique
 * identification of an inode.
 *
 * If the inode is in the cache, the inode lock is waited upon and the inode is
 * returned with an incremented reference count.
 *
 * Otherwise NULL is returned.
 *
 * Note, @test is called with the inode_lock held, so can't sleep.
 */
struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);

	return ifind(sb, head, test, data, 1);
}
EXPORT_SYMBOL(ilookup5);

/**
 * ilookup - search for an inode in the inode cache
 * @sb: super block of file system to search
 * @ino: inode number to search for
 *
 * ilookup() uses ifind_fast() to search for the inode @ino in the inode cache.
 * This is for file systems where the inode number is sufficient for unique
 * identification of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Otherwise NULL is returned.
 */
struct inode *ilookup(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *head = inode_hashtable + hash(sb, ino);

	return ifind_fast(sb, head, ino);
}
EXPORT_SYMBOL(ilookup);

/**
 * iget5_locked - obtain an inode from a mounted file system
 * @sb: super block of file system
 * @hashval: hash value (usually inode number) to get
 * @test: callback used for comparisons between inodes
 * @set: callback used to initialize a new struct inode
 * @data: opaque data pointer to pass to @test and @set
 *
 * iget5_locked() uses ifind() to search for the inode specified by @hashval
 * and @data in the inode cache and if present it is returned with an increased
 * reference count. This is a generalized version of iget_locked() for file
 * systems where the inode number is not sufficient for unique identification
 * of an inode.
 *
 * If the inode is not in cache, get_new_inode() is called to allocate a new
 * inode and this is returned locked, hashed, and with the I_NEW flag set. The
 * file system gets to fill it in before unlocking it via unlock_new_inode().
 *
 * Note both @test and @set are called with the inode_lock held, so can't sleep.
 */
struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *),
		int (*set)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
	struct inode *inode;

	inode = ifind(sb, head, test, data, 1);
	if (inode)
		return inode;
	/*
	 * get_new_inode() will do the right thing, re-trying the search
	 * in case it had to block at any point.
	 */
	return get_new_inode(sb, head, test, set, data);
}
EXPORT_SYMBOL(iget5_locked);

/**
 * iget_locked - obtain an inode from a mounted file system
 * @sb: super block of file system
 * @ino: inode number to get
 *
 * iget_locked() uses ifind_fast() to search for the inode specified by @ino in
 * the inode cache and if present it is returned with an increased reference
 * count. This is for file systems where the inode number is sufficient for
 * unique identification of an inode.
 *
 * If the inode is not in cache, get_new_inode_fast() is called to allocate a
 * new inode and this is returned locked, hashed, and with the I_NEW flag set.
 * The file system gets to fill it in before unlocking it via
 * unlock_new_inode().
 */
struct inode *iget_locked(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *head = inode_hashtable + hash(sb, ino);
	struct inode *inode;

	inode = ifind_fast(sb, head, ino);
	if (inode)
		return inode;
	/*
	 * get_new_inode_fast() will do the right thing, re-trying the search
	 * in case it had to block at any point.
	 */
	return get_new_inode_fast(sb, head, ino);
}
EXPORT_SYMBOL(iget_locked);

int insert_inode_locked(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	ino_t ino = inode->i_ino;
	struct hlist_head *head = inode_hashtable + hash(sb, ino);

	inode->i_state |= I_NEW;
	while (1) {
		struct hlist_node *node;
		struct inode *old = NULL;
		spin_lock(&inode_lock);
		hlist_for_each_entry(old, node, head, i_hash) {
			if (old->i_ino != ino)
				continue;
			if (old->i_sb != sb)
				continue;
			if (old->i_state & (I_FREEING|I_WILL_FREE))
				continue;
			break;
		}
		if (likely(!node)) {
			hlist_add_head(&inode->i_hash, head);
			spin_unlock(&inode_lock);
			return 0;
		}
		__iget(old);
		spin_unlock(&inode_lock);
		wait_on_inode(old);
		if (unlikely(!inode_unhashed(old))) {
			iput(old);
			return -EBUSY;
		}
		iput(old);
	}
}
EXPORT_SYMBOL(insert_inode_locked);

int insert_inode_locked4(struct inode *inode, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct super_block *sb = inode->i_sb;
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);

	inode->i_state |= I_NEW;

	while (1) {
		struct hlist_node *node;
		struct inode *old = NULL;

		spin_lock(&inode_lock);
		hlist_for_each_entry(old, node, head, i_hash) {
			if (old->i_sb != sb)
				continue;
			if (!test(old, data))
				continue;
			if (old->i_state & (I_FREEING|I_WILL_FREE))
				continue;
			break;
		}
		if (likely(!node)) {
			hlist_add_head(&inode->i_hash, head);
			spin_unlock(&inode_lock);
			return 0;
		}
		__iget(old);
		spin_unlock(&inode_lock);
		wait_on_inode(old);
		if (unlikely(!inode_unhashed(old))) {
			iput(old);
			return -EBUSY;
		}
		iput(old);
	}
}
EXPORT_SYMBOL(insert_inode_locked4);


int generic_delete_inode(struct inode *inode)
{
	return 1;
}
EXPORT_SYMBOL(generic_delete_inode);

/*
 * Normal UNIX filesystem behaviour: delete the
 * inode when the usage count drops to zero, and
 * i_nlink is zero.
 */
int generic_drop_inode(struct inode *inode)
{
	return !inode->i_nlink || inode_unhashed(inode);
}
EXPORT_SYMBOL_GPL(generic_drop_inode);

/*
 * Called when we're dropping the last reference
 * to an inode.
 *
 * Call the FS "drop_inode()" function, defaulting to
 * the legacy UNIX filesystem behaviour. If it tells
 * us to evict inode, do so. Otherwise, retain inode
 * in cache if fs is alive, sync and evict if fs is
 * shutting down.
 */
static void iput_final(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	const struct super_operations *op = inode->i_sb->s_op;
	int drop;

	if (op && op->drop_inode)
		drop = op->drop_inode(inode);
	else
		drop = generic_drop_inode(inode);

	if (!drop) {
		if (sb->s_flags & MS_ACTIVE) {
			inode->i_state |= I_REFERENCED;
			if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
				inode_lru_list_add(inode);
			}
			spin_unlock(&inode_lock);
			return;
		}
		WARN_ON(inode->i_state & I_NEW);
		inode->i_state |= I_WILL_FREE;
		spin_unlock(&inode_lock);
		write_inode_now(inode, 1);
		spin_lock(&inode_lock);
		WARN_ON(inode->i_state & I_NEW);
		inode->i_state &= ~I_WILL_FREE;
		__remove_inode_hash(inode);
	}

	WARN_ON(inode->i_state & I_NEW);
	inode->i_state |= I_FREEING;

	/*
	 * Move the inode off the IO lists and LRU once I_FREEING is
	 * set so that it won't get moved back on there if it is dirty.
	 */
	inode_lru_list_del(inode);
	list_del_init(&inode->i_wb_list);

	__inode_sb_list_del(inode);
	spin_unlock(&inode_lock);
	evict(inode);
	remove_inode_hash(inode);
	wake_up_inode(inode);
	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
	destroy_inode(inode);
}

/**
 * iput - put an inode
 * @inode: inode to put
 *
 * Puts an inode, dropping its usage count. If the inode use count hits
 * zero, the inode is then freed and may also be destroyed.
 *
 * Consequently, iput() can sleep.
 */
void iput(struct inode *inode)
{
	if (inode) {
		BUG_ON(inode->i_state & I_CLEAR);

		if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
			iput_final(inode);
	}
}
EXPORT_SYMBOL(iput);

/**
 * bmap - find a block number in a file
 * @inode: inode of file
 * @block: block to find
 *
 * Returns the block number on the device holding the inode that
 * is the disk block number for the block of the file requested.
 * That is, asked for block 4 of inode 1 the function will return the
 * disk block relative to the disk start that holds that block of the
 * file.
 */
sector_t bmap(struct inode *inode, sector_t block)
{
	sector_t res = 0;
	if (inode->i_mapping->a_ops->bmap)
		res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block);
	return res;
}
EXPORT_SYMBOL(bmap);

/*
 * With relative atime, only update atime if the previous atime is
 * earlier than either the ctime or mtime or if at least a day has
 * passed since the last atime update.
 */
static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
				struct timespec now)
{

	if (!(mnt->mnt_flags & MNT_RELATIME))
		return 1;
	/*
	 * Is mtime younger than atime? If yes, update atime:
	 */
	if (timespec_compare(&inode->i_mtime, &inode->i_atime) >= 0)
		return 1;
	/*
	 * Is ctime younger than atime? If yes, update atime:
	 */
	if (timespec_compare(&inode->i_ctime, &inode->i_atime) >= 0)
		return 1;

	/*
	 * Is the previous atime value older than a day? If yes,
	 * update atime:
	 */
	if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
		return 1;
	/*
	 * Good, we can skip the atime update:
	 */
	return 0;
}

/**
 * touch_atime - update the access time
 * @mnt: mount the inode is accessed on
 * @dentry: dentry accessed
 *
 * Update the accessed time on an inode and mark it for writeback.
 * This function automatically handles read only file systems and media,
 * as well as the "noatime" flag and inode specific "noatime" markers.
 */
void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;
	struct timespec now;

	if (inode->i_flags & S_NOATIME)
		return;
	if (IS_NOATIME(inode))
		return;
	if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
		return;

	if (mnt->mnt_flags & MNT_NOATIME)
		return;
	if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
		return;

	now = current_fs_time(inode->i_sb);

	if (!relatime_need_update(mnt, inode, now))
		return;

	if (timespec_equal(&inode->i_atime, &now))
		return;

	if (mnt_want_write(mnt))
		return;

	inode->i_atime = now;
	mark_inode_dirty_sync(inode);
	mnt_drop_write(mnt);
}
EXPORT_SYMBOL(touch_atime);

/**
 * file_update_time - update mtime and ctime time
 * @file: file accessed
 *
 * Update the mtime and ctime members of an inode and mark the inode
 * for writeback. Note that this function is meant exclusively for
 * usage in the file write path of filesystems, and filesystems may
 * choose to explicitly ignore updates via this function with the
 * S_NOCMTIME inode flag, e.g. for network filesystems where these
 * timestamps are handled by the server.
 */
void file_update_time(struct file *file)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct timespec now;
	enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;

	/* First try to exhaust all avenues to not sync */
	if (IS_NOCMTIME(inode))
		return;

	now = current_fs_time(inode->i_sb);
	if (!timespec_equal(&inode->i_mtime, &now))
		sync_it = S_MTIME;

	if (!timespec_equal(&inode->i_ctime, &now))
		sync_it |= S_CTIME;

	if (IS_I_VERSION(inode))
		sync_it |= S_VERSION;

	if (!sync_it)
		return;

	/* Finally allowed to write? Takes lock. */
	if (mnt_want_write_file(file))
		return;

	/* Only change inode inside the lock region */
	if (sync_it & S_VERSION)
		inode_inc_iversion(inode);
	if (sync_it & S_CTIME)
		inode->i_ctime = now;
	if (sync_it & S_MTIME)
		inode->i_mtime = now;
	mark_inode_dirty_sync(inode);
	mnt_drop_write(file->f_path.mnt);
}
EXPORT_SYMBOL(file_update_time);

int inode_needs_sync(struct inode *inode)
{
	if (IS_SYNC(inode))
		return 1;
	if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
		return 1;
	return 0;
}
EXPORT_SYMBOL(inode_needs_sync);

int inode_wait(void *word)
{
	schedule();
	return 0;
}
EXPORT_SYMBOL(inode_wait);

/*
 * If we try to find an inode in the inode hash while it is being
 * deleted, we have to wait until the filesystem completes its
 * deletion before reporting that it isn't found. This function waits
This function waits 1598 * until the deletion _might_ have completed. Callers are responsible 1599 * to recheck inode state. 1600 * 1601 * It doesn't matter if I_NEW is not set initially, a call to 1602 * wake_up_inode() after removing from the hash list will DTRT. 1603 * 1604 * This is called with inode_lock held. 1605 */ 1606 static void __wait_on_freeing_inode(struct inode *inode) 1607 { 1608 wait_queue_head_t *wq; 1609 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); 1610 wq = bit_waitqueue(&inode->i_state, __I_NEW); 1611 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 1612 spin_unlock(&inode_lock); 1613 schedule(); 1614 finish_wait(wq, &wait.wait); 1615 spin_lock(&inode_lock); 1616 } 1617 1618 static __initdata unsigned long ihash_entries; 1619 static int __init set_ihash_entries(char *str) 1620 { 1621 if (!str) 1622 return 0; 1623 ihash_entries = simple_strtoul(str, &str, 0); 1624 return 1; 1625 } 1626 __setup("ihash_entries=", set_ihash_entries); 1627 1628 /* 1629 * Initialize the waitqueues and inode hash table. 1630 */ 1631 void __init inode_init_early(void) 1632 { 1633 int loop; 1634 1635 /* If hashes are distributed across NUMA nodes, defer 1636 * hash allocation until vmalloc space is available. 1637 */ 1638 if (hashdist) 1639 return; 1640 1641 inode_hashtable = 1642 alloc_large_system_hash("Inode-cache", 1643 sizeof(struct hlist_head), 1644 ihash_entries, 1645 14, 1646 HASH_EARLY, 1647 &i_hash_shift, 1648 &i_hash_mask, 1649 0); 1650 1651 for (loop = 0; loop < (1 << i_hash_shift); loop++) 1652 INIT_HLIST_HEAD(&inode_hashtable[loop]); 1653 } 1654 1655 void __init inode_init(void) 1656 { 1657 int loop; 1658 1659 /* inode slab cache */ 1660 inode_cachep = kmem_cache_create("inode_cache", 1661 sizeof(struct inode), 1662 0, 1663 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| 1664 SLAB_MEM_SPREAD), 1665 init_once); 1666 register_shrinker(&icache_shrinker); 1667 1668 /* Hash may have been set up in inode_init_early */ 1669 if (!hashdist) 1670 return; 1671 1672 inode_hashtable = 1673 alloc_large_system_hash("Inode-cache", 1674 sizeof(struct hlist_head), 1675 ihash_entries, 1676 14, 1677 0, 1678 &i_hash_shift, 1679 &i_hash_mask, 1680 0); 1681 1682 for (loop = 0; loop < (1 << i_hash_shift); loop++) 1683 INIT_HLIST_HEAD(&inode_hashtable[loop]); 1684 } 1685 1686 void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) 1687 { 1688 inode->i_mode = mode; 1689 if (S_ISCHR(mode)) { 1690 inode->i_fop = &def_chr_fops; 1691 inode->i_rdev = rdev; 1692 } else if (S_ISBLK(mode)) { 1693 inode->i_fop = &def_blk_fops; 1694 inode->i_rdev = rdev; 1695 } else if (S_ISFIFO(mode)) 1696 inode->i_fop = &def_fifo_fops; 1697 else if (S_ISSOCK(mode)) 1698 inode->i_fop = &bad_sock_fops; 1699 else 1700 printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for" 1701 " inode %s:%lu\n", mode, inode->i_sb->s_id, 1702 inode->i_ino); 1703 } 1704 EXPORT_SYMBOL(init_special_inode); 1705 1706 /** 1707 * Init uid,gid,mode for new inode according to posix standards 1708 * @inode: New inode 1709 * @dir: Directory inode 1710 * @mode: mode of the new inode 1711 */ 1712 void inode_init_owner(struct inode *inode, const struct inode *dir, 1713 mode_t mode) 1714 { 1715 inode->i_uid = current_fsuid(); 1716 if (dir && dir->i_mode & S_ISGID) { 1717 inode->i_gid = dir->i_gid; 1718 if (S_ISDIR(mode)) 1719 mode |= S_ISGID; 1720 } else 1721 inode->i_gid = current_fsgid(); 1722 inode->i_mode = mode; 1723 } 1724 EXPORT_SYMBOL(inode_init_owner); 1725