/*
 *	linux/fs/inode.c
 *
 * (C) 1997 Linus Torvalds
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/dcache.h>
#include <linux/init.h>
#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/writeback.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/wait.h>
#include <linux/rwsem.h>
#include <linux/hash.h>
#include <linux/swap.h>
#include <linux/security.h>
#include <linux/ima.h>
#include <linux/pagemap.h>
#include <linux/cdev.h>
#include <linux/bootmem.h>
#include <linux/inotify.h>
#include <linux/fsnotify.h>
#include <linux/mount.h>
#include <linux/async.h>
#include <linux/posix_acl.h>

/*
 * This is needed for the following functions:
 *  - inode_has_buffers
 *  - invalidate_inode_buffers
 *  - invalidate_bdev
 *
 * FIXME: remove all knowledge of the buffer layer from this file
 */
#include <linux/buffer_head.h>

/*
 * New inode.c implementation.
 *
 * This implementation has the basic premise of trying
 * to be extremely low-overhead and SMP-safe, yet be
 * simple enough to be "obviously correct".
 *
 * Famous last words.
 */

/* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */

/* #define INODE_PARANOIA 1 */
/* #define INODE_DEBUG 1 */

/*
 * Inode lookup is no longer as critical as it used to be:
 * most of the lookups are going to be through the dcache.
 */
#define I_HASHBITS	i_hash_shift
#define I_HASHMASK	i_hash_mask

static unsigned int i_hash_mask __read_mostly;
static unsigned int i_hash_shift __read_mostly;

/*
 * Each inode can be on two separate lists. One is
 * the hash list of the inode, used for lookups. The
 * other linked list is the "type" list:
 *  "in_use" - valid inode, i_count > 0, i_nlink > 0
 *  "dirty"  - as "in_use" but also dirty
 *  "unused" - valid inode, i_count = 0
 *
 * A "dirty" list is maintained for each super block,
 * allowing for low-overhead inode sync() operations.
 */

LIST_HEAD(inode_in_use);
LIST_HEAD(inode_unused);
static struct hlist_head *inode_hashtable __read_mostly;

/*
 * A simple spinlock to protect the list manipulations.
 *
 * NOTE! You also have to own the lock if you change
 * the i_state of an inode while it is in use..
 */
DEFINE_SPINLOCK(inode_lock);

/*
 * iprune_sem provides exclusion between the kswapd or try_to_free_pages
 * icache shrinking path, and the umount path.  Without this exclusion,
 * by the time prune_icache calls iput for the inode whose pages it has
 * been invalidating, or by the time it calls clear_inode & destroy_inode
 * from its final dispose_list, the struct super_block they refer to
 * (for inode->i_sb->s_op) may already have been freed and reused.
 *
 * We make this an rwsem because the fastpath is icache shrinking. In
 * some cases a filesystem may be doing a significant amount of work in
 * its inode reclaim code, so this should improve parallelism.
 */
static DECLARE_RWSEM(iprune_sem);

/*
 * Statistics gathering..
 */
struct inodes_stat_t inodes_stat;

static struct kmem_cache *inode_cachep __read_mostly;

static void wake_up_inode(struct inode *inode)
{
	/*
	 * Prevent speculative execution through spin_unlock(&inode_lock);
	 */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_LOCK);
}

/**
 * inode_init_always - perform inode structure initialisation
 * @sb: superblock inode belongs to
 * @inode: inode to initialise
 *
 * These are initializations that need to be done on every inode
 * allocation as the fields are not initialised by slab allocation.
 */
int inode_init_always(struct super_block *sb, struct inode *inode)
{
	static const struct address_space_operations empty_aops;
	static const struct inode_operations empty_iops;
	static const struct file_operations empty_fops;
	struct address_space *const mapping = &inode->i_data;

	inode->i_sb = sb;
	inode->i_blkbits = sb->s_blocksize_bits;
	inode->i_flags = 0;
	atomic_set(&inode->i_count, 1);
	inode->i_op = &empty_iops;
	inode->i_fop = &empty_fops;
	inode->i_nlink = 1;
	inode->i_uid = 0;
	inode->i_gid = 0;
	atomic_set(&inode->i_writecount, 0);
	inode->i_size = 0;
	inode->i_blocks = 0;
	inode->i_bytes = 0;
	inode->i_generation = 0;
#ifdef CONFIG_QUOTA
	memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
#endif
	inode->i_pipe = NULL;
	inode->i_bdev = NULL;
	inode->i_cdev = NULL;
	inode->i_rdev = 0;
	inode->dirtied_when = 0;

	if (security_inode_alloc(inode))
		goto out;

	/* allocate and initialize an i_integrity */
	if (ima_inode_alloc(inode))
		goto out_free_security;

	spin_lock_init(&inode->i_lock);
	lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);

	mutex_init(&inode->i_mutex);
	lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);

	init_rwsem(&inode->i_alloc_sem);
	lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);

	mapping->a_ops = &empty_aops;
	mapping->host = inode;
	mapping->flags = 0;
	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
	mapping->assoc_mapping = NULL;
	mapping->backing_dev_info = &default_backing_dev_info;
	mapping->writeback_index = 0;

	/*
	 * If the block_device provides a backing_dev_info for client
	 * inodes then use that.  Otherwise the inode shares the bdev's
	 * backing_dev_info.
	 */
	if (sb->s_bdev) {
		struct backing_dev_info *bdi;

		bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
		mapping->backing_dev_info = bdi;
	}
	inode->i_private = NULL;
	inode->i_mapping = mapping;
#ifdef CONFIG_FS_POSIX_ACL
	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
#endif

#ifdef CONFIG_FSNOTIFY
	inode->i_fsnotify_mask = 0;
#endif

	return 0;

out_free_security:
	security_inode_free(inode);
out:
	return -ENOMEM;
}
EXPORT_SYMBOL(inode_init_always);
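
/*
 * inode_init_always() is exported because a filesystem may re-initialise an
 * inode it recycles itself instead of always allocating through alloc_inode()
 * (XFS does this for inodes it keeps cached internally).  A minimal sketch of
 * that pattern, assuming a hypothetical examplefs that embeds the VFS inode
 * inside its own per-inode structure:
 *
 *	static struct inode *examplefs_recycle_inode(struct super_block *sb,
 *						     struct examplefs_inode *ei)
 *	{
 *		struct inode *inode = &ei->vfs_inode;
 *
 *		if (inode_init_always(sb, inode))
 *			return NULL;	(security/IMA allocation failed)
 *		return inode;
 *	}
 */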

static struct inode *alloc_inode(struct super_block *sb)
{
	struct inode *inode;

	if (sb->s_op->alloc_inode)
		inode = sb->s_op->alloc_inode(sb);
	else
		inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);

	if (!inode)
		return NULL;

	if (unlikely(inode_init_always(sb, inode))) {
		if (inode->i_sb->s_op->destroy_inode)
			inode->i_sb->s_op->destroy_inode(inode);
		else
			kmem_cache_free(inode_cachep, inode);
		return NULL;
	}

	return inode;
}

void __destroy_inode(struct inode *inode)
{
	BUG_ON(inode_has_buffers(inode));
	ima_inode_free(inode);
	security_inode_free(inode);
	fsnotify_inode_delete(inode);
#ifdef CONFIG_FS_POSIX_ACL
	if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED)
		posix_acl_release(inode->i_acl);
	if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
		posix_acl_release(inode->i_default_acl);
#endif
}
EXPORT_SYMBOL(__destroy_inode);

void destroy_inode(struct inode *inode)
{
	__destroy_inode(inode);
	if (inode->i_sb->s_op->destroy_inode)
		inode->i_sb->s_op->destroy_inode(inode);
	else
		kmem_cache_free(inode_cachep, inode);
}

/*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across use
 * of the inode, so let the slab cache be aware of that.
 */
void inode_init_once(struct inode *inode)
{
	memset(inode, 0, sizeof(*inode));
	INIT_HLIST_NODE(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_dentry);
	INIT_LIST_HEAD(&inode->i_devices);
	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
	spin_lock_init(&inode->i_data.tree_lock);
	spin_lock_init(&inode->i_data.i_mmap_lock);
	INIT_LIST_HEAD(&inode->i_data.private_list);
	spin_lock_init(&inode->i_data.private_lock);
	INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
	INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
	i_size_ordered_init(inode);
#ifdef CONFIG_INOTIFY
	INIT_LIST_HEAD(&inode->inotify_watches);
	mutex_init(&inode->inotify_mutex);
#endif
#ifdef CONFIG_FSNOTIFY
	INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries);
#endif
}
EXPORT_SYMBOL(inode_init_once);

static void init_once(void *foo)
{
	struct inode *inode = (struct inode *) foo;

	inode_init_once(inode);
}

/*
 * inode_lock must be held
 */
void __iget(struct inode *inode)
{
	if (atomic_read(&inode->i_count)) {
		atomic_inc(&inode->i_count);
		return;
	}
	atomic_inc(&inode->i_count);
	if (!(inode->i_state & (I_DIRTY|I_SYNC)))
		list_move(&inode->i_list, &inode_in_use);
	inodes_stat.nr_unused--;
}

/**
 * clear_inode - clear an inode
 * @inode: inode to clear
 *
 * This is called by the filesystem to tell us
 * that the inode is no longer useful. We just
 * terminate it with extreme prejudice.
 */
void clear_inode(struct inode *inode)
{
	might_sleep();
	invalidate_inode_buffers(inode);

	BUG_ON(inode->i_data.nrpages);
	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(inode->i_state & I_CLEAR);
	inode_sync_wait(inode);
	vfs_dq_drop(inode);
	if (inode->i_sb->s_op->clear_inode)
		inode->i_sb->s_op->clear_inode(inode);
	if (S_ISBLK(inode->i_mode) && inode->i_bdev)
		bd_forget(inode);
	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
		cd_forget(inode);
	inode->i_state = I_CLEAR;
}
EXPORT_SYMBOL(clear_inode);
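
/*
 * The ->clear_inode hook called above is where a filesystem releases whatever
 * it attached to the inode while it was live.  A minimal sketch, assuming a
 * hypothetical examplefs that keeps a private allocation in i_private (the
 * name and the use of i_private are illustrative, not a VFS requirement):
 *
 *	static void examplefs_clear_inode(struct inode *inode)
 *	{
 *		kfree(inode->i_private);
 *		inode->i_private = NULL;
 *	}
 *
 * wired up through the super_operations table:
 *
 *	static const struct super_operations examplefs_sops = {
 *		...
 *		.clear_inode	= examplefs_clear_inode,
 *	};
 */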

/*
 * dispose_list - dispose of the contents of a local list
 * @head: the head of the list to free
 *
 * Dispose-list gets a local list with local inodes in it, so it doesn't
 * need to worry about list corruption and SMP locks.
 */
static void dispose_list(struct list_head *head)
{
	int nr_disposed = 0;

	while (!list_empty(head)) {
		struct inode *inode;

		inode = list_first_entry(head, struct inode, i_list);
		list_del(&inode->i_list);

		if (inode->i_data.nrpages)
			truncate_inode_pages(&inode->i_data, 0);
		clear_inode(inode);

		spin_lock(&inode_lock);
		hlist_del_init(&inode->i_hash);
		list_del_init(&inode->i_sb_list);
		spin_unlock(&inode_lock);

		wake_up_inode(inode);
		destroy_inode(inode);
		nr_disposed++;
	}
	spin_lock(&inode_lock);
	inodes_stat.nr_inodes -= nr_disposed;
	spin_unlock(&inode_lock);
}

/*
 * Invalidate all inodes for a device.
 */
static int invalidate_list(struct list_head *head, struct list_head *dispose)
{
	struct list_head *next;
	int busy = 0, count = 0;

	next = head->next;
	for (;;) {
		struct list_head *tmp = next;
		struct inode *inode;

		/*
		 * We can reschedule here without worrying about the list's
		 * consistency because the per-sb list of inodes must not
		 * change during umount anymore, and because iprune_sem keeps
		 * shrink_icache_memory() away.
		 */
		cond_resched_lock(&inode_lock);

		next = next->next;
		if (tmp == head)
			break;
		inode = list_entry(tmp, struct inode, i_sb_list);
		if (inode->i_state & I_NEW)
			continue;
		invalidate_inode_buffers(inode);
		if (!atomic_read(&inode->i_count)) {
			list_move(&inode->i_list, dispose);
			WARN_ON(inode->i_state & I_NEW);
			inode->i_state |= I_FREEING;
			count++;
			continue;
		}
		busy = 1;
	}
	/* only unused inodes may be cached with i_count zero */
	inodes_stat.nr_unused -= count;
	return busy;
}

/**
 * invalidate_inodes - discard the inodes on a device
 * @sb: superblock
 *
 * Discard all of the inodes for a given superblock. If the discard
 * fails because there are busy inodes then a non-zero value is returned.
 * If the discard is successful all the inodes have been discarded.
 */
int invalidate_inodes(struct super_block *sb)
{
	int busy;
	LIST_HEAD(throw_away);

	down_write(&iprune_sem);
	spin_lock(&inode_lock);
	inotify_unmount_inodes(&sb->s_inodes);
	fsnotify_unmount_inodes(&sb->s_inodes);
	busy = invalidate_list(&sb->s_inodes, &throw_away);
	spin_unlock(&inode_lock);

	dispose_list(&throw_away);
	up_write(&iprune_sem);

	return busy;
}
EXPORT_SYMBOL(invalidate_inodes);

static int can_unuse(struct inode *inode)
{
	if (inode->i_state)
		return 0;
	if (inode_has_buffers(inode))
		return 0;
	if (atomic_read(&inode->i_count))
		return 0;
	if (inode->i_data.nrpages)
		return 0;
	return 1;
}

/*
 * Scan `nr_to_scan' inodes on the unused list for freeable ones. They are
 * moved to a temporary list and then are freed outside inode_lock by
 * dispose_list().
 *
 * Any inodes which are pinned purely because of attached pagecache have their
 * pagecache removed.  We expect the final iput() on that inode to add it to
 * the front of the inode_unused list.  So look for it there and if the
 * inode is still freeable, proceed.  The right inode is found 99.9% of the
 * time in testing on a 4-way.
 *
 * If the inode has metadata buffers attached to mapping->private_list then
 * try to remove them.
 */
static void prune_icache(int nr_to_scan)
{
	LIST_HEAD(freeable);
	int nr_pruned = 0;
	int nr_scanned;
	unsigned long reap = 0;

	down_read(&iprune_sem);
	spin_lock(&inode_lock);
	for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
		struct inode *inode;

		if (list_empty(&inode_unused))
			break;

		inode = list_entry(inode_unused.prev, struct inode, i_list);

		if (inode->i_state || atomic_read(&inode->i_count)) {
			list_move(&inode->i_list, &inode_unused);
			continue;
		}
		if (inode_has_buffers(inode) || inode->i_data.nrpages) {
			__iget(inode);
			spin_unlock(&inode_lock);
			if (remove_inode_buffers(inode))
				reap += invalidate_mapping_pages(&inode->i_data,
								0, -1);
			iput(inode);
			spin_lock(&inode_lock);

			if (inode != list_entry(inode_unused.next,
						struct inode, i_list))
				continue;	/* wrong inode or list_empty */
			if (!can_unuse(inode))
				continue;
		}
		list_move(&inode->i_list, &freeable);
		WARN_ON(inode->i_state & I_NEW);
		inode->i_state |= I_FREEING;
		nr_pruned++;
	}
	inodes_stat.nr_unused -= nr_pruned;
	if (current_is_kswapd())
		__count_vm_events(KSWAPD_INODESTEAL, reap);
	else
		__count_vm_events(PGINODESTEAL, reap);
	spin_unlock(&inode_lock);

	dispose_list(&freeable);
	up_read(&iprune_sem);
}

/*
 * shrink_icache_memory() will attempt to reclaim some unused inodes.  Here,
 * "unused" means that no dentries are referring to the inodes: the files are
 * not open and the dcache references to those inodes have already been
 * reclaimed.
 *
 * This function is passed the number of inodes to scan, and it returns the
 * total number of remaining possibly-reclaimable inodes.
 */
static int shrink_icache_memory(int nr, gfp_t gfp_mask)
{
	if (nr) {
		/*
		 * Nasty deadlock avoidance.  We may hold various FS locks,
		 * and we don't want to recurse into the FS that called us
		 * in clear_inode() and friends..
		 */
		if (!(gfp_mask & __GFP_FS))
			return -1;
		prune_icache(nr);
	}
	return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}

static struct shrinker icache_shrinker = {
	.shrink = shrink_icache_memory,
	.seeks = DEFAULT_SEEKS,
};

static void __wait_on_freeing_inode(struct inode *inode);

/*
 * Called with the inode lock held.
 * NOTE: we are not increasing the inode-refcount, you must call __iget()
 * by hand after calling find_inode now! This simplifies iunique and won't
 * add any additional branch in the common code.
 */
static struct inode *find_inode(struct super_block *sb,
				struct hlist_head *head,
				int (*test)(struct inode *, void *),
				void *data)
{
	struct hlist_node *node;
	struct inode *inode = NULL;

repeat:
	hlist_for_each_entry(inode, node, head, i_hash) {
		if (inode->i_sb != sb)
			continue;
		if (!test(inode, data))
			continue;
		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		break;
	}
	return node ? inode : NULL;
}

/*
 * find_inode_fast is the fast path version of find_inode, see the comment at
 * iget_locked for details.
 */
static struct inode *find_inode_fast(struct super_block *sb,
				struct hlist_head *head, unsigned long ino)
{
	struct hlist_node *node;
	struct inode *inode = NULL;

repeat:
	hlist_for_each_entry(inode, node, head, i_hash) {
		if (inode->i_ino != ino)
			continue;
		if (inode->i_sb != sb)
			continue;
		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		break;
	}
	return node ? inode : NULL;
}

static unsigned long hash(struct super_block *sb, unsigned long hashval)
{
	unsigned long tmp;

	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
			L1_CACHE_BYTES;
	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
	return tmp & I_HASHMASK;
}

static inline void
__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
			struct inode *inode)
{
	inodes_stat.nr_inodes++;
	list_add(&inode->i_list, &inode_in_use);
	list_add(&inode->i_sb_list, &sb->s_inodes);
	if (head)
		hlist_add_head(&inode->i_hash, head);
}

/**
 * inode_add_to_lists - add a new inode to relevant lists
 * @sb: superblock inode belongs to
 * @inode: inode to mark in use
 *
 * When an inode is allocated it needs to be accounted for, added to the in use
 * list, the owning superblock and the inode hash. This needs to be done under
 * the inode_lock, so export a function to do this rather than the inode lock
 * itself. We calculate the hash list to add to here so it is all internal
 * which requires the caller to have already set up the inode number in the
 * inode to add.
 */
void inode_add_to_lists(struct super_block *sb, struct inode *inode)
{
	struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino);

	spin_lock(&inode_lock);
	__inode_add_to_lists(sb, head, inode);
	spin_unlock(&inode_lock);
}
EXPORT_SYMBOL_GPL(inode_add_to_lists);

/**
 * new_inode - obtain an inode
 * @sb: superblock
 *
 * Allocates a new inode for given superblock. The default gfp_mask
 * for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
 * If HIGHMEM pages are unsuitable or it is known that pages allocated
 * for the page cache are not reclaimable or migratable,
 * mapping_set_gfp_mask() must be called with suitable flags on the
 * newly created inode's mapping.
 */
struct inode *new_inode(struct super_block *sb)
{
	/*
	 * On a 32-bit, non-LFS stat() call, glibc will generate an EOVERFLOW
	 * error if st_ino won't fit in target struct field. Use a 32-bit
	 * counter here to attempt to avoid that.
	 */
	static unsigned int last_ino;
	struct inode *inode;

	spin_lock_prefetch(&inode_lock);

	inode = alloc_inode(sb);
	if (inode) {
		spin_lock(&inode_lock);
		__inode_add_to_lists(sb, NULL, inode);
		inode->i_ino = ++last_ino;
		inode->i_state = 0;
		spin_unlock(&inode_lock);
	}
	return inode;
}
EXPORT_SYMBOL(new_inode);

void unlock_new_inode(struct inode *inode)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (inode->i_mode & S_IFDIR) {
		struct file_system_type *type = inode->i_sb->s_type;

		/* Set new key only if filesystem hasn't already changed it */
		if (!lockdep_match_class(&inode->i_mutex,
					 &type->i_mutex_key)) {
			/*
			 * ensure nobody is actually holding i_mutex
			 */
			mutex_destroy(&inode->i_mutex);
			mutex_init(&inode->i_mutex);
			lockdep_set_class(&inode->i_mutex,
					  &type->i_mutex_dir_key);
		}
	}
#endif
	/*
	 * This is special!  We do not need the spinlock when clearing I_LOCK,
	 * because we're guaranteed that nobody else tries to do anything about
	 * the state of the inode when it is locked, as we just created it (so
	 * there can be no old holders that haven't tested I_LOCK).
	 * However we must emit the memory barrier so that other CPUs reliably
	 * see the clearing of I_LOCK after the other inode initialisation has
	 * completed.
	 */
	smp_mb();
	WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW));
	inode->i_state &= ~(I_LOCK|I_NEW);
	wake_up_inode(inode);
}
EXPORT_SYMBOL(unlock_new_inode);

/*
 * This is called without the inode lock held.. Be careful.
 *
 * We no longer cache the sb_flags in i_flags - see fs.h
 *	-- rmk@arm.uk.linux.org
 */
static struct inode *get_new_inode(struct super_block *sb,
				struct hlist_head *head,
				int (*test)(struct inode *, void *),
				int (*set)(struct inode *, void *),
				void *data)
{
	struct inode *inode;

	inode = alloc_inode(sb);
	if (inode) {
		struct inode *old;

		spin_lock(&inode_lock);
		/* We released the lock, so.. */
		old = find_inode(sb, head, test, data);
		if (!old) {
			if (set(inode, data))
				goto set_failed;

			__inode_add_to_lists(sb, head, inode);
			inode->i_state = I_LOCK|I_NEW;
			spin_unlock(&inode_lock);

			/* Return the locked inode with I_NEW set, the
			 * caller is responsible for filling in the contents
			 */
			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		__iget(old);
		spin_unlock(&inode_lock);
		destroy_inode(inode);
		inode = old;
		wait_on_inode(inode);
	}
	return inode;

set_failed:
	spin_unlock(&inode_lock);
	destroy_inode(inode);
	return NULL;
}

/*
 * get_new_inode_fast is the fast path version of get_new_inode, see the
 * comment at iget_locked for details.
 */
static struct inode *get_new_inode_fast(struct super_block *sb,
				struct hlist_head *head, unsigned long ino)
{
	struct inode *inode;

	inode = alloc_inode(sb);
	if (inode) {
		struct inode *old;

		spin_lock(&inode_lock);
		/* We released the lock, so.. */
		old = find_inode_fast(sb, head, ino);
		if (!old) {
			inode->i_ino = ino;
			__inode_add_to_lists(sb, head, inode);
			inode->i_state = I_LOCK|I_NEW;
			spin_unlock(&inode_lock);

			/* Return the locked inode with I_NEW set, the
			 * caller is responsible for filling in the contents
			 */
			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		__iget(old);
		spin_unlock(&inode_lock);
		destroy_inode(inode);
		inode = old;
		wait_on_inode(inode);
	}
	return inode;
}

/**
 * iunique - get a unique inode number
 * @sb: superblock
 * @max_reserved: highest reserved inode number
 *
 * Obtain an inode number that is unique on the system for a given
 * superblock. This is used by file systems that have no natural
 * permanent inode numbering system. An inode number is returned that
 * is higher than the reserved limit but unique.
 *
 * BUGS:
 * With a large number of inodes live on the file system this function
 * currently becomes quite slow.
 */
ino_t iunique(struct super_block *sb, ino_t max_reserved)
{
	/*
	 * On a 32-bit, non-LFS stat() call, glibc will generate an EOVERFLOW
	 * error if st_ino won't fit in target struct field. Use a 32-bit
	 * counter here to attempt to avoid that.
	 */
	static unsigned int counter;
	struct inode *inode;
	struct hlist_head *head;
	ino_t res;

	spin_lock(&inode_lock);
	do {
		if (counter <= max_reserved)
			counter = max_reserved + 1;
		res = counter++;
		head = inode_hashtable + hash(sb, res);
		inode = find_inode_fast(sb, head, res);
	} while (inode != NULL);
	spin_unlock(&inode_lock);

	return res;
}
EXPORT_SYMBOL(iunique);
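
/*
 * A sketch of how a filesystem without stable on-disk inode numbers might use
 * iunique() when instantiating a new in-core inode.  examplefs and the
 * reserved-range constant are hypothetical; only the iunique() call itself is
 * the interface documented above:
 *
 *	static struct inode *examplefs_make_inode(struct super_block *sb,
 *						  int mode)
 *	{
 *		struct inode *inode = new_inode(sb);
 *
 *		if (!inode)
 *			return NULL;
 *		inode->i_ino = iunique(sb, EXAMPLEFS_FIRST_INO);
 *		inode->i_mode = mode;
 *		inode->i_atime = inode->i_mtime = inode->i_ctime =
 *							CURRENT_TIME;
 *		return inode;
 *	}
 */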

struct inode *igrab(struct inode *inode)
{
	spin_lock(&inode_lock);
	if (!(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)))
		__iget(inode);
	else
		/*
		 * Handle the case where s_op->clear_inode has not been
		 * called yet, and somebody is calling igrab
		 * while the inode is getting freed.
		 */
		inode = NULL;
	spin_unlock(&inode_lock);
	return inode;
}
EXPORT_SYMBOL(igrab);

/**
 * ifind - internal function, you want ilookup5() or iget5().
 * @sb: super block of file system to search
 * @head: the head of the list to search
 * @test: callback used for comparisons between inodes
 * @data: opaque data pointer to pass to @test
 * @wait: if true wait for the inode to be unlocked, if false do not
 *
 * ifind() searches for the inode specified by @data in the inode
 * cache. This is a generalized version of ifind_fast() for file systems where
 * the inode number is not sufficient for unique identification of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Otherwise NULL is returned.
 *
 * Note, @test is called with the inode_lock held, so can't sleep.
 */
static struct inode *ifind(struct super_block *sb,
		struct hlist_head *head, int (*test)(struct inode *, void *),
		void *data, const int wait)
{
	struct inode *inode;

	spin_lock(&inode_lock);
	inode = find_inode(sb, head, test, data);
	if (inode) {
		__iget(inode);
		spin_unlock(&inode_lock);
		if (likely(wait))
			wait_on_inode(inode);
		return inode;
	}
	spin_unlock(&inode_lock);
	return NULL;
}

/**
 * ifind_fast - internal function, you want ilookup() or iget().
 * @sb: super block of file system to search
 * @head: head of the list to search
 * @ino: inode number to search for
 *
 * ifind_fast() searches for the inode @ino in the inode cache. This is for
 * file systems where the inode number is sufficient for unique identification
 * of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Otherwise NULL is returned.
 */
static struct inode *ifind_fast(struct super_block *sb,
		struct hlist_head *head, unsigned long ino)
{
	struct inode *inode;

	spin_lock(&inode_lock);
	inode = find_inode_fast(sb, head, ino);
	if (inode) {
		__iget(inode);
		spin_unlock(&inode_lock);
		wait_on_inode(inode);
		return inode;
	}
	spin_unlock(&inode_lock);
	return NULL;
}

/**
 * ilookup5_nowait - search for an inode in the inode cache
 * @sb: super block of file system to search
 * @hashval: hash value (usually inode number) to search for
 * @test: callback used for comparisons between inodes
 * @data: opaque data pointer to pass to @test
 *
 * ilookup5_nowait() uses ifind() to search for the inode specified by @hashval
 * and @data in the inode cache. This is a generalized version of ilookup() for
 * file systems where the inode number is not sufficient for unique
 * identification of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.  Note, the inode lock is not waited upon so you have to be
 * very careful what you do with the returned inode.  You probably should be
 * using ilookup5() instead.
 *
 * Otherwise NULL is returned.
 *
 * Note, @test is called with the inode_lock held, so can't sleep.
 */
struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);

	return ifind(sb, head, test, data, 0);
}
EXPORT_SYMBOL(ilookup5_nowait);

/**
 * ilookup5 - search for an inode in the inode cache
 * @sb: super block of file system to search
 * @hashval: hash value (usually inode number) to search for
 * @test: callback used for comparisons between inodes
 * @data: opaque data pointer to pass to @test
 *
 * ilookup5() uses ifind() to search for the inode specified by @hashval and
 * @data in the inode cache. This is a generalized version of ilookup() for
 * file systems where the inode number is not sufficient for unique
 * identification of an inode.
 *
 * If the inode is in the cache, the inode lock is waited upon and the inode is
 * returned with an incremented reference count.
 *
 * Otherwise NULL is returned.
 *
 * Note, @test is called with the inode_lock held, so can't sleep.
 */
struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);

	return ifind(sb, head, test, data, 1);
}
EXPORT_SYMBOL(ilookup5);

/**
 * ilookup - search for an inode in the inode cache
 * @sb: super block of file system to search
 * @ino: inode number to search for
 *
 * ilookup() uses ifind_fast() to search for the inode @ino in the inode cache.
 * This is for file systems where the inode number is sufficient for unique
 * identification of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Otherwise NULL is returned.
 */
struct inode *ilookup(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *head = inode_hashtable + hash(sb, ino);

	return ifind_fast(sb, head, ino);
}
EXPORT_SYMBOL(ilookup);

/**
 * iget5_locked - obtain an inode from a mounted file system
 * @sb: super block of file system
 * @hashval: hash value (usually inode number) to get
 * @test: callback used for comparisons between inodes
 * @set: callback used to initialize a new struct inode
 * @data: opaque data pointer to pass to @test and @set
 *
 * iget5_locked() uses ifind() to search for the inode specified by @hashval
 * and @data in the inode cache and if present it is returned with an increased
 * reference count. This is a generalized version of iget_locked() for file
 * systems where the inode number is not sufficient for unique identification
 * of an inode.
 *
 * If the inode is not in cache, get_new_inode() is called to allocate a new
 * inode and this is returned locked, hashed, and with the I_NEW flag set. The
 * file system gets to fill it in before unlocking it via unlock_new_inode().
 *
 * Note both @test and @set are called with the inode_lock held, so can't sleep.
 */
struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *),
		int (*set)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
	struct inode *inode;

	inode = ifind(sb, head, test, data, 1);
	if (inode)
		return inode;
	/*
	 * get_new_inode() will do the right thing, re-trying the search
	 * in case it had to block at any point.
	 */
	return get_new_inode(sb, head, test, set, data);
}
EXPORT_SYMBOL(iget5_locked);

/**
 * iget_locked - obtain an inode from a mounted file system
 * @sb: super block of file system
 * @ino: inode number to get
 *
 * iget_locked() uses ifind_fast() to search for the inode specified by @ino in
 * the inode cache and if present it is returned with an increased reference
 * count. This is for file systems where the inode number is sufficient for
 * unique identification of an inode.
 *
 * If the inode is not in cache, get_new_inode_fast() is called to allocate a
 * new inode and this is returned locked, hashed, and with the I_NEW flag set.
 * The file system gets to fill it in before unlocking it via
 * unlock_new_inode().
 */
struct inode *iget_locked(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *head = inode_hashtable + hash(sb, ino);
	struct inode *inode;

	inode = ifind_fast(sb, head, ino);
	if (inode)
		return inode;
	/*
	 * get_new_inode_fast() will do the right thing, re-trying the search
	 * in case it had to block at any point.
	 */
	return get_new_inode_fast(sb, head, ino);
}
EXPORT_SYMBOL(iget_locked);
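
/*
 * The canonical way a disk filesystem consumes iget_locked() and
 * unlock_new_inode() is sketched below.  examplefs_iget() and the "read the
 * on-disk inode" step are hypothetical; the I_NEW handshake is what the
 * functions above provide.  Lookups that must not instantiate anything use
 * ilookup()/ilookup5() instead.
 *
 *	struct inode *examplefs_iget(struct super_block *sb, unsigned long ino)
 *	{
 *		struct inode *inode;
 *
 *		inode = iget_locked(sb, ino);
 *		if (!inode)
 *			return ERR_PTR(-ENOMEM);
 *		if (!(inode->i_state & I_NEW))
 *			return inode;	(found in cache, already set up)
 *
 *		...read the on-disk inode, fill i_mode, i_size, i_op, ...
 *
 *		unlock_new_inode(inode);
 *		return inode;
 *	}
 */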

int insert_inode_locked(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	ino_t ino = inode->i_ino;
	struct hlist_head *head = inode_hashtable + hash(sb, ino);

	inode->i_state |= I_LOCK|I_NEW;
	while (1) {
		struct hlist_node *node;
		struct inode *old = NULL;

		spin_lock(&inode_lock);
		hlist_for_each_entry(old, node, head, i_hash) {
			if (old->i_ino != ino)
				continue;
			if (old->i_sb != sb)
				continue;
			if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
				continue;
			break;
		}
		if (likely(!node)) {
			hlist_add_head(&inode->i_hash, head);
			spin_unlock(&inode_lock);
			return 0;
		}
		__iget(old);
		spin_unlock(&inode_lock);
		wait_on_inode(old);
		if (unlikely(!hlist_unhashed(&old->i_hash))) {
			iput(old);
			return -EBUSY;
		}
		iput(old);
	}
}
EXPORT_SYMBOL(insert_inode_locked);

int insert_inode_locked4(struct inode *inode, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct super_block *sb = inode->i_sb;
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);

	inode->i_state |= I_LOCK|I_NEW;

	while (1) {
		struct hlist_node *node;
		struct inode *old = NULL;

		spin_lock(&inode_lock);
		hlist_for_each_entry(old, node, head, i_hash) {
			if (old->i_sb != sb)
				continue;
			if (!test(old, data))
				continue;
			if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
				continue;
			break;
		}
		if (likely(!node)) {
			hlist_add_head(&inode->i_hash, head);
			spin_unlock(&inode_lock);
			return 0;
		}
		__iget(old);
		spin_unlock(&inode_lock);
		wait_on_inode(old);
		if (unlikely(!hlist_unhashed(&old->i_hash))) {
			iput(old);
			return -EBUSY;
		}
		iput(old);
	}
}
EXPORT_SYMBOL(insert_inode_locked4);
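
/*
 * insert_inode_locked() is meant for the inode-creation path: the filesystem
 * allocates and fills a brand-new inode itself, then hashes it while racing
 * creators are held off by I_NEW.  A sketch of that pattern, with examplefs
 * names standing in for the real allocation and error handling:
 *
 *	inode = new_inode(sb);
 *	if (!inode)
 *		return -ENOMEM;
 *	inode->i_ino = examplefs_pick_free_ino(sb);
 *	if (insert_inode_locked(inode) < 0) {
 *		(the number is already in use, e.g. a corrupted bitmap)
 *		iput(inode);
 *		return -EIO;
 *	}
 *	...initialise the rest, write it out...
 *	unlock_new_inode(inode);
 */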

/**
 * __insert_inode_hash - hash an inode
 * @inode: unhashed inode
 * @hashval: unsigned long value used to locate this object in the
 *	inode_hashtable.
 *
 * Add an inode to the inode hash for this superblock.
 */
void __insert_inode_hash(struct inode *inode, unsigned long hashval)
{
	struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);

	spin_lock(&inode_lock);
	hlist_add_head(&inode->i_hash, head);
	spin_unlock(&inode_lock);
}
EXPORT_SYMBOL(__insert_inode_hash);

/**
 * remove_inode_hash - remove an inode from the hash
 * @inode: inode to unhash
 *
 * Remove an inode from the inode hash table.
 */
void remove_inode_hash(struct inode *inode)
{
	spin_lock(&inode_lock);
	hlist_del_init(&inode->i_hash);
	spin_unlock(&inode_lock);
}
EXPORT_SYMBOL(remove_inode_hash);

/*
 * Tell the filesystem that this inode is no longer of any interest and should
 * be completely destroyed.
 *
 * We leave the inode in the inode hash table until *after* the filesystem's
 * ->delete_inode completes.  This ensures that an iget (such as nfsd might
 * instigate) will always find up-to-date information either in the hash or on
 * disk.
 *
 * I_FREEING is set so that no-one will take a new reference to the inode while
 * it is being deleted.
 */
void generic_delete_inode(struct inode *inode)
{
	const struct super_operations *op = inode->i_sb->s_op;

	list_del_init(&inode->i_list);
	list_del_init(&inode->i_sb_list);
	WARN_ON(inode->i_state & I_NEW);
	inode->i_state |= I_FREEING;
	inodes_stat.nr_inodes--;
	spin_unlock(&inode_lock);

	security_inode_delete(inode);

	if (op->delete_inode) {
		void (*delete)(struct inode *) = op->delete_inode;
		if (!is_bad_inode(inode))
			vfs_dq_init(inode);
		/* Filesystems implementing their own
		 * s_op->delete_inode are required to call
		 * truncate_inode_pages and clear_inode()
		 * internally */
		delete(inode);
	} else {
		truncate_inode_pages(&inode->i_data, 0);
		clear_inode(inode);
	}
	spin_lock(&inode_lock);
	hlist_del_init(&inode->i_hash);
	spin_unlock(&inode_lock);
	wake_up_inode(inode);
	BUG_ON(inode->i_state != I_CLEAR);
	destroy_inode(inode);
}
EXPORT_SYMBOL(generic_delete_inode);

/**
 * generic_detach_inode - remove inode from inode lists
 * @inode: inode to remove
 *
 * Remove inode from inode lists, write it if it's dirty. This is just an
 * internal VFS helper exported for hugetlbfs. Do not use!
 *
 * Returns 1 if inode should be completely destroyed.
 */
int generic_detach_inode(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;

	if (!hlist_unhashed(&inode->i_hash)) {
		if (!(inode->i_state & (I_DIRTY|I_SYNC)))
			list_move(&inode->i_list, &inode_unused);
		inodes_stat.nr_unused++;
		if (sb->s_flags & MS_ACTIVE) {
			spin_unlock(&inode_lock);
			return 0;
		}
		WARN_ON(inode->i_state & I_NEW);
		inode->i_state |= I_WILL_FREE;
		spin_unlock(&inode_lock);
		write_inode_now(inode, 1);
		spin_lock(&inode_lock);
		WARN_ON(inode->i_state & I_NEW);
		inode->i_state &= ~I_WILL_FREE;
		inodes_stat.nr_unused--;
		hlist_del_init(&inode->i_hash);
	}
	list_del_init(&inode->i_list);
	list_del_init(&inode->i_sb_list);
	WARN_ON(inode->i_state & I_NEW);
	inode->i_state |= I_FREEING;
	inodes_stat.nr_inodes--;
	spin_unlock(&inode_lock);
	return 1;
}
EXPORT_SYMBOL_GPL(generic_detach_inode);

static void generic_forget_inode(struct inode *inode)
{
	if (!generic_detach_inode(inode))
		return;
	if (inode->i_data.nrpages)
		truncate_inode_pages(&inode->i_data, 0);
	clear_inode(inode);
	wake_up_inode(inode);
	destroy_inode(inode);
}

/*
 * Normal UNIX filesystem behaviour: delete the
 * inode when the usage count drops to zero, and
 * i_nlink is zero.
 */
void generic_drop_inode(struct inode *inode)
{
	if (!inode->i_nlink)
		generic_delete_inode(inode);
	else
		generic_forget_inode(inode);
}
EXPORT_SYMBOL_GPL(generic_drop_inode);

/*
 * Called when we're dropping the last reference
 * to an inode.
 *
 * Call the FS "drop()" function, defaulting to
 * the legacy UNIX filesystem behaviour..
 *
 * NOTE! NOTE! NOTE! We're called with the inode lock
 * held, and the drop function is supposed to release
 * the lock!
 */
static inline void iput_final(struct inode *inode)
{
	const struct super_operations *op = inode->i_sb->s_op;
	void (*drop)(struct inode *) = generic_drop_inode;

	if (op && op->drop_inode)
		drop = op->drop_inode;
	drop(inode);
}

/**
 * iput - put an inode
 * @inode: inode to put
 *
 * Puts an inode, dropping its usage count. If the inode use count hits
 * zero, the inode is then freed and may also be destroyed.
 *
 * Consequently, iput() can sleep.
 */
void iput(struct inode *inode)
{
	if (inode) {
		BUG_ON(inode->i_state == I_CLEAR);

		if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
			iput_final(inode);
	}
}
EXPORT_SYMBOL(iput);

/**
 * bmap - find a block number in a file
 * @inode: inode of file
 * @block: block to find
 *
 * Returns the block number on the device holding the inode that
 * is the disk block number for the block of the file requested.
 * That is, asked for block 4 of inode 1 the function will return the
 * disk block relative to the disk start that holds that block of the
 * file.
 */
sector_t bmap(struct inode *inode, sector_t block)
{
	sector_t res = 0;

	if (inode->i_mapping->a_ops->bmap)
		res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block);
	return res;
}
EXPORT_SYMBOL(bmap);
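
/*
 * bmap() is what backs the FIBMAP ioctl and the swap-file setup code; it only
 * makes sense on block-mapped filesystems (an address_space without ->bmap
 * simply reports block 0).  From user space the mapping can be queried roughly
 * like this (a sketch, error handling omitted):
 *
 *	unsigned int blk = 4;		(logical block within the file)
 *	ioctl(fd, FIBMAP, &blk);	(requires CAP_SYS_RAWIO)
 *	(blk now holds the physical block number, 0 if unmapped)
 */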

/*
 * With relative atime, only update atime if the previous atime is
 * earlier than either the ctime or mtime or if at least a day has
 * passed since the last atime update.
 */
static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
				struct timespec now)
{
	if (!(mnt->mnt_flags & MNT_RELATIME))
		return 1;
	/*
	 * Is mtime younger than atime? If yes, update atime:
	 */
	if (timespec_compare(&inode->i_mtime, &inode->i_atime) >= 0)
		return 1;
	/*
	 * Is ctime younger than atime? If yes, update atime:
	 */
	if (timespec_compare(&inode->i_ctime, &inode->i_atime) >= 0)
		return 1;

	/*
	 * Is the previous atime value older than a day? If yes,
	 * update atime:
	 */
	if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
		return 1;
	/*
	 * Good, we can skip the atime update:
	 */
	return 0;
}

/**
 * touch_atime - update the access time
 * @mnt: mount the inode is accessed on
 * @dentry: dentry accessed
 *
 * Update the accessed time on an inode and mark it for writeback.
 * This function automatically handles read only file systems and media,
 * as well as the "noatime" flag and inode specific "noatime" markers.
 */
void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;
	struct timespec now;

	if (inode->i_flags & S_NOATIME)
		return;
	if (IS_NOATIME(inode))
		return;
	if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
		return;

	if (mnt->mnt_flags & MNT_NOATIME)
		return;
	if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
		return;

	now = current_fs_time(inode->i_sb);

	if (!relatime_need_update(mnt, inode, now))
		return;

	if (timespec_equal(&inode->i_atime, &now))
		return;

	if (mnt_want_write(mnt))
		return;

	inode->i_atime = now;
	mark_inode_dirty_sync(inode);
	mnt_drop_write(mnt);
}
EXPORT_SYMBOL(touch_atime);

/**
 * file_update_time - update mtime and ctime time
 * @file: file accessed
 *
 * Update the mtime and ctime members of an inode and mark the inode
 * for writeback. Note that this function is meant exclusively for
 * usage in the file write path of filesystems, and filesystems may
 * choose to explicitly ignore updates via this function with the
 * S_NOCMTIME inode flag, e.g. for network filesystems where these
 * timestamps are handled by the server.
 */
void file_update_time(struct file *file)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct timespec now;
	enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;

	/* First try to exhaust all avenues to not sync */
	if (IS_NOCMTIME(inode))
		return;

	now = current_fs_time(inode->i_sb);
	if (!timespec_equal(&inode->i_mtime, &now))
		sync_it = S_MTIME;

	if (!timespec_equal(&inode->i_ctime, &now))
		sync_it |= S_CTIME;

	if (IS_I_VERSION(inode))
		sync_it |= S_VERSION;

	if (!sync_it)
		return;

	/* Finally allowed to write? Takes lock. */
	if (mnt_want_write_file(file))
		return;

	/* Only change inode inside the lock region */
	if (sync_it & S_VERSION)
		inode_inc_iversion(inode);
	if (sync_it & S_CTIME)
		inode->i_ctime = now;
	if (sync_it & S_MTIME)
		inode->i_mtime = now;
	mark_inode_dirty_sync(inode);
	mnt_drop_write(file->f_path.mnt);
}
EXPORT_SYMBOL(file_update_time);
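
/*
 * file_update_time() is intended to be called from a filesystem's write path
 * before data is copied in, so that mtime/ctime (and i_version, if enabled)
 * reflect the write.  A sketch of such a caller; examplefs and its locking
 * are hypothetical, the call itself is the documented interface:
 *
 *	static ssize_t examplefs_file_write(struct file *file, ...)
 *	{
 *		...checks and setup...
 *		file_update_time(file);		(timestamps before new data)
 *		...copy data in, mark pages dirty...
 *	}
 *
 * The generic write path (generic_file_aio_write() and friends) does the same
 * thing before writing.
 */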

int inode_needs_sync(struct inode *inode)
{
	if (IS_SYNC(inode))
		return 1;
	if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
		return 1;
	return 0;
}
EXPORT_SYMBOL(inode_needs_sync);

int inode_wait(void *word)
{
	schedule();
	return 0;
}
EXPORT_SYMBOL(inode_wait);

/*
 * If we try to find an inode in the inode hash while it is being
 * deleted, we have to wait until the filesystem completes its
 * deletion before reporting that it isn't found.  This function waits
 * until the deletion _might_ have completed.  Callers are responsible
 * to recheck inode state.
 *
 * It doesn't matter if I_LOCK is not set initially, a call to
 * wake_up_inode() after removing from the hash list will DTRT.
 *
 * This is called with inode_lock held.
 */
static void __wait_on_freeing_inode(struct inode *inode)
{
	wait_queue_head_t *wq;
	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK);

	wq = bit_waitqueue(&inode->i_state, __I_LOCK);
	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
	spin_unlock(&inode_lock);
	schedule();
	finish_wait(wq, &wait.wait);
	spin_lock(&inode_lock);
}

static __initdata unsigned long ihash_entries;
static int __init set_ihash_entries(char *str)
{
	if (!str)
		return 0;
	ihash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("ihash_entries=", set_ihash_entries);

/*
 * Initialize the waitqueues and inode hash table.
 */
void __init inode_init_early(void)
{
	int loop;

	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,
					14,
					HASH_EARLY,
					&i_hash_shift,
					&i_hash_mask,
					0);

	for (loop = 0; loop < (1 << i_hash_shift); loop++)
		INIT_HLIST_HEAD(&inode_hashtable[loop]);
}

void __init inode_init(void)
{
	int loop;

	/* inode slab cache */
	inode_cachep = kmem_cache_create("inode_cache",
					 sizeof(struct inode),
					 0,
					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
					 SLAB_MEM_SPREAD),
					 init_once);
	register_shrinker(&icache_shrinker);

	/* Hash may have been set up in inode_init_early */
	if (!hashdist)
		return;

	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,
					14,
					0,
					&i_hash_shift,
					&i_hash_mask,
					0);

	for (loop = 0; loop < (1 << i_hash_shift); loop++)
		INIT_HLIST_HEAD(&inode_hashtable[loop]);
}

void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
	inode->i_mode = mode;
	if (S_ISCHR(mode)) {
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = rdev;
	} else if (S_ISBLK(mode)) {
		inode->i_fop = &def_blk_fops;
		inode->i_rdev = rdev;
	} else if (S_ISFIFO(mode))
		inode->i_fop = &def_fifo_fops;
	else if (S_ISSOCK(mode))
		inode->i_fop = &bad_sock_fops;
	else
		printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
				  " inode %s:%lu\n", mode, inode->i_sb->s_id,
				  inode->i_ino);
}
EXPORT_SYMBOL(init_special_inode);
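
/*
 * init_special_inode() is what a filesystem's mknod() method uses to hook up
 * device nodes, FIFOs and sockets to the right file_operations.  A sketch of
 * such a method; the examplefs allocation helper is hypothetical:
 *
 *	static int examplefs_mknod(struct inode *dir, struct dentry *dentry,
 *				   int mode, dev_t rdev)
 *	{
 *		struct inode *inode = examplefs_new_inode(dir, mode);
 *
 *		if (IS_ERR(inode))
 *			return PTR_ERR(inode);
 *		init_special_inode(inode, inode->i_mode, rdev);
 *		mark_inode_dirty(inode);
 *		d_instantiate(dentry, inode);
 *		return 0;
 *	}
 */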