/*
 * linux/fs/inode.c
 *
 * (C) 1997 Linus Torvalds
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/dcache.h>
#include <linux/init.h>
#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/writeback.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/wait.h>
#include <linux/hash.h>
#include <linux/swap.h>
#include <linux/security.h>
#include <linux/ima.h>
#include <linux/pagemap.h>
#include <linux/cdev.h>
#include <linux/bootmem.h>
#include <linux/inotify.h>
#include <linux/mount.h>
#include <linux/async.h>

/*
 * This is needed for the following functions:
 *  - inode_has_buffers
 *  - invalidate_inode_buffers
 *  - invalidate_bdev
 *
 * FIXME: remove all knowledge of the buffer layer from this file
 */
#include <linux/buffer_head.h>

/*
 * New inode.c implementation.
 *
 * This implementation has the basic premise of trying
 * to be extremely low-overhead and SMP-safe, yet be
 * simple enough to be "obviously correct".
 *
 * Famous last words.
 */

/* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */

/* #define INODE_PARANOIA 1 */
/* #define INODE_DEBUG 1 */

/*
 * Inode lookup is no longer as critical as it used to be:
 * most of the lookups are going to be through the dcache.
 */
#define I_HASHBITS	i_hash_shift
#define I_HASHMASK	i_hash_mask

static unsigned int i_hash_mask __read_mostly;
static unsigned int i_hash_shift __read_mostly;

/*
 * Each inode can be on two separate lists. One is
 * the hash list of the inode, used for lookups. The
 * other linked list is the "type" list:
 *  "in_use" - valid inode, i_count > 0, i_nlink > 0
 *  "dirty"  - as "in_use" but also dirty
 *  "unused" - valid inode, i_count = 0
 *
 * A "dirty" list is maintained for each super block,
 * allowing for low-overhead inode sync() operations.
 */

LIST_HEAD(inode_in_use);
LIST_HEAD(inode_unused);
static struct hlist_head *inode_hashtable __read_mostly;

/*
 * A simple spinlock to protect the list manipulations.
 *
 * NOTE! You also have to own the lock if you change
 * the i_state of an inode while it is in use..
 */
DEFINE_SPINLOCK(inode_lock);

/*
 * iprune_mutex provides exclusion between the kswapd or try_to_free_pages
 * icache shrinking path, and the umount path.  Without this exclusion,
 * by the time prune_icache calls iput for the inode whose pages it has
 * been invalidating, or by the time it calls clear_inode & destroy_inode
 * from its final dispose_list, the struct super_block they refer to
 * (for inode->i_sb->s_op) may already have been freed and reused.
 */
static DEFINE_MUTEX(iprune_mutex);

/*
 * Statistics gathering..
 */
struct inodes_stat_t inodes_stat;

static struct kmem_cache *inode_cachep __read_mostly;

static void wake_up_inode(struct inode *inode)
{
	/*
	 * Prevent speculative execution through spin_unlock(&inode_lock);
	 */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_LOCK);
}
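
/*
 * Illustrative sketch of the i_state locking rule noted above for
 * inode_lock: transient i_state updates on a live inode are made with the
 * lock held, along the lines of
 *
 *	spin_lock(&inode_lock);
 *	inode->i_state |= I_WILL_FREE;
 *	spin_unlock(&inode_lock);
 *
 * (see generic_forget_inode() below for a real example).
 */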
/**
 * inode_init_always - perform inode structure initialisation
 * @sb: superblock inode belongs to
 * @inode: inode to initialise
 *
 * These are initializations that need to be done on every inode
 * allocation as the fields are not initialised by slab allocation.
 */
struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
{
	static const struct address_space_operations empty_aops;
	static struct inode_operations empty_iops;
	static const struct file_operations empty_fops;

	struct address_space *const mapping = &inode->i_data;

	inode->i_sb = sb;
	inode->i_blkbits = sb->s_blocksize_bits;
	inode->i_flags = 0;
	atomic_set(&inode->i_count, 1);
	inode->i_op = &empty_iops;
	inode->i_fop = &empty_fops;
	inode->i_nlink = 1;
	inode->i_uid = 0;
	inode->i_gid = 0;
	atomic_set(&inode->i_writecount, 0);
	inode->i_size = 0;
	inode->i_blocks = 0;
	inode->i_bytes = 0;
	inode->i_generation = 0;
#ifdef CONFIG_QUOTA
	memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
#endif
	inode->i_pipe = NULL;
	inode->i_bdev = NULL;
	inode->i_cdev = NULL;
	inode->i_rdev = 0;
	inode->dirtied_when = 0;

	if (security_inode_alloc(inode))
		goto out_free_inode;

	/* allocate and initialize an i_integrity */
	if (ima_inode_alloc(inode))
		goto out_free_security;

	spin_lock_init(&inode->i_lock);
	lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);

	mutex_init(&inode->i_mutex);
	lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);

	init_rwsem(&inode->i_alloc_sem);
	lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);

	mapping->a_ops = &empty_aops;
	mapping->host = inode;
	mapping->flags = 0;
	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
	mapping->assoc_mapping = NULL;
	mapping->backing_dev_info = &default_backing_dev_info;
	mapping->writeback_index = 0;

	/*
	 * If the block_device provides a backing_dev_info for client
	 * inodes then use that.  Otherwise the inode shares the bdev's
	 * backing_dev_info.
	 */
	if (sb->s_bdev) {
		struct backing_dev_info *bdi;

		bdi = sb->s_bdev->bd_inode_backing_dev_info;
		if (!bdi)
			bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
		mapping->backing_dev_info = bdi;
	}
	inode->i_private = NULL;
	inode->i_mapping = mapping;

	return inode;

out_free_security:
	security_inode_free(inode);
out_free_inode:
	if (inode->i_sb->s_op->destroy_inode)
		inode->i_sb->s_op->destroy_inode(inode);
	else
		kmem_cache_free(inode_cachep, (inode));
	return NULL;
}
EXPORT_SYMBOL(inode_init_always);

static struct inode *alloc_inode(struct super_block *sb)
{
	struct inode *inode;

	if (sb->s_op->alloc_inode)
		inode = sb->s_op->alloc_inode(sb);
	else
		inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);

	if (inode)
		return inode_init_always(sb, inode);
	return NULL;
}

void destroy_inode(struct inode *inode)
{
	BUG_ON(inode_has_buffers(inode));
	ima_inode_free(inode);
	security_inode_free(inode);
	if (inode->i_sb->s_op->destroy_inode)
		inode->i_sb->s_op->destroy_inode(inode);
	else
		kmem_cache_free(inode_cachep, (inode));
}
EXPORT_SYMBOL(destroy_inode);


/*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across use
 * of the inode, so let the slab cache be aware of that.
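 *
 * A filesystem with its own inode slab typically relies on the same
 * constructor pattern as inode_init() below; a minimal sketch, where
 * "foo_inode", foo_inode_cachep and foo_init_once() are made-up names:
 *
 *	foo_inode_cachep = kmem_cache_create("foo_inode_cache",
 *					     sizeof(struct foo_inode), 0,
 *					     SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 *					     foo_init_once);
 *
 * with foo_init_once() calling inode_init_once() on the embedded
 * struct inode.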
236 */ 237 void inode_init_once(struct inode *inode) 238 { 239 memset(inode, 0, sizeof(*inode)); 240 INIT_HLIST_NODE(&inode->i_hash); 241 INIT_LIST_HEAD(&inode->i_dentry); 242 INIT_LIST_HEAD(&inode->i_devices); 243 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); 244 spin_lock_init(&inode->i_data.tree_lock); 245 spin_lock_init(&inode->i_data.i_mmap_lock); 246 INIT_LIST_HEAD(&inode->i_data.private_list); 247 spin_lock_init(&inode->i_data.private_lock); 248 INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap); 249 INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear); 250 i_size_ordered_init(inode); 251 #ifdef CONFIG_INOTIFY 252 INIT_LIST_HEAD(&inode->inotify_watches); 253 mutex_init(&inode->inotify_mutex); 254 #endif 255 } 256 EXPORT_SYMBOL(inode_init_once); 257 258 static void init_once(void *foo) 259 { 260 struct inode *inode = (struct inode *) foo; 261 262 inode_init_once(inode); 263 } 264 265 /* 266 * inode_lock must be held 267 */ 268 void __iget(struct inode *inode) 269 { 270 if (atomic_read(&inode->i_count)) { 271 atomic_inc(&inode->i_count); 272 return; 273 } 274 atomic_inc(&inode->i_count); 275 if (!(inode->i_state & (I_DIRTY|I_SYNC))) 276 list_move(&inode->i_list, &inode_in_use); 277 inodes_stat.nr_unused--; 278 } 279 280 /** 281 * clear_inode - clear an inode 282 * @inode: inode to clear 283 * 284 * This is called by the filesystem to tell us 285 * that the inode is no longer useful. We just 286 * terminate it with extreme prejudice. 287 */ 288 void clear_inode(struct inode *inode) 289 { 290 might_sleep(); 291 invalidate_inode_buffers(inode); 292 293 BUG_ON(inode->i_data.nrpages); 294 BUG_ON(!(inode->i_state & I_FREEING)); 295 BUG_ON(inode->i_state & I_CLEAR); 296 inode_sync_wait(inode); 297 vfs_dq_drop(inode); 298 if (inode->i_sb->s_op->clear_inode) 299 inode->i_sb->s_op->clear_inode(inode); 300 if (S_ISBLK(inode->i_mode) && inode->i_bdev) 301 bd_forget(inode); 302 if (S_ISCHR(inode->i_mode) && inode->i_cdev) 303 cd_forget(inode); 304 inode->i_state = I_CLEAR; 305 } 306 EXPORT_SYMBOL(clear_inode); 307 308 /* 309 * dispose_list - dispose of the contents of a local list 310 * @head: the head of the list to free 311 * 312 * Dispose-list gets a local list with local inodes in it, so it doesn't 313 * need to worry about list corruption and SMP locks. 314 */ 315 static void dispose_list(struct list_head *head) 316 { 317 int nr_disposed = 0; 318 319 while (!list_empty(head)) { 320 struct inode *inode; 321 322 inode = list_first_entry(head, struct inode, i_list); 323 list_del(&inode->i_list); 324 325 if (inode->i_data.nrpages) 326 truncate_inode_pages(&inode->i_data, 0); 327 clear_inode(inode); 328 329 spin_lock(&inode_lock); 330 hlist_del_init(&inode->i_hash); 331 list_del_init(&inode->i_sb_list); 332 spin_unlock(&inode_lock); 333 334 wake_up_inode(inode); 335 destroy_inode(inode); 336 nr_disposed++; 337 } 338 spin_lock(&inode_lock); 339 inodes_stat.nr_inodes -= nr_disposed; 340 spin_unlock(&inode_lock); 341 } 342 343 /* 344 * Invalidate all inodes for a device. 345 */ 346 static int invalidate_list(struct list_head *head, struct list_head *dispose) 347 { 348 struct list_head *next; 349 int busy = 0, count = 0; 350 351 next = head->next; 352 for (;;) { 353 struct list_head *tmp = next; 354 struct inode *inode; 355 356 /* 357 * We can reschedule here without worrying about the list's 358 * consistency because the per-sb list of inodes must not 359 * change during umount anymore, and because iprune_mutex keeps 360 * shrink_icache_memory() away. 
361 */ 362 cond_resched_lock(&inode_lock); 363 364 next = next->next; 365 if (tmp == head) 366 break; 367 inode = list_entry(tmp, struct inode, i_sb_list); 368 if (inode->i_state & I_NEW) 369 continue; 370 invalidate_inode_buffers(inode); 371 if (!atomic_read(&inode->i_count)) { 372 list_move(&inode->i_list, dispose); 373 WARN_ON(inode->i_state & I_NEW); 374 inode->i_state |= I_FREEING; 375 count++; 376 continue; 377 } 378 busy = 1; 379 } 380 /* only unused inodes may be cached with i_count zero */ 381 inodes_stat.nr_unused -= count; 382 return busy; 383 } 384 385 /** 386 * invalidate_inodes - discard the inodes on a device 387 * @sb: superblock 388 * 389 * Discard all of the inodes for a given superblock. If the discard 390 * fails because there are busy inodes then a non zero value is returned. 391 * If the discard is successful all the inodes have been discarded. 392 */ 393 int invalidate_inodes(struct super_block *sb) 394 { 395 int busy; 396 LIST_HEAD(throw_away); 397 398 mutex_lock(&iprune_mutex); 399 spin_lock(&inode_lock); 400 inotify_unmount_inodes(&sb->s_inodes); 401 busy = invalidate_list(&sb->s_inodes, &throw_away); 402 spin_unlock(&inode_lock); 403 404 dispose_list(&throw_away); 405 mutex_unlock(&iprune_mutex); 406 407 return busy; 408 } 409 EXPORT_SYMBOL(invalidate_inodes); 410 411 static int can_unuse(struct inode *inode) 412 { 413 if (inode->i_state) 414 return 0; 415 if (inode_has_buffers(inode)) 416 return 0; 417 if (atomic_read(&inode->i_count)) 418 return 0; 419 if (inode->i_data.nrpages) 420 return 0; 421 return 1; 422 } 423 424 /* 425 * Scan `goal' inodes on the unused list for freeable ones. They are moved to 426 * a temporary list and then are freed outside inode_lock by dispose_list(). 427 * 428 * Any inodes which are pinned purely because of attached pagecache have their 429 * pagecache removed. We expect the final iput() on that inode to add it to 430 * the front of the inode_unused list. So look for it there and if the 431 * inode is still freeable, proceed. The right inode is found 99.9% of the 432 * time in testing on a 4-way. 433 * 434 * If the inode has metadata buffers attached to mapping->private_list then 435 * try to remove them. 
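 *
 * prune_icache() is normally invoked via the icache_shrinker registered in
 * inode_init() below: the shrinker core calls shrink_icache_memory() with a
 * number of objects to scan and the current gfp mask, roughly
 *
 *	shrink_icache_memory(nr_to_scan, gfp_mask);
 *
 * and treats the return value as an estimate of how many reclaimable
 * inodes remain.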
436 */ 437 static void prune_icache(int nr_to_scan) 438 { 439 LIST_HEAD(freeable); 440 int nr_pruned = 0; 441 int nr_scanned; 442 unsigned long reap = 0; 443 444 mutex_lock(&iprune_mutex); 445 spin_lock(&inode_lock); 446 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { 447 struct inode *inode; 448 449 if (list_empty(&inode_unused)) 450 break; 451 452 inode = list_entry(inode_unused.prev, struct inode, i_list); 453 454 if (inode->i_state || atomic_read(&inode->i_count)) { 455 list_move(&inode->i_list, &inode_unused); 456 continue; 457 } 458 if (inode_has_buffers(inode) || inode->i_data.nrpages) { 459 __iget(inode); 460 spin_unlock(&inode_lock); 461 if (remove_inode_buffers(inode)) 462 reap += invalidate_mapping_pages(&inode->i_data, 463 0, -1); 464 iput(inode); 465 spin_lock(&inode_lock); 466 467 if (inode != list_entry(inode_unused.next, 468 struct inode, i_list)) 469 continue; /* wrong inode or list_empty */ 470 if (!can_unuse(inode)) 471 continue; 472 } 473 list_move(&inode->i_list, &freeable); 474 WARN_ON(inode->i_state & I_NEW); 475 inode->i_state |= I_FREEING; 476 nr_pruned++; 477 } 478 inodes_stat.nr_unused -= nr_pruned; 479 if (current_is_kswapd()) 480 __count_vm_events(KSWAPD_INODESTEAL, reap); 481 else 482 __count_vm_events(PGINODESTEAL, reap); 483 spin_unlock(&inode_lock); 484 485 dispose_list(&freeable); 486 mutex_unlock(&iprune_mutex); 487 } 488 489 /* 490 * shrink_icache_memory() will attempt to reclaim some unused inodes. Here, 491 * "unused" means that no dentries are referring to the inodes: the files are 492 * not open and the dcache references to those inodes have already been 493 * reclaimed. 494 * 495 * This function is passed the number of inodes to scan, and it returns the 496 * total number of remaining possibly-reclaimable inodes. 497 */ 498 static int shrink_icache_memory(int nr, gfp_t gfp_mask) 499 { 500 if (nr) { 501 /* 502 * Nasty deadlock avoidance. We may hold various FS locks, 503 * and we don't want to recurse into the FS that called us 504 * in clear_inode() and friends.. 505 */ 506 if (!(gfp_mask & __GFP_FS)) 507 return -1; 508 prune_icache(nr); 509 } 510 return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; 511 } 512 513 static struct shrinker icache_shrinker = { 514 .shrink = shrink_icache_memory, 515 .seeks = DEFAULT_SEEKS, 516 }; 517 518 static void __wait_on_freeing_inode(struct inode *inode); 519 /* 520 * Called with the inode lock held. 521 * NOTE: we are not increasing the inode-refcount, you must call __iget() 522 * by hand after calling find_inode now! This simplifies iunique and won't 523 * add any additional branch in the common code. 524 */ 525 static struct inode *find_inode(struct super_block *sb, 526 struct hlist_head *head, 527 int (*test)(struct inode *, void *), 528 void *data) 529 { 530 struct hlist_node *node; 531 struct inode *inode = NULL; 532 533 repeat: 534 hlist_for_each_entry(inode, node, head, i_hash) { 535 if (inode->i_sb != sb) 536 continue; 537 if (!test(inode, data)) 538 continue; 539 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { 540 __wait_on_freeing_inode(inode); 541 goto repeat; 542 } 543 break; 544 } 545 return node ? inode : NULL; 546 } 547 548 /* 549 * find_inode_fast is the fast path version of find_inode, see the comment at 550 * iget_locked for details. 
551 */ 552 static struct inode *find_inode_fast(struct super_block *sb, 553 struct hlist_head *head, unsigned long ino) 554 { 555 struct hlist_node *node; 556 struct inode *inode = NULL; 557 558 repeat: 559 hlist_for_each_entry(inode, node, head, i_hash) { 560 if (inode->i_ino != ino) 561 continue; 562 if (inode->i_sb != sb) 563 continue; 564 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { 565 __wait_on_freeing_inode(inode); 566 goto repeat; 567 } 568 break; 569 } 570 return node ? inode : NULL; 571 } 572 573 static unsigned long hash(struct super_block *sb, unsigned long hashval) 574 { 575 unsigned long tmp; 576 577 tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / 578 L1_CACHE_BYTES; 579 tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); 580 return tmp & I_HASHMASK; 581 } 582 583 static inline void 584 __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, 585 struct inode *inode) 586 { 587 inodes_stat.nr_inodes++; 588 list_add(&inode->i_list, &inode_in_use); 589 list_add(&inode->i_sb_list, &sb->s_inodes); 590 if (head) 591 hlist_add_head(&inode->i_hash, head); 592 } 593 594 /** 595 * inode_add_to_lists - add a new inode to relevant lists 596 * @sb: superblock inode belongs to 597 * @inode: inode to mark in use 598 * 599 * When an inode is allocated it needs to be accounted for, added to the in use 600 * list, the owning superblock and the inode hash. This needs to be done under 601 * the inode_lock, so export a function to do this rather than the inode lock 602 * itself. We calculate the hash list to add to here so it is all internal 603 * which requires the caller to have already set up the inode number in the 604 * inode to add. 605 */ 606 void inode_add_to_lists(struct super_block *sb, struct inode *inode) 607 { 608 struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino); 609 610 spin_lock(&inode_lock); 611 __inode_add_to_lists(sb, head, inode); 612 spin_unlock(&inode_lock); 613 } 614 EXPORT_SYMBOL_GPL(inode_add_to_lists); 615 616 /** 617 * new_inode - obtain an inode 618 * @sb: superblock 619 * 620 * Allocates a new inode for given superblock. The default gfp_mask 621 * for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE. 622 * If HIGHMEM pages are unsuitable or it is known that pages allocated 623 * for the page cache are not reclaimable or migratable, 624 * mapping_set_gfp_mask() must be called with suitable flags on the 625 * newly created inode's mapping 626 * 627 */ 628 struct inode *new_inode(struct super_block *sb) 629 { 630 /* 631 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW 632 * error if st_ino won't fit in target struct field. Use 32bit counter 633 * here to attempt to avoid that. 634 */ 635 static unsigned int last_ino; 636 struct inode *inode; 637 638 spin_lock_prefetch(&inode_lock); 639 640 inode = alloc_inode(sb); 641 if (inode) { 642 spin_lock(&inode_lock); 643 __inode_add_to_lists(sb, NULL, inode); 644 inode->i_ino = ++last_ino; 645 inode->i_state = 0; 646 spin_unlock(&inode_lock); 647 } 648 return inode; 649 } 650 EXPORT_SYMBOL(new_inode); 651 652 void unlock_new_inode(struct inode *inode) 653 { 654 #ifdef CONFIG_DEBUG_LOCK_ALLOC 655 if (inode->i_mode & S_IFDIR) { 656 struct file_system_type *type = inode->i_sb->s_type; 657 658 /* 659 * ensure nobody is actually holding i_mutex 660 */ 661 mutex_destroy(&inode->i_mutex); 662 mutex_init(&inode->i_mutex); 663 lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key); 664 } 665 #endif 666 /* 667 * This is special! 
We do not need the spinlock 668 * when clearing I_LOCK, because we're guaranteed 669 * that nobody else tries to do anything about the 670 * state of the inode when it is locked, as we 671 * just created it (so there can be no old holders 672 * that haven't tested I_LOCK). 673 */ 674 WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); 675 inode->i_state &= ~(I_LOCK|I_NEW); 676 wake_up_inode(inode); 677 } 678 EXPORT_SYMBOL(unlock_new_inode); 679 680 /* 681 * This is called without the inode lock held.. Be careful. 682 * 683 * We no longer cache the sb_flags in i_flags - see fs.h 684 * -- rmk@arm.uk.linux.org 685 */ 686 static struct inode *get_new_inode(struct super_block *sb, 687 struct hlist_head *head, 688 int (*test)(struct inode *, void *), 689 int (*set)(struct inode *, void *), 690 void *data) 691 { 692 struct inode *inode; 693 694 inode = alloc_inode(sb); 695 if (inode) { 696 struct inode *old; 697 698 spin_lock(&inode_lock); 699 /* We released the lock, so.. */ 700 old = find_inode(sb, head, test, data); 701 if (!old) { 702 if (set(inode, data)) 703 goto set_failed; 704 705 __inode_add_to_lists(sb, head, inode); 706 inode->i_state = I_LOCK|I_NEW; 707 spin_unlock(&inode_lock); 708 709 /* Return the locked inode with I_NEW set, the 710 * caller is responsible for filling in the contents 711 */ 712 return inode; 713 } 714 715 /* 716 * Uhhuh, somebody else created the same inode under 717 * us. Use the old inode instead of the one we just 718 * allocated. 719 */ 720 __iget(old); 721 spin_unlock(&inode_lock); 722 destroy_inode(inode); 723 inode = old; 724 wait_on_inode(inode); 725 } 726 return inode; 727 728 set_failed: 729 spin_unlock(&inode_lock); 730 destroy_inode(inode); 731 return NULL; 732 } 733 734 /* 735 * get_new_inode_fast is the fast path version of get_new_inode, see the 736 * comment at iget_locked for details. 737 */ 738 static struct inode *get_new_inode_fast(struct super_block *sb, 739 struct hlist_head *head, unsigned long ino) 740 { 741 struct inode *inode; 742 743 inode = alloc_inode(sb); 744 if (inode) { 745 struct inode *old; 746 747 spin_lock(&inode_lock); 748 /* We released the lock, so.. */ 749 old = find_inode_fast(sb, head, ino); 750 if (!old) { 751 inode->i_ino = ino; 752 __inode_add_to_lists(sb, head, inode); 753 inode->i_state = I_LOCK|I_NEW; 754 spin_unlock(&inode_lock); 755 756 /* Return the locked inode with I_NEW set, the 757 * caller is responsible for filling in the contents 758 */ 759 return inode; 760 } 761 762 /* 763 * Uhhuh, somebody else created the same inode under 764 * us. Use the old inode instead of the one we just 765 * allocated. 766 */ 767 __iget(old); 768 spin_unlock(&inode_lock); 769 destroy_inode(inode); 770 inode = old; 771 wait_on_inode(inode); 772 } 773 return inode; 774 } 775 776 /** 777 * iunique - get a unique inode number 778 * @sb: superblock 779 * @max_reserved: highest reserved inode number 780 * 781 * Obtain an inode number that is unique on the system for a given 782 * superblock. This is used by file systems that have no natural 783 * permanent inode numbering system. An inode number is returned that 784 * is higher than the reserved limit but unique. 785 * 786 * BUGS: 787 * With a large number of inodes live on the file system this function 788 * currently becomes quite slow. 789 */ 790 ino_t iunique(struct super_block *sb, ino_t max_reserved) 791 { 792 /* 793 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW 794 * error if st_ino won't fit in target struct field. 
Use 32bit counter
 * here to attempt to avoid that.
 */
	static unsigned int counter;
	struct inode *inode;
	struct hlist_head *head;
	ino_t res;

	spin_lock(&inode_lock);
	do {
		if (counter <= max_reserved)
			counter = max_reserved + 1;
		res = counter++;
		head = inode_hashtable + hash(sb, res);
		inode = find_inode_fast(sb, head, res);
	} while (inode != NULL);
	spin_unlock(&inode_lock);

	return res;
}
EXPORT_SYMBOL(iunique);

struct inode *igrab(struct inode *inode)
{
	spin_lock(&inode_lock);
	if (!(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)))
		__iget(inode);
	else
		/*
		 * Handle the case where s_op->clear_inode has not been
		 * called yet, and somebody is calling igrab
		 * while the inode is getting freed.
		 */
		inode = NULL;
	spin_unlock(&inode_lock);
	return inode;
}
EXPORT_SYMBOL(igrab);

/**
 * ifind - internal function, you want ilookup5() or iget5().
 * @sb:		super block of file system to search
 * @head:	the head of the list to search
 * @test:	callback used for comparisons between inodes
 * @data:	opaque data pointer to pass to @test
 * @wait:	if true wait for the inode to be unlocked, if false do not
 *
 * ifind() searches for the inode specified by @data in the inode
 * cache. This is a generalized version of ifind_fast() for file systems where
 * the inode number is not sufficient for unique identification of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Otherwise NULL is returned.
 *
 * Note, @test is called with the inode_lock held, so can't sleep.
 */
static struct inode *ifind(struct super_block *sb,
		struct hlist_head *head, int (*test)(struct inode *, void *),
		void *data, const int wait)
{
	struct inode *inode;

	spin_lock(&inode_lock);
	inode = find_inode(sb, head, test, data);
	if (inode) {
		__iget(inode);
		spin_unlock(&inode_lock);
		if (likely(wait))
			wait_on_inode(inode);
		return inode;
	}
	spin_unlock(&inode_lock);
	return NULL;
}

/**
 * ifind_fast - internal function, you want ilookup() or iget().
 * @sb:		super block of file system to search
 * @head:	head of the list to search
 * @ino:	inode number to search for
 *
 * ifind_fast() searches for the inode @ino in the inode cache. This is for
 * file systems where the inode number is sufficient for unique identification
 * of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Otherwise NULL is returned.
 */
static struct inode *ifind_fast(struct super_block *sb,
		struct hlist_head *head, unsigned long ino)
{
	struct inode *inode;

	spin_lock(&inode_lock);
	inode = find_inode_fast(sb, head, ino);
	if (inode) {
		__iget(inode);
		spin_unlock(&inode_lock);
		wait_on_inode(inode);
		return inode;
	}
	spin_unlock(&inode_lock);
	return NULL;
}

/**
 * ilookup5_nowait - search for an inode in the inode cache
 * @sb:		super block of file system to search
 * @hashval:	hash value (usually inode number) to search for
 * @test:	callback used for comparisons between inodes
 * @data:	opaque data pointer to pass to @test
 *
 * ilookup5_nowait() uses ifind() to search for the inode specified by @hashval and
 * @data in the inode cache.
This is a generalized version of ilookup() for 912 * file systems where the inode number is not sufficient for unique 913 * identification of an inode. 914 * 915 * If the inode is in the cache, the inode is returned with an incremented 916 * reference count. Note, the inode lock is not waited upon so you have to be 917 * very careful what you do with the returned inode. You probably should be 918 * using ilookup5() instead. 919 * 920 * Otherwise NULL is returned. 921 * 922 * Note, @test is called with the inode_lock held, so can't sleep. 923 */ 924 struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, 925 int (*test)(struct inode *, void *), void *data) 926 { 927 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 928 929 return ifind(sb, head, test, data, 0); 930 } 931 EXPORT_SYMBOL(ilookup5_nowait); 932 933 /** 934 * ilookup5 - search for an inode in the inode cache 935 * @sb: super block of file system to search 936 * @hashval: hash value (usually inode number) to search for 937 * @test: callback used for comparisons between inodes 938 * @data: opaque data pointer to pass to @test 939 * 940 * ilookup5() uses ifind() to search for the inode specified by @hashval and 941 * @data in the inode cache. This is a generalized version of ilookup() for 942 * file systems where the inode number is not sufficient for unique 943 * identification of an inode. 944 * 945 * If the inode is in the cache, the inode lock is waited upon and the inode is 946 * returned with an incremented reference count. 947 * 948 * Otherwise NULL is returned. 949 * 950 * Note, @test is called with the inode_lock held, so can't sleep. 951 */ 952 struct inode *ilookup5(struct super_block *sb, unsigned long hashval, 953 int (*test)(struct inode *, void *), void *data) 954 { 955 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 956 957 return ifind(sb, head, test, data, 1); 958 } 959 EXPORT_SYMBOL(ilookup5); 960 961 /** 962 * ilookup - search for an inode in the inode cache 963 * @sb: super block of file system to search 964 * @ino: inode number to search for 965 * 966 * ilookup() uses ifind_fast() to search for the inode @ino in the inode cache. 967 * This is for file systems where the inode number is sufficient for unique 968 * identification of an inode. 969 * 970 * If the inode is in the cache, the inode is returned with an incremented 971 * reference count. 972 * 973 * Otherwise NULL is returned. 974 */ 975 struct inode *ilookup(struct super_block *sb, unsigned long ino) 976 { 977 struct hlist_head *head = inode_hashtable + hash(sb, ino); 978 979 return ifind_fast(sb, head, ino); 980 } 981 EXPORT_SYMBOL(ilookup); 982 983 /** 984 * iget5_locked - obtain an inode from a mounted file system 985 * @sb: super block of file system 986 * @hashval: hash value (usually inode number) to get 987 * @test: callback used for comparisons between inodes 988 * @set: callback used to initialize a new struct inode 989 * @data: opaque data pointer to pass to @test and @set 990 * 991 * iget5_locked() uses ifind() to search for the inode specified by @hashval 992 * and @data in the inode cache and if present it is returned with an increased 993 * reference count. This is a generalized version of iget_locked() for file 994 * systems where the inode number is not sufficient for unique identification 995 * of an inode. 996 * 997 * If the inode is not in cache, get_new_inode() is called to allocate a new 998 * inode and this is returned locked, hashed, and with the I_NEW flag set. 
The 999 * file system gets to fill it in before unlocking it via unlock_new_inode(). 1000 * 1001 * Note both @test and @set are called with the inode_lock held, so can't sleep. 1002 */ 1003 struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, 1004 int (*test)(struct inode *, void *), 1005 int (*set)(struct inode *, void *), void *data) 1006 { 1007 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1008 struct inode *inode; 1009 1010 inode = ifind(sb, head, test, data, 1); 1011 if (inode) 1012 return inode; 1013 /* 1014 * get_new_inode() will do the right thing, re-trying the search 1015 * in case it had to block at any point. 1016 */ 1017 return get_new_inode(sb, head, test, set, data); 1018 } 1019 EXPORT_SYMBOL(iget5_locked); 1020 1021 /** 1022 * iget_locked - obtain an inode from a mounted file system 1023 * @sb: super block of file system 1024 * @ino: inode number to get 1025 * 1026 * iget_locked() uses ifind_fast() to search for the inode specified by @ino in 1027 * the inode cache and if present it is returned with an increased reference 1028 * count. This is for file systems where the inode number is sufficient for 1029 * unique identification of an inode. 1030 * 1031 * If the inode is not in cache, get_new_inode_fast() is called to allocate a 1032 * new inode and this is returned locked, hashed, and with the I_NEW flag set. 1033 * The file system gets to fill it in before unlocking it via 1034 * unlock_new_inode(). 1035 */ 1036 struct inode *iget_locked(struct super_block *sb, unsigned long ino) 1037 { 1038 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1039 struct inode *inode; 1040 1041 inode = ifind_fast(sb, head, ino); 1042 if (inode) 1043 return inode; 1044 /* 1045 * get_new_inode_fast() will do the right thing, re-trying the search 1046 * in case it had to block at any point. 
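 *
 * For reference, a typical filesystem lookup built on top of iget_locked()
 * looks roughly like the sketch below; foo_read_inode() is a made-up
 * stand-in for the filesystem's "read the on-disk inode" helper:
 *
 *	inode = iget_locked(sb, ino);
 *	if (!inode)
 *		return ERR_PTR(-ENOMEM);
 *	if (inode->i_state & I_NEW) {
 *		foo_read_inode(inode);
 *		unlock_new_inode(inode);
 *	}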
1047 */ 1048 return get_new_inode_fast(sb, head, ino); 1049 } 1050 EXPORT_SYMBOL(iget_locked); 1051 1052 int insert_inode_locked(struct inode *inode) 1053 { 1054 struct super_block *sb = inode->i_sb; 1055 ino_t ino = inode->i_ino; 1056 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1057 1058 inode->i_state |= I_LOCK|I_NEW; 1059 while (1) { 1060 struct hlist_node *node; 1061 struct inode *old = NULL; 1062 spin_lock(&inode_lock); 1063 hlist_for_each_entry(old, node, head, i_hash) { 1064 if (old->i_ino != ino) 1065 continue; 1066 if (old->i_sb != sb) 1067 continue; 1068 if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) 1069 continue; 1070 break; 1071 } 1072 if (likely(!node)) { 1073 hlist_add_head(&inode->i_hash, head); 1074 spin_unlock(&inode_lock); 1075 return 0; 1076 } 1077 __iget(old); 1078 spin_unlock(&inode_lock); 1079 wait_on_inode(old); 1080 if (unlikely(!hlist_unhashed(&old->i_hash))) { 1081 iput(old); 1082 return -EBUSY; 1083 } 1084 iput(old); 1085 } 1086 } 1087 EXPORT_SYMBOL(insert_inode_locked); 1088 1089 int insert_inode_locked4(struct inode *inode, unsigned long hashval, 1090 int (*test)(struct inode *, void *), void *data) 1091 { 1092 struct super_block *sb = inode->i_sb; 1093 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1094 1095 inode->i_state |= I_LOCK|I_NEW; 1096 1097 while (1) { 1098 struct hlist_node *node; 1099 struct inode *old = NULL; 1100 1101 spin_lock(&inode_lock); 1102 hlist_for_each_entry(old, node, head, i_hash) { 1103 if (old->i_sb != sb) 1104 continue; 1105 if (!test(old, data)) 1106 continue; 1107 if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) 1108 continue; 1109 break; 1110 } 1111 if (likely(!node)) { 1112 hlist_add_head(&inode->i_hash, head); 1113 spin_unlock(&inode_lock); 1114 return 0; 1115 } 1116 __iget(old); 1117 spin_unlock(&inode_lock); 1118 wait_on_inode(old); 1119 if (unlikely(!hlist_unhashed(&old->i_hash))) { 1120 iput(old); 1121 return -EBUSY; 1122 } 1123 iput(old); 1124 } 1125 } 1126 EXPORT_SYMBOL(insert_inode_locked4); 1127 1128 /** 1129 * __insert_inode_hash - hash an inode 1130 * @inode: unhashed inode 1131 * @hashval: unsigned long value used to locate this object in the 1132 * inode_hashtable. 1133 * 1134 * Add an inode to the inode hash for this superblock. 1135 */ 1136 void __insert_inode_hash(struct inode *inode, unsigned long hashval) 1137 { 1138 struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); 1139 spin_lock(&inode_lock); 1140 hlist_add_head(&inode->i_hash, head); 1141 spin_unlock(&inode_lock); 1142 } 1143 EXPORT_SYMBOL(__insert_inode_hash); 1144 1145 /** 1146 * remove_inode_hash - remove an inode from the hash 1147 * @inode: inode to unhash 1148 * 1149 * Remove an inode from the superblock. 1150 */ 1151 void remove_inode_hash(struct inode *inode) 1152 { 1153 spin_lock(&inode_lock); 1154 hlist_del_init(&inode->i_hash); 1155 spin_unlock(&inode_lock); 1156 } 1157 EXPORT_SYMBOL(remove_inode_hash); 1158 1159 /* 1160 * Tell the filesystem that this inode is no longer of any interest and should 1161 * be completely destroyed. 1162 * 1163 * We leave the inode in the inode hash table until *after* the filesystem's 1164 * ->delete_inode completes. This ensures that an iget (such as nfsd might 1165 * instigate) will always find up-to-date information either in the hash or on 1166 * disk. 1167 * 1168 * I_FREEING is set so that no-one will take a new reference to the inode while 1169 * it is being deleted. 
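 *
 * Called (via the ->drop_inode() path in iput_final()) with inode_lock
 * held; the lock is dropped here before the filesystem's ->delete_inode()
 * (or the default truncate plus clear_inode()) runs, since those can sleep.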
1170 */ 1171 void generic_delete_inode(struct inode *inode) 1172 { 1173 const struct super_operations *op = inode->i_sb->s_op; 1174 1175 list_del_init(&inode->i_list); 1176 list_del_init(&inode->i_sb_list); 1177 WARN_ON(inode->i_state & I_NEW); 1178 inode->i_state |= I_FREEING; 1179 inodes_stat.nr_inodes--; 1180 spin_unlock(&inode_lock); 1181 1182 security_inode_delete(inode); 1183 1184 if (op->delete_inode) { 1185 void (*delete)(struct inode *) = op->delete_inode; 1186 if (!is_bad_inode(inode)) 1187 vfs_dq_init(inode); 1188 /* Filesystems implementing their own 1189 * s_op->delete_inode are required to call 1190 * truncate_inode_pages and clear_inode() 1191 * internally */ 1192 delete(inode); 1193 } else { 1194 truncate_inode_pages(&inode->i_data, 0); 1195 clear_inode(inode); 1196 } 1197 spin_lock(&inode_lock); 1198 hlist_del_init(&inode->i_hash); 1199 spin_unlock(&inode_lock); 1200 wake_up_inode(inode); 1201 BUG_ON(inode->i_state != I_CLEAR); 1202 destroy_inode(inode); 1203 } 1204 EXPORT_SYMBOL(generic_delete_inode); 1205 1206 static void generic_forget_inode(struct inode *inode) 1207 { 1208 struct super_block *sb = inode->i_sb; 1209 1210 if (!hlist_unhashed(&inode->i_hash)) { 1211 if (!(inode->i_state & (I_DIRTY|I_SYNC))) 1212 list_move(&inode->i_list, &inode_unused); 1213 inodes_stat.nr_unused++; 1214 if (sb->s_flags & MS_ACTIVE) { 1215 spin_unlock(&inode_lock); 1216 return; 1217 } 1218 WARN_ON(inode->i_state & I_NEW); 1219 inode->i_state |= I_WILL_FREE; 1220 spin_unlock(&inode_lock); 1221 write_inode_now(inode, 1); 1222 spin_lock(&inode_lock); 1223 WARN_ON(inode->i_state & I_NEW); 1224 inode->i_state &= ~I_WILL_FREE; 1225 inodes_stat.nr_unused--; 1226 hlist_del_init(&inode->i_hash); 1227 } 1228 list_del_init(&inode->i_list); 1229 list_del_init(&inode->i_sb_list); 1230 WARN_ON(inode->i_state & I_NEW); 1231 inode->i_state |= I_FREEING; 1232 inodes_stat.nr_inodes--; 1233 spin_unlock(&inode_lock); 1234 if (inode->i_data.nrpages) 1235 truncate_inode_pages(&inode->i_data, 0); 1236 clear_inode(inode); 1237 wake_up_inode(inode); 1238 destroy_inode(inode); 1239 } 1240 1241 /* 1242 * Normal UNIX filesystem behaviour: delete the 1243 * inode when the usage count drops to zero, and 1244 * i_nlink is zero. 1245 */ 1246 void generic_drop_inode(struct inode *inode) 1247 { 1248 if (!inode->i_nlink) 1249 generic_delete_inode(inode); 1250 else 1251 generic_forget_inode(inode); 1252 } 1253 EXPORT_SYMBOL_GPL(generic_drop_inode); 1254 1255 /* 1256 * Called when we're dropping the last reference 1257 * to an inode. 1258 * 1259 * Call the FS "drop()" function, defaulting to 1260 * the legacy UNIX filesystem behaviour.. 1261 * 1262 * NOTE! NOTE! NOTE! We're called with the inode lock 1263 * held, and the drop function is supposed to release 1264 * the lock! 1265 */ 1266 static inline void iput_final(struct inode *inode) 1267 { 1268 const struct super_operations *op = inode->i_sb->s_op; 1269 void (*drop)(struct inode *) = generic_drop_inode; 1270 1271 if (op && op->drop_inode) 1272 drop = op->drop_inode; 1273 drop(inode); 1274 } 1275 1276 /** 1277 * iput - put an inode 1278 * @inode: inode to put 1279 * 1280 * Puts an inode, dropping its usage count. If the inode use count hits 1281 * zero, the inode is then freed and may also be destroyed. 1282 * 1283 * Consequently, iput() can sleep. 
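 *
 * Because it can sleep, iput() must not be called from interrupt context
 * or while holding a spinlock.  A reference obtained from igrab() (or any
 * of the iget/ilookup helpers above) is dropped this way, e.g. (purely
 * illustrative):
 *
 *	inode = igrab(some_inode);
 *	if (inode) {
 *		... use the inode ...
 *		iput(inode);
 *	}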
 */
void iput(struct inode *inode)
{
	if (inode) {
		BUG_ON(inode->i_state == I_CLEAR);

		if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
			iput_final(inode);
	}
}
EXPORT_SYMBOL(iput);

/**
 * bmap	- find a block number in a file
 * @inode: inode of file
 * @block: block to find
 *
 * Returns the block number on the device holding the inode that
 * is the disk block number for the block of the file requested.
 * That is, asked for block 4 of inode 1 the function will return the
 * disk block relative to the disk start that holds that block of the
 * file.
 */
sector_t bmap(struct inode *inode, sector_t block)
{
	sector_t res = 0;
	if (inode->i_mapping->a_ops->bmap)
		res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block);
	return res;
}
EXPORT_SYMBOL(bmap);

/*
 * With relative atime, only update atime if the previous atime is
 * earlier than either the ctime or mtime or if at least a day has
 * passed since the last atime update.
 */
static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
				struct timespec now)
{

	if (!(mnt->mnt_flags & MNT_RELATIME))
		return 1;
	/*
	 * Is mtime younger than atime? If yes, update atime:
	 */
	if (timespec_compare(&inode->i_mtime, &inode->i_atime) >= 0)
		return 1;
	/*
	 * Is ctime younger than atime? If yes, update atime:
	 */
	if (timespec_compare(&inode->i_ctime, &inode->i_atime) >= 0)
		return 1;

	/*
	 * Is the previous atime value older than a day? If yes,
	 * update atime:
	 */
	if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
		return 1;
	/*
	 * Good, we can skip the atime update:
	 */
	return 0;
}

/**
 * touch_atime - update the access time
 * @mnt: mount the inode is accessed on
 * @dentry: dentry accessed
 *
 * Update the accessed time on an inode and mark it for writeback.
 * This function automatically handles read only file systems and media,
 * as well as the "noatime" flag and inode specific "noatime" markers.
 */
void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;
	struct timespec now;

	if (mnt_want_write(mnt))
		return;
	if (inode->i_flags & S_NOATIME)
		goto out;
	if (IS_NOATIME(inode))
		goto out;
	if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
		goto out;

	if (mnt->mnt_flags & MNT_NOATIME)
		goto out;
	if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
		goto out;

	now = current_fs_time(inode->i_sb);

	if (!relatime_need_update(mnt, inode, now))
		goto out;

	if (timespec_equal(&inode->i_atime, &now))
		goto out;

	inode->i_atime = now;
	mark_inode_dirty_sync(inode);
out:
	mnt_drop_write(mnt);
}
EXPORT_SYMBOL(touch_atime);

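/*
 * Example (illustrative): read paths normally reach touch_atime() through
 * the file_accessed() helper rather than by calling it directly; that
 * helper is roughly
 *
 *	static inline void file_accessed(struct file *file)
 *	{
 *		if (!(file->f_flags & O_NOATIME))
 *			touch_atime(file->f_path.mnt, file->f_path.dentry);
 *	}
 */
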
/**
 * file_update_time	-	update mtime and ctime
 * @file: file accessed
 *
 * Update the mtime and ctime members of an inode and mark the inode
 * for writeback.  Note that this function is meant exclusively for
 * usage in the file write path of filesystems, and filesystems may
 * choose to explicitly ignore updates via this function with the
 * S_NOCMTIME inode flag, e.g. for network filesystems where these
 * timestamps are handled by the server.
 */

void file_update_time(struct file *file)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct timespec now;
	int sync_it = 0;
	int err;

	if (IS_NOCMTIME(inode))
		return;

	err = mnt_want_write(file->f_path.mnt);
	if (err)
		return;

	now = current_fs_time(inode->i_sb);
	if (!timespec_equal(&inode->i_mtime, &now)) {
		inode->i_mtime = now;
		sync_it = 1;
	}

	if (!timespec_equal(&inode->i_ctime, &now)) {
		inode->i_ctime = now;
		sync_it = 1;
	}

	if (IS_I_VERSION(inode)) {
		inode_inc_iversion(inode);
		sync_it = 1;
	}

	if (sync_it)
		mark_inode_dirty_sync(inode);
	mnt_drop_write(file->f_path.mnt);
}
EXPORT_SYMBOL(file_update_time);

int inode_needs_sync(struct inode *inode)
{
	if (IS_SYNC(inode))
		return 1;
	if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
		return 1;
	return 0;
}
EXPORT_SYMBOL(inode_needs_sync);

int inode_wait(void *word)
{
	schedule();
	return 0;
}
EXPORT_SYMBOL(inode_wait);

/*
 * If we try to find an inode in the inode hash while it is being
 * deleted, we have to wait until the filesystem completes its
 * deletion before reporting that it isn't found.  This function waits
 * until the deletion _might_ have completed.  Callers are responsible
 * for rechecking the inode state.
 *
 * It doesn't matter if I_LOCK is not set initially, a call to
 * wake_up_inode() after removing from the hash list will DTRT.
 *
 * This is called with inode_lock held.
 */
static void __wait_on_freeing_inode(struct inode *inode)
{
	wait_queue_head_t *wq;
	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK);
	wq = bit_waitqueue(&inode->i_state, __I_LOCK);
	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
	spin_unlock(&inode_lock);
	schedule();
	finish_wait(wq, &wait.wait);
	spin_lock(&inode_lock);
}

static __initdata unsigned long ihash_entries;
static int __init set_ihash_entries(char *str)
{
	if (!str)
		return 0;
	ihash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("ihash_entries=", set_ihash_entries);

/*
 * Initialize the waitqueues and inode hash table.
 */
void __init inode_init_early(void)
{
	int loop;

	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
1501 */ 1502 if (hashdist) 1503 return; 1504 1505 inode_hashtable = 1506 alloc_large_system_hash("Inode-cache", 1507 sizeof(struct hlist_head), 1508 ihash_entries, 1509 14, 1510 HASH_EARLY, 1511 &i_hash_shift, 1512 &i_hash_mask, 1513 0); 1514 1515 for (loop = 0; loop < (1 << i_hash_shift); loop++) 1516 INIT_HLIST_HEAD(&inode_hashtable[loop]); 1517 } 1518 1519 void __init inode_init(void) 1520 { 1521 int loop; 1522 1523 /* inode slab cache */ 1524 inode_cachep = kmem_cache_create("inode_cache", 1525 sizeof(struct inode), 1526 0, 1527 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| 1528 SLAB_MEM_SPREAD), 1529 init_once); 1530 register_shrinker(&icache_shrinker); 1531 1532 /* Hash may have been set up in inode_init_early */ 1533 if (!hashdist) 1534 return; 1535 1536 inode_hashtable = 1537 alloc_large_system_hash("Inode-cache", 1538 sizeof(struct hlist_head), 1539 ihash_entries, 1540 14, 1541 0, 1542 &i_hash_shift, 1543 &i_hash_mask, 1544 0); 1545 1546 for (loop = 0; loop < (1 << i_hash_shift); loop++) 1547 INIT_HLIST_HEAD(&inode_hashtable[loop]); 1548 } 1549 1550 void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) 1551 { 1552 inode->i_mode = mode; 1553 if (S_ISCHR(mode)) { 1554 inode->i_fop = &def_chr_fops; 1555 inode->i_rdev = rdev; 1556 } else if (S_ISBLK(mode)) { 1557 inode->i_fop = &def_blk_fops; 1558 inode->i_rdev = rdev; 1559 } else if (S_ISFIFO(mode)) 1560 inode->i_fop = &def_fifo_fops; 1561 else if (S_ISSOCK(mode)) 1562 inode->i_fop = &bad_sock_fops; 1563 else 1564 printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n", 1565 mode); 1566 } 1567 EXPORT_SYMBOL(init_special_inode); 1568
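
/*
 * Example (illustrative): a filesystem's ->mknod() usually finishes a new
 * device node off with init_special_inode().  foo_new_inode() below is a
 * made-up stand-in for the filesystem's own inode allocation:
 *
 *	static int foo_mknod(struct inode *dir, struct dentry *dentry,
 *			     int mode, dev_t rdev)
 *	{
 *		struct inode *inode = foo_new_inode(dir, mode);
 *
 *		if (IS_ERR(inode))
 *			return PTR_ERR(inode);
 *		init_special_inode(inode, inode->i_mode, rdev);
 *		mark_inode_dirty(inode);
 *		d_instantiate(dentry, inode);
 *		return 0;
 *	}
 */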