1 /* 2 * Open file cache. 3 * 4 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com> 5 */ 6 7 #include <linux/hash.h> 8 #include <linux/slab.h> 9 #include <linux/file.h> 10 #include <linux/sched.h> 11 #include <linux/list_lru.h> 12 #include <linux/fsnotify_backend.h> 13 #include <linux/fsnotify.h> 14 #include <linux/seq_file.h> 15 16 #include "vfs.h" 17 #include "nfsd.h" 18 #include "nfsfh.h" 19 #include "netns.h" 20 #include "filecache.h" 21 #include "trace.h" 22 23 #define NFSDDBG_FACILITY NFSDDBG_FH 24 25 /* FIXME: dynamically size this for the machine somehow? */ 26 #define NFSD_FILE_HASH_BITS 12 27 #define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS) 28 #define NFSD_LAUNDRETTE_DELAY (2 * HZ) 29 30 #define NFSD_FILE_SHUTDOWN (1) 31 #define NFSD_FILE_LRU_THRESHOLD (4096UL) 32 #define NFSD_FILE_LRU_LIMIT (NFSD_FILE_LRU_THRESHOLD << 2) 33 34 /* We only care about NFSD_MAY_READ/WRITE for this cache */ 35 #define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE) 36 37 struct nfsd_fcache_bucket { 38 struct hlist_head nfb_head; 39 spinlock_t nfb_lock; 40 unsigned int nfb_count; 41 unsigned int nfb_maxcount; 42 }; 43 44 static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); 45 46 struct nfsd_fcache_disposal { 47 struct list_head list; 48 struct work_struct work; 49 struct net *net; 50 spinlock_t lock; 51 struct list_head freeme; 52 struct rcu_head rcu; 53 }; 54 55 static struct workqueue_struct *nfsd_filecache_wq __read_mostly; 56 57 static struct kmem_cache *nfsd_file_slab; 58 static struct kmem_cache *nfsd_file_mark_slab; 59 static struct nfsd_fcache_bucket *nfsd_file_hashtbl; 60 static struct list_lru nfsd_file_lru; 61 static long nfsd_file_lru_flags; 62 static struct fsnotify_group *nfsd_file_fsnotify_group; 63 static atomic_long_t nfsd_filecache_count; 64 static struct delayed_work nfsd_filecache_laundrette; 65 static DEFINE_SPINLOCK(laundrette_lock); 66 static LIST_HEAD(laundrettes); 67 68 static void nfsd_file_gc(void); 69 70 static void 71 nfsd_file_schedule_laundrette(void) 72 { 73 long count = atomic_long_read(&nfsd_filecache_count); 74 75 if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags)) 76 return; 77 78 queue_delayed_work(system_wq, &nfsd_filecache_laundrette, 79 NFSD_LAUNDRETTE_DELAY); 80 } 81 82 static void 83 nfsd_file_slab_free(struct rcu_head *rcu) 84 { 85 struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu); 86 87 put_cred(nf->nf_cred); 88 kmem_cache_free(nfsd_file_slab, nf); 89 } 90 91 static void 92 nfsd_file_mark_free(struct fsnotify_mark *mark) 93 { 94 struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark, 95 nfm_mark); 96 97 kmem_cache_free(nfsd_file_mark_slab, nfm); 98 } 99 100 static struct nfsd_file_mark * 101 nfsd_file_mark_get(struct nfsd_file_mark *nfm) 102 { 103 if (!refcount_inc_not_zero(&nfm->nfm_ref)) 104 return NULL; 105 return nfm; 106 } 107 108 static void 109 nfsd_file_mark_put(struct nfsd_file_mark *nfm) 110 { 111 if (refcount_dec_and_test(&nfm->nfm_ref)) { 112 fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group); 113 fsnotify_put_mark(&nfm->nfm_mark); 114 } 115 } 116 117 static struct nfsd_file_mark * 118 nfsd_file_mark_find_or_create(struct nfsd_file *nf) 119 { 120 int err; 121 struct fsnotify_mark *mark; 122 struct nfsd_file_mark *nfm = NULL, *new; 123 struct inode *inode = nf->nf_inode; 124 125 do { 126 mutex_lock(&nfsd_file_fsnotify_group->mark_mutex); 127 mark = fsnotify_find_mark(&inode->i_fsnotify_marks, 128 nfsd_file_fsnotify_group); 129 if (mark) { 130 nfm = nfsd_file_mark_get(container_of(mark, 131 struct nfsd_file_mark, 132 nfm_mark)); 133 mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); 134 if (nfm) { 135 fsnotify_put_mark(mark); 136 break; 137 } 138 /* Avoid soft lockup race with nfsd_file_mark_put() */ 139 fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group); 140 fsnotify_put_mark(mark); 141 } else 142 mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); 143 144 /* allocate a new nfm */ 145 new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL); 146 if (!new) 147 return NULL; 148 fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group); 149 new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF; 150 refcount_set(&new->nfm_ref, 1); 151 152 err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0); 153 154 /* 155 * If the add was successful, then return the object. 156 * Otherwise, we need to put the reference we hold on the 157 * nfm_mark. The fsnotify code will take a reference and put 158 * it on failure, so we can't just free it directly. It's also 159 * not safe to call fsnotify_destroy_mark on it as the 160 * mark->group will be NULL. Thus, we can't let the nfm_ref 161 * counter drive the destruction at this point. 162 */ 163 if (likely(!err)) 164 nfm = new; 165 else 166 fsnotify_put_mark(&new->nfm_mark); 167 } while (unlikely(err == -EEXIST)); 168 169 return nfm; 170 } 171 172 static struct nfsd_file * 173 nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, 174 struct net *net) 175 { 176 struct nfsd_file *nf; 177 178 nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL); 179 if (nf) { 180 INIT_HLIST_NODE(&nf->nf_node); 181 INIT_LIST_HEAD(&nf->nf_lru); 182 nf->nf_file = NULL; 183 nf->nf_cred = get_current_cred(); 184 nf->nf_net = net; 185 nf->nf_flags = 0; 186 nf->nf_inode = inode; 187 nf->nf_hashval = hashval; 188 refcount_set(&nf->nf_ref, 1); 189 nf->nf_may = may & NFSD_FILE_MAY_MASK; 190 if (may & NFSD_MAY_NOT_BREAK_LEASE) { 191 if (may & NFSD_MAY_WRITE) 192 __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags); 193 if (may & NFSD_MAY_READ) 194 __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); 195 } 196 nf->nf_mark = NULL; 197 init_rwsem(&nf->nf_rwsem); 198 trace_nfsd_file_alloc(nf); 199 } 200 return nf; 201 } 202 203 static bool 204 nfsd_file_free(struct nfsd_file *nf) 205 { 206 bool flush = false; 207 208 trace_nfsd_file_put_final(nf); 209 if (nf->nf_mark) 210 nfsd_file_mark_put(nf->nf_mark); 211 if (nf->nf_file) { 212 get_file(nf->nf_file); 213 filp_close(nf->nf_file, NULL); 214 fput(nf->nf_file); 215 flush = true; 216 } 217 call_rcu(&nf->nf_rcu, nfsd_file_slab_free); 218 return flush; 219 } 220 221 static bool 222 nfsd_file_check_writeback(struct nfsd_file *nf) 223 { 224 struct file *file = nf->nf_file; 225 struct address_space *mapping; 226 227 if (!file || !(file->f_mode & FMODE_WRITE)) 228 return false; 229 mapping = file->f_mapping; 230 return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) || 231 mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); 232 } 233 234 static int 235 nfsd_file_check_write_error(struct nfsd_file *nf) 236 { 237 struct file *file = nf->nf_file; 238 239 if (!file || !(file->f_mode & FMODE_WRITE)) 240 return 0; 241 return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)); 242 } 243 244 static void 245 nfsd_file_do_unhash(struct nfsd_file *nf) 246 { 247 lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); 248 249 trace_nfsd_file_unhash(nf); 250 251 if (nfsd_file_check_write_error(nf)) 252 nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id)); 253 --nfsd_file_hashtbl[nf->nf_hashval].nfb_count; 254 hlist_del_rcu(&nf->nf_node); 255 atomic_long_dec(&nfsd_filecache_count); 256 } 257 258 static bool 259 nfsd_file_unhash(struct nfsd_file *nf) 260 { 261 if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 262 nfsd_file_do_unhash(nf); 263 if (!list_empty(&nf->nf_lru)) 264 list_lru_del(&nfsd_file_lru, &nf->nf_lru); 265 return true; 266 } 267 return false; 268 } 269 270 /* 271 * Return true if the file was unhashed. 272 */ 273 static bool 274 nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose) 275 { 276 lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); 277 278 trace_nfsd_file_unhash_and_release_locked(nf); 279 if (!nfsd_file_unhash(nf)) 280 return false; 281 /* keep final reference for nfsd_file_lru_dispose */ 282 if (refcount_dec_not_one(&nf->nf_ref)) 283 return true; 284 285 list_add(&nf->nf_lru, dispose); 286 return true; 287 } 288 289 static void 290 nfsd_file_put_noref(struct nfsd_file *nf) 291 { 292 trace_nfsd_file_put(nf); 293 294 if (refcount_dec_and_test(&nf->nf_ref)) { 295 WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); 296 nfsd_file_free(nf); 297 } 298 } 299 300 void 301 nfsd_file_put(struct nfsd_file *nf) 302 { 303 bool is_hashed; 304 305 set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); 306 if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) { 307 nfsd_file_put_noref(nf); 308 return; 309 } 310 311 filemap_flush(nf->nf_file->f_mapping); 312 is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; 313 nfsd_file_put_noref(nf); 314 if (is_hashed) 315 nfsd_file_schedule_laundrette(); 316 if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) 317 nfsd_file_gc(); 318 } 319 320 struct nfsd_file * 321 nfsd_file_get(struct nfsd_file *nf) 322 { 323 if (likely(refcount_inc_not_zero(&nf->nf_ref))) 324 return nf; 325 return NULL; 326 } 327 328 static void 329 nfsd_file_dispose_list(struct list_head *dispose) 330 { 331 struct nfsd_file *nf; 332 333 while(!list_empty(dispose)) { 334 nf = list_first_entry(dispose, struct nfsd_file, nf_lru); 335 list_del(&nf->nf_lru); 336 nfsd_file_put_noref(nf); 337 } 338 } 339 340 static void 341 nfsd_file_dispose_list_sync(struct list_head *dispose) 342 { 343 bool flush = false; 344 struct nfsd_file *nf; 345 346 while(!list_empty(dispose)) { 347 nf = list_first_entry(dispose, struct nfsd_file, nf_lru); 348 list_del(&nf->nf_lru); 349 if (!refcount_dec_and_test(&nf->nf_ref)) 350 continue; 351 if (nfsd_file_free(nf)) 352 flush = true; 353 } 354 if (flush) 355 flush_delayed_fput(); 356 } 357 358 static void 359 nfsd_file_list_remove_disposal(struct list_head *dst, 360 struct nfsd_fcache_disposal *l) 361 { 362 spin_lock(&l->lock); 363 list_splice_init(&l->freeme, dst); 364 spin_unlock(&l->lock); 365 } 366 367 static void 368 nfsd_file_list_add_disposal(struct list_head *files, struct net *net) 369 { 370 struct nfsd_fcache_disposal *l; 371 372 rcu_read_lock(); 373 list_for_each_entry_rcu(l, &laundrettes, list) { 374 if (l->net == net) { 375 spin_lock(&l->lock); 376 list_splice_tail_init(files, &l->freeme); 377 spin_unlock(&l->lock); 378 queue_work(nfsd_filecache_wq, &l->work); 379 break; 380 } 381 } 382 rcu_read_unlock(); 383 } 384 385 static void 386 nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src, 387 struct net *net) 388 { 389 struct nfsd_file *nf, *tmp; 390 391 list_for_each_entry_safe(nf, tmp, src, nf_lru) { 392 if (nf->nf_net == net) 393 list_move_tail(&nf->nf_lru, dst); 394 } 395 } 396 397 static void 398 nfsd_file_dispose_list_delayed(struct list_head *dispose) 399 { 400 LIST_HEAD(list); 401 struct nfsd_file *nf; 402 403 while(!list_empty(dispose)) { 404 nf = list_first_entry(dispose, struct nfsd_file, nf_lru); 405 nfsd_file_list_add_pernet(&list, dispose, nf->nf_net); 406 nfsd_file_list_add_disposal(&list, nf->nf_net); 407 } 408 } 409 410 /* 411 * Note this can deadlock with nfsd_file_cache_purge. 412 */ 413 static enum lru_status 414 nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, 415 spinlock_t *lock, void *arg) 416 __releases(lock) 417 __acquires(lock) 418 { 419 struct list_head *head = arg; 420 struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); 421 422 /* 423 * Do a lockless refcount check. The hashtable holds one reference, so 424 * we look to see if anything else has a reference, or if any have 425 * been put since the shrinker last ran. Those don't get unhashed and 426 * released. 427 * 428 * Note that in the put path, we set the flag and then decrement the 429 * counter. Here we check the counter and then test and clear the flag. 430 * That order is deliberate to ensure that we can do this locklessly. 431 */ 432 if (refcount_read(&nf->nf_ref) > 1) 433 goto out_skip; 434 435 /* 436 * Don't throw out files that are still undergoing I/O or 437 * that have uncleared errors pending. 438 */ 439 if (nfsd_file_check_writeback(nf)) 440 goto out_skip; 441 442 if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) 443 goto out_skip; 444 445 if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) 446 goto out_skip; 447 448 list_lru_isolate_move(lru, &nf->nf_lru, head); 449 return LRU_REMOVED; 450 out_skip: 451 return LRU_SKIP; 452 } 453 454 static unsigned long 455 nfsd_file_lru_walk_list(struct shrink_control *sc) 456 { 457 LIST_HEAD(head); 458 struct nfsd_file *nf; 459 unsigned long ret; 460 461 if (sc) 462 ret = list_lru_shrink_walk(&nfsd_file_lru, sc, 463 nfsd_file_lru_cb, &head); 464 else 465 ret = list_lru_walk(&nfsd_file_lru, 466 nfsd_file_lru_cb, 467 &head, LONG_MAX); 468 list_for_each_entry(nf, &head, nf_lru) { 469 spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); 470 nfsd_file_do_unhash(nf); 471 spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); 472 } 473 nfsd_file_dispose_list_delayed(&head); 474 return ret; 475 } 476 477 static void 478 nfsd_file_gc(void) 479 { 480 nfsd_file_lru_walk_list(NULL); 481 } 482 483 static void 484 nfsd_file_gc_worker(struct work_struct *work) 485 { 486 nfsd_file_gc(); 487 nfsd_file_schedule_laundrette(); 488 } 489 490 static unsigned long 491 nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc) 492 { 493 return list_lru_count(&nfsd_file_lru); 494 } 495 496 static unsigned long 497 nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) 498 { 499 return nfsd_file_lru_walk_list(sc); 500 } 501 502 static struct shrinker nfsd_file_shrinker = { 503 .scan_objects = nfsd_file_lru_scan, 504 .count_objects = nfsd_file_lru_count, 505 .seeks = 1, 506 }; 507 508 static void 509 __nfsd_file_close_inode(struct inode *inode, unsigned int hashval, 510 struct list_head *dispose) 511 { 512 struct nfsd_file *nf; 513 struct hlist_node *tmp; 514 515 spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); 516 hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) { 517 if (inode == nf->nf_inode) 518 nfsd_file_unhash_and_release_locked(nf, dispose); 519 } 520 spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); 521 } 522 523 /** 524 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file 525 * @inode: inode of the file to attempt to remove 526 * 527 * Walk the whole hash bucket, looking for any files that correspond to "inode". 528 * If any do, then unhash them and put the hashtable reference to them and 529 * destroy any that had their last reference put. Also ensure that any of the 530 * fputs also have their final __fput done as well. 531 */ 532 void 533 nfsd_file_close_inode_sync(struct inode *inode) 534 { 535 unsigned int hashval = (unsigned int)hash_long(inode->i_ino, 536 NFSD_FILE_HASH_BITS); 537 LIST_HEAD(dispose); 538 539 __nfsd_file_close_inode(inode, hashval, &dispose); 540 trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose)); 541 nfsd_file_dispose_list_sync(&dispose); 542 } 543 544 /** 545 * nfsd_file_close_inode - attempt a delayed close of a nfsd_file 546 * @inode: inode of the file to attempt to remove 547 * 548 * Walk the whole hash bucket, looking for any files that correspond to "inode". 549 * If any do, then unhash them and put the hashtable reference to them and 550 * destroy any that had their last reference put. 551 */ 552 static void 553 nfsd_file_close_inode(struct inode *inode) 554 { 555 unsigned int hashval = (unsigned int)hash_long(inode->i_ino, 556 NFSD_FILE_HASH_BITS); 557 LIST_HEAD(dispose); 558 559 __nfsd_file_close_inode(inode, hashval, &dispose); 560 trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose)); 561 nfsd_file_dispose_list_delayed(&dispose); 562 } 563 564 /** 565 * nfsd_file_delayed_close - close unused nfsd_files 566 * @work: dummy 567 * 568 * Walk the LRU list and close any entries that have not been used since 569 * the last scan. 570 * 571 * Note this can deadlock with nfsd_file_cache_purge. 572 */ 573 static void 574 nfsd_file_delayed_close(struct work_struct *work) 575 { 576 LIST_HEAD(head); 577 struct nfsd_fcache_disposal *l = container_of(work, 578 struct nfsd_fcache_disposal, work); 579 580 nfsd_file_list_remove_disposal(&head, l); 581 nfsd_file_dispose_list(&head); 582 } 583 584 static int 585 nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, 586 void *data) 587 { 588 struct file_lock *fl = data; 589 590 /* Only close files for F_SETLEASE leases */ 591 if (fl->fl_flags & FL_LEASE) 592 nfsd_file_close_inode_sync(file_inode(fl->fl_file)); 593 return 0; 594 } 595 596 static struct notifier_block nfsd_file_lease_notifier = { 597 .notifier_call = nfsd_file_lease_notifier_call, 598 }; 599 600 static int 601 nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask, 602 struct inode *inode, struct inode *dir, 603 const struct qstr *name, u32 cookie) 604 { 605 trace_nfsd_file_fsnotify_handle_event(inode, mask); 606 607 /* Should be no marks on non-regular files */ 608 if (!S_ISREG(inode->i_mode)) { 609 WARN_ON_ONCE(1); 610 return 0; 611 } 612 613 /* don't close files if this was not the last link */ 614 if (mask & FS_ATTRIB) { 615 if (inode->i_nlink) 616 return 0; 617 } 618 619 nfsd_file_close_inode(inode); 620 return 0; 621 } 622 623 624 static const struct fsnotify_ops nfsd_file_fsnotify_ops = { 625 .handle_inode_event = nfsd_file_fsnotify_handle_event, 626 .free_mark = nfsd_file_mark_free, 627 }; 628 629 int 630 nfsd_file_cache_init(void) 631 { 632 int ret = -ENOMEM; 633 unsigned int i; 634 635 clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); 636 637 if (nfsd_file_hashtbl) 638 return 0; 639 640 nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0); 641 if (!nfsd_filecache_wq) 642 goto out; 643 644 nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE, 645 sizeof(*nfsd_file_hashtbl), GFP_KERNEL); 646 if (!nfsd_file_hashtbl) { 647 pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n"); 648 goto out_err; 649 } 650 651 nfsd_file_slab = kmem_cache_create("nfsd_file", 652 sizeof(struct nfsd_file), 0, 0, NULL); 653 if (!nfsd_file_slab) { 654 pr_err("nfsd: unable to create nfsd_file_slab\n"); 655 goto out_err; 656 } 657 658 nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark", 659 sizeof(struct nfsd_file_mark), 0, 0, NULL); 660 if (!nfsd_file_mark_slab) { 661 pr_err("nfsd: unable to create nfsd_file_mark_slab\n"); 662 goto out_err; 663 } 664 665 666 ret = list_lru_init(&nfsd_file_lru); 667 if (ret) { 668 pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret); 669 goto out_err; 670 } 671 672 ret = register_shrinker(&nfsd_file_shrinker); 673 if (ret) { 674 pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret); 675 goto out_lru; 676 } 677 678 ret = lease_register_notifier(&nfsd_file_lease_notifier); 679 if (ret) { 680 pr_err("nfsd: unable to register lease notifier: %d\n", ret); 681 goto out_shrinker; 682 } 683 684 nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops); 685 if (IS_ERR(nfsd_file_fsnotify_group)) { 686 pr_err("nfsd: unable to create fsnotify group: %ld\n", 687 PTR_ERR(nfsd_file_fsnotify_group)); 688 ret = PTR_ERR(nfsd_file_fsnotify_group); 689 nfsd_file_fsnotify_group = NULL; 690 goto out_notifier; 691 } 692 693 for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { 694 INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head); 695 spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock); 696 } 697 698 INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker); 699 out: 700 return ret; 701 out_notifier: 702 lease_unregister_notifier(&nfsd_file_lease_notifier); 703 out_shrinker: 704 unregister_shrinker(&nfsd_file_shrinker); 705 out_lru: 706 list_lru_destroy(&nfsd_file_lru); 707 out_err: 708 kmem_cache_destroy(nfsd_file_slab); 709 nfsd_file_slab = NULL; 710 kmem_cache_destroy(nfsd_file_mark_slab); 711 nfsd_file_mark_slab = NULL; 712 kfree(nfsd_file_hashtbl); 713 nfsd_file_hashtbl = NULL; 714 destroy_workqueue(nfsd_filecache_wq); 715 nfsd_filecache_wq = NULL; 716 goto out; 717 } 718 719 /* 720 * Note this can deadlock with nfsd_file_lru_cb. 721 */ 722 void 723 nfsd_file_cache_purge(struct net *net) 724 { 725 unsigned int i; 726 struct nfsd_file *nf; 727 struct hlist_node *next; 728 LIST_HEAD(dispose); 729 bool del; 730 731 if (!nfsd_file_hashtbl) 732 return; 733 734 for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { 735 struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i]; 736 737 spin_lock(&nfb->nfb_lock); 738 hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) { 739 if (net && nf->nf_net != net) 740 continue; 741 del = nfsd_file_unhash_and_release_locked(nf, &dispose); 742 743 /* 744 * Deadlock detected! Something marked this entry as 745 * unhased, but hasn't removed it from the hash list. 746 */ 747 WARN_ON_ONCE(!del); 748 } 749 spin_unlock(&nfb->nfb_lock); 750 nfsd_file_dispose_list(&dispose); 751 } 752 } 753 754 static struct nfsd_fcache_disposal * 755 nfsd_alloc_fcache_disposal(struct net *net) 756 { 757 struct nfsd_fcache_disposal *l; 758 759 l = kmalloc(sizeof(*l), GFP_KERNEL); 760 if (!l) 761 return NULL; 762 INIT_WORK(&l->work, nfsd_file_delayed_close); 763 l->net = net; 764 spin_lock_init(&l->lock); 765 INIT_LIST_HEAD(&l->freeme); 766 return l; 767 } 768 769 static void 770 nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l) 771 { 772 rcu_assign_pointer(l->net, NULL); 773 cancel_work_sync(&l->work); 774 nfsd_file_dispose_list(&l->freeme); 775 kfree_rcu(l, rcu); 776 } 777 778 static void 779 nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l) 780 { 781 spin_lock(&laundrette_lock); 782 list_add_tail_rcu(&l->list, &laundrettes); 783 spin_unlock(&laundrette_lock); 784 } 785 786 static void 787 nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l) 788 { 789 spin_lock(&laundrette_lock); 790 list_del_rcu(&l->list); 791 spin_unlock(&laundrette_lock); 792 } 793 794 static int 795 nfsd_alloc_fcache_disposal_net(struct net *net) 796 { 797 struct nfsd_fcache_disposal *l; 798 799 l = nfsd_alloc_fcache_disposal(net); 800 if (!l) 801 return -ENOMEM; 802 nfsd_add_fcache_disposal(l); 803 return 0; 804 } 805 806 static void 807 nfsd_free_fcache_disposal_net(struct net *net) 808 { 809 struct nfsd_fcache_disposal *l; 810 811 rcu_read_lock(); 812 list_for_each_entry_rcu(l, &laundrettes, list) { 813 if (l->net != net) 814 continue; 815 nfsd_del_fcache_disposal(l); 816 rcu_read_unlock(); 817 nfsd_free_fcache_disposal(l); 818 return; 819 } 820 rcu_read_unlock(); 821 } 822 823 int 824 nfsd_file_cache_start_net(struct net *net) 825 { 826 return nfsd_alloc_fcache_disposal_net(net); 827 } 828 829 void 830 nfsd_file_cache_shutdown_net(struct net *net) 831 { 832 nfsd_file_cache_purge(net); 833 nfsd_free_fcache_disposal_net(net); 834 } 835 836 void 837 nfsd_file_cache_shutdown(void) 838 { 839 set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); 840 841 lease_unregister_notifier(&nfsd_file_lease_notifier); 842 unregister_shrinker(&nfsd_file_shrinker); 843 /* 844 * make sure all callers of nfsd_file_lru_cb are done before 845 * calling nfsd_file_cache_purge 846 */ 847 cancel_delayed_work_sync(&nfsd_filecache_laundrette); 848 nfsd_file_cache_purge(NULL); 849 list_lru_destroy(&nfsd_file_lru); 850 rcu_barrier(); 851 fsnotify_put_group(nfsd_file_fsnotify_group); 852 nfsd_file_fsnotify_group = NULL; 853 kmem_cache_destroy(nfsd_file_slab); 854 nfsd_file_slab = NULL; 855 fsnotify_wait_marks_destroyed(); 856 kmem_cache_destroy(nfsd_file_mark_slab); 857 nfsd_file_mark_slab = NULL; 858 kfree(nfsd_file_hashtbl); 859 nfsd_file_hashtbl = NULL; 860 destroy_workqueue(nfsd_filecache_wq); 861 nfsd_filecache_wq = NULL; 862 } 863 864 static bool 865 nfsd_match_cred(const struct cred *c1, const struct cred *c2) 866 { 867 int i; 868 869 if (!uid_eq(c1->fsuid, c2->fsuid)) 870 return false; 871 if (!gid_eq(c1->fsgid, c2->fsgid)) 872 return false; 873 if (c1->group_info == NULL || c2->group_info == NULL) 874 return c1->group_info == c2->group_info; 875 if (c1->group_info->ngroups != c2->group_info->ngroups) 876 return false; 877 for (i = 0; i < c1->group_info->ngroups; i++) { 878 if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) 879 return false; 880 } 881 return true; 882 } 883 884 static struct nfsd_file * 885 nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, 886 unsigned int hashval, struct net *net) 887 { 888 struct nfsd_file *nf; 889 unsigned char need = may_flags & NFSD_FILE_MAY_MASK; 890 891 hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, 892 nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) { 893 if (nf->nf_may != need) 894 continue; 895 if (nf->nf_inode != inode) 896 continue; 897 if (nf->nf_net != net) 898 continue; 899 if (!nfsd_match_cred(nf->nf_cred, current_cred())) 900 continue; 901 if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) 902 continue; 903 if (nfsd_file_get(nf) != NULL) 904 return nf; 905 } 906 return NULL; 907 } 908 909 /** 910 * nfsd_file_is_cached - are there any cached open files for this fh? 911 * @inode: inode of the file to check 912 * 913 * Scan the hashtable for open files that match this fh. Returns true if there 914 * are any, and false if not. 915 */ 916 bool 917 nfsd_file_is_cached(struct inode *inode) 918 { 919 bool ret = false; 920 struct nfsd_file *nf; 921 unsigned int hashval; 922 923 hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); 924 925 rcu_read_lock(); 926 hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, 927 nf_node) { 928 if (inode == nf->nf_inode) { 929 ret = true; 930 break; 931 } 932 } 933 rcu_read_unlock(); 934 trace_nfsd_file_is_cached(inode, hashval, (int)ret); 935 return ret; 936 } 937 938 __be32 939 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, 940 unsigned int may_flags, struct nfsd_file **pnf) 941 { 942 __be32 status; 943 struct net *net = SVC_NET(rqstp); 944 struct nfsd_file *nf, *new; 945 struct inode *inode; 946 unsigned int hashval; 947 bool retry = true; 948 949 /* FIXME: skip this if fh_dentry is already set? */ 950 status = fh_verify(rqstp, fhp, S_IFREG, 951 may_flags|NFSD_MAY_OWNER_OVERRIDE); 952 if (status != nfs_ok) 953 return status; 954 955 inode = d_inode(fhp->fh_dentry); 956 hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); 957 retry: 958 rcu_read_lock(); 959 nf = nfsd_file_find_locked(inode, may_flags, hashval, net); 960 rcu_read_unlock(); 961 if (nf) 962 goto wait_for_construction; 963 964 new = nfsd_file_alloc(inode, may_flags, hashval, net); 965 if (!new) { 966 trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, 967 NULL, nfserr_jukebox); 968 return nfserr_jukebox; 969 } 970 971 spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); 972 nf = nfsd_file_find_locked(inode, may_flags, hashval, net); 973 if (nf == NULL) 974 goto open_file; 975 spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); 976 nfsd_file_slab_free(&new->nf_rcu); 977 978 wait_for_construction: 979 wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE); 980 981 /* Did construction of this file fail? */ 982 if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 983 if (!retry) { 984 status = nfserr_jukebox; 985 goto out; 986 } 987 retry = false; 988 nfsd_file_put_noref(nf); 989 goto retry; 990 } 991 992 this_cpu_inc(nfsd_file_cache_hits); 993 994 if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) { 995 bool write = (may_flags & NFSD_MAY_WRITE); 996 997 if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) || 998 (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) { 999 status = nfserrno(nfsd_open_break_lease( 1000 file_inode(nf->nf_file), may_flags)); 1001 if (status == nfs_ok) { 1002 clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); 1003 if (write) 1004 clear_bit(NFSD_FILE_BREAK_WRITE, 1005 &nf->nf_flags); 1006 } 1007 } 1008 } 1009 out: 1010 if (status == nfs_ok) { 1011 *pnf = nf; 1012 } else { 1013 nfsd_file_put(nf); 1014 nf = NULL; 1015 } 1016 1017 trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status); 1018 return status; 1019 open_file: 1020 nf = new; 1021 /* Take reference for the hashtable */ 1022 refcount_inc(&nf->nf_ref); 1023 __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); 1024 __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); 1025 list_lru_add(&nfsd_file_lru, &nf->nf_lru); 1026 hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head); 1027 ++nfsd_file_hashtbl[hashval].nfb_count; 1028 nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, 1029 nfsd_file_hashtbl[hashval].nfb_count); 1030 spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); 1031 if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD) 1032 nfsd_file_gc(); 1033 1034 nf->nf_mark = nfsd_file_mark_find_or_create(nf); 1035 if (nf->nf_mark) 1036 status = nfsd_open_verified(rqstp, fhp, S_IFREG, 1037 may_flags, &nf->nf_file); 1038 else 1039 status = nfserr_jukebox; 1040 /* 1041 * If construction failed, or we raced with a call to unlink() 1042 * then unhash. 1043 */ 1044 if (status != nfs_ok || inode->i_nlink == 0) { 1045 bool do_free; 1046 spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); 1047 do_free = nfsd_file_unhash(nf); 1048 spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); 1049 if (do_free) 1050 nfsd_file_put_noref(nf); 1051 } 1052 clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); 1053 smp_mb__after_atomic(); 1054 wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); 1055 goto out; 1056 } 1057 1058 /* 1059 * Note that fields may be added, removed or reordered in the future. Programs 1060 * scraping this file for info should test the labels to ensure they're 1061 * getting the correct field. 1062 */ 1063 static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) 1064 { 1065 unsigned int i, count = 0, longest = 0; 1066 unsigned long hits = 0; 1067 1068 /* 1069 * No need for spinlocks here since we're not terribly interested in 1070 * accuracy. We do take the nfsd_mutex simply to ensure that we 1071 * don't end up racing with server shutdown 1072 */ 1073 mutex_lock(&nfsd_mutex); 1074 if (nfsd_file_hashtbl) { 1075 for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { 1076 count += nfsd_file_hashtbl[i].nfb_count; 1077 longest = max(longest, nfsd_file_hashtbl[i].nfb_count); 1078 } 1079 } 1080 mutex_unlock(&nfsd_mutex); 1081 1082 for_each_possible_cpu(i) 1083 hits += per_cpu(nfsd_file_cache_hits, i); 1084 1085 seq_printf(m, "total entries: %u\n", count); 1086 seq_printf(m, "longest chain: %u\n", longest); 1087 seq_printf(m, "cache hits: %lu\n", hits); 1088 return 0; 1089 } 1090 1091 int nfsd_file_cache_stats_open(struct inode *inode, struct file *file) 1092 { 1093 return single_open(file, nfsd_file_cache_stats_show, NULL); 1094 } 1095