1 /* 2 * Open file cache. 3 * 4 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com> 5 */ 6 7 #include <linux/hash.h> 8 #include <linux/slab.h> 9 #include <linux/file.h> 10 #include <linux/sched.h> 11 #include <linux/list_lru.h> 12 #include <linux/fsnotify_backend.h> 13 #include <linux/fsnotify.h> 14 #include <linux/seq_file.h> 15 16 #include "vfs.h" 17 #include "nfsd.h" 18 #include "nfsfh.h" 19 #include "netns.h" 20 #include "filecache.h" 21 #include "trace.h" 22 23 #define NFSDDBG_FACILITY NFSDDBG_FH 24 25 /* FIXME: dynamically size this for the machine somehow? */ 26 #define NFSD_FILE_HASH_BITS 12 27 #define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS) 28 #define NFSD_LAUNDRETTE_DELAY (2 * HZ) 29 30 #define NFSD_FILE_SHUTDOWN (1) 31 #define NFSD_FILE_LRU_THRESHOLD (4096UL) 32 #define NFSD_FILE_LRU_LIMIT (NFSD_FILE_LRU_THRESHOLD << 2) 33 34 /* We only care about NFSD_MAY_READ/WRITE for this cache */ 35 #define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE) 36 37 struct nfsd_fcache_bucket { 38 struct hlist_head nfb_head; 39 spinlock_t nfb_lock; 40 unsigned int nfb_count; 41 unsigned int nfb_maxcount; 42 }; 43 44 static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); 45 46 struct nfsd_fcache_disposal { 47 struct list_head list; 48 struct work_struct work; 49 struct net *net; 50 spinlock_t lock; 51 struct list_head freeme; 52 struct rcu_head rcu; 53 }; 54 55 static struct workqueue_struct *nfsd_filecache_wq __read_mostly; 56 57 static struct kmem_cache *nfsd_file_slab; 58 static struct kmem_cache *nfsd_file_mark_slab; 59 static struct nfsd_fcache_bucket *nfsd_file_hashtbl; 60 static struct list_lru nfsd_file_lru; 61 static long nfsd_file_lru_flags; 62 static struct fsnotify_group *nfsd_file_fsnotify_group; 63 static atomic_long_t nfsd_filecache_count; 64 static struct delayed_work nfsd_filecache_laundrette; 65 static DEFINE_SPINLOCK(laundrette_lock); 66 static LIST_HEAD(laundrettes); 67 68 static void nfsd_file_gc(void); 69 70 static void 71 nfsd_file_schedule_laundrette(void) 72 { 73 long count = atomic_long_read(&nfsd_filecache_count); 74 75 if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags)) 76 return; 77 78 queue_delayed_work(system_wq, &nfsd_filecache_laundrette, 79 NFSD_LAUNDRETTE_DELAY); 80 } 81 82 static void 83 nfsd_file_slab_free(struct rcu_head *rcu) 84 { 85 struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu); 86 87 put_cred(nf->nf_cred); 88 kmem_cache_free(nfsd_file_slab, nf); 89 } 90 91 static void 92 nfsd_file_mark_free(struct fsnotify_mark *mark) 93 { 94 struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark, 95 nfm_mark); 96 97 kmem_cache_free(nfsd_file_mark_slab, nfm); 98 } 99 100 static struct nfsd_file_mark * 101 nfsd_file_mark_get(struct nfsd_file_mark *nfm) 102 { 103 if (!refcount_inc_not_zero(&nfm->nfm_ref)) 104 return NULL; 105 return nfm; 106 } 107 108 static void 109 nfsd_file_mark_put(struct nfsd_file_mark *nfm) 110 { 111 if (refcount_dec_and_test(&nfm->nfm_ref)) { 112 fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group); 113 fsnotify_put_mark(&nfm->nfm_mark); 114 } 115 } 116 117 static struct nfsd_file_mark * 118 nfsd_file_mark_find_or_create(struct nfsd_file *nf) 119 { 120 int err; 121 struct fsnotify_mark *mark; 122 struct nfsd_file_mark *nfm = NULL, *new; 123 struct inode *inode = nf->nf_inode; 124 125 do { 126 mutex_lock(&nfsd_file_fsnotify_group->mark_mutex); 127 mark = fsnotify_find_mark(&inode->i_fsnotify_marks, 128 nfsd_file_fsnotify_group); 129 if (mark) { 130 nfm = nfsd_file_mark_get(container_of(mark, 131 struct nfsd_file_mark, 132 nfm_mark)); 133 mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); 134 if (nfm) { 135 fsnotify_put_mark(mark); 136 break; 137 } 138 /* Avoid soft lockup race with nfsd_file_mark_put() */ 139 fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group); 140 fsnotify_put_mark(mark); 141 } else 142 mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); 143 144 /* allocate a new nfm */ 145 new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL); 146 if (!new) 147 return NULL; 148 fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group); 149 new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF; 150 refcount_set(&new->nfm_ref, 1); 151 152 err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0); 153 154 /* 155 * If the add was successful, then return the object. 156 * Otherwise, we need to put the reference we hold on the 157 * nfm_mark. The fsnotify code will take a reference and put 158 * it on failure, so we can't just free it directly. It's also 159 * not safe to call fsnotify_destroy_mark on it as the 160 * mark->group will be NULL. Thus, we can't let the nfm_ref 161 * counter drive the destruction at this point. 162 */ 163 if (likely(!err)) 164 nfm = new; 165 else 166 fsnotify_put_mark(&new->nfm_mark); 167 } while (unlikely(err == -EEXIST)); 168 169 return nfm; 170 } 171 172 static struct nfsd_file * 173 nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, 174 struct net *net) 175 { 176 struct nfsd_file *nf; 177 178 nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL); 179 if (nf) { 180 INIT_HLIST_NODE(&nf->nf_node); 181 INIT_LIST_HEAD(&nf->nf_lru); 182 nf->nf_file = NULL; 183 nf->nf_cred = get_current_cred(); 184 nf->nf_net = net; 185 nf->nf_flags = 0; 186 nf->nf_inode = inode; 187 nf->nf_hashval = hashval; 188 refcount_set(&nf->nf_ref, 1); 189 nf->nf_may = may & NFSD_FILE_MAY_MASK; 190 if (may & NFSD_MAY_NOT_BREAK_LEASE) { 191 if (may & NFSD_MAY_WRITE) 192 __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags); 193 if (may & NFSD_MAY_READ) 194 __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); 195 } 196 nf->nf_mark = NULL; 197 init_rwsem(&nf->nf_rwsem); 198 trace_nfsd_file_alloc(nf); 199 } 200 return nf; 201 } 202 203 static bool 204 nfsd_file_free(struct nfsd_file *nf) 205 { 206 bool flush = false; 207 208 trace_nfsd_file_put_final(nf); 209 if (nf->nf_mark) 210 nfsd_file_mark_put(nf->nf_mark); 211 if (nf->nf_file) { 212 get_file(nf->nf_file); 213 filp_close(nf->nf_file, NULL); 214 fput(nf->nf_file); 215 flush = true; 216 } 217 call_rcu(&nf->nf_rcu, nfsd_file_slab_free); 218 return flush; 219 } 220 221 static bool 222 nfsd_file_check_writeback(struct nfsd_file *nf) 223 { 224 struct file *file = nf->nf_file; 225 struct address_space *mapping; 226 227 if (!file || !(file->f_mode & FMODE_WRITE)) 228 return false; 229 mapping = file->f_mapping; 230 return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) || 231 mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); 232 } 233 234 static int 235 nfsd_file_check_write_error(struct nfsd_file *nf) 236 { 237 struct file *file = nf->nf_file; 238 239 if (!file || !(file->f_mode & FMODE_WRITE)) 240 return 0; 241 return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)); 242 } 243 244 static void 245 nfsd_file_do_unhash(struct nfsd_file *nf) 246 { 247 lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); 248 249 trace_nfsd_file_unhash(nf); 250 251 if (nfsd_file_check_write_error(nf)) 252 nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id)); 253 --nfsd_file_hashtbl[nf->nf_hashval].nfb_count; 254 hlist_del_rcu(&nf->nf_node); 255 atomic_long_dec(&nfsd_filecache_count); 256 } 257 258 static bool 259 nfsd_file_unhash(struct nfsd_file *nf) 260 { 261 if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 262 nfsd_file_do_unhash(nf); 263 if (!list_empty(&nf->nf_lru)) 264 list_lru_del(&nfsd_file_lru, &nf->nf_lru); 265 return true; 266 } 267 return false; 268 } 269 270 /* 271 * Return true if the file was unhashed. 272 */ 273 static bool 274 nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose) 275 { 276 lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); 277 278 trace_nfsd_file_unhash_and_release_locked(nf); 279 if (!nfsd_file_unhash(nf)) 280 return false; 281 /* keep final reference for nfsd_file_lru_dispose */ 282 if (refcount_dec_not_one(&nf->nf_ref)) 283 return true; 284 285 list_add(&nf->nf_lru, dispose); 286 return true; 287 } 288 289 static void 290 nfsd_file_put_noref(struct nfsd_file *nf) 291 { 292 trace_nfsd_file_put(nf); 293 294 if (refcount_dec_and_test(&nf->nf_ref)) { 295 WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); 296 nfsd_file_free(nf); 297 } 298 } 299 300 void 301 nfsd_file_put(struct nfsd_file *nf) 302 { 303 bool is_hashed; 304 305 set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); 306 if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) { 307 nfsd_file_put_noref(nf); 308 return; 309 } 310 311 filemap_flush(nf->nf_file->f_mapping); 312 is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; 313 nfsd_file_put_noref(nf); 314 if (is_hashed) 315 nfsd_file_schedule_laundrette(); 316 if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) 317 nfsd_file_gc(); 318 } 319 320 struct nfsd_file * 321 nfsd_file_get(struct nfsd_file *nf) 322 { 323 if (likely(refcount_inc_not_zero(&nf->nf_ref))) 324 return nf; 325 return NULL; 326 } 327 328 static void 329 nfsd_file_dispose_list(struct list_head *dispose) 330 { 331 struct nfsd_file *nf; 332 333 while(!list_empty(dispose)) { 334 nf = list_first_entry(dispose, struct nfsd_file, nf_lru); 335 list_del(&nf->nf_lru); 336 nfsd_file_put_noref(nf); 337 } 338 } 339 340 static void 341 nfsd_file_dispose_list_sync(struct list_head *dispose) 342 { 343 bool flush = false; 344 struct nfsd_file *nf; 345 346 while(!list_empty(dispose)) { 347 nf = list_first_entry(dispose, struct nfsd_file, nf_lru); 348 list_del(&nf->nf_lru); 349 if (!refcount_dec_and_test(&nf->nf_ref)) 350 continue; 351 if (nfsd_file_free(nf)) 352 flush = true; 353 } 354 if (flush) 355 flush_delayed_fput(); 356 } 357 358 static void 359 nfsd_file_list_remove_disposal(struct list_head *dst, 360 struct nfsd_fcache_disposal *l) 361 { 362 spin_lock(&l->lock); 363 list_splice_init(&l->freeme, dst); 364 spin_unlock(&l->lock); 365 } 366 367 static void 368 nfsd_file_list_add_disposal(struct list_head *files, struct net *net) 369 { 370 struct nfsd_fcache_disposal *l; 371 372 rcu_read_lock(); 373 list_for_each_entry_rcu(l, &laundrettes, list) { 374 if (l->net == net) { 375 spin_lock(&l->lock); 376 list_splice_tail_init(files, &l->freeme); 377 spin_unlock(&l->lock); 378 queue_work(nfsd_filecache_wq, &l->work); 379 break; 380 } 381 } 382 rcu_read_unlock(); 383 } 384 385 static void 386 nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src, 387 struct net *net) 388 { 389 struct nfsd_file *nf, *tmp; 390 391 list_for_each_entry_safe(nf, tmp, src, nf_lru) { 392 if (nf->nf_net == net) 393 list_move_tail(&nf->nf_lru, dst); 394 } 395 } 396 397 static void 398 nfsd_file_dispose_list_delayed(struct list_head *dispose) 399 { 400 LIST_HEAD(list); 401 struct nfsd_file *nf; 402 403 while(!list_empty(dispose)) { 404 nf = list_first_entry(dispose, struct nfsd_file, nf_lru); 405 nfsd_file_list_add_pernet(&list, dispose, nf->nf_net); 406 nfsd_file_list_add_disposal(&list, nf->nf_net); 407 } 408 } 409 410 /* 411 * Note this can deadlock with nfsd_file_cache_purge. 412 */ 413 static enum lru_status 414 nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, 415 spinlock_t *lock, void *arg) 416 __releases(lock) 417 __acquires(lock) 418 { 419 struct list_head *head = arg; 420 struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); 421 422 /* 423 * Do a lockless refcount check. The hashtable holds one reference, so 424 * we look to see if anything else has a reference, or if any have 425 * been put since the shrinker last ran. Those don't get unhashed and 426 * released. 427 * 428 * Note that in the put path, we set the flag and then decrement the 429 * counter. Here we check the counter and then test and clear the flag. 430 * That order is deliberate to ensure that we can do this locklessly. 431 */ 432 if (refcount_read(&nf->nf_ref) > 1) 433 goto out_skip; 434 435 /* 436 * Don't throw out files that are still undergoing I/O or 437 * that have uncleared errors pending. 438 */ 439 if (nfsd_file_check_writeback(nf)) 440 goto out_skip; 441 442 if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) 443 goto out_skip; 444 445 if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) 446 goto out_skip; 447 448 list_lru_isolate_move(lru, &nf->nf_lru, head); 449 return LRU_REMOVED; 450 out_skip: 451 return LRU_SKIP; 452 } 453 454 static unsigned long 455 nfsd_file_lru_walk_list(struct shrink_control *sc) 456 { 457 LIST_HEAD(head); 458 struct nfsd_file *nf; 459 unsigned long ret; 460 461 if (sc) 462 ret = list_lru_shrink_walk(&nfsd_file_lru, sc, 463 nfsd_file_lru_cb, &head); 464 else 465 ret = list_lru_walk(&nfsd_file_lru, 466 nfsd_file_lru_cb, 467 &head, LONG_MAX); 468 list_for_each_entry(nf, &head, nf_lru) { 469 spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); 470 nfsd_file_do_unhash(nf); 471 spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); 472 } 473 nfsd_file_dispose_list_delayed(&head); 474 return ret; 475 } 476 477 static void 478 nfsd_file_gc(void) 479 { 480 nfsd_file_lru_walk_list(NULL); 481 } 482 483 static void 484 nfsd_file_gc_worker(struct work_struct *work) 485 { 486 nfsd_file_gc(); 487 nfsd_file_schedule_laundrette(); 488 } 489 490 static unsigned long 491 nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc) 492 { 493 return list_lru_count(&nfsd_file_lru); 494 } 495 496 static unsigned long 497 nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) 498 { 499 return nfsd_file_lru_walk_list(sc); 500 } 501 502 static struct shrinker nfsd_file_shrinker = { 503 .scan_objects = nfsd_file_lru_scan, 504 .count_objects = nfsd_file_lru_count, 505 .seeks = 1, 506 }; 507 508 static void 509 __nfsd_file_close_inode(struct inode *inode, unsigned int hashval, 510 struct list_head *dispose) 511 { 512 struct nfsd_file *nf; 513 struct hlist_node *tmp; 514 515 spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); 516 hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) { 517 if (inode == nf->nf_inode) 518 nfsd_file_unhash_and_release_locked(nf, dispose); 519 } 520 spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); 521 } 522 523 /** 524 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file 525 * @inode: inode of the file to attempt to remove 526 * 527 * Walk the whole hash bucket, looking for any files that correspond to "inode". 528 * If any do, then unhash them and put the hashtable reference to them and 529 * destroy any that had their last reference put. Also ensure that any of the 530 * fputs also have their final __fput done as well. 531 */ 532 void 533 nfsd_file_close_inode_sync(struct inode *inode) 534 { 535 unsigned int hashval = (unsigned int)hash_long(inode->i_ino, 536 NFSD_FILE_HASH_BITS); 537 LIST_HEAD(dispose); 538 539 __nfsd_file_close_inode(inode, hashval, &dispose); 540 trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose)); 541 nfsd_file_dispose_list_sync(&dispose); 542 } 543 544 /** 545 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file 546 * @inode: inode of the file to attempt to remove 547 * 548 * Walk the whole hash bucket, looking for any files that correspond to "inode". 549 * If any do, then unhash them and put the hashtable reference to them and 550 * destroy any that had their last reference put. 551 */ 552 static void 553 nfsd_file_close_inode(struct inode *inode) 554 { 555 unsigned int hashval = (unsigned int)hash_long(inode->i_ino, 556 NFSD_FILE_HASH_BITS); 557 LIST_HEAD(dispose); 558 559 __nfsd_file_close_inode(inode, hashval, &dispose); 560 trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose)); 561 nfsd_file_dispose_list_delayed(&dispose); 562 } 563 564 /** 565 * nfsd_file_delayed_close - close unused nfsd_files 566 * @work: dummy 567 * 568 * Walk the LRU list and close any entries that have not been used since 569 * the last scan. 570 * 571 * Note this can deadlock with nfsd_file_cache_purge. 572 */ 573 static void 574 nfsd_file_delayed_close(struct work_struct *work) 575 { 576 LIST_HEAD(head); 577 struct nfsd_fcache_disposal *l = container_of(work, 578 struct nfsd_fcache_disposal, work); 579 580 nfsd_file_list_remove_disposal(&head, l); 581 nfsd_file_dispose_list(&head); 582 } 583 584 static int 585 nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, 586 void *data) 587 { 588 struct file_lock *fl = data; 589 590 /* Only close files for F_SETLEASE leases */ 591 if (fl->fl_flags & FL_LEASE) 592 nfsd_file_close_inode_sync(file_inode(fl->fl_file)); 593 return 0; 594 } 595 596 static struct notifier_block nfsd_file_lease_notifier = { 597 .notifier_call = nfsd_file_lease_notifier_call, 598 }; 599 600 static int 601 nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask, 602 struct inode *inode, struct inode *dir, 603 const struct qstr *name) 604 { 605 trace_nfsd_file_fsnotify_handle_event(inode, mask); 606 607 /* Should be no marks on non-regular files */ 608 if (!S_ISREG(inode->i_mode)) { 609 WARN_ON_ONCE(1); 610 return 0; 611 } 612 613 /* don't close files if this was not the last link */ 614 if (mask & FS_ATTRIB) { 615 if (inode->i_nlink) 616 return 0; 617 } 618 619 nfsd_file_close_inode(inode); 620 return 0; 621 } 622 623 624 static const struct fsnotify_ops nfsd_file_fsnotify_ops = { 625 .handle_inode_event = nfsd_file_fsnotify_handle_event, 626 .free_mark = nfsd_file_mark_free, 627 }; 628 629 int 630 nfsd_file_cache_init(void) 631 { 632 int ret = -ENOMEM; 633 unsigned int i; 634 635 clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); 636 637 if (nfsd_file_hashtbl) 638 return 0; 639 640 nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0); 641 if (!nfsd_filecache_wq) 642 goto out; 643 644 nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE, 645 sizeof(*nfsd_file_hashtbl), GFP_KERNEL); 646 if (!nfsd_file_hashtbl) { 647 pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n"); 648 goto out_err; 649 } 650 651 nfsd_file_slab = kmem_cache_create("nfsd_file", 652 sizeof(struct nfsd_file), 0, 0, NULL); 653 if (!nfsd_file_slab) { 654 pr_err("nfsd: unable to create nfsd_file_slab\n"); 655 goto out_err; 656 } 657 658 nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark", 659 sizeof(struct nfsd_file_mark), 0, 0, NULL); 660 if (!nfsd_file_mark_slab) { 661 pr_err("nfsd: unable to create nfsd_file_mark_slab\n"); 662 goto out_err; 663 } 664 665 666 ret = list_lru_init(&nfsd_file_lru); 667 if (ret) { 668 pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret); 669 goto out_err; 670 } 671 672 ret = register_shrinker(&nfsd_file_shrinker); 673 if (ret) { 674 pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret); 675 goto out_lru; 676 } 677 678 ret = lease_register_notifier(&nfsd_file_lease_notifier); 679 if (ret) { 680 pr_err("nfsd: unable to register lease notifier: %d\n", ret); 681 goto out_shrinker; 682 } 683 684 nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops); 685 if (IS_ERR(nfsd_file_fsnotify_group)) { 686 pr_err("nfsd: unable to create fsnotify group: %ld\n", 687 PTR_ERR(nfsd_file_fsnotify_group)); 688 nfsd_file_fsnotify_group = NULL; 689 goto out_notifier; 690 } 691 692 for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { 693 INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head); 694 spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock); 695 } 696 697 INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker); 698 out: 699 return ret; 700 out_notifier: 701 lease_unregister_notifier(&nfsd_file_lease_notifier); 702 out_shrinker: 703 unregister_shrinker(&nfsd_file_shrinker); 704 out_lru: 705 list_lru_destroy(&nfsd_file_lru); 706 out_err: 707 kmem_cache_destroy(nfsd_file_slab); 708 nfsd_file_slab = NULL; 709 kmem_cache_destroy(nfsd_file_mark_slab); 710 nfsd_file_mark_slab = NULL; 711 kfree(nfsd_file_hashtbl); 712 nfsd_file_hashtbl = NULL; 713 destroy_workqueue(nfsd_filecache_wq); 714 nfsd_filecache_wq = NULL; 715 goto out; 716 } 717 718 /* 719 * Note this can deadlock with nfsd_file_lru_cb. 720 */ 721 void 722 nfsd_file_cache_purge(struct net *net) 723 { 724 unsigned int i; 725 struct nfsd_file *nf; 726 struct hlist_node *next; 727 LIST_HEAD(dispose); 728 bool del; 729 730 if (!nfsd_file_hashtbl) 731 return; 732 733 for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { 734 struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i]; 735 736 spin_lock(&nfb->nfb_lock); 737 hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) { 738 if (net && nf->nf_net != net) 739 continue; 740 del = nfsd_file_unhash_and_release_locked(nf, &dispose); 741 742 /* 743 * Deadlock detected! Something marked this entry as 744 * unhased, but hasn't removed it from the hash list. 745 */ 746 WARN_ON_ONCE(!del); 747 } 748 spin_unlock(&nfb->nfb_lock); 749 nfsd_file_dispose_list(&dispose); 750 } 751 } 752 753 static struct nfsd_fcache_disposal * 754 nfsd_alloc_fcache_disposal(struct net *net) 755 { 756 struct nfsd_fcache_disposal *l; 757 758 l = kmalloc(sizeof(*l), GFP_KERNEL); 759 if (!l) 760 return NULL; 761 INIT_WORK(&l->work, nfsd_file_delayed_close); 762 l->net = net; 763 spin_lock_init(&l->lock); 764 INIT_LIST_HEAD(&l->freeme); 765 return l; 766 } 767 768 static void 769 nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l) 770 { 771 rcu_assign_pointer(l->net, NULL); 772 cancel_work_sync(&l->work); 773 nfsd_file_dispose_list(&l->freeme); 774 kfree_rcu(l, rcu); 775 } 776 777 static void 778 nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l) 779 { 780 spin_lock(&laundrette_lock); 781 list_add_tail_rcu(&l->list, &laundrettes); 782 spin_unlock(&laundrette_lock); 783 } 784 785 static void 786 nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l) 787 { 788 spin_lock(&laundrette_lock); 789 list_del_rcu(&l->list); 790 spin_unlock(&laundrette_lock); 791 } 792 793 static int 794 nfsd_alloc_fcache_disposal_net(struct net *net) 795 { 796 struct nfsd_fcache_disposal *l; 797 798 l = nfsd_alloc_fcache_disposal(net); 799 if (!l) 800 return -ENOMEM; 801 nfsd_add_fcache_disposal(l); 802 return 0; 803 } 804 805 static void 806 nfsd_free_fcache_disposal_net(struct net *net) 807 { 808 struct nfsd_fcache_disposal *l; 809 810 rcu_read_lock(); 811 list_for_each_entry_rcu(l, &laundrettes, list) { 812 if (l->net != net) 813 continue; 814 nfsd_del_fcache_disposal(l); 815 rcu_read_unlock(); 816 nfsd_free_fcache_disposal(l); 817 return; 818 } 819 rcu_read_unlock(); 820 } 821 822 int 823 nfsd_file_cache_start_net(struct net *net) 824 { 825 return nfsd_alloc_fcache_disposal_net(net); 826 } 827 828 void 829 nfsd_file_cache_shutdown_net(struct net *net) 830 { 831 nfsd_file_cache_purge(net); 832 nfsd_free_fcache_disposal_net(net); 833 } 834 835 void 836 nfsd_file_cache_shutdown(void) 837 { 838 set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); 839 840 lease_unregister_notifier(&nfsd_file_lease_notifier); 841 unregister_shrinker(&nfsd_file_shrinker); 842 /* 843 * make sure all callers of nfsd_file_lru_cb are done before 844 * calling nfsd_file_cache_purge 845 */ 846 cancel_delayed_work_sync(&nfsd_filecache_laundrette); 847 nfsd_file_cache_purge(NULL); 848 list_lru_destroy(&nfsd_file_lru); 849 rcu_barrier(); 850 fsnotify_put_group(nfsd_file_fsnotify_group); 851 nfsd_file_fsnotify_group = NULL; 852 kmem_cache_destroy(nfsd_file_slab); 853 nfsd_file_slab = NULL; 854 fsnotify_wait_marks_destroyed(); 855 kmem_cache_destroy(nfsd_file_mark_slab); 856 nfsd_file_mark_slab = NULL; 857 kfree(nfsd_file_hashtbl); 858 nfsd_file_hashtbl = NULL; 859 destroy_workqueue(nfsd_filecache_wq); 860 nfsd_filecache_wq = NULL; 861 } 862 863 static bool 864 nfsd_match_cred(const struct cred *c1, const struct cred *c2) 865 { 866 int i; 867 868 if (!uid_eq(c1->fsuid, c2->fsuid)) 869 return false; 870 if (!gid_eq(c1->fsgid, c2->fsgid)) 871 return false; 872 if (c1->group_info == NULL || c2->group_info == NULL) 873 return c1->group_info == c2->group_info; 874 if (c1->group_info->ngroups != c2->group_info->ngroups) 875 return false; 876 for (i = 0; i < c1->group_info->ngroups; i++) { 877 if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) 878 return false; 879 } 880 return true; 881 } 882 883 static struct nfsd_file * 884 nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, 885 unsigned int hashval, struct net *net) 886 { 887 struct nfsd_file *nf; 888 unsigned char need = may_flags & NFSD_FILE_MAY_MASK; 889 890 hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, 891 nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) { 892 if (nf->nf_may != need) 893 continue; 894 if (nf->nf_inode != inode) 895 continue; 896 if (nf->nf_net != net) 897 continue; 898 if (!nfsd_match_cred(nf->nf_cred, current_cred())) 899 continue; 900 if (nfsd_file_get(nf) != NULL) 901 return nf; 902 } 903 return NULL; 904 } 905 906 /** 907 * nfsd_file_is_cached - are there any cached open files for this fh? 908 * @inode: inode of the file to check 909 * 910 * Scan the hashtable for open files that match this fh. Returns true if there 911 * are any, and false if not. 912 */ 913 bool 914 nfsd_file_is_cached(struct inode *inode) 915 { 916 bool ret = false; 917 struct nfsd_file *nf; 918 unsigned int hashval; 919 920 hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); 921 922 rcu_read_lock(); 923 hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, 924 nf_node) { 925 if (inode == nf->nf_inode) { 926 ret = true; 927 break; 928 } 929 } 930 rcu_read_unlock(); 931 trace_nfsd_file_is_cached(inode, hashval, (int)ret); 932 return ret; 933 } 934 935 __be32 936 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, 937 unsigned int may_flags, struct nfsd_file **pnf) 938 { 939 __be32 status; 940 struct net *net = SVC_NET(rqstp); 941 struct nfsd_file *nf, *new; 942 struct inode *inode; 943 unsigned int hashval; 944 bool retry = true; 945 946 /* FIXME: skip this if fh_dentry is already set? */ 947 status = fh_verify(rqstp, fhp, S_IFREG, 948 may_flags|NFSD_MAY_OWNER_OVERRIDE); 949 if (status != nfs_ok) 950 return status; 951 952 inode = d_inode(fhp->fh_dentry); 953 hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); 954 retry: 955 rcu_read_lock(); 956 nf = nfsd_file_find_locked(inode, may_flags, hashval, net); 957 rcu_read_unlock(); 958 if (nf) 959 goto wait_for_construction; 960 961 new = nfsd_file_alloc(inode, may_flags, hashval, net); 962 if (!new) { 963 trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, 964 NULL, nfserr_jukebox); 965 return nfserr_jukebox; 966 } 967 968 spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); 969 nf = nfsd_file_find_locked(inode, may_flags, hashval, net); 970 if (nf == NULL) 971 goto open_file; 972 spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); 973 nfsd_file_slab_free(&new->nf_rcu); 974 975 wait_for_construction: 976 wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE); 977 978 /* Did construction of this file fail? */ 979 if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 980 if (!retry) { 981 status = nfserr_jukebox; 982 goto out; 983 } 984 retry = false; 985 nfsd_file_put_noref(nf); 986 goto retry; 987 } 988 989 this_cpu_inc(nfsd_file_cache_hits); 990 991 if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) { 992 bool write = (may_flags & NFSD_MAY_WRITE); 993 994 if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) || 995 (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) { 996 status = nfserrno(nfsd_open_break_lease( 997 file_inode(nf->nf_file), may_flags)); 998 if (status == nfs_ok) { 999 clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); 1000 if (write) 1001 clear_bit(NFSD_FILE_BREAK_WRITE, 1002 &nf->nf_flags); 1003 } 1004 } 1005 } 1006 out: 1007 if (status == nfs_ok) { 1008 *pnf = nf; 1009 } else { 1010 nfsd_file_put(nf); 1011 nf = NULL; 1012 } 1013 1014 trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status); 1015 return status; 1016 open_file: 1017 nf = new; 1018 /* Take reference for the hashtable */ 1019 refcount_inc(&nf->nf_ref); 1020 __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); 1021 __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); 1022 list_lru_add(&nfsd_file_lru, &nf->nf_lru); 1023 hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head); 1024 ++nfsd_file_hashtbl[hashval].nfb_count; 1025 nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, 1026 nfsd_file_hashtbl[hashval].nfb_count); 1027 spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); 1028 if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD) 1029 nfsd_file_gc(); 1030 1031 nf->nf_mark = nfsd_file_mark_find_or_create(nf); 1032 if (nf->nf_mark) 1033 status = nfsd_open_verified(rqstp, fhp, S_IFREG, 1034 may_flags, &nf->nf_file); 1035 else 1036 status = nfserr_jukebox; 1037 /* 1038 * If construction failed, or we raced with a call to unlink() 1039 * then unhash. 1040 */ 1041 if (status != nfs_ok || inode->i_nlink == 0) { 1042 bool do_free; 1043 spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); 1044 do_free = nfsd_file_unhash(nf); 1045 spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); 1046 if (do_free) 1047 nfsd_file_put_noref(nf); 1048 } 1049 clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); 1050 smp_mb__after_atomic(); 1051 wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); 1052 goto out; 1053 } 1054 1055 /* 1056 * Note that fields may be added, removed or reordered in the future. Programs 1057 * scraping this file for info should test the labels to ensure they're 1058 * getting the correct field. 1059 */ 1060 static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) 1061 { 1062 unsigned int i, count = 0, longest = 0; 1063 unsigned long hits = 0; 1064 1065 /* 1066 * No need for spinlocks here since we're not terribly interested in 1067 * accuracy. We do take the nfsd_mutex simply to ensure that we 1068 * don't end up racing with server shutdown 1069 */ 1070 mutex_lock(&nfsd_mutex); 1071 if (nfsd_file_hashtbl) { 1072 for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { 1073 count += nfsd_file_hashtbl[i].nfb_count; 1074 longest = max(longest, nfsd_file_hashtbl[i].nfb_count); 1075 } 1076 } 1077 mutex_unlock(&nfsd_mutex); 1078 1079 for_each_possible_cpu(i) 1080 hits += per_cpu(nfsd_file_cache_hits, i); 1081 1082 seq_printf(m, "total entries: %u\n", count); 1083 seq_printf(m, "longest chain: %u\n", longest); 1084 seq_printf(m, "cache hits: %lu\n", hits); 1085 return 0; 1086 } 1087 1088 int nfsd_file_cache_stats_open(struct inode *inode, struct file *file) 1089 { 1090 return single_open(file, nfsd_file_cache_stats_show, NULL); 1091 } 1092