/*
 * Open file cache.
 *
 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
 */

#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>

#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"

#define NFSDDBG_FACILITY	NFSDDBG_FH

/* FIXME: dynamically size this for the machine somehow? */
#define NFSD_FILE_HASH_BITS	12
#define NFSD_FILE_HASH_SIZE	(1 << NFSD_FILE_HASH_BITS)
#define NFSD_LAUNDRETTE_DELAY	(2 * HZ)

#define NFSD_FILE_SHUTDOWN	(1)
#define NFSD_FILE_LRU_THRESHOLD	(4096UL)
#define NFSD_FILE_LRU_LIMIT	(NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

struct nfsd_fcache_bucket {
	struct hlist_head	nfb_head;
	spinlock_t		nfb_lock;
	unsigned int		nfb_count;
	unsigned int		nfb_maxcount;
};

static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);

struct nfsd_fcache_disposal {
	struct work_struct	work;
	spinlock_t		lock;
	struct list_head	freeme;
};

static struct workqueue_struct *nfsd_filecache_wq __read_mostly;

static struct kmem_cache	*nfsd_file_slab;
static struct kmem_cache	*nfsd_file_mark_slab;
static struct nfsd_fcache_bucket *nfsd_file_hashtbl;
static struct list_lru		nfsd_file_lru;
static long			nfsd_file_lru_flags;
static struct fsnotify_group	*nfsd_file_fsnotify_group;
static atomic_long_t		nfsd_filecache_count;
static struct delayed_work	nfsd_filecache_laundrette;

static void nfsd_file_gc(void);

static void
nfsd_file_schedule_laundrette(void)
{
	long count = atomic_long_read(&nfsd_filecache_count);

	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
		return;

	queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
			   NFSD_LAUNDRETTE_DELAY);
}

static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}

static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}

static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
	if (!refcount_inc_not_zero(&nfm->nfm_ref))
		return NULL;
	return nfm;
}

static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (refcount_dec_and_test(&nfm->nfm_ref)) {
		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}

static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf)
{
	int			err;
	struct fsnotify_mark	*mark;
	struct nfsd_file_mark	*nfm = NULL, *new;
	struct inode		*inode = nf->nf_inode;

	do {
		mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
					  nfsd_file_fsnotify_group);
		if (mark) {
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
			if (nfm) {
				fsnotify_put_mark(mark);
				break;
			}
			/* Avoid soft lockup race with nfsd_file_mark_put() */
			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
			fsnotify_put_mark(mark);
		} else
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		refcount_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}
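
/*
 * nfsd_file_alloc() below only sets up the cache entry itself; nf_file
 * stays NULL and the backing struct file is opened later, on the
 * open_file path of nfsd_file_acquire(), once the entry has been hashed.
 */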

static struct nfsd_file *
nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
		struct net *net)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (nf) {
		INIT_HLIST_NODE(&nf->nf_node);
		INIT_LIST_HEAD(&nf->nf_lru);
		nf->nf_file = NULL;
		nf->nf_cred = get_current_cred();
		nf->nf_net = net;
		nf->nf_flags = 0;
		nf->nf_inode = inode;
		nf->nf_hashval = hashval;
		refcount_set(&nf->nf_ref, 1);
		nf->nf_may = may & NFSD_FILE_MAY_MASK;
		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
			if (may & NFSD_MAY_WRITE)
				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
			if (may & NFSD_MAY_READ)
				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
		}
		nf->nf_mark = NULL;
		trace_nfsd_file_alloc(nf);
	}
	return nf;
}

static bool
nfsd_file_free(struct nfsd_file *nf)
{
	bool flush = false;

	trace_nfsd_file_put_final(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
		get_file(nf->nf_file);
		filp_close(nf->nf_file, NULL);
		fput(nf->nf_file);
		flush = true;
	}
	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
	return flush;
}

static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;
	struct address_space *mapping;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return false;
	mapping = file->f_mapping;
	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}

static int
nfsd_file_check_write_error(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return 0;
	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
}

static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash(nf);

	if (nfsd_file_check_write_error(nf))
		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
	hlist_del_rcu(&nf->nf_node);
	atomic_long_dec(&nfsd_filecache_count);
}

static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_do_unhash(nf);
		if (!list_empty(&nf->nf_lru))
			list_lru_del(&nfsd_file_lru, &nf->nf_lru);
		return true;
	}
	return false;
}
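
/*
 * Reference counting, as implemented below: the hash table owns one
 * reference to every hashed nfsd_file, and each caller that acquires an
 * entry owns another. Unhashing drops the table's reference, and
 * nfsd_file_free() runs only once the final reference is put.
 */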

/*
 * Return true if the file was unhashed.
 */
static bool
nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash_and_release_locked(nf);
	if (!nfsd_file_unhash(nf))
		return false;
	/* keep the final reference for the disposal list */
	if (refcount_dec_not_one(&nf->nf_ref))
		return true;

	list_add(&nf->nf_lru, dispose);
	return true;
}

static void
nfsd_file_put_noref(struct nfsd_file *nf)
{
	trace_nfsd_file_put(nf);

	if (refcount_dec_and_test(&nf->nf_ref)) {
		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
		nfsd_file_free(nf);
	}
}

void
nfsd_file_put(struct nfsd_file *nf)
{
	bool is_hashed;

	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) {
		nfsd_file_put_noref(nf);
		return;
	}

	filemap_flush(nf->nf_file->f_mapping);
	is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
	nfsd_file_put_noref(nf);
	if (is_hashed)
		nfsd_file_schedule_laundrette();
	if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
		nfsd_file_gc();
}

struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
	if (likely(refcount_inc_not_zero(&nf->nf_ref)))
		return nf;
	return NULL;
}
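
/*
 * Typical caller pattern, as an illustrative sketch only (the real
 * callers live elsewhere in nfsd):
 *
 *	struct nfsd_file *nf;
 *	__be32 status;
 *
 *	status = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
 *	if (status == nfs_ok) {
 *		... do I/O through nf->nf_file ...
 *		nfsd_file_put(nf);
 *	}
 */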

static void
nfsd_file_dispose_list(struct list_head *dispose)
{
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_put_noref(nf);
	}
}

static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
	bool flush = false;
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		if (!refcount_dec_and_test(&nf->nf_ref))
			continue;
		if (nfsd_file_free(nf))
			flush = true;
	}
	if (flush)
		flush_delayed_fput();
}

static void
nfsd_file_list_remove_disposal(struct list_head *dst,
			       struct nfsd_fcache_disposal *l)
{
	spin_lock(&l->lock);
	list_splice_init(&l->freeme, dst);
	spin_unlock(&l->lock);
}

static void
nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct nfsd_fcache_disposal *l = nn->fcache_disposal;

	spin_lock(&l->lock);
	list_splice_tail_init(files, &l->freeme);
	spin_unlock(&l->lock);
	queue_work(nfsd_filecache_wq, &l->work);
}

static void
nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
			  struct net *net)
{
	struct nfsd_file *nf, *tmp;

	list_for_each_entry_safe(nf, tmp, src, nf_lru) {
		if (nf->nf_net == net)
			list_move_tail(&nf->nf_lru, dst);
	}
}

static void
nfsd_file_dispose_list_delayed(struct list_head *dispose)
{
	LIST_HEAD(list);
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
		nfsd_file_list_add_disposal(&list, nf->nf_net);
	}
}
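
/*
 * Garbage collection below works by isolating idle entries from the LRU,
 * queueing them per net namespace on nfsd_fcache_disposal->freeme, and
 * letting nfsd_file_delayed_close() put them from the nfsd_filecache_wq
 * workqueue.
 */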

/*
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/*
	 * Do a lockless refcount check. The hashtable holds one reference, so
	 * we look to see if anything else has a reference, or if any have
	 * been put since the shrinker last ran. Those don't get unhashed and
	 * released.
	 *
	 * Note that in the put path, we set the flag and then decrement the
	 * counter. Here we check the counter and then test and clear the flag.
	 * That order is deliberate to ensure that we can do this locklessly.
	 */
	if (refcount_read(&nf->nf_ref) > 1)
		goto out_skip;

	/*
	 * Don't throw out files that are still undergoing I/O or
	 * that have uncleared errors pending.
	 */
	if (nfsd_file_check_writeback(nf))
		goto out_skip;

	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
		goto out_skip;

	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
		goto out_skip;

	list_lru_isolate_move(lru, &nf->nf_lru, head);
	return LRU_REMOVED;
out_skip:
	return LRU_SKIP;
}

static unsigned long
nfsd_file_lru_walk_list(struct shrink_control *sc)
{
	LIST_HEAD(head);
	struct nfsd_file *nf;
	unsigned long ret;

	if (sc)
		ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
					   nfsd_file_lru_cb, &head);
	else
		ret = list_lru_walk(&nfsd_file_lru,
				    nfsd_file_lru_cb,
				    &head, LONG_MAX);
	list_for_each_entry(nf, &head, nf_lru) {
		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_do_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
	}
	nfsd_file_dispose_list_delayed(&head);
	return ret;
}

static void
nfsd_file_gc(void)
{
	nfsd_file_lru_walk_list(NULL);
}

static void
nfsd_file_gc_worker(struct work_struct *work)
{
	nfsd_file_gc();
	nfsd_file_schedule_laundrette();
}

static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_count(&nfsd_file_lru);
}

static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	return nfsd_file_lru_walk_list(sc);
}

static struct shrinker nfsd_file_shrinker = {
	.scan_objects = nfsd_file_lru_scan,
	.count_objects = nfsd_file_lru_count,
	.seeks = 1,
};
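
/*
 * Cache entries are hashed by inode number: hash_long(inode->i_ino,
 * NFSD_FILE_HASH_BITS) picks the bucket, so closing all cached opens of
 * an inode only requires walking a single hash chain.
 */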

static void
__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
			struct list_head *dispose)
{
	struct nfsd_file	*nf;
	struct hlist_node	*tmp;

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
		if (inode == nf->nf_inode)
			nfsd_file_unhash_and_release_locked(nf, dispose);
	}
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
}

/**
 * nfsd_file_close_inode_sync - attempt to forcibly close an nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, unhash them, put the hashtable's reference to them, and destroy
 * any that had their last reference put. Also ensure that any of the fputs
 * have their final __fput done as well.
 */
void
nfsd_file_close_inode_sync(struct inode *inode)
{
	unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_sync(&dispose);
}

/**
 * nfsd_file_close_inode - attempt a delayed close of an nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, unhash them, put the hashtable's reference to them, and destroy
 * any that had their last reference put.
 */
static void
nfsd_file_close_inode(struct inode *inode)
{
	unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_delayed(&dispose);
}

/**
 * nfsd_file_delayed_close - close nfsd_files queued for disposal
 * @work: the work_struct embedded in a struct nfsd_fcache_disposal
 *
 * Splice this net's list of nfsd_files awaiting disposal onto a private
 * list, then put each one's remaining reference, closing the backing
 * files.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static void
nfsd_file_delayed_close(struct work_struct *work)
{
	LIST_HEAD(head);
	struct nfsd_fcache_disposal *l = container_of(work,
			struct nfsd_fcache_disposal, work);

	nfsd_file_list_remove_disposal(&head, l);
	nfsd_file_dispose_list(&head);
}

static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
			      void *data)
{
	struct file_lock *fl = data;

	/* Only close files for F_SETLEASE leases */
	if (fl->fl_flags & FL_LEASE)
		nfsd_file_close_inode_sync(file_inode(fl->fl_file));
	return 0;
}

static struct notifier_block nfsd_file_lease_notifier = {
	.notifier_call = nfsd_file_lease_notifier_call,
};
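
/*
 * Every cached file holds an fsnotify mark on its inode, watching
 * FS_ATTRIB and FS_DELETE_SELF (see nfsd_file_mark_find_or_create()).
 * The handler below uses those events to close cached files once their
 * last link is gone, so the cache doesn't pin unlinked inodes.
 */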

static int
nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
				struct inode *inode, struct inode *dir,
				const struct qstr *name, u32 cookie)
{
	if (WARN_ON_ONCE(!inode))
		return 0;

	trace_nfsd_file_fsnotify_handle_event(inode, mask);

	/* Should be no marks on non-regular files */
	if (!S_ISREG(inode->i_mode)) {
		WARN_ON_ONCE(1);
		return 0;
	}

	/* don't close files if this was not the last link */
	if (mask & FS_ATTRIB) {
		if (inode->i_nlink)
			return 0;
	}

	nfsd_file_close_inode(inode);
	return 0;
}

static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
	.handle_inode_event = nfsd_file_fsnotify_handle_event,
	.free_mark = nfsd_file_mark_free,
};

int
nfsd_file_cache_init(void)
{
	int ret = -ENOMEM;
	unsigned int i;

	clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	if (nfsd_file_hashtbl)
		return 0;

	nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
	if (!nfsd_filecache_wq)
		goto out;

	nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
				    sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
	if (!nfsd_file_hashtbl) {
		pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
		goto out_err;
	}

	nfsd_file_slab = kmem_cache_create("nfsd_file",
					   sizeof(struct nfsd_file), 0, 0, NULL);
	if (!nfsd_file_slab) {
		pr_err("nfsd: unable to create nfsd_file_slab\n");
		goto out_err;
	}

	nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
					sizeof(struct nfsd_file_mark), 0, 0, NULL);
	if (!nfsd_file_mark_slab) {
		pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
		goto out_err;
	}

	ret = list_lru_init(&nfsd_file_lru);
	if (ret) {
		pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
		goto out_err;
	}

	ret = register_shrinker(&nfsd_file_shrinker);
	if (ret) {
		pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
		goto out_lru;
	}

	ret = lease_register_notifier(&nfsd_file_lease_notifier);
	if (ret) {
		pr_err("nfsd: unable to register lease notifier: %d\n", ret);
		goto out_shrinker;
	}

	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
	if (IS_ERR(nfsd_file_fsnotify_group)) {
		pr_err("nfsd: unable to create fsnotify group: %ld\n",
			PTR_ERR(nfsd_file_fsnotify_group));
		ret = PTR_ERR(nfsd_file_fsnotify_group);
		nfsd_file_fsnotify_group = NULL;
		goto out_notifier;
	}

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
		spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
	}

	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
out:
	return ret;
out_notifier:
	lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
	unregister_shrinker(&nfsd_file_shrinker);
out_lru:
	list_lru_destroy(&nfsd_file_lru);
out_err:
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
	goto out;
}
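
/*
 * nfsd_file_cache_init() pairs with nfsd_file_cache_shutdown(), and
 * nfsd_file_cache_start_net() with nfsd_file_cache_shutdown_net(). The
 * error unwind in init above releases resources in the reverse order of
 * their setup.
 */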

/*
 * Note this can deadlock with nfsd_file_lru_cb.
 */
void
nfsd_file_cache_purge(struct net *net)
{
	unsigned int		i;
	struct nfsd_file	*nf;
	struct hlist_node	*next;
	LIST_HEAD(dispose);
	bool del;

	if (!nfsd_file_hashtbl)
		return;

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];

		spin_lock(&nfb->nfb_lock);
		hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
			if (net && nf->nf_net != net)
				continue;
			del = nfsd_file_unhash_and_release_locked(nf, &dispose);

			/*
			 * Deadlock detected! Something marked this entry as
			 * unhashed, but hasn't removed it from the hash list.
			 */
			WARN_ON_ONCE(!del);
		}
		spin_unlock(&nfb->nfb_lock);
		nfsd_file_dispose_list(&dispose);
	}
}

static struct nfsd_fcache_disposal *
nfsd_alloc_fcache_disposal(void)
{
	struct nfsd_fcache_disposal *l;

	l = kmalloc(sizeof(*l), GFP_KERNEL);
	if (!l)
		return NULL;
	INIT_WORK(&l->work, nfsd_file_delayed_close);
	spin_lock_init(&l->lock);
	INIT_LIST_HEAD(&l->freeme);
	return l;
}

static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	cancel_work_sync(&l->work);
	nfsd_file_dispose_list(&l->freeme);
	kfree(l);
}

static void
nfsd_free_fcache_disposal_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct nfsd_fcache_disposal *l = nn->fcache_disposal;

	nfsd_free_fcache_disposal(l);
}

int
nfsd_file_cache_start_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	nn->fcache_disposal = nfsd_alloc_fcache_disposal();
	return nn->fcache_disposal ? 0 : -ENOMEM;
}

void
nfsd_file_cache_shutdown_net(struct net *net)
{
	nfsd_file_cache_purge(net);
	nfsd_free_fcache_disposal_net(net);
}

void
nfsd_file_cache_shutdown(void)
{
	set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	lease_unregister_notifier(&nfsd_file_lease_notifier);
	unregister_shrinker(&nfsd_file_shrinker);
	/*
	 * make sure all callers of nfsd_file_lru_cb are done before
	 * calling nfsd_file_cache_purge
	 */
	cancel_delayed_work_sync(&nfsd_filecache_laundrette);
	nfsd_file_cache_purge(NULL);
	list_lru_destroy(&nfsd_file_lru);
	rcu_barrier();
	fsnotify_put_group(nfsd_file_fsnotify_group);
	nfsd_file_fsnotify_group = NULL;
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	fsnotify_wait_marks_destroyed();
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
}

static bool
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
{
	int i;

	if (!uid_eq(c1->fsuid, c2->fsuid))
		return false;
	if (!gid_eq(c1->fsgid, c2->fsgid))
		return false;
	if (c1->group_info == NULL || c2->group_info == NULL)
		return c1->group_info == c2->group_info;
	if (c1->group_info->ngroups != c2->group_info->ngroups)
		return false;
	for (i = 0; i < c1->group_info->ngroups; i++) {
		if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
			return false;
	}
	return true;
}
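
/*
 * A lookup only hits when the inode, the NFSD_MAY_READ/WRITE bits, the
 * net namespace, and the caller's credentials all match, so different
 * users get distinct cached opens of the same file.
 */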

static struct nfsd_file *
nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
		      unsigned int hashval, struct net *net)
{
	struct nfsd_file *nf;
	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;

	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
			nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) {
		if (nf->nf_may != need)
			continue;
		if (nf->nf_inode != inode)
			continue;
		if (nf->nf_net != net)
			continue;
		if (!nfsd_match_cred(nf->nf_cred, current_cred()))
			continue;
		if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags))
			continue;
		if (nfsd_file_get(nf) != NULL)
			return nf;
	}
	return NULL;
}

/**
 * nfsd_file_is_cached - are there any cached open files for this inode?
 * @inode: inode of the file to check
 *
 * Scan the hashtable for open files that match this inode. Returns true if
 * there are any, and false if not.
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
	bool			ret = false;
	struct nfsd_file	*nf;
	unsigned int		hashval;

	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);

	rcu_read_lock();
	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if (inode == nf->nf_inode) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	trace_nfsd_file_is_cached(inode, hashval, (int)ret);
	return ret;
}

__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
{
	__be32	status;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_file *nf, *new;
	struct inode *inode;
	unsigned int hashval;
	bool retry = true;

	/* FIXME: skip this if fh_dentry is already set? */
	status = fh_verify(rqstp, fhp, S_IFREG,
				may_flags|NFSD_MAY_OWNER_OVERRIDE);
	if (status != nfs_ok)
		return status;

	inode = d_inode(fhp->fh_dentry);
	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
retry:
	rcu_read_lock();
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	rcu_read_unlock();
	if (nf)
		goto wait_for_construction;

	new = nfsd_file_alloc(inode, may_flags, hashval, net);
	if (!new) {
		trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
					NULL, nfserr_jukebox);
		return nfserr_jukebox;
	}

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	if (nf == NULL)
		goto open_file;
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nfsd_file_slab_free(&new->nf_rcu);

wait_for_construction:
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		if (!retry) {
			status = nfserr_jukebox;
			goto out;
		}
		retry = false;
		nfsd_file_put_noref(nf);
		goto retry;
	}

	this_cpu_inc(nfsd_file_cache_hits);

	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
		bool write = (may_flags & NFSD_MAY_WRITE);

		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
			status = nfserrno(nfsd_open_break_lease(
					file_inode(nf->nf_file), may_flags));
			if (status == nfs_ok) {
				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
				if (write)
					clear_bit(NFSD_FILE_BREAK_WRITE,
						  &nf->nf_flags);
			}
		}
	}
out:
	if (status == nfs_ok) {
		*pnf = nf;
	} else {
		nfsd_file_put(nf);
		nf = NULL;
	}

	trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
	return status;
open_file:
	nf = new;
	/* Take reference for the hashtable */
	refcount_inc(&nf->nf_ref);
	__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
	__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
	list_lru_add(&nfsd_file_lru, &nf->nf_lru);
	hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
	++nfsd_file_hashtbl[hashval].nfb_count;
	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
			nfsd_file_hashtbl[hashval].nfb_count);
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
		nfsd_file_gc();

	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
	if (nf->nf_mark)
		status = nfsd_open_verified(rqstp, fhp, S_IFREG,
					    may_flags, &nf->nf_file);
	else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 */
	if (status != nfs_ok || inode->i_nlink == 0) {
		bool do_free;

		spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
		do_free = nfsd_file_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
		if (do_free)
			nfsd_file_put_noref(nf);
	}
	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
	smp_mb__after_atomic();
	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
	goto out;
}
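
/*
 * Example of the stats output produced below (the values shown are
 * illustrative only):
 *
 *	total entries: 720
 *	longest chain: 2
 *	cache hits: 75918
 */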

/*
 * Note that fields may be added, removed or reordered in the future. Programs
 * scraping this file for info should test the labels to ensure they're
 * getting the correct field.
 */
static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
	unsigned int i, count = 0, longest = 0;
	unsigned long hits = 0;

	/*
	 * No need for spinlocks here since we're not terribly interested in
	 * accuracy. We do take the nfsd_mutex simply to ensure that we
	 * don't end up racing with server shutdown.
	 */
	mutex_lock(&nfsd_mutex);
	if (nfsd_file_hashtbl) {
		for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
			count += nfsd_file_hashtbl[i].nfb_count;
			longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
		}
	}
	mutex_unlock(&nfsd_mutex);

	for_each_possible_cpu(i)
		hits += per_cpu(nfsd_file_cache_hits, i);

	seq_printf(m, "total entries: %u\n", count);
	seq_printf(m, "longest chain: %u\n", longest);
	seq_printf(m, "cache hits: %lu\n", hits);
	return 0;
}

int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, nfsd_file_cache_stats_show, NULL);
}