/*
 * Open file cache.
 *
 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
 */

#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>

#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"

#define NFSDDBG_FACILITY	NFSDDBG_FH

/* FIXME: dynamically size this for the machine somehow? */
#define NFSD_FILE_HASH_BITS	12
#define NFSD_FILE_HASH_SIZE	(1 << NFSD_FILE_HASH_BITS)
#define NFSD_LAUNDRETTE_DELAY	(2 * HZ)

#define NFSD_FILE_SHUTDOWN	(1)
#define NFSD_FILE_LRU_THRESHOLD	(4096UL)
#define NFSD_FILE_LRU_LIMIT	(NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

struct nfsd_fcache_bucket {
	struct hlist_head	nfb_head;
	spinlock_t		nfb_lock;
	unsigned int		nfb_count;
	unsigned int		nfb_maxcount;
};

static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);

struct nfsd_fcache_disposal {
	struct work_struct work;
	spinlock_t lock;
	struct list_head freeme;
};

static struct workqueue_struct *nfsd_filecache_wq __read_mostly;

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
static struct list_lru			nfsd_file_lru;
static long				nfsd_file_lru_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
static atomic_long_t			nfsd_filecache_count;
static struct delayed_work		nfsd_filecache_laundrette;

static void nfsd_file_gc(void);

static void
nfsd_file_schedule_laundrette(void)
{
	long count = atomic_long_read(&nfsd_filecache_count);

	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
		return;

	queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
			   NFSD_LAUNDRETTE_DELAY);
}

static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}

static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}

static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
	if (!refcount_inc_not_zero(&nfm->nfm_ref))
		return NULL;
	return nfm;
}

static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (refcount_dec_and_test(&nfm->nfm_ref)) {
		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}
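
/*
 * Find the nfsd_file_mark for @nf's inode and take a reference to it, or
 * allocate and attach a new one if there isn't one already. Returns NULL
 * if the allocation or the mark attachment fails.
 * (Summary comment added for clarity; behavior as implemented below.)
 */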
static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf)
{
	int err;
	struct fsnotify_mark *mark;
	struct nfsd_file_mark *nfm = NULL, *new;
	struct inode *inode = nf->nf_inode;

	do {
		fsnotify_group_lock(nfsd_file_fsnotify_group);
		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
					  nfsd_file_fsnotify_group);
		if (mark) {
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			fsnotify_group_unlock(nfsd_file_fsnotify_group);
			if (nfm) {
				fsnotify_put_mark(mark);
				break;
			}
			/* Avoid soft lockup race with nfsd_file_mark_put() */
			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
			fsnotify_put_mark(mark);
		} else {
			fsnotify_group_unlock(nfsd_file_fsnotify_group);
		}

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		refcount_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}

static struct nfsd_file *
nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
		struct net *net)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (nf) {
		INIT_HLIST_NODE(&nf->nf_node);
		INIT_LIST_HEAD(&nf->nf_lru);
		nf->nf_file = NULL;
		nf->nf_cred = get_current_cred();
		nf->nf_net = net;
		nf->nf_flags = 0;
		nf->nf_inode = inode;
		nf->nf_hashval = hashval;
		refcount_set(&nf->nf_ref, 1);
		nf->nf_may = may & NFSD_FILE_MAY_MASK;
		nf->nf_mark = NULL;
		trace_nfsd_file_alloc(nf);
	}
	return nf;
}

static bool
nfsd_file_free(struct nfsd_file *nf)
{
	bool flush = false;

	trace_nfsd_file_put_final(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
		get_file(nf->nf_file);
		filp_close(nf->nf_file, NULL);
		fput(nf->nf_file);
		flush = true;
	}
	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
	return flush;
}

static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;
	struct address_space *mapping;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return false;
	mapping = file->f_mapping;
	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}

static int
nfsd_file_check_write_error(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return 0;
	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
}

static void
nfsd_file_flush(struct nfsd_file *nf)
{
	if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0)
		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
}

static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash(nf);

	if (nfsd_file_check_write_error(nf))
		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
	hlist_del_rcu(&nf->nf_node);
	atomic_long_dec(&nfsd_filecache_count);
}
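
/*
 * Unhash @nf and remove it from the LRU, if it is still hashed. The caller
 * must hold the bucket lock. Returns true if this call did the unhash.
 * (Summary comment added for clarity; behavior as implemented below.)
 */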
static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_do_unhash(nf);
		if (!list_empty(&nf->nf_lru))
			list_lru_del(&nfsd_file_lru, &nf->nf_lru);
		return true;
	}
	return false;
}

/*
 * Return true if the file was unhashed.
 */
static bool
nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash_and_release_locked(nf);
	if (!nfsd_file_unhash(nf))
		return false;
	/* keep final reference for nfsd_file_lru_dispose */
	if (refcount_dec_not_one(&nf->nf_ref))
		return true;

	list_add(&nf->nf_lru, dispose);
	return true;
}

static void
nfsd_file_put_noref(struct nfsd_file *nf)
{
	trace_nfsd_file_put(nf);

	if (refcount_dec_and_test(&nf->nf_ref)) {
		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
		nfsd_file_free(nf);
	}
}

void
nfsd_file_put(struct nfsd_file *nf)
{
	might_sleep();

	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
		nfsd_file_flush(nf);
		nfsd_file_put_noref(nf);
	} else if (nf->nf_file) {
		nfsd_file_put_noref(nf);
		nfsd_file_schedule_laundrette();
	} else
		nfsd_file_put_noref(nf);

	if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
		nfsd_file_gc();
}

struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
	if (likely(refcount_inc_not_zero(&nf->nf_ref)))
		return nf;
	return NULL;
}

static void
nfsd_file_dispose_list(struct list_head *dispose)
{
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_flush(nf);
		nfsd_file_put_noref(nf);
	}
}

static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
	bool flush = false;
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_flush(nf);
		if (!refcount_dec_and_test(&nf->nf_ref))
			continue;
		if (nfsd_file_free(nf))
			flush = true;
	}
	if (flush)
		flush_delayed_fput();
}

static void
nfsd_file_list_remove_disposal(struct list_head *dst,
			       struct nfsd_fcache_disposal *l)
{
	spin_lock(&l->lock);
	list_splice_init(&l->freeme, dst);
	spin_unlock(&l->lock);
}

static void
nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct nfsd_fcache_disposal *l = nn->fcache_disposal;

	spin_lock(&l->lock);
	list_splice_tail_init(files, &l->freeme);
	spin_unlock(&l->lock);
	queue_work(nfsd_filecache_wq, &l->work);
}

static void
nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
			  struct net *net)
{
	struct nfsd_file *nf, *tmp;

	list_for_each_entry_safe(nf, tmp, src, nf_lru) {
		if (nf->nf_net == net)
			list_move_tail(&nf->nf_lru, dst);
	}
}
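
/*
 * Sort the files on @dispose onto their per-net disposal lists, then kick
 * each net's disposal work to close them in workqueue context.
 * (Summary comment added for clarity; behavior as implemented below.)
 */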
static void
nfsd_file_dispose_list_delayed(struct list_head *dispose)
{
	LIST_HEAD(list);
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
		nfsd_file_list_add_disposal(&list, nf->nf_net);
	}
}

/*
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/*
	 * Do a lockless refcount check. The hashtable holds one reference, so
	 * we look to see if anything else has a reference, or if any have
	 * been put since the shrinker last ran. Those don't get unhashed and
	 * released.
	 *
	 * Note that in the put path, we set the flag and then decrement the
	 * counter. Here we check the counter and then test and clear the flag.
	 * That order is deliberate to ensure that we can do this locklessly.
	 */
	if (refcount_read(&nf->nf_ref) > 1)
		goto out_skip;

	/*
	 * Don't throw out files that are still undergoing I/O or
	 * that have uncleared errors pending.
	 */
	if (nfsd_file_check_writeback(nf))
		goto out_skip;

	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
		goto out_skip;

	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
		goto out_skip;

	list_lru_isolate_move(lru, &nf->nf_lru, head);
	return LRU_REMOVED;
out_skip:
	return LRU_SKIP;
}

static unsigned long
nfsd_file_lru_walk_list(struct shrink_control *sc)
{
	LIST_HEAD(head);
	struct nfsd_file *nf;
	unsigned long ret;

	if (sc)
		ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
					   nfsd_file_lru_cb, &head);
	else
		ret = list_lru_walk(&nfsd_file_lru,
				    nfsd_file_lru_cb,
				    &head, LONG_MAX);
	list_for_each_entry(nf, &head, nf_lru) {
		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_do_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
	}
	nfsd_file_dispose_list_delayed(&head);
	return ret;
}

static void
nfsd_file_gc(void)
{
	nfsd_file_lru_walk_list(NULL);
}

static void
nfsd_file_gc_worker(struct work_struct *work)
{
	nfsd_file_gc();
	nfsd_file_schedule_laundrette();
}

static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_count(&nfsd_file_lru);
}

static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	return nfsd_file_lru_walk_list(sc);
}

static struct shrinker nfsd_file_shrinker = {
	.scan_objects = nfsd_file_lru_scan,
	.count_objects = nfsd_file_lru_count,
	.seeks = 1,
};

static void
__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
			struct list_head *dispose)
{
	struct nfsd_file	*nf;
	struct hlist_node	*tmp;

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
		if (inode == nf->nf_inode)
			nfsd_file_unhash_and_release_locked(nf, dispose);
	}
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
}

/**
 * nfsd_file_close_inode_sync - attempt to forcibly close an nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put. Also ensure that any of the
 * fputs also have their final __fput done as well.
 */
void
nfsd_file_close_inode_sync(struct inode *inode)
{
	unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_sync(&dispose);
}

/**
 * nfsd_file_close_inode - attempt a delayed close of an nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put.
 */
static void
nfsd_file_close_inode(struct inode *inode)
{
	unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_delayed(&dispose);
}

/**
 * nfsd_file_delayed_close - close unused nfsd_files
 * @work: dummy
 *
 * Walk the LRU list and close any entries that have not been used since
 * the last scan.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static void
nfsd_file_delayed_close(struct work_struct *work)
{
	LIST_HEAD(head);
	struct nfsd_fcache_disposal *l = container_of(work,
			struct nfsd_fcache_disposal, work);

	nfsd_file_list_remove_disposal(&head, l);
	nfsd_file_dispose_list(&head);
}

static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
			      void *data)
{
	struct file_lock *fl = data;

	/* Only close files for F_SETLEASE leases */
	if (fl->fl_flags & FL_LEASE)
		nfsd_file_close_inode_sync(file_inode(fl->fl_file));
	return 0;
}

static struct notifier_block nfsd_file_lease_notifier = {
	.notifier_call = nfsd_file_lease_notifier_call,
};

static int
nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
				struct inode *inode, struct inode *dir,
				const struct qstr *name, u32 cookie)
{
	if (WARN_ON_ONCE(!inode))
		return 0;

	trace_nfsd_file_fsnotify_handle_event(inode, mask);

	/* Should be no marks on non-regular files */
	if (!S_ISREG(inode->i_mode)) {
		WARN_ON_ONCE(1);
		return 0;
	}

	/* don't close files if this was not the last link */
	if (mask & FS_ATTRIB) {
		if (inode->i_nlink)
			return 0;
	}

	nfsd_file_close_inode(inode);
	return 0;
}

static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
	.handle_inode_event = nfsd_file_fsnotify_handle_event,
	.free_mark = nfsd_file_mark_free,
};
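
/**
 * nfsd_file_cache_init - set up the global file cache
 *
 * Allocate and initialize the hashtable, slab caches, LRU list, shrinker,
 * lease notifier and fsnotify group. If the cache has already been set up,
 * just clear the shutdown flag and return success.
 * (Kernel-doc added for clarity; behavior as implemented below.)
 *
 * Return: 0 on success, or a negative errno on failure.
 */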
int
nfsd_file_cache_init(void)
{
	int ret = -ENOMEM;
	unsigned int i;

	clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	if (nfsd_file_hashtbl)
		return 0;

	nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
	if (!nfsd_filecache_wq)
		goto out;

	nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE,
				     sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
	if (!nfsd_file_hashtbl) {
		pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
		goto out_err;
	}

	nfsd_file_slab = kmem_cache_create("nfsd_file",
				sizeof(struct nfsd_file), 0, 0, NULL);
	if (!nfsd_file_slab) {
		pr_err("nfsd: unable to create nfsd_file_slab\n");
		goto out_err;
	}

	nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
				sizeof(struct nfsd_file_mark), 0, 0, NULL);
	if (!nfsd_file_mark_slab) {
		pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
		goto out_err;
	}

	ret = list_lru_init(&nfsd_file_lru);
	if (ret) {
		pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
		goto out_err;
	}

	ret = register_shrinker(&nfsd_file_shrinker);
	if (ret) {
		pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
		goto out_lru;
	}

	ret = lease_register_notifier(&nfsd_file_lease_notifier);
	if (ret) {
		pr_err("nfsd: unable to register lease notifier: %d\n", ret);
		goto out_shrinker;
	}

	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops,
							FSNOTIFY_GROUP_NOFS);
	if (IS_ERR(nfsd_file_fsnotify_group)) {
		pr_err("nfsd: unable to create fsnotify group: %ld\n",
			PTR_ERR(nfsd_file_fsnotify_group));
		ret = PTR_ERR(nfsd_file_fsnotify_group);
		nfsd_file_fsnotify_group = NULL;
		goto out_notifier;
	}

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
		spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
	}

	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
out:
	return ret;
out_notifier:
	lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
	unregister_shrinker(&nfsd_file_shrinker);
out_lru:
	list_lru_destroy(&nfsd_file_lru);
out_err:
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kvfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
	goto out;
}
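
/**
 * nfsd_file_cache_purge - close and free cached nfsd_files
 * @net: if non-NULL, purge only the files that belong to this net namespace;
 *	 if NULL, purge every cached file
 *
 * Note this can deadlock with nfsd_file_lru_cb.
 */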
void
nfsd_file_cache_purge(struct net *net)
{
	unsigned int		i;
	struct nfsd_file	*nf;
	struct hlist_node	*next;
	LIST_HEAD(dispose);
	bool del;

	if (!nfsd_file_hashtbl)
		return;

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];

		spin_lock(&nfb->nfb_lock);
		hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
			if (net && nf->nf_net != net)
				continue;
			del = nfsd_file_unhash_and_release_locked(nf, &dispose);

			/*
			 * Deadlock detected! Something marked this entry as
			 * unhashed, but hasn't removed it from the hash list.
			 */
			WARN_ON_ONCE(!del);
		}
		spin_unlock(&nfb->nfb_lock);
		nfsd_file_dispose_list(&dispose);
	}
}

static struct nfsd_fcache_disposal *
nfsd_alloc_fcache_disposal(void)
{
	struct nfsd_fcache_disposal *l;

	l = kmalloc(sizeof(*l), GFP_KERNEL);
	if (!l)
		return NULL;
	INIT_WORK(&l->work, nfsd_file_delayed_close);
	spin_lock_init(&l->lock);
	INIT_LIST_HEAD(&l->freeme);
	return l;
}

static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	cancel_work_sync(&l->work);
	nfsd_file_dispose_list(&l->freeme);
	kfree(l);
}

static void
nfsd_free_fcache_disposal_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct nfsd_fcache_disposal *l = nn->fcache_disposal;

	nfsd_free_fcache_disposal(l);
}

int
nfsd_file_cache_start_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	nn->fcache_disposal = nfsd_alloc_fcache_disposal();
	return nn->fcache_disposal ? 0 : -ENOMEM;
}

void
nfsd_file_cache_shutdown_net(struct net *net)
{
	nfsd_file_cache_purge(net);
	nfsd_free_fcache_disposal_net(net);
}

void
nfsd_file_cache_shutdown(void)
{
	set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	lease_unregister_notifier(&nfsd_file_lease_notifier);
	unregister_shrinker(&nfsd_file_shrinker);
	/*
	 * make sure all callers of nfsd_file_lru_cb are done before
	 * calling nfsd_file_cache_purge
	 */
	cancel_delayed_work_sync(&nfsd_filecache_laundrette);
	nfsd_file_cache_purge(NULL);
	list_lru_destroy(&nfsd_file_lru);
	rcu_barrier();
	fsnotify_put_group(nfsd_file_fsnotify_group);
	nfsd_file_fsnotify_group = NULL;
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	fsnotify_wait_marks_destroyed();
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kvfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
}

static bool
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
{
	int i;

	if (!uid_eq(c1->fsuid, c2->fsuid))
		return false;
	if (!gid_eq(c1->fsgid, c2->fsgid))
		return false;
	if (c1->group_info == NULL || c2->group_info == NULL)
		return c1->group_info == c2->group_info;
	if (c1->group_info->ngroups != c2->group_info->ngroups)
		return false;
	for (i = 0; i < c1->group_info->ngroups; i++) {
		if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
			return false;
	}
	return true;
}

static struct nfsd_file *
nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
		      unsigned int hashval, struct net *net)
{
	struct nfsd_file *nf;
	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;

	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) {
		if (nf->nf_may != need)
			continue;
		if (nf->nf_inode != inode)
			continue;
		if (nf->nf_net != net)
			continue;
		if (!nfsd_match_cred(nf->nf_cred, current_cred()))
			continue;
		if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags))
			continue;
		if (nfsd_file_get(nf) != NULL)
			return nf;
	}
	return NULL;
}

/**
 * nfsd_file_is_cached - are there any cached open files for this inode?
 * @inode: inode of the file to check
 *
 * Scan the hashtable for open files that match this inode. Returns true if
 * there are any, and false if not.
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
	bool			ret = false;
	struct nfsd_file	*nf;
	unsigned int		hashval;

	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);

	rcu_read_lock();
	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if (inode == nf->nf_inode) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	trace_nfsd_file_is_cached(inode, hashval, (int)ret);
	return ret;
}
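
/*
 * Core of nfsd_file_acquire() and nfsd_file_create(): first try a lockless
 * RCU lookup in the hash chain. On a miss, allocate a new nfsd_file,
 * re-check for a racing insertion under the bucket lock, and construct the
 * entry with NFSD_FILE_PENDING set so that concurrent acquirers sleep in
 * wait_on_bit() until the open has either succeeded or failed.
 * (Summary comment added for clarity; behavior as implemented below.)
 */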
static __be32
nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		     unsigned int may_flags, struct nfsd_file **pnf, bool open)
{
	__be32	status;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_file *nf, *new;
	struct inode *inode;
	unsigned int hashval;
	bool retry = true;

	/* FIXME: skip this if fh_dentry is already set? */
	status = fh_verify(rqstp, fhp, S_IFREG,
				may_flags|NFSD_MAY_OWNER_OVERRIDE);
	if (status != nfs_ok)
		return status;

	inode = d_inode(fhp->fh_dentry);
	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
retry:
	rcu_read_lock();
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	rcu_read_unlock();
	if (nf)
		goto wait_for_construction;

	new = nfsd_file_alloc(inode, may_flags, hashval, net);
	if (!new) {
		trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
					NULL, nfserr_jukebox);
		return nfserr_jukebox;
	}

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	if (nf == NULL)
		goto open_file;
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nfsd_file_slab_free(&new->nf_rcu);

wait_for_construction:
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		if (!retry) {
			status = nfserr_jukebox;
			goto out;
		}
		retry = false;
		nfsd_file_put_noref(nf);
		goto retry;
	}

	this_cpu_inc(nfsd_file_cache_hits);

	status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
out:
	if (status == nfs_ok) {
		if (open)
			this_cpu_inc(nfsd_file_acquisitions);
		*pnf = nf;
	} else {
		nfsd_file_put(nf);
		nf = NULL;
	}

	trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
	return status;
open_file:
	nf = new;
	/* Take reference for the hashtable */
	refcount_inc(&nf->nf_ref);
	__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
	__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
	list_lru_add(&nfsd_file_lru, &nf->nf_lru);
	hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
	++nfsd_file_hashtbl[hashval].nfb_count;
	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
			nfsd_file_hashtbl[hashval].nfb_count);
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
		nfsd_file_gc();

	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
	if (nf->nf_mark) {
		if (open) {
			status = nfsd_open_verified(rqstp, fhp, may_flags,
						    &nf->nf_file);
			trace_nfsd_file_open(nf, status);
		} else
			status = nfs_ok;
	} else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 */
	if (status != nfs_ok || inode->i_nlink == 0) {
		bool do_free;

		spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
		do_free = nfsd_file_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
		if (do_free)
			nfsd_file_put_noref(nf);
	}
	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
	smp_mb__after_atomic();
	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
	goto out;
}

/**
 * nfsd_file_acquire - Get a struct nfsd_file with an open file
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle of the file to be opened
 * @may_flags: NFSD_MAY_ settings for the file
 * @pnf: OUT: new or found "struct nfsd_file" object
 *
 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
 * network byte order is returned.
 */
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
{
	return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, true);
}

/**
 * nfsd_file_create - Get a struct nfsd_file, do not open
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle of the file just created
 * @may_flags: NFSD_MAY_ settings for the file
 * @pnf: OUT: new or found "struct nfsd_file" object
 *
 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
 * network byte order is returned.
 */
__be32
nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
		 unsigned int may_flags, struct nfsd_file **pnf)
{
	return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, false);
}
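
/*
 * Typical caller pattern, as an illustrative sketch only (the real callers
 * live in the NFS procedure code and differ in detail):
 *
 *	struct nfsd_file *nf;
 *	__be32 status;
 *
 *	status = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf);
 *	if (status != nfs_ok)
 *		return status;
 *	... do I/O through nf->nf_file ...
 *	nfsd_file_put(nf);
 *	return status;
 */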

/*
 * Note that fields may be added, removed or reordered in the future. Programs
 * scraping this file for info should test the labels to ensure they're
 * getting the correct field.
 */
static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
	unsigned long hits = 0, acquisitions = 0;
	unsigned int i, count = 0, longest = 0;
	unsigned long lru = 0;

	/*
	 * No need for spinlocks here since we're not terribly interested in
	 * accuracy. We do take the nfsd_mutex simply to ensure that we
	 * don't end up racing with server shutdown
	 */
	mutex_lock(&nfsd_mutex);
	if (nfsd_file_hashtbl) {
		for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
			count += nfsd_file_hashtbl[i].nfb_count;
			longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
		}
		lru = list_lru_count(&nfsd_file_lru);
	}
	mutex_unlock(&nfsd_mutex);

	for_each_possible_cpu(i) {
		hits += per_cpu(nfsd_file_cache_hits, i);
		acquisitions += per_cpu(nfsd_file_acquisitions, i);
	}

	seq_printf(m, "total entries: %u\n", count);
	seq_printf(m, "longest chain: %u\n", longest);
	seq_printf(m, "lru entries: %lu\n", lru);
	seq_printf(m, "cache hits: %lu\n", hits);
	seq_printf(m, "acquisitions: %lu\n", acquisitions);
	return 0;
}

int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, nfsd_file_cache_stats_show, NULL);
}