/*
 * Open file cache.
 *
 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
 */

#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>

#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"

#define NFSDDBG_FACILITY	NFSDDBG_FH

/* FIXME: dynamically size this for the machine somehow? */
#define NFSD_FILE_HASH_BITS	12
#define NFSD_FILE_HASH_SIZE	(1 << NFSD_FILE_HASH_BITS)
#define NFSD_LAUNDRETTE_DELAY	(2 * HZ)

#define NFSD_FILE_SHUTDOWN	(1)
#define NFSD_FILE_LRU_THRESHOLD	(4096UL)
#define NFSD_FILE_LRU_LIMIT	(NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

struct nfsd_fcache_bucket {
	struct hlist_head	nfb_head;
	spinlock_t		nfb_lock;
	unsigned int		nfb_count;
	unsigned int		nfb_maxcount;
};

static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);

struct nfsd_fcache_disposal {
	struct list_head list;
	struct work_struct work;
	struct net *net;
	spinlock_t lock;
	struct list_head freeme;
	struct rcu_head rcu;
};

static struct workqueue_struct *nfsd_filecache_wq __read_mostly;

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
static struct list_lru			nfsd_file_lru;
static long				nfsd_file_lru_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
static atomic_long_t			nfsd_filecache_count;
static struct delayed_work		nfsd_filecache_laundrette;
static DEFINE_SPINLOCK(laundrette_lock);
static LIST_HEAD(laundrettes);

static void nfsd_file_gc(void);

static void
nfsd_file_schedule_laundrette(void)
{
	long count = atomic_long_read(&nfsd_filecache_count);

	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
		return;

	queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
			NFSD_LAUNDRETTE_DELAY);
}

static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}

static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}

static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
	if (!refcount_inc_not_zero(&nfm->nfm_ref))
		return NULL;
	return nfm;
}

static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (refcount_dec_and_test(&nfm->nfm_ref)) {
		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}
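/*
 * Find the fsnotify mark attached to this inode on nfsd's behalf, or
 * create and attach a new one. A hit on a mark that is already being
 * torn down (refcount has hit zero) is destroyed and the lookup retried;
 * losing a race with another task attaching a mark first shows up as
 * -EEXIST from fsnotify_add_inode_mark() and likewise causes a retry.
 */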
static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf)
{
	int			err;
	struct fsnotify_mark	*mark;
	struct nfsd_file_mark	*nfm = NULL, *new;
	struct inode *inode = nf->nf_inode;

	do {
		mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
				nfsd_file_fsnotify_group);
		if (mark) {
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
			if (nfm) {
				fsnotify_put_mark(mark);
				break;
			}
			/* Avoid soft lockup race with nfsd_file_mark_put() */
			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
			fsnotify_put_mark(mark);
		} else
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		refcount_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}

static struct nfsd_file *
nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
		struct net *net)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (nf) {
		INIT_HLIST_NODE(&nf->nf_node);
		INIT_LIST_HEAD(&nf->nf_lru);
		nf->nf_file = NULL;
		nf->nf_cred = get_current_cred();
		nf->nf_net = net;
		nf->nf_flags = 0;
		nf->nf_inode = inode;
		nf->nf_hashval = hashval;
		refcount_set(&nf->nf_ref, 1);
		nf->nf_may = may & NFSD_FILE_MAY_MASK;
		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
			if (may & NFSD_MAY_WRITE)
				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
			if (may & NFSD_MAY_READ)
				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
		}
		nf->nf_mark = NULL;
		init_rwsem(&nf->nf_rwsem);
		trace_nfsd_file_alloc(nf);
	}
	return nf;
}

static bool
nfsd_file_free(struct nfsd_file *nf)
{
	bool flush = false;

	trace_nfsd_file_put_final(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
		get_file(nf->nf_file);
		filp_close(nf->nf_file, NULL);
		fput(nf->nf_file);
		flush = true;
	}
	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
	return flush;
}

static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;
	struct address_space *mapping;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return false;
	mapping = file->f_mapping;
	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}

static int
nfsd_file_check_write_error(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return 0;
	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
}
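/*
 * Remove @nf from its hash bucket; the bucket lock must be held. If a
 * writeback error is still pending on the file, reset the boot verifier
 * so that clients will resend writes that may otherwise have been lost.
 */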
static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash(nf);

	if (nfsd_file_check_write_error(nf))
		nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
	hlist_del_rcu(&nf->nf_node);
	atomic_long_dec(&nfsd_filecache_count);
}

static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_do_unhash(nf);
		if (!list_empty(&nf->nf_lru))
			list_lru_del(&nfsd_file_lru, &nf->nf_lru);
		return true;
	}
	return false;
}

/*
 * Return true if the file was unhashed.
 */
static bool
nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash_and_release_locked(nf);
	if (!nfsd_file_unhash(nf))
		return false;
	/* keep final reference for nfsd_file_lru_dispose */
	if (refcount_dec_not_one(&nf->nf_ref))
		return true;

	list_add(&nf->nf_lru, dispose);
	return true;
}

static void
nfsd_file_put_noref(struct nfsd_file *nf)
{
	trace_nfsd_file_put(nf);

	if (refcount_dec_and_test(&nf->nf_ref)) {
		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
		nfsd_file_free(nf);
	}
}

void
nfsd_file_put(struct nfsd_file *nf)
{
	bool is_hashed;

	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) {
		nfsd_file_put_noref(nf);
		return;
	}

	filemap_flush(nf->nf_file->f_mapping);
	is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
	nfsd_file_put_noref(nf);
	if (is_hashed)
		nfsd_file_schedule_laundrette();
	if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
		nfsd_file_gc();
}

struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
	if (likely(refcount_inc_not_zero(&nf->nf_ref)))
		return nf;
	return NULL;
}

static void
nfsd_file_dispose_list(struct list_head *dispose)
{
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_put_noref(nf);
	}
}

static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
	bool flush = false;
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		if (!refcount_dec_and_test(&nf->nf_ref))
			continue;
		if (nfsd_file_free(nf))
			flush = true;
	}
	if (flush)
		flush_delayed_fput();
}

static void
nfsd_file_list_remove_disposal(struct list_head *dst,
		struct nfsd_fcache_disposal *l)
{
	spin_lock(&l->lock);
	list_splice_init(&l->freeme, dst);
	spin_unlock(&l->lock);
}

static void
nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
{
	struct nfsd_fcache_disposal *l;

	rcu_read_lock();
	list_for_each_entry_rcu(l, &laundrettes, list) {
		if (l->net == net) {
			spin_lock(&l->lock);
			list_splice_tail_init(files, &l->freeme);
			spin_unlock(&l->lock);
			queue_work(nfsd_filecache_wq, &l->work);
			break;
		}
	}
	rcu_read_unlock();
}

static void
nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
		struct net *net)
{
	struct nfsd_file *nf, *tmp;

	list_for_each_entry_safe(nf, tmp, src, nf_lru) {
		if (nf->nf_net == net)
			list_move_tail(&nf->nf_lru, dst);
	}
}
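/*
 * Entries on @dispose may belong to different network namespaces. Peel
 * off the entries for one net at a time and hand each batch to that
 * net's disposal workqueue.
 */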
static void
nfsd_file_dispose_list_delayed(struct list_head *dispose)
{
	LIST_HEAD(list);
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
		nfsd_file_list_add_disposal(&list, nf->nf_net);
	}
}

/*
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/*
	 * Do a lockless refcount check. The hashtable holds one reference, so
	 * we look to see if anything else has a reference, or if any have
	 * been put since the shrinker last ran. Those don't get unhashed and
	 * released.
	 *
	 * Note that in the put path, we set the flag and then decrement the
	 * counter. Here we check the counter and then test and clear the flag.
	 * That order is deliberate to ensure that we can do this locklessly.
	 */
	if (refcount_read(&nf->nf_ref) > 1)
		goto out_skip;

	/*
	 * Don't throw out files that are still undergoing I/O or
	 * that have uncleared errors pending.
	 */
	if (nfsd_file_check_writeback(nf))
		goto out_skip;

	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
		goto out_skip;

	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
		goto out_skip;

	list_lru_isolate_move(lru, &nf->nf_lru, head);
	return LRU_REMOVED;
out_skip:
	return LRU_SKIP;
}

static unsigned long
nfsd_file_lru_walk_list(struct shrink_control *sc)
{
	LIST_HEAD(head);
	struct nfsd_file *nf;
	unsigned long ret;

	if (sc)
		ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
				nfsd_file_lru_cb, &head);
	else
		ret = list_lru_walk(&nfsd_file_lru,
				nfsd_file_lru_cb,
				&head, LONG_MAX);
	list_for_each_entry(nf, &head, nf_lru) {
		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_do_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
	}
	nfsd_file_dispose_list_delayed(&head);
	return ret;
}

static void
nfsd_file_gc(void)
{
	nfsd_file_lru_walk_list(NULL);
}

static void
nfsd_file_gc_worker(struct work_struct *work)
{
	nfsd_file_gc();
	nfsd_file_schedule_laundrette();
}

static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_count(&nfsd_file_lru);
}

static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	return nfsd_file_lru_walk_list(sc);
}

static struct shrinker	nfsd_file_shrinker = {
	.scan_objects = nfsd_file_lru_scan,
	.count_objects = nfsd_file_lru_count,
	.seeks = 1,
};
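/*
 * Walk one hash bucket under its lock, unhashing every entry that matches
 * @inode and collecting those whose only remaining reference was the
 * hashtable's onto @dispose for the caller to free.
 */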
static void
__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
			struct list_head *dispose)
{
	struct nfsd_file	*nf;
	struct hlist_node	*tmp;

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
		if (inode == nf->nf_inode)
			nfsd_file_unhash_and_release_locked(nf, dispose);
	}
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
}

/**
 * nfsd_file_close_inode_sync - attempt to forcibly close an nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put. Also ensure that any of the
 * fputs also have their final __fput done as well.
 */
void
nfsd_file_close_inode_sync(struct inode *inode)
{
	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_sync(&dispose);
}

/**
 * nfsd_file_close_inode - attempt a delayed close of an nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put.
 */
static void
nfsd_file_close_inode(struct inode *inode)
{
	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_delayed(&dispose);
}

/**
 * nfsd_file_delayed_close - close unused nfsd_files
 * @work: work item for this namespace's disposal queue
 *
 * Pull entries off this namespace's disposal queue and put the hashtable
 * reference on each, freeing any whose refcount drops to zero.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static void
nfsd_file_delayed_close(struct work_struct *work)
{
	LIST_HEAD(head);
	struct nfsd_fcache_disposal *l = container_of(work,
			struct nfsd_fcache_disposal, work);

	nfsd_file_list_remove_disposal(&head, l);
	nfsd_file_dispose_list(&head);
}

static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
			      void *data)
{
	struct file_lock *fl = data;

	/* Only close files for F_SETLEASE leases */
	if (fl->fl_flags & FL_LEASE)
		nfsd_file_close_inode_sync(file_inode(fl->fl_file));
	return 0;
}

static struct notifier_block nfsd_file_lease_notifier = {
	.notifier_call = nfsd_file_lease_notifier_call,
};

static int
nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
				struct inode *inode, struct inode *dir,
				const struct qstr *name, u32 cookie)
{
	if (WARN_ON_ONCE(!inode))
		return 0;

	trace_nfsd_file_fsnotify_handle_event(inode, mask);

	/* Should be no marks on non-regular files */
	if (!S_ISREG(inode->i_mode)) {
		WARN_ON_ONCE(1);
		return 0;
	}

	/* don't close files if this was not the last link */
	if (mask & FS_ATTRIB) {
		if (inode->i_nlink)
			return 0;
	}

	nfsd_file_close_inode(inode);
	return 0;
}

static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
	.handle_inode_event = nfsd_file_fsnotify_handle_event,
	.free_mark = nfsd_file_mark_free,
};
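/*
 * Set up the global pieces of the file cache: hashtable, slabs, LRU,
 * shrinker, lease notifier, and fsnotify group. The early return on a
 * populated nfsd_file_hashtbl makes a repeat call a no-op.
 */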
int
nfsd_file_cache_init(void)
{
	int		ret = -ENOMEM;
	unsigned int	i;

	clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	if (nfsd_file_hashtbl)
		return 0;

	nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
	if (!nfsd_filecache_wq)
		goto out;

	nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
				sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
	if (!nfsd_file_hashtbl) {
		pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
		goto out_err;
	}

	nfsd_file_slab = kmem_cache_create("nfsd_file",
				sizeof(struct nfsd_file), 0, 0, NULL);
	if (!nfsd_file_slab) {
		pr_err("nfsd: unable to create nfsd_file_slab\n");
		goto out_err;
	}

	nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
					sizeof(struct nfsd_file_mark), 0, 0, NULL);
	if (!nfsd_file_mark_slab) {
		pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
		goto out_err;
	}

	ret = list_lru_init(&nfsd_file_lru);
	if (ret) {
		pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
		goto out_err;
	}

	ret = register_shrinker(&nfsd_file_shrinker);
	if (ret) {
		pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
		goto out_lru;
	}

	ret = lease_register_notifier(&nfsd_file_lease_notifier);
	if (ret) {
		pr_err("nfsd: unable to register lease notifier: %d\n", ret);
		goto out_shrinker;
	}

	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
	if (IS_ERR(nfsd_file_fsnotify_group)) {
		pr_err("nfsd: unable to create fsnotify group: %ld\n",
			PTR_ERR(nfsd_file_fsnotify_group));
		ret = PTR_ERR(nfsd_file_fsnotify_group);
		nfsd_file_fsnotify_group = NULL;
		goto out_notifier;
	}

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
		spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
	}

	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
out:
	return ret;
out_notifier:
	lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
	unregister_shrinker(&nfsd_file_shrinker);
out_lru:
	list_lru_destroy(&nfsd_file_lru);
out_err:
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
	goto out;
}
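/**
 * nfsd_file_cache_purge - unhash and dispose of cached entries
 * @net: netns whose entries should be purged, or NULL to purge every entry
 */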
/*
 * Note this can deadlock with nfsd_file_lru_cb.
 */
void
nfsd_file_cache_purge(struct net *net)
{
	unsigned int		i;
	struct nfsd_file	*nf;
	struct hlist_node	*next;
	LIST_HEAD(dispose);
	bool del;

	if (!nfsd_file_hashtbl)
		return;

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];

		spin_lock(&nfb->nfb_lock);
		hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
			if (net && nf->nf_net != net)
				continue;
			del = nfsd_file_unhash_and_release_locked(nf, &dispose);

			/*
			 * Deadlock detected! Something marked this entry as
			 * unhashed, but hasn't removed it from the hash list.
			 */
			WARN_ON_ONCE(!del);
		}
		spin_unlock(&nfb->nfb_lock);
		nfsd_file_dispose_list(&dispose);
	}
}

static struct nfsd_fcache_disposal *
nfsd_alloc_fcache_disposal(struct net *net)
{
	struct nfsd_fcache_disposal *l;

	l = kmalloc(sizeof(*l), GFP_KERNEL);
	if (!l)
		return NULL;
	INIT_WORK(&l->work, nfsd_file_delayed_close);
	l->net = net;
	spin_lock_init(&l->lock);
	INIT_LIST_HEAD(&l->freeme);
	return l;
}

static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	rcu_assign_pointer(l->net, NULL);
	cancel_work_sync(&l->work);
	nfsd_file_dispose_list(&l->freeme);
	kfree_rcu(l, rcu);
}

static void
nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	spin_lock(&laundrette_lock);
	list_add_tail_rcu(&l->list, &laundrettes);
	spin_unlock(&laundrette_lock);
}

static void
nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	spin_lock(&laundrette_lock);
	list_del_rcu(&l->list);
	spin_unlock(&laundrette_lock);
}

static int
nfsd_alloc_fcache_disposal_net(struct net *net)
{
	struct nfsd_fcache_disposal *l;

	l = nfsd_alloc_fcache_disposal(net);
	if (!l)
		return -ENOMEM;
	nfsd_add_fcache_disposal(l);
	return 0;
}

static void
nfsd_free_fcache_disposal_net(struct net *net)
{
	struct nfsd_fcache_disposal *l;

	rcu_read_lock();
	list_for_each_entry_rcu(l, &laundrettes, list) {
		if (l->net != net)
			continue;
		nfsd_del_fcache_disposal(l);
		rcu_read_unlock();
		nfsd_free_fcache_disposal(l);
		return;
	}
	rcu_read_unlock();
}

int
nfsd_file_cache_start_net(struct net *net)
{
	return nfsd_alloc_fcache_disposal_net(net);
}

void
nfsd_file_cache_shutdown_net(struct net *net)
{
	nfsd_file_cache_purge(net);
	nfsd_free_fcache_disposal_net(net);
}

void
nfsd_file_cache_shutdown(void)
{
	set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	lease_unregister_notifier(&nfsd_file_lease_notifier);
	unregister_shrinker(&nfsd_file_shrinker);
	/*
	 * make sure all callers of nfsd_file_lru_cb are done before
	 * calling nfsd_file_cache_purge
	 */
	cancel_delayed_work_sync(&nfsd_filecache_laundrette);
	nfsd_file_cache_purge(NULL);
	list_lru_destroy(&nfsd_file_lru);
	rcu_barrier();
	fsnotify_put_group(nfsd_file_fsnotify_group);
	nfsd_file_fsnotify_group = NULL;
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	fsnotify_wait_marks_destroyed();
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
}

static bool
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
{
	int i;

	if (!uid_eq(c1->fsuid, c2->fsuid))
		return false;
	if (!gid_eq(c1->fsgid, c2->fsgid))
		return false;
	if (c1->group_info == NULL || c2->group_info == NULL)
		return c1->group_info == c2->group_info;
	if (c1->group_info->ngroups != c2->group_info->ngroups)
		return false;
	for (i = 0; i < c1->group_info->ngroups; i++) {
		if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
			return false;
	}
	return true;
}
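/*
 * Search one hash bucket for an entry that exactly matches the open mode,
 * inode, net, and the caller's credentials. The caller must hold either
 * the RCU read lock or the bucket lock. Returns the entry with a reference
 * taken, or NULL if no usable entry was found.
 */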
static struct nfsd_file *
nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
			unsigned int hashval, struct net *net)
{
	struct nfsd_file *nf;
	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;

	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) {
		if (nf->nf_may != need)
			continue;
		if (nf->nf_inode != inode)
			continue;
		if (nf->nf_net != net)
			continue;
		if (!nfsd_match_cred(nf->nf_cred, current_cred()))
			continue;
		if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags))
			continue;
		if (nfsd_file_get(nf) != NULL)
			return nf;
	}
	return NULL;
}

/**
 * nfsd_file_is_cached - are there any cached open files for this inode?
 * @inode: inode of the file to check
 *
 * Scan the hashtable for open files that match this inode. Returns true if
 * there are any, and false if not.
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
	bool			ret = false;
	struct nfsd_file	*nf;
	unsigned int		hashval;

	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);

	rcu_read_lock();
	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if (inode == nf->nf_inode) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	trace_nfsd_file_is_cached(inode, hashval, (int)ret);
	return ret;
}
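/**
 * nfsd_file_acquire - find or open a file for an nfsd operation
 * @rqstp: the RPC request being processed
 * @fhp: file handle of the file to open
 * @may_flags: NFSD_MAY_* flags describing the requested access
 * @pnf: OUT: the cached or newly-opened nfsd_file
 *
 * Verify the file handle, then look the file up in the cache, opening and
 * hashing a new entry on a miss. On success, a referenced nfsd_file is
 * stored in @pnf and nfs_ok is returned; the caller is expected to release
 * the reference via nfsd_file_put().
 */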
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
{
	__be32	status;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_file *nf, *new;
	struct inode *inode;
	unsigned int hashval;
	bool retry = true;

	/* FIXME: skip this if fh_dentry is already set? */
	status = fh_verify(rqstp, fhp, S_IFREG,
				may_flags|NFSD_MAY_OWNER_OVERRIDE);
	if (status != nfs_ok)
		return status;

	inode = d_inode(fhp->fh_dentry);
	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
retry:
	rcu_read_lock();
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	rcu_read_unlock();
	if (nf)
		goto wait_for_construction;

	new = nfsd_file_alloc(inode, may_flags, hashval, net);
	if (!new) {
		trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
					NULL, nfserr_jukebox);
		return nfserr_jukebox;
	}

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	if (nf == NULL)
		goto open_file;
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nfsd_file_slab_free(&new->nf_rcu);

wait_for_construction:
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		if (!retry) {
			status = nfserr_jukebox;
			goto out;
		}
		retry = false;
		nfsd_file_put_noref(nf);
		goto retry;
	}

	this_cpu_inc(nfsd_file_cache_hits);

	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
		bool write = (may_flags & NFSD_MAY_WRITE);

		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
			status = nfserrno(nfsd_open_break_lease(
					file_inode(nf->nf_file), may_flags));
			if (status == nfs_ok) {
				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
				if (write)
					clear_bit(NFSD_FILE_BREAK_WRITE,
						  &nf->nf_flags);
			}
		}
	}
out:
	if (status == nfs_ok) {
		*pnf = nf;
	} else {
		nfsd_file_put(nf);
		nf = NULL;
	}

	trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
	return status;
open_file:
	nf = new;
	/* Take reference for the hashtable */
	refcount_inc(&nf->nf_ref);
	__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
	__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
	list_lru_add(&nfsd_file_lru, &nf->nf_lru);
	hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
	++nfsd_file_hashtbl[hashval].nfb_count;
	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
			nfsd_file_hashtbl[hashval].nfb_count);
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
		nfsd_file_gc();

	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
	if (nf->nf_mark)
		status = nfsd_open_verified(rqstp, fhp, S_IFREG,
				may_flags, &nf->nf_file);
	else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 */
	if (status != nfs_ok || inode->i_nlink == 0) {
		bool do_free;
		spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
		do_free = nfsd_file_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
		if (do_free)
			nfsd_file_put_noref(nf);
	}
	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
	smp_mb__after_atomic();
	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
	goto out;
}
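/*
 * The stats file emits one "label: value" pair per line; illustrative
 * example (the values here are made up):
 *
 *	total entries: 4096
 *	longest chain: 3
 *	cache hits: 1800402
 */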
/*
 * Note that fields may be added, removed or reordered in the future. Programs
 * scraping this file for info should test the labels to ensure they're
 * getting the correct field.
 */
static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
	unsigned int i, count = 0, longest = 0;
	unsigned long hits = 0;

	/*
	 * No need for spinlocks here since we're not terribly interested in
	 * accuracy. We do take the nfsd_mutex simply to ensure that we
	 * don't end up racing with server shutdown.
	 */
	mutex_lock(&nfsd_mutex);
	if (nfsd_file_hashtbl) {
		for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
			count += nfsd_file_hashtbl[i].nfb_count;
			longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
		}
	}
	mutex_unlock(&nfsd_mutex);

	for_each_possible_cpu(i)
		hits += per_cpu(nfsd_file_cache_hits, i);

	seq_printf(m, "total entries: %u\n", count);
	seq_printf(m, "longest chain: %u\n", longest);
	seq_printf(m, "cache hits: %lu\n", hits);
	return 0;
}

int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, nfsd_file_cache_stats_show, NULL);
}