/*
 * Open file cache.
 *
 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
 */

#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>

#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"

#define NFSDDBG_FACILITY	NFSDDBG_FH

/* FIXME: dynamically size this for the machine somehow? */
#define NFSD_FILE_HASH_BITS		12
#define NFSD_FILE_HASH_SIZE		(1 << NFSD_FILE_HASH_BITS)
/* Delay used when queueing the garbage-collection ("laundrette") work. */
#define NFSD_LAUNDRETTE_DELAY		(2 * HZ)

/* Bit number in nfsd_file_lru_flags: set while the cache is shutting down. */
#define NFSD_FILE_SHUTDOWN		(1)
/* Entry count at which nfsd_file_acquire() runs a GC pass after inserting. */
#define NFSD_FILE_LRU_THRESHOLD		(4096UL)
/* Entry count at which nfsd_file_put() runs a GC pass. */
#define NFSD_FILE_LRU_LIMIT		(NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

/*
 * One bucket of the open-file hash table.
 *
 * nfb_head:     chain of nfsd_file entries (linked through nf_node)
 * nfb_lock:     taken around insertions into and removals from the chain
 * nfb_count:    number of entries currently on the chain
 * nfb_maxcount: largest nfb_count recorded at insertion time
 */
struct nfsd_fcache_bucket {
	struct hlist_head	nfb_head;
	spinlock_t		nfb_lock;
	unsigned int		nfb_count;
	unsigned int		nfb_maxcount;
};

/* Per-CPU counter bumped on every successful cache lookup. */
static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);

/*
 * Deferred-disposal context, one per struct net.
 *
 * list:   linkage on the global "laundrettes" list (RCU-protected)
 * work:   work item that runs nfsd_file_delayed_close()
 * net:    network namespace this context serves
 * lock:   protects freeme
 * freeme: nfsd_file entries (linked through nf_lru) waiting to be released
 * rcu:    used to free this structure with kfree_rcu()
 */
struct nfsd_fcache_disposal {
	struct list_head list;
	struct work_struct work;
	struct net *net;
	spinlock_t lock;
	struct list_head freeme;
	struct rcu_head rcu;
};

static struct workqueue_struct *nfsd_filecache_wq __read_mostly;

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
static struct list_lru			nfsd_file_lru;
static long				nfsd_file_lru_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
static atomic_long_t			nfsd_filecache_count;
static struct delayed_work		nfsd_filecache_laundrette;
static DEFINE_SPINLOCK(laundrette_lock);
static LIST_HEAD(laundrettes);

static void nfsd_file_gc(void);

/*
 * Queue the delayed GC work on system_wq, unless the cache is empty or the
 * NFSD_FILE_SHUTDOWN bit is set.
 */
static void
nfsd_file_schedule_laundrette(void)
{
	long count = atomic_long_read(&nfsd_filecache_count);

	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
		return;

	queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
			NFSD_LAUNDRETTE_DELAY);
}

/*
 * Final release of an nfsd_file: drop its credential reference and return
 * the object to the slab. Used as the call_rcu() callback in
 * nfsd_file_free(), and called directly on a never-hashed entry.
 */
static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}

/*
 * Return the nfsd_file_mark that embeds @mark to its slab. Installed as the
 * ->free_mark operation of the nfsd fsnotify group.
 */
static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}

/*
 * Take a reference on @nfm unless its refcount has already reached zero.
 * Returns @nfm on success, NULL otherwise.
 */
static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
	if (!refcount_inc_not_zero(&nfm->nfm_ref))
		return NULL;
	return nfm;
}

/*
 * Drop a reference on @nfm. When the last reference goes away, destroy the
 * fsnotify mark and put the mark reference.
 */
static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (refcount_dec_and_test(&nfm->nfm_ref)) {
		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}

/*
 * Find the nfsd fsnotify mark attached to nf->nf_inode and take a reference
 * to it, or allocate and attach a new one.
 *
 * Returns the referenced nfsd_file_mark, or NULL if the slab allocation
 * fails or adding the mark fails with an error other than -EEXIST. On
 * -EEXIST the whole lookup is retried.
 */
static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf)
{
	int			err;
	struct fsnotify_mark	*mark;
	struct nfsd_file_mark	*nfm = NULL, *new;
	struct inode *inode = nf->nf_inode;

	do {
		mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
				nfsd_file_fsnotify_group);
		if (mark) {
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
			if (nfm) {
				/* drop the reference taken by the lookup */
				fsnotify_put_mark(mark);
				break;
			}
			/* Avoid soft lockup race with nfsd_file_mark_put() */
			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
			fsnotify_put_mark(mark);
		} else
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		refcount_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}

/*
 * Allocate and initialise an unhashed nfsd_file for @inode in @net.
 *
 * The new entry starts with a refcount of 1, no open file and no mark, and
 * holds a reference to the current task's credentials. Only the
 * NFSD_MAY_READ/WRITE bits of @may are recorded in nf_may. If @may has
 * NFSD_MAY_NOT_BREAK_LEASE set, the matching NFSD_FILE_BREAK_* flags are set
 * so a later acquire can see that the lease break was skipped.
 *
 * Returns the new entry, or NULL if the slab allocation fails.
 */
static struct nfsd_file *
nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
		struct net *net)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (nf) {
		INIT_HLIST_NODE(&nf->nf_node);
		INIT_LIST_HEAD(&nf->nf_lru);
		nf->nf_file = NULL;
		nf->nf_cred = get_current_cred();
		nf->nf_net = net;
		nf->nf_flags = 0;
		nf->nf_inode = inode;
		nf->nf_hashval = hashval;
		refcount_set(&nf->nf_ref, 1);
		nf->nf_may = may & NFSD_FILE_MAY_MASK;
		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
			if (may & NFSD_MAY_WRITE)
				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
			if (may & NFSD_MAY_READ)
				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
		}
		nf->nf_mark = NULL;
		init_rwsem(&nf->nf_rwsem);
		trace_nfsd_file_alloc(nf);
	}
	return nf;
}

/*
 * Tear down an nfsd_file whose last reference has been dropped: put its
 * mark, close its file and schedule the structure for freeing after an RCU
 * grace period.
 *
 * Returns true if a file was closed, which callers use to decide whether to
 * call flush_delayed_fput().
 */
static bool
nfsd_file_free(struct nfsd_file *nf)
{
	bool flush = false;

	trace_nfsd_file_put_final(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
		/* extra reference is held across filp_close() and put by fput() */
		get_file(nf->nf_file);
		filp_close(nf->nf_file, NULL);
		fput(nf->nf_file);
		flush = true;
	}
	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
	return flush;
}

/*
 * Return true if @nf has a file opened for write whose mapping is tagged
 * dirty or under writeback.
 */
static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;
	struct address_space *mapping;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return false;
	mapping = file->f_mapping;
	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}

/*
 * For a file opened for write, return the result of checking the mapping's
 * writeback error state against the file's f_wb_err; otherwise return 0.
 */
static int
nfsd_file_check_write_error(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return 0;
	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
}

/*
 * Remove @nf from its hash chain and adjust the bucket and global counts.
 * The caller must hold the bucket lock. If a write error is pending on the
 * file, the boot verifier of the entry's net is reset first.
 *
 * This does not touch the NFSD_FILE_HASHED flag or the LRU; callers handle
 * those.
 */
static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash(nf);

	if (nfsd_file_check_write_error(nf))
		nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
	hlist_del_rcu(&nf->nf_node);
	atomic_long_dec(&nfsd_filecache_count);
}

/*
 * Unhash @nf if it is still marked NFSD_FILE_HASHED, and take it off the
 * LRU if it is on a list. The bucket lock must be held (asserted in
 * nfsd_file_do_unhash()).
 *
 * Returns true if this call cleared the flag and unhashed the entry, false
 * if the entry was already unhashed.
 */
static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_do_unhash(nf);
		if (!list_empty(&nf->nf_lru))
			list_lru_del(&nfsd_file_lru, &nf->nf_lru);
		return true;
	}
	return false;
}

/*
 * Unhash @nf and drop the hashtable's reference, with the bucket lock held.
 * If that reference is the last one, it is kept and @nf is queued on
 * @dispose so the caller can release it after dropping the lock.
 *
 * Return true if the file was unhashed.
 */
static bool
nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash_and_release_locked(nf);
	if (!nfsd_file_unhash(nf))
		return false;
	/* keep final reference for nfsd_file_lru_dispose */
	if (refcount_dec_not_one(&nf->nf_ref))
		return true;

	list_add(&nf->nf_lru, dispose);
	return true;
}

/*
 * Drop a reference to @nf without marking it as recently used. Frees the
 * entry when the last reference is dropped; warns if it is still hashed at
 * that point.
 */
static void
nfsd_file_put_noref(struct nfsd_file *nf)
{
	trace_nfsd_file_put(nf);

	if (refcount_dec_and_test(&nf->nf_ref)) {
		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
		nfsd_file_free(nf);
	}
}

/*
 * Drop a caller's reference to @nf and mark the entry as referenced so the
 * next LRU scan skips it.
 *
 * If the refcount is above 2 or the entry has no open file, this is just a
 * reference drop. Otherwise writeback is started on the file's mapping
 * before the reference is dropped, the GC work is scheduled if the entry
 * was hashed, and a GC pass runs immediately once the cache holds
 * NFSD_FILE_LRU_LIMIT or more entries.
 */
void
nfsd_file_put(struct nfsd_file *nf)
{
	bool is_hashed;

	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) {
		nfsd_file_put_noref(nf);
		return;
	}

	filemap_flush(nf->nf_file->f_mapping);
	/* sample the flag before the put: nf may be freed by it */
	is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
	nfsd_file_put_noref(nf);
	if (is_hashed)
		nfsd_file_schedule_laundrette();
	if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
		nfsd_file_gc();
}

/*
 * Take a reference to @nf unless its refcount has already dropped to zero.
 * Returns @nf on success, NULL otherwise.
 */
struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
	if (likely(refcount_inc_not_zero(&nf->nf_ref)))
		return nf;
	return NULL;
}

/*
 * Remove every entry from @dispose (linked through nf_lru) and drop one
 * reference on each.
 */
static void
nfsd_file_dispose_list(struct list_head *dispose)
{
	struct nfsd_file *nf;

	while(!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_put_noref(nf);
	}
}

/*
 * Like nfsd_file_dispose_list(), but if any entry had an open file that was
 * closed, call flush_delayed_fput() before returning.
 */
static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
	bool flush = false;
	struct nfsd_file *nf;

	while(!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		if (!refcount_dec_and_test(&nf->nf_ref))
			continue;
		if (nfsd_file_free(nf))
			flush = true;
	}
	if (flush)
		flush_delayed_fput();
}

/* Move everything queued on @l->freeme onto @dst, under @l->lock. */
static void
nfsd_file_list_remove_disposal(struct list_head *dst,
		struct nfsd_fcache_disposal *l)
{
	spin_lock(&l->lock);
	list_splice_init(&l->freeme, dst);
	spin_unlock(&l->lock);
}

/*
 * Hand the entries on @files to the disposal context of @net: append them
 * to its freeme list and queue its work item on nfsd_filecache_wq.
 */
static void
nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
{
	struct nfsd_fcache_disposal *l;

	rcu_read_lock();
	list_for_each_entry_rcu(l, &laundrettes, list) {
		if (l->net == net) {
			spin_lock(&l->lock);
			list_splice_tail_init(files, &l->freeme);
			spin_unlock(&l->lock);
			queue_work(nfsd_filecache_wq, &l->work);
			break;
		}
	}
	/*
	 * NOTE(review): if no disposal context matches @net, @files is left
	 * untouched here - confirm callers can never hit that case.
	 */
	rcu_read_unlock();
}

/* Move every entry on @src that belongs to @net to the tail of @dst. */
static void
nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
		struct net *net)
{
	struct nfsd_file *nf, *tmp;

	list_for_each_entry_safe(nf, tmp, src, nf_lru) {
		if (nf->nf_net == net)
			list_move_tail(&nf->nf_lru, dst);
	}
}

/*
 * Release the entries on @dispose asynchronously. The list is drained one
 * net at a time: all entries sharing the net of the first entry are
 * collected and handed to that net's disposal context.
 */
static void
nfsd_file_dispose_list_delayed(struct list_head *dispose)
{
	LIST_HEAD(list);
	struct nfsd_file *nf;

	while(!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
		nfsd_file_list_add_disposal(&list, nf->nf_net);
	}
}

/*
 * LRU walk callback: decide whether @item can be reclaimed. Reclaimable
 * entries have their NFSD_FILE_HASHED flag cleared and are moved onto the
 * list passed in @arg; everything else is skipped.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/*
	 * Do a lockless refcount check. The hashtable holds one reference, so
	 * we look to see if anything else has a reference, or if any have
	 * been put since the shrinker last ran. Those don't get unhashed and
	 * released.
	 *
	 * Note that in the put path, we set the flag and then decrement the
	 * counter. Here we check the counter and then test and clear the flag.
	 * That order is deliberate to ensure that we can do this locklessly.
	 */
	if (refcount_read(&nf->nf_ref) > 1)
		goto out_skip;

	/*
	 * Don't throw out files that are still undergoing I/O or
	 * that have uncleared errors pending.
	 */
	if (nfsd_file_check_writeback(nf))
		goto out_skip;

	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
		goto out_skip;

	/* someone else already unhashed it; leave it to them */
	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
		goto out_skip;

	list_lru_isolate_move(lru, &nf->nf_lru, head);
	return LRU_REMOVED;
out_skip:
	return LRU_SKIP;
}

/*
 * Walk the LRU and reclaim unused entries.
 *
 * With a non-NULL @sc the walk is bounded by the shrink control; with NULL
 * the whole list is walked. Entries isolated by nfsd_file_lru_cb() are
 * removed from their hash chains under the bucket lock and then queued for
 * deferred disposal.
 *
 * Returns the value returned by the list_lru walk.
 */
static unsigned long
nfsd_file_lru_walk_list(struct shrink_control *sc)
{
	LIST_HEAD(head);
	struct nfsd_file *nf;
	unsigned long ret;

	if (sc)
		ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
				nfsd_file_lru_cb, &head);
	else
		ret = list_lru_walk(&nfsd_file_lru,
				nfsd_file_lru_cb,
				&head, LONG_MAX);
	list_for_each_entry(nf, &head, nf_lru) {
		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_do_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
	}
	nfsd_file_dispose_list_delayed(&head);
	return ret;
}

/* Run one unbounded garbage-collection pass over the LRU. */
static void
nfsd_file_gc(void)
{
	nfsd_file_lru_walk_list(NULL);
}

/* Delayed-work handler: run a GC pass, then re-arm the delayed work. */
static void
nfsd_file_gc_worker(struct work_struct *work)
{
	nfsd_file_gc();
	nfsd_file_schedule_laundrette();
}

/* Shrinker ->count_objects: report the number of entries on the LRU. */
static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_count(&nfsd_file_lru);
}

/* Shrinker ->scan_objects: reclaim entries as directed by @sc. */
static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	return nfsd_file_lru_walk_list(sc);
}

static struct shrinker	nfsd_file_shrinker = {
	.scan_objects = nfsd_file_lru_scan,
	.count_objects = nfsd_file_lru_count,
	.seeks = 1,
};

/*
 * Under the bucket lock, unhash every entry in bucket @hashval that refers
 * to @inode. Entries whose last reference was the hashtable's are queued on
 * @dispose for the caller to release.
 */
static void
__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
			struct list_head *dispose)
{
	struct nfsd_file	*nf;
	struct hlist_node	*tmp;

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
		if (inode == nf->nf_inode)
			nfsd_file_unhash_and_release_locked(nf, dispose);
	}
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
}

/**
 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put. Also ensure that any of the
 * fputs also have their final __fput done as well.
 */
void
nfsd_file_close_inode_sync(struct inode *inode)
{
	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_sync(&dispose);
}

/**
 * nfsd_file_close_inode - attempt to forcibly close a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them. Any
 * that had their last reference put are handed to the per-net disposal
 * work rather than being destroyed in the caller's context.
 */
static void
nfsd_file_close_inode(struct inode *inode)
{
	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_delayed(&dispose);
}

/**
 * nfsd_file_delayed_close - release nfsd_files queued for disposal
 * @work: work item embedded in a struct nfsd_fcache_disposal
 *
 * Take everything currently queued on the disposal context's freeme list
 * and drop one reference on each entry.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static void
nfsd_file_delayed_close(struct work_struct *work)
{
	LIST_HEAD(head);
	struct nfsd_fcache_disposal *l = container_of(work,
			struct nfsd_fcache_disposal, work);

	nfsd_file_list_remove_disposal(&head, l);
	nfsd_file_dispose_list(&head);
}

/*
 * Lease notifier callback: when the lock passed in @data is a lease,
 * synchronously close any cached files for its inode. Always returns 0.
 */
static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
			    void *data)
{
	struct file_lock *fl = data;

	/* Only close files for F_SETLEASE leases */
	if (fl->fl_flags & FL_LEASE)
		nfsd_file_close_inode_sync(file_inode(fl->fl_file));
	return 0;
}

static struct notifier_block nfsd_file_lease_notifier = {
	.notifier_call = nfsd_file_lease_notifier_call,
};

/*
 * fsnotify event handler for the nfsd group. Closes cached files for
 * @inode, except that an FS_ATTRIB event is ignored while the inode still
 * has links. Always returns 0.
 */
static int
nfsd_file_fsnotify_handle_event(struct fsnotify_group *group,
				struct inode *inode,
				u32 mask, const void *data, int data_type,
				const struct qstr *file_name, u32 cookie,
				struct fsnotify_iter_info *iter_info)
{
	trace_nfsd_file_fsnotify_handle_event(inode, mask);

	/* Should be no marks on non-regular files */
	if (!S_ISREG(inode->i_mode)) {
		WARN_ON_ONCE(1);
		return 0;
	}

	/* don't close files if this was not the last link */
	if (mask & FS_ATTRIB) {
		if (inode->i_nlink)
			return 0;
	}

	nfsd_file_close_inode(inode);
	return 0;
}

626 static const struct fsnotify_ops nfsd_file_fsnotify_ops = { 627 .handle_event = nfsd_file_fsnotify_handle_event, 628 .free_mark = nfsd_file_mark_free, 629 }; 630 631 int 632 nfsd_file_cache_init(void) 633 { 634 int ret = -ENOMEM; 635 unsigned int i; 636 637 clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); 638 639 if (nfsd_file_hashtbl) 640 return 0; 641 642 nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0); 643 if (!nfsd_filecache_wq) 644 goto out; 645 646 nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE, 647 sizeof(*nfsd_file_hashtbl), GFP_KERNEL); 648 if (!nfsd_file_hashtbl) { 649 pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n"); 650 goto out_err; 651 } 652 653 nfsd_file_slab = kmem_cache_create("nfsd_file", 654 sizeof(struct nfsd_file), 0, 0, NULL); 655 if (!nfsd_file_slab) { 656 pr_err("nfsd: unable to create nfsd_file_slab\n"); 657 goto out_err; 658 } 659 660 nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark", 661 sizeof(struct nfsd_file_mark), 0, 0, NULL); 662 if (!nfsd_file_mark_slab) { 663 pr_err("nfsd: unable to create nfsd_file_mark_slab\n"); 664 goto out_err; 665 } 666 667 668 ret = list_lru_init(&nfsd_file_lru); 669 if (ret) { 670 pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret); 671 goto out_err; 672 } 673 674 ret = register_shrinker(&nfsd_file_shrinker); 675 if (ret) { 676 pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret); 677 goto out_lru; 678 } 679 680 ret = lease_register_notifier(&nfsd_file_lease_notifier); 681 if (ret) { 682 pr_err("nfsd: unable to register lease notifier: %d\n", ret); 683 goto out_shrinker; 684 } 685 686 nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops); 687 if (IS_ERR(nfsd_file_fsnotify_group)) { 688 pr_err("nfsd: unable to create fsnotify group: %ld\n", 689 PTR_ERR(nfsd_file_fsnotify_group)); 690 nfsd_file_fsnotify_group = NULL; 691 goto out_notifier; 692 } 693 694 for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { 695 
INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head); 696 spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock); 697 } 698 699 INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker); 700 out: 701 return ret; 702 out_notifier: 703 lease_unregister_notifier(&nfsd_file_lease_notifier); 704 out_shrinker: 705 unregister_shrinker(&nfsd_file_shrinker); 706 out_lru: 707 list_lru_destroy(&nfsd_file_lru); 708 out_err: 709 kmem_cache_destroy(nfsd_file_slab); 710 nfsd_file_slab = NULL; 711 kmem_cache_destroy(nfsd_file_mark_slab); 712 nfsd_file_mark_slab = NULL; 713 kfree(nfsd_file_hashtbl); 714 nfsd_file_hashtbl = NULL; 715 destroy_workqueue(nfsd_filecache_wq); 716 nfsd_filecache_wq = NULL; 717 goto out; 718 } 719 720 /* 721 * Note this can deadlock with nfsd_file_lru_cb. 722 */ 723 void 724 nfsd_file_cache_purge(struct net *net) 725 { 726 unsigned int i; 727 struct nfsd_file *nf; 728 struct hlist_node *next; 729 LIST_HEAD(dispose); 730 bool del; 731 732 if (!nfsd_file_hashtbl) 733 return; 734 735 for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { 736 struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i]; 737 738 spin_lock(&nfb->nfb_lock); 739 hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) { 740 if (net && nf->nf_net != net) 741 continue; 742 del = nfsd_file_unhash_and_release_locked(nf, &dispose); 743 744 /* 745 * Deadlock detected! Something marked this entry as 746 * unhased, but hasn't removed it from the hash list. 
747 */ 748 WARN_ON_ONCE(!del); 749 } 750 spin_unlock(&nfb->nfb_lock); 751 nfsd_file_dispose_list(&dispose); 752 } 753 } 754 755 static struct nfsd_fcache_disposal * 756 nfsd_alloc_fcache_disposal(struct net *net) 757 { 758 struct nfsd_fcache_disposal *l; 759 760 l = kmalloc(sizeof(*l), GFP_KERNEL); 761 if (!l) 762 return NULL; 763 INIT_WORK(&l->work, nfsd_file_delayed_close); 764 l->net = net; 765 spin_lock_init(&l->lock); 766 INIT_LIST_HEAD(&l->freeme); 767 return l; 768 } 769 770 static void 771 nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l) 772 { 773 rcu_assign_pointer(l->net, NULL); 774 cancel_work_sync(&l->work); 775 nfsd_file_dispose_list(&l->freeme); 776 kfree_rcu(l, rcu); 777 } 778 779 static void 780 nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l) 781 { 782 spin_lock(&laundrette_lock); 783 list_add_tail_rcu(&l->list, &laundrettes); 784 spin_unlock(&laundrette_lock); 785 } 786 787 static void 788 nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l) 789 { 790 spin_lock(&laundrette_lock); 791 list_del_rcu(&l->list); 792 spin_unlock(&laundrette_lock); 793 } 794 795 static int 796 nfsd_alloc_fcache_disposal_net(struct net *net) 797 { 798 struct nfsd_fcache_disposal *l; 799 800 l = nfsd_alloc_fcache_disposal(net); 801 if (!l) 802 return -ENOMEM; 803 nfsd_add_fcache_disposal(l); 804 return 0; 805 } 806 807 static void 808 nfsd_free_fcache_disposal_net(struct net *net) 809 { 810 struct nfsd_fcache_disposal *l; 811 812 rcu_read_lock(); 813 list_for_each_entry_rcu(l, &laundrettes, list) { 814 if (l->net != net) 815 continue; 816 nfsd_del_fcache_disposal(l); 817 rcu_read_unlock(); 818 nfsd_free_fcache_disposal(l); 819 return; 820 } 821 rcu_read_unlock(); 822 } 823 824 int 825 nfsd_file_cache_start_net(struct net *net) 826 { 827 return nfsd_alloc_fcache_disposal_net(net); 828 } 829 830 void 831 nfsd_file_cache_shutdown_net(struct net *net) 832 { 833 nfsd_file_cache_purge(net); 834 nfsd_free_fcache_disposal_net(net); 835 } 836 837 void 838 
nfsd_file_cache_shutdown(void) 839 { 840 set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); 841 842 lease_unregister_notifier(&nfsd_file_lease_notifier); 843 unregister_shrinker(&nfsd_file_shrinker); 844 /* 845 * make sure all callers of nfsd_file_lru_cb are done before 846 * calling nfsd_file_cache_purge 847 */ 848 cancel_delayed_work_sync(&nfsd_filecache_laundrette); 849 nfsd_file_cache_purge(NULL); 850 list_lru_destroy(&nfsd_file_lru); 851 rcu_barrier(); 852 fsnotify_put_group(nfsd_file_fsnotify_group); 853 nfsd_file_fsnotify_group = NULL; 854 kmem_cache_destroy(nfsd_file_slab); 855 nfsd_file_slab = NULL; 856 fsnotify_wait_marks_destroyed(); 857 kmem_cache_destroy(nfsd_file_mark_slab); 858 nfsd_file_mark_slab = NULL; 859 kfree(nfsd_file_hashtbl); 860 nfsd_file_hashtbl = NULL; 861 destroy_workqueue(nfsd_filecache_wq); 862 nfsd_filecache_wq = NULL; 863 } 864 865 static bool 866 nfsd_match_cred(const struct cred *c1, const struct cred *c2) 867 { 868 int i; 869 870 if (!uid_eq(c1->fsuid, c2->fsuid)) 871 return false; 872 if (!gid_eq(c1->fsgid, c2->fsgid)) 873 return false; 874 if (c1->group_info == NULL || c2->group_info == NULL) 875 return c1->group_info == c2->group_info; 876 if (c1->group_info->ngroups != c2->group_info->ngroups) 877 return false; 878 for (i = 0; i < c1->group_info->ngroups; i++) { 879 if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) 880 return false; 881 } 882 return true; 883 } 884 885 static struct nfsd_file * 886 nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, 887 unsigned int hashval, struct net *net) 888 { 889 struct nfsd_file *nf; 890 unsigned char need = may_flags & NFSD_FILE_MAY_MASK; 891 892 hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, 893 nf_node) { 894 if ((need & nf->nf_may) != need) 895 continue; 896 if (nf->nf_inode != inode) 897 continue; 898 if (nf->nf_net != net) 899 continue; 900 if (!nfsd_match_cred(nf->nf_cred, current_cred())) 901 continue; 902 if (nfsd_file_get(nf) != 
NULL) 903 return nf; 904 } 905 return NULL; 906 } 907 908 /** 909 * nfsd_file_is_cached - are there any cached open files for this fh? 910 * @inode: inode of the file to check 911 * 912 * Scan the hashtable for open files that match this fh. Returns true if there 913 * are any, and false if not. 914 */ 915 bool 916 nfsd_file_is_cached(struct inode *inode) 917 { 918 bool ret = false; 919 struct nfsd_file *nf; 920 unsigned int hashval; 921 922 hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); 923 924 rcu_read_lock(); 925 hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, 926 nf_node) { 927 if (inode == nf->nf_inode) { 928 ret = true; 929 break; 930 } 931 } 932 rcu_read_unlock(); 933 trace_nfsd_file_is_cached(inode, hashval, (int)ret); 934 return ret; 935 } 936 937 __be32 938 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, 939 unsigned int may_flags, struct nfsd_file **pnf) 940 { 941 __be32 status; 942 struct net *net = SVC_NET(rqstp); 943 struct nfsd_file *nf, *new; 944 struct inode *inode; 945 unsigned int hashval; 946 bool retry = true; 947 948 /* FIXME: skip this if fh_dentry is already set? 
*/ 949 status = fh_verify(rqstp, fhp, S_IFREG, 950 may_flags|NFSD_MAY_OWNER_OVERRIDE); 951 if (status != nfs_ok) 952 return status; 953 954 inode = d_inode(fhp->fh_dentry); 955 hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); 956 retry: 957 rcu_read_lock(); 958 nf = nfsd_file_find_locked(inode, may_flags, hashval, net); 959 rcu_read_unlock(); 960 if (nf) 961 goto wait_for_construction; 962 963 new = nfsd_file_alloc(inode, may_flags, hashval, net); 964 if (!new) { 965 trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, 966 NULL, nfserr_jukebox); 967 return nfserr_jukebox; 968 } 969 970 spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); 971 nf = nfsd_file_find_locked(inode, may_flags, hashval, net); 972 if (nf == NULL) 973 goto open_file; 974 spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); 975 nfsd_file_slab_free(&new->nf_rcu); 976 977 wait_for_construction: 978 wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE); 979 980 /* Did construction of this file fail? 
*/ 981 if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 982 if (!retry) { 983 status = nfserr_jukebox; 984 goto out; 985 } 986 retry = false; 987 nfsd_file_put_noref(nf); 988 goto retry; 989 } 990 991 this_cpu_inc(nfsd_file_cache_hits); 992 993 if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) { 994 bool write = (may_flags & NFSD_MAY_WRITE); 995 996 if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) || 997 (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) { 998 status = nfserrno(nfsd_open_break_lease( 999 file_inode(nf->nf_file), may_flags)); 1000 if (status == nfs_ok) { 1001 clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); 1002 if (write) 1003 clear_bit(NFSD_FILE_BREAK_WRITE, 1004 &nf->nf_flags); 1005 } 1006 } 1007 } 1008 out: 1009 if (status == nfs_ok) { 1010 *pnf = nf; 1011 } else { 1012 nfsd_file_put(nf); 1013 nf = NULL; 1014 } 1015 1016 trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status); 1017 return status; 1018 open_file: 1019 nf = new; 1020 /* Take reference for the hashtable */ 1021 refcount_inc(&nf->nf_ref); 1022 __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); 1023 __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); 1024 list_lru_add(&nfsd_file_lru, &nf->nf_lru); 1025 hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head); 1026 ++nfsd_file_hashtbl[hashval].nfb_count; 1027 nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, 1028 nfsd_file_hashtbl[hashval].nfb_count); 1029 spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); 1030 if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD) 1031 nfsd_file_gc(); 1032 1033 nf->nf_mark = nfsd_file_mark_find_or_create(nf); 1034 if (nf->nf_mark) 1035 status = nfsd_open_verified(rqstp, fhp, S_IFREG, 1036 may_flags, &nf->nf_file); 1037 else 1038 status = nfserr_jukebox; 1039 /* 1040 * If construction failed, or we raced with a call to unlink() 1041 * then unhash. 
1042 */ 1043 if (status != nfs_ok || inode->i_nlink == 0) { 1044 bool do_free; 1045 spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); 1046 do_free = nfsd_file_unhash(nf); 1047 spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); 1048 if (do_free) 1049 nfsd_file_put_noref(nf); 1050 } 1051 clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); 1052 smp_mb__after_atomic(); 1053 wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); 1054 goto out; 1055 } 1056 1057 /* 1058 * Note that fields may be added, removed or reordered in the future. Programs 1059 * scraping this file for info should test the labels to ensure they're 1060 * getting the correct field. 1061 */ 1062 static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) 1063 { 1064 unsigned int i, count = 0, longest = 0; 1065 unsigned long hits = 0; 1066 1067 /* 1068 * No need for spinlocks here since we're not terribly interested in 1069 * accuracy. We do take the nfsd_mutex simply to ensure that we 1070 * don't end up racing with server shutdown 1071 */ 1072 mutex_lock(&nfsd_mutex); 1073 if (nfsd_file_hashtbl) { 1074 for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { 1075 count += nfsd_file_hashtbl[i].nfb_count; 1076 longest = max(longest, nfsd_file_hashtbl[i].nfb_count); 1077 } 1078 } 1079 mutex_unlock(&nfsd_mutex); 1080 1081 for_each_possible_cpu(i) 1082 hits += per_cpu(nfsd_file_cache_hits, i); 1083 1084 seq_printf(m, "total entries: %u\n", count); 1085 seq_printf(m, "longest chain: %u\n", longest); 1086 seq_printf(m, "cache hits: %lu\n", hits); 1087 return 0; 1088 } 1089 1090 int nfsd_file_cache_stats_open(struct inode *inode, struct file *file) 1091 { 1092 return single_open(file, nfsd_file_cache_stats_show, NULL); 1093 } 1094