/*
 * Open file cache.
 *
 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
 */

#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>

#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"

#define NFSDDBG_FACILITY	NFSDDBG_FH

/* FIXME: dynamically size this for the machine somehow? */
#define NFSD_FILE_HASH_BITS		12
#define NFSD_FILE_HASH_SIZE		(1 << NFSD_FILE_HASH_BITS)
#define NFSD_LAUNDRETTE_DELAY		(2 * HZ)

#define NFSD_FILE_SHUTDOWN		(1)
#define NFSD_FILE_LRU_THRESHOLD	(4096UL)
#define NFSD_FILE_LRU_LIMIT		(NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

struct nfsd_fcache_bucket {
	struct hlist_head	nfb_head;
	spinlock_t		nfb_lock;
	unsigned int		nfb_count;
	unsigned int		nfb_maxcount;
};

static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);

struct nfsd_fcache_disposal {
	struct work_struct work;
	spinlock_t lock;
	struct list_head freeme;
};

static struct workqueue_struct *nfsd_filecache_wq __read_mostly;

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
static struct list_lru			nfsd_file_lru;
static long				nfsd_file_lru_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
static atomic_long_t			nfsd_filecache_count;
static struct delayed_work		nfsd_filecache_laundrette;

static void nfsd_file_gc(void);

static void
nfsd_file_schedule_laundrette(void)
{
	long count = atomic_long_read(&nfsd_filecache_count);

	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
		return;

	queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
			NFSD_LAUNDRETTE_DELAY);
}

static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}

static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}

static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
	if (!refcount_inc_not_zero(&nfm->nfm_ref))
		return NULL;
	return nfm;
}

static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (refcount_dec_and_test(&nfm->nfm_ref)) {
		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}

static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf)
{
	int			err;
	struct fsnotify_mark	*mark;
	struct nfsd_file_mark	*nfm = NULL, *new;
	struct inode *inode = nf->nf_inode;

	do {
		mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
				nfsd_file_fsnotify_group);
		if (mark) {
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
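			/*
			 * An fsnotify mark already exists for this inode;
			 * nfm is only non-NULL if we managed to take a
			 * reference before its refcount dropped to zero.
			 */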
			if (nfm) {
				fsnotify_put_mark(mark);
				break;
			}
			/* Avoid soft lockup race with nfsd_file_mark_put() */
			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
			fsnotify_put_mark(mark);
		} else
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		refcount_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}

static struct nfsd_file *
nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
		struct net *net)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (nf) {
		INIT_HLIST_NODE(&nf->nf_node);
		INIT_LIST_HEAD(&nf->nf_lru);
		nf->nf_file = NULL;
		nf->nf_cred = get_current_cred();
		nf->nf_net = net;
		nf->nf_flags = 0;
		nf->nf_inode = inode;
		nf->nf_hashval = hashval;
		refcount_set(&nf->nf_ref, 1);
		nf->nf_may = may & NFSD_FILE_MAY_MASK;
		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
			if (may & NFSD_MAY_WRITE)
				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
			if (may & NFSD_MAY_READ)
				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
		}
		nf->nf_mark = NULL;
		trace_nfsd_file_alloc(nf);
	}
	return nf;
}

static bool
nfsd_file_free(struct nfsd_file *nf)
{
	bool flush = false;

	trace_nfsd_file_put_final(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
		get_file(nf->nf_file);
		filp_close(nf->nf_file, NULL);
		fput(nf->nf_file);
		flush = true;
	}
	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
	return flush;
}

static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;
	struct address_space *mapping;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return false;
	mapping = file->f_mapping;
	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}

static int
nfsd_file_check_write_error(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return 0;
	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
}

static void
nfsd_file_flush(struct nfsd_file *nf)
{
	if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0)
		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
}

static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash(nf);

	if (nfsd_file_check_write_error(nf))
		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
	hlist_del_rcu(&nf->nf_node);
	atomic_long_dec(&nfsd_filecache_count);
}

static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_do_unhash(nf);
		if (!list_empty(&nf->nf_lru))
			list_lru_del(&nfsd_file_lru, &nf->nf_lru);
		return true;
	}
	return false;
}

/*
 * Return true if the file was unhashed.
 */
static bool
nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash_and_release_locked(nf);
	if (!nfsd_file_unhash(nf))
		return false;
	/* keep final reference for nfsd_file_lru_dispose */
	if (refcount_dec_not_one(&nf->nf_ref))
		return true;

	list_add(&nf->nf_lru, dispose);
	return true;
}

static void
nfsd_file_put_noref(struct nfsd_file *nf)
{
	trace_nfsd_file_put(nf);

	if (refcount_dec_and_test(&nf->nf_ref)) {
		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
		nfsd_file_free(nf);
	}
}

void
nfsd_file_put(struct nfsd_file *nf)
{
	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
		nfsd_file_flush(nf);
		nfsd_file_put_noref(nf);
	} else {
		nfsd_file_put_noref(nf);
		if (nf->nf_file)
			nfsd_file_schedule_laundrette();
	}
	if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
		nfsd_file_gc();
}

struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
	if (likely(refcount_inc_not_zero(&nf->nf_ref)))
		return nf;
	return NULL;
}

static void
nfsd_file_dispose_list(struct list_head *dispose)
{
	struct nfsd_file *nf;

	while(!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_flush(nf);
		nfsd_file_put_noref(nf);
	}
}

static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
	bool flush = false;
	struct nfsd_file *nf;

	while(!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_flush(nf);
		if (!refcount_dec_and_test(&nf->nf_ref))
			continue;
		if (nfsd_file_free(nf))
			flush = true;
	}
	if (flush)
		flush_delayed_fput();
}

static void
nfsd_file_list_remove_disposal(struct list_head *dst,
		struct nfsd_fcache_disposal *l)
{
	spin_lock(&l->lock);
	list_splice_init(&l->freeme, dst);
	spin_unlock(&l->lock);
}

static void
nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct nfsd_fcache_disposal *l = nn->fcache_disposal;

	spin_lock(&l->lock);
	list_splice_tail_init(files, &l->freeme);
	spin_unlock(&l->lock);
	queue_work(nfsd_filecache_wq, &l->work);
}

static void
nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
		struct net *net)
{
	struct nfsd_file *nf, *tmp;

	list_for_each_entry_safe(nf, tmp, src, nf_lru) {
		if (nf->nf_net == net)
			list_move_tail(&nf->nf_lru, dst);
	}
}

static void
nfsd_file_dispose_list_delayed(struct list_head *dispose)
{
	LIST_HEAD(list);
	struct nfsd_file *nf;

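	/*
	 * Batch the remaining entries by net and hand each batch to that
	 * net's disposal workqueue.
	 */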
	while(!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
		nfsd_file_list_add_disposal(&list, nf->nf_net);
	}
}

/*
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/*
	 * Do a lockless refcount check. The hashtable holds one reference, so
	 * we look to see if anything else has a reference, or if any have
	 * been put since the shrinker last ran. Those don't get unhashed and
	 * released.
	 *
	 * Note that in the put path, we set the flag and then decrement the
	 * counter. Here we check the counter and then test and clear the flag.
	 * That order is deliberate to ensure that we can do this locklessly.
	 */
	if (refcount_read(&nf->nf_ref) > 1)
		goto out_skip;

	/*
	 * Don't throw out files that are still undergoing I/O or
	 * that have uncleared errors pending.
	 */
	if (nfsd_file_check_writeback(nf))
		goto out_skip;

	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
		goto out_skip;

	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
		goto out_skip;

	list_lru_isolate_move(lru, &nf->nf_lru, head);
	return LRU_REMOVED;
out_skip:
	return LRU_SKIP;
}

static unsigned long
nfsd_file_lru_walk_list(struct shrink_control *sc)
{
	LIST_HEAD(head);
	struct nfsd_file *nf;
	unsigned long ret;

	if (sc)
		ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
				nfsd_file_lru_cb, &head);
	else
		ret = list_lru_walk(&nfsd_file_lru,
				nfsd_file_lru_cb,
				&head, LONG_MAX);
	list_for_each_entry(nf, &head, nf_lru) {
		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_do_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
	}
	nfsd_file_dispose_list_delayed(&head);
	return ret;
}

static void
nfsd_file_gc(void)
{
	nfsd_file_lru_walk_list(NULL);
}

static void
nfsd_file_gc_worker(struct work_struct *work)
{
	nfsd_file_gc();
	nfsd_file_schedule_laundrette();
}

static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_count(&nfsd_file_lru);
}

static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	return nfsd_file_lru_walk_list(sc);
}

static struct shrinker	nfsd_file_shrinker = {
	.scan_objects = nfsd_file_lru_scan,
	.count_objects = nfsd_file_lru_count,
	.seeks = 1,
};

static void
__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
			struct list_head *dispose)
{
	struct nfsd_file	*nf;
	struct hlist_node	*tmp;

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
		if (inode == nf->nf_inode)
			nfsd_file_unhash_and_release_locked(nf, dispose);
	}
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
}

/**
 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put. Also ensure that any of the
 * fputs also have their final __fput done as well.
 */
void
nfsd_file_close_inode_sync(struct inode *inode)
{
	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_sync(&dispose);
}

/**
 * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put.
 */
static void
nfsd_file_close_inode(struct inode *inode)
{
	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_delayed(&dispose);
}

/**
 * nfsd_file_delayed_close - close unused nfsd_files
 * @work: dummy
 *
 * Walk the LRU list and close any entries that have not been used since
 * the last scan.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static void
nfsd_file_delayed_close(struct work_struct *work)
{
	LIST_HEAD(head);
	struct nfsd_fcache_disposal *l = container_of(work,
			struct nfsd_fcache_disposal, work);

	nfsd_file_list_remove_disposal(&head, l);
	nfsd_file_dispose_list(&head);
}

static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
			    void *data)
{
	struct file_lock *fl = data;

	/* Only close files for F_SETLEASE leases */
	if (fl->fl_flags & FL_LEASE)
		nfsd_file_close_inode_sync(file_inode(fl->fl_file));
	return 0;
}

static struct notifier_block nfsd_file_lease_notifier = {
	.notifier_call = nfsd_file_lease_notifier_call,
};

static int
nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
				struct inode *inode, struct inode *dir,
				const struct qstr *name, u32 cookie)
{
	if (WARN_ON_ONCE(!inode))
		return 0;

	trace_nfsd_file_fsnotify_handle_event(inode, mask);

	/* Should be no marks on non-regular files */
	if (!S_ISREG(inode->i_mode)) {
		WARN_ON_ONCE(1);
		return 0;
	}

	/* don't close files if this was not the last link */
	if (mask & FS_ATTRIB) {
		if (inode->i_nlink)
			return 0;
	}

	nfsd_file_close_inode(inode);
	return 0;
}


static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
	.handle_inode_event = nfsd_file_fsnotify_handle_event,
	.free_mark = nfsd_file_mark_free,
};

int
nfsd_file_cache_init(void)
{
	int		ret = -ENOMEM;
	unsigned int	i;

	clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	if (nfsd_file_hashtbl)
		return 0;

	nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
	if (!nfsd_filecache_wq)
		goto out;

	nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE,
				sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
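	/* may fall back to vmalloc for a table this size; paired with kvfree() */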
	if (!nfsd_file_hashtbl) {
		pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
		goto out_err;
	}

	nfsd_file_slab = kmem_cache_create("nfsd_file",
				sizeof(struct nfsd_file), 0, 0, NULL);
	if (!nfsd_file_slab) {
		pr_err("nfsd: unable to create nfsd_file_slab\n");
		goto out_err;
	}

	nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
					sizeof(struct nfsd_file_mark), 0, 0, NULL);
	if (!nfsd_file_mark_slab) {
		pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
		goto out_err;
	}


	ret = list_lru_init(&nfsd_file_lru);
	if (ret) {
		pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
		goto out_err;
	}

	ret = register_shrinker(&nfsd_file_shrinker);
	if (ret) {
		pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
		goto out_lru;
	}

	ret = lease_register_notifier(&nfsd_file_lease_notifier);
	if (ret) {
		pr_err("nfsd: unable to register lease notifier: %d\n", ret);
		goto out_shrinker;
	}

	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
	if (IS_ERR(nfsd_file_fsnotify_group)) {
		pr_err("nfsd: unable to create fsnotify group: %ld\n",
			PTR_ERR(nfsd_file_fsnotify_group));
		ret = PTR_ERR(nfsd_file_fsnotify_group);
		nfsd_file_fsnotify_group = NULL;
		goto out_notifier;
	}

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
		spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
	}

	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
out:
	return ret;
out_notifier:
	lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
	unregister_shrinker(&nfsd_file_shrinker);
out_lru:
	list_lru_destroy(&nfsd_file_lru);
out_err:
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kvfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
	goto out;
}

/*
 * Note this can deadlock with nfsd_file_lru_cb.
 */
void
nfsd_file_cache_purge(struct net *net)
{
	unsigned int		i;
	struct nfsd_file	*nf;
	struct hlist_node	*next;
	LIST_HEAD(dispose);
	bool del;

	if (!nfsd_file_hashtbl)
		return;

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];

		spin_lock(&nfb->nfb_lock);
		hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
			if (net && nf->nf_net != net)
				continue;
			del = nfsd_file_unhash_and_release_locked(nf, &dispose);

			/*
			 * Deadlock detected! Something marked this entry as
			 * unhashed, but hasn't removed it from the hash list.
			 */
			WARN_ON_ONCE(!del);
		}
		spin_unlock(&nfb->nfb_lock);
		nfsd_file_dispose_list(&dispose);
	}
}

static struct nfsd_fcache_disposal *
nfsd_alloc_fcache_disposal(void)
{
	struct nfsd_fcache_disposal *l;

	l = kmalloc(sizeof(*l), GFP_KERNEL);
	if (!l)
		return NULL;
	INIT_WORK(&l->work, nfsd_file_delayed_close);
	spin_lock_init(&l->lock);
	INIT_LIST_HEAD(&l->freeme);
	return l;
}

static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	cancel_work_sync(&l->work);
	nfsd_file_dispose_list(&l->freeme);
	kfree(l);
}

static void
nfsd_free_fcache_disposal_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct nfsd_fcache_disposal *l = nn->fcache_disposal;

	nfsd_free_fcache_disposal(l);
}

int
nfsd_file_cache_start_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	nn->fcache_disposal = nfsd_alloc_fcache_disposal();
	return nn->fcache_disposal ? 0 : -ENOMEM;
}

void
nfsd_file_cache_shutdown_net(struct net *net)
{
	nfsd_file_cache_purge(net);
	nfsd_free_fcache_disposal_net(net);
}

void
nfsd_file_cache_shutdown(void)
{
	set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	lease_unregister_notifier(&nfsd_file_lease_notifier);
	unregister_shrinker(&nfsd_file_shrinker);
	/*
	 * make sure all callers of nfsd_file_lru_cb are done before
	 * calling nfsd_file_cache_purge
	 */
	cancel_delayed_work_sync(&nfsd_filecache_laundrette);
	nfsd_file_cache_purge(NULL);
	list_lru_destroy(&nfsd_file_lru);
	rcu_barrier();
	fsnotify_put_group(nfsd_file_fsnotify_group);
	nfsd_file_fsnotify_group = NULL;
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	fsnotify_wait_marks_destroyed();
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kvfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
}

static bool
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
{
	int i;

	if (!uid_eq(c1->fsuid, c2->fsuid))
		return false;
	if (!gid_eq(c1->fsgid, c2->fsgid))
		return false;
	if (c1->group_info == NULL || c2->group_info == NULL)
		return c1->group_info == c2->group_info;
	if (c1->group_info->ngroups != c2->group_info->ngroups)
		return false;
	for (i = 0; i < c1->group_info->ngroups; i++) {
		if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
			return false;
	}
	return true;
}

static struct nfsd_file *
nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
			unsigned int hashval, struct net *net)
{
	struct nfsd_file *nf;
	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;

	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) {
		if (nf->nf_may != need)
			continue;
		if (nf->nf_inode != inode)
			continue;
		if (nf->nf_net != net)
			continue;
		if (!nfsd_match_cred(nf->nf_cred, current_cred()))
			continue;
		if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags))
			continue;
		if (nfsd_file_get(nf) != NULL)
			return nf;
	}
	return NULL;
}

/**
 * nfsd_file_is_cached - are there any cached open files for this fh?
 * @inode: inode of the file to check
 *
 * Scan the hashtable for open files that match this fh. Returns true if there
 * are any, and false if not.
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
	bool			ret = false;
	struct nfsd_file	*nf;
	unsigned int		hashval;

	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);

	rcu_read_lock();
	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if (inode == nf->nf_inode) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	trace_nfsd_file_is_cached(inode, hashval, (int)ret);
	return ret;
}

__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
{
	__be32	status;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_file *nf, *new;
	struct inode *inode;
	unsigned int hashval;
	bool retry = true;

	/* FIXME: skip this if fh_dentry is already set? */
	status = fh_verify(rqstp, fhp, S_IFREG,
				may_flags|NFSD_MAY_OWNER_OVERRIDE);
	if (status != nfs_ok)
		return status;

	inode = d_inode(fhp->fh_dentry);
	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
retry:
	rcu_read_lock();
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	rcu_read_unlock();
	if (nf)
		goto wait_for_construction;

	new = nfsd_file_alloc(inode, may_flags, hashval, net);
	if (!new) {
		trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
					NULL, nfserr_jukebox);
		return nfserr_jukebox;
	}

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	if (nf == NULL)
		goto open_file;
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nfsd_file_slab_free(&new->nf_rcu);

wait_for_construction:
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		if (!retry) {
			status = nfserr_jukebox;
			goto out;
		}
		retry = false;
		nfsd_file_put_noref(nf);
		goto retry;
	}

	this_cpu_inc(nfsd_file_cache_hits);

	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
		bool write = (may_flags & NFSD_MAY_WRITE);

		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
			status = nfserrno(nfsd_open_break_lease(
					file_inode(nf->nf_file), may_flags));
			if (status == nfs_ok) {
				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
				if (write)
					clear_bit(NFSD_FILE_BREAK_WRITE,
						  &nf->nf_flags);
			}
		}
	}
out:
	if (status == nfs_ok) {
		*pnf = nf;
	} else {
		nfsd_file_put(nf);
		nf = NULL;
	}

	trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
	return status;
open_file:
	nf = new;
	/* Take reference for the hashtable */
	refcount_inc(&nf->nf_ref);
	__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
	__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
	list_lru_add(&nfsd_file_lru, &nf->nf_lru);
	hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
	++nfsd_file_hashtbl[hashval].nfb_count;
	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
			nfsd_file_hashtbl[hashval].nfb_count);
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
		nfsd_file_gc();

	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
	if (nf->nf_mark)
		status = nfsd_open_verified(rqstp, fhp, S_IFREG,
				may_flags, &nf->nf_file);
	else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 */
	if (status != nfs_ok || inode->i_nlink == 0) {
		bool do_free;
		spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
		do_free = nfsd_file_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
		if (do_free)
			nfsd_file_put_noref(nf);
	}
	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
	smp_mb__after_atomic();
	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
	goto out;
}

/*
 * Note that fields may be added, removed or reordered in the future. Programs
 * scraping this file for info should test the labels to ensure they're
 * getting the correct field.
 */
static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
	unsigned int i, count = 0, longest = 0;
	unsigned long hits = 0;

	/*
	 * No need for spinlocks here since we're not terribly interested in
	 * accuracy. We do take the nfsd_mutex simply to ensure that we
	 * don't end up racing with server shutdown
	 */
	mutex_lock(&nfsd_mutex);
	if (nfsd_file_hashtbl) {
		for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
			count += nfsd_file_hashtbl[i].nfb_count;
			longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
		}
	}
	mutex_unlock(&nfsd_mutex);

	for_each_possible_cpu(i)
		hits += per_cpu(nfsd_file_cache_hits, i);

	seq_printf(m, "total entries: %u\n", count);
	seq_printf(m, "longest chain: %u\n", longest);
	seq_printf(m, "cache hits:    %lu\n", hits);
	return 0;
}

int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, nfsd_file_cache_stats_show, NULL);
}