/*
 * Open file cache.
 *
 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
 */

#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>

#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"

#define NFSDDBG_FACILITY	NFSDDBG_FH

/* FIXME: dynamically size this for the machine somehow? */
#define NFSD_FILE_HASH_BITS	12
#define NFSD_FILE_HASH_SIZE	(1 << NFSD_FILE_HASH_BITS)
#define NFSD_LAUNDRETTE_DELAY	(2 * HZ)

#define NFSD_FILE_SHUTDOWN	(1)
#define NFSD_FILE_LRU_THRESHOLD	(4096UL)
#define NFSD_FILE_LRU_LIMIT	(NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

struct nfsd_fcache_bucket {
	struct hlist_head	nfb_head;
	spinlock_t		nfb_lock;
	unsigned int		nfb_count;
	unsigned int		nfb_maxcount;
};

static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);

struct nfsd_fcache_disposal {
	struct work_struct work;
	spinlock_t lock;
	struct list_head freeme;
};

static struct workqueue_struct *nfsd_filecache_wq __read_mostly;

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
static struct list_lru			nfsd_file_lru;
static long				nfsd_file_lru_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
static atomic_long_t			nfsd_filecache_count;
static struct delayed_work		nfsd_filecache_laundrette;

static void nfsd_file_gc(void);

static void
nfsd_file_schedule_laundrette(void)
{
	long count = atomic_long_read(&nfsd_filecache_count);

	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
		return;

	queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
			   NFSD_LAUNDRETTE_DELAY);
}

static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}

static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}

static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
	if (!refcount_inc_not_zero(&nfm->nfm_ref))
		return NULL;
	return nfm;
}

static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (refcount_dec_and_test(&nfm->nfm_ref)) {
		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}
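
/*
 * Find the fsnotify mark for @nf's inode, or create one if none exists
 * yet. If a mark is found but its refcount has already dropped to zero,
 * it is being torn down by nfsd_file_mark_put(); destroy it here so the
 * retry can make progress instead of spinning. An -EEXIST from a racing
 * creator sends us back around the loop to pick up the winner's mark.
 */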
static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf)
{
	int err;
	struct fsnotify_mark *mark;
	struct nfsd_file_mark *nfm = NULL, *new;
	struct inode *inode = nf->nf_inode;

	do {
		fsnotify_group_lock(nfsd_file_fsnotify_group);
		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
					  nfsd_file_fsnotify_group);
		if (mark) {
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			fsnotify_group_unlock(nfsd_file_fsnotify_group);
			if (nfm) {
				fsnotify_put_mark(mark);
				break;
			}
			/* Avoid soft lockup race with nfsd_file_mark_put() */
			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
			fsnotify_put_mark(mark);
		} else {
			fsnotify_group_unlock(nfsd_file_fsnotify_group);
		}

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		refcount_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}

static struct nfsd_file *
nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
		struct net *net)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (nf) {
		INIT_HLIST_NODE(&nf->nf_node);
		INIT_LIST_HEAD(&nf->nf_lru);
		nf->nf_file = NULL;
		nf->nf_cred = get_current_cred();
		nf->nf_net = net;
		nf->nf_flags = 0;
		nf->nf_inode = inode;
		nf->nf_hashval = hashval;
		refcount_set(&nf->nf_ref, 1);
		nf->nf_may = may & NFSD_FILE_MAY_MASK;
		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
			if (may & NFSD_MAY_WRITE)
				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
			if (may & NFSD_MAY_READ)
				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
		}
		nf->nf_mark = NULL;
		trace_nfsd_file_alloc(nf);
	}
	return nf;
}

static bool
nfsd_file_free(struct nfsd_file *nf)
{
	bool flush = false;

	trace_nfsd_file_put_final(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
		get_file(nf->nf_file);
		filp_close(nf->nf_file, NULL);
		fput(nf->nf_file);
		flush = true;
	}
	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
	return flush;
}

static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;
	struct address_space *mapping;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return false;
	mapping = file->f_mapping;
	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}

static int
nfsd_file_check_write_error(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return 0;
	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
}

static void
nfsd_file_flush(struct nfsd_file *nf)
{
	if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0)
		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
}
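
/*
 * Remove @nf from its hash bucket and drop the global cache count. The
 * caller must hold the bucket lock. If the file has an unreported write
 * error, the server's write verifier is reset so that clients will
 * resend any unstable writes.
 */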
static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash(nf);

	if (nfsd_file_check_write_error(nf))
		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
	hlist_del_rcu(&nf->nf_node);
	atomic_long_dec(&nfsd_filecache_count);
}

static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_do_unhash(nf);
		if (!list_empty(&nf->nf_lru))
			list_lru_del(&nfsd_file_lru, &nf->nf_lru);
		return true;
	}
	return false;
}

/*
 * Return true if the file was unhashed.
 */
static bool
nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash_and_release_locked(nf);
	if (!nfsd_file_unhash(nf))
		return false;
	/* keep the final reference for the dispose list */
	if (refcount_dec_not_one(&nf->nf_ref))
		return true;

	list_add(&nf->nf_lru, dispose);
	return true;
}

static void
nfsd_file_put_noref(struct nfsd_file *nf)
{
	trace_nfsd_file_put(nf);

	if (refcount_dec_and_test(&nf->nf_ref)) {
		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
		nfsd_file_free(nf);
	}
}

void
nfsd_file_put(struct nfsd_file *nf)
{
	might_sleep();

	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
		nfsd_file_flush(nf);
		nfsd_file_put_noref(nf);
	} else {
		nfsd_file_put_noref(nf);
		if (nf->nf_file)
			nfsd_file_schedule_laundrette();
	}
	if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
		nfsd_file_gc();
}

struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
	if (likely(refcount_inc_not_zero(&nf->nf_ref)))
		return nf;
	return NULL;
}

static void
nfsd_file_dispose_list(struct list_head *dispose)
{
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_flush(nf);
		nfsd_file_put_noref(nf);
	}
}

static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
	bool flush = false;
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_flush(nf);
		if (!refcount_dec_and_test(&nf->nf_ref))
			continue;
		if (nfsd_file_free(nf))
			flush = true;
	}
	if (flush)
		flush_delayed_fput();
}

static void
nfsd_file_list_remove_disposal(struct list_head *dst,
			       struct nfsd_fcache_disposal *l)
{
	spin_lock(&l->lock);
	list_splice_init(&l->freeme, dst);
	spin_unlock(&l->lock);
}

static void
nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct nfsd_fcache_disposal *l = nn->fcache_disposal;

	spin_lock(&l->lock);
	list_splice_tail_init(files, &l->freeme);
	spin_unlock(&l->lock);
	queue_work(nfsd_filecache_wq, &l->work);
}

static void
nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
			  struct net *net)
{
	struct nfsd_file *nf, *tmp;

	list_for_each_entry_safe(nf, tmp, src, nf_lru) {
		if (nf->nf_net == net)
			list_move_tail(&nf->nf_lru, dst);
	}
}
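
/*
 * Queue the entries on @dispose for delayed disposal: peel off the
 * files belonging to one net at a time and hand each batch to that
 * net's nfsd_fcache_disposal work.
 */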
static void
nfsd_file_dispose_list_delayed(struct list_head *dispose)
{
	LIST_HEAD(list);
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
		nfsd_file_list_add_disposal(&list, nf->nf_net);
	}
}

/*
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/*
	 * Do a lockless refcount check. The hashtable holds one reference, so
	 * we look to see if anything else has a reference, or if any have
	 * been put since the shrinker last ran. Those don't get unhashed and
	 * released.
	 *
	 * Note that in the put path, we set the flag and then decrement the
	 * counter. Here we check the counter and then test and clear the flag.
	 * That order is deliberate to ensure that we can do this locklessly.
	 */
	if (refcount_read(&nf->nf_ref) > 1)
		goto out_skip;

	/*
	 * Don't throw out files that are still undergoing I/O or
	 * that have uncleared errors pending.
	 */
	if (nfsd_file_check_writeback(nf))
		goto out_skip;

	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
		goto out_skip;

	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
		goto out_skip;

	list_lru_isolate_move(lru, &nf->nf_lru, head);
	return LRU_REMOVED;
out_skip:
	return LRU_SKIP;
}

static unsigned long
nfsd_file_lru_walk_list(struct shrink_control *sc)
{
	LIST_HEAD(head);
	struct nfsd_file *nf;
	unsigned long ret;

	if (sc)
		ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
					   nfsd_file_lru_cb, &head);
	else
		ret = list_lru_walk(&nfsd_file_lru,
				    nfsd_file_lru_cb,
				    &head, LONG_MAX);
	list_for_each_entry(nf, &head, nf_lru) {
		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_do_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
	}
	nfsd_file_dispose_list_delayed(&head);
	return ret;
}

static void
nfsd_file_gc(void)
{
	nfsd_file_lru_walk_list(NULL);
}

static void
nfsd_file_gc_worker(struct work_struct *work)
{
	nfsd_file_gc();
	nfsd_file_schedule_laundrette();
}

static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_count(&nfsd_file_lru);
}

static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	return nfsd_file_lru_walk_list(sc);
}

static struct shrinker	nfsd_file_shrinker = {
	.scan_objects = nfsd_file_lru_scan,
	.count_objects = nfsd_file_lru_count,
	.seeks = 1,
};
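
/*
 * Unhash every file in bucket @hashval that belongs to @inode, dropping
 * the hashtable's reference to each. Files holding no other references
 * are moved to @dispose (keeping their final reference) for the caller
 * to dispose of.
 */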
static void
__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
			struct list_head *dispose)
{
	struct nfsd_file	*nf;
	struct hlist_node	*tmp;

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
		if (inode == nf->nf_inode)
			nfsd_file_unhash_and_release_locked(nf, dispose);
	}
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
}

/**
 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put. Also ensure that any of the
 * fputs also have their final __fput done as well.
 */
void
nfsd_file_close_inode_sync(struct inode *inode)
{
	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_sync(&dispose);
}

/**
 * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put.
 */
static void
nfsd_file_close_inode(struct inode *inode)
{
	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_delayed(&dispose);
}

/**
 * nfsd_file_delayed_close - close unused nfsd_files
 * @work: the work_struct embedded in this net's nfsd_fcache_disposal
 *
 * Pull this net's disposal list off its nfsd_fcache_disposal and close
 * each entry on it.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static void
nfsd_file_delayed_close(struct work_struct *work)
{
	LIST_HEAD(head);
	struct nfsd_fcache_disposal *l = container_of(work,
			struct nfsd_fcache_disposal, work);

	nfsd_file_list_remove_disposal(&head, l);
	nfsd_file_dispose_list(&head);
}

static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
			      void *data)
{
	struct file_lock *fl = data;

	/* Only close files for F_SETLEASE leases */
	if (fl->fl_flags & FL_LEASE)
		nfsd_file_close_inode_sync(file_inode(fl->fl_file));
	return 0;
}

static struct notifier_block nfsd_file_lease_notifier = {
	.notifier_call = nfsd_file_lease_notifier_call,
};

static int
nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
				struct inode *inode, struct inode *dir,
				const struct qstr *name, u32 cookie)
{
	if (WARN_ON_ONCE(!inode))
		return 0;

	trace_nfsd_file_fsnotify_handle_event(inode, mask);

	/* Should be no marks on non-regular files */
	if (!S_ISREG(inode->i_mode)) {
		WARN_ON_ONCE(1);
		return 0;
	}

	/* don't close files if this was not the last link */
	if (mask & FS_ATTRIB) {
		if (inode->i_nlink)
			return 0;
	}

	nfsd_file_close_inode(inode);
	return 0;
}

static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
	.handle_inode_event = nfsd_file_fsnotify_handle_event,
	.free_mark = nfsd_file_mark_free,
};
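
/*
 * Set up the global file cache state: the workqueue, hash table, slabs,
 * LRU list, shrinker, lease notifier and fsnotify group. A second call
 * is a no-op once nfsd_file_hashtbl is set; callers are assumed to
 * serialize init against shutdown (in practice, under nfsd_mutex).
 */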
int
nfsd_file_cache_init(void)
{
	int		ret = -ENOMEM;
	unsigned int	i;

	clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	if (nfsd_file_hashtbl)
		return 0;

	nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
	if (!nfsd_filecache_wq)
		goto out;

	nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE,
				     sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
	if (!nfsd_file_hashtbl) {
		pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
		goto out_err;
	}

	nfsd_file_slab = kmem_cache_create("nfsd_file",
				sizeof(struct nfsd_file), 0, 0, NULL);
	if (!nfsd_file_slab) {
		pr_err("nfsd: unable to create nfsd_file_slab\n");
		goto out_err;
	}

	nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
					sizeof(struct nfsd_file_mark), 0, 0, NULL);
	if (!nfsd_file_mark_slab) {
		pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
		goto out_err;
	}

	ret = list_lru_init(&nfsd_file_lru);
	if (ret) {
		pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
		goto out_err;
	}

	ret = register_shrinker(&nfsd_file_shrinker);
	if (ret) {
		pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
		goto out_lru;
	}

	ret = lease_register_notifier(&nfsd_file_lease_notifier);
	if (ret) {
		pr_err("nfsd: unable to register lease notifier: %d\n", ret);
		goto out_shrinker;
	}

	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops,
							FSNOTIFY_GROUP_NOFS);
	if (IS_ERR(nfsd_file_fsnotify_group)) {
		pr_err("nfsd: unable to create fsnotify group: %ld\n",
			PTR_ERR(nfsd_file_fsnotify_group));
		ret = PTR_ERR(nfsd_file_fsnotify_group);
		nfsd_file_fsnotify_group = NULL;
		goto out_notifier;
	}

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
		spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
	}

	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
out:
	return ret;
out_notifier:
	lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
	unregister_shrinker(&nfsd_file_shrinker);
out_lru:
	list_lru_destroy(&nfsd_file_lru);
out_err:
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kvfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
	goto out;
}

/*
 * Note this can deadlock with nfsd_file_lru_cb.
 */
void
nfsd_file_cache_purge(struct net *net)
{
	unsigned int		i;
	struct nfsd_file	*nf;
	struct hlist_node	*next;
	LIST_HEAD(dispose);
	bool del;

	if (!nfsd_file_hashtbl)
		return;

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];

		spin_lock(&nfb->nfb_lock);
		hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
			if (net && nf->nf_net != net)
				continue;
			del = nfsd_file_unhash_and_release_locked(nf, &dispose);

			/*
			 * Deadlock detected! Something marked this entry as
			 * unhashed, but hasn't removed it from the hash list.
			 */
			WARN_ON_ONCE(!del);
		}
		spin_unlock(&nfb->nfb_lock);
		nfsd_file_dispose_list(&dispose);
	}
}
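
/*
 * Each net namespace gets one nfsd_fcache_disposal: a lock-protected
 * "freeme" list plus the work item (nfsd_file_delayed_close) that
 * drains it on the nfsd_filecache workqueue.
 */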
static struct nfsd_fcache_disposal *
nfsd_alloc_fcache_disposal(void)
{
	struct nfsd_fcache_disposal *l;

	l = kmalloc(sizeof(*l), GFP_KERNEL);
	if (!l)
		return NULL;
	INIT_WORK(&l->work, nfsd_file_delayed_close);
	spin_lock_init(&l->lock);
	INIT_LIST_HEAD(&l->freeme);
	return l;
}

static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	cancel_work_sync(&l->work);
	nfsd_file_dispose_list(&l->freeme);
	kfree(l);
}

static void
nfsd_free_fcache_disposal_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct nfsd_fcache_disposal *l = nn->fcache_disposal;

	nfsd_free_fcache_disposal(l);
}

int
nfsd_file_cache_start_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	nn->fcache_disposal = nfsd_alloc_fcache_disposal();
	return nn->fcache_disposal ? 0 : -ENOMEM;
}

void
nfsd_file_cache_shutdown_net(struct net *net)
{
	nfsd_file_cache_purge(net);
	nfsd_free_fcache_disposal_net(net);
}

void
nfsd_file_cache_shutdown(void)
{
	set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	lease_unregister_notifier(&nfsd_file_lease_notifier);
	unregister_shrinker(&nfsd_file_shrinker);
	/*
	 * make sure all callers of nfsd_file_lru_cb are done before
	 * calling nfsd_file_cache_purge
	 */
	cancel_delayed_work_sync(&nfsd_filecache_laundrette);
	nfsd_file_cache_purge(NULL);
	list_lru_destroy(&nfsd_file_lru);
	rcu_barrier();
	fsnotify_put_group(nfsd_file_fsnotify_group);
	nfsd_file_fsnotify_group = NULL;
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	fsnotify_wait_marks_destroyed();
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kvfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
}

static bool
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
{
	int i;

	if (!uid_eq(c1->fsuid, c2->fsuid))
		return false;
	if (!gid_eq(c1->fsgid, c2->fsgid))
		return false;
	if (c1->group_info == NULL || c2->group_info == NULL)
		return c1->group_info == c2->group_info;
	if (c1->group_info->ngroups != c2->group_info->ngroups)
		return false;
	for (i = 0; i < c1->group_info->ngroups; i++) {
		if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
			return false;
	}
	return true;
}
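
/*
 * Search bucket @hashval for a file matching @inode, @net, the access
 * bits in @may_flags and the current credentials. Walks the hash chain
 * under RCU, so the caller must hold either rcu_read_lock() or the
 * bucket lock; returns a referenced nfsd_file or NULL.
 */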
static struct nfsd_file *
nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
		      unsigned int hashval, struct net *net)
{
	struct nfsd_file *nf;
	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;

	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) {
		if (nf->nf_may != need)
			continue;
		if (nf->nf_inode != inode)
			continue;
		if (nf->nf_net != net)
			continue;
		if (!nfsd_match_cred(nf->nf_cred, current_cred()))
			continue;
		if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags))
			continue;
		if (nfsd_file_get(nf) != NULL)
			return nf;
	}
	return NULL;
}

/**
 * nfsd_file_is_cached - are there any cached open files for this inode?
 * @inode: inode of the file to check
 *
 * Scan the hashtable for open files that match this inode. Returns true if
 * there are any, and false if not.
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
	bool			ret = false;
	struct nfsd_file	*nf;
	unsigned int		hashval;

	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);

	rcu_read_lock();
	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if (inode == nf->nf_inode) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	trace_nfsd_file_is_cached(inode, hashval, (int)ret);
	return ret;
}
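
/*
 * Common guts of nfsd_file_acquire() and nfsd_file_create(). Do a
 * lockless lookup first; on a miss, allocate, retake the bucket lock
 * and recheck, so that a racing inserter wins and the loser frees its
 * copy. The inserter constructs the entry (fsnotify mark, plus the open
 * itself when @open is true) with NFSD_FILE_PENDING set; other tasks
 * wait on that bit and retry once if construction fails.
 */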
static __be32
nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		     unsigned int may_flags, struct nfsd_file **pnf, bool open)
{
	__be32	status;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_file *nf, *new;
	struct inode *inode;
	unsigned int hashval;
	bool retry = true;

	/* FIXME: skip this if fh_dentry is already set? */
	status = fh_verify(rqstp, fhp, S_IFREG,
				may_flags|NFSD_MAY_OWNER_OVERRIDE);
	if (status != nfs_ok)
		return status;

	inode = d_inode(fhp->fh_dentry);
	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
retry:
	rcu_read_lock();
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	rcu_read_unlock();
	if (nf)
		goto wait_for_construction;

	new = nfsd_file_alloc(inode, may_flags, hashval, net);
	if (!new) {
		trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
					NULL, nfserr_jukebox);
		return nfserr_jukebox;
	}

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	if (nf == NULL)
		goto open_file;
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nfsd_file_slab_free(&new->nf_rcu);

wait_for_construction:
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		if (!retry) {
			status = nfserr_jukebox;
			goto out;
		}
		retry = false;
		nfsd_file_put_noref(nf);
		goto retry;
	}

	this_cpu_inc(nfsd_file_cache_hits);

	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
		bool write = (may_flags & NFSD_MAY_WRITE);

		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
			status = nfserrno(nfsd_open_break_lease(
					file_inode(nf->nf_file), may_flags));
			if (status == nfs_ok) {
				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
				if (write)
					clear_bit(NFSD_FILE_BREAK_WRITE,
						  &nf->nf_flags);
			}
		}
	}
out:
	if (status == nfs_ok) {
		*pnf = nf;
	} else {
		nfsd_file_put(nf);
		nf = NULL;
	}

	trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
	return status;
open_file:
	nf = new;
	/* Take reference for the hashtable */
	refcount_inc(&nf->nf_ref);
	__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
	__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
	list_lru_add(&nfsd_file_lru, &nf->nf_lru);
	hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
	++nfsd_file_hashtbl[hashval].nfb_count;
	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
			nfsd_file_hashtbl[hashval].nfb_count);
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
		nfsd_file_gc();

	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
	if (nf->nf_mark) {
		if (open) {
			status = nfsd_open_verified(rqstp, fhp, may_flags,
						    &nf->nf_file);
			trace_nfsd_file_open(nf, status);
		} else
			status = nfs_ok;
	} else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 */
	if (status != nfs_ok || inode->i_nlink == 0) {
		bool do_free;
		spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
		do_free = nfsd_file_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
		if (do_free)
			nfsd_file_put_noref(nf);
	}
	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
	smp_mb__after_atomic();
	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
	goto out;
}

/**
 * nfsd_file_acquire - Get a struct nfsd_file with an open file
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle of the file to be opened
 * @may_flags: NFSD_MAY_ settings for the file
 * @pnf: OUT: new or found "struct nfsd_file" object
 *
 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
 * network byte order is returned.
 */
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
{
	return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, true);
}
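
/*
 * nfsd_file_create() below is the @open == false flavor of the same
 * path: it caches an nfsd_file without opening a struct file, for
 * callers acting on a file they have just created.
 */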
1054 */ 1055 __be32 1056 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, 1057 unsigned int may_flags, struct nfsd_file **pnf) 1058 { 1059 return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, false); 1060 } 1061 1062 /* 1063 * Note that fields may be added, removed or reordered in the future. Programs 1064 * scraping this file for info should test the labels to ensure they're 1065 * getting the correct field. 1066 */ 1067 static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) 1068 { 1069 unsigned int i, count = 0, longest = 0; 1070 unsigned long hits = 0; 1071 1072 /* 1073 * No need for spinlocks here since we're not terribly interested in 1074 * accuracy. We do take the nfsd_mutex simply to ensure that we 1075 * don't end up racing with server shutdown 1076 */ 1077 mutex_lock(&nfsd_mutex); 1078 if (nfsd_file_hashtbl) { 1079 for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { 1080 count += nfsd_file_hashtbl[i].nfb_count; 1081 longest = max(longest, nfsd_file_hashtbl[i].nfb_count); 1082 } 1083 } 1084 mutex_unlock(&nfsd_mutex); 1085 1086 for_each_possible_cpu(i) 1087 hits += per_cpu(nfsd_file_cache_hits, i); 1088 1089 seq_printf(m, "total entries: %u\n", count); 1090 seq_printf(m, "longest chain: %u\n", longest); 1091 seq_printf(m, "cache hits: %lu\n", hits); 1092 return 0; 1093 } 1094 1095 int nfsd_file_cache_stats_open(struct inode *inode, struct file *file) 1096 { 1097 return single_open(file, nfsd_file_cache_stats_show, NULL); 1098 } 1099