/*
 * Open file cache.
 *
 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
 */

#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>
#include <linux/rhashtable.h>

#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"

#define NFSD_LAUNDRETTE_DELAY (2 * HZ)

#define NFSD_FILE_CACHE_UP (0)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE)

static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
static DEFINE_PER_CPU(unsigned long, nfsd_file_releases);
static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
static DEFINE_PER_CPU(unsigned long, nfsd_file_pages_flushed);
static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions);

struct nfsd_fcache_disposal {
        struct work_struct work;
        spinlock_t lock;
        struct list_head freeme;
};

static struct workqueue_struct *nfsd_filecache_wq __read_mostly;

static struct kmem_cache *nfsd_file_slab;
static struct kmem_cache *nfsd_file_mark_slab;
static struct list_lru nfsd_file_lru;
static unsigned long nfsd_file_flags;
static struct fsnotify_group *nfsd_file_fsnotify_group;
static struct delayed_work nfsd_filecache_laundrette;
static struct rhashtable nfsd_file_rhash_tbl
                        ____cacheline_aligned_in_smp;

enum nfsd_file_lookup_type {
        NFSD_FILE_KEY_INODE,
        NFSD_FILE_KEY_FULL,
};

struct nfsd_file_lookup_key {
        struct inode *inode;
        struct net *net;
        const struct cred *cred;
        unsigned char need;
        enum nfsd_file_lookup_type type;
};

/*
 * The returned hash value is based solely on the address of an in-core
 * inode, a pointer to a slab-allocated object. The entropy in such a
 * pointer is concentrated in its middle bits.
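 *
 * The right shift by L1_CACHE_SHIFT drops the low-order bits, which
 * carry little entropy for slab-allocated objects, and the 0x00ffffff
 * mask keeps 24 of the remaining bits for jhash2() to mix with @seed.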
 */
static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed)
{
        unsigned long ptr = (unsigned long)inode;
        u32 k;

        k = ptr >> L1_CACHE_SHIFT;
        k &= 0x00ffffff;
        return jhash2(&k, 1, seed);
}

/**
 * nfsd_file_key_hashfn - Compute the hash value of a lookup key
 * @data: key on which to compute the hash value
 * @len: rhash table's key_len parameter (unused)
 * @seed: rhash table's random seed of the day
 *
 * Return value:
 *   Computed 32-bit hash value
 */
static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed)
{
        const struct nfsd_file_lookup_key *key = data;

        return nfsd_file_inode_hash(key->inode, seed);
}

/**
 * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file
 * @data: object on which to compute the hash value
 * @len: rhash table's key_len parameter (unused)
 * @seed: rhash table's random seed of the day
 *
 * Return value:
 *   Computed 32-bit hash value
 */
static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed)
{
        const struct nfsd_file *nf = data;

        return nfsd_file_inode_hash(nf->nf_inode, seed);
}

static bool
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
{
        int i;

        if (!uid_eq(c1->fsuid, c2->fsuid))
                return false;
        if (!gid_eq(c1->fsgid, c2->fsgid))
                return false;
        if (c1->group_info == NULL || c2->group_info == NULL)
                return c1->group_info == c2->group_info;
        if (c1->group_info->ngroups != c2->group_info->ngroups)
                return false;
        for (i = 0; i < c1->group_info->ngroups; i++) {
                if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
                        return false;
        }
        return true;
}

/**
 * nfsd_file_obj_cmpfn - Match a cache item against search criteria
 * @arg: search criteria
 * @ptr: cache item to check
 *
 * Return values:
 *   %0 - Item matches search criteria
 *   %1 - Item does not match search criteria
 */
static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
                               const void *ptr)
{
        const struct nfsd_file_lookup_key *key = arg->key;
        const struct nfsd_file *nf = ptr;

        switch (key->type) {
        case NFSD_FILE_KEY_INODE:
                if (nf->nf_inode != key->inode)
                        return 1;
                break;
        case NFSD_FILE_KEY_FULL:
                if (nf->nf_inode != key->inode)
                        return 1;
                if (nf->nf_may != key->need)
                        return 1;
                if (nf->nf_net != key->net)
                        return 1;
                if (!nfsd_match_cred(nf->nf_cred, key->cred))
                        return 1;
                if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
                        return 1;
                break;
        }
        return 0;
}

static const struct rhashtable_params nfsd_file_rhash_params = {
        .key_len                = sizeof_field(struct nfsd_file, nf_inode),
        .key_offset             = offsetof(struct nfsd_file, nf_inode),
        .head_offset            = offsetof(struct nfsd_file, nf_rhash),
        .hashfn                 = nfsd_file_key_hashfn,
        .obj_hashfn             = nfsd_file_obj_hashfn,
        .obj_cmpfn              = nfsd_file_obj_cmpfn,
        /* Reduce resizing churn on light workloads */
        .min_size               = 512, /* buckets */
        .automatic_shrinking    = true,
};

static void
nfsd_file_schedule_laundrette(void)
{
        if ((atomic_read(&nfsd_file_rhash_tbl.nelems) == 0) ||
            test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
                return;

        queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
                           NFSD_LAUNDRETTE_DELAY);
}

static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
        struct nfsd_file *nf =
                container_of(rcu, struct nfsd_file, nf_rcu);

        put_cred(nf->nf_cred);
        kmem_cache_free(nfsd_file_slab, nf);
}

static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
        struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
                                                  nfm_mark);

        kmem_cache_free(nfsd_file_mark_slab, nfm);
}

static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
        if (!refcount_inc_not_zero(&nfm->nfm_ref))
                return NULL;
        return nfm;
}

static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
        if (refcount_dec_and_test(&nfm->nfm_ref)) {
                fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
                fsnotify_put_mark(&nfm->nfm_mark);
        }
}

static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
{
        int err;
        struct fsnotify_mark *mark;
        struct nfsd_file_mark *nfm = NULL, *new;

        do {
                fsnotify_group_lock(nfsd_file_fsnotify_group);
                mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
                                          nfsd_file_fsnotify_group);
                if (mark) {
                        nfm = nfsd_file_mark_get(container_of(mark,
                                                 struct nfsd_file_mark,
                                                 nfm_mark));
                        fsnotify_group_unlock(nfsd_file_fsnotify_group);
                        if (nfm) {
                                fsnotify_put_mark(mark);
                                break;
                        }
                        /* Avoid soft lockup race with nfsd_file_mark_put() */
                        fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
                        fsnotify_put_mark(mark);
                } else {
                        fsnotify_group_unlock(nfsd_file_fsnotify_group);
                }

                /* allocate a new nfm */
                new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
                if (!new)
                        return NULL;
                fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
                new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
                refcount_set(&new->nfm_ref, 1);

                err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

                /*
                 * If the add was successful, then return the object.
                 * Otherwise, we need to put the reference we hold on the
                 * nfm_mark. The fsnotify code will take a reference and put
                 * it on failure, so we can't just free it directly. It's also
                 * not safe to call fsnotify_destroy_mark on it as the
                 * mark->group will be NULL. Thus, we can't let the nfm_ref
                 * counter drive the destruction at this point.
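                 *
                 * A failed add with -EEXIST means another task raced in
                 * and added a mark first; looping back to
                 * fsnotify_find_mark() picks that mark up on the next pass.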
                 */
                if (likely(!err))
                        nfm = new;
                else
                        fsnotify_put_mark(&new->nfm_mark);
        } while (unlikely(err == -EEXIST));

        return nfm;
}

static struct nfsd_file *
nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
{
        struct nfsd_file *nf;

        nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
        if (nf) {
                INIT_LIST_HEAD(&nf->nf_lru);
                nf->nf_birthtime = ktime_get();
                nf->nf_file = NULL;
                nf->nf_cred = get_current_cred();
                nf->nf_net = key->net;
                nf->nf_flags = 0;
                __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
                __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
                nf->nf_inode = key->inode;
                /* nf_ref is pre-incremented for hash table */
                refcount_set(&nf->nf_ref, 2);
                nf->nf_may = key->need;
                nf->nf_mark = NULL;
        }
        return nf;
}

static bool
nfsd_file_free(struct nfsd_file *nf)
{
        s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime));
        bool flush = false;

        this_cpu_inc(nfsd_file_releases);
        this_cpu_add(nfsd_file_total_age, age);

        trace_nfsd_file_put_final(nf);
        if (nf->nf_mark)
                nfsd_file_mark_put(nf->nf_mark);
        if (nf->nf_file) {
                get_file(nf->nf_file);
                filp_close(nf->nf_file, NULL);
                fput(nf->nf_file);
                flush = true;
        }

        /*
         * If this item is still linked via nf_lru, that's a bug.
         * WARN and leak it to preserve system stability.
         */
        if (WARN_ON_ONCE(!list_empty(&nf->nf_lru)))
                return flush;

        call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
        return flush;
}

static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
        struct file *file = nf->nf_file;
        struct address_space *mapping;

        if (!file || !(file->f_mode & FMODE_WRITE))
                return false;
        mapping = file->f_mapping;
        return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
                mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}

static int
nfsd_file_check_write_error(struct nfsd_file *nf)
{
        struct file *file = nf->nf_file;

        if (!file || !(file->f_mode & FMODE_WRITE))
                return 0;
        return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
}

static void
nfsd_file_flush(struct nfsd_file *nf)
{
        struct file *file = nf->nf_file;

        if (!file || !(file->f_mode & FMODE_WRITE))
                return;
        this_cpu_add(nfsd_file_pages_flushed, file->f_mapping->nrpages);
        if (vfs_fsync(file, 1) != 0)
                nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
}

static void nfsd_file_lru_add(struct nfsd_file *nf)
{
        set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
        if (list_lru_add(&nfsd_file_lru, &nf->nf_lru))
                trace_nfsd_file_lru_add(nf);
}

static void nfsd_file_lru_remove(struct nfsd_file *nf)
{
        if (list_lru_del(&nfsd_file_lru, &nf->nf_lru))
                trace_nfsd_file_lru_del(nf);
}

static void
nfsd_file_hash_remove(struct nfsd_file *nf)
{
        trace_nfsd_file_unhash(nf);

        if (nfsd_file_check_write_error(nf))
                nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
        rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash,
                               nfsd_file_rhash_params);
}

static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
        if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
                nfsd_file_hash_remove(nf);
                return true;
        }
        return false;
}

/*
 * Return true if the file was unhashed.
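 *
 * If references other than the hash table's remain, only that reference
 * is dropped here and the item is not queued; otherwise the final
 * reference is retained so the dispose-list processing can release it.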
 */
static bool
nfsd_file_unhash_and_dispose(struct nfsd_file *nf, struct list_head *dispose)
{
        trace_nfsd_file_unhash_and_dispose(nf);
        if (!nfsd_file_unhash(nf))
                return false;
        /* keep final reference for nfsd_file_lru_dispose */
        if (refcount_dec_not_one(&nf->nf_ref))
                return true;

        nfsd_file_lru_remove(nf);
        list_add(&nf->nf_lru, dispose);
        return true;
}

static void
nfsd_file_put_noref(struct nfsd_file *nf)
{
        trace_nfsd_file_put(nf);

        if (refcount_dec_and_test(&nf->nf_ref)) {
                WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
                nfsd_file_lru_remove(nf);
                nfsd_file_free(nf);
        }
}

void
nfsd_file_put(struct nfsd_file *nf)
{
        might_sleep();

        nfsd_file_lru_add(nf);
        if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
                nfsd_file_flush(nf);
                nfsd_file_put_noref(nf);
        } else if (nf->nf_file) {
                nfsd_file_put_noref(nf);
                nfsd_file_schedule_laundrette();
        } else
                nfsd_file_put_noref(nf);
}

/**
 * nfsd_file_close - Close an nfsd_file
 * @nf: nfsd_file to close
 *
 * If this is the final reference for @nf, free it immediately.
 * This reflects an on-the-wire CLOSE or DELEGRETURN into the
 * VFS and exported filesystem.
 */
void nfsd_file_close(struct nfsd_file *nf)
{
        nfsd_file_put(nf);
        if (refcount_dec_if_one(&nf->nf_ref)) {
                nfsd_file_unhash(nf);
                nfsd_file_lru_remove(nf);
                nfsd_file_free(nf);
        }
}

struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
        if (likely(refcount_inc_not_zero(&nf->nf_ref)))
                return nf;
        return NULL;
}

static void
nfsd_file_dispose_list(struct list_head *dispose)
{
        struct nfsd_file *nf;

        while (!list_empty(dispose)) {
                nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
                list_del_init(&nf->nf_lru);
                nfsd_file_flush(nf);
                nfsd_file_put_noref(nf);
        }
}

static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
        bool flush = false;
        struct nfsd_file *nf;

        while (!list_empty(dispose)) {
                nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
                list_del_init(&nf->nf_lru);
                nfsd_file_flush(nf);
                if (!refcount_dec_and_test(&nf->nf_ref))
                        continue;
                if (nfsd_file_free(nf))
                        flush = true;
        }
        if (flush)
                flush_delayed_fput();
}

static void
nfsd_file_list_remove_disposal(struct list_head *dst,
                               struct nfsd_fcache_disposal *l)
{
        spin_lock(&l->lock);
        list_splice_init(&l->freeme, dst);
        spin_unlock(&l->lock);
}

static void
nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
{
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
        struct nfsd_fcache_disposal *l = nn->fcache_disposal;

        spin_lock(&l->lock);
        list_splice_tail_init(files, &l->freeme);
        spin_unlock(&l->lock);
        queue_work(nfsd_filecache_wq, &l->work);
}

static void
nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
                          struct net *net)
{
        struct nfsd_file *nf, *tmp;

        list_for_each_entry_safe(nf, tmp, src, nf_lru) {
                if (nf->nf_net == net)
                        list_move_tail(&nf->nf_lru, dst);
        }
}

static void
nfsd_file_dispose_list_delayed(struct list_head *dispose)
{
        LIST_HEAD(list);
        struct nfsd_file *nf;

        while (!list_empty(dispose)) {
                nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
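                /*
                 * Move every entry on @dispose that belongs to this
                 * entry's net namespace onto a private list, then hand
                 * the whole batch to that namespace's disposal queue.
                 */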
                nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
                nfsd_file_list_add_disposal(&list, nf->nf_net);
        }
}

/**
 * nfsd_file_lru_cb - Examine an entry on the LRU list
 * @item: LRU entry to examine
 * @lru: controlling LRU
 * @lock: LRU list lock (unused)
 * @arg: dispose list
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 *
 * Return values:
 *   %LRU_REMOVED: @item was removed from the LRU
 *   %LRU_ROTATE: @item is to be moved to the LRU tail
 *   %LRU_SKIP: @item cannot be evicted
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
                 spinlock_t *lock, void *arg)
        __releases(lock)
        __acquires(lock)
{
        struct list_head *head = arg;
        struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

        /*
         * Do a lockless refcount check. The hashtable holds one reference, so
         * we look to see if anything else has a reference, or if any have
         * been put since the shrinker last ran. Those don't get unhashed and
         * released.
         *
         * Note that in the put path, we set the flag and then decrement the
         * counter. Here we check the counter and then test and clear the flag.
         * That order is deliberate to ensure that we can do this locklessly.
         */
        if (refcount_read(&nf->nf_ref) > 1) {
                list_lru_isolate(lru, &nf->nf_lru);
                trace_nfsd_file_gc_in_use(nf);
                return LRU_REMOVED;
        }

        /*
         * Don't throw out files that are still undergoing I/O or
         * that have uncleared errors pending.
         */
        if (nfsd_file_check_writeback(nf)) {
                trace_nfsd_file_gc_writeback(nf);
                return LRU_SKIP;
        }

        if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) {
                trace_nfsd_file_gc_referenced(nf);
                return LRU_ROTATE;
        }

        if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
                trace_nfsd_file_gc_hashed(nf);
                return LRU_SKIP;
        }

        list_lru_isolate_move(lru, &nf->nf_lru, head);
        this_cpu_inc(nfsd_file_evictions);
        trace_nfsd_file_gc_disposed(nf);
        return LRU_REMOVED;
}

/*
 * Unhash items on @dispose immediately, then queue them on the
 * disposal workqueue to finish releasing them in the background.
 *
 * cel: Note that between the time list_lru_shrink_walk runs and
 * now, these items are in the hash table but marked unhashed.
 * Why release these outside of lru_cb? There's no lock ordering
 * problem since lru_cb currently takes no lock.
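 *
 * nfsd_file_lru_cb has already cleared NFSD_FILE_HASHED on each of
 * these items, so the rhashtable removal below cannot race with a
 * concurrent nfsd_file_unhash() of the same entry.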
 */
static void nfsd_file_gc_dispose_list(struct list_head *dispose)
{
        struct nfsd_file *nf;

        list_for_each_entry(nf, dispose, nf_lru)
                nfsd_file_hash_remove(nf);
        nfsd_file_dispose_list_delayed(dispose);
}

static void
nfsd_file_gc(void)
{
        LIST_HEAD(dispose);
        unsigned long ret;

        ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb,
                            &dispose, list_lru_count(&nfsd_file_lru));
        trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru));
        nfsd_file_gc_dispose_list(&dispose);
}

static void
nfsd_file_gc_worker(struct work_struct *work)
{
        nfsd_file_gc();
        nfsd_file_schedule_laundrette();
}

static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
        return list_lru_count(&nfsd_file_lru);
}

static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
        LIST_HEAD(dispose);
        unsigned long ret;

        ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
                                   nfsd_file_lru_cb, &dispose);
        trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru));
        nfsd_file_gc_dispose_list(&dispose);
        return ret;
}

static struct shrinker nfsd_file_shrinker = {
        .scan_objects = nfsd_file_lru_scan,
        .count_objects = nfsd_file_lru_count,
        .seeks = 1,
};

/*
 * Find all cache items across all net namespaces that match @inode and
 * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire().
 */
static unsigned int
__nfsd_file_close_inode(struct inode *inode, struct list_head *dispose)
{
        struct nfsd_file_lookup_key key = {
                .type   = NFSD_FILE_KEY_INODE,
                .inode  = inode,
        };
        unsigned int count = 0;
        struct nfsd_file *nf;

        rcu_read_lock();
        do {
                nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
                                       nfsd_file_rhash_params);
                if (!nf)
                        break;
                nfsd_file_unhash_and_dispose(nf, dispose);
                count++;
        } while (1);
        rcu_read_unlock();
        return count;
}

/**
 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Unhash and put, then flush and fput all cache items associated with @inode.
 */
void
nfsd_file_close_inode_sync(struct inode *inode)
{
        LIST_HEAD(dispose);
        unsigned int count;

        count = __nfsd_file_close_inode(inode, &dispose);
        trace_nfsd_file_close_inode_sync(inode, count);
        nfsd_file_dispose_list_sync(&dispose);
}

/**
 * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Unhash and put all cache items associated with @inode.
 */
static void
nfsd_file_close_inode(struct inode *inode)
{
        LIST_HEAD(dispose);
        unsigned int count;

        count = __nfsd_file_close_inode(inode, &dispose);
        trace_nfsd_file_close_inode(inode, count);
        nfsd_file_dispose_list_delayed(&dispose);
}

/**
 * nfsd_file_delayed_close - close unused nfsd_files
 * @work: dummy
 *
 * Pull the nfsd_files queued on this namespace's disposal list and
 * flush and release each one in turn.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
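 *
 * This runs from the per-namespace nfsd_fcache_disposal work item,
 * which nfsd_file_list_add_disposal() queues on nfsd_filecache_wq.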
 */
static void
nfsd_file_delayed_close(struct work_struct *work)
{
        LIST_HEAD(head);
        struct nfsd_fcache_disposal *l = container_of(work,
                        struct nfsd_fcache_disposal, work);

        nfsd_file_list_remove_disposal(&head, l);
        nfsd_file_dispose_list(&head);
}

static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
                              void *data)
{
        struct file_lock *fl = data;

        /* Only close files for F_SETLEASE leases */
        if (fl->fl_flags & FL_LEASE)
                nfsd_file_close_inode_sync(file_inode(fl->fl_file));
        return 0;
}

static struct notifier_block nfsd_file_lease_notifier = {
        .notifier_call = nfsd_file_lease_notifier_call,
};

static int
nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
                                struct inode *inode, struct inode *dir,
                                const struct qstr *name, u32 cookie)
{
        if (WARN_ON_ONCE(!inode))
                return 0;

        trace_nfsd_file_fsnotify_handle_event(inode, mask);

        /* Should be no marks on non-regular files */
        if (!S_ISREG(inode->i_mode)) {
                WARN_ON_ONCE(1);
                return 0;
        }

        /* don't close files if this was not the last link */
        if (mask & FS_ATTRIB) {
                if (inode->i_nlink)
                        return 0;
        }

        nfsd_file_close_inode(inode);
        return 0;
}

static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
        .handle_inode_event = nfsd_file_fsnotify_handle_event,
        .free_mark = nfsd_file_mark_free,
};

int
nfsd_file_cache_init(void)
{
        int ret;

        lockdep_assert_held(&nfsd_mutex);
        if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
                return 0;

        ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params);
        if (ret)
                return ret;

        ret = -ENOMEM;
        nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
        if (!nfsd_filecache_wq)
                goto out;

        nfsd_file_slab = kmem_cache_create("nfsd_file",
                                sizeof(struct nfsd_file), 0, 0, NULL);
        if (!nfsd_file_slab) {
                pr_err("nfsd: unable to create nfsd_file_slab\n");
                goto out_err;
        }

        nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
                                sizeof(struct nfsd_file_mark), 0, 0, NULL);
        if (!nfsd_file_mark_slab) {
                pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
                goto out_err;
        }

        ret = list_lru_init(&nfsd_file_lru);
        if (ret) {
                pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
                goto out_err;
        }

        ret = register_shrinker(&nfsd_file_shrinker, "nfsd-filecache");
        if (ret) {
                pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
                goto out_lru;
        }

        ret = lease_register_notifier(&nfsd_file_lease_notifier);
        if (ret) {
                pr_err("nfsd: unable to register lease notifier: %d\n", ret);
                goto out_shrinker;
        }

        nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops,
                                                        FSNOTIFY_GROUP_NOFS);
        if (IS_ERR(nfsd_file_fsnotify_group)) {
                pr_err("nfsd: unable to create fsnotify group: %ld\n",
                       PTR_ERR(nfsd_file_fsnotify_group));
                ret = PTR_ERR(nfsd_file_fsnotify_group);
                nfsd_file_fsnotify_group = NULL;
                goto out_notifier;
        }

        INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
out:
        return ret;
out_notifier:
        lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
        unregister_shrinker(&nfsd_file_shrinker);
out_lru:
        list_lru_destroy(&nfsd_file_lru);
out_err:
        kmem_cache_destroy(nfsd_file_slab);
        nfsd_file_slab = NULL;
        kmem_cache_destroy(nfsd_file_mark_slab);
        nfsd_file_mark_slab = NULL;
        destroy_workqueue(nfsd_filecache_wq);
        nfsd_filecache_wq = NULL;
        rhashtable_destroy(&nfsd_file_rhash_tbl);
        goto out;
}

/*
 * Note this can deadlock with nfsd_file_lru_cb.
 */
static void
__nfsd_file_cache_purge(struct net *net)
{
        struct rhashtable_iter iter;
        struct nfsd_file *nf;
        LIST_HEAD(dispose);
        bool del;

        rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter);
        do {
                rhashtable_walk_start(&iter);

                nf = rhashtable_walk_next(&iter);
                while (!IS_ERR_OR_NULL(nf)) {
                        if (net && nf->nf_net != net) {
                                /* skip other namespaces, but keep the
                                 * walk advancing */
                                nf = rhashtable_walk_next(&iter);
                                continue;
                        }
                        del = nfsd_file_unhash_and_dispose(nf, &dispose);

                        /*
                         * Deadlock detected! Something marked this entry as
                         * unhashed, but hasn't removed it from the hash list.
                         */
                        WARN_ON_ONCE(!del);

                        nf = rhashtable_walk_next(&iter);
                }

                rhashtable_walk_stop(&iter);
        } while (nf == ERR_PTR(-EAGAIN));
        rhashtable_walk_exit(&iter);

        nfsd_file_dispose_list(&dispose);
}

static struct nfsd_fcache_disposal *
nfsd_alloc_fcache_disposal(void)
{
        struct nfsd_fcache_disposal *l;

        l = kmalloc(sizeof(*l), GFP_KERNEL);
        if (!l)
                return NULL;
        INIT_WORK(&l->work, nfsd_file_delayed_close);
        spin_lock_init(&l->lock);
        INIT_LIST_HEAD(&l->freeme);
        return l;
}

static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
        cancel_work_sync(&l->work);
        nfsd_file_dispose_list(&l->freeme);
        kfree(l);
}

static void
nfsd_free_fcache_disposal_net(struct net *net)
{
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
        struct nfsd_fcache_disposal *l = nn->fcache_disposal;

        nfsd_free_fcache_disposal(l);
}

int
nfsd_file_cache_start_net(struct net *net)
{
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);

        nn->fcache_disposal = nfsd_alloc_fcache_disposal();
        return nn->fcache_disposal ?
                0 : -ENOMEM;
}

/**
 * nfsd_file_cache_purge - Remove all cache items associated with @net
 * @net: target net namespace
 */
void
nfsd_file_cache_purge(struct net *net)
{
        lockdep_assert_held(&nfsd_mutex);
        if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
                __nfsd_file_cache_purge(net);
}

void
nfsd_file_cache_shutdown_net(struct net *net)
{
        nfsd_file_cache_purge(net);
        nfsd_free_fcache_disposal_net(net);
}

void
nfsd_file_cache_shutdown(void)
{
        int i;

        lockdep_assert_held(&nfsd_mutex);
        if (test_and_clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
                return;

        lease_unregister_notifier(&nfsd_file_lease_notifier);
        unregister_shrinker(&nfsd_file_shrinker);
        /*
         * make sure all callers of nfsd_file_lru_cb are done before
         * calling nfsd_file_cache_purge
         */
        cancel_delayed_work_sync(&nfsd_filecache_laundrette);
        __nfsd_file_cache_purge(NULL);
        list_lru_destroy(&nfsd_file_lru);
        rcu_barrier();
        fsnotify_put_group(nfsd_file_fsnotify_group);
        nfsd_file_fsnotify_group = NULL;
        kmem_cache_destroy(nfsd_file_slab);
        nfsd_file_slab = NULL;
        fsnotify_wait_marks_destroyed();
        kmem_cache_destroy(nfsd_file_mark_slab);
        nfsd_file_mark_slab = NULL;
        destroy_workqueue(nfsd_filecache_wq);
        nfsd_filecache_wq = NULL;
        rhashtable_destroy(&nfsd_file_rhash_tbl);

        for_each_possible_cpu(i) {
                per_cpu(nfsd_file_cache_hits, i) = 0;
                per_cpu(nfsd_file_acquisitions, i) = 0;
                per_cpu(nfsd_file_releases, i) = 0;
                per_cpu(nfsd_file_total_age, i) = 0;
                per_cpu(nfsd_file_pages_flushed, i) = 0;
                per_cpu(nfsd_file_evictions, i) = 0;
        }
}

/**
 * nfsd_file_is_cached - are there any cached open files for this inode?
 * @inode: inode to check
 *
 * The lookup matches inodes in all net namespaces and is atomic wrt
 * nfsd_file_acquire().
 *
 * Return values:
 *   %true: filecache contains at least one file matching this inode
 *   %false: filecache contains no files matching this inode
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
        struct nfsd_file_lookup_key key = {
                .type   = NFSD_FILE_KEY_INODE,
                .inode  = inode,
        };
        bool ret = false;

        if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key,
                                   nfsd_file_rhash_params) != NULL)
                ret = true;
        trace_nfsd_file_is_cached(inode, (int)ret);
        return ret;
}

static __be32
nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
                     unsigned int may_flags, struct nfsd_file **pnf, bool open)
{
        struct nfsd_file_lookup_key key = {
                .type   = NFSD_FILE_KEY_FULL,
                .need   = may_flags & NFSD_FILE_MAY_MASK,
                .net    = SVC_NET(rqstp),
        };
        struct nfsd_file *nf, *new;
        bool retry = true;
        __be32 status;

        status = fh_verify(rqstp, fhp, S_IFREG,
                           may_flags|NFSD_MAY_OWNER_OVERRIDE);
        if (status != nfs_ok)
                return status;
        key.inode = d_inode(fhp->fh_dentry);
        key.cred = get_current_cred();

retry:
        /* Avoid allocation if the item is already in cache */
        nf = rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key,
                                    nfsd_file_rhash_params);
        if (nf)
                nf = nfsd_file_get(nf);
        if (nf)
                goto wait_for_construction;

        new = nfsd_file_alloc(&key, may_flags);
        if (!new) {
                status = nfserr_jukebox;
                goto out_status;
        }

        nf = rhashtable_lookup_get_insert_key(&nfsd_file_rhash_tbl,
                                              &key, &new->nf_rhash,
                                              nfsd_file_rhash_params);
        if (!nf) {
                nf = new;
                goto open_file;
        }
        if (IS_ERR(nf))
                goto insert_err;
        nf = nfsd_file_get(nf);
        if (nf == NULL) {
                nf = new;
                goto open_file;
        }
        nfsd_file_slab_free(&new->nf_rcu);

wait_for_construction:
        wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

        /* Did construction of this file fail? */
        if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
                trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf);
                if (!retry) {
                        status = nfserr_jukebox;
                        goto out;
                }
                retry = false;
                nfsd_file_put_noref(nf);
                goto retry;
        }

        nfsd_file_lru_remove(nf);
        this_cpu_inc(nfsd_file_cache_hits);

        status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
out:
        if (status == nfs_ok) {
                if (open)
                        this_cpu_inc(nfsd_file_acquisitions);
                *pnf = nf;
        } else {
                nfsd_file_put(nf);
                nf = NULL;
        }

out_status:
        put_cred(key.cred);
        if (open)
                trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
        return status;

open_file:
        trace_nfsd_file_alloc(nf);
        nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode);
        if (nf->nf_mark) {
                if (open) {
                        status = nfsd_open_verified(rqstp, fhp, may_flags,
                                                    &nf->nf_file);
                        trace_nfsd_file_open(nf, status);
                } else
                        status = nfs_ok;
        } else
                status = nfserr_jukebox;
        /*
         * If construction failed, or we raced with a call to unlink()
         * then unhash.
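         *
         * Either way the PENDING bit is cleared below and any task in
         * wait_on_bit() at wait_for_construction wakes, re-checks
         * NFSD_FILE_HASHED, and retries the lookup if construction failed.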
         */
        if (status != nfs_ok || key.inode->i_nlink == 0)
                if (nfsd_file_unhash(nf))
                        nfsd_file_put_noref(nf);
        clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
        smp_mb__after_atomic();
        wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
        goto out;

insert_err:
        nfsd_file_slab_free(&new->nf_rcu);
        trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, PTR_ERR(nf));
        nf = NULL;
        status = nfserr_jukebox;
        goto out_status;
}

/**
 * nfsd_file_acquire - Get a struct nfsd_file with an open file
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle of the file to be opened
 * @may_flags: NFSD_MAY_ settings for the file
 * @pnf: OUT: new or found "struct nfsd_file" object
 *
 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
 * network byte order is returned.
 */
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
                  unsigned int may_flags, struct nfsd_file **pnf)
{
        return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true);
}

/**
 * nfsd_file_create - Get a struct nfsd_file, do not open
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle of the file just created
 * @may_flags: NFSD_MAY_ settings for the file
 * @pnf: OUT: new or found "struct nfsd_file" object
 *
 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
 * network byte order is returned.
 */
__be32
nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 unsigned int may_flags, struct nfsd_file **pnf)
{
        return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false);
}

/*
 * Note that fields may be added, removed or reordered in the future. Programs
 * scraping this file for info should test the labels to ensure they're
 * getting the correct field.
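 *
 * The output is a sequence of "label: value" lines in the order emitted
 * below, for example (values are illustrative only):
 *
 *      total entries: 4096
 *      hash buckets: 512
 *      lru entries: 1866
 *      cache hits: 409602
 *      ...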
 */
int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
        unsigned long releases = 0, pages_flushed = 0, evictions = 0;
        unsigned long hits = 0, acquisitions = 0;
        unsigned int i, count = 0, buckets = 0;
        unsigned long lru = 0, total_age = 0;

        /* Serialize with server shutdown */
        mutex_lock(&nfsd_mutex);
        if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) {
                struct bucket_table *tbl;
                struct rhashtable *ht;

                lru = list_lru_count(&nfsd_file_lru);

                rcu_read_lock();
                ht = &nfsd_file_rhash_tbl;
                count = atomic_read(&ht->nelems);
                tbl = rht_dereference_rcu(ht->tbl, ht);
                buckets = tbl->size;
                rcu_read_unlock();
        }
        mutex_unlock(&nfsd_mutex);

        for_each_possible_cpu(i) {
                hits += per_cpu(nfsd_file_cache_hits, i);
                acquisitions += per_cpu(nfsd_file_acquisitions, i);
                releases += per_cpu(nfsd_file_releases, i);
                total_age += per_cpu(nfsd_file_total_age, i);
                evictions += per_cpu(nfsd_file_evictions, i);
                pages_flushed += per_cpu(nfsd_file_pages_flushed, i);
        }

        seq_printf(m, "total entries: %u\n", count);
        seq_printf(m, "hash buckets: %u\n", buckets);
        seq_printf(m, "lru entries: %lu\n", lru);
        seq_printf(m, "cache hits: %lu\n", hits);
        seq_printf(m, "acquisitions: %lu\n", acquisitions);
        seq_printf(m, "releases: %lu\n", releases);
        seq_printf(m, "evictions: %lu\n", evictions);
        if (releases)
                seq_printf(m, "mean age (ms): %lu\n", total_age / releases);
        else
                seq_printf(m, "mean age (ms): -\n");
        seq_printf(m, "pages flushed: %lu\n", pages_flushed);
        return 0;
}