1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * The NFSD open file cache. 4 * 5 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com> 6 * 7 * An nfsd_file object is a per-file collection of open state that binds 8 * together: 9 * - a struct file * 10 * - a user credential 11 * - a network namespace 12 * - a read-ahead context 13 * - monitoring for writeback errors 14 * 15 * nfsd_file objects are reference-counted. Consumers acquire a new 16 * object via the nfsd_file_acquire API. They manage their interest in 17 * the acquired object, and hence the object's reference count, via 18 * nfsd_file_get and nfsd_file_put. There are two varieties of nfsd_file 19 * object: 20 * 21 * * non-garbage-collected: When a consumer wants to precisely control 22 * the lifetime of a file's open state, it acquires a non-garbage- 23 * collected nfsd_file. The final nfsd_file_put releases the open 24 * state immediately. 25 * 26 * * garbage-collected: When a consumer does not control the lifetime 27 * of open state, it acquires a garbage-collected nfsd_file. The 28 * final nfsd_file_put allows the open state to linger for a period 29 * during which it may be re-used. 
30 */ 31 32 #include <linux/hash.h> 33 #include <linux/slab.h> 34 #include <linux/file.h> 35 #include <linux/pagemap.h> 36 #include <linux/sched.h> 37 #include <linux/list_lru.h> 38 #include <linux/fsnotify_backend.h> 39 #include <linux/fsnotify.h> 40 #include <linux/seq_file.h> 41 #include <linux/rhashtable.h> 42 43 #include "vfs.h" 44 #include "nfsd.h" 45 #include "nfsfh.h" 46 #include "netns.h" 47 #include "filecache.h" 48 #include "trace.h" 49 50 #define NFSD_LAUNDRETTE_DELAY (2 * HZ) 51 52 #define NFSD_FILE_CACHE_UP (0) 53 54 /* We only care about NFSD_MAY_READ/WRITE for this cache */ 55 #define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE) 56 57 static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); 58 static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); 59 static DEFINE_PER_CPU(unsigned long, nfsd_file_releases); 60 static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age); 61 static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions); 62 63 struct nfsd_fcache_disposal { 64 struct work_struct work; 65 spinlock_t lock; 66 struct list_head freeme; 67 }; 68 69 static struct workqueue_struct *nfsd_filecache_wq __read_mostly; 70 71 static struct kmem_cache *nfsd_file_slab; 72 static struct kmem_cache *nfsd_file_mark_slab; 73 static struct list_lru nfsd_file_lru; 74 static unsigned long nfsd_file_flags; 75 static struct fsnotify_group *nfsd_file_fsnotify_group; 76 static struct delayed_work nfsd_filecache_laundrette; 77 static struct rhashtable nfsd_file_rhash_tbl 78 ____cacheline_aligned_in_smp; 79 80 enum nfsd_file_lookup_type { 81 NFSD_FILE_KEY_INODE, 82 NFSD_FILE_KEY_FULL, 83 }; 84 85 struct nfsd_file_lookup_key { 86 struct inode *inode; 87 struct net *net; 88 const struct cred *cred; 89 unsigned char need; 90 bool gc; 91 enum nfsd_file_lookup_type type; 92 }; 93 94 /* 95 * The returned hash value is based solely on the address of an in-code 96 * inode, a pointer to a slab-allocated object. 
The entropy in such a 97 * pointer is concentrated in its middle bits. 98 */ 99 static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed) 100 { 101 unsigned long ptr = (unsigned long)inode; 102 u32 k; 103 104 k = ptr >> L1_CACHE_SHIFT; 105 k &= 0x00ffffff; 106 return jhash2(&k, 1, seed); 107 } 108 109 /** 110 * nfsd_file_key_hashfn - Compute the hash value of a lookup key 111 * @data: key on which to compute the hash value 112 * @len: rhash table's key_len parameter (unused) 113 * @seed: rhash table's random seed of the day 114 * 115 * Return value: 116 * Computed 32-bit hash value 117 */ 118 static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed) 119 { 120 const struct nfsd_file_lookup_key *key = data; 121 122 return nfsd_file_inode_hash(key->inode, seed); 123 } 124 125 /** 126 * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file 127 * @data: object on which to compute the hash value 128 * @len: rhash table's key_len parameter (unused) 129 * @seed: rhash table's random seed of the day 130 * 131 * Return value: 132 * Computed 32-bit hash value 133 */ 134 static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed) 135 { 136 const struct nfsd_file *nf = data; 137 138 return nfsd_file_inode_hash(nf->nf_inode, seed); 139 } 140 141 static bool 142 nfsd_match_cred(const struct cred *c1, const struct cred *c2) 143 { 144 int i; 145 146 if (!uid_eq(c1->fsuid, c2->fsuid)) 147 return false; 148 if (!gid_eq(c1->fsgid, c2->fsgid)) 149 return false; 150 if (c1->group_info == NULL || c2->group_info == NULL) 151 return c1->group_info == c2->group_info; 152 if (c1->group_info->ngroups != c2->group_info->ngroups) 153 return false; 154 for (i = 0; i < c1->group_info->ngroups; i++) { 155 if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) 156 return false; 157 } 158 return true; 159 } 160 161 /** 162 * nfsd_file_obj_cmpfn - Match a cache item against search criteria 163 * @arg: search criteria 164 * @ptr: cache item to check 165 * 
166 * Return values: 167 * %0 - Item matches search criteria 168 * %1 - Item does not match search criteria 169 */ 170 static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, 171 const void *ptr) 172 { 173 const struct nfsd_file_lookup_key *key = arg->key; 174 const struct nfsd_file *nf = ptr; 175 176 switch (key->type) { 177 case NFSD_FILE_KEY_INODE: 178 if (nf->nf_inode != key->inode) 179 return 1; 180 break; 181 case NFSD_FILE_KEY_FULL: 182 if (nf->nf_inode != key->inode) 183 return 1; 184 if (nf->nf_may != key->need) 185 return 1; 186 if (nf->nf_net != key->net) 187 return 1; 188 if (!nfsd_match_cred(nf->nf_cred, key->cred)) 189 return 1; 190 if (!!test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) 191 return 1; 192 if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) 193 return 1; 194 break; 195 } 196 return 0; 197 } 198 199 static const struct rhashtable_params nfsd_file_rhash_params = { 200 .key_len = sizeof_field(struct nfsd_file, nf_inode), 201 .key_offset = offsetof(struct nfsd_file, nf_inode), 202 .head_offset = offsetof(struct nfsd_file, nf_rhash), 203 .hashfn = nfsd_file_key_hashfn, 204 .obj_hashfn = nfsd_file_obj_hashfn, 205 .obj_cmpfn = nfsd_file_obj_cmpfn, 206 /* Reduce resizing churn on light workloads */ 207 .min_size = 512, /* buckets */ 208 .automatic_shrinking = true, 209 }; 210 211 static void 212 nfsd_file_schedule_laundrette(void) 213 { 214 if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags)) 215 queue_delayed_work(system_wq, &nfsd_filecache_laundrette, 216 NFSD_LAUNDRETTE_DELAY); 217 } 218 219 static void 220 nfsd_file_slab_free(struct rcu_head *rcu) 221 { 222 struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu); 223 224 put_cred(nf->nf_cred); 225 kmem_cache_free(nfsd_file_slab, nf); 226 } 227 228 static void 229 nfsd_file_mark_free(struct fsnotify_mark *mark) 230 { 231 struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark, 232 nfm_mark); 233 234 kmem_cache_free(nfsd_file_mark_slab, nfm); 235 } 236 237 
static struct nfsd_file_mark * 238 nfsd_file_mark_get(struct nfsd_file_mark *nfm) 239 { 240 if (!refcount_inc_not_zero(&nfm->nfm_ref)) 241 return NULL; 242 return nfm; 243 } 244 245 static void 246 nfsd_file_mark_put(struct nfsd_file_mark *nfm) 247 { 248 if (refcount_dec_and_test(&nfm->nfm_ref)) { 249 fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group); 250 fsnotify_put_mark(&nfm->nfm_mark); 251 } 252 } 253 254 static struct nfsd_file_mark * 255 nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) 256 { 257 int err; 258 struct fsnotify_mark *mark; 259 struct nfsd_file_mark *nfm = NULL, *new; 260 261 do { 262 fsnotify_group_lock(nfsd_file_fsnotify_group); 263 mark = fsnotify_find_mark(&inode->i_fsnotify_marks, 264 nfsd_file_fsnotify_group); 265 if (mark) { 266 nfm = nfsd_file_mark_get(container_of(mark, 267 struct nfsd_file_mark, 268 nfm_mark)); 269 fsnotify_group_unlock(nfsd_file_fsnotify_group); 270 if (nfm) { 271 fsnotify_put_mark(mark); 272 break; 273 } 274 /* Avoid soft lockup race with nfsd_file_mark_put() */ 275 fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group); 276 fsnotify_put_mark(mark); 277 } else { 278 fsnotify_group_unlock(nfsd_file_fsnotify_group); 279 } 280 281 /* allocate a new nfm */ 282 new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL); 283 if (!new) 284 return NULL; 285 fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group); 286 new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF; 287 refcount_set(&new->nfm_ref, 1); 288 289 err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0); 290 291 /* 292 * If the add was successful, then return the object. 293 * Otherwise, we need to put the reference we hold on the 294 * nfm_mark. The fsnotify code will take a reference and put 295 * it on failure, so we can't just free it directly. It's also 296 * not safe to call fsnotify_destroy_mark on it as the 297 * mark->group will be NULL. Thus, we can't let the nfm_ref 298 * counter drive the destruction at this point. 
299 */ 300 if (likely(!err)) 301 nfm = new; 302 else 303 fsnotify_put_mark(&new->nfm_mark); 304 } while (unlikely(err == -EEXIST)); 305 306 return nfm; 307 } 308 309 static struct nfsd_file * 310 nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) 311 { 312 struct nfsd_file *nf; 313 314 nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL); 315 if (nf) { 316 INIT_LIST_HEAD(&nf->nf_lru); 317 nf->nf_birthtime = ktime_get(); 318 nf->nf_file = NULL; 319 nf->nf_cred = get_current_cred(); 320 nf->nf_net = key->net; 321 nf->nf_flags = 0; 322 __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); 323 __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); 324 if (key->gc) 325 __set_bit(NFSD_FILE_GC, &nf->nf_flags); 326 nf->nf_inode = key->inode; 327 /* nf_ref is pre-incremented for hash table */ 328 refcount_set(&nf->nf_ref, 2); 329 nf->nf_may = key->need; 330 nf->nf_mark = NULL; 331 } 332 return nf; 333 } 334 335 static void 336 nfsd_file_fsync(struct nfsd_file *nf) 337 { 338 struct file *file = nf->nf_file; 339 int ret; 340 341 if (!file || !(file->f_mode & FMODE_WRITE)) 342 return; 343 ret = vfs_fsync(file, 1); 344 trace_nfsd_file_fsync(nf, ret); 345 if (ret) 346 nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); 347 } 348 349 static int 350 nfsd_file_check_write_error(struct nfsd_file *nf) 351 { 352 struct file *file = nf->nf_file; 353 354 if (!file || !(file->f_mode & FMODE_WRITE)) 355 return 0; 356 return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)); 357 } 358 359 static void 360 nfsd_file_hash_remove(struct nfsd_file *nf) 361 { 362 trace_nfsd_file_unhash(nf); 363 364 if (nfsd_file_check_write_error(nf)) 365 nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); 366 rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash, 367 nfsd_file_rhash_params); 368 } 369 370 static bool 371 nfsd_file_unhash(struct nfsd_file *nf) 372 { 373 if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 374 nfsd_file_hash_remove(nf); 375 return 
true; 376 } 377 return false; 378 } 379 380 static bool 381 nfsd_file_free(struct nfsd_file *nf) 382 { 383 s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime)); 384 bool flush = false; 385 386 trace_nfsd_file_free(nf); 387 388 this_cpu_inc(nfsd_file_releases); 389 this_cpu_add(nfsd_file_total_age, age); 390 391 if (nf->nf_mark) 392 nfsd_file_mark_put(nf->nf_mark); 393 if (nf->nf_file) { 394 get_file(nf->nf_file); 395 filp_close(nf->nf_file, NULL); 396 fput(nf->nf_file); 397 flush = true; 398 } 399 400 /* 401 * If this item is still linked via nf_lru, that's a bug. 402 * WARN and leak it to preserve system stability. 403 */ 404 if (WARN_ON_ONCE(!list_empty(&nf->nf_lru))) 405 return flush; 406 407 call_rcu(&nf->nf_rcu, nfsd_file_slab_free); 408 return flush; 409 } 410 411 static bool 412 nfsd_file_check_writeback(struct nfsd_file *nf) 413 { 414 struct file *file = nf->nf_file; 415 struct address_space *mapping; 416 417 if (!file || !(file->f_mode & FMODE_WRITE)) 418 return false; 419 mapping = file->f_mapping; 420 return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) || 421 mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); 422 } 423 424 static void nfsd_file_lru_add(struct nfsd_file *nf) 425 { 426 set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); 427 if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) 428 trace_nfsd_file_lru_add(nf); 429 } 430 431 static void nfsd_file_lru_remove(struct nfsd_file *nf) 432 { 433 if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) 434 trace_nfsd_file_lru_del(nf); 435 } 436 437 struct nfsd_file * 438 nfsd_file_get(struct nfsd_file *nf) 439 { 440 if (likely(refcount_inc_not_zero(&nf->nf_ref))) 441 return nf; 442 return NULL; 443 } 444 445 static void 446 nfsd_file_unhash_and_queue(struct nfsd_file *nf, struct list_head *dispose) 447 { 448 trace_nfsd_file_unhash_and_queue(nf); 449 if (nfsd_file_unhash(nf)) { 450 /* caller must call nfsd_file_dispose_list() later */ 451 nfsd_file_lru_remove(nf); 452 list_add(&nf->nf_lru, dispose); 453 } 454 } 
455 456 static void 457 nfsd_file_put_noref(struct nfsd_file *nf) 458 { 459 trace_nfsd_file_put(nf); 460 461 if (refcount_dec_and_test(&nf->nf_ref)) { 462 WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); 463 nfsd_file_lru_remove(nf); 464 nfsd_file_free(nf); 465 } 466 } 467 468 static void 469 nfsd_file_unhash_and_put(struct nfsd_file *nf) 470 { 471 if (nfsd_file_unhash(nf)) 472 nfsd_file_put_noref(nf); 473 } 474 475 void 476 nfsd_file_put(struct nfsd_file *nf) 477 { 478 might_sleep(); 479 480 if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) 481 nfsd_file_lru_add(nf); 482 else if (refcount_read(&nf->nf_ref) == 2) 483 nfsd_file_unhash_and_put(nf); 484 485 if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 486 nfsd_file_fsync(nf); 487 nfsd_file_put_noref(nf); 488 } else if (nf->nf_file && test_bit(NFSD_FILE_GC, &nf->nf_flags)) { 489 nfsd_file_put_noref(nf); 490 nfsd_file_schedule_laundrette(); 491 } else 492 nfsd_file_put_noref(nf); 493 } 494 495 static void 496 nfsd_file_dispose_list(struct list_head *dispose) 497 { 498 struct nfsd_file *nf; 499 500 while(!list_empty(dispose)) { 501 nf = list_first_entry(dispose, struct nfsd_file, nf_lru); 502 list_del_init(&nf->nf_lru); 503 nfsd_file_fsync(nf); 504 nfsd_file_put_noref(nf); 505 } 506 } 507 508 static void 509 nfsd_file_dispose_list_sync(struct list_head *dispose) 510 { 511 bool flush = false; 512 struct nfsd_file *nf; 513 514 while(!list_empty(dispose)) { 515 nf = list_first_entry(dispose, struct nfsd_file, nf_lru); 516 list_del_init(&nf->nf_lru); 517 nfsd_file_fsync(nf); 518 if (!refcount_dec_and_test(&nf->nf_ref)) 519 continue; 520 if (nfsd_file_free(nf)) 521 flush = true; 522 } 523 if (flush) 524 flush_delayed_fput(); 525 } 526 527 static void 528 nfsd_file_list_remove_disposal(struct list_head *dst, 529 struct nfsd_fcache_disposal *l) 530 { 531 spin_lock(&l->lock); 532 list_splice_init(&l->freeme, dst); 533 spin_unlock(&l->lock); 534 } 535 536 static void 537 nfsd_file_list_add_disposal(struct list_head *files, 
struct net *net) 538 { 539 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 540 struct nfsd_fcache_disposal *l = nn->fcache_disposal; 541 542 spin_lock(&l->lock); 543 list_splice_tail_init(files, &l->freeme); 544 spin_unlock(&l->lock); 545 queue_work(nfsd_filecache_wq, &l->work); 546 } 547 548 static void 549 nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src, 550 struct net *net) 551 { 552 struct nfsd_file *nf, *tmp; 553 554 list_for_each_entry_safe(nf, tmp, src, nf_lru) { 555 if (nf->nf_net == net) 556 list_move_tail(&nf->nf_lru, dst); 557 } 558 } 559 560 static void 561 nfsd_file_dispose_list_delayed(struct list_head *dispose) 562 { 563 LIST_HEAD(list); 564 struct nfsd_file *nf; 565 566 while(!list_empty(dispose)) { 567 nf = list_first_entry(dispose, struct nfsd_file, nf_lru); 568 nfsd_file_list_add_pernet(&list, dispose, nf->nf_net); 569 nfsd_file_list_add_disposal(&list, nf->nf_net); 570 } 571 } 572 573 /** 574 * nfsd_file_lru_cb - Examine an entry on the LRU list 575 * @item: LRU entry to examine 576 * @lru: controlling LRU 577 * @lock: LRU list lock (unused) 578 * @arg: dispose list 579 * 580 * Return values: 581 * %LRU_REMOVED: @item was removed from the LRU 582 * %LRU_ROTATE: @item is to be moved to the LRU tail 583 * %LRU_SKIP: @item cannot be evicted 584 */ 585 static enum lru_status 586 nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, 587 spinlock_t *lock, void *arg) 588 __releases(lock) 589 __acquires(lock) 590 { 591 struct list_head *head = arg; 592 struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); 593 594 /* 595 * Do a lockless refcount check. The hashtable holds one reference, so 596 * we look to see if anything else has a reference, or if any have 597 * been put since the shrinker last ran. Those don't get unhashed and 598 * released. 599 * 600 * Note that in the put path, we set the flag and then decrement the 601 * counter. Here we check the counter and then test and clear the flag. 
602 * That order is deliberate to ensure that we can do this locklessly. 603 */ 604 if (refcount_read(&nf->nf_ref) > 1) { 605 list_lru_isolate(lru, &nf->nf_lru); 606 trace_nfsd_file_gc_in_use(nf); 607 return LRU_REMOVED; 608 } 609 610 /* 611 * Don't throw out files that are still undergoing I/O or 612 * that have uncleared errors pending. 613 */ 614 if (nfsd_file_check_writeback(nf)) { 615 trace_nfsd_file_gc_writeback(nf); 616 return LRU_SKIP; 617 } 618 619 if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) { 620 trace_nfsd_file_gc_referenced(nf); 621 return LRU_ROTATE; 622 } 623 624 if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 625 trace_nfsd_file_gc_hashed(nf); 626 return LRU_SKIP; 627 } 628 629 list_lru_isolate_move(lru, &nf->nf_lru, head); 630 this_cpu_inc(nfsd_file_evictions); 631 trace_nfsd_file_gc_disposed(nf); 632 return LRU_REMOVED; 633 } 634 635 /* 636 * Unhash items on @dispose immediately, then queue them on the 637 * disposal workqueue to finish releasing them in the background. 638 * 639 * cel: Note that between the time list_lru_shrink_walk runs and 640 * now, these items are in the hash table but marked unhashed. 641 * Why release these outside of lru_cb ? There's no lock ordering 642 * problem since lru_cb currently takes no lock. 
643 */ 644 static void nfsd_file_gc_dispose_list(struct list_head *dispose) 645 { 646 struct nfsd_file *nf; 647 648 list_for_each_entry(nf, dispose, nf_lru) 649 nfsd_file_hash_remove(nf); 650 nfsd_file_dispose_list_delayed(dispose); 651 } 652 653 static void 654 nfsd_file_gc(void) 655 { 656 LIST_HEAD(dispose); 657 unsigned long ret; 658 659 ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, 660 &dispose, list_lru_count(&nfsd_file_lru)); 661 trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); 662 nfsd_file_gc_dispose_list(&dispose); 663 } 664 665 static void 666 nfsd_file_gc_worker(struct work_struct *work) 667 { 668 nfsd_file_gc(); 669 if (list_lru_count(&nfsd_file_lru)) 670 nfsd_file_schedule_laundrette(); 671 } 672 673 static unsigned long 674 nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc) 675 { 676 return list_lru_count(&nfsd_file_lru); 677 } 678 679 static unsigned long 680 nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) 681 { 682 LIST_HEAD(dispose); 683 unsigned long ret; 684 685 ret = list_lru_shrink_walk(&nfsd_file_lru, sc, 686 nfsd_file_lru_cb, &dispose); 687 trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru)); 688 nfsd_file_gc_dispose_list(&dispose); 689 return ret; 690 } 691 692 static struct shrinker nfsd_file_shrinker = { 693 .scan_objects = nfsd_file_lru_scan, 694 .count_objects = nfsd_file_lru_count, 695 .seeks = 1, 696 }; 697 698 /* 699 * Find all cache items across all net namespaces that match @inode and 700 * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire(). 
701 */ 702 static unsigned int 703 __nfsd_file_close_inode(struct inode *inode, struct list_head *dispose) 704 { 705 struct nfsd_file_lookup_key key = { 706 .type = NFSD_FILE_KEY_INODE, 707 .inode = inode, 708 }; 709 unsigned int count = 0; 710 struct nfsd_file *nf; 711 712 rcu_read_lock(); 713 do { 714 nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, 715 nfsd_file_rhash_params); 716 if (!nf) 717 break; 718 nfsd_file_unhash_and_queue(nf, dispose); 719 count++; 720 } while (1); 721 rcu_read_unlock(); 722 return count; 723 } 724 725 /** 726 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file 727 * @inode: inode of the file to attempt to remove 728 * 729 * Unhash and put, then flush and fput all cache items associated with @inode. 730 */ 731 void 732 nfsd_file_close_inode_sync(struct inode *inode) 733 { 734 LIST_HEAD(dispose); 735 unsigned int count; 736 737 count = __nfsd_file_close_inode(inode, &dispose); 738 trace_nfsd_file_close_inode_sync(inode, count); 739 nfsd_file_dispose_list_sync(&dispose); 740 } 741 742 /** 743 * nfsd_file_close_inode - attempt a delayed close of a nfsd_file 744 * @inode: inode of the file to attempt to remove 745 * 746 * Unhash and put all cache item associated with @inode. 747 */ 748 static void 749 nfsd_file_close_inode(struct inode *inode) 750 { 751 LIST_HEAD(dispose); 752 unsigned int count; 753 754 count = __nfsd_file_close_inode(inode, &dispose); 755 trace_nfsd_file_close_inode(inode, count); 756 nfsd_file_dispose_list_delayed(&dispose); 757 } 758 759 /** 760 * nfsd_file_delayed_close - close unused nfsd_files 761 * @work: dummy 762 * 763 * Walk the LRU list and close any entries that have not been used since 764 * the last scan. 
765 */ 766 static void 767 nfsd_file_delayed_close(struct work_struct *work) 768 { 769 LIST_HEAD(head); 770 struct nfsd_fcache_disposal *l = container_of(work, 771 struct nfsd_fcache_disposal, work); 772 773 nfsd_file_list_remove_disposal(&head, l); 774 nfsd_file_dispose_list(&head); 775 } 776 777 static int 778 nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, 779 void *data) 780 { 781 struct file_lock *fl = data; 782 783 /* Only close files for F_SETLEASE leases */ 784 if (fl->fl_flags & FL_LEASE) 785 nfsd_file_close_inode_sync(file_inode(fl->fl_file)); 786 return 0; 787 } 788 789 static struct notifier_block nfsd_file_lease_notifier = { 790 .notifier_call = nfsd_file_lease_notifier_call, 791 }; 792 793 static int 794 nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask, 795 struct inode *inode, struct inode *dir, 796 const struct qstr *name, u32 cookie) 797 { 798 if (WARN_ON_ONCE(!inode)) 799 return 0; 800 801 trace_nfsd_file_fsnotify_handle_event(inode, mask); 802 803 /* Should be no marks on non-regular files */ 804 if (!S_ISREG(inode->i_mode)) { 805 WARN_ON_ONCE(1); 806 return 0; 807 } 808 809 /* don't close files if this was not the last link */ 810 if (mask & FS_ATTRIB) { 811 if (inode->i_nlink) 812 return 0; 813 } 814 815 nfsd_file_close_inode(inode); 816 return 0; 817 } 818 819 820 static const struct fsnotify_ops nfsd_file_fsnotify_ops = { 821 .handle_inode_event = nfsd_file_fsnotify_handle_event, 822 .free_mark = nfsd_file_mark_free, 823 }; 824 825 int 826 nfsd_file_cache_init(void) 827 { 828 int ret; 829 830 lockdep_assert_held(&nfsd_mutex); 831 if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) 832 return 0; 833 834 ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params); 835 if (ret) 836 return ret; 837 838 ret = -ENOMEM; 839 nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0); 840 if (!nfsd_filecache_wq) 841 goto out; 842 843 nfsd_file_slab = kmem_cache_create("nfsd_file", 
844 sizeof(struct nfsd_file), 0, 0, NULL); 845 if (!nfsd_file_slab) { 846 pr_err("nfsd: unable to create nfsd_file_slab\n"); 847 goto out_err; 848 } 849 850 nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark", 851 sizeof(struct nfsd_file_mark), 0, 0, NULL); 852 if (!nfsd_file_mark_slab) { 853 pr_err("nfsd: unable to create nfsd_file_mark_slab\n"); 854 goto out_err; 855 } 856 857 858 ret = list_lru_init(&nfsd_file_lru); 859 if (ret) { 860 pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret); 861 goto out_err; 862 } 863 864 ret = register_shrinker(&nfsd_file_shrinker, "nfsd-filecache"); 865 if (ret) { 866 pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret); 867 goto out_lru; 868 } 869 870 ret = lease_register_notifier(&nfsd_file_lease_notifier); 871 if (ret) { 872 pr_err("nfsd: unable to register lease notifier: %d\n", ret); 873 goto out_shrinker; 874 } 875 876 nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops, 877 FSNOTIFY_GROUP_NOFS); 878 if (IS_ERR(nfsd_file_fsnotify_group)) { 879 pr_err("nfsd: unable to create fsnotify group: %ld\n", 880 PTR_ERR(nfsd_file_fsnotify_group)); 881 ret = PTR_ERR(nfsd_file_fsnotify_group); 882 nfsd_file_fsnotify_group = NULL; 883 goto out_notifier; 884 } 885 886 INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker); 887 out: 888 return ret; 889 out_notifier: 890 lease_unregister_notifier(&nfsd_file_lease_notifier); 891 out_shrinker: 892 unregister_shrinker(&nfsd_file_shrinker); 893 out_lru: 894 list_lru_destroy(&nfsd_file_lru); 895 out_err: 896 kmem_cache_destroy(nfsd_file_slab); 897 nfsd_file_slab = NULL; 898 kmem_cache_destroy(nfsd_file_mark_slab); 899 nfsd_file_mark_slab = NULL; 900 destroy_workqueue(nfsd_filecache_wq); 901 nfsd_filecache_wq = NULL; 902 rhashtable_destroy(&nfsd_file_rhash_tbl); 903 goto out; 904 } 905 906 static void 907 __nfsd_file_cache_purge(struct net *net) 908 { 909 struct rhashtable_iter iter; 910 struct nfsd_file *nf; 911 LIST_HEAD(dispose); 912 913 
rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter); 914 do { 915 rhashtable_walk_start(&iter); 916 917 nf = rhashtable_walk_next(&iter); 918 while (!IS_ERR_OR_NULL(nf)) { 919 if (!net || nf->nf_net == net) 920 nfsd_file_unhash_and_queue(nf, &dispose); 921 nf = rhashtable_walk_next(&iter); 922 } 923 924 rhashtable_walk_stop(&iter); 925 } while (nf == ERR_PTR(-EAGAIN)); 926 rhashtable_walk_exit(&iter); 927 928 nfsd_file_dispose_list(&dispose); 929 } 930 931 static struct nfsd_fcache_disposal * 932 nfsd_alloc_fcache_disposal(void) 933 { 934 struct nfsd_fcache_disposal *l; 935 936 l = kmalloc(sizeof(*l), GFP_KERNEL); 937 if (!l) 938 return NULL; 939 INIT_WORK(&l->work, nfsd_file_delayed_close); 940 spin_lock_init(&l->lock); 941 INIT_LIST_HEAD(&l->freeme); 942 return l; 943 } 944 945 static void 946 nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l) 947 { 948 cancel_work_sync(&l->work); 949 nfsd_file_dispose_list(&l->freeme); 950 kfree(l); 951 } 952 953 static void 954 nfsd_free_fcache_disposal_net(struct net *net) 955 { 956 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 957 struct nfsd_fcache_disposal *l = nn->fcache_disposal; 958 959 nfsd_free_fcache_disposal(l); 960 } 961 962 int 963 nfsd_file_cache_start_net(struct net *net) 964 { 965 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 966 967 nn->fcache_disposal = nfsd_alloc_fcache_disposal(); 968 return nn->fcache_disposal ? 
0 : -ENOMEM; 969 } 970 971 /** 972 * nfsd_file_cache_purge - Remove all cache items associated with @net 973 * @net: target net namespace 974 * 975 */ 976 void 977 nfsd_file_cache_purge(struct net *net) 978 { 979 lockdep_assert_held(&nfsd_mutex); 980 if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) 981 __nfsd_file_cache_purge(net); 982 } 983 984 void 985 nfsd_file_cache_shutdown_net(struct net *net) 986 { 987 nfsd_file_cache_purge(net); 988 nfsd_free_fcache_disposal_net(net); 989 } 990 991 void 992 nfsd_file_cache_shutdown(void) 993 { 994 int i; 995 996 lockdep_assert_held(&nfsd_mutex); 997 if (test_and_clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0) 998 return; 999 1000 lease_unregister_notifier(&nfsd_file_lease_notifier); 1001 unregister_shrinker(&nfsd_file_shrinker); 1002 /* 1003 * make sure all callers of nfsd_file_lru_cb are done before 1004 * calling nfsd_file_cache_purge 1005 */ 1006 cancel_delayed_work_sync(&nfsd_filecache_laundrette); 1007 __nfsd_file_cache_purge(NULL); 1008 list_lru_destroy(&nfsd_file_lru); 1009 rcu_barrier(); 1010 fsnotify_put_group(nfsd_file_fsnotify_group); 1011 nfsd_file_fsnotify_group = NULL; 1012 kmem_cache_destroy(nfsd_file_slab); 1013 nfsd_file_slab = NULL; 1014 fsnotify_wait_marks_destroyed(); 1015 kmem_cache_destroy(nfsd_file_mark_slab); 1016 nfsd_file_mark_slab = NULL; 1017 destroy_workqueue(nfsd_filecache_wq); 1018 nfsd_filecache_wq = NULL; 1019 rhashtable_destroy(&nfsd_file_rhash_tbl); 1020 1021 for_each_possible_cpu(i) { 1022 per_cpu(nfsd_file_cache_hits, i) = 0; 1023 per_cpu(nfsd_file_acquisitions, i) = 0; 1024 per_cpu(nfsd_file_releases, i) = 0; 1025 per_cpu(nfsd_file_total_age, i) = 0; 1026 per_cpu(nfsd_file_evictions, i) = 0; 1027 } 1028 } 1029 1030 /** 1031 * nfsd_file_is_cached - are there any cached open files for this inode? 1032 * @inode: inode to check 1033 * 1034 * The lookup matches inodes in all net namespaces and is atomic wrt 1035 * nfsd_file_acquire(). 
*
 * Return values:
 *   %true: filecache contains at least one file matching this inode
 *   %false: filecache contains no files matching this inode
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
	/*
	 * Only .type and .inode are initialized here; every other key field
	 * is implicitly zeroed by the designated initializer.
	 */
	struct nfsd_file_lookup_key key = {
		.type	= NFSD_FILE_KEY_INODE,
		.inode	= inode,
	};
	bool ret = false;

	if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key,
				   nfsd_file_rhash_params) != NULL)
		ret = true;
	/* The tracepoint fires for both the hit and the miss case. */
	trace_nfsd_file_is_cached(inode, (int)ret);
	return ret;
}

/*
 * nfsd_file_do_acquire - common worker for nfsd_file_acquire_gc(),
 * nfsd_file_acquire() and nfsd_file_create()
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle; checked with fh_verify() as S_IFREG
 * @may_flags: NFSD_MAY_ settings; only the NFSD_FILE_MAY_MASK bits are
 *             placed in the lookup key
 * @pnf: OUT: written only when nfs_ok is returned
 * @open: if true, a newly inserted object is opened with
 *        nfsd_open_verified() and the acquire tracepoint/counter are
 *        used; if false the open is skipped
 * @want_gc: copied into the .gc field of the lookup key
 *
 * Looks the (inode, net, cred, need, gc) key up in nfsd_file_rhash_tbl.
 * On a hit the caller waits for NFSD_FILE_PENDING to clear and then uses
 * the existing object. On a miss a new object is allocated, inserted and
 * constructed at the open_file label.
 *
 * Returns nfs_ok on success. Otherwise returns the fh_verify() status,
 * the nfsd_open_verified() status, the nfserrno() translation of
 * nfsd_open_break_lease(), or nfserr_jukebox (allocation failure,
 * insertion failure other than -EEXIST, fsnotify mark failure, or a
 * second failed construction).
 */
static __be32
nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		     unsigned int may_flags, struct nfsd_file **pnf,
		     bool open, bool want_gc)
{
	struct nfsd_file_lookup_key key = {
		.type	= NFSD_FILE_KEY_FULL,
		.need	= may_flags & NFSD_FILE_MAY_MASK,
		.net	= SVC_NET(rqstp),
		.gc	= want_gc,
	};
	/* Allows exactly one retry after finding a failed construction. */
	bool open_retry = true;
	struct nfsd_file *nf;
	__be32 status;
	int ret;

	status = fh_verify(rqstp, fhp, S_IFREG,
				may_flags|NFSD_MAY_OWNER_OVERRIDE);
	/* No cred reference is held yet, so a plain return is enough. */
	if (status != nfs_ok)
		return status;
	key.inode = d_inode(fhp->fh_dentry);
	/* This reference is dropped by put_cred() at out_status. */
	key.cred = get_current_cred();

retry:
	rcu_read_lock();
	nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
			       nfsd_file_rhash_params);
	/*
	 * nf is overwritten with nfsd_file_get()'s result, so a NULL return
	 * from it is handled like a lookup miss below.
	 */
	if (nf)
		nf = nfsd_file_get(nf);
	rcu_read_unlock();
	if (nf)
		goto wait_for_construction;

	nf = nfsd_file_alloc(&key, may_flags);
	if (!nf) {
		status = nfserr_jukebox;
		goto out_status;
	}

	ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl,
					   &key, &nf->nf_rhash,
					   nfsd_file_rhash_params);
	if (likely(ret == 0))
		goto open_file;

	/* Insertion failed: discard the never-published object. */
	nfsd_file_slab_free(&nf->nf_rcu);
	nf = NULL;
	/* Another task inserted a matching entry first; look it up again. */
	if (ret == -EEXIST)
		goto retry;
	trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret);
	status = nfserr_jukebox;
	goto out_status;

wait_for_construction:
	/*
	 * Sleep until NFSD_FILE_PENDING is clear; the open_file path below
	 * clears it and wakes waiters.
	 */
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf);
		if (!open_retry) {
			status = nfserr_jukebox;
			/* The reference taken above is put at "out". */
			goto out;
		}
		open_retry = false;
		nfsd_file_put_noref(nf);
		goto retry;
	}

	nfsd_file_lru_remove(nf);
	this_cpu_inc(nfsd_file_cache_hits);

	/*
	 * NOTE(review): nf->nf_file is dereferenced without a NULL check.
	 * When an object is constructed with open == false, the open_file
	 * path in this function never assigns nf_file. Confirm that callers
	 * set it before another task can reach this point.
	 */
	status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
out:
	if (status == nfs_ok) {
		/* Acquisitions are only counted for callers that open. */
		if (open)
			this_cpu_inc(nfsd_file_acquisitions);
		*pnf = nf;
	} else {
		nfsd_file_put(nf);
		/* Cleared so the tracepoint below sees no object. */
		nf = NULL;
	}

out_status:
	put_cred(key.cred);
	if (open)
		trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
	return status;

open_file:
	/* Reached only after this task inserted nf into the hash table. */
	trace_nfsd_file_alloc(nf);
	nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode);
	if (nf->nf_mark) {
		if (open) {
			status = nfsd_open_verified(rqstp, fhp, may_flags,
						    &nf->nf_file);
			trace_nfsd_file_open(nf, status);
		} else
			status = nfs_ok;
	} else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 *
	 * In the unlink case status can still be nfs_ok, so the unhashed
	 * object is nevertheless handed to the caller at "out".
	 */
	if (status != nfs_ok || key.inode->i_nlink == 0)
		nfsd_file_unhash_and_put(nf);
	/* Publish the result and release tasks in wait_for_construction. */
	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
	smp_mb__after_atomic();
	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
	goto out;
}

/**
 * nfsd_file_acquire_gc - Get a struct nfsd_file with an open file
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle of the file to be opened
 * @may_flags: NFSD_MAY_ settings for the file
 * @pnf: OUT: new or found "struct nfsd_file" object
 *
 * The nfsd_file object returned by this API is reference-counted
 * and garbage-collected. The object is retained for a few
 * seconds after the final nfsd_file_put() in case the caller
 * wants to re-use it.
 *
 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
 * network byte order is returned.
 */
__be32
nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
		     unsigned int may_flags, struct nfsd_file **pnf)
{
	/* open = true, want_gc = true */
	return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, true);
}

/**
 * nfsd_file_acquire - Get a struct nfsd_file with an open file
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle of the file to be opened
 * @may_flags: NFSD_MAY_ settings for the file
 * @pnf: OUT: new or found "struct nfsd_file" object
 *
 * The nfsd_file object returned by this API is reference-counted
 * but not garbage-collected. The object is unhashed after the
 * final nfsd_file_put().
 *
 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
 * network byte order is returned.
1204 */ 1205 __be32 1206 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, 1207 unsigned int may_flags, struct nfsd_file **pnf) 1208 { 1209 return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, false); 1210 } 1211 1212 /** 1213 * nfsd_file_create - Get a struct nfsd_file, do not open 1214 * @rqstp: the RPC transaction being executed 1215 * @fhp: the NFS filehandle of the file just created 1216 * @may_flags: NFSD_MAY_ settings for the file 1217 * @pnf: OUT: new or found "struct nfsd_file" object 1218 * 1219 * The nfsd_file_object returned by this API is reference-counted 1220 * but not garbage-collected. The object is released immediately 1221 * one RCU grace period after the final nfsd_file_put(). 1222 * 1223 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in 1224 * network byte order is returned. 1225 */ 1226 __be32 1227 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, 1228 unsigned int may_flags, struct nfsd_file **pnf) 1229 { 1230 return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, false); 1231 } 1232 1233 /* 1234 * Note that fields may be added, removed or reordered in the future. Programs 1235 * scraping this file for info should test the labels to ensure they're 1236 * getting the correct field. 
1237 */ 1238 int nfsd_file_cache_stats_show(struct seq_file *m, void *v) 1239 { 1240 unsigned long releases = 0, evictions = 0; 1241 unsigned long hits = 0, acquisitions = 0; 1242 unsigned int i, count = 0, buckets = 0; 1243 unsigned long lru = 0, total_age = 0; 1244 1245 /* Serialize with server shutdown */ 1246 mutex_lock(&nfsd_mutex); 1247 if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) { 1248 struct bucket_table *tbl; 1249 struct rhashtable *ht; 1250 1251 lru = list_lru_count(&nfsd_file_lru); 1252 1253 rcu_read_lock(); 1254 ht = &nfsd_file_rhash_tbl; 1255 count = atomic_read(&ht->nelems); 1256 tbl = rht_dereference_rcu(ht->tbl, ht); 1257 buckets = tbl->size; 1258 rcu_read_unlock(); 1259 } 1260 mutex_unlock(&nfsd_mutex); 1261 1262 for_each_possible_cpu(i) { 1263 hits += per_cpu(nfsd_file_cache_hits, i); 1264 acquisitions += per_cpu(nfsd_file_acquisitions, i); 1265 releases += per_cpu(nfsd_file_releases, i); 1266 total_age += per_cpu(nfsd_file_total_age, i); 1267 evictions += per_cpu(nfsd_file_evictions, i); 1268 } 1269 1270 seq_printf(m, "total entries: %u\n", count); 1271 seq_printf(m, "hash buckets: %u\n", buckets); 1272 seq_printf(m, "lru entries: %lu\n", lru); 1273 seq_printf(m, "cache hits: %lu\n", hits); 1274 seq_printf(m, "acquisitions: %lu\n", acquisitions); 1275 seq_printf(m, "releases: %lu\n", releases); 1276 seq_printf(m, "evictions: %lu\n", evictions); 1277 if (releases) 1278 seq_printf(m, "mean age (ms): %ld\n", total_age / releases); 1279 else 1280 seq_printf(m, "mean age (ms): -\n"); 1281 return 0; 1282 } 1283