// SPDX-License-Identifier: GPL-2.0
/*
 * The NFSD open file cache.
 *
 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
 *
 * An nfsd_file object is a per-file collection of open state that binds
 * together:
 *   - a struct file *
 *   - a user credential
 *   - a network namespace
 *   - a read-ahead context
 *   - monitoring for writeback errors
 *
 * nfsd_file objects are reference-counted. Consumers acquire a new
 * object via the nfsd_file_acquire API. They manage their interest in
 * the acquired object, and hence the object's reference count, via
 * nfsd_file_get and nfsd_file_put. There are two varieties of nfsd_file
 * object:
 *
 *  * non-garbage-collected: When a consumer wants to precisely control
 *    the lifetime of a file's open state, it acquires a non-garbage-
 *    collected nfsd_file. The final nfsd_file_put releases the open
 *    state immediately.
 *
 *  * garbage-collected: When a consumer does not control the lifetime
 *    of open state, it acquires a garbage-collected nfsd_file. The
 *    final nfsd_file_put allows the open state to linger for a period
 *    during which it may be re-used.
 */

#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>
#include <linux/rhashtable.h>

#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"

/* Delay between scheduling the laundrette (LRU garbage collector) and its run */
#define NFSD_LAUNDRETTE_DELAY		     (2 * HZ)

/* Bit number in nfsd_file_flags: set while the file cache is initialized */
#define NFSD_FILE_CACHE_UP		     (0)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

/*
 * Per-CPU statistics. All of them are zeroed again by
 * nfsd_file_cache_shutdown().
 */
/* lookups in nfsd_file_do_acquire() satisfied by a usable cached entry */
static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
/* successful returns from nfsd_file_do_acquire() */
static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
/* nfsd_file objects passed to nfsd_file_free() */
static DEFINE_PER_CPU(unsigned long, nfsd_file_releases);
/* sum of object ages, in milliseconds, accumulated by nfsd_file_free() */
static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
/* objects whose last reference was dropped by the LRU walker */
static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions);

/*
 * Per-net deferred disposal state: nfsd_files whose last reference is
 * gone are parked on @freeme and freed later from @work.
 */
struct nfsd_fcache_disposal {
	struct work_struct work;	/* runs nfsd_file_delayed_close() */
	spinlock_t lock;		/* held while splicing to/from @freeme */
	struct list_head freeme;	/* nfsd_files (via nf_lru) awaiting free */
};

/* Workqueue on which the per-net disposal work items are queued */
static struct workqueue_struct *nfsd_filecache_wq __read_mostly;

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
static struct list_lru			nfsd_file_lru;
/* Cache-wide flag bits; see NFSD_FILE_CACHE_UP */
static unsigned long			nfsd_file_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
static struct delayed_work		nfsd_filecache_laundrette;
static struct rhashtable		nfsd_file_rhash_tbl
						____cacheline_aligned_in_smp;

/* Selects which fields nfsd_file_obj_cmpfn() compares */
enum nfsd_file_lookup_type {
	NFSD_FILE_KEY_INODE,	/* match on inode and GC flag only */
	NFSD_FILE_KEY_FULL,	/* also match access mode, net, cred, hashed */
};

/* Search criteria handed to the rhashtable lookup and insert helpers */
struct nfsd_file_lookup_key {
	struct inode			*inode;	/* always compared; sole hash input */
	struct net			*net;	/* KEY_FULL only */
	const struct cred		*cred;	/* KEY_FULL only */
	unsigned char			need;	/* KEY_FULL only: wanted nf_may bits */
	bool				gc;	/* want a garbage-collected entry? */
	enum nfsd_file_lookup_type	type;
};

/*
 * The returned hash value is based solely on the address of an in-core
 * inode, a pointer to a slab-allocated object.
The entropy in such a 97 * pointer is concentrated in its middle bits. 98 */ 99 static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed) 100 { 101 unsigned long ptr = (unsigned long)inode; 102 u32 k; 103 104 k = ptr >> L1_CACHE_SHIFT; 105 k &= 0x00ffffff; 106 return jhash2(&k, 1, seed); 107 } 108 109 /** 110 * nfsd_file_key_hashfn - Compute the hash value of a lookup key 111 * @data: key on which to compute the hash value 112 * @len: rhash table's key_len parameter (unused) 113 * @seed: rhash table's random seed of the day 114 * 115 * Return value: 116 * Computed 32-bit hash value 117 */ 118 static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed) 119 { 120 const struct nfsd_file_lookup_key *key = data; 121 122 return nfsd_file_inode_hash(key->inode, seed); 123 } 124 125 /** 126 * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file 127 * @data: object on which to compute the hash value 128 * @len: rhash table's key_len parameter (unused) 129 * @seed: rhash table's random seed of the day 130 * 131 * Return value: 132 * Computed 32-bit hash value 133 */ 134 static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed) 135 { 136 const struct nfsd_file *nf = data; 137 138 return nfsd_file_inode_hash(nf->nf_inode, seed); 139 } 140 141 static bool 142 nfsd_match_cred(const struct cred *c1, const struct cred *c2) 143 { 144 int i; 145 146 if (!uid_eq(c1->fsuid, c2->fsuid)) 147 return false; 148 if (!gid_eq(c1->fsgid, c2->fsgid)) 149 return false; 150 if (c1->group_info == NULL || c2->group_info == NULL) 151 return c1->group_info == c2->group_info; 152 if (c1->group_info->ngroups != c2->group_info->ngroups) 153 return false; 154 for (i = 0; i < c1->group_info->ngroups; i++) { 155 if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) 156 return false; 157 } 158 return true; 159 } 160 161 /** 162 * nfsd_file_obj_cmpfn - Match a cache item against search criteria 163 * @arg: search criteria 164 * @ptr: cache item to check 165 * 
* Return values:
 *   %0 - Item matches search criteria
 *   %1 - Item does not match search criteria
 */
static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
			       const void *ptr)
{
	const struct nfsd_file_lookup_key *key = arg->key;
	const struct nfsd_file *nf = ptr;

	switch (key->type) {
	case NFSD_FILE_KEY_INODE:
		/* Inode-only lookups still distinguish GC from non-GC entries */
		if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
			return 1;
		if (nf->nf_inode != key->inode)
			return 1;
		break;
	case NFSD_FILE_KEY_FULL:
		if (nf->nf_inode != key->inode)
			return 1;
		if (nf->nf_may != key->need)
			return 1;
		if (nf->nf_net != key->net)
			return 1;
		if (!nfsd_match_cred(nf->nf_cred, key->cred))
			return 1;
		if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
			return 1;
		/* An entry that has already been unhashed never matches */
		if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
			return 1;
		break;
	}
	return 0;
}

/*
 * Hash table configuration: objects are hashed on nf_inode alone, and
 * nfsd_file_obj_cmpfn() tells apart the entries sharing one inode.
 */
static const struct rhashtable_params nfsd_file_rhash_params = {
	.key_len		= sizeof_field(struct nfsd_file, nf_inode),
	.key_offset		= offsetof(struct nfsd_file, nf_inode),
	.head_offset		= offsetof(struct nfsd_file, nf_rhash),
	.hashfn			= nfsd_file_key_hashfn,
	.obj_hashfn		= nfsd_file_obj_hashfn,
	.obj_cmpfn		= nfsd_file_obj_cmpfn,
	/* Reduce resizing churn on light workloads */
	.min_size		= 512,		/* buckets */
	.automatic_shrinking	= true,
};

/*
 * Queue the laundrette to run after NFSD_LAUNDRETTE_DELAY. Does nothing
 * unless the cache is up (NFSD_FILE_CACHE_UP is set).
 */
static void
nfsd_file_schedule_laundrette(void)
{
	if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags))
		queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
				   NFSD_LAUNDRETTE_DELAY);
}

/*
 * Final release of an nfsd_file: drop its credential reference and hand
 * the memory back to the slab. Takes the object's embedded rcu_head so
 * that it can be used as a call_rcu() callback.
 */
static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}

/* fsnotify ->free_mark callback: return the containing nfsd_file_mark to its slab */
static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}

/*
 * Take a reference on @nfm. Returns @nfm on success, or NULL if its
 * reference count had already dropped to zero.
 */
static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
	if (!refcount_inc_not_zero(&nfm->nfm_ref))
		return NULL;
	return nfm;
}

/*
 * Drop a reference on @nfm. When the last one goes away, detach the mark
 * from its inode and drop the reference on the embedded fsnotify mark.
 */
static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (refcount_dec_and_test(&nfm->nfm_ref)) {
		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}

/*
 * Find this cache's fsnotify mark on @inode and take a reference to it,
 * or allocate and attach a new one if none is usable. @nf is not used.
 *
 * Returns the referenced nfsd_file_mark, or NULL if the allocation failed
 * or attaching the new mark failed with an error other than -EEXIST
 * (-EEXIST restarts the search).
 */
static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
{
	int			err;
	struct fsnotify_mark	*mark;
	struct nfsd_file_mark	*nfm = NULL, *new;

	do {
		fsnotify_group_lock(nfsd_file_fsnotify_group);
		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
					  nfsd_file_fsnotify_group);
		if (mark) {
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			fsnotify_group_unlock(nfsd_file_fsnotify_group);
			if (nfm) {
				/* Keep the nfm_ref; drop the find_mark reference */
				fsnotify_put_mark(mark);
				break;
			}
			/* Avoid soft lockup race with nfsd_file_mark_put() */
			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
			fsnotify_put_mark(mark);
		} else {
			fsnotify_group_unlock(nfsd_file_fsnotify_group);
		}

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		refcount_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}

/*
 * Allocate and initialize an nfsd_file from @key. The object starts out
 * with a single reference, no open file and no fsnotify mark, and with
 * NFSD_FILE_HASHED and NFSD_FILE_PENDING already set (plus NFSD_FILE_GC
 * when key->gc is true). It records the caller's current credential
 * rather than key->cred. @may is not used; nf_may comes from key->need.
 *
 * Returns the new object, or NULL if the allocation failed.
 */
static struct nfsd_file *
nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (nf) {
		INIT_LIST_HEAD(&nf->nf_lru);
		nf->nf_birthtime = ktime_get();
		nf->nf_file = NULL;
		nf->nf_cred = get_current_cred();
		nf->nf_net = key->net;
		nf->nf_flags = 0;
		__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
		__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
		if (key->gc)
			__set_bit(NFSD_FILE_GC, &nf->nf_flags);
		nf->nf_inode = key->inode;
		refcount_set(&nf->nf_ref, 1);
		nf->nf_may = key->need;
		nf->nf_mark = NULL;
	}
	return nf;
}

/**
 * nfsd_file_check_write_error - check for writeback errors on a file
 * @nf: nfsd_file to check for writeback errors
 *
 * Check whether a nfsd_file has an unseen error. Reset the write
 * verifier if so.
*/
static void
nfsd_file_check_write_error(struct nfsd_file *nf)
{
	/* nf_file is dereferenced unconditionally: callers must have set it */
	struct file *file = nf->nf_file;

	if ((file->f_mode & FMODE_WRITE) &&
	    filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)))
		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
}

/* Remove @nf from the hash table without touching NFSD_FILE_HASHED */
static void
nfsd_file_hash_remove(struct nfsd_file *nf)
{
	trace_nfsd_file_unhash(nf);
	rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash,
			       nfsd_file_rhash_params);
}

/*
 * Unhash @nf if it is still marked hashed. The atomic test-and-clear of
 * NFSD_FILE_HASHED ensures only one caller performs the removal.
 *
 * Returns true if this call removed the entry, false if the flag was
 * already clear.
 */
static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_hash_remove(nf);
		return true;
	}
	return false;
}

/*
 * Tear down @nf: account for it in the release statistics, unhash it,
 * drop its fsnotify mark, close its file (after checking for unseen
 * writeback errors) and free the object after an RCU grace period.
 */
static void
nfsd_file_free(struct nfsd_file *nf)
{
	s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime));

	trace_nfsd_file_free(nf);

	this_cpu_inc(nfsd_file_releases);
	this_cpu_add(nfsd_file_total_age, age);

	nfsd_file_unhash(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
		nfsd_file_check_write_error(nf);
		filp_close(nf->nf_file, NULL);
	}

	/*
	 * If this item is still linked via nf_lru, that's a bug.
	 * WARN and leak it to preserve system stability.
	 */
	if (WARN_ON_ONCE(!list_empty(&nf->nf_lru)))
		return;

	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
}

/*
 * Returns true if @nf has a file open for write whose mapping still has
 * pages tagged dirty or under writeback; false otherwise, including when
 * no file is attached.
 */
static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;
	struct address_space *mapping;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return false;
	mapping = file->f_mapping;
	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}

/*
 * Mark @nf as recently referenced and try to add it to the LRU.
 * Returns true if it was added, false if list_lru_add() refused.
 * NFSD_FILE_REFERENCED is set either way.
 */
static bool nfsd_file_lru_add(struct nfsd_file *nf)
{
	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) {
		trace_nfsd_file_lru_add(nf);
		return true;
	}
	return false;
}

/*
 * Try to take @nf off the LRU. Returns true if this call removed it,
 * false if list_lru_del() did not remove anything.
 */
static bool nfsd_file_lru_remove(struct nfsd_file *nf)
{
	if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) {
		trace_nfsd_file_lru_del(nf);
		return true;
	}
	return false;
}

/*
 * Take a reference on @nf. Returns @nf on success, or NULL if @nf is
 * NULL or its reference count had already reached zero.
 */
struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
	if (nf && refcount_inc_not_zero(&nf->nf_ref))
		return nf;
	return NULL;
}

/**
 * nfsd_file_put - put the reference to a nfsd_file
 * @nf: nfsd_file of which to put the reference
 *
 * Put a reference to a nfsd_file. In the non-GC case, we just put the
 * reference immediately. In the GC case, if the reference would be
 * the last one, then put it on the LRU instead to be cleaned up later.
 */
void
nfsd_file_put(struct nfsd_file *nf)
{
	might_sleep();
	trace_nfsd_file_put(nf);

	if (test_bit(NFSD_FILE_GC, &nf->nf_flags) &&
	    test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		/*
		 * If this is the last reference (nf_ref == 1), then try to
		 * transfer it to the LRU.
		 */
		if (refcount_dec_not_one(&nf->nf_ref))
			return;

		/* Try to add it to the LRU.  If that fails, decrement. */
		if (nfsd_file_lru_add(nf)) {
			/* If it's still hashed, we're done */
			if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
				nfsd_file_schedule_laundrette();
				return;
			}

			/*
			 * We're racing with unhashing, so try to remove it from
			 * the LRU. If removal fails, then someone else already
			 * has our reference.
			 */
			if (!nfsd_file_lru_remove(nf))
				return;
		}
	}
	/* Non-GC, unhashed, or not parked on the LRU: drop the reference now */
	if (refcount_dec_and_test(&nf->nf_ref))
		nfsd_file_free(nf);
}

/* Unlink and free every nfsd_file on @dispose (entries are linked via nf_lru) */
static void
nfsd_file_dispose_list(struct list_head *dispose)
{
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del_init(&nf->nf_lru);
		nfsd_file_free(nf);
	}
}

/* Move everything pending on @l's freeme list onto @dst, under @l's lock */
static void
nfsd_file_list_remove_disposal(struct list_head *dst,
		struct nfsd_fcache_disposal *l)
{
	spin_lock(&l->lock);
	list_splice_init(&l->freeme, dst);
	spin_unlock(&l->lock);
}

/*
 * Append @files to @net's disposal list and queue that list's work item
 * so the entries are freed from the nfsd_filecache workqueue.
 */
static void
nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct nfsd_fcache_disposal *l = nn->fcache_disposal;

	spin_lock(&l->lock);
	list_splice_tail_init(files, &l->freeme);
	spin_unlock(&l->lock);
	queue_work(nfsd_filecache_wq, &l->work);
}

/* Move every entry of @src that belongs to @net onto the tail of @dst */
static void
nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
		struct net *net)
{
	struct nfsd_file *nf, *tmp;

	list_for_each_entry_safe(nf, tmp, src, nf_lru) {
		if (nf->nf_net == net)
			list_move_tail(&nf->nf_lru, dst);
	}
}

/*
 * Hand every entry on @dispose to the disposal list of the net namespace
 * it belongs to. Each pass takes the net of the first remaining entry,
 * extracts all entries for that net and queues them, until @dispose is
 * empty.
 */
static void
nfsd_file_dispose_list_delayed(struct list_head *dispose)
{
	LIST_HEAD(list);
	struct nfsd_file *nf;

	while(!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
		nfsd_file_list_add_disposal(&list, nf->nf_net);
	}
}

/**
 * nfsd_file_lru_cb - Examine an entry on the LRU list
 * @item: LRU entry to examine
 * @lru: controlling LRU
 * @lock: LRU list lock (unused)
 * @arg: dispose list
 *
 * Return values:
 *   %LRU_REMOVED: @item was removed from the LRU
 *   %LRU_ROTATE: @item is to be moved to the LRU tail
 *   %LRU_SKIP: @item cannot be evicted
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/* We should only be dealing with GC entries here */
	WARN_ON_ONCE(!test_bit(NFSD_FILE_GC, &nf->nf_flags));

	/*
	 * Don't throw out files that are still undergoing I/O or
	 * that have uncleared errors pending.
	 */
	if (nfsd_file_check_writeback(nf)) {
		trace_nfsd_file_gc_writeback(nf);
		return LRU_SKIP;
	}

	/* If it was recently added to the list, skip it */
	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) {
		trace_nfsd_file_gc_referenced(nf);
		return LRU_ROTATE;
	}

	/*
	 * Put the reference held on behalf of the LRU. If it wasn't the last
	 * one, then just remove it from the LRU and ignore it.
	 */
	if (!refcount_dec_and_test(&nf->nf_ref)) {
		trace_nfsd_file_gc_in_use(nf);
		list_lru_isolate(lru, &nf->nf_lru);
		return LRU_REMOVED;
	}

	/* Refcount went to zero. Unhash it and queue it to the dispose list */
	nfsd_file_unhash(nf);
	list_lru_isolate_move(lru, &nf->nf_lru, head);
	this_cpu_inc(nfsd_file_evictions);
	trace_nfsd_file_gc_disposed(nf);
	return LRU_REMOVED;
}

/*
 * Run one garbage-collection pass: offer every entry currently counted
 * on the LRU to nfsd_file_lru_cb(), then pass whatever it collected to
 * the per-net delayed disposal lists.
 */
static void
nfsd_file_gc(void)
{
	LIST_HEAD(dispose);
	unsigned long ret;

	ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb,
			    &dispose, list_lru_count(&nfsd_file_lru));
	trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru));
	nfsd_file_dispose_list_delayed(&dispose);
}

/*
 * Laundrette work function: run a GC pass, and re-arm the laundrette
 * while entries remain on the LRU.
 */
static void
nfsd_file_gc_worker(struct work_struct *work)
{
	nfsd_file_gc();
	if (list_lru_count(&nfsd_file_lru))
		nfsd_file_schedule_laundrette();
}

/* Shrinker ->count_objects: report the number of entries on the LRU */
static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_count(&nfsd_file_lru);
}

/*
 * Shrinker ->scan_objects: walk the LRU as directed by @sc, applying the
 * same callback as the laundrette, and queue the collected entries for
 * delayed disposal. Returns the value reported by list_lru_shrink_walk().
 */
static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	LIST_HEAD(dispose);
	unsigned long ret;

	ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
				   nfsd_file_lru_cb, &dispose);
	trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru));
	nfsd_file_dispose_list_delayed(&dispose);
	return ret;
}

/* Registered by nfsd_file_cache_init(), unregistered on shutdown */
static struct shrinker	nfsd_file_shrinker = {
	.scan_objects = nfsd_file_lru_scan,
	.count_objects = nfsd_file_lru_count,
	.seeks = 1,
};

/**
 * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file
 * @nf: nfsd_file to attempt to queue
 * @dispose: private list to queue successfully-put objects
 *
 * Unhash an nfsd_file, try to get a reference to it, and then put that
 * reference. If it's the last reference, queue it to the dispose list.
650 */ 651 static void 652 nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) 653 __must_hold(RCU) 654 { 655 int decrement = 1; 656 657 /* If we raced with someone else unhashing, ignore it */ 658 if (!nfsd_file_unhash(nf)) 659 return; 660 661 /* If we can't get a reference, ignore it */ 662 if (!nfsd_file_get(nf)) 663 return; 664 665 /* Extra decrement if we remove from the LRU */ 666 if (nfsd_file_lru_remove(nf)) 667 ++decrement; 668 669 /* If refcount goes to 0, then put on the dispose list */ 670 if (refcount_sub_and_test(decrement, &nf->nf_ref)) { 671 list_add(&nf->nf_lru, dispose); 672 trace_nfsd_file_closing(nf); 673 } 674 } 675 676 /** 677 * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode 678 * @inode: inode on which to close out nfsd_files 679 * @dispose: list on which to gather nfsd_files to close out 680 * 681 * An nfsd_file represents a struct file being held open on behalf of nfsd. An 682 * open file however can block other activity (such as leases), or cause 683 * undesirable behavior (e.g. spurious silly-renames when reexporting NFS). 684 * 685 * This function is intended to find open nfsd_files when this sort of 686 * conflicting access occurs and then attempt to close those files out. 687 * 688 * Populates the dispose list with entries that have already had their 689 * refcounts go to zero. The actual free of an nfsd_file can be expensive, 690 * so we leave it up to the caller whether it wants to wait or not. 
691 */ 692 static void 693 nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) 694 { 695 struct nfsd_file_lookup_key key = { 696 .type = NFSD_FILE_KEY_INODE, 697 .inode = inode, 698 .gc = true, 699 }; 700 struct nfsd_file *nf; 701 702 rcu_read_lock(); 703 do { 704 nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, 705 nfsd_file_rhash_params); 706 if (!nf) 707 break; 708 nfsd_file_cond_queue(nf, dispose); 709 } while (1); 710 rcu_read_unlock(); 711 } 712 713 /** 714 * nfsd_file_close_inode - attempt a delayed close of a nfsd_file 715 * @inode: inode of the file to attempt to remove 716 * 717 * Close out any open nfsd_files that can be reaped for @inode. The 718 * actual freeing is deferred to the dispose_list_delayed infrastructure. 719 * 720 * This is used by the fsnotify callbacks and setlease notifier. 721 */ 722 static void 723 nfsd_file_close_inode(struct inode *inode) 724 { 725 LIST_HEAD(dispose); 726 727 nfsd_file_queue_for_close(inode, &dispose); 728 nfsd_file_dispose_list_delayed(&dispose); 729 } 730 731 /** 732 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file 733 * @inode: inode of the file to attempt to remove 734 * 735 * Close out any open nfsd_files that can be reaped for @inode. The 736 * nfsd_files are closed out synchronously. 737 * 738 * This is called from nfsd_rename and nfsd_unlink to avoid silly-renames 739 * when reexporting NFS. 
740 */ 741 void 742 nfsd_file_close_inode_sync(struct inode *inode) 743 { 744 struct nfsd_file *nf; 745 LIST_HEAD(dispose); 746 747 trace_nfsd_file_close(inode); 748 749 nfsd_file_queue_for_close(inode, &dispose); 750 while (!list_empty(&dispose)) { 751 nf = list_first_entry(&dispose, struct nfsd_file, nf_lru); 752 list_del_init(&nf->nf_lru); 753 nfsd_file_free(nf); 754 } 755 flush_delayed_fput(); 756 } 757 758 /** 759 * nfsd_file_delayed_close - close unused nfsd_files 760 * @work: dummy 761 * 762 * Walk the LRU list and destroy any entries that have not been used since 763 * the last scan. 764 */ 765 static void 766 nfsd_file_delayed_close(struct work_struct *work) 767 { 768 LIST_HEAD(head); 769 struct nfsd_fcache_disposal *l = container_of(work, 770 struct nfsd_fcache_disposal, work); 771 772 nfsd_file_list_remove_disposal(&head, l); 773 nfsd_file_dispose_list(&head); 774 } 775 776 static int 777 nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, 778 void *data) 779 { 780 struct file_lock *fl = data; 781 782 /* Only close files for F_SETLEASE leases */ 783 if (fl->fl_flags & FL_LEASE) 784 nfsd_file_close_inode(file_inode(fl->fl_file)); 785 return 0; 786 } 787 788 static struct notifier_block nfsd_file_lease_notifier = { 789 .notifier_call = nfsd_file_lease_notifier_call, 790 }; 791 792 static int 793 nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask, 794 struct inode *inode, struct inode *dir, 795 const struct qstr *name, u32 cookie) 796 { 797 if (WARN_ON_ONCE(!inode)) 798 return 0; 799 800 trace_nfsd_file_fsnotify_handle_event(inode, mask); 801 802 /* Should be no marks on non-regular files */ 803 if (!S_ISREG(inode->i_mode)) { 804 WARN_ON_ONCE(1); 805 return 0; 806 } 807 808 /* don't close files if this was not the last link */ 809 if (mask & FS_ATTRIB) { 810 if (inode->i_nlink) 811 return 0; 812 } 813 814 nfsd_file_close_inode(inode); 815 return 0; 816 } 817 818 819 static const struct fsnotify_ops 
nfsd_file_fsnotify_ops = { 820 .handle_inode_event = nfsd_file_fsnotify_handle_event, 821 .free_mark = nfsd_file_mark_free, 822 }; 823 824 int 825 nfsd_file_cache_init(void) 826 { 827 int ret; 828 829 lockdep_assert_held(&nfsd_mutex); 830 if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) 831 return 0; 832 833 ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params); 834 if (ret) 835 return ret; 836 837 ret = -ENOMEM; 838 nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0); 839 if (!nfsd_filecache_wq) 840 goto out; 841 842 nfsd_file_slab = kmem_cache_create("nfsd_file", 843 sizeof(struct nfsd_file), 0, 0, NULL); 844 if (!nfsd_file_slab) { 845 pr_err("nfsd: unable to create nfsd_file_slab\n"); 846 goto out_err; 847 } 848 849 nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark", 850 sizeof(struct nfsd_file_mark), 0, 0, NULL); 851 if (!nfsd_file_mark_slab) { 852 pr_err("nfsd: unable to create nfsd_file_mark_slab\n"); 853 goto out_err; 854 } 855 856 857 ret = list_lru_init(&nfsd_file_lru); 858 if (ret) { 859 pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret); 860 goto out_err; 861 } 862 863 ret = register_shrinker(&nfsd_file_shrinker, "nfsd-filecache"); 864 if (ret) { 865 pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret); 866 goto out_lru; 867 } 868 869 ret = lease_register_notifier(&nfsd_file_lease_notifier); 870 if (ret) { 871 pr_err("nfsd: unable to register lease notifier: %d\n", ret); 872 goto out_shrinker; 873 } 874 875 nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops, 876 FSNOTIFY_GROUP_NOFS); 877 if (IS_ERR(nfsd_file_fsnotify_group)) { 878 pr_err("nfsd: unable to create fsnotify group: %ld\n", 879 PTR_ERR(nfsd_file_fsnotify_group)); 880 ret = PTR_ERR(nfsd_file_fsnotify_group); 881 nfsd_file_fsnotify_group = NULL; 882 goto out_notifier; 883 } 884 885 INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker); 886 out: 887 return ret; 888 out_notifier: 889 
lease_unregister_notifier(&nfsd_file_lease_notifier); 890 out_shrinker: 891 unregister_shrinker(&nfsd_file_shrinker); 892 out_lru: 893 list_lru_destroy(&nfsd_file_lru); 894 out_err: 895 kmem_cache_destroy(nfsd_file_slab); 896 nfsd_file_slab = NULL; 897 kmem_cache_destroy(nfsd_file_mark_slab); 898 nfsd_file_mark_slab = NULL; 899 destroy_workqueue(nfsd_filecache_wq); 900 nfsd_filecache_wq = NULL; 901 rhashtable_destroy(&nfsd_file_rhash_tbl); 902 goto out; 903 } 904 905 /** 906 * __nfsd_file_cache_purge: clean out the cache for shutdown 907 * @net: net-namespace to shut down the cache (may be NULL) 908 * 909 * Walk the nfsd_file cache and close out any that match @net. If @net is NULL, 910 * then close out everything. Called when an nfsd instance is being shut down. 911 */ 912 static void 913 __nfsd_file_cache_purge(struct net *net) 914 { 915 struct rhashtable_iter iter; 916 struct nfsd_file *nf; 917 LIST_HEAD(dispose); 918 919 rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter); 920 do { 921 rhashtable_walk_start(&iter); 922 923 nf = rhashtable_walk_next(&iter); 924 while (!IS_ERR_OR_NULL(nf)) { 925 if (!net || nf->nf_net == net) 926 nfsd_file_cond_queue(nf, &dispose); 927 nf = rhashtable_walk_next(&iter); 928 } 929 930 rhashtable_walk_stop(&iter); 931 } while (nf == ERR_PTR(-EAGAIN)); 932 rhashtable_walk_exit(&iter); 933 934 nfsd_file_dispose_list(&dispose); 935 } 936 937 static struct nfsd_fcache_disposal * 938 nfsd_alloc_fcache_disposal(void) 939 { 940 struct nfsd_fcache_disposal *l; 941 942 l = kmalloc(sizeof(*l), GFP_KERNEL); 943 if (!l) 944 return NULL; 945 INIT_WORK(&l->work, nfsd_file_delayed_close); 946 spin_lock_init(&l->lock); 947 INIT_LIST_HEAD(&l->freeme); 948 return l; 949 } 950 951 static void 952 nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l) 953 { 954 cancel_work_sync(&l->work); 955 nfsd_file_dispose_list(&l->freeme); 956 kfree(l); 957 } 958 959 static void 960 nfsd_free_fcache_disposal_net(struct net *net) 961 { 962 struct nfsd_net *nn = 
net_generic(net, nfsd_net_id); 963 struct nfsd_fcache_disposal *l = nn->fcache_disposal; 964 965 nfsd_free_fcache_disposal(l); 966 } 967 968 int 969 nfsd_file_cache_start_net(struct net *net) 970 { 971 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 972 973 nn->fcache_disposal = nfsd_alloc_fcache_disposal(); 974 return nn->fcache_disposal ? 0 : -ENOMEM; 975 } 976 977 /** 978 * nfsd_file_cache_purge - Remove all cache items associated with @net 979 * @net: target net namespace 980 * 981 */ 982 void 983 nfsd_file_cache_purge(struct net *net) 984 { 985 lockdep_assert_held(&nfsd_mutex); 986 if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) 987 __nfsd_file_cache_purge(net); 988 } 989 990 void 991 nfsd_file_cache_shutdown_net(struct net *net) 992 { 993 nfsd_file_cache_purge(net); 994 nfsd_free_fcache_disposal_net(net); 995 } 996 997 void 998 nfsd_file_cache_shutdown(void) 999 { 1000 int i; 1001 1002 lockdep_assert_held(&nfsd_mutex); 1003 if (test_and_clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0) 1004 return; 1005 1006 lease_unregister_notifier(&nfsd_file_lease_notifier); 1007 unregister_shrinker(&nfsd_file_shrinker); 1008 /* 1009 * make sure all callers of nfsd_file_lru_cb are done before 1010 * calling nfsd_file_cache_purge 1011 */ 1012 cancel_delayed_work_sync(&nfsd_filecache_laundrette); 1013 __nfsd_file_cache_purge(NULL); 1014 list_lru_destroy(&nfsd_file_lru); 1015 rcu_barrier(); 1016 fsnotify_put_group(nfsd_file_fsnotify_group); 1017 nfsd_file_fsnotify_group = NULL; 1018 kmem_cache_destroy(nfsd_file_slab); 1019 nfsd_file_slab = NULL; 1020 fsnotify_wait_marks_destroyed(); 1021 kmem_cache_destroy(nfsd_file_mark_slab); 1022 nfsd_file_mark_slab = NULL; 1023 destroy_workqueue(nfsd_filecache_wq); 1024 nfsd_filecache_wq = NULL; 1025 rhashtable_destroy(&nfsd_file_rhash_tbl); 1026 1027 for_each_possible_cpu(i) { 1028 per_cpu(nfsd_file_cache_hits, i) = 0; 1029 per_cpu(nfsd_file_acquisitions, i) = 0; 1030 per_cpu(nfsd_file_releases, i) = 0; 1031 
per_cpu(nfsd_file_total_age, i) = 0; 1032 per_cpu(nfsd_file_evictions, i) = 0; 1033 } 1034 } 1035 1036 /** 1037 * nfsd_file_is_cached - are there any cached open files for this inode? 1038 * @inode: inode to check 1039 * 1040 * The lookup matches inodes in all net namespaces and is atomic wrt 1041 * nfsd_file_acquire(). 1042 * 1043 * Return values: 1044 * %true: filecache contains at least one file matching this inode 1045 * %false: filecache contains no files matching this inode 1046 */ 1047 bool 1048 nfsd_file_is_cached(struct inode *inode) 1049 { 1050 struct nfsd_file_lookup_key key = { 1051 .type = NFSD_FILE_KEY_INODE, 1052 .inode = inode, 1053 .gc = true, 1054 }; 1055 bool ret = false; 1056 1057 if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key, 1058 nfsd_file_rhash_params) != NULL) 1059 ret = true; 1060 trace_nfsd_file_is_cached(inode, (int)ret); 1061 return ret; 1062 } 1063 1064 static __be32 1065 nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, 1066 unsigned int may_flags, struct file *file, 1067 struct nfsd_file **pnf, bool want_gc) 1068 { 1069 struct nfsd_file_lookup_key key = { 1070 .type = NFSD_FILE_KEY_FULL, 1071 .need = may_flags & NFSD_FILE_MAY_MASK, 1072 .net = SVC_NET(rqstp), 1073 .gc = want_gc, 1074 }; 1075 bool open_retry = true; 1076 struct nfsd_file *nf; 1077 __be32 status; 1078 int ret; 1079 1080 status = fh_verify(rqstp, fhp, S_IFREG, 1081 may_flags|NFSD_MAY_OWNER_OVERRIDE); 1082 if (status != nfs_ok) 1083 return status; 1084 key.inode = d_inode(fhp->fh_dentry); 1085 key.cred = get_current_cred(); 1086 1087 retry: 1088 rcu_read_lock(); 1089 nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, 1090 nfsd_file_rhash_params); 1091 nf = nfsd_file_get(nf); 1092 rcu_read_unlock(); 1093 1094 if (nf) { 1095 /* 1096 * If the nf is on the LRU then it holds an extra reference 1097 * that must be put if it's removed. It had better not be 1098 * the last one however, since we should hold another. 
1099 */ 1100 if (nfsd_file_lru_remove(nf)) 1101 WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref)); 1102 goto wait_for_construction; 1103 } 1104 1105 nf = nfsd_file_alloc(&key, may_flags); 1106 if (!nf) { 1107 status = nfserr_jukebox; 1108 goto out; 1109 } 1110 1111 ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl, 1112 &key, &nf->nf_rhash, 1113 nfsd_file_rhash_params); 1114 if (likely(ret == 0)) 1115 goto open_file; 1116 1117 if (ret == -EEXIST) 1118 goto retry; 1119 trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret); 1120 status = nfserr_jukebox; 1121 goto construction_err; 1122 1123 wait_for_construction: 1124 wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE); 1125 1126 /* Did construction of this file fail? */ 1127 if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 1128 trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf); 1129 if (!open_retry) { 1130 status = nfserr_jukebox; 1131 goto construction_err; 1132 } 1133 open_retry = false; 1134 goto retry; 1135 } 1136 this_cpu_inc(nfsd_file_cache_hits); 1137 1138 status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); 1139 if (status != nfs_ok) { 1140 nfsd_file_put(nf); 1141 nf = NULL; 1142 } 1143 1144 out: 1145 if (status == nfs_ok) { 1146 this_cpu_inc(nfsd_file_acquisitions); 1147 nfsd_file_check_write_error(nf); 1148 *pnf = nf; 1149 } 1150 put_cred(key.cred); 1151 trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); 1152 return status; 1153 1154 open_file: 1155 trace_nfsd_file_alloc(nf); 1156 nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode); 1157 if (nf->nf_mark) { 1158 if (file) { 1159 get_file(file); 1160 nf->nf_file = file; 1161 status = nfs_ok; 1162 trace_nfsd_file_opened(nf, status); 1163 } else { 1164 status = nfsd_open_verified(rqstp, fhp, may_flags, 1165 &nf->nf_file); 1166 trace_nfsd_file_open(nf, status); 1167 } 1168 } else 1169 status = nfserr_jukebox; 1170 /* 1171 * If construction failed, or we raced with a call to 
unlink() 1172 * then unhash. 1173 */ 1174 if (status == nfs_ok && key.inode->i_nlink == 0) 1175 status = nfserr_jukebox; 1176 if (status != nfs_ok) 1177 nfsd_file_unhash(nf); 1178 clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags); 1179 if (status == nfs_ok) 1180 goto out; 1181 1182 construction_err: 1183 if (refcount_dec_and_test(&nf->nf_ref)) 1184 nfsd_file_free(nf); 1185 nf = NULL; 1186 goto out; 1187 } 1188 1189 /** 1190 * nfsd_file_acquire_gc - Get a struct nfsd_file with an open file 1191 * @rqstp: the RPC transaction being executed 1192 * @fhp: the NFS filehandle of the file to be opened 1193 * @may_flags: NFSD_MAY_ settings for the file 1194 * @pnf: OUT: new or found "struct nfsd_file" object 1195 * 1196 * The nfsd_file object returned by this API is reference-counted 1197 * and garbage-collected. The object is retained for a few 1198 * seconds after the final nfsd_file_put() in case the caller 1199 * wants to re-use it. 1200 * 1201 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in 1202 * network byte order is returned. 1203 */ 1204 __be32 1205 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, 1206 unsigned int may_flags, struct nfsd_file **pnf) 1207 { 1208 return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true); 1209 } 1210 1211 /** 1212 * nfsd_file_acquire - Get a struct nfsd_file with an open file 1213 * @rqstp: the RPC transaction being executed 1214 * @fhp: the NFS filehandle of the file to be opened 1215 * @may_flags: NFSD_MAY_ settings for the file 1216 * @pnf: OUT: new or found "struct nfsd_file" object 1217 * 1218 * The nfsd_file_object returned by this API is reference-counted 1219 * but not garbage-collected. The object is unhashed after the 1220 * final nfsd_file_put(). 1221 * 1222 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in 1223 * network byte order is returned. 
1224 */ 1225 __be32 1226 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, 1227 unsigned int may_flags, struct nfsd_file **pnf) 1228 { 1229 return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false); 1230 } 1231 1232 /** 1233 * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file 1234 * @rqstp: the RPC transaction being executed 1235 * @fhp: the NFS filehandle of the file just created 1236 * @may_flags: NFSD_MAY_ settings for the file 1237 * @file: cached, already-open file (may be NULL) 1238 * @pnf: OUT: new or found "struct nfsd_file" object 1239 * 1240 * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist, 1241 * and @file is non-NULL, use it to instantiate a new nfsd_file instead of 1242 * opening a new one. 1243 * 1244 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in 1245 * network byte order is returned. 1246 */ 1247 __be32 1248 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, 1249 unsigned int may_flags, struct file *file, 1250 struct nfsd_file **pnf) 1251 { 1252 return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false); 1253 } 1254 1255 /* 1256 * Note that fields may be added, removed or reordered in the future. Programs 1257 * scraping this file for info should test the labels to ensure they're 1258 * getting the correct field. 
1259 */ 1260 int nfsd_file_cache_stats_show(struct seq_file *m, void *v) 1261 { 1262 unsigned long releases = 0, evictions = 0; 1263 unsigned long hits = 0, acquisitions = 0; 1264 unsigned int i, count = 0, buckets = 0; 1265 unsigned long lru = 0, total_age = 0; 1266 1267 /* Serialize with server shutdown */ 1268 mutex_lock(&nfsd_mutex); 1269 if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) { 1270 struct bucket_table *tbl; 1271 struct rhashtable *ht; 1272 1273 lru = list_lru_count(&nfsd_file_lru); 1274 1275 rcu_read_lock(); 1276 ht = &nfsd_file_rhash_tbl; 1277 count = atomic_read(&ht->nelems); 1278 tbl = rht_dereference_rcu(ht->tbl, ht); 1279 buckets = tbl->size; 1280 rcu_read_unlock(); 1281 } 1282 mutex_unlock(&nfsd_mutex); 1283 1284 for_each_possible_cpu(i) { 1285 hits += per_cpu(nfsd_file_cache_hits, i); 1286 acquisitions += per_cpu(nfsd_file_acquisitions, i); 1287 releases += per_cpu(nfsd_file_releases, i); 1288 total_age += per_cpu(nfsd_file_total_age, i); 1289 evictions += per_cpu(nfsd_file_evictions, i); 1290 } 1291 1292 seq_printf(m, "total entries: %u\n", count); 1293 seq_printf(m, "hash buckets: %u\n", buckets); 1294 seq_printf(m, "lru entries: %lu\n", lru); 1295 seq_printf(m, "cache hits: %lu\n", hits); 1296 seq_printf(m, "acquisitions: %lu\n", acquisitions); 1297 seq_printf(m, "releases: %lu\n", releases); 1298 seq_printf(m, "evictions: %lu\n", evictions); 1299 if (releases) 1300 seq_printf(m, "mean age (ms): %ld\n", total_age / releases); 1301 else 1302 seq_printf(m, "mean age (ms): -\n"); 1303 return 0; 1304 } 1305