// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2019, 2020 Amazon.com, Inc. or its affiliates. All rights reserved.
 *
 * User extended attribute client side cache functions.
 *
 * Author: Frank van der Linden <fllinden@amazon.com>
 */
#include <linux/errno.h>
#include <linux/nfs_fs.h>
#include <linux/hashtable.h>
#include <linux/refcount.h>
#include <uapi/linux/xattr.h>

#include "nfs4_fs.h"
#include "internal.h"

/*
 * User extended attributes client side caching is implemented by having
 * a cache structure attached to NFS inodes. This structure is allocated
 * when needed, and freed when the cache is zapped.
 *
 * The cache structure contains a hash table of entries, and a pointer
 * to a special-cased entry for the listxattr cache.
 *
 * Accessing and allocating / freeing the caches is done via reference
 * counting. The cache entries use a similar refcounting scheme.
 *
 * This makes freeing a cache, both from the shrinker and from the
 * zap cache path, easy. It also means that, in current use cases,
 * the large majority of inodes will not waste any memory, as they
 * will never have any user extended attributes assigned to them.
 *
 * Attribute entries are hashed into a simple hash table. They are
 * also part of an LRU.
 *
 * There are three shrinkers.
 *
 * Two shrinkers deal with the cache entries themselves: one for
 * large entries (> PAGE_SIZE), and one for smaller entries. The
 * shrinker for the larger entries works more aggressively than
 * the one for the smaller entries.
 *
 * The other shrinker frees the cache structures themselves.
 */

/*
 * 64 buckets is a good default. There is likely no reasonable
 * workload that uses more than even 64 user extended attributes.
 * You can certainly add a lot more - but you get what you ask for
 * in those circumstances.
 */
#define NFS4_XATTR_HASH_SIZE	64

#define NFSDBG_FACILITY	NFSDBG_XATTRCACHE

struct nfs4_xattr_cache;
struct nfs4_xattr_entry;

struct nfs4_xattr_bucket {
	spinlock_t lock;
	struct hlist_head hlist;
	struct nfs4_xattr_cache *cache;
	bool draining;
};

struct nfs4_xattr_cache {
	struct kref ref;
	struct nfs4_xattr_bucket buckets[NFS4_XATTR_HASH_SIZE];
	struct list_head lru;
	struct list_head dispose;
	atomic_long_t nent;
	spinlock_t listxattr_lock;
	struct inode *inode;
	struct nfs4_xattr_entry *listxattr;
};

struct nfs4_xattr_entry {
	struct kref ref;
	struct hlist_node hnode;
	struct list_head lru;
	struct list_head dispose;
	char *xattr_name;
	void *xattr_value;
	size_t xattr_size;
	struct nfs4_xattr_bucket *bucket;
	uint32_t flags;
};

#define NFS4_XATTR_ENTRY_EXTVAL	0x0001

/*
 * LRU list of NFS inodes that have xattr caches.
 */
static struct list_lru nfs4_xattr_cache_lru;
static struct list_lru nfs4_xattr_entry_lru;
static struct list_lru nfs4_xattr_large_entry_lru;

static struct kmem_cache *nfs4_xattr_cache_cachep;

/*
 * Hashing helper functions.
 */
static void
nfs4_xattr_hash_init(struct nfs4_xattr_cache *cache)
{
	unsigned int i;

	for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&cache->buckets[i].hlist);
		spin_lock_init(&cache->buckets[i].lock);
		cache->buckets[i].cache = cache;
		cache->buckets[i].draining = false;
	}
}

/*
 * Locking order:
 * 1. inode i_lock or bucket lock
 * 2. list_lru lock (taken by list_lru_* functions)
 */

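/*
 * Roughly how the NFSv4.2 xattr code is expected to use the entry
 * points defined below. This is an illustrative sketch only; the
 * caller names and the "<... RPC>" steps are schematic, not the
 * actual nfs4proc.c code:
 *
 *	getxattr(inode, name, buf, buflen):
 *		ret = nfs4_xattr_cache_get(inode, name, buf, buflen);
 *		if (ret == -ENOENT) {
 *			ret = <GETXATTR RPC>;
 *			if (ret >= 0)
 *				nfs4_xattr_cache_add(inode, name, buf, NULL, ret);
 *		}
 *
 *	setxattr / removexattr(inode, name, ...):
 *		<SETXATTR / REMOVEXATTR RPC>;
 *		on success, nfs4_xattr_cache_add() the new value, or
 *		nfs4_xattr_cache_remove(inode, name) for a removal;
 *
 *	listxattr(inode, buf, buflen):
 *		ret = nfs4_xattr_cache_list(inode, buf, buflen);
 *		if (ret == -ENOENT) {
 *			ret = <LISTXATTR RPC>;
 *			if (ret >= 0)
 *				nfs4_xattr_cache_set_list(inode, buf, ret);
 *		}
 *
 *	evict_inode(inode):
 *		nfs4_xattr_cache_zap(inode);
 */
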
/*
 * Wrapper functions to add a cache entry to the right LRU.
 */
static bool
nfs4_xattr_entry_lru_add(struct nfs4_xattr_entry *entry)
{
	struct list_lru *lru;

	lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ?
	    &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;

	return list_lru_add(lru, &entry->lru);
}

static bool
nfs4_xattr_entry_lru_del(struct nfs4_xattr_entry *entry)
{
	struct list_lru *lru;

	lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ?
	    &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;

	return list_lru_del(lru, &entry->lru);
}

/*
 * This function allocates cache entries. They are the normal
 * extended attribute name/value pairs, but may also be a listxattr
 * cache. Those allocations use the same entry so that they can be
 * treated as one by the memory shrinker.
 *
 * xattr cache entries are allocated together with names. If the
 * value fits into one page with the entry structure and the name,
 * it will also be part of the same allocation (kmalloc). This is
 * expected to be the vast majority of cases. Larger allocations
 * have a value pointer that is allocated separately by kvmalloc.
 *
 * Parameters:
 *
 * @name:  Name of the extended attribute. NULL for listxattr cache
 *         entry.
 * @value: Value of attribute, or listxattr cache. NULL if the
 *         value is to be copied from pages instead.
 * @pages: Pages to copy the value from, if not NULL. Passed in to
 *         make it easier to copy the value after an RPC, even if
 *         the value will not be passed up to the application (e.g.
 *         for a 'query' getxattr with NULL buffer).
 * @len:   Length of the value. Can be 0 for zero-length attributes.
 *         @value and @pages will be NULL if @len is 0.
 */
static struct nfs4_xattr_entry *
nfs4_xattr_alloc_entry(const char *name, const void *value,
		       struct page **pages, size_t len)
{
	struct nfs4_xattr_entry *entry;
	void *valp;
	char *namep;
	size_t alloclen, slen;
	char *buf;
	uint32_t flags;

	BUILD_BUG_ON(sizeof(struct nfs4_xattr_entry) +
	    XATTR_NAME_MAX + 1 > PAGE_SIZE);

	alloclen = sizeof(struct nfs4_xattr_entry);
	if (name != NULL) {
		slen = strlen(name) + 1;
		alloclen += slen;
	} else
		slen = 0;

	if (alloclen + len <= PAGE_SIZE) {
		alloclen += len;
		flags = 0;
	} else {
		flags = NFS4_XATTR_ENTRY_EXTVAL;
	}

	buf = kmalloc(alloclen, GFP_KERNEL_ACCOUNT | GFP_NOFS);
	if (buf == NULL)
		return NULL;
	entry = (struct nfs4_xattr_entry *)buf;

	if (name != NULL) {
		namep = buf + sizeof(struct nfs4_xattr_entry);
		memcpy(namep, name, slen);
	} else {
		namep = NULL;
	}

	if (flags & NFS4_XATTR_ENTRY_EXTVAL) {
		valp = kvmalloc(len, GFP_KERNEL_ACCOUNT | GFP_NOFS);
		if (valp == NULL) {
			kfree(buf);
			return NULL;
		}
	} else if (len != 0) {
		valp = buf + sizeof(struct nfs4_xattr_entry) + slen;
	} else
		valp = NULL;

	if (valp != NULL) {
		if (value != NULL)
			memcpy(valp, value, len);
		else
			_copy_from_pages(valp, pages, 0, len);
	}

	entry->flags = flags;
	entry->xattr_value = valp;
	kref_init(&entry->ref);
	entry->xattr_name = namep;
	entry->xattr_size = len;
	entry->bucket = NULL;
	INIT_LIST_HEAD(&entry->lru);
	INIT_LIST_HEAD(&entry->dispose);
	INIT_HLIST_NODE(&entry->hnode);

	return entry;
}

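/*
 * A worked example of the allocation strategy above. The numbers are
 * illustrative: sizeof(struct nfs4_xattr_entry) is on the order of
 * 100 bytes on a 64-bit kernel, and PAGE_SIZE is assumed to be 4096.
 *
 *	name = "user.mime_type" (15 bytes with NUL), len = 200:
 *		alloclen = ~100 + 15 + 200, well under PAGE_SIZE, so a
 *		single kmalloc() buffer holds [ entry | name | value ]
 *		and flags == 0.
 *
 *	name = "user.blob" (10 bytes with NUL), len = 65536:
 *		~100 + 10 + 65536 > PAGE_SIZE, so the entry and name
 *		share one kmalloc() buffer, the value gets a separate
 *		kvmalloc() buffer, and NFS4_XATTR_ENTRY_EXTVAL is set,
 *		which also puts the entry on the large entry LRU.
 */
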
static void
nfs4_xattr_free_entry(struct nfs4_xattr_entry *entry)
{
	if (entry->flags & NFS4_XATTR_ENTRY_EXTVAL)
		kvfree(entry->xattr_value);
	kfree(entry);
}

static void
nfs4_xattr_free_entry_cb(struct kref *kref)
{
	struct nfs4_xattr_entry *entry;

	entry = container_of(kref, struct nfs4_xattr_entry, ref);

	if (WARN_ON(!list_empty(&entry->lru)))
		return;

	nfs4_xattr_free_entry(entry);
}

static void
nfs4_xattr_free_cache_cb(struct kref *kref)
{
	struct nfs4_xattr_cache *cache;
	int i;

	cache = container_of(kref, struct nfs4_xattr_cache, ref);

	for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
		if (WARN_ON(!hlist_empty(&cache->buckets[i].hlist)))
			return;
		cache->buckets[i].draining = false;
	}

	cache->listxattr = NULL;

	kmem_cache_free(nfs4_xattr_cache_cachep, cache);
}

static struct nfs4_xattr_cache *
nfs4_xattr_alloc_cache(void)
{
	struct nfs4_xattr_cache *cache;

	cache = kmem_cache_alloc(nfs4_xattr_cache_cachep,
	    GFP_KERNEL_ACCOUNT | GFP_NOFS);
	if (cache == NULL)
		return NULL;

	kref_init(&cache->ref);
	atomic_long_set(&cache->nent, 0);

	return cache;
}

/*
 * Set the listxattr cache, which is a special-cased cache entry.
 * The special value ERR_PTR(-ESTALE) is used to indicate that
 * the cache is being drained - this prevents a new listxattr
 * cache from being added to what is now a stale cache.
 */
static int
nfs4_xattr_set_listcache(struct nfs4_xattr_cache *cache,
			 struct nfs4_xattr_entry *new)
{
	struct nfs4_xattr_entry *old;
	int ret = 1;

	spin_lock(&cache->listxattr_lock);

	old = cache->listxattr;

	if (old == ERR_PTR(-ESTALE)) {
		ret = 0;
		goto out;
	}

	cache->listxattr = new;
	if (new != NULL && new != ERR_PTR(-ESTALE))
		nfs4_xattr_entry_lru_add(new);

	if (old != NULL) {
		nfs4_xattr_entry_lru_del(old);
		kref_put(&old->ref, nfs4_xattr_free_entry_cb);
	}
out:
	spin_unlock(&cache->listxattr_lock);

	return ret;
}

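/*
 * To summarize, cache->listxattr is in one of three states:
 *
 *	NULL			no listxattr result cached; the initial
 *				state, and what nfs4_xattr_cache_add()
 *				and nfs4_xattr_cache_remove() reset it
 *				to in order to invalidate the list.
 *	valid entry		a cached listxattr buffer, installed via
 *				nfs4_xattr_cache_set_list().
 *	ERR_PTR(-ESTALE)	the cache is being discarded; set by
 *				nfs4_xattr_discard_cache() and checked
 *				above and in nfs4_xattr_cache_list().
 */
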
/*
 * Unlink a cache from its parent inode, clearing out an invalid
 * cache. Must be called with i_lock held.
 */
static struct nfs4_xattr_cache *
nfs4_xattr_cache_unlink(struct inode *inode)
{
	struct nfs_inode *nfsi;
	struct nfs4_xattr_cache *oldcache;

	nfsi = NFS_I(inode);

	oldcache = nfsi->xattr_cache;
	if (oldcache != NULL) {
		list_lru_del(&nfs4_xattr_cache_lru, &oldcache->lru);
		oldcache->inode = NULL;
	}
	nfsi->xattr_cache = NULL;
	nfsi->cache_validity &= ~NFS_INO_INVALID_XATTR;

	return oldcache;
}

/*
 * Discard a cache. Called by get_cache() if there was an old,
 * invalid cache. Can also be called from a shrinker callback.
 *
 * The cache is dead, it has already been unlinked from its inode,
 * and no longer appears on the cache LRU list.
 *
 * Mark all buckets as draining, so that no new entries are added. This
 * could still happen in the unlikely, but possible case that another
 * thread had grabbed a reference before it was unlinked from the inode,
 * and is still holding it for an add operation.
 *
 * Remove all entries from the LRU lists, so that there is no longer
 * any way to 'find' this cache. Then, remove the entries from the hash
 * table.
 *
 * At that point, the cache will remain empty and can be freed when the final
 * reference drops, which is very likely the kref_put at the end of
 * this function, or the one called immediately afterwards in the
 * shrinker callback.
 */
static void
nfs4_xattr_discard_cache(struct nfs4_xattr_cache *cache)
{
	unsigned int i;
	struct nfs4_xattr_entry *entry;
	struct nfs4_xattr_bucket *bucket;
	struct hlist_node *n;

	nfs4_xattr_set_listcache(cache, ERR_PTR(-ESTALE));

	for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
		bucket = &cache->buckets[i];

		spin_lock(&bucket->lock);
		bucket->draining = true;
		hlist_for_each_entry_safe(entry, n, &bucket->hlist, hnode) {
			nfs4_xattr_entry_lru_del(entry);
			hlist_del_init(&entry->hnode);
			kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
		}
		spin_unlock(&bucket->lock);
	}

	atomic_long_set(&cache->nent, 0);

	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
}

/*
 * Get a referenced copy of the cache structure. Avoid doing allocs
 * while holding i_lock, which means that we do some optimistic
 * allocation, and might have to free the result in rare cases.
 *
 * This function only checks the NFS_INO_INVALID_XATTR cache validity bit
 * and acts accordingly, replacing the cache when needed. For the read case
 * (!add), this means that the caller must make sure that the cache
 * is valid before calling this function. getxattr and listxattr call
 * revalidate_inode to do this. The attribute cache timeout (for the
 * non-delegated case) is expected to be dealt with in the revalidate
 * call.
 */
static struct nfs4_xattr_cache *
nfs4_xattr_get_cache(struct inode *inode, int add)
{
	struct nfs_inode *nfsi;
	struct nfs4_xattr_cache *cache, *oldcache, *newcache;

	nfsi = NFS_I(inode);

	cache = oldcache = NULL;

	spin_lock(&inode->i_lock);

	if (nfsi->cache_validity & NFS_INO_INVALID_XATTR)
		oldcache = nfs4_xattr_cache_unlink(inode);
	else
		cache = nfsi->xattr_cache;

	if (cache != NULL)
		kref_get(&cache->ref);

	spin_unlock(&inode->i_lock);

	if (add && cache == NULL) {
		newcache = NULL;

		cache = nfs4_xattr_alloc_cache();
		if (cache == NULL)
			goto out;

		spin_lock(&inode->i_lock);
		if (nfsi->cache_validity & NFS_INO_INVALID_XATTR) {
			/*
			 * The cache was invalidated again. Give up,
			 * since what we want to enter is now likely
			 * outdated anyway.
			 */
			spin_unlock(&inode->i_lock);
			kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
			cache = NULL;
			goto out;
		}

		/*
		 * Check if someone beat us to it.
		 */
		if (nfsi->xattr_cache != NULL) {
			newcache = nfsi->xattr_cache;
			kref_get(&newcache->ref);
		} else {
			kref_get(&cache->ref);
			nfsi->xattr_cache = cache;
			cache->inode = inode;
			list_lru_add(&nfs4_xattr_cache_lru, &cache->lru);
		}

		spin_unlock(&inode->i_lock);

		/*
		 * If there was a race, throw away the cache we just
		 * allocated, and use the new one allocated by someone
		 * else.
		 */
		if (newcache != NULL) {
			kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
			cache = newcache;
		}
	}

out:
	/*
	 * Discard the now orphaned old cache.
	 */
	if (oldcache != NULL)
		nfs4_xattr_discard_cache(oldcache);

	return cache;
}

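/*
 * For illustration, the optimistic allocation above resolves the race
 * where two threads both find no cache attached to the inode (add != 0):
 *
 *	thread A				thread B
 *	--------				--------
 *	nfs4_xattr_alloc_cache()
 *						nfs4_xattr_alloc_cache()
 *	take i_lock
 *	nfsi->xattr_cache = A's cache
 *	drop i_lock
 *						take i_lock
 *						sees nfsi->xattr_cache != NULL,
 *						grabs a ref on A's cache
 *						drop i_lock
 *						kref_put() frees B's own cache
 *
 * Both threads end up referencing the same cache, and the losing
 * allocation is freed without ever having been visible to anyone.
 */
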
static inline struct nfs4_xattr_bucket *
nfs4_xattr_hash_bucket(struct nfs4_xattr_cache *cache, const char *name)
{
	return &cache->buckets[jhash(name, strlen(name), 0) &
	    (ARRAY_SIZE(cache->buckets) - 1)];
}

static struct nfs4_xattr_entry *
nfs4_xattr_get_entry(struct nfs4_xattr_bucket *bucket, const char *name)
{
	struct nfs4_xattr_entry *entry;

	entry = NULL;

	hlist_for_each_entry(entry, &bucket->hlist, hnode) {
		if (!strcmp(entry->xattr_name, name))
			break;
	}

	return entry;
}

static int
nfs4_xattr_hash_add(struct nfs4_xattr_cache *cache,
		    struct nfs4_xattr_entry *entry)
{
	struct nfs4_xattr_bucket *bucket;
	struct nfs4_xattr_entry *oldentry = NULL;
	int ret = 1;

	bucket = nfs4_xattr_hash_bucket(cache, entry->xattr_name);
	entry->bucket = bucket;

	spin_lock(&bucket->lock);

	if (bucket->draining) {
		ret = 0;
		goto out;
	}

	oldentry = nfs4_xattr_get_entry(bucket, entry->xattr_name);
	if (oldentry != NULL) {
		hlist_del_init(&oldentry->hnode);
		nfs4_xattr_entry_lru_del(oldentry);
	} else {
		atomic_long_inc(&cache->nent);
	}

	hlist_add_head(&entry->hnode, &bucket->hlist);
	nfs4_xattr_entry_lru_add(entry);

out:
	spin_unlock(&bucket->lock);

	if (oldentry != NULL)
		kref_put(&oldentry->ref, nfs4_xattr_free_entry_cb);

	return ret;
}

static void
nfs4_xattr_hash_remove(struct nfs4_xattr_cache *cache, const char *name)
{
	struct nfs4_xattr_bucket *bucket;
	struct nfs4_xattr_entry *entry;

	bucket = nfs4_xattr_hash_bucket(cache, name);

	spin_lock(&bucket->lock);

	entry = nfs4_xattr_get_entry(bucket, name);
	if (entry != NULL) {
		hlist_del_init(&entry->hnode);
		nfs4_xattr_entry_lru_del(entry);
		atomic_long_dec(&cache->nent);
	}

	spin_unlock(&bucket->lock);

	if (entry != NULL)
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
}

static struct nfs4_xattr_entry *
nfs4_xattr_hash_find(struct nfs4_xattr_cache *cache, const char *name)
{
	struct nfs4_xattr_bucket *bucket;
	struct nfs4_xattr_entry *entry;

	bucket = nfs4_xattr_hash_bucket(cache, name);

	spin_lock(&bucket->lock);

	entry = nfs4_xattr_get_entry(bucket, name);
	if (entry != NULL)
		kref_get(&entry->ref);

	spin_unlock(&bucket->lock);

	return entry;
}

/*
 * Entry point to retrieve an entry from the cache.
 */
ssize_t nfs4_xattr_cache_get(struct inode *inode, const char *name, char *buf,
			     ssize_t buflen)
{
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry;
	ssize_t ret;

	cache = nfs4_xattr_get_cache(inode, 0);
	if (cache == NULL)
		return -ENOENT;

	ret = 0;
	entry = nfs4_xattr_hash_find(cache, name);

	if (entry != NULL) {
		dprintk("%s: cache hit '%s', len %lu\n", __func__,
			entry->xattr_name, (unsigned long)entry->xattr_size);
		if (buflen == 0) {
			/* Length probe only */
			ret = entry->xattr_size;
		} else if (buflen < entry->xattr_size)
			ret = -ERANGE;
		else {
			memcpy(buf, entry->xattr_value, entry->xattr_size);
			ret = entry->xattr_size;
		}
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
	} else {
		dprintk("%s: cache miss '%s'\n", __func__, name);
		ret = -ENOENT;
	}

	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);

	return ret;
}

/*
 * Retrieve a cached list of xattrs from the cache.
 */
ssize_t nfs4_xattr_cache_list(struct inode *inode, char *buf, ssize_t buflen)
{
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry;
	ssize_t ret;

	cache = nfs4_xattr_get_cache(inode, 0);
	if (cache == NULL)
		return -ENOENT;

	spin_lock(&cache->listxattr_lock);

	entry = cache->listxattr;

	if (entry != NULL && entry != ERR_PTR(-ESTALE)) {
		if (buflen == 0) {
			/* Length probe only */
			ret = entry->xattr_size;
		} else if (entry->xattr_size > buflen)
			ret = -ERANGE;
		else {
			memcpy(buf, entry->xattr_value, entry->xattr_size);
			ret = entry->xattr_size;
		}
	} else {
		ret = -ENOENT;
	}

	spin_unlock(&cache->listxattr_lock);

	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);

	return ret;
}

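/*
 * Both nfs4_xattr_cache_get() and nfs4_xattr_cache_list() follow the
 * usual getxattr()/listxattr() convention: a zero @buflen is a length
 * probe that returns the cached size without copying anything, a
 * buffer that is too small yields -ERANGE, and a cache miss yields
 * -ENOENT so that the caller falls back to an RPC. An illustrative
 * (not actual) caller sizing its buffer might do:
 *
 *	len = nfs4_xattr_cache_get(inode, name, NULL, 0);
 *	if (len > 0) {
 *		buf = kmalloc(len, GFP_KERNEL);
 *		if (buf != NULL)
 *			len = nfs4_xattr_cache_get(inode, name, buf, len);
 *	}
 */
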
/*
 * Add an xattr to the cache.
 *
 * This also invalidates the xattr list cache.
 */
void nfs4_xattr_cache_add(struct inode *inode, const char *name,
			  const char *buf, struct page **pages, ssize_t buflen)
{
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry;

	dprintk("%s: add '%s' len %lu\n", __func__,
		name, (unsigned long)buflen);

	cache = nfs4_xattr_get_cache(inode, 1);
	if (cache == NULL)
		return;

	entry = nfs4_xattr_alloc_entry(name, buf, pages, buflen);
	if (entry == NULL)
		goto out;

	(void)nfs4_xattr_set_listcache(cache, NULL);

	if (!nfs4_xattr_hash_add(cache, entry))
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);

out:
	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
}

/*
 * Remove an xattr from the cache.
 *
 * This also invalidates the xattr list cache.
 */
void nfs4_xattr_cache_remove(struct inode *inode, const char *name)
{
	struct nfs4_xattr_cache *cache;

	dprintk("%s: remove '%s'\n", __func__, name);

	cache = nfs4_xattr_get_cache(inode, 0);
	if (cache == NULL)
		return;

	(void)nfs4_xattr_set_listcache(cache, NULL);
	nfs4_xattr_hash_remove(cache, name);

	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
}

/*
 * Cache listxattr output, replacing any possible old one.
 */
void nfs4_xattr_cache_set_list(struct inode *inode, const char *buf,
			       ssize_t buflen)
{
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry;

	cache = nfs4_xattr_get_cache(inode, 1);
	if (cache == NULL)
		return;

	entry = nfs4_xattr_alloc_entry(NULL, buf, NULL, buflen);
	if (entry == NULL)
		goto out;

	/*
	 * This is just there to be able to get to bucket->cache,
	 * which is obviously the same for all buckets, so just
	 * use bucket 0.
	 */
	entry->bucket = &cache->buckets[0];

	if (!nfs4_xattr_set_listcache(cache, entry))
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);

out:
	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
}

/*
 * Zap the entire cache. Called when an inode is evicted.
 */
void nfs4_xattr_cache_zap(struct inode *inode)
{
	struct nfs4_xattr_cache *oldcache;

	spin_lock(&inode->i_lock);
	oldcache = nfs4_xattr_cache_unlink(inode);
	spin_unlock(&inode->i_lock);

	if (oldcache)
		nfs4_xattr_discard_cache(oldcache);
}

/*
 * The entry LRUs are shrunk more aggressively than the cache LRU,
 * with the large entry LRU shrunk most aggressively: its shrinker
 * sets @seeks to 1.
 *
 * Cache structures are freed only when they've become empty, after
 * pruning all but one entry.
 */

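/*
 * Shrinker / LRU pairing, for reference:
 *
 *	nfs4_xattr_cache_shrinker	-> nfs4_xattr_cache_lru
 *					   (seeks = DEFAULT_SEEKS)
 *	nfs4_xattr_entry_shrinker	-> nfs4_xattr_entry_lru
 *					   (seeks = DEFAULT_SEEKS, batch = 512)
 *	nfs4_xattr_large_entry_shrinker	-> nfs4_xattr_large_entry_lru
 *					   (seeks = 1, batch = 512)
 */
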
static unsigned long nfs4_xattr_cache_count(struct shrinker *shrink,
					    struct shrink_control *sc);
static unsigned long nfs4_xattr_entry_count(struct shrinker *shrink,
					    struct shrink_control *sc);
static unsigned long nfs4_xattr_cache_scan(struct shrinker *shrink,
					   struct shrink_control *sc);
static unsigned long nfs4_xattr_entry_scan(struct shrinker *shrink,
					   struct shrink_control *sc);

static struct shrinker nfs4_xattr_cache_shrinker = {
	.count_objects	= nfs4_xattr_cache_count,
	.scan_objects	= nfs4_xattr_cache_scan,
	.seeks		= DEFAULT_SEEKS,
	.flags		= SHRINKER_MEMCG_AWARE,
};

static struct shrinker nfs4_xattr_entry_shrinker = {
	.count_objects	= nfs4_xattr_entry_count,
	.scan_objects	= nfs4_xattr_entry_scan,
	.seeks		= DEFAULT_SEEKS,
	.batch		= 512,
	.flags		= SHRINKER_MEMCG_AWARE,
};

static struct shrinker nfs4_xattr_large_entry_shrinker = {
	.count_objects	= nfs4_xattr_entry_count,
	.scan_objects	= nfs4_xattr_entry_scan,
	.seeks		= 1,
	.batch		= 512,
	.flags		= SHRINKER_MEMCG_AWARE,
};

static enum lru_status
cache_lru_isolate(struct list_head *item,
	struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
	struct list_head *dispose = arg;
	struct inode *inode;
	struct nfs4_xattr_cache *cache = container_of(item,
	    struct nfs4_xattr_cache, lru);

	if (atomic_long_read(&cache->nent) > 1)
		return LRU_SKIP;

	/*
	 * If a cache structure is on the LRU list, we know that
	 * its inode is valid. Try to lock it to break the link.
	 * Since we're inverting the lock order here, only try.
	 */
	inode = cache->inode;

	if (!spin_trylock(&inode->i_lock))
		return LRU_SKIP;

	kref_get(&cache->ref);

	cache->inode = NULL;
	NFS_I(inode)->xattr_cache = NULL;
	NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_XATTR;
	list_lru_isolate(lru, &cache->lru);

	spin_unlock(&inode->i_lock);

	list_add_tail(&cache->dispose, dispose);
	return LRU_REMOVED;
}

static unsigned long
nfs4_xattr_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	LIST_HEAD(dispose);
	unsigned long freed;
	struct nfs4_xattr_cache *cache;

	freed = list_lru_shrink_walk(&nfs4_xattr_cache_lru, sc,
	    cache_lru_isolate, &dispose);
	while (!list_empty(&dispose)) {
		cache = list_first_entry(&dispose, struct nfs4_xattr_cache,
		    dispose);
		list_del_init(&cache->dispose);
		nfs4_xattr_discard_cache(cache);
		kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
	}

	return freed;
}

static unsigned long
nfs4_xattr_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
	unsigned long count;

	count = list_lru_shrink_count(&nfs4_xattr_cache_lru, sc);
	return vfs_pressure_ratio(count);
}

static enum lru_status
entry_lru_isolate(struct list_head *item,
	struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
	struct list_head *dispose = arg;
	struct nfs4_xattr_bucket *bucket;
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry = container_of(item,
	    struct nfs4_xattr_entry, lru);

	bucket = entry->bucket;
	cache = bucket->cache;

	/*
	 * Unhook the entry from its parent (either a cache bucket
	 * or a cache structure if it's a listxattr buf), so that
	 * it's no longer found. Then add it to the isolate list,
	 * to be freed later.
	 *
	 * In both cases, we're inverting the lock order, so use
	 * trylock and skip the entry if we can't get the lock.
	 */
	if (entry->xattr_name != NULL) {
		/* Regular cache entry */
		if (!spin_trylock(&bucket->lock))
			return LRU_SKIP;

		kref_get(&entry->ref);

		hlist_del_init(&entry->hnode);
		atomic_long_dec(&cache->nent);
		list_lru_isolate(lru, &entry->lru);

		spin_unlock(&bucket->lock);
	} else {
		/* Listxattr cache entry */
		if (!spin_trylock(&cache->listxattr_lock))
			return LRU_SKIP;

		kref_get(&entry->ref);

		cache->listxattr = NULL;
		list_lru_isolate(lru, &entry->lru);

		spin_unlock(&cache->listxattr_lock);
	}

	list_add_tail(&entry->dispose, dispose);
	return LRU_REMOVED;
}

static unsigned long
nfs4_xattr_entry_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	LIST_HEAD(dispose);
	unsigned long freed;
	struct nfs4_xattr_entry *entry;
	struct list_lru *lru;

	lru = (shrink == &nfs4_xattr_large_entry_shrinker) ?
	    &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;

	freed = list_lru_shrink_walk(lru, sc, entry_lru_isolate, &dispose);

	while (!list_empty(&dispose)) {
		entry = list_first_entry(&dispose, struct nfs4_xattr_entry,
		    dispose);
		list_del_init(&entry->dispose);

		/*
		 * Drop two references: the one that we just grabbed
		 * in entry_lru_isolate, and the one that was set
		 * when the entry was first allocated.
		 */
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
	}

	return freed;
}

static unsigned long
nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc)
{
	unsigned long count;
	struct list_lru *lru;

	lru = (shrink == &nfs4_xattr_large_entry_shrinker) ?
	    &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;

	count = list_lru_shrink_count(lru, sc);
	return vfs_pressure_ratio(count);
}

static void nfs4_xattr_cache_init_once(void *p)
{
	struct nfs4_xattr_cache *cache = (struct nfs4_xattr_cache *)p;

	spin_lock_init(&cache->listxattr_lock);
	atomic_long_set(&cache->nent, 0);
	nfs4_xattr_hash_init(cache);
	cache->listxattr = NULL;
	INIT_LIST_HEAD(&cache->lru);
	INIT_LIST_HEAD(&cache->dispose);
}

int __init nfs4_xattr_cache_init(void)
{
	int ret = 0;

	nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache",
	    sizeof(struct nfs4_xattr_cache), 0,
	    (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT),
	    nfs4_xattr_cache_init_once);
	if (nfs4_xattr_cache_cachep == NULL)
		return -ENOMEM;

	ret = list_lru_init_memcg(&nfs4_xattr_large_entry_lru,
	    &nfs4_xattr_large_entry_shrinker);
	if (ret)
		goto out4;

	ret = list_lru_init_memcg(&nfs4_xattr_entry_lru,
	    &nfs4_xattr_entry_shrinker);
	if (ret)
		goto out3;

	ret = list_lru_init_memcg(&nfs4_xattr_cache_lru,
	    &nfs4_xattr_cache_shrinker);
	if (ret)
		goto out2;

	ret = register_shrinker(&nfs4_xattr_cache_shrinker);
	if (ret)
		goto out1;

	ret = register_shrinker(&nfs4_xattr_entry_shrinker);
	if (ret)
		goto out;

	ret = register_shrinker(&nfs4_xattr_large_entry_shrinker);
	if (!ret)
		return 0;

	unregister_shrinker(&nfs4_xattr_entry_shrinker);
out:
	unregister_shrinker(&nfs4_xattr_cache_shrinker);
out1:
	list_lru_destroy(&nfs4_xattr_cache_lru);
out2:
	list_lru_destroy(&nfs4_xattr_entry_lru);
out3:
	list_lru_destroy(&nfs4_xattr_large_entry_lru);
out4:
	kmem_cache_destroy(nfs4_xattr_cache_cachep);

	return ret;
}

void nfs4_xattr_cache_exit(void)
{
	/*
	 * Tear down everything that nfs4_xattr_cache_init() set up,
	 * including the large entry shrinker and LRU, which the
	 * original version of this function left out.
	 */
	unregister_shrinker(&nfs4_xattr_large_entry_shrinker);
	unregister_shrinker(&nfs4_xattr_entry_shrinker);
	unregister_shrinker(&nfs4_xattr_cache_shrinker);
	list_lru_destroy(&nfs4_xattr_large_entry_lru);
	list_lru_destroy(&nfs4_xattr_entry_lru);
	list_lru_destroy(&nfs4_xattr_cache_lru);
	kmem_cache_destroy(nfs4_xattr_cache_cachep);
}