// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2019, 2020 Amazon.com, Inc. or its affiliates. All rights reserved.
 *
 * User extended attribute client side cache functions.
 *
 * Author: Frank van der Linden <fllinden@amazon.com>
 */
#include <linux/errno.h>
#include <linux/nfs_fs.h>
#include <linux/hashtable.h>
#include <linux/refcount.h>
#include <uapi/linux/xattr.h>

#include "nfs4_fs.h"
#include "internal.h"

/*
 * User extended attributes client side caching is implemented by having
 * a cache structure attached to NFS inodes. This structure is allocated
 * when needed, and freed when the cache is zapped.
 *
 * The cache structure contains a hash table of entries, and a pointer
 * to a special-cased entry for the listxattr cache.
 *
 * Accessing and allocating / freeing the caches is done via reference
 * counting. The cache entries use a similar refcounting scheme.
 *
 * This makes freeing a cache, both from the shrinker and from the
 * zap cache path, easy. It also means that, in current use cases,
 * the large majority of inodes will not waste any memory, as they
 * will never have any user extended attributes assigned to them.
 *
 * Attribute entries are hashed into a simple hash table. They are
 * also part of an LRU.
 *
 * There are three shrinkers.
 *
 * Two shrinkers deal with the cache entries themselves: one for
 * large entries (> PAGE_SIZE), and one for smaller entries. The
 * shrinker for the larger entries works more aggressively than
 * the one for the smaller entries.
 *
 * The other shrinker frees the cache structures themselves.
 */

/*
 * 64 buckets is a good default. There is likely no reasonable
 * workload that uses more than even 64 user extended attributes.
 * You can certainly add a lot more - but you get what you ask for
 * in those circumstances.
 */
#define NFS4_XATTR_HASH_SIZE	64

#define NFSDBG_FACILITY	NFSDBG_XATTRCACHE

struct nfs4_xattr_cache;
struct nfs4_xattr_entry;

struct nfs4_xattr_bucket {
	spinlock_t lock;
	struct hlist_head hlist;
	struct nfs4_xattr_cache *cache;
	bool draining;
};

struct nfs4_xattr_cache {
	struct kref ref;
	spinlock_t hash_lock;	/* protects hashtable and lru */
	struct nfs4_xattr_bucket buckets[NFS4_XATTR_HASH_SIZE];
	struct list_head lru;
	struct list_head dispose;
	atomic_long_t nent;
	spinlock_t listxattr_lock;
	struct inode *inode;
	struct nfs4_xattr_entry *listxattr;
};

struct nfs4_xattr_entry {
	struct kref ref;
	struct hlist_node hnode;
	struct list_head lru;
	struct list_head dispose;
	char *xattr_name;
	void *xattr_value;
	size_t xattr_size;
	struct nfs4_xattr_bucket *bucket;
	uint32_t flags;
};

#define NFS4_XATTR_ENTRY_EXTVAL	0x0001

/*
 * LRU lists: one for the caches attached to NFS inodes, and two for
 * the cache entries themselves.
 */
static struct list_lru nfs4_xattr_cache_lru;
static struct list_lru nfs4_xattr_entry_lru;
static struct list_lru nfs4_xattr_large_entry_lru;

static struct kmem_cache *nfs4_xattr_cache_cachep;
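/*
 * Overview of how the structures above relate (descriptive only):
 *
 *   nfs_inode
 *     ->xattr_cache --> nfs4_xattr_cache (kref'ed, on nfs4_xattr_cache_lru)
 *                         ->buckets[64] --> hlist of nfs4_xattr_entry
 *                         ->listxattr   --> special nfs4_xattr_entry
 *
 * Each nfs4_xattr_entry also sits on either nfs4_xattr_entry_lru or
 * nfs4_xattr_large_entry_lru, depending on NFS4_XATTR_ENTRY_EXTVAL.
 */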
/*
 * Hashing helper functions.
 */
static void
nfs4_xattr_hash_init(struct nfs4_xattr_cache *cache)
{
	unsigned int i;

	for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&cache->buckets[i].hlist);
		spin_lock_init(&cache->buckets[i].lock);
		cache->buckets[i].cache = cache;
		cache->buckets[i].draining = false;
	}
}

/*
 * Locking order:
 * 1. inode i_lock or bucket lock
 * 2. list_lru lock (taken by list_lru_* functions)
 */

/*
 * Wrapper functions to add a cache entry to the right LRU.
 */
static bool
nfs4_xattr_entry_lru_add(struct nfs4_xattr_entry *entry)
{
	struct list_lru *lru;

	lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ?
	    &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;

	return list_lru_add(lru, &entry->lru);
}

static bool
nfs4_xattr_entry_lru_del(struct nfs4_xattr_entry *entry)
{
	struct list_lru *lru;

	lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ?
	    &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;

	return list_lru_del(lru, &entry->lru);
}
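/*
 * Memory layout of an entry allocation (descriptive sketch of the
 * behavior of nfs4_xattr_alloc_entry() below):
 *
 *   value fits in one page together with the entry and name (flags == 0):
 *     kmalloc:  [ struct nfs4_xattr_entry | name\0 | value ]
 *
 *   value too large (flags & NFS4_XATTR_ENTRY_EXTVAL):
 *     kmalloc:  [ struct nfs4_xattr_entry | name\0 ]
 *     kvmalloc: [ value ]
 */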
/*
 * This function allocates cache entries. They are the normal
 * extended attribute name/value pairs, but may also be a listxattr
 * cache. Those allocations use the same entry so that they can be
 * treated as one by the memory shrinker.
 *
 * xattr cache entries are allocated together with names. If the
 * value fits into one page with the entry structure and the name,
 * it will also be part of the same allocation (kmalloc). This is
 * expected to be the vast majority of cases. Larger allocations
 * have a value pointer that is allocated separately by kvmalloc.
 *
 * Parameters:
 *
 * @name:  Name of the extended attribute. NULL for a listxattr cache
 *         entry.
 * @value: Value of the attribute, or listxattr cache. NULL if the
 *         value is to be copied from pages instead.
 * @pages: Pages to copy the value from, if not NULL. Passed in to
 *         make it easier to copy the value after an RPC, even if
 *         the value will not be passed up to the application (e.g.
 *         for a 'query' getxattr with a NULL buffer).
 * @len:   Length of the value. Can be 0 for zero-length attributes.
 *         @value and @pages will be NULL if @len is 0.
 */
static struct nfs4_xattr_entry *
nfs4_xattr_alloc_entry(const char *name, const void *value,
		       struct page **pages, size_t len)
{
	struct nfs4_xattr_entry *entry;
	void *valp;
	char *namep;
	size_t alloclen, slen;
	char *buf;
	uint32_t flags;

	BUILD_BUG_ON(sizeof(struct nfs4_xattr_entry) +
	    XATTR_NAME_MAX + 1 > PAGE_SIZE);

	alloclen = sizeof(struct nfs4_xattr_entry);
	if (name != NULL) {
		slen = strlen(name) + 1;
		alloclen += slen;
	} else
		slen = 0;

	if (alloclen + len <= PAGE_SIZE) {
		alloclen += len;
		flags = 0;
	} else {
		flags = NFS4_XATTR_ENTRY_EXTVAL;
	}

	buf = kmalloc(alloclen, GFP_KERNEL_ACCOUNT | GFP_NOFS);
	if (buf == NULL)
		return NULL;
	entry = (struct nfs4_xattr_entry *)buf;

	if (name != NULL) {
		namep = buf + sizeof(struct nfs4_xattr_entry);
		memcpy(namep, name, slen);
	} else {
		namep = NULL;
	}

	if (flags & NFS4_XATTR_ENTRY_EXTVAL) {
		valp = kvmalloc(len, GFP_KERNEL_ACCOUNT | GFP_NOFS);
		if (valp == NULL) {
			kfree(buf);
			return NULL;
		}
	} else if (len != 0) {
		valp = buf + sizeof(struct nfs4_xattr_entry) + slen;
	} else
		valp = NULL;

	if (valp != NULL) {
		if (value != NULL)
			memcpy(valp, value, len);
		else
			_copy_from_pages(valp, pages, 0, len);
	}

	entry->flags = flags;
	entry->xattr_value = valp;
	kref_init(&entry->ref);
	entry->xattr_name = namep;
	entry->xattr_size = len;
	entry->bucket = NULL;
	INIT_LIST_HEAD(&entry->lru);
	INIT_LIST_HEAD(&entry->dispose);
	INIT_HLIST_NODE(&entry->hnode);

	return entry;
}

static void
nfs4_xattr_free_entry(struct nfs4_xattr_entry *entry)
{
	if (entry->flags & NFS4_XATTR_ENTRY_EXTVAL)
		kvfree(entry->xattr_value);
	kfree(entry);
}

static void
nfs4_xattr_free_entry_cb(struct kref *kref)
{
	struct nfs4_xattr_entry *entry;

	entry = container_of(kref, struct nfs4_xattr_entry, ref);

	if (WARN_ON(!list_empty(&entry->lru)))
		return;

	nfs4_xattr_free_entry(entry);
}

static void
nfs4_xattr_free_cache_cb(struct kref *kref)
{
	struct nfs4_xattr_cache *cache;
	int i;

	cache = container_of(kref, struct nfs4_xattr_cache, ref);

	for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
		if (WARN_ON(!hlist_empty(&cache->buckets[i].hlist)))
			return;
		cache->buckets[i].draining = false;
	}

	cache->listxattr = NULL;

	kmem_cache_free(nfs4_xattr_cache_cachep, cache);
}

static struct nfs4_xattr_cache *
nfs4_xattr_alloc_cache(void)
{
	struct nfs4_xattr_cache *cache;

	cache = kmem_cache_alloc(nfs4_xattr_cache_cachep,
	    GFP_KERNEL_ACCOUNT | GFP_NOFS);
	if (cache == NULL)
		return NULL;

	kref_init(&cache->ref);
	atomic_long_set(&cache->nent, 0);

	return cache;
}
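/*
 * Reference counting summary (derived from the code in this file):
 *
 * - A cache starts with one reference from kref_init(). Linking it to
 *   nfsi->xattr_cache takes an additional reference, and
 *   nfs4_xattr_get_cache() returns a referenced cache to its caller.
 * - An entry starts with one reference, which is owned by the hash
 *   bucket (or the listxattr pointer) once the entry has been inserted.
 *   Lookups and the shrinkers take additional, short-lived references.
 * - The LRU lists themselves do not hold references; an entry or cache
 *   is always removed from its LRU before the owning reference is
 *   dropped.
 */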
/*
 * Set the listxattr cache, which is a special-cased cache entry.
 * The special value ERR_PTR(-ESTALE) is used to indicate that
 * the cache is being drained - this prevents a new listxattr
 * cache from being added to what is now a stale cache.
 */
static int
nfs4_xattr_set_listcache(struct nfs4_xattr_cache *cache,
			 struct nfs4_xattr_entry *new)
{
	struct nfs4_xattr_entry *old;
	int ret = 1;

	spin_lock(&cache->listxattr_lock);

	old = cache->listxattr;

	if (old == ERR_PTR(-ESTALE)) {
		ret = 0;
		goto out;
	}

	cache->listxattr = new;
	if (new != NULL && new != ERR_PTR(-ESTALE))
		nfs4_xattr_entry_lru_add(new);

	if (old != NULL) {
		nfs4_xattr_entry_lru_del(old);
		kref_put(&old->ref, nfs4_xattr_free_entry_cb);
	}
out:
	spin_unlock(&cache->listxattr_lock);

	return ret;
}

/*
 * Unlink a cache from its parent inode, clearing out an invalid
 * cache. Must be called with i_lock held.
 */
static struct nfs4_xattr_cache *
nfs4_xattr_cache_unlink(struct inode *inode)
{
	struct nfs_inode *nfsi;
	struct nfs4_xattr_cache *oldcache;

	nfsi = NFS_I(inode);

	oldcache = nfsi->xattr_cache;
	if (oldcache != NULL) {
		list_lru_del(&nfs4_xattr_cache_lru, &oldcache->lru);
		oldcache->inode = NULL;
	}
	nfsi->xattr_cache = NULL;
	nfsi->cache_validity &= ~NFS_INO_INVALID_XATTR;

	return oldcache;
}

/*
 * Discard a cache. Called by get_cache() if there was an old,
 * invalid cache. Can also be called from a shrinker callback.
 *
 * The cache is dead, it has already been unlinked from its inode,
 * and no longer appears on the cache LRU list.
 *
 * Mark all buckets as draining, so that no new entries are added. This
 * could still happen in the unlikely, but possible case that another
 * thread had grabbed a reference before it was unlinked from the inode,
 * and is still holding it for an add operation.
 *
 * Remove all entries from the LRU lists, so that there is no longer
 * any way to 'find' this cache. Then, remove the entries from the hash
 * table.
 *
 * At that point, the cache will remain empty and can be freed when the final
 * reference drops, which is very likely the kref_put at the end of
 * this function, or the one called immediately afterwards in the
 * shrinker callback.
 */
static void
nfs4_xattr_discard_cache(struct nfs4_xattr_cache *cache)
{
	unsigned int i;
	struct nfs4_xattr_entry *entry;
	struct nfs4_xattr_bucket *bucket;
	struct hlist_node *n;

	nfs4_xattr_set_listcache(cache, ERR_PTR(-ESTALE));

	for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
		bucket = &cache->buckets[i];

		spin_lock(&bucket->lock);
		bucket->draining = true;
		hlist_for_each_entry_safe(entry, n, &bucket->hlist, hnode) {
			nfs4_xattr_entry_lru_del(entry);
			hlist_del_init(&entry->hnode);
			kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
		}
		spin_unlock(&bucket->lock);
	}

	atomic_long_set(&cache->nent, 0);

	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
}
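/*
 * Cache invalidation flow (descriptive; the validity flag itself is set
 * outside of this file):
 *
 *   1. Another part of the NFS client notices that the attributes may
 *      have changed and sets NFS_INO_INVALID_XATTR in nfsi->cache_validity.
 *   2. The next nfs4_xattr_get_cache() call sees the flag, unlinks the
 *      old cache under i_lock (nfs4_xattr_cache_unlink) and discards it
 *      outside the lock (nfs4_xattr_discard_cache).
 *   3. Depending on the 'add' argument, a fresh cache may be allocated
 *      and attached in its place.
 */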
/*
 * Get a referenced copy of the cache structure. Avoid doing allocs
 * while holding i_lock, which means that we do some optimistic
 * allocation, and might have to free the result in rare cases.
 *
 * This function only checks the NFS_INO_INVALID_XATTR cache validity bit
 * and acts accordingly, replacing the cache when needed. For the read case
 * (!add), this means that the caller must make sure that the cache
 * is valid before calling this function. getxattr and listxattr call
 * revalidate_inode to do this. The attribute cache timeout (for the
 * non-delegated case) is expected to be dealt with in the revalidate
 * call.
 */

static struct nfs4_xattr_cache *
nfs4_xattr_get_cache(struct inode *inode, int add)
{
	struct nfs_inode *nfsi;
	struct nfs4_xattr_cache *cache, *oldcache, *newcache;

	nfsi = NFS_I(inode);

	cache = oldcache = NULL;

	spin_lock(&inode->i_lock);

	if (nfsi->cache_validity & NFS_INO_INVALID_XATTR)
		oldcache = nfs4_xattr_cache_unlink(inode);
	else
		cache = nfsi->xattr_cache;

	if (cache != NULL)
		kref_get(&cache->ref);

	spin_unlock(&inode->i_lock);

	if (add && cache == NULL) {
		newcache = NULL;

		cache = nfs4_xattr_alloc_cache();
		if (cache == NULL)
			goto out;

		spin_lock(&inode->i_lock);
		if (nfsi->cache_validity & NFS_INO_INVALID_XATTR) {
			/*
			 * The cache was invalidated again. Give up,
			 * since what we want to enter is now likely
			 * outdated anyway.
			 */
			spin_unlock(&inode->i_lock);
			kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
			cache = NULL;
			goto out;
		}

		/*
		 * Check if someone beat us to it.
		 */
		if (nfsi->xattr_cache != NULL) {
			newcache = nfsi->xattr_cache;
			kref_get(&newcache->ref);
		} else {
			kref_get(&cache->ref);
			nfsi->xattr_cache = cache;
			cache->inode = inode;
			list_lru_add(&nfs4_xattr_cache_lru, &cache->lru);
		}

		spin_unlock(&inode->i_lock);

		/*
		 * If there was a race, throw away the cache we just
		 * allocated, and use the new one allocated by someone
		 * else.
		 */
		if (newcache != NULL) {
			kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
			cache = newcache;
		}
	}

out:
	/*
	 * Discard the now orphaned old cache.
	 */
	if (oldcache != NULL)
		nfs4_xattr_discard_cache(oldcache);

	return cache;
}
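/*
 * Note: the bucket index below is computed as jhash(name) masked with
 * (NFS4_XATTR_HASH_SIZE - 1), so NFS4_XATTR_HASH_SIZE must remain a
 * power of two.
 */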
static inline struct nfs4_xattr_bucket *
nfs4_xattr_hash_bucket(struct nfs4_xattr_cache *cache, const char *name)
{
	return &cache->buckets[jhash(name, strlen(name), 0) &
	    (ARRAY_SIZE(cache->buckets) - 1)];
}

static struct nfs4_xattr_entry *
nfs4_xattr_get_entry(struct nfs4_xattr_bucket *bucket, const char *name)
{
	struct nfs4_xattr_entry *entry;

	entry = NULL;

	hlist_for_each_entry(entry, &bucket->hlist, hnode) {
		if (!strcmp(entry->xattr_name, name))
			break;
	}

	return entry;
}

static int
nfs4_xattr_hash_add(struct nfs4_xattr_cache *cache,
		    struct nfs4_xattr_entry *entry)
{
	struct nfs4_xattr_bucket *bucket;
	struct nfs4_xattr_entry *oldentry = NULL;
	int ret = 1;

	bucket = nfs4_xattr_hash_bucket(cache, entry->xattr_name);
	entry->bucket = bucket;

	spin_lock(&bucket->lock);

	if (bucket->draining) {
		ret = 0;
		goto out;
	}

	oldentry = nfs4_xattr_get_entry(bucket, entry->xattr_name);
	if (oldentry != NULL) {
		hlist_del_init(&oldentry->hnode);
		nfs4_xattr_entry_lru_del(oldentry);
	} else {
		atomic_long_inc(&cache->nent);
	}

	hlist_add_head(&entry->hnode, &bucket->hlist);
	nfs4_xattr_entry_lru_add(entry);

out:
	spin_unlock(&bucket->lock);

	if (oldentry != NULL)
		kref_put(&oldentry->ref, nfs4_xattr_free_entry_cb);

	return ret;
}

static void
nfs4_xattr_hash_remove(struct nfs4_xattr_cache *cache, const char *name)
{
	struct nfs4_xattr_bucket *bucket;
	struct nfs4_xattr_entry *entry;

	bucket = nfs4_xattr_hash_bucket(cache, name);

	spin_lock(&bucket->lock);

	entry = nfs4_xattr_get_entry(bucket, name);
	if (entry != NULL) {
		hlist_del_init(&entry->hnode);
		nfs4_xattr_entry_lru_del(entry);
		atomic_long_dec(&cache->nent);
	}

	spin_unlock(&bucket->lock);

	if (entry != NULL)
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
}

static struct nfs4_xattr_entry *
nfs4_xattr_hash_find(struct nfs4_xattr_cache *cache, const char *name)
{
	struct nfs4_xattr_bucket *bucket;
	struct nfs4_xattr_entry *entry;

	bucket = nfs4_xattr_hash_bucket(cache, name);

	spin_lock(&bucket->lock);

	entry = nfs4_xattr_get_entry(bucket, name);
	if (entry != NULL)
		kref_get(&entry->ref);

	spin_unlock(&bucket->lock);

	return entry;
}
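/*
 * Illustrative caller pattern for the lookup entry points below. This is
 * only a sketch; the real callers live elsewhere in the NFS client, and
 * do_getxattr_rpc() is a hypothetical placeholder for the GETXATTR RPC:
 *
 *	nfs_revalidate_inode(inode);		// ensure cache validity
 *	ret = nfs4_xattr_cache_get(inode, name, buf, buflen);
 *	if (ret == -ENOENT) {
 *		ret = do_getxattr_rpc(inode, name, buf, buflen);
 *		if (ret >= 0)
 *			nfs4_xattr_cache_add(inode, name, buf, NULL, ret);
 *	}
 */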
/*
 * Entry point to retrieve an entry from the cache.
 */
ssize_t nfs4_xattr_cache_get(struct inode *inode, const char *name, char *buf,
			     ssize_t buflen)
{
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry;
	ssize_t ret;

	cache = nfs4_xattr_get_cache(inode, 0);
	if (cache == NULL)
		return -ENOENT;

	ret = 0;
	entry = nfs4_xattr_hash_find(cache, name);

	if (entry != NULL) {
		dprintk("%s: cache hit '%s', len %lu\n", __func__,
		    entry->xattr_name, (unsigned long)entry->xattr_size);
		if (buflen == 0) {
			/* Length probe only */
			ret = entry->xattr_size;
		} else if (buflen < entry->xattr_size)
			ret = -ERANGE;
		else {
			memcpy(buf, entry->xattr_value, entry->xattr_size);
			ret = entry->xattr_size;
		}
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
	} else {
		dprintk("%s: cache miss '%s'\n", __func__, name);
		ret = -ENOENT;
	}

	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);

	return ret;
}

/*
 * Retrieve a cached list of xattrs from the cache.
 */
ssize_t nfs4_xattr_cache_list(struct inode *inode, char *buf, ssize_t buflen)
{
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry;
	ssize_t ret;

	cache = nfs4_xattr_get_cache(inode, 0);
	if (cache == NULL)
		return -ENOENT;

	spin_lock(&cache->listxattr_lock);

	entry = cache->listxattr;

	if (entry != NULL && entry != ERR_PTR(-ESTALE)) {
		if (buflen == 0) {
			/* Length probe only */
			ret = entry->xattr_size;
		} else if (entry->xattr_size > buflen)
			ret = -ERANGE;
		else {
			memcpy(buf, entry->xattr_value, entry->xattr_size);
			ret = entry->xattr_size;
		}
	} else {
		ret = -ENOENT;
	}

	spin_unlock(&cache->listxattr_lock);

	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);

	return ret;
}

/*
 * Add an xattr to the cache.
 *
 * This also invalidates the xattr list cache.
 */
void nfs4_xattr_cache_add(struct inode *inode, const char *name,
			  const char *buf, struct page **pages, ssize_t buflen)
{
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry;

	dprintk("%s: add '%s' len %lu\n", __func__,
	    name, (unsigned long)buflen);

	cache = nfs4_xattr_get_cache(inode, 1);
	if (cache == NULL)
		return;

	entry = nfs4_xattr_alloc_entry(name, buf, pages, buflen);
	if (entry == NULL)
		goto out;

	(void)nfs4_xattr_set_listcache(cache, NULL);

	if (!nfs4_xattr_hash_add(cache, entry))
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);

out:
	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
}

/*
 * Remove an xattr from the cache.
 *
 * This also invalidates the xattr list cache.
 */
void nfs4_xattr_cache_remove(struct inode *inode, const char *name)
{
	struct nfs4_xattr_cache *cache;

	dprintk("%s: remove '%s'\n", __func__, name);

	cache = nfs4_xattr_get_cache(inode, 0);
	if (cache == NULL)
		return;

	(void)nfs4_xattr_set_listcache(cache, NULL);
	nfs4_xattr_hash_remove(cache, name);

	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
}
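/*
 * Illustrative write-side pattern (again only a sketch, not the actual
 * call sites): after a successful SETXATTR the caller is expected to
 * call nfs4_xattr_cache_add(), and after a successful REMOVEXATTR it is
 * expected to call nfs4_xattr_cache_remove(), so the cache stays in step
 * with the server. Both paths drop the listxattr cache, since the name
 * list may have changed.
 */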
/*
 * Cache listxattr output, replacing any possible old one.
 */
void nfs4_xattr_cache_set_list(struct inode *inode, const char *buf,
			       ssize_t buflen)
{
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry;

	cache = nfs4_xattr_get_cache(inode, 1);
	if (cache == NULL)
		return;

	entry = nfs4_xattr_alloc_entry(NULL, buf, NULL, buflen);
	if (entry == NULL)
		goto out;

	/*
	 * This is just there to be able to get to bucket->cache,
	 * which is obviously the same for all buckets, so just
	 * use bucket 0.
	 */
	entry->bucket = &cache->buckets[0];

	if (!nfs4_xattr_set_listcache(cache, entry))
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);

out:
	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
}

/*
 * Zap the entire cache. Called when an inode is evicted.
 */
void nfs4_xattr_cache_zap(struct inode *inode)
{
	struct nfs4_xattr_cache *oldcache;

	spin_lock(&inode->i_lock);
	oldcache = nfs4_xattr_cache_unlink(inode);
	spin_unlock(&inode->i_lock);

	if (oldcache)
		nfs4_xattr_discard_cache(oldcache);
}

/*
 * The large entry LRU is shrunk more aggressively than the other LRUs,
 * by setting @seeks to 1.
 *
 * Cache structures are freed only when they have become (nearly) empty:
 * the cache shrinker skips any cache that still holds more than one
 * entry, leaving it to the entry shrinkers to prune them first.
 */

static unsigned long nfs4_xattr_cache_count(struct shrinker *shrink,
					    struct shrink_control *sc);
static unsigned long nfs4_xattr_entry_count(struct shrinker *shrink,
					    struct shrink_control *sc);
static unsigned long nfs4_xattr_cache_scan(struct shrinker *shrink,
					   struct shrink_control *sc);
static unsigned long nfs4_xattr_entry_scan(struct shrinker *shrink,
					   struct shrink_control *sc);

static struct shrinker nfs4_xattr_cache_shrinker = {
	.count_objects	= nfs4_xattr_cache_count,
	.scan_objects	= nfs4_xattr_cache_scan,
	.seeks		= DEFAULT_SEEKS,
	.flags		= SHRINKER_MEMCG_AWARE,
};

static struct shrinker nfs4_xattr_entry_shrinker = {
	.count_objects	= nfs4_xattr_entry_count,
	.scan_objects	= nfs4_xattr_entry_scan,
	.seeks		= DEFAULT_SEEKS,
	.batch		= 512,
	.flags		= SHRINKER_MEMCG_AWARE,
};

static struct shrinker nfs4_xattr_large_entry_shrinker = {
	.count_objects	= nfs4_xattr_entry_count,
	.scan_objects	= nfs4_xattr_entry_scan,
	.seeks		= 1,
	.batch		= 512,
	.flags		= SHRINKER_MEMCG_AWARE,
};
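/*
 * Shrinker operation, as implemented below:
 *
 * - The ->count_objects callbacks report the LRU length, scaled by
 *   vfs_pressure_ratio().
 * - The ->scan_objects callbacks walk the LRU with list_lru_shrink_walk(),
 *   isolating victims onto a private dispose list (using trylock to avoid
 *   inverting the usual lock order), and then free them once the LRU lock
 *   has been dropped.
 */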
static enum lru_status
cache_lru_isolate(struct list_head *item,
	struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
	struct list_head *dispose = arg;
	struct inode *inode;
	struct nfs4_xattr_cache *cache = container_of(item,
	    struct nfs4_xattr_cache, lru);

	if (atomic_long_read(&cache->nent) > 1)
		return LRU_SKIP;

	/*
	 * If a cache structure is on the LRU list, we know that
	 * its inode is valid. Try to lock it to break the link.
	 * Since we're inverting the lock order here, only try.
	 */
	inode = cache->inode;

	if (!spin_trylock(&inode->i_lock))
		return LRU_SKIP;

	kref_get(&cache->ref);

	cache->inode = NULL;
	NFS_I(inode)->xattr_cache = NULL;
	NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_XATTR;
	list_lru_isolate(lru, &cache->lru);

	spin_unlock(&inode->i_lock);

	list_add_tail(&cache->dispose, dispose);
	return LRU_REMOVED;
}

static unsigned long
nfs4_xattr_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	LIST_HEAD(dispose);
	unsigned long freed;
	struct nfs4_xattr_cache *cache;

	freed = list_lru_shrink_walk(&nfs4_xattr_cache_lru, sc,
	    cache_lru_isolate, &dispose);
	while (!list_empty(&dispose)) {
		cache = list_first_entry(&dispose, struct nfs4_xattr_cache,
		    dispose);
		list_del_init(&cache->dispose);
		nfs4_xattr_discard_cache(cache);
		kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
	}

	return freed;
}

static unsigned long
nfs4_xattr_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
	unsigned long count;

	count = list_lru_count(&nfs4_xattr_cache_lru);
	return vfs_pressure_ratio(count);
}

static enum lru_status
entry_lru_isolate(struct list_head *item,
	struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
	struct list_head *dispose = arg;
	struct nfs4_xattr_bucket *bucket;
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry = container_of(item,
	    struct nfs4_xattr_entry, lru);

	bucket = entry->bucket;
	cache = bucket->cache;

	/*
	 * Unhook the entry from its parent (either a cache bucket
	 * or a cache structure if it's a listxattr buf), so that
	 * it's no longer found. Then add it to the isolate list,
	 * to be freed later.
	 *
	 * In both cases, we're inverting the lock order, so use
	 * trylock and skip the entry if we can't get the lock.
	 */
	if (entry->xattr_name != NULL) {
		/* Regular cache entry */
		if (!spin_trylock(&bucket->lock))
			return LRU_SKIP;

		kref_get(&entry->ref);

		hlist_del_init(&entry->hnode);
		atomic_long_dec(&cache->nent);
		list_lru_isolate(lru, &entry->lru);

		spin_unlock(&bucket->lock);
	} else {
		/* Listxattr cache entry */
		if (!spin_trylock(&cache->listxattr_lock))
			return LRU_SKIP;

		kref_get(&entry->ref);

		cache->listxattr = NULL;
		list_lru_isolate(lru, &entry->lru);

		spin_unlock(&cache->listxattr_lock);
	}

	list_add_tail(&entry->dispose, dispose);
	return LRU_REMOVED;
}

static unsigned long
nfs4_xattr_entry_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	LIST_HEAD(dispose);
	unsigned long freed;
	struct nfs4_xattr_entry *entry;
	struct list_lru *lru;

	lru = (shrink == &nfs4_xattr_large_entry_shrinker) ?
	    &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;

	freed = list_lru_shrink_walk(lru, sc, entry_lru_isolate, &dispose);

	while (!list_empty(&dispose)) {
		entry = list_first_entry(&dispose, struct nfs4_xattr_entry,
		    dispose);
		list_del_init(&entry->dispose);

		/*
		 * Drop two references: the one that we just grabbed
		 * in entry_lru_isolate, and the one that was set
		 * when the entry was first allocated.
		 */
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
	}

	return freed;
}

static unsigned long
nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc)
{
	unsigned long count;
	struct list_lru *lru;

	lru = (shrink == &nfs4_xattr_large_entry_shrinker) ?
	    &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;

	count = list_lru_count(lru);
	return vfs_pressure_ratio(count);
}

static void nfs4_xattr_cache_init_once(void *p)
{
	struct nfs4_xattr_cache *cache = (struct nfs4_xattr_cache *)p;

	spin_lock_init(&cache->listxattr_lock);
	atomic_long_set(&cache->nent, 0);
	nfs4_xattr_hash_init(cache);
	cache->listxattr = NULL;
	INIT_LIST_HEAD(&cache->lru);
	INIT_LIST_HEAD(&cache->dispose);
}

int __init nfs4_xattr_cache_init(void)
{
	int ret = 0;

	nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache",
	    sizeof(struct nfs4_xattr_cache), 0,
	    (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT),
	    nfs4_xattr_cache_init_once);
	if (nfs4_xattr_cache_cachep == NULL)
		return -ENOMEM;

	ret = list_lru_init_memcg(&nfs4_xattr_large_entry_lru,
	    &nfs4_xattr_large_entry_shrinker);
	if (ret)
		goto out4;

	ret = list_lru_init_memcg(&nfs4_xattr_entry_lru,
	    &nfs4_xattr_entry_shrinker);
	if (ret)
		goto out3;

	ret = list_lru_init_memcg(&nfs4_xattr_cache_lru,
	    &nfs4_xattr_cache_shrinker);
	if (ret)
		goto out2;

	ret = register_shrinker(&nfs4_xattr_cache_shrinker);
	if (ret)
		goto out1;

	ret = register_shrinker(&nfs4_xattr_entry_shrinker);
	if (ret)
		goto out;

	ret = register_shrinker(&nfs4_xattr_large_entry_shrinker);
	if (!ret)
		return 0;

	unregister_shrinker(&nfs4_xattr_entry_shrinker);
out:
	unregister_shrinker(&nfs4_xattr_cache_shrinker);
out1:
	list_lru_destroy(&nfs4_xattr_cache_lru);
out2:
	list_lru_destroy(&nfs4_xattr_entry_lru);
out3:
	list_lru_destroy(&nfs4_xattr_large_entry_lru);
out4:
	kmem_cache_destroy(nfs4_xattr_cache_cachep);

	return ret;
}

void nfs4_xattr_cache_exit(void)
{
	unregister_shrinker(&nfs4_xattr_large_entry_shrinker);
	unregister_shrinker(&nfs4_xattr_entry_shrinker);
	unregister_shrinker(&nfs4_xattr_cache_shrinker);
	list_lru_destroy(&nfs4_xattr_large_entry_lru);
	list_lru_destroy(&nfs4_xattr_entry_lru);
	list_lru_destroy(&nfs4_xattr_cache_lru);
	kmem_cache_destroy(nfs4_xattr_cache_cachep);
}