1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 lru_cache.c 4 5 This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 6 7 Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. 8 Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>. 9 Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 10 11 12 */ 13 14 #include <linux/module.h> 15 #include <linux/bitops.h> 16 #include <linux/slab.h> 17 #include <linux/string.h> /* for memset */ 18 #include <linux/seq_file.h> /* for seq_printf */ 19 #include <linux/lru_cache.h> 20 21 MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " 22 "Lars Ellenberg <lars@linbit.com>"); 23 MODULE_DESCRIPTION("lru_cache - Track sets of hot objects"); 24 MODULE_LICENSE("GPL"); 25 26 /* this is developers aid only. 27 * it catches concurrent access (lack of locking on the users part) */ 28 #define PARANOIA_ENTRY() do { \ 29 BUG_ON(!lc); \ 30 BUG_ON(!lc->nr_elements); \ 31 BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)); \ 32 } while (0) 33 34 #define RETURN(x...) do { \ 35 clear_bit_unlock(__LC_PARANOIA, &lc->flags); \ 36 return x ; } while (0) 37 38 /* BUG() if e is not one of the elements tracked by lc */ 39 #define PARANOIA_LC_ELEMENT(lc, e) do { \ 40 struct lru_cache *lc_ = (lc); \ 41 struct lc_element *e_ = (e); \ 42 unsigned i = e_->lc_index; \ 43 BUG_ON(i >= lc_->nr_elements); \ 44 BUG_ON(lc_->lc_element[i] != e_); } while (0) 45 46 47 /* We need to atomically 48 * - try to grab the lock (set LC_LOCKED) 49 * - only if there is no pending transaction 50 * (neither LC_DIRTY nor LC_STARVING is set) 51 * Because of PARANOIA_ENTRY() above abusing lc->flags as well, 52 * it is not sufficient to just say 53 * return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED); 54 */ 55 int lc_try_lock(struct lru_cache *lc) 56 { 57 unsigned long val; 58 do { 59 val = cmpxchg(&lc->flags, 0, LC_LOCKED); 60 } while (unlikely (val == LC_PARANOIA)); 61 /* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */ 62 return 0 == val; 63 #if 0 64 /* Alternative approach, spin in case someone enters or leaves a 65 * PARANOIA_ENTRY()/RETURN() section. */ 66 unsigned long old, new, val; 67 do { 68 old = lc->flags & LC_PARANOIA; 69 new = old | LC_LOCKED; 70 val = cmpxchg(&lc->flags, old, new); 71 } while (unlikely (val == (old ^ LC_PARANOIA))); 72 return old == val; 73 #endif 74 } 75 76 /** 77 * lc_create - prepares to track objects in an active set 78 * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details 79 * @max_pending_changes: maximum changes to accumulate until a transaction is required 80 * @e_count: number of elements allowed to be active simultaneously 81 * @e_size: size of the tracked objects 82 * @e_off: offset to the &struct lc_element member in a tracked object 83 * 84 * Returns a pointer to a newly initialized struct lru_cache on success, 85 * or NULL on (allocation) failure. 86 */ 87 struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, 88 unsigned max_pending_changes, 89 unsigned e_count, size_t e_size, size_t e_off) 90 { 91 struct hlist_head *slot = NULL; 92 struct lc_element **element = NULL; 93 struct lru_cache *lc; 94 struct lc_element *e; 95 unsigned cache_obj_size = kmem_cache_size(cache); 96 unsigned i; 97 98 WARN_ON(cache_obj_size < e_size); 99 if (cache_obj_size < e_size) 100 return NULL; 101 102 /* e_count too big; would probably fail the allocation below anyways. 103 * for typical use cases, e_count should be few thousand at most. */ 104 if (e_count > LC_MAX_ACTIVE) 105 return NULL; 106 107 slot = kcalloc(e_count, sizeof(struct hlist_head), GFP_KERNEL); 108 if (!slot) 109 goto out_fail; 110 element = kcalloc(e_count, sizeof(struct lc_element *), GFP_KERNEL); 111 if (!element) 112 goto out_fail; 113 114 lc = kzalloc(sizeof(*lc), GFP_KERNEL); 115 if (!lc) 116 goto out_fail; 117 118 INIT_LIST_HEAD(&lc->in_use); 119 INIT_LIST_HEAD(&lc->lru); 120 INIT_LIST_HEAD(&lc->free); 121 INIT_LIST_HEAD(&lc->to_be_changed); 122 123 lc->name = name; 124 lc->element_size = e_size; 125 lc->element_off = e_off; 126 lc->nr_elements = e_count; 127 lc->max_pending_changes = max_pending_changes; 128 lc->lc_cache = cache; 129 lc->lc_element = element; 130 lc->lc_slot = slot; 131 132 /* preallocate all objects */ 133 for (i = 0; i < e_count; i++) { 134 void *p = kmem_cache_alloc(cache, GFP_KERNEL); 135 if (!p) 136 break; 137 memset(p, 0, lc->element_size); 138 e = p + e_off; 139 e->lc_index = i; 140 e->lc_number = LC_FREE; 141 e->lc_new_number = LC_FREE; 142 list_add(&e->list, &lc->free); 143 element[i] = e; 144 } 145 if (i == e_count) 146 return lc; 147 148 /* else: could not allocate all elements, give up */ 149 for (i--; i; i--) { 150 void *p = element[i]; 151 kmem_cache_free(cache, p - e_off); 152 } 153 kfree(lc); 154 out_fail: 155 kfree(element); 156 kfree(slot); 157 return NULL; 158 } 159 160 static void lc_free_by_index(struct lru_cache *lc, unsigned i) 161 { 162 void *p = lc->lc_element[i]; 163 WARN_ON(!p); 164 if (p) { 165 p -= lc->element_off; 166 kmem_cache_free(lc->lc_cache, p); 167 } 168 } 169 170 /** 171 * lc_destroy - frees memory allocated by lc_create() 172 * @lc: the lru cache to destroy 173 */ 174 void lc_destroy(struct lru_cache *lc) 175 { 176 unsigned i; 177 if (!lc) 178 return; 179 for (i = 0; i < lc->nr_elements; i++) 180 lc_free_by_index(lc, i); 181 kfree(lc->lc_element); 182 kfree(lc->lc_slot); 183 kfree(lc); 184 } 185 186 /** 187 * lc_reset - does a full reset for @lc and the hash table slots. 188 * @lc: the lru cache to operate on 189 * 190 * It is roughly the equivalent of re-allocating a fresh lru_cache object, 191 * basically a short cut to lc_destroy(lc); lc = lc_create(...); 192 */ 193 void lc_reset(struct lru_cache *lc) 194 { 195 unsigned i; 196 197 INIT_LIST_HEAD(&lc->in_use); 198 INIT_LIST_HEAD(&lc->lru); 199 INIT_LIST_HEAD(&lc->free); 200 INIT_LIST_HEAD(&lc->to_be_changed); 201 lc->used = 0; 202 lc->hits = 0; 203 lc->misses = 0; 204 lc->starving = 0; 205 lc->locked = 0; 206 lc->changed = 0; 207 lc->pending_changes = 0; 208 lc->flags = 0; 209 memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); 210 211 for (i = 0; i < lc->nr_elements; i++) { 212 struct lc_element *e = lc->lc_element[i]; 213 void *p = e; 214 p -= lc->element_off; 215 memset(p, 0, lc->element_size); 216 /* re-init it */ 217 e->lc_index = i; 218 e->lc_number = LC_FREE; 219 e->lc_new_number = LC_FREE; 220 list_add(&e->list, &lc->free); 221 } 222 } 223 224 /** 225 * lc_seq_printf_stats - print stats about @lc into @seq 226 * @seq: the seq_file to print into 227 * @lc: the lru cache to print statistics of 228 */ 229 void lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) 230 { 231 /* NOTE: 232 * total calls to lc_get are 233 * (starving + hits + misses) 234 * misses include "locked" count (update from an other thread in 235 * progress) and "changed", when this in fact lead to an successful 236 * update of the cache. 237 */ 238 seq_printf(seq, "\t%s: used:%u/%u hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", 239 lc->name, lc->used, lc->nr_elements, 240 lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); 241 } 242 243 static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) 244 { 245 return lc->lc_slot + (enr % lc->nr_elements); 246 } 247 248 249 static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr, 250 bool include_changing) 251 { 252 struct lc_element *e; 253 254 BUG_ON(!lc); 255 BUG_ON(!lc->nr_elements); 256 hlist_for_each_entry(e, lc_hash_slot(lc, enr), colision) { 257 /* "about to be changed" elements, pending transaction commit, 258 * are hashed by their "new number". "Normal" elements have 259 * lc_number == lc_new_number. */ 260 if (e->lc_new_number != enr) 261 continue; 262 if (e->lc_new_number == e->lc_number || include_changing) 263 return e; 264 break; 265 } 266 return NULL; 267 } 268 269 /** 270 * lc_find - find element by label, if present in the hash table 271 * @lc: The lru_cache object 272 * @enr: element number 273 * 274 * Returns the pointer to an element, if the element with the requested 275 * "label" or element number is present in the hash table, 276 * or NULL if not found. Does not change the refcnt. 277 * Ignores elements that are "about to be used", i.e. not yet in the active 278 * set, but still pending transaction commit. 279 */ 280 struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) 281 { 282 return __lc_find(lc, enr, 0); 283 } 284 285 /** 286 * lc_is_used - find element by label 287 * @lc: The lru_cache object 288 * @enr: element number 289 * 290 * Returns true, if the element with the requested "label" or element number is 291 * present in the hash table, and is used (refcnt > 0). 292 * Also finds elements that are not _currently_ used but only "about to be 293 * used", i.e. on the "to_be_changed" list, pending transaction commit. 294 */ 295 bool lc_is_used(struct lru_cache *lc, unsigned int enr) 296 { 297 struct lc_element *e = __lc_find(lc, enr, 1); 298 return e && e->refcnt; 299 } 300 301 /** 302 * lc_del - removes an element from the cache 303 * @lc: The lru_cache object 304 * @e: The element to remove 305 * 306 * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list, 307 * sets @e->enr to %LC_FREE. 308 */ 309 void lc_del(struct lru_cache *lc, struct lc_element *e) 310 { 311 PARANOIA_ENTRY(); 312 PARANOIA_LC_ELEMENT(lc, e); 313 BUG_ON(e->refcnt); 314 315 e->lc_number = e->lc_new_number = LC_FREE; 316 hlist_del_init(&e->colision); 317 list_move(&e->list, &lc->free); 318 RETURN(); 319 } 320 321 static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number) 322 { 323 struct list_head *n; 324 struct lc_element *e; 325 326 if (!list_empty(&lc->free)) 327 n = lc->free.next; 328 else if (!list_empty(&lc->lru)) 329 n = lc->lru.prev; 330 else 331 return NULL; 332 333 e = list_entry(n, struct lc_element, list); 334 PARANOIA_LC_ELEMENT(lc, e); 335 336 e->lc_new_number = new_number; 337 if (!hlist_unhashed(&e->colision)) 338 __hlist_del(&e->colision); 339 hlist_add_head(&e->colision, lc_hash_slot(lc, new_number)); 340 list_move(&e->list, &lc->to_be_changed); 341 342 return e; 343 } 344 345 static int lc_unused_element_available(struct lru_cache *lc) 346 { 347 if (!list_empty(&lc->free)) 348 return 1; /* something on the free list */ 349 if (!list_empty(&lc->lru)) 350 return 1; /* something to evict */ 351 352 return 0; 353 } 354 355 /* used as internal flags to __lc_get */ 356 enum { 357 LC_GET_MAY_CHANGE = 1, 358 LC_GET_MAY_USE_UNCOMMITTED = 2, 359 }; 360 361 static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, unsigned int flags) 362 { 363 struct lc_element *e; 364 365 PARANOIA_ENTRY(); 366 if (lc->flags & LC_STARVING) { 367 ++lc->starving; 368 RETURN(NULL); 369 } 370 371 e = __lc_find(lc, enr, 1); 372 /* if lc_new_number != lc_number, 373 * this enr is currently being pulled in already, 374 * and will be available once the pending transaction 375 * has been committed. */ 376 if (e) { 377 if (e->lc_new_number != e->lc_number) { 378 /* It has been found above, but on the "to_be_changed" 379 * list, not yet committed. Don't pull it in twice, 380 * wait for the transaction, then try again... 381 */ 382 if (!(flags & LC_GET_MAY_USE_UNCOMMITTED)) 383 RETURN(NULL); 384 /* ... unless the caller is aware of the implications, 385 * probably preparing a cumulative transaction. */ 386 ++e->refcnt; 387 ++lc->hits; 388 RETURN(e); 389 } 390 /* else: lc_new_number == lc_number; a real hit. */ 391 ++lc->hits; 392 if (e->refcnt++ == 0) 393 lc->used++; 394 list_move(&e->list, &lc->in_use); /* Not evictable... */ 395 RETURN(e); 396 } 397 /* e == NULL */ 398 399 ++lc->misses; 400 if (!(flags & LC_GET_MAY_CHANGE)) 401 RETURN(NULL); 402 403 /* To avoid races with lc_try_lock(), first, mark us dirty 404 * (using test_and_set_bit, as it implies memory barriers), ... */ 405 test_and_set_bit(__LC_DIRTY, &lc->flags); 406 407 /* ... only then check if it is locked anyways. If lc_unlock clears 408 * the dirty bit again, that's not a problem, we will come here again. 409 */ 410 if (test_bit(__LC_LOCKED, &lc->flags)) { 411 ++lc->locked; 412 RETURN(NULL); 413 } 414 415 /* In case there is nothing available and we can not kick out 416 * the LRU element, we have to wait ... 417 */ 418 if (!lc_unused_element_available(lc)) { 419 __set_bit(__LC_STARVING, &lc->flags); 420 RETURN(NULL); 421 } 422 423 /* It was not present in the active set. We are going to recycle an 424 * unused (or even "free") element, but we won't accumulate more than 425 * max_pending_changes changes. */ 426 if (lc->pending_changes >= lc->max_pending_changes) 427 RETURN(NULL); 428 429 e = lc_prepare_for_change(lc, enr); 430 BUG_ON(!e); 431 432 clear_bit(__LC_STARVING, &lc->flags); 433 BUG_ON(++e->refcnt != 1); 434 lc->used++; 435 lc->pending_changes++; 436 437 RETURN(e); 438 } 439 440 /** 441 * lc_get - get element by label, maybe change the active set 442 * @lc: the lru cache to operate on 443 * @enr: the label to look up 444 * 445 * Finds an element in the cache, increases its usage count, 446 * "touches" and returns it. 447 * 448 * In case the requested number is not present, it needs to be added to the 449 * cache. Therefore it is possible that an other element becomes evicted from 450 * the cache. In either case, the user is notified so he is able to e.g. keep 451 * a persistent log of the cache changes, and therefore the objects in use. 452 * 453 * Return values: 454 * NULL 455 * The cache was marked %LC_STARVING, 456 * or the requested label was not in the active set 457 * and a changing transaction is still pending (@lc was marked %LC_DIRTY). 458 * Or no unused or free element could be recycled (@lc will be marked as 459 * %LC_STARVING, blocking further lc_get() operations). 460 * 461 * pointer to the element with the REQUESTED element number. 462 * In this case, it can be used right away 463 * 464 * pointer to an UNUSED element with some different element number, 465 * where that different number may also be %LC_FREE. 466 * 467 * In this case, the cache is marked %LC_DIRTY, 468 * so lc_try_lock() will no longer succeed. 469 * The returned element pointer is moved to the "to_be_changed" list, 470 * and registered with the new element number on the hash collision chains, 471 * so it is possible to pick it up from lc_is_used(). 472 * Up to "max_pending_changes" (see lc_create()) can be accumulated. 473 * The user now should do whatever housekeeping is necessary, 474 * typically serialize on lc_try_lock_for_transaction(), then call 475 * lc_committed(lc) and lc_unlock(), to finish the change. 476 * 477 * NOTE: The user needs to check the lc_number on EACH use, so he recognizes 478 * any cache set change. 479 */ 480 struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) 481 { 482 return __lc_get(lc, enr, LC_GET_MAY_CHANGE); 483 } 484 485 /** 486 * lc_get_cumulative - like lc_get; also finds to-be-changed elements 487 * @lc: the lru cache to operate on 488 * @enr: the label to look up 489 * 490 * Unlike lc_get this also returns the element for @enr, if it is belonging to 491 * a pending transaction, so the return values are like for lc_get(), 492 * plus: 493 * 494 * pointer to an element already on the "to_be_changed" list. 495 * In this case, the cache was already marked %LC_DIRTY. 496 * 497 * Caller needs to make sure that the pending transaction is completed, 498 * before proceeding to actually use this element. 499 */ 500 struct lc_element *lc_get_cumulative(struct lru_cache *lc, unsigned int enr) 501 { 502 return __lc_get(lc, enr, LC_GET_MAY_CHANGE|LC_GET_MAY_USE_UNCOMMITTED); 503 } 504 505 /** 506 * lc_try_get - get element by label, if present; do not change the active set 507 * @lc: the lru cache to operate on 508 * @enr: the label to look up 509 * 510 * Finds an element in the cache, increases its usage count, 511 * "touches" and returns it. 512 * 513 * Return values: 514 * NULL 515 * The cache was marked %LC_STARVING, 516 * or the requested label was not in the active set 517 * 518 * pointer to the element with the REQUESTED element number. 519 * In this case, it can be used right away 520 */ 521 struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) 522 { 523 return __lc_get(lc, enr, 0); 524 } 525 526 /** 527 * lc_committed - tell @lc that pending changes have been recorded 528 * @lc: the lru cache to operate on 529 * 530 * User is expected to serialize on explicit lc_try_lock_for_transaction() 531 * before the transaction is started, and later needs to lc_unlock() explicitly 532 * as well. 533 */ 534 void lc_committed(struct lru_cache *lc) 535 { 536 struct lc_element *e, *tmp; 537 538 PARANOIA_ENTRY(); 539 list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) { 540 /* count number of changes, not number of transactions */ 541 ++lc->changed; 542 e->lc_number = e->lc_new_number; 543 list_move(&e->list, &lc->in_use); 544 } 545 lc->pending_changes = 0; 546 RETURN(); 547 } 548 549 550 /** 551 * lc_put - give up refcnt of @e 552 * @lc: the lru cache to operate on 553 * @e: the element to put 554 * 555 * If refcnt reaches zero, the element is moved to the lru list, 556 * and a %LC_STARVING (if set) is cleared. 557 * Returns the new (post-decrement) refcnt. 558 */ 559 unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) 560 { 561 PARANOIA_ENTRY(); 562 PARANOIA_LC_ELEMENT(lc, e); 563 BUG_ON(e->refcnt == 0); 564 BUG_ON(e->lc_number != e->lc_new_number); 565 if (--e->refcnt == 0) { 566 /* move it to the front of LRU. */ 567 list_move(&e->list, &lc->lru); 568 lc->used--; 569 clear_bit_unlock(__LC_STARVING, &lc->flags); 570 } 571 RETURN(e->refcnt); 572 } 573 574 /** 575 * lc_element_by_index 576 * @lc: the lru cache to operate on 577 * @i: the index of the element to return 578 */ 579 struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i) 580 { 581 BUG_ON(i >= lc->nr_elements); 582 BUG_ON(lc->lc_element[i] == NULL); 583 BUG_ON(lc->lc_element[i]->lc_index != i); 584 return lc->lc_element[i]; 585 } 586 587 /** 588 * lc_index_of 589 * @lc: the lru cache to operate on 590 * @e: the element to query for its index position in lc->element 591 */ 592 unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e) 593 { 594 PARANOIA_LC_ELEMENT(lc, e); 595 return e->lc_index; 596 } 597 598 /** 599 * lc_set - associate index with label 600 * @lc: the lru cache to operate on 601 * @enr: the label to set 602 * @index: the element index to associate label with. 603 * 604 * Used to initialize the active set to some previously recorded state. 605 */ 606 void lc_set(struct lru_cache *lc, unsigned int enr, int index) 607 { 608 struct lc_element *e; 609 struct list_head *lh; 610 611 if (index < 0 || index >= lc->nr_elements) 612 return; 613 614 e = lc_element_by_index(lc, index); 615 BUG_ON(e->lc_number != e->lc_new_number); 616 BUG_ON(e->refcnt != 0); 617 618 e->lc_number = e->lc_new_number = enr; 619 hlist_del_init(&e->colision); 620 if (enr == LC_FREE) 621 lh = &lc->free; 622 else { 623 hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); 624 lh = &lc->lru; 625 } 626 list_move(&e->list, lh); 627 } 628 629 /** 630 * lc_dump - Dump a complete LRU cache to seq in textual form. 631 * @lc: the lru cache to operate on 632 * @seq: the &struct seq_file pointer to seq_printf into 633 * @utext: user supplied additional "heading" or other info 634 * @detail: function pointer the user may provide to dump further details 635 * of the object the lc_element is embedded in. May be NULL. 636 * Note: a leading space ' ' and trailing newline '\n' is implied. 637 */ 638 void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext, 639 void (*detail) (struct seq_file *, struct lc_element *)) 640 { 641 unsigned int nr_elements = lc->nr_elements; 642 struct lc_element *e; 643 int i; 644 645 seq_printf(seq, "\tnn: lc_number (new nr) refcnt %s\n ", utext); 646 for (i = 0; i < nr_elements; i++) { 647 e = lc_element_by_index(lc, i); 648 if (e->lc_number != e->lc_new_number) 649 seq_printf(seq, "\t%5d: %6d %8d %6d ", 650 i, e->lc_number, e->lc_new_number, e->refcnt); 651 else 652 seq_printf(seq, "\t%5d: %6d %-8s %6d ", 653 i, e->lc_number, "-\"-", e->refcnt); 654 if (detail) 655 detail(seq, e); 656 seq_putc(seq, '\n'); 657 } 658 } 659 660 EXPORT_SYMBOL(lc_create); 661 EXPORT_SYMBOL(lc_reset); 662 EXPORT_SYMBOL(lc_destroy); 663 EXPORT_SYMBOL(lc_set); 664 EXPORT_SYMBOL(lc_del); 665 EXPORT_SYMBOL(lc_try_get); 666 EXPORT_SYMBOL(lc_find); 667 EXPORT_SYMBOL(lc_get); 668 EXPORT_SYMBOL(lc_put); 669 EXPORT_SYMBOL(lc_committed); 670 EXPORT_SYMBOL(lc_element_by_index); 671 EXPORT_SYMBOL(lc_index_of); 672 EXPORT_SYMBOL(lc_seq_printf_stats); 673 EXPORT_SYMBOL(lc_seq_dump_details); 674 EXPORT_SYMBOL(lc_try_lock); 675 EXPORT_SYMBOL(lc_is_used); 676 EXPORT_SYMBOL(lc_get_cumulative); 677