// SPDX-License-Identifier: GPL-2.0-or-later
/*
   lru_cache.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

 */

#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/string.h> /* for memset */
#include <linux/seq_file.h> /* for seq_printf */
#include <linux/lru_cache.h>

MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
	      "Lars Ellenberg <lars@linbit.com>");
MODULE_DESCRIPTION("lru_cache - Track sets of hot objects");
MODULE_LICENSE("GPL");

/* This is a developer's aid only.
 * It catches concurrent access (lack of locking on the user's part). */
#define PARANOIA_ENTRY() do {		\
	BUG_ON(!lc);			\
	BUG_ON(!lc->nr_elements);	\
	BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)); \
} while (0)

#define RETURN(x...)     do { \
	clear_bit_unlock(__LC_PARANOIA, &lc->flags); \
	return x; } while (0)

/* BUG() if e is not one of the elements tracked by lc */
#define PARANOIA_LC_ELEMENT(lc, e) do {	\
	struct lru_cache *lc_ = (lc);	\
	struct lc_element *e_ = (e);	\
	unsigned i = e_->lc_index;	\
	BUG_ON(i >= lc_->nr_elements);	\
	BUG_ON(lc_->lc_element[i] != e_); } while (0)


/* We need to atomically
 *  - try to grab the lock (set LC_LOCKED)
 *  - only if there is no pending transaction
 *    (neither LC_DIRTY nor LC_STARVING is set)
 * Because PARANOIA_ENTRY() above abuses lc->flags as well,
 * it is not sufficient to just say
 *	return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED);
 */
int lc_try_lock(struct lru_cache *lc)
{
	unsigned long val;

	/* Spin until no one is inside a PARANOIA_ENTRY()/RETURN() section. */
	do {
		val = cmpxchg(&lc->flags, 0, LC_LOCKED);
	} while (unlikely(val == LC_PARANOIA));
	return 0 == val;
}
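
/*
 * Example (illustrative sketch, not part of the original file): once
 * lc_try_lock() succeeds, the caller has exclusive access to the cache
 * until lc_unlock() (declared in <linux/lru_cache.h>) releases it again.
 * A full lc_reset(), for instance, is only safe under that lock:
 *
 *	if (lc_try_lock(lc)) {
 *		lc_reset(lc);
 *		lc_unlock(lc);
 *	}
 */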

/**
 * lc_create - prepares to track objects in an active set
 * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
 * @cache: cache root pointer
 * @max_pending_changes: maximum changes to accumulate until a transaction is required
 * @e_count: number of elements allowed to be active simultaneously
 * @e_size: size of the tracked objects
 * @e_off: offset to the &struct lc_element member in a tracked object
 *
 * Returns a pointer to a newly initialized struct lru_cache on success,
 * or NULL on (allocation) failure.
 */
struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
		unsigned max_pending_changes,
		unsigned e_count, size_t e_size, size_t e_off)
{
	struct hlist_head *slot = NULL;
	struct lc_element **element = NULL;
	struct lru_cache *lc;
	struct lc_element *e;
	unsigned cache_obj_size = kmem_cache_size(cache);
	unsigned i;

	WARN_ON(cache_obj_size < e_size);
	if (cache_obj_size < e_size)
		return NULL;

	/* e_count too big; it would probably fail the allocation below anyway.
	 * For typical use cases, e_count should be a few thousand at most. */
	if (e_count > LC_MAX_ACTIVE)
		return NULL;

	slot = kcalloc(e_count, sizeof(struct hlist_head), GFP_KERNEL);
	if (!slot)
		goto out_fail;
	element = kcalloc(e_count, sizeof(struct lc_element *), GFP_KERNEL);
	if (!element)
		goto out_fail;

	lc = kzalloc(sizeof(*lc), GFP_KERNEL);
	if (!lc)
		goto out_fail;

	INIT_LIST_HEAD(&lc->in_use);
	INIT_LIST_HEAD(&lc->lru);
	INIT_LIST_HEAD(&lc->free);
	INIT_LIST_HEAD(&lc->to_be_changed);

	lc->name = name;
	lc->element_size = e_size;
	lc->element_off = e_off;
	lc->nr_elements = e_count;
	lc->max_pending_changes = max_pending_changes;
	lc->lc_cache = cache;
	lc->lc_element = element;
	lc->lc_slot = slot;

	/* preallocate all objects */
	for (i = 0; i < e_count; i++) {
		void *p = kmem_cache_alloc(cache, GFP_KERNEL);
		if (!p)
			break;
		memset(p, 0, lc->element_size);
		e = p + e_off;
		e->lc_index = i;
		e->lc_number = LC_FREE;
		e->lc_new_number = LC_FREE;
		list_add(&e->list, &lc->free);
		element[i] = e;
	}
	if (i == e_count)
		return lc;

	/* else: could not allocate all elements, give up */
	while (i) {
		void *p = element[--i];
		kmem_cache_free(cache, p - e_off);
	}
	kfree(lc);
out_fail:
	kfree(element);
	kfree(slot);
	return NULL;
}
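
/*
 * Example (illustrative sketch; "struct my_extent" and the names below are
 * hypothetical, not from this file): users embed a struct lc_element in
 * their own object and tell lc_create() the object size and the offset of
 * the embedded member:
 *
 *	struct my_extent {
 *		struct lc_element lce;
 *		unsigned long flags;
 *	};
 *
 *	struct kmem_cache *my_cache =
 *		kmem_cache_create("my_extent", sizeof(struct my_extent),
 *				  0, 0, NULL);
 *	struct lru_cache *lc =
 *		lc_create("my-cache", my_cache, 16, 61,
 *			  sizeof(struct my_extent),
 *			  offsetof(struct my_extent, lce));
 *
 * Here 16 is max_pending_changes and 61 is e_count, i.e. the number of
 * simultaneously active elements.
 */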

static void lc_free_by_index(struct lru_cache *lc, unsigned i)
{
	void *p = lc->lc_element[i];
	WARN_ON(!p);
	if (p) {
		p -= lc->element_off;
		kmem_cache_free(lc->lc_cache, p);
	}
}

/**
 * lc_destroy - frees memory allocated by lc_create()
 * @lc: the lru cache to destroy
 */
void lc_destroy(struct lru_cache *lc)
{
	unsigned i;
	if (!lc)
		return;
	for (i = 0; i < lc->nr_elements; i++)
		lc_free_by_index(lc, i);
	kfree(lc->lc_element);
	kfree(lc->lc_slot);
	kfree(lc);
}

/**
 * lc_reset - does a full reset for @lc and the hash table slots.
 * @lc: the lru cache to operate on
 *
 * It is roughly the equivalent of re-allocating a fresh lru_cache object,
 * basically a shortcut to lc_destroy(lc); lc = lc_create(...);
 */
void lc_reset(struct lru_cache *lc)
{
	unsigned i;

	INIT_LIST_HEAD(&lc->in_use);
	INIT_LIST_HEAD(&lc->lru);
	INIT_LIST_HEAD(&lc->free);
	INIT_LIST_HEAD(&lc->to_be_changed);
	lc->used = 0;
	lc->hits = 0;
	lc->misses = 0;
	lc->starving = 0;
	lc->locked = 0;
	lc->changed = 0;
	lc->pending_changes = 0;
	lc->flags = 0;
	memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);

	for (i = 0; i < lc->nr_elements; i++) {
		struct lc_element *e = lc->lc_element[i];
		void *p = e;
		p -= lc->element_off;
		memset(p, 0, lc->element_size);
		/* re-init it */
		e->lc_index = i;
		e->lc_number = LC_FREE;
		e->lc_new_number = LC_FREE;
		list_add(&e->list, &lc->free);
	}
}

/**
 * lc_seq_printf_stats - print stats about @lc into @seq
 * @seq: the seq_file to print into
 * @lc: the lru cache to print statistics of
 */
void lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
{
	/* NOTE:
	 * total calls to lc_get are
	 * (starving + hits + misses)
	 * misses include the "locked" count (an update from another thread was
	 * in progress) and "changed", when the miss in fact led to a
	 * successful update of the cache.
	 */
	seq_printf(seq, "\t%s: used:%u/%u hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
		   lc->name, lc->used, lc->nr_elements,
		   lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
}
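
/*
 * Example (illustrative sketch; my_stats_show() is a hypothetical seq_file
 * show callback of the user, not part of this API):
 *
 *	static int my_stats_show(struct seq_file *seq, void *v)
 *	{
 *		struct lru_cache *lc = seq->private;
 *
 *		lc_seq_printf_stats(seq, lc);
 *		return 0;
 *	}
 */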

static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
{
	return lc->lc_slot + (enr % lc->nr_elements);
}


static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr,
		bool include_changing)
{
	struct lc_element *e;

	BUG_ON(!lc);
	BUG_ON(!lc->nr_elements);
	hlist_for_each_entry(e, lc_hash_slot(lc, enr), colision) {
		/* "about to be changed" elements, pending transaction commit,
		 * are hashed by their "new number". "Normal" elements have
		 * lc_number == lc_new_number. */
		if (e->lc_new_number != enr)
			continue;
		if (e->lc_new_number == e->lc_number || include_changing)
			return e;
		break;
	}
	return NULL;
}

/**
 * lc_find - find element by label, if present in the hash table
 * @lc: The lru_cache object
 * @enr: element number
 *
 * Returns the pointer to an element, if the element with the requested
 * "label" or element number is present in the hash table,
 * or NULL if not found. Does not change the refcnt.
 * Ignores elements that are "about to be used", i.e. not yet in the active
 * set, but still pending transaction commit.
 */
struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
{
	return __lc_find(lc, enr, false);
}

/**
 * lc_is_used - find element by label
 * @lc: The lru_cache object
 * @enr: element number
 *
 * Returns true if the element with the requested "label" or element number is
 * present in the hash table, and is used (refcnt > 0).
 * Also finds elements that are not _currently_ used but only "about to be
 * used", i.e. on the "to_be_changed" list, pending transaction commit.
 */
bool lc_is_used(struct lru_cache *lc, unsigned int enr)
{
	struct lc_element *e = __lc_find(lc, enr, true);
	return e && e->refcnt;
}
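
/*
 * Example (illustrative sketch; my_lock is a hypothetical lock of the user):
 * before reusing a label for something else, check that it is not referenced,
 * including not-yet-committed "to be changed" entries:
 *
 *	spin_lock(&my_lock);
 *	if (lc_is_used(lc, enr)) {
 *		spin_unlock(&my_lock);
 *		return -EBUSY;
 *	}
 *	... safe to act on the label here ...
 *	spin_unlock(&my_lock);
 */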

/**
 * lc_del - removes an element from the cache
 * @lc: The lru_cache object
 * @e: The element to remove
 *
 * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list,
 * sets @e->lc_number to %LC_FREE.
 */
void lc_del(struct lru_cache *lc, struct lc_element *e)
{
	PARANOIA_ENTRY();
	PARANOIA_LC_ELEMENT(lc, e);
	BUG_ON(e->refcnt);

	e->lc_number = e->lc_new_number = LC_FREE;
	hlist_del_init(&e->colision);
	list_move(&e->list, &lc->free);
	RETURN();
}
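
/*
 * Example (illustrative sketch; my_lock is hypothetical): retire a
 * no-longer-referenced element, returning it to the free list. The user's
 * own lock must serialize this against lc_get()/lc_put():
 *
 *	spin_lock(&my_lock);
 *	e = lc_find(lc, enr);
 *	if (e && e->refcnt == 0)
 *		lc_del(lc, e);
 *	spin_unlock(&my_lock);
 */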

static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number)
{
	struct list_head *n;
	struct lc_element *e;

	if (!list_empty(&lc->free))
		n = lc->free.next;
	else if (!list_empty(&lc->lru))
		n = lc->lru.prev;
	else
		return NULL;

	e = list_entry(n, struct lc_element, list);
	PARANOIA_LC_ELEMENT(lc, e);

	e->lc_new_number = new_number;
	if (!hlist_unhashed(&e->colision))
		__hlist_del(&e->colision);
	hlist_add_head(&e->colision, lc_hash_slot(lc, new_number));
	list_move(&e->list, &lc->to_be_changed);

	return e;
}

static int lc_unused_element_available(struct lru_cache *lc)
{
	if (!list_empty(&lc->free))
		return 1; /* something on the free list */
	if (!list_empty(&lc->lru))
		return 1; /* something to evict */

	return 0;
}

/* used as internal flags to __lc_get */
enum {
	LC_GET_MAY_CHANGE = 1,
	LC_GET_MAY_USE_UNCOMMITTED = 2,
};

static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, unsigned int flags)
{
	struct lc_element *e;

	PARANOIA_ENTRY();
	if (test_bit(__LC_STARVING, &lc->flags)) {
		++lc->starving;
		RETURN(NULL);
	}

	e = __lc_find(lc, enr, true);
	/* if lc_new_number != lc_number,
	 * this enr is currently being pulled in already,
	 * and will be available once the pending transaction
	 * has been committed. */
	if (e) {
		if (e->lc_new_number != e->lc_number) {
			/* It has been found above, but on the "to_be_changed"
			 * list, not yet committed.  Don't pull it in twice,
			 * wait for the transaction, then try again...
			 */
			if (!(flags & LC_GET_MAY_USE_UNCOMMITTED))
				RETURN(NULL);
			/* ... unless the caller is aware of the implications,
			 * probably preparing a cumulative transaction. */
			++e->refcnt;
			++lc->hits;
			RETURN(e);
		}
		/* else: lc_new_number == lc_number; a real hit. */
		++lc->hits;
		if (e->refcnt++ == 0)
			lc->used++;
		list_move(&e->list, &lc->in_use); /* Not evictable... */
		RETURN(e);
	}
	/* e == NULL */

	++lc->misses;
	if (!(flags & LC_GET_MAY_CHANGE))
		RETURN(NULL);

	/* To avoid races with lc_try_lock(), first mark us dirty
	 * (using test_and_set_bit, as it implies memory barriers), ... */
	test_and_set_bit(__LC_DIRTY, &lc->flags);

	/* ... and only then check if it is locked anyway. If lc_unlock clears
	 * the dirty bit again, that's not a problem, we will come here again.
	 */
	if (test_bit(__LC_LOCKED, &lc->flags)) {
		++lc->locked;
		RETURN(NULL);
	}

	/* In case there is nothing available and we cannot kick out
	 * the LRU element, we have to wait ...
	 */
	if (!lc_unused_element_available(lc)) {
		set_bit(__LC_STARVING, &lc->flags);
		RETURN(NULL);
	}

	/* It was not present in the active set.  We are going to recycle an
	 * unused (or even "free") element, but we won't accumulate more than
	 * max_pending_changes changes.  */
	if (lc->pending_changes >= lc->max_pending_changes)
		RETURN(NULL);

	e = lc_prepare_for_change(lc, enr);
	BUG_ON(!e);

	clear_bit(__LC_STARVING, &lc->flags);
	BUG_ON(++e->refcnt != 1);
	lc->used++;
	lc->pending_changes++;

	RETURN(e);
}

/**
 * lc_get - get element by label, maybe change the active set
 * @lc: the lru cache to operate on
 * @enr: the label to look up
 *
 * Finds an element in the cache, increases its usage count,
 * "touches" and returns it.
 *
 * In case the requested number is not present, it needs to be added to the
 * cache. Therefore it is possible that another element is evicted from
 * the cache. In either case, the user is notified, so they are able to e.g.
 * keep a persistent log of the cache changes, and therefore of the objects
 * in use.
 *
 * Return values:
 *  NULL
 *     The cache was marked %LC_STARVING,
 *     or the requested label was not in the active set
 *     and a changing transaction is still pending (@lc was marked %LC_DIRTY).
 *     Or no unused or free element could be recycled (@lc will be marked as
 *     %LC_STARVING, blocking further lc_get() operations).
 *
 *  pointer to the element with the REQUESTED element number.
 *     In this case, it can be used right away.
 *
 *  pointer to an UNUSED element with some different element number,
 *          where that different number may also be %LC_FREE.
 *
 *          In this case, the cache is marked %LC_DIRTY,
 *          so lc_try_lock() will no longer succeed.
 *          The returned element pointer is moved to the "to_be_changed" list,
 *          and registered with the new element number on the hash collision chains,
 *          so it is possible to pick it up from lc_is_used().
 *          Up to "max_pending_changes" (see lc_create()) changes can be accumulated.
 *          The user now should do whatever housekeeping is necessary,
 *          typically serialize on lc_try_lock_for_transaction(), then call
 *          lc_committed(lc) and lc_unlock(), to finish the change.
 *
 * NOTE: The user needs to check the lc_number on EACH use, so that any
 *       cache set change is recognized.
 */
struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
{
	return __lc_get(lc, enr, LC_GET_MAY_CHANGE);
}
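
/*
 * Example transaction flow (a sketch of the intended usage; my_lock and
 * my_write_transaction() are hypothetical, the lc_* calls are this API,
 * with lc_try_lock_for_transaction() and lc_unlock() coming from
 * <linux/lru_cache.h>):
 *
 *	spin_lock(&my_lock);
 *	e = lc_get(lc, enr);
 *	spin_unlock(&my_lock);
 *	if (!e)
 *		return -EBUSY;
 *	if (e->lc_number != enr) {
 *		(an active set change is pending for this element)
 *		if (lc_try_lock_for_transaction(lc)) {
 *			my_write_transaction(lc);
 *			spin_lock(&my_lock);
 *			lc_committed(lc);
 *			spin_unlock(&my_lock);
 *			lc_unlock(lc);
 *		}
 *	}
 */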

/**
 * lc_get_cumulative - like lc_get; also finds to-be-changed elements
 * @lc: the lru cache to operate on
 * @enr: the label to look up
 *
 * Unlike lc_get this also returns the element for @enr if it belongs to
 * a pending transaction, so the return values are like for lc_get(),
 * plus:
 *
 * pointer to an element already on the "to_be_changed" list.
 *	In this case, the cache was already marked %LC_DIRTY.
 *
 * Caller needs to make sure that the pending transaction is completed
 * before proceeding to actually use this element.
 */
struct lc_element *lc_get_cumulative(struct lru_cache *lc, unsigned int enr)
{
	return __lc_get(lc, enr, LC_GET_MAY_CHANGE|LC_GET_MAY_USE_UNCOMMITTED);
}

/**
 * lc_try_get - get element by label, if present; do not change the active set
 * @lc: the lru cache to operate on
 * @enr: the label to look up
 *
 * Finds an element in the cache, increases its usage count,
 * "touches" and returns it.
 *
 * Return values:
 *  NULL
 *     The cache was marked %LC_STARVING,
 *     or the requested label was not in the active set
 *
 *  pointer to the element with the REQUESTED element number.
 *     In this case, it can be used right away.
 */
struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
{
	return __lc_get(lc, enr, 0);
}

/**
 * lc_committed - tell @lc that pending changes have been recorded
 * @lc: the lru cache to operate on
 *
 * User is expected to serialize on explicit lc_try_lock_for_transaction()
 * before the transaction is started, and later needs to lc_unlock() explicitly
 * as well.
 */
void lc_committed(struct lru_cache *lc)
{
	struct lc_element *e, *tmp;

	PARANOIA_ENTRY();
	list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) {
		/* count number of changes, not number of transactions */
		++lc->changed;
		e->lc_number = e->lc_new_number;
		list_move(&e->list, &lc->in_use);
	}
	lc->pending_changes = 0;
	RETURN();
}

/**
 * lc_put - give up refcnt of @e
 * @lc: the lru cache to operate on
 * @e: the element to put
 *
 * If refcnt reaches zero, the element is moved to the lru list,
 * and %LC_STARVING (if set) is cleared.
 * Returns the new (post-decrement) refcnt.
 */
unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
{
	PARANOIA_ENTRY();
	PARANOIA_LC_ELEMENT(lc, e);
	BUG_ON(e->refcnt == 0);
	BUG_ON(e->lc_number != e->lc_new_number);
	if (--e->refcnt == 0) {
		/* move it to the front of LRU. */
		list_move(&e->list, &lc->lru);
		lc->used--;
		clear_bit_unlock(__LC_STARVING, &lc->flags);
	}
	RETURN(e->refcnt);
}
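
/*
 * Example (illustrative sketch; my_lock and my_waitqueue are hypothetical):
 * each successful lc_get()/lc_try_get() must eventually be balanced by
 * exactly one lc_put(), again under the user's own lock:
 *
 *	spin_lock(&my_lock);
 *	refcnt = lc_put(lc, e);
 *	spin_unlock(&my_lock);
 *	if (refcnt == 0)
 *		wake_up(&my_waitqueue);
 *
 * Waking a wait queue here gives starving lc_get() callers a chance to
 * retry now that an element became evictable again.
 */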

/**
 * lc_element_by_index - get the element at index @i
 * @lc: the lru cache to operate on
 * @i: the index of the element to return
 */
struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i)
{
	BUG_ON(i >= lc->nr_elements);
	BUG_ON(lc->lc_element[i] == NULL);
	BUG_ON(lc->lc_element[i]->lc_index != i);
	return lc->lc_element[i];
}
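
/*
 * Example (illustrative sketch; "struct my_extent" as in the hypothetical
 * lc_create() example above): walk all tracked elements and recover the
 * embedding object with the lc_entry() container_of wrapper from
 * <linux/lru_cache.h>:
 *
 *	for (i = 0; i < lc->nr_elements; i++) {
 *		struct lc_element *e = lc_element_by_index(lc, i);
 *		struct my_extent *x = lc_entry(e, struct my_extent, lce);
 *		...
 *	}
 */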

/**
 * lc_seq_dump_details - Dump a complete LRU cache to seq in textual form.
 * @lc: the lru cache to operate on
 * @seq: the &struct seq_file pointer to seq_printf into
 * @utext: user supplied additional "heading" or other info
 * @detail: function pointer the user may provide to dump further details
 * of the object the lc_element is embedded in. May be NULL.
 * Note: a leading space ' ' and trailing newline '\n' are implied.
 */
void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
	     void (*detail) (struct seq_file *, struct lc_element *))
{
	unsigned int nr_elements = lc->nr_elements;
	struct lc_element *e;
	int i;

	seq_printf(seq, "\tnn: lc_number (new nr) refcnt %s\n ", utext);
	for (i = 0; i < nr_elements; i++) {
		e = lc_element_by_index(lc, i);
		if (e->lc_number != e->lc_new_number)
			seq_printf(seq, "\t%5d: %6d %8d %6d ",
				i, e->lc_number, e->lc_new_number, e->refcnt);
		else
			seq_printf(seq, "\t%5d: %6d %-8s %6d ",
				i, e->lc_number, "-\"-", e->refcnt);
		if (detail)
			detail(seq, e);
		seq_putc(seq, '\n');
	}
}

EXPORT_SYMBOL(lc_create);
EXPORT_SYMBOL(lc_reset);
EXPORT_SYMBOL(lc_destroy);
EXPORT_SYMBOL(lc_del);
EXPORT_SYMBOL(lc_try_get);
EXPORT_SYMBOL(lc_find);
EXPORT_SYMBOL(lc_get);
EXPORT_SYMBOL(lc_put);
EXPORT_SYMBOL(lc_committed);
EXPORT_SYMBOL(lc_element_by_index);
EXPORT_SYMBOL(lc_seq_printf_stats);
EXPORT_SYMBOL(lc_seq_dump_details);
EXPORT_SYMBOL(lc_try_lock);
EXPORT_SYMBOL(lc_is_used);
EXPORT_SYMBOL(lc_get_cumulative);