/* fs/mbcache.c (OpenBMC Linux xref, revision dc8d5e56) */
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/list_bl.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <linux/hash.h>
#include <linux/mbcache.h>

/*
 * Mbcache is a simple key-value store. Keys need not be unique, however
 * key-value pairs are expected to be unique (we use this fact in
 * mb_cache_entry_delete_block()).
 *
 * Ext2 and ext4 use this cache for deduplication of extended attribute
 * blocks. They use a hash of the block's contents as the key and the block
 * number as the value. That is why keys need not be unique (different xattr
 * blocks may end up having the same hash), while a block number always
 * uniquely identifies a cache entry.
 *
 * We provide functions for creation and removal of entries, search by key,
 * and a special "delete entry with given key-value pair" operation. A fixed
 * size hash table is used for fast key lookups.
 */
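
/*
 * Illustrative usage sketch (hypothetical caller, not part of this file;
 * error handling abbreviated). A filesystem deduplicating xattr blocks
 * would typically do:
 *
 *	struct mb_cache *cache = mb_cache_create(10);
 *
 *	if (!cache)
 *		return -ENOMEM;
 *	err = mb_cache_entry_create(cache, GFP_NOFS, hash, blocknr);
 *	if (err && err != -EBUSY)
 *		return err;
 *	...
 *	mb_cache_destroy(cache);
 *
 * Here "hash" and "blocknr" stand for the caller's xattr block hash and
 * block number; -EBUSY means the pair was already cached.
 */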

struct mb_cache {
	/* Hash table of entries */
	struct hlist_bl_head	*c_hash;
	/* log2 of hash table size */
	int			c_bucket_bits;
	/* Maximum entries in cache to avoid degrading hash too much */
	int			c_max_entries;
	/* Protects c_list, c_entry_count */
	spinlock_t		c_list_lock;
	struct list_head	c_list;
	/* Number of entries in cache */
	unsigned long		c_entry_count;
	struct shrinker		c_shrink;
	/* Work for shrinking when the cache has too many entries */
	struct work_struct	c_shrink_work;
};
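
/*
 * Lock ordering note (descriptive, inferred from the code below): the hash
 * chain bit locks and c_list_lock are never held at the same time; every
 * path drops one before taking the other, so there is no nesting order to
 * observe between them.
 */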

static struct kmem_cache *mb_entry_cache;

static unsigned long mb_cache_shrink(struct mb_cache *cache,
				     unsigned int nr_to_scan);

static inline struct hlist_bl_head *mb_cache_entry_head(struct mb_cache *cache,
							u32 key)
{
	return &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
}
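
/*
 * For example (illustrative numbers only), a cache created with
 * mb_cache_create(10) has c_bucket_bits == 10, so hash_32() folds any
 * 32-bit key into a bucket index in the range 0..1023.
 */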

/*
 * Number of entries to reclaim synchronously when there are too many entries
 * in the cache
 */
#define SYNC_SHRINK_BATCH 64

/*
 * mb_cache_entry_create - create entry in cache
 * @cache: cache where the entry should be created
 * @mask: gfp mask with which the entry should be allocated
 * @key: key of the entry
 * @block: block that contains the data
 *
 * Creates an entry in @cache with key @key and records that the data is
 * stored in block @block. Returns -EBUSY if an entry with the same key and
 * block already exists in the cache, -ENOMEM on allocation failure, and 0
 * otherwise.
 */
int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
			  sector_t block)
{
	struct mb_cache_entry *entry, *dup;
	struct hlist_bl_node *dup_node;
	struct hlist_bl_head *head;

	/* Schedule background reclaim if there are too many entries */
	if (cache->c_entry_count >= cache->c_max_entries)
		schedule_work(&cache->c_shrink_work);
	/* Do some sync reclaim if background reclaim cannot keep up */
	if (cache->c_entry_count >= 2 * cache->c_max_entries)
		mb_cache_shrink(cache, SYNC_SHRINK_BATCH);

	entry = kmem_cache_alloc(mb_entry_cache, mask);
	if (!entry)
		return -ENOMEM;

	INIT_LIST_HEAD(&entry->e_list);
	/* Initial reference is for the hash list */
	atomic_set(&entry->e_refcnt, 1);
	entry->e_key = key;
	entry->e_block = block;
	head = mb_cache_entry_head(cache, key);
	hlist_bl_lock(head);
	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
		if (dup->e_key == key && dup->e_block == block) {
			hlist_bl_unlock(head);
			kmem_cache_free(mb_entry_cache, entry);
			return -EBUSY;
		}
	}
	hlist_bl_add_head(&entry->e_hash_list, head);
	hlist_bl_unlock(head);

	spin_lock(&cache->c_list_lock);
	list_add_tail(&entry->e_list, &cache->c_list);
	/* Grab ref for LRU list */
	atomic_inc(&entry->e_refcnt);
	cache->c_entry_count++;
	spin_unlock(&cache->c_list_lock);

	return 0;
}
EXPORT_SYMBOL(mb_cache_entry_create);
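
/*
 * Example caller pattern (a sketch, not part of this file): when caching a
 * just-written xattr block, -EBUSY merely means a racing task already
 * cached the same (key, block) pair, so it is usually not treated as an
 * error:
 *
 *	err = mb_cache_entry_create(cache, GFP_NOFS, hash, blocknr);
 *	if (err == -EBUSY)
 *		err = 0;
 */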

void __mb_cache_entry_free(struct mb_cache_entry *entry)
{
	kmem_cache_free(mb_entry_cache, entry);
}
EXPORT_SYMBOL(__mb_cache_entry_free);

static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
					   struct mb_cache_entry *entry,
					   u32 key)
{
	struct mb_cache_entry *old_entry = entry;
	struct hlist_bl_node *node;
	struct hlist_bl_head *head;

	head = mb_cache_entry_head(cache, key);
	hlist_bl_lock(head);
	if (entry && !hlist_bl_unhashed(&entry->e_hash_list))
		node = entry->e_hash_list.next;
	else
		node = hlist_bl_first(head);
	while (node) {
		entry = hlist_bl_entry(node, struct mb_cache_entry,
				       e_hash_list);
		if (entry->e_key == key) {
			atomic_inc(&entry->e_refcnt);
			goto out;
		}
		node = node->next;
	}
	entry = NULL;
out:
	hlist_bl_unlock(head);
	if (old_entry)
		mb_cache_entry_put(cache, old_entry);

	return entry;
}

/*
 * mb_cache_entry_find_first - find the first entry in cache with given key
 * @cache: cache where we should search
 * @key: key to look for
 *
 * Search in @cache for an entry with key @key. Grabs a reference to the
 * first entry found and returns the entry.
 */
struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache,
						 u32 key)
{
	return __entry_find(cache, NULL, key);
}
EXPORT_SYMBOL(mb_cache_entry_find_first);

/*
 * mb_cache_entry_find_next - find next entry in cache with the same key
 * @cache: cache where we should search
 * @entry: entry to start search from
 *
 * Finds the next entry in the hash chain which has the same key as @entry.
 * If @entry is unhashed (which can happen when deletion of the entry races
 * with the search), finds the first entry in the hash chain. The function
 * drops the reference to @entry and returns with a reference to the found
 * entry.
 */
struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache,
						struct mb_cache_entry *entry)
{
	return __entry_find(cache, entry, entry->e_key);
}
EXPORT_SYMBOL(mb_cache_entry_find_next);
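
/*
 * Typical search loop (a sketch; "wanted_block" and the reuse check are
 * hypothetical caller code). __entry_find() moves the reference from the
 * previous entry to the next one, so only an early exit from the loop needs
 * an explicit mb_cache_entry_put():
 *
 *	struct mb_cache_entry *ce;
 *
 *	for (ce = mb_cache_entry_find_first(cache, hash); ce;
 *	     ce = mb_cache_entry_find_next(cache, ce)) {
 *		if (ce->e_block == wanted_block) {
 *			mb_cache_entry_touch(cache, ce);
 *			mb_cache_entry_put(cache, ce);
 *			break;
 *		}
 *	}
 */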

/*
 * mb_cache_entry_delete_block - remove information about a block from cache
 * @cache: cache we work with
 * @key: key of the entry to remove
 * @block: block containing data for @key
 *
 * Remove the entry from @cache with key @key and data stored in @block.
 */
void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
				 sector_t block)
{
	struct hlist_bl_node *node;
	struct hlist_bl_head *head;
	struct mb_cache_entry *entry;

	head = mb_cache_entry_head(cache, key);
	hlist_bl_lock(head);
	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
		if (entry->e_key == key && entry->e_block == block) {
			/* We keep hash list reference to keep entry alive */
			hlist_bl_del_init(&entry->e_hash_list);
			hlist_bl_unlock(head);
			spin_lock(&cache->c_list_lock);
			if (!list_empty(&entry->e_list)) {
				list_del_init(&entry->e_list);
				cache->c_entry_count--;
				atomic_dec(&entry->e_refcnt);
			}
			spin_unlock(&cache->c_list_lock);
			mb_cache_entry_put(cache, entry);
			return;
		}
	}
	hlist_bl_unlock(head);
}
EXPORT_SYMBOL(mb_cache_entry_delete_block);
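
/*
 * Example (a sketch): when the caller frees or rewrites an xattr block, it
 * drops the stale mapping so later lookups cannot hand out a dead block:
 *
 *	mb_cache_entry_delete_block(cache, old_hash, old_blocknr);
 */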

/*
 * mb_cache_entry_touch - cache entry got used
 * @cache: cache the entry belongs to
 * @entry: entry that got used
 *
 * Marks the entry as used to give it a higher chance of surviving in the
 * cache.
 */
void mb_cache_entry_touch(struct mb_cache *cache,
			  struct mb_cache_entry *entry)
{
	entry->e_referenced = 1;
}
EXPORT_SYMBOL(mb_cache_entry_touch);

static unsigned long mb_cache_count(struct shrinker *shrink,
				    struct shrink_control *sc)
{
	struct mb_cache *cache = container_of(shrink, struct mb_cache,
					      c_shrink);

	return cache->c_entry_count;
}

/* Shrink number of entries in cache */
static unsigned long mb_cache_shrink(struct mb_cache *cache,
				     unsigned int nr_to_scan)
{
	struct mb_cache_entry *entry;
	struct hlist_bl_head *head;
	unsigned int shrunk = 0;

	spin_lock(&cache->c_list_lock);
	while (nr_to_scan-- && !list_empty(&cache->c_list)) {
		entry = list_first_entry(&cache->c_list,
					 struct mb_cache_entry, e_list);
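		/*
		 * Second chance: an entry used since the last scan is
		 * rotated to the tail of the LRU list instead of being
		 * reclaimed.
		 */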
		if (entry->e_referenced) {
			entry->e_referenced = 0;
			list_move_tail(&entry->e_list, &cache->c_list);
			continue;
		}
		list_del_init(&entry->e_list);
		cache->c_entry_count--;
		/*
		 * We keep LRU list reference so that entry doesn't go away
		 * from under us.
		 */
		spin_unlock(&cache->c_list_lock);
		head = mb_cache_entry_head(cache, entry->e_key);
		hlist_bl_lock(head);
		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
			hlist_bl_del_init(&entry->e_hash_list);
			atomic_dec(&entry->e_refcnt);
		}
		hlist_bl_unlock(head);
		if (mb_cache_entry_put(cache, entry))
			shrunk++;
		cond_resched();
		spin_lock(&cache->c_list_lock);
	}
	spin_unlock(&cache->c_list_lock);

	return shrunk;
}

static unsigned long mb_cache_scan(struct shrinker *shrink,
				   struct shrink_control *sc)
{
	struct mb_cache *cache = container_of(shrink, struct mb_cache,
					      c_shrink);

	return mb_cache_shrink(cache, sc->nr_to_scan);
}

/* We shrink 1/X of the cache when we have too many entries in it */
#define SHRINK_DIVISOR 16

static void mb_cache_shrink_worker(struct work_struct *work)
{
	struct mb_cache *cache = container_of(work, struct mb_cache,
					      c_shrink_work);
	mb_cache_shrink(cache, cache->c_max_entries / SHRINK_DIVISOR);
}
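
/*
 * Worked example (illustrative numbers): mb_cache_create(10) yields 1024
 * buckets and c_max_entries = 1024 << 4 = 16384. Once the cache holds
 * 16384 entries, each worker run reclaims 16384 / 16 = 1024 of them, and
 * the synchronous fallback in mb_cache_entry_create() kicks in at twice
 * that, reclaiming SYNC_SHRINK_BATCH (64) entries inline.
 */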

/*
 * mb_cache_create - create cache
 * @bucket_bits: log2 of the hash table size
 *
 * Create a cache for keys with a hash table of 2^bucket_bits buckets.
 */
struct mb_cache *mb_cache_create(int bucket_bits)
{
	struct mb_cache *cache;
	int bucket_count = 1 << bucket_bits;
	int i;

	if (!try_module_get(THIS_MODULE))
		return NULL;

	cache = kzalloc(sizeof(struct mb_cache), GFP_KERNEL);
	if (!cache)
		goto err_out;
	cache->c_bucket_bits = bucket_bits;
	cache->c_max_entries = bucket_count << 4;
	INIT_LIST_HEAD(&cache->c_list);
	spin_lock_init(&cache->c_list_lock);
	cache->c_hash = kmalloc_array(bucket_count,
				      sizeof(struct hlist_bl_head),
				      GFP_KERNEL);
	if (!cache->c_hash) {
		kfree(cache);
		goto err_out;
	}
	for (i = 0; i < bucket_count; i++)
		INIT_HLIST_BL_HEAD(&cache->c_hash[i]);

	cache->c_shrink.count_objects = mb_cache_count;
	cache->c_shrink.scan_objects = mb_cache_scan;
	cache->c_shrink.seeks = DEFAULT_SEEKS;
	register_shrinker(&cache->c_shrink);

	INIT_WORK(&cache->c_shrink_work, mb_cache_shrink_worker);

	return cache;

err_out:
	module_put(THIS_MODULE);
	return NULL;
}
EXPORT_SYMBOL(mb_cache_create);

/*
 * mb_cache_destroy - destroy cache
 * @cache: the cache to destroy
 *
 * Free all entries in the cache and the cache itself. The caller must make
 * sure that nobody (except the shrinker) can reach @cache when calling this.
 */
void mb_cache_destroy(struct mb_cache *cache)
{
	struct mb_cache_entry *entry, *next;

	unregister_shrinker(&cache->c_shrink);

	/*
	 * We don't bother with any locking. Cache must not be used at this
	 * point.
	 */
	list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
			hlist_bl_del_init(&entry->e_hash_list);
			atomic_dec(&entry->e_refcnt);
		} else
			WARN_ON(1);
		list_del(&entry->e_list);
		WARN_ON(atomic_read(&entry->e_refcnt) != 1);
		mb_cache_entry_put(cache, entry);
	}
	kfree(cache->c_hash);
	kfree(cache);
	module_put(THIS_MODULE);
}
EXPORT_SYMBOL(mb_cache_destroy);

static int __init mbcache_init(void)
{
	mb_entry_cache = kmem_cache_create("mbcache",
				sizeof(struct mb_cache_entry), 0,
				SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
	if (!mb_entry_cache)
		return -ENOMEM;
	return 0;
}

static void __exit mbcache_exit(void)
{
	kmem_cache_destroy(mb_entry_cache);
}

module_init(mbcache_init)
module_exit(mbcache_exit)

MODULE_AUTHOR("Jan Kara <jack@suse.cz>");
MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
MODULE_LICENSE("GPL");