xref: /openbmc/linux/mm/list_lru.c (revision 2427f03f)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
4  * Authors: David Chinner and Glauber Costa
5  *
6  * Generic LRU infrastructure
7  */
8 #include <linux/kernel.h>
9 #include <linux/module.h>
10 #include <linux/mm.h>
11 #include <linux/list_lru.h>
12 #include <linux/slab.h>
13 #include <linux/mutex.h>
14 #include <linux/memcontrol.h>
15 #include "slab.h"
16 
17 #ifdef CONFIG_MEMCG_KMEM
18 static LIST_HEAD(memcg_list_lrus);
19 static DEFINE_MUTEX(list_lrus_mutex);
20 
21 static inline bool list_lru_memcg_aware(struct list_lru *lru)
22 {
23 	return lru->memcg_aware;
24 }
25 
26 static void list_lru_register(struct list_lru *lru)
27 {
28 	if (!list_lru_memcg_aware(lru))
29 		return;
30 
31 	mutex_lock(&list_lrus_mutex);
32 	list_add(&lru->list, &memcg_list_lrus);
33 	mutex_unlock(&list_lrus_mutex);
34 }
35 
36 static void list_lru_unregister(struct list_lru *lru)
37 {
38 	if (!list_lru_memcg_aware(lru))
39 		return;
40 
41 	mutex_lock(&list_lrus_mutex);
42 	list_del(&lru->list);
43 	mutex_unlock(&list_lrus_mutex);
44 }
45 
46 static int lru_shrinker_id(struct list_lru *lru)
47 {
48 	return lru->shrinker_id;
49 }
50 
51 static inline struct list_lru_one *
52 list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
53 {
54 	struct list_lru_memcg *memcg_lrus;
55 	/*
56 	 * Either lock or RCU protects the array of per cgroup lists
57 	 * from relocation (see memcg_update_list_lru_node).
58 	 */
59 	memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
60 					   lockdep_is_held(&nlru->lock));
61 	if (memcg_lrus && idx >= 0)
62 		return memcg_lrus->lru[idx];
63 	return &nlru->lru;
64 }
65 
66 static inline struct list_lru_one *
67 list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
68 		   struct mem_cgroup **memcg_ptr)
69 {
70 	struct list_lru_one *l = &nlru->lru;
71 	struct mem_cgroup *memcg = NULL;
72 
73 	if (!nlru->memcg_lrus)
74 		goto out;
75 
76 	memcg = mem_cgroup_from_obj(ptr);
77 	if (!memcg)
78 		goto out;
79 
80 	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
81 out:
82 	if (memcg_ptr)
83 		*memcg_ptr = memcg;
84 	return l;
85 }
86 #else
87 static void list_lru_register(struct list_lru *lru)
88 {
89 }
90 
91 static void list_lru_unregister(struct list_lru *lru)
92 {
93 }
94 
95 static int lru_shrinker_id(struct list_lru *lru)
96 {
97 	return -1;
98 }
99 
100 static inline bool list_lru_memcg_aware(struct list_lru *lru)
101 {
102 	return false;
103 }
104 
105 static inline struct list_lru_one *
106 list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
107 {
108 	return &nlru->lru;
109 }
110 
111 static inline struct list_lru_one *
112 list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
113 		   struct mem_cgroup **memcg_ptr)
114 {
115 	if (memcg_ptr)
116 		*memcg_ptr = NULL;
117 	return &nlru->lru;
118 }
119 #endif /* CONFIG_MEMCG_KMEM */
120 
121 bool list_lru_add(struct list_lru *lru, struct list_head *item)
122 {
123 	int nid = page_to_nid(virt_to_page(item));
124 	struct list_lru_node *nlru = &lru->node[nid];
125 	struct mem_cgroup *memcg;
126 	struct list_lru_one *l;
127 
128 	spin_lock(&nlru->lock);
129 	if (list_empty(item)) {
130 		l = list_lru_from_kmem(nlru, item, &memcg);
131 		list_add_tail(item, &l->list);
132 		/* Set shrinker bit if the first element was added */
133 		if (!l->nr_items++)
134 			set_shrinker_bit(memcg, nid,
135 					 lru_shrinker_id(lru));
136 		nlru->nr_items++;
137 		spin_unlock(&nlru->lock);
138 		return true;
139 	}
140 	spin_unlock(&nlru->lock);
141 	return false;
142 }
143 EXPORT_SYMBOL_GPL(list_lru_add);
144 
145 bool list_lru_del(struct list_lru *lru, struct list_head *item)
146 {
147 	int nid = page_to_nid(virt_to_page(item));
148 	struct list_lru_node *nlru = &lru->node[nid];
149 	struct list_lru_one *l;
150 
151 	spin_lock(&nlru->lock);
152 	if (!list_empty(item)) {
153 		l = list_lru_from_kmem(nlru, item, NULL);
154 		list_del_init(item);
155 		l->nr_items--;
156 		nlru->nr_items--;
157 		spin_unlock(&nlru->lock);
158 		return true;
159 	}
160 	spin_unlock(&nlru->lock);
161 	return false;
162 }
163 EXPORT_SYMBOL_GPL(list_lru_del);
164 
165 void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
166 {
167 	list_del_init(item);
168 	list->nr_items--;
169 }
170 EXPORT_SYMBOL_GPL(list_lru_isolate);
171 
172 void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
173 			   struct list_head *head)
174 {
175 	list_move(item, head);
176 	list->nr_items--;
177 }
178 EXPORT_SYMBOL_GPL(list_lru_isolate_move);
179 
180 unsigned long list_lru_count_one(struct list_lru *lru,
181 				 int nid, struct mem_cgroup *memcg)
182 {
183 	struct list_lru_node *nlru = &lru->node[nid];
184 	struct list_lru_one *l;
185 	long count;
186 
187 	rcu_read_lock();
188 	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
189 	count = READ_ONCE(l->nr_items);
190 	rcu_read_unlock();
191 
192 	if (unlikely(count < 0))
193 		count = 0;
194 
195 	return count;
196 }
197 EXPORT_SYMBOL_GPL(list_lru_count_one);
198 
199 unsigned long list_lru_count_node(struct list_lru *lru, int nid)
200 {
201 	struct list_lru_node *nlru;
202 
203 	nlru = &lru->node[nid];
204 	return nlru->nr_items;
205 }
206 EXPORT_SYMBOL_GPL(list_lru_count_node);
207 
208 static unsigned long
209 __list_lru_walk_one(struct list_lru_node *nlru, int memcg_idx,
210 		    list_lru_walk_cb isolate, void *cb_arg,
211 		    unsigned long *nr_to_walk)
212 {
213 
214 	struct list_lru_one *l;
215 	struct list_head *item, *n;
216 	unsigned long isolated = 0;
217 
218 	l = list_lru_from_memcg_idx(nlru, memcg_idx);
219 restart:
220 	list_for_each_safe(item, n, &l->list) {
221 		enum lru_status ret;
222 
223 		/*
224 		 * decrement nr_to_walk first so that we don't livelock if we
225 		 * get stuck on large numbers of LRU_RETRY items
226 		 */
227 		if (!*nr_to_walk)
228 			break;
229 		--*nr_to_walk;
230 
231 		ret = isolate(item, l, &nlru->lock, cb_arg);
232 		switch (ret) {
233 		case LRU_REMOVED_RETRY:
234 			assert_spin_locked(&nlru->lock);
235 			fallthrough;
236 		case LRU_REMOVED:
237 			isolated++;
238 			nlru->nr_items--;
239 			/*
240 			 * If the lru lock has been dropped, our list
241 			 * traversal is now invalid and so we have to
242 			 * restart from scratch.
243 			 */
244 			if (ret == LRU_REMOVED_RETRY)
245 				goto restart;
246 			break;
247 		case LRU_ROTATE:
248 			list_move_tail(item, &l->list);
249 			break;
250 		case LRU_SKIP:
251 			break;
252 		case LRU_RETRY:
253 			/*
254 			 * The lru lock has been dropped, our list traversal is
255 			 * now invalid and so we have to restart from scratch.
256 			 */
257 			assert_spin_locked(&nlru->lock);
258 			goto restart;
259 		default:
260 			BUG();
261 		}
262 	}
263 	return isolated;
264 }
265 
266 unsigned long
267 list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
268 		  list_lru_walk_cb isolate, void *cb_arg,
269 		  unsigned long *nr_to_walk)
270 {
271 	struct list_lru_node *nlru = &lru->node[nid];
272 	unsigned long ret;
273 
274 	spin_lock(&nlru->lock);
275 	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
276 				  nr_to_walk);
277 	spin_unlock(&nlru->lock);
278 	return ret;
279 }
280 EXPORT_SYMBOL_GPL(list_lru_walk_one);
281 
282 unsigned long
283 list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
284 		      list_lru_walk_cb isolate, void *cb_arg,
285 		      unsigned long *nr_to_walk)
286 {
287 	struct list_lru_node *nlru = &lru->node[nid];
288 	unsigned long ret;
289 
290 	spin_lock_irq(&nlru->lock);
291 	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
292 				  nr_to_walk);
293 	spin_unlock_irq(&nlru->lock);
294 	return ret;
295 }
296 
297 unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
298 				 list_lru_walk_cb isolate, void *cb_arg,
299 				 unsigned long *nr_to_walk)
300 {
301 	long isolated = 0;
302 	int memcg_idx;
303 
304 	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
305 				      nr_to_walk);
306 	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
307 		for_each_memcg_cache_index(memcg_idx) {
308 			struct list_lru_node *nlru = &lru->node[nid];
309 
310 			spin_lock(&nlru->lock);
311 			isolated += __list_lru_walk_one(nlru, memcg_idx,
312 							isolate, cb_arg,
313 							nr_to_walk);
314 			spin_unlock(&nlru->lock);
315 
316 			if (*nr_to_walk <= 0)
317 				break;
318 		}
319 	}
320 	return isolated;
321 }
322 EXPORT_SYMBOL_GPL(list_lru_walk_node);
323 
324 static void init_one_lru(struct list_lru_one *l)
325 {
326 	INIT_LIST_HEAD(&l->list);
327 	l->nr_items = 0;
328 }
329 
330 #ifdef CONFIG_MEMCG_KMEM
331 static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
332 					  int begin, int end)
333 {
334 	int i;
335 
336 	for (i = begin; i < end; i++)
337 		kfree(memcg_lrus->lru[i]);
338 }
339 
340 static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
341 				      int begin, int end)
342 {
343 	int i;
344 
345 	for (i = begin; i < end; i++) {
346 		struct list_lru_one *l;
347 
348 		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
349 		if (!l)
350 			goto fail;
351 
352 		init_one_lru(l);
353 		memcg_lrus->lru[i] = l;
354 	}
355 	return 0;
356 fail:
357 	__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
358 	return -ENOMEM;
359 }
360 
361 static int memcg_init_list_lru_node(struct list_lru_node *nlru)
362 {
363 	struct list_lru_memcg *memcg_lrus;
364 	int size = memcg_nr_cache_ids;
365 
366 	memcg_lrus = kvmalloc(struct_size(memcg_lrus, lru, size), GFP_KERNEL);
367 	if (!memcg_lrus)
368 		return -ENOMEM;
369 
370 	if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
371 		kvfree(memcg_lrus);
372 		return -ENOMEM;
373 	}
374 	RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);
375 
376 	return 0;
377 }
378 
379 static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
380 {
381 	struct list_lru_memcg *memcg_lrus;
382 	/*
383 	 * This is called when shrinker has already been unregistered,
384 	 * and nobody can use it. So, there is no need to use kvfree_rcu().
385 	 */
386 	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
387 	__memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
388 	kvfree(memcg_lrus);
389 }
390 
391 static int memcg_update_list_lru_node(struct list_lru_node *nlru,
392 				      int old_size, int new_size)
393 {
394 	struct list_lru_memcg *old, *new;
395 
396 	BUG_ON(old_size > new_size);
397 
398 	old = rcu_dereference_protected(nlru->memcg_lrus,
399 					lockdep_is_held(&list_lrus_mutex));
400 	new = kvmalloc(struct_size(new, lru, new_size), GFP_KERNEL);
401 	if (!new)
402 		return -ENOMEM;
403 
404 	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
405 		kvfree(new);
406 		return -ENOMEM;
407 	}
408 
409 	memcpy(&new->lru, &old->lru, flex_array_size(new, lru, old_size));
410 	rcu_assign_pointer(nlru->memcg_lrus, new);
411 	kvfree_rcu(old, rcu);
412 	return 0;
413 }
414 
415 static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
416 					      int old_size, int new_size)
417 {
418 	struct list_lru_memcg *memcg_lrus;
419 
420 	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
421 					       lockdep_is_held(&list_lrus_mutex));
422 	/* do not bother shrinking the array back to the old size, because we
423 	 * cannot handle allocation failures here */
424 	__memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
425 }
426 
427 static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
428 {
429 	int i;
430 
431 	lru->memcg_aware = memcg_aware;
432 
433 	if (!memcg_aware)
434 		return 0;
435 
436 	for_each_node(i) {
437 		if (memcg_init_list_lru_node(&lru->node[i]))
438 			goto fail;
439 	}
440 	return 0;
441 fail:
442 	for (i = i - 1; i >= 0; i--) {
443 		if (!lru->node[i].memcg_lrus)
444 			continue;
445 		memcg_destroy_list_lru_node(&lru->node[i]);
446 	}
447 	return -ENOMEM;
448 }
449 
450 static void memcg_destroy_list_lru(struct list_lru *lru)
451 {
452 	int i;
453 
454 	if (!list_lru_memcg_aware(lru))
455 		return;
456 
457 	for_each_node(i)
458 		memcg_destroy_list_lru_node(&lru->node[i]);
459 }
460 
461 static int memcg_update_list_lru(struct list_lru *lru,
462 				 int old_size, int new_size)
463 {
464 	int i;
465 
466 	for_each_node(i) {
467 		if (memcg_update_list_lru_node(&lru->node[i],
468 					       old_size, new_size))
469 			goto fail;
470 	}
471 	return 0;
472 fail:
473 	for (i = i - 1; i >= 0; i--) {
474 		if (!lru->node[i].memcg_lrus)
475 			continue;
476 
477 		memcg_cancel_update_list_lru_node(&lru->node[i],
478 						  old_size, new_size);
479 	}
480 	return -ENOMEM;
481 }
482 
483 static void memcg_cancel_update_list_lru(struct list_lru *lru,
484 					 int old_size, int new_size)
485 {
486 	int i;
487 
488 	for_each_node(i)
489 		memcg_cancel_update_list_lru_node(&lru->node[i],
490 						  old_size, new_size);
491 }
492 
493 int memcg_update_all_list_lrus(int new_size)
494 {
495 	int ret = 0;
496 	struct list_lru *lru;
497 	int old_size = memcg_nr_cache_ids;
498 
499 	mutex_lock(&list_lrus_mutex);
500 	list_for_each_entry(lru, &memcg_list_lrus, list) {
501 		ret = memcg_update_list_lru(lru, old_size, new_size);
502 		if (ret)
503 			goto fail;
504 	}
505 out:
506 	mutex_unlock(&list_lrus_mutex);
507 	return ret;
508 fail:
509 	list_for_each_entry_continue_reverse(lru, &memcg_list_lrus, list)
510 		memcg_cancel_update_list_lru(lru, old_size, new_size);
511 	goto out;
512 }
513 
514 static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
515 				      int src_idx, struct mem_cgroup *dst_memcg)
516 {
517 	struct list_lru_node *nlru = &lru->node[nid];
518 	int dst_idx = dst_memcg->kmemcg_id;
519 	struct list_lru_one *src, *dst;
520 
521 	/*
522 	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
523 	 * we have to use IRQ-safe primitives here to avoid deadlock.
524 	 */
525 	spin_lock_irq(&nlru->lock);
526 
527 	src = list_lru_from_memcg_idx(nlru, src_idx);
528 	dst = list_lru_from_memcg_idx(nlru, dst_idx);
529 
530 	list_splice_init(&src->list, &dst->list);
531 
532 	if (src->nr_items) {
533 		dst->nr_items += src->nr_items;
534 		set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
535 		src->nr_items = 0;
536 	}
537 
538 	spin_unlock_irq(&nlru->lock);
539 }
540 
541 static void memcg_drain_list_lru(struct list_lru *lru,
542 				 int src_idx, struct mem_cgroup *dst_memcg)
543 {
544 	int i;
545 
546 	for_each_node(i)
547 		memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
548 }
549 
550 void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
551 {
552 	struct list_lru *lru;
553 
554 	mutex_lock(&list_lrus_mutex);
555 	list_for_each_entry(lru, &memcg_list_lrus, list)
556 		memcg_drain_list_lru(lru, src_idx, dst_memcg);
557 	mutex_unlock(&list_lrus_mutex);
558 }
559 #else
560 static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
561 {
562 	return 0;
563 }
564 
565 static void memcg_destroy_list_lru(struct list_lru *lru)
566 {
567 }
568 #endif /* CONFIG_MEMCG_KMEM */
569 
570 int __list_lru_init(struct list_lru *lru, bool memcg_aware,
571 		    struct lock_class_key *key, struct shrinker *shrinker)
572 {
573 	int i;
574 	int err = -ENOMEM;
575 
576 #ifdef CONFIG_MEMCG_KMEM
577 	if (shrinker)
578 		lru->shrinker_id = shrinker->id;
579 	else
580 		lru->shrinker_id = -1;
581 #endif
582 	memcg_get_cache_ids();
583 
584 	lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
585 	if (!lru->node)
586 		goto out;
587 
588 	for_each_node(i) {
589 		spin_lock_init(&lru->node[i].lock);
590 		if (key)
591 			lockdep_set_class(&lru->node[i].lock, key);
592 		init_one_lru(&lru->node[i].lru);
593 	}
594 
595 	err = memcg_init_list_lru(lru, memcg_aware);
596 	if (err) {
597 		kfree(lru->node);
598 		/* Do this so a list_lru_destroy() doesn't crash: */
599 		lru->node = NULL;
600 		goto out;
601 	}
602 
603 	list_lru_register(lru);
604 out:
605 	memcg_put_cache_ids();
606 	return err;
607 }
608 EXPORT_SYMBOL_GPL(__list_lru_init);
609 
610 void list_lru_destroy(struct list_lru *lru)
611 {
612 	/* Already destroyed or not yet initialized? */
613 	if (!lru->node)
614 		return;
615 
616 	memcg_get_cache_ids();
617 
618 	list_lru_unregister(lru);
619 
620 	memcg_destroy_list_lru(lru);
621 	kfree(lru->node);
622 	lru->node = NULL;
623 
624 #ifdef CONFIG_MEMCG_KMEM
625 	lru->shrinker_id = -1;
626 #endif
627 	memcg_put_cache_ids();
628 }
629 EXPORT_SYMBOL_GPL(list_lru_destroy);
630