// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>

#ifdef CONFIG_MEMCG_KMEM
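/*
 * All memcg-aware list_lrus are kept on this list so that they can be
 * resized (memcg_update_all_list_lrus) or drained
 * (memcg_drain_all_list_lrus) together, under list_lrus_mutex.
 */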
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	/*
	 * This needs node 0 to always be present, even on systems
	 * supporting sparse NUMA ids.
	 */
	return !!lru->node[0].memcg_lrus;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * Either lock or RCU protects the array of per cgroup lists
	 * from relocation (see memcg_update_list_lru_node).
	 */
	memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
					   lockdep_is_held(&nlru->lock));
	if (memcg_lrus && idx >= 0)
		return memcg_lrus->lru[idx];
	return &nlru->lru;
}

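/*
 * Look up the memory cgroup that a kmem-allocated object is charged to,
 * via the object's (head) page.  Returns NULL when kmem accounting is
 * disabled, so callers fall back to the node-global list.
 */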
static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
{
	struct page *page;

	if (!memcg_kmem_enabled())
		return NULL;
	page = virt_to_head_page(ptr);
	return page->mem_cgroup;
}

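/*
 * Pick the list that @ptr should live on: the per-memcg list of the
 * cgroup owning the object if the lru is memcg aware, otherwise the
 * node-global list.  The owning memcg (or NULL) is optionally returned
 * through @memcg_ptr.
 */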
static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	struct list_lru_one *l = &nlru->lru;
	struct mem_cgroup *memcg = NULL;

	if (!nlru->memcg_lrus)
		goto out;

	memcg = mem_cgroup_from_kmem(ptr);
	if (!memcg)
		goto out;

	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
out:
	if (memcg_ptr)
		*memcg_ptr = memcg;
	return l;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	if (memcg_ptr)
		*memcg_ptr = NULL;
	return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

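/*
 * Add @item to the lru list for its NUMA node (and owning memcg, if the
 * lru is memcg aware).  Returns true if the item was added, false if it
 * was already on a list.
 */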
bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct mem_cgroup *memcg;
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, &memcg);
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			memcg_set_shrinker_bit(memcg, nid,
					       lru_shrinker_id(lru));
		nlru->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

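/*
 * Remove @item from whichever list it is on.  Returns true if the item
 * was removed, false if it was not on a list in the first place.
 */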
bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (!list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, NULL);
		list_del_init(item);
		l->nr_items--;
		nlru->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);

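/*
 * Helpers for isolate callbacks run from __list_lru_walk_one(): take
 * @item off (or move it onto @head) while the node lock is held and keep
 * the per-list item count in sync.
 */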
void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

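/*
 * Return the number of items on the list belonging to @memcg on node
 * @nid; the _node variant below returns the total for the whole node.
 */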
unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	unsigned long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
	count = l->nr_items;
	rcu_read_unlock();

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

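/*
 * Walk one per-node (and, for memcg_idx >= 0, per-memcg) list with
 * nlru->lock held, invoking @isolate on each item until *nr_to_walk
 * reaches zero.  If the callback drops the lock (LRU_REMOVED_RETRY or
 * LRU_RETRY), the traversal is restarted from the head of the list.
 */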
static unsigned long
__list_lru_walk_one(struct list_lru_node *nlru, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

	l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			/* fall through */
		case LRU_REMOVED:
			isolated++;
			nlru->nr_items--;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock(&nlru->lock);
	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
				  nr_to_walk);
	spin_unlock(&nlru->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

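/*
 * Same as list_lru_walk_one(), but the node lock is taken with interrupts
 * disabled, for lrus whose list_lru_{add,del} callers hold IRQ-safe locks
 * (see the locking comment in memcg_update_list_lru_node()).
 */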
unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		      list_lru_walk_cb isolate, void *cb_arg,
		      unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock_irq(&nlru->lock);
	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
				  nr_to_walk);
	spin_unlock_irq(&nlru->lock);
	return ret;
}

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;
	int memcg_idx;

	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
				      nr_to_walk);
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx) {
			struct list_lru_node *nlru = &lru->node[nid];

			spin_lock(&nlru->lock);
			isolated += __list_lru_walk_one(nlru, memcg_idx,
							isolate, cb_arg,
							nr_to_walk);
			spin_unlock(&nlru->lock);

			if (*nr_to_walk <= 0)
				break;
		}
	}
	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
					  int begin, int end)
{
	int i;

	for (i = begin; i < end; i++)
		kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
				      int begin, int end)
{
	int i;

	for (i = begin; i < end; i++) {
		struct list_lru_one *l;

		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
		if (!l)
			goto fail;

		init_one_lru(l);
		memcg_lrus->lru[i] = l;
	}
	return 0;
fail:
	/* Free everything allocated so far, i.e. indices begin .. i - 1. */
	__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
	return -ENOMEM;
}

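/*
 * Allocate the per-memcg array of lists for one node, sized for the
 * current memcg_nr_cache_ids, and publish it via RCU.
 */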
static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	int size = memcg_nr_cache_ids;

	memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
			      size * sizeof(void *), GFP_KERNEL);
	if (!memcg_lrus)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
		kvfree(memcg_lrus);
		return -ENOMEM;
	}
	RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);

	return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * This is called when the shrinker has already been unregistered,
	 * and nobody can use it. So, there is no need to use kvfree_rcu().
	 */
	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
	__memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
	kvfree(memcg_lrus);
}

static void kvfree_rcu(struct rcu_head *head)
{
	struct list_lru_memcg *mlru;

	mlru = container_of(head, struct list_lru_memcg, rcu);
	kvfree(mlru);
}

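/*
 * Grow a node's per-memcg array from @old_size to @new_size entries.
 * Readers are protected either by RCU or by nlru->lock, so the old array
 * is swapped out under the lock and freed after an RCU grace period.
 */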
static int memcg_update_list_lru_node(struct list_lru_node *nlru,
				      int old_size, int new_size)
{
	struct list_lru_memcg *old, *new;

	BUG_ON(old_size > new_size);

	old = rcu_dereference_protected(nlru->memcg_lrus,
					lockdep_is_held(&list_lrus_mutex));
	new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
		kvfree(new);
		return -ENOMEM;
	}

	memcpy(&new->lru, &old->lru, old_size * sizeof(void *));

	/*
	 * The locking below allows readers that hold nlru->lock to avoid
	 * taking rcu_read_lock (see list_lru_from_memcg_idx).
	 *
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);
	rcu_assign_pointer(nlru->memcg_lrus, new);
	spin_unlock_irq(&nlru->lock);

	call_rcu(&old->rcu, kvfree_rcu);
	return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
					      int old_size, int new_size)
{
	struct list_lru_memcg *memcg_lrus;

	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
					       lockdep_is_held(&list_lrus_mutex));
	/*
	 * Do not bother shrinking the array back to the old size, because we
	 * cannot handle allocation failures here.
	 */
	__memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	int i;

	if (!memcg_aware)
		return 0;

	for_each_node(i) {
		if (memcg_init_list_lru_node(&lru->node[i]))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;
		memcg_destroy_list_lru_node(&lru->node[i]);
	}
	return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
				 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return 0;

	for_each_node(i) {
		if (memcg_update_list_lru_node(&lru->node[i],
					       old_size, new_size))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;

		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
	}
	return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
					 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
}

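/*
 * Resize the per-memcg arrays of every registered lru from the current
 * memcg_nr_cache_ids to @new_size, rolling back already-resized lrus on
 * allocation failure.
 */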
int memcg_update_all_list_lrus(int new_size)
{
	int ret = 0;
	struct list_lru *lru;
	int old_size = memcg_nr_cache_ids;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list) {
		ret = memcg_update_list_lru(lru, old_size, new_size);
		if (ret)
			goto fail;
	}
out:
	mutex_unlock(&list_lrus_mutex);
	return ret;
fail:
	list_for_each_entry_continue_reverse(lru, &list_lrus, list)
		memcg_cancel_update_list_lru(lru, old_size, new_size);
	goto out;
}

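/*
 * Splice all items charged to the memcg at @src_idx onto the list of
 * @dst_memcg on node @nid, and set the destination's shrinker bit if it
 * was empty before, so the moved items stay visible to reclaim.
 */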
static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
				      int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *src, *dst;
	bool set;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(nlru, src_idx);
	dst = list_lru_from_memcg_idx(nlru, dst_idx);

	list_splice_init(&src->list, &dst->list);
	set = (!dst->nr_items && src->nr_items);
	dst->nr_items += src->nr_items;
	if (set)
		memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
	src->nr_items = 0;

	spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
				 int src_idx, struct mem_cgroup *dst_memcg)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
}

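/*
 * Reparent the items of every registered memcg-aware lru from the memcg
 * at @src_idx to @dst_memcg, across all nodes.
 */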
void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru *lru;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list)
		memcg_drain_list_lru(lru, src_idx, dst_memcg);
	mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

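/*
 * Common initializer behind the list_lru_init*() wrappers: allocate the
 * per-node structures, optionally make the lru memcg aware, and register
 * it so it is covered by memcg array resizes and drains.
 */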
int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key, struct shrinker *shrinker)
{
	int i;
	int err = -ENOMEM;

#ifdef CONFIG_MEMCG_KMEM
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;
#endif
	memcg_get_cache_ids();

	lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
	if (!lru->node)
		goto out;

	for_each_node(i) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	err = memcg_init_list_lru(lru, memcg_aware);
	if (err) {
		kfree(lru->node);
		/* Do this so a list_lru_destroy() doesn't crash: */
		lru->node = NULL;
		goto out;
	}

	list_lru_register(lru);
out:
	memcg_put_cache_ids();
	return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	memcg_get_cache_ids();

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
	lru->shrinker_id = -1;
#endif
	memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);