xref: /openbmc/linux/mm/list_lru.c (revision 9fb29c73)
1 /*
2  * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
3  * Authors: David Chinner and Glauber Costa
4  *
5  * Generic LRU infrastructure
6  */
7 #include <linux/kernel.h>
8 #include <linux/module.h>
9 #include <linux/mm.h>
10 #include <linux/list_lru.h>
11 #include <linux/slab.h>
12 #include <linux/mutex.h>
13 #include <linux/memcontrol.h>
14 
15 #ifdef CONFIG_MEMCG_KMEM
16 static LIST_HEAD(list_lrus);
17 static DEFINE_MUTEX(list_lrus_mutex);
18 
19 static void list_lru_register(struct list_lru *lru)
20 {
21 	mutex_lock(&list_lrus_mutex);
22 	list_add(&lru->list, &list_lrus);
23 	mutex_unlock(&list_lrus_mutex);
24 }
25 
26 static void list_lru_unregister(struct list_lru *lru)
27 {
28 	mutex_lock(&list_lrus_mutex);
29 	list_del(&lru->list);
30 	mutex_unlock(&list_lrus_mutex);
31 }
32 
33 static int lru_shrinker_id(struct list_lru *lru)
34 {
35 	return lru->shrinker_id;
36 }
37 
38 static inline bool list_lru_memcg_aware(struct list_lru *lru)
39 {
40 	/*
41 	 * This needs node 0 to be always present, even
42 	 * in the systems supporting sparse numa ids.
43 	 */
44 	return !!lru->node[0].memcg_lrus;
45 }
46 
47 static inline struct list_lru_one *
48 list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
49 {
50 	struct list_lru_memcg *memcg_lrus;
51 	/*
52 	 * Either lock or RCU protects the array of per cgroup lists
53 	 * from relocation (see memcg_update_list_lru_node).
54 	 */
55 	memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
56 					   lockdep_is_held(&nlru->lock));
57 	if (memcg_lrus && idx >= 0)
58 		return memcg_lrus->lru[idx];
59 	return &nlru->lru;
60 }
61 
62 static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
63 {
64 	struct page *page;
65 
66 	if (!memcg_kmem_enabled())
67 		return NULL;
68 	page = virt_to_head_page(ptr);
69 	return page->mem_cgroup;
70 }
71 
72 static inline struct list_lru_one *
73 list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
74 		   struct mem_cgroup **memcg_ptr)
75 {
76 	struct list_lru_one *l = &nlru->lru;
77 	struct mem_cgroup *memcg = NULL;
78 
79 	if (!nlru->memcg_lrus)
80 		goto out;
81 
82 	memcg = mem_cgroup_from_kmem(ptr);
83 	if (!memcg)
84 		goto out;
85 
86 	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
87 out:
88 	if (memcg_ptr)
89 		*memcg_ptr = memcg;
90 	return l;
91 }
92 #else
93 static void list_lru_register(struct list_lru *lru)
94 {
95 }
96 
97 static void list_lru_unregister(struct list_lru *lru)
98 {
99 }
100 
101 static int lru_shrinker_id(struct list_lru *lru)
102 {
103 	return -1;
104 }
105 
106 static inline bool list_lru_memcg_aware(struct list_lru *lru)
107 {
108 	return false;
109 }
110 
111 static inline struct list_lru_one *
112 list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
113 {
114 	return &nlru->lru;
115 }
116 
117 static inline struct list_lru_one *
118 list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
119 		   struct mem_cgroup **memcg_ptr)
120 {
121 	if (memcg_ptr)
122 		*memcg_ptr = NULL;
123 	return &nlru->lru;
124 }
125 #endif /* CONFIG_MEMCG_KMEM */
126 
127 bool list_lru_add(struct list_lru *lru, struct list_head *item)
128 {
129 	int nid = page_to_nid(virt_to_page(item));
130 	struct list_lru_node *nlru = &lru->node[nid];
131 	struct mem_cgroup *memcg;
132 	struct list_lru_one *l;
133 
134 	spin_lock(&nlru->lock);
135 	if (list_empty(item)) {
136 		l = list_lru_from_kmem(nlru, item, &memcg);
137 		list_add_tail(item, &l->list);
138 		/* Set shrinker bit if the first element was added */
139 		if (!l->nr_items++)
140 			memcg_set_shrinker_bit(memcg, nid,
141 					       lru_shrinker_id(lru));
142 		nlru->nr_items++;
143 		spin_unlock(&nlru->lock);
144 		return true;
145 	}
146 	spin_unlock(&nlru->lock);
147 	return false;
148 }
149 EXPORT_SYMBOL_GPL(list_lru_add);
150 
151 bool list_lru_del(struct list_lru *lru, struct list_head *item)
152 {
153 	int nid = page_to_nid(virt_to_page(item));
154 	struct list_lru_node *nlru = &lru->node[nid];
155 	struct list_lru_one *l;
156 
157 	spin_lock(&nlru->lock);
158 	if (!list_empty(item)) {
159 		l = list_lru_from_kmem(nlru, item, NULL);
160 		list_del_init(item);
161 		l->nr_items--;
162 		nlru->nr_items--;
163 		spin_unlock(&nlru->lock);
164 		return true;
165 	}
166 	spin_unlock(&nlru->lock);
167 	return false;
168 }
169 EXPORT_SYMBOL_GPL(list_lru_del);
170 
171 void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
172 {
173 	list_del_init(item);
174 	list->nr_items--;
175 }
176 EXPORT_SYMBOL_GPL(list_lru_isolate);
177 
178 void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
179 			   struct list_head *head)
180 {
181 	list_move(item, head);
182 	list->nr_items--;
183 }
184 EXPORT_SYMBOL_GPL(list_lru_isolate_move);
185 
186 unsigned long list_lru_count_one(struct list_lru *lru,
187 				 int nid, struct mem_cgroup *memcg)
188 {
189 	struct list_lru_node *nlru = &lru->node[nid];
190 	struct list_lru_one *l;
191 	unsigned long count;
192 
193 	rcu_read_lock();
194 	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
195 	count = l->nr_items;
196 	rcu_read_unlock();
197 
198 	return count;
199 }
200 EXPORT_SYMBOL_GPL(list_lru_count_one);
201 
202 unsigned long list_lru_count_node(struct list_lru *lru, int nid)
203 {
204 	struct list_lru_node *nlru;
205 
206 	nlru = &lru->node[nid];
207 	return nlru->nr_items;
208 }
209 EXPORT_SYMBOL_GPL(list_lru_count_node);
210 
211 static unsigned long
212 __list_lru_walk_one(struct list_lru_node *nlru, int memcg_idx,
213 		    list_lru_walk_cb isolate, void *cb_arg,
214 		    unsigned long *nr_to_walk)
215 {
216 
217 	struct list_lru_one *l;
218 	struct list_head *item, *n;
219 	unsigned long isolated = 0;
220 
221 	l = list_lru_from_memcg_idx(nlru, memcg_idx);
222 restart:
223 	list_for_each_safe(item, n, &l->list) {
224 		enum lru_status ret;
225 
226 		/*
227 		 * decrement nr_to_walk first so that we don't livelock if we
228 		 * get stuck on large numbesr of LRU_RETRY items
229 		 */
230 		if (!*nr_to_walk)
231 			break;
232 		--*nr_to_walk;
233 
234 		ret = isolate(item, l, &nlru->lock, cb_arg);
235 		switch (ret) {
236 		case LRU_REMOVED_RETRY:
237 			assert_spin_locked(&nlru->lock);
238 			/* fall through */
239 		case LRU_REMOVED:
240 			isolated++;
241 			nlru->nr_items--;
242 			/*
243 			 * If the lru lock has been dropped, our list
244 			 * traversal is now invalid and so we have to
245 			 * restart from scratch.
246 			 */
247 			if (ret == LRU_REMOVED_RETRY)
248 				goto restart;
249 			break;
250 		case LRU_ROTATE:
251 			list_move_tail(item, &l->list);
252 			break;
253 		case LRU_SKIP:
254 			break;
255 		case LRU_RETRY:
256 			/*
257 			 * The lru lock has been dropped, our list traversal is
258 			 * now invalid and so we have to restart from scratch.
259 			 */
260 			assert_spin_locked(&nlru->lock);
261 			goto restart;
262 		default:
263 			BUG();
264 		}
265 	}
266 	return isolated;
267 }
268 
269 unsigned long
270 list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
271 		  list_lru_walk_cb isolate, void *cb_arg,
272 		  unsigned long *nr_to_walk)
273 {
274 	struct list_lru_node *nlru = &lru->node[nid];
275 	unsigned long ret;
276 
277 	spin_lock(&nlru->lock);
278 	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
279 				  nr_to_walk);
280 	spin_unlock(&nlru->lock);
281 	return ret;
282 }
283 EXPORT_SYMBOL_GPL(list_lru_walk_one);
284 
285 unsigned long
286 list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
287 		      list_lru_walk_cb isolate, void *cb_arg,
288 		      unsigned long *nr_to_walk)
289 {
290 	struct list_lru_node *nlru = &lru->node[nid];
291 	unsigned long ret;
292 
293 	spin_lock_irq(&nlru->lock);
294 	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
295 				  nr_to_walk);
296 	spin_unlock_irq(&nlru->lock);
297 	return ret;
298 }
299 
300 unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
301 				 list_lru_walk_cb isolate, void *cb_arg,
302 				 unsigned long *nr_to_walk)
303 {
304 	long isolated = 0;
305 	int memcg_idx;
306 
307 	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
308 				      nr_to_walk);
309 	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
310 		for_each_memcg_cache_index(memcg_idx) {
311 			struct list_lru_node *nlru = &lru->node[nid];
312 
313 			spin_lock(&nlru->lock);
314 			isolated += __list_lru_walk_one(nlru, memcg_idx,
315 							isolate, cb_arg,
316 							nr_to_walk);
317 			spin_unlock(&nlru->lock);
318 
319 			if (*nr_to_walk <= 0)
320 				break;
321 		}
322 	}
323 	return isolated;
324 }
325 EXPORT_SYMBOL_GPL(list_lru_walk_node);
326 
327 static void init_one_lru(struct list_lru_one *l)
328 {
329 	INIT_LIST_HEAD(&l->list);
330 	l->nr_items = 0;
331 }
332 
333 #ifdef CONFIG_MEMCG_KMEM
334 static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
335 					  int begin, int end)
336 {
337 	int i;
338 
339 	for (i = begin; i < end; i++)
340 		kfree(memcg_lrus->lru[i]);
341 }
342 
343 static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
344 				      int begin, int end)
345 {
346 	int i;
347 
348 	for (i = begin; i < end; i++) {
349 		struct list_lru_one *l;
350 
351 		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
352 		if (!l)
353 			goto fail;
354 
355 		init_one_lru(l);
356 		memcg_lrus->lru[i] = l;
357 	}
358 	return 0;
359 fail:
360 	__memcg_destroy_list_lru_node(memcg_lrus, begin, i - 1);
361 	return -ENOMEM;
362 }
363 
364 static int memcg_init_list_lru_node(struct list_lru_node *nlru)
365 {
366 	struct list_lru_memcg *memcg_lrus;
367 	int size = memcg_nr_cache_ids;
368 
369 	memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
370 			      size * sizeof(void *), GFP_KERNEL);
371 	if (!memcg_lrus)
372 		return -ENOMEM;
373 
374 	if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
375 		kvfree(memcg_lrus);
376 		return -ENOMEM;
377 	}
378 	RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);
379 
380 	return 0;
381 }
382 
383 static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
384 {
385 	struct list_lru_memcg *memcg_lrus;
386 	/*
387 	 * This is called when shrinker has already been unregistered,
388 	 * and nobody can use it. So, there is no need to use kvfree_rcu().
389 	 */
390 	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
391 	__memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
392 	kvfree(memcg_lrus);
393 }
394 
395 static void kvfree_rcu(struct rcu_head *head)
396 {
397 	struct list_lru_memcg *mlru;
398 
399 	mlru = container_of(head, struct list_lru_memcg, rcu);
400 	kvfree(mlru);
401 }
402 
403 static int memcg_update_list_lru_node(struct list_lru_node *nlru,
404 				      int old_size, int new_size)
405 {
406 	struct list_lru_memcg *old, *new;
407 
408 	BUG_ON(old_size > new_size);
409 
410 	old = rcu_dereference_protected(nlru->memcg_lrus,
411 					lockdep_is_held(&list_lrus_mutex));
412 	new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
413 	if (!new)
414 		return -ENOMEM;
415 
416 	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
417 		kvfree(new);
418 		return -ENOMEM;
419 	}
420 
421 	memcpy(&new->lru, &old->lru, old_size * sizeof(void *));
422 
423 	/*
424 	 * The locking below allows readers that hold nlru->lock avoid taking
425 	 * rcu_read_lock (see list_lru_from_memcg_idx).
426 	 *
427 	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
428 	 * we have to use IRQ-safe primitives here to avoid deadlock.
429 	 */
430 	spin_lock_irq(&nlru->lock);
431 	rcu_assign_pointer(nlru->memcg_lrus, new);
432 	spin_unlock_irq(&nlru->lock);
433 
434 	call_rcu(&old->rcu, kvfree_rcu);
435 	return 0;
436 }
437 
438 static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
439 					      int old_size, int new_size)
440 {
441 	struct list_lru_memcg *memcg_lrus;
442 
443 	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
444 					       lockdep_is_held(&list_lrus_mutex));
445 	/* do not bother shrinking the array back to the old size, because we
446 	 * cannot handle allocation failures here */
447 	__memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
448 }
449 
450 static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
451 {
452 	int i;
453 
454 	if (!memcg_aware)
455 		return 0;
456 
457 	for_each_node(i) {
458 		if (memcg_init_list_lru_node(&lru->node[i]))
459 			goto fail;
460 	}
461 	return 0;
462 fail:
463 	for (i = i - 1; i >= 0; i--) {
464 		if (!lru->node[i].memcg_lrus)
465 			continue;
466 		memcg_destroy_list_lru_node(&lru->node[i]);
467 	}
468 	return -ENOMEM;
469 }
470 
471 static void memcg_destroy_list_lru(struct list_lru *lru)
472 {
473 	int i;
474 
475 	if (!list_lru_memcg_aware(lru))
476 		return;
477 
478 	for_each_node(i)
479 		memcg_destroy_list_lru_node(&lru->node[i]);
480 }
481 
482 static int memcg_update_list_lru(struct list_lru *lru,
483 				 int old_size, int new_size)
484 {
485 	int i;
486 
487 	if (!list_lru_memcg_aware(lru))
488 		return 0;
489 
490 	for_each_node(i) {
491 		if (memcg_update_list_lru_node(&lru->node[i],
492 					       old_size, new_size))
493 			goto fail;
494 	}
495 	return 0;
496 fail:
497 	for (i = i - 1; i >= 0; i--) {
498 		if (!lru->node[i].memcg_lrus)
499 			continue;
500 
501 		memcg_cancel_update_list_lru_node(&lru->node[i],
502 						  old_size, new_size);
503 	}
504 	return -ENOMEM;
505 }
506 
507 static void memcg_cancel_update_list_lru(struct list_lru *lru,
508 					 int old_size, int new_size)
509 {
510 	int i;
511 
512 	if (!list_lru_memcg_aware(lru))
513 		return;
514 
515 	for_each_node(i)
516 		memcg_cancel_update_list_lru_node(&lru->node[i],
517 						  old_size, new_size);
518 }
519 
520 int memcg_update_all_list_lrus(int new_size)
521 {
522 	int ret = 0;
523 	struct list_lru *lru;
524 	int old_size = memcg_nr_cache_ids;
525 
526 	mutex_lock(&list_lrus_mutex);
527 	list_for_each_entry(lru, &list_lrus, list) {
528 		ret = memcg_update_list_lru(lru, old_size, new_size);
529 		if (ret)
530 			goto fail;
531 	}
532 out:
533 	mutex_unlock(&list_lrus_mutex);
534 	return ret;
535 fail:
536 	list_for_each_entry_continue_reverse(lru, &list_lrus, list)
537 		memcg_cancel_update_list_lru(lru, old_size, new_size);
538 	goto out;
539 }
540 
541 static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
542 				      int src_idx, struct mem_cgroup *dst_memcg)
543 {
544 	struct list_lru_node *nlru = &lru->node[nid];
545 	int dst_idx = dst_memcg->kmemcg_id;
546 	struct list_lru_one *src, *dst;
547 	bool set;
548 
549 	/*
550 	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
551 	 * we have to use IRQ-safe primitives here to avoid deadlock.
552 	 */
553 	spin_lock_irq(&nlru->lock);
554 
555 	src = list_lru_from_memcg_idx(nlru, src_idx);
556 	dst = list_lru_from_memcg_idx(nlru, dst_idx);
557 
558 	list_splice_init(&src->list, &dst->list);
559 	set = (!dst->nr_items && src->nr_items);
560 	dst->nr_items += src->nr_items;
561 	if (set)
562 		memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
563 	src->nr_items = 0;
564 
565 	spin_unlock_irq(&nlru->lock);
566 }
567 
568 static void memcg_drain_list_lru(struct list_lru *lru,
569 				 int src_idx, struct mem_cgroup *dst_memcg)
570 {
571 	int i;
572 
573 	if (!list_lru_memcg_aware(lru))
574 		return;
575 
576 	for_each_node(i)
577 		memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
578 }
579 
580 void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
581 {
582 	struct list_lru *lru;
583 
584 	mutex_lock(&list_lrus_mutex);
585 	list_for_each_entry(lru, &list_lrus, list)
586 		memcg_drain_list_lru(lru, src_idx, dst_memcg);
587 	mutex_unlock(&list_lrus_mutex);
588 }
589 #else
590 static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
591 {
592 	return 0;
593 }
594 
595 static void memcg_destroy_list_lru(struct list_lru *lru)
596 {
597 }
598 #endif /* CONFIG_MEMCG_KMEM */
599 
600 int __list_lru_init(struct list_lru *lru, bool memcg_aware,
601 		    struct lock_class_key *key, struct shrinker *shrinker)
602 {
603 	int i;
604 	size_t size = sizeof(*lru->node) * nr_node_ids;
605 	int err = -ENOMEM;
606 
607 #ifdef CONFIG_MEMCG_KMEM
608 	if (shrinker)
609 		lru->shrinker_id = shrinker->id;
610 	else
611 		lru->shrinker_id = -1;
612 #endif
613 	memcg_get_cache_ids();
614 
615 	lru->node = kzalloc(size, GFP_KERNEL);
616 	if (!lru->node)
617 		goto out;
618 
619 	for_each_node(i) {
620 		spin_lock_init(&lru->node[i].lock);
621 		if (key)
622 			lockdep_set_class(&lru->node[i].lock, key);
623 		init_one_lru(&lru->node[i].lru);
624 	}
625 
626 	err = memcg_init_list_lru(lru, memcg_aware);
627 	if (err) {
628 		kfree(lru->node);
629 		/* Do this so a list_lru_destroy() doesn't crash: */
630 		lru->node = NULL;
631 		goto out;
632 	}
633 
634 	list_lru_register(lru);
635 out:
636 	memcg_put_cache_ids();
637 	return err;
638 }
639 EXPORT_SYMBOL_GPL(__list_lru_init);
640 
641 void list_lru_destroy(struct list_lru *lru)
642 {
643 	/* Already destroyed or not yet initialized? */
644 	if (!lru->node)
645 		return;
646 
647 	memcg_get_cache_ids();
648 
649 	list_lru_unregister(lru);
650 
651 	memcg_destroy_list_lru(lru);
652 	kfree(lru->node);
653 	lru->node = NULL;
654 
655 #ifdef CONFIG_MEMCG_KMEM
656 	lru->shrinker_id = -1;
657 #endif
658 	memcg_put_cache_ids();
659 }
660 EXPORT_SYMBOL_GPL(list_lru_destroy);
661