xref: /openbmc/linux/mm/slab_common.c (revision 160b8e75)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Slab allocator functions that are independent of the allocator strategy
4  *
5  * (C) 2012 Christoph Lameter <cl@linux.com>
6  */
7 #include <linux/slab.h>
8 
9 #include <linux/mm.h>
10 #include <linux/poison.h>
11 #include <linux/interrupt.h>
12 #include <linux/memory.h>
13 #include <linux/compiler.h>
14 #include <linux/module.h>
15 #include <linux/cpu.h>
16 #include <linux/uaccess.h>
17 #include <linux/seq_file.h>
18 #include <linux/proc_fs.h>
19 #include <asm/cacheflush.h>
20 #include <asm/tlbflush.h>
21 #include <asm/page.h>
22 #include <linux/memcontrol.h>
23 
24 #define CREATE_TRACE_POINTS
25 #include <trace/events/kmem.h>
26 
27 #include "slab.h"
28 
29 enum slab_state slab_state;
30 LIST_HEAD(slab_caches);
31 DEFINE_MUTEX(slab_mutex);
32 struct kmem_cache *kmem_cache;
33 
34 #ifdef CONFIG_HARDENED_USERCOPY
35 bool usercopy_fallback __ro_after_init =
36 		IS_ENABLED(CONFIG_HARDENED_USERCOPY_FALLBACK);
37 module_param(usercopy_fallback, bool, 0400);
38 MODULE_PARM_DESC(usercopy_fallback,
39 		"WARN instead of reject usercopy whitelist violations");
40 #endif
41 
42 static LIST_HEAD(slab_caches_to_rcu_destroy);
43 static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work);
44 static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
45 		    slab_caches_to_rcu_destroy_workfn);
46 
47 /*
48  * Set of flags that will prevent slab merging
49  */
50 #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
51 		SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
52 		SLAB_FAILSLAB | SLAB_KASAN)
53 
54 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
55 			 SLAB_ACCOUNT)
56 
57 /*
58  * Merge control. If this is set, no merging of slab caches will occur.
59  */
60 static bool slab_nomerge = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT);
61 
62 static int __init setup_slab_nomerge(char *str)
63 {
64 	slab_nomerge = true;
65 	return 1;
66 }
67 
68 #ifdef CONFIG_SLUB
69 __setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
70 #endif
71 
72 __setup("slab_nomerge", setup_slab_nomerge);
73 
74 /*
75  * Determine the size of a slab object
76  */
77 unsigned int kmem_cache_size(struct kmem_cache *s)
78 {
79 	return s->object_size;
80 }
81 EXPORT_SYMBOL(kmem_cache_size);
82 
83 #ifdef CONFIG_DEBUG_VM
84 static int kmem_cache_sanity_check(const char *name, size_t size)
85 {
86 	struct kmem_cache *s = NULL;
87 
88 	if (!name || in_interrupt() || size < sizeof(void *) ||
89 		size > KMALLOC_MAX_SIZE) {
90 		pr_err("kmem_cache_create(%s) integrity check failed\n", name);
91 		return -EINVAL;
92 	}
93 
94 	list_for_each_entry(s, &slab_caches, list) {
95 		char tmp;
96 		int res;
97 
98 		/*
99 		 * This happens when a module is unloaded without destroying
100 		 * its slab cache and no one else has reused the module's
101 		 * vmalloc area.  Print a warning.
102 		 */
103 		res = probe_kernel_address(s->name, tmp);
104 		if (res) {
105 			pr_err("Slab cache with size %d has lost its name\n",
106 			       s->object_size);
107 			continue;
108 		}
109 	}
110 
111 	WARN_ON(strchr(name, ' '));	/* It confuses parsers */
112 	return 0;
113 }
114 #else
115 static inline int kmem_cache_sanity_check(const char *name, size_t size)
116 {
117 	return 0;
118 }
119 #endif
120 
121 void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
122 {
123 	size_t i;
124 
125 	for (i = 0; i < nr; i++) {
126 		if (s)
127 			kmem_cache_free(s, p[i]);
128 		else
129 			kfree(p[i]);
130 	}
131 }
132 
133 int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
134 								void **p)
135 {
136 	size_t i;
137 
138 	for (i = 0; i < nr; i++) {
139 		void *x = p[i] = kmem_cache_alloc(s, flags);
140 		if (!x) {
141 			__kmem_cache_free_bulk(s, i, p);
142 			return 0;
143 		}
144 	}
145 	return i;
146 }
147 
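/*
 * Illustrative sketch (hypothetical caller, not part of this file; wrapped in
 * #if 0 so it cannot affect compilation): the bulk API is all-or-nothing, so
 * a caller only has to compare the return value with the requested count.
 */
#if 0	/* example only */
static int fill_batch(struct kmem_cache *cachep, void **objs, size_t nr)
{
	if (kmem_cache_alloc_bulk(cachep, GFP_KERNEL, nr, objs) != nr)
		return -ENOMEM;

	/* ... use objs[0..nr - 1] ... */

	kmem_cache_free_bulk(cachep, nr, objs);
	return 0;
}
#endif
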
148 #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
149 
150 LIST_HEAD(slab_root_caches);
151 
152 void slab_init_memcg_params(struct kmem_cache *s)
153 {
154 	s->memcg_params.root_cache = NULL;
155 	RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
156 	INIT_LIST_HEAD(&s->memcg_params.children);
157 }
158 
159 static int init_memcg_params(struct kmem_cache *s,
160 		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
161 {
162 	struct memcg_cache_array *arr;
163 
164 	if (root_cache) {
165 		s->memcg_params.root_cache = root_cache;
166 		s->memcg_params.memcg = memcg;
167 		INIT_LIST_HEAD(&s->memcg_params.children_node);
168 		INIT_LIST_HEAD(&s->memcg_params.kmem_caches_node);
169 		return 0;
170 	}
171 
172 	slab_init_memcg_params(s);
173 
174 	if (!memcg_nr_cache_ids)
175 		return 0;
176 
177 	arr = kvzalloc(sizeof(struct memcg_cache_array) +
178 		       memcg_nr_cache_ids * sizeof(void *),
179 		       GFP_KERNEL);
180 	if (!arr)
181 		return -ENOMEM;
182 
183 	RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr);
184 	return 0;
185 }
186 
187 static void destroy_memcg_params(struct kmem_cache *s)
188 {
189 	if (is_root_cache(s))
190 		kvfree(rcu_access_pointer(s->memcg_params.memcg_caches));
191 }
192 
193 static void free_memcg_params(struct rcu_head *rcu)
194 {
195 	struct memcg_cache_array *old;
196 
197 	old = container_of(rcu, struct memcg_cache_array, rcu);
198 	kvfree(old);
199 }
200 
201 static int update_memcg_params(struct kmem_cache *s, int new_array_size)
202 {
203 	struct memcg_cache_array *old, *new;
204 
205 	new = kvzalloc(sizeof(struct memcg_cache_array) +
206 		       new_array_size * sizeof(void *), GFP_KERNEL);
207 	if (!new)
208 		return -ENOMEM;
209 
210 	old = rcu_dereference_protected(s->memcg_params.memcg_caches,
211 					lockdep_is_held(&slab_mutex));
212 	if (old)
213 		memcpy(new->entries, old->entries,
214 		       memcg_nr_cache_ids * sizeof(void *));
215 
216 	rcu_assign_pointer(s->memcg_params.memcg_caches, new);
217 	if (old)
218 		call_rcu(&old->rcu, free_memcg_params);
219 	return 0;
220 }
221 
222 int memcg_update_all_caches(int num_memcgs)
223 {
224 	struct kmem_cache *s;
225 	int ret = 0;
226 
227 	mutex_lock(&slab_mutex);
228 	list_for_each_entry(s, &slab_root_caches, root_caches_node) {
229 		ret = update_memcg_params(s, num_memcgs);
230 		/*
231 		 * Instead of freeing the memory already allocated, leave the
232 		 * caches updated up to this point and return the error.
233 		 */
234 		if (ret)
235 			break;
236 	}
237 	mutex_unlock(&slab_mutex);
238 	return ret;
239 }
240 
241 void memcg_link_cache(struct kmem_cache *s)
242 {
243 	if (is_root_cache(s)) {
244 		list_add(&s->root_caches_node, &slab_root_caches);
245 	} else {
246 		list_add(&s->memcg_params.children_node,
247 			 &s->memcg_params.root_cache->memcg_params.children);
248 		list_add(&s->memcg_params.kmem_caches_node,
249 			 &s->memcg_params.memcg->kmem_caches);
250 	}
251 }
252 
253 static void memcg_unlink_cache(struct kmem_cache *s)
254 {
255 	if (is_root_cache(s)) {
256 		list_del(&s->root_caches_node);
257 	} else {
258 		list_del(&s->memcg_params.children_node);
259 		list_del(&s->memcg_params.kmem_caches_node);
260 	}
261 }
262 #else
263 static inline int init_memcg_params(struct kmem_cache *s,
264 		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
265 {
266 	return 0;
267 }
268 
269 static inline void destroy_memcg_params(struct kmem_cache *s)
270 {
271 }
272 
273 static inline void memcg_unlink_cache(struct kmem_cache *s)
274 {
275 }
276 #endif /* CONFIG_MEMCG && !CONFIG_SLOB */
277 
278 /*
279  * Figure out what the alignment of the objects will be given a set of
280  * flags, a user specified alignment and the size of the objects.
281  */
282 static unsigned long calculate_alignment(unsigned long flags,
283 		unsigned long align, unsigned long size)
284 {
285 	/*
286 	 * If the user wants hardware cache aligned objects then follow that
287 	 * suggestion if the object is sufficiently large.
288 	 *
289 	 * The hardware cache alignment cannot override the specified
290 	 * alignment though. If the specified alignment is greater, use it.
291 	 */
292 	if (flags & SLAB_HWCACHE_ALIGN) {
293 		unsigned long ralign;
294 
295 		ralign = cache_line_size();
296 		while (size <= ralign / 2)
297 			ralign /= 2;
298 		align = max(align, ralign);
299 	}
300 
301 	if (align < ARCH_SLAB_MINALIGN)
302 		align = ARCH_SLAB_MINALIGN;
303 
304 	return ALIGN(align, sizeof(void *));
305 }
306 
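/*
 * Worked example (illustrative, assuming a 64-byte cache line and an 8-byte
 * ARCH_SLAB_MINALIGN): for SLAB_HWCACHE_ALIGN, align == 0 and size == 20,
 * ralign starts at 64 and is halved while 20 <= ralign / 2, stopping at 32.
 * 32 is already a multiple of sizeof(void *), so the objects end up 32-byte
 * aligned rather than on full cache-line boundaries.
 */
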
307 /*
308  * Find a mergeable slab cache
309  */
310 int slab_unmergeable(struct kmem_cache *s)
311 {
312 	if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
313 		return 1;
314 
315 	if (!is_root_cache(s))
316 		return 1;
317 
318 	if (s->ctor)
319 		return 1;
320 
321 	if (s->usersize)
322 		return 1;
323 
324 	/*
325 	 * We may have set a slab to be unmergeable during bootstrap.
326 	 */
327 	if (s->refcount < 0)
328 		return 1;
329 
330 	return 0;
331 }
332 
333 struct kmem_cache *find_mergeable(size_t size, size_t align,
334 		slab_flags_t flags, const char *name, void (*ctor)(void *))
335 {
336 	struct kmem_cache *s;
337 
338 	if (slab_nomerge)
339 		return NULL;
340 
341 	if (ctor)
342 		return NULL;
343 
344 	size = ALIGN(size, sizeof(void *));
345 	align = calculate_alignment(flags, align, size);
346 	size = ALIGN(size, align);
347 	flags = kmem_cache_flags(size, flags, name, NULL);
348 
349 	if (flags & SLAB_NEVER_MERGE)
350 		return NULL;
351 
352 	list_for_each_entry_reverse(s, &slab_root_caches, root_caches_node) {
353 		if (slab_unmergeable(s))
354 			continue;
355 
356 		if (size > s->size)
357 			continue;
358 
359 		if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
360 			continue;
361 		/*
362 		 * Check if alignment is compatible.
363 		 * Courtesy of Adrian Drzewiecki
364 		 */
365 		if ((s->size & ~(align - 1)) != s->size)
366 			continue;
367 
368 		if (s->size - size >= sizeof(void *))
369 			continue;
370 
371 		if (IS_ENABLED(CONFIG_SLAB) && align &&
372 			(align > s->align || s->align % align))
373 			continue;
374 
375 		return s;
376 	}
377 	return NULL;
378 }
379 
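/*
 * Example (illustrative): two caches requested with the same effective layout
 * (say both round up to 64 bytes with 8-byte alignment), no constructor, no
 * usercopy region, matching SLAB_MERGE_SAME bits and none of the
 * SLAB_NEVER_MERGE flags will typically share a single kmem_cache; the second
 * request is satisfied by aliasing the first via __kmem_cache_alias() instead
 * of creating a new cache.
 */
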
380 static struct kmem_cache *create_cache(const char *name,
381 		size_t object_size, size_t size, size_t align,
382 		slab_flags_t flags, size_t useroffset,
383 		size_t usersize, void (*ctor)(void *),
384 		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
385 {
386 	struct kmem_cache *s;
387 	int err;
388 
389 	if (WARN_ON(useroffset + usersize > object_size))
390 		useroffset = usersize = 0;
391 
392 	err = -ENOMEM;
393 	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
394 	if (!s)
395 		goto out;
396 
397 	s->name = name;
398 	s->object_size = object_size;
399 	s->size = size;
400 	s->align = align;
401 	s->ctor = ctor;
402 	s->useroffset = useroffset;
403 	s->usersize = usersize;
404 
405 	err = init_memcg_params(s, memcg, root_cache);
406 	if (err)
407 		goto out_free_cache;
408 
409 	err = __kmem_cache_create(s, flags);
410 	if (err)
411 		goto out_free_cache;
412 
413 	s->refcount = 1;
414 	list_add(&s->list, &slab_caches);
415 	memcg_link_cache(s);
416 out:
417 	if (err)
418 		return ERR_PTR(err);
419 	return s;
420 
421 out_free_cache:
422 	destroy_memcg_params(s);
423 	kmem_cache_free(kmem_cache, s);
424 	goto out;
425 }
426 
427 /*
428  * kmem_cache_create_usercopy - Create a cache.
429  * @name: A string which is used in /proc/slabinfo to identify this cache.
430  * @size: The size of objects to be created in this cache.
431  * @align: The required alignment for the objects.
432  * @flags: SLAB flags
433  * @useroffset: Usercopy region offset
434  * @usersize: Usercopy region size
435  * @ctor: A constructor for the objects.
436  *
437  * Returns a ptr to the cache on success, NULL on failure.
438  * Cannot be called within an interrupt, but can be interrupted.
439  * The @ctor is run when new pages are allocated by the cache.
440  *
441  * The flags are
442  *
443  * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
444  * to catch references to uninitialised memory.
445  *
446  * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
447  * for buffer overruns.
448  *
449  * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
450  * cacheline.  This can be beneficial if you're counting cycles as closely
451  * as davem.
452  */
453 struct kmem_cache *
454 kmem_cache_create_usercopy(const char *name, size_t size, size_t align,
455 		  slab_flags_t flags, size_t useroffset, size_t usersize,
456 		  void (*ctor)(void *))
457 {
458 	struct kmem_cache *s = NULL;
459 	const char *cache_name;
460 	int err;
461 
462 	get_online_cpus();
463 	get_online_mems();
464 	memcg_get_cache_ids();
465 
466 	mutex_lock(&slab_mutex);
467 
468 	err = kmem_cache_sanity_check(name, size);
469 	if (err) {
470 		goto out_unlock;
471 	}
472 
473 	/* Refuse requests with allocator specific flags */
474 	if (flags & ~SLAB_FLAGS_PERMITTED) {
475 		err = -EINVAL;
476 		goto out_unlock;
477 	}
478 
479 	/*
480 	 * Some allocators will constrain the set of valid flags to a subset
481 	 * of all flags. We expect them to define CACHE_CREATE_MASK in this
482 	 * case, and we'll just provide them with a sanitized version of the
483 	 * passed flags.
484 	 */
485 	flags &= CACHE_CREATE_MASK;
486 
487 	/* Fail closed on bad usersize or useroffset values. */
488 	if (WARN_ON(!usersize && useroffset) ||
489 	    WARN_ON(size < usersize || size - usersize < useroffset))
490 		usersize = useroffset = 0;
491 
492 	if (!usersize)
493 		s = __kmem_cache_alias(name, size, align, flags, ctor);
494 	if (s)
495 		goto out_unlock;
496 
497 	cache_name = kstrdup_const(name, GFP_KERNEL);
498 	if (!cache_name) {
499 		err = -ENOMEM;
500 		goto out_unlock;
501 	}
502 
503 	s = create_cache(cache_name, size, size,
504 			 calculate_alignment(flags, align, size),
505 			 flags, useroffset, usersize, ctor, NULL, NULL);
506 	if (IS_ERR(s)) {
507 		err = PTR_ERR(s);
508 		kfree_const(cache_name);
509 	}
510 
511 out_unlock:
512 	mutex_unlock(&slab_mutex);
513 
514 	memcg_put_cache_ids();
515 	put_online_mems();
516 	put_online_cpus();
517 
518 	if (err) {
519 		if (flags & SLAB_PANIC)
520 			panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
521 				name, err);
522 		else {
523 			pr_warn("kmem_cache_create(%s) failed with error %d\n",
524 				name, err);
525 			dump_stack();
526 		}
527 		return NULL;
528 	}
529 	return s;
530 }
531 EXPORT_SYMBOL(kmem_cache_create_usercopy);
532 
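/*
 * Illustrative sketch (not part of this file; wrapped in #if 0 so it cannot
 * affect compilation): a hypothetical caller creating a cache whose usercopy
 * whitelist covers only the 'data' field, so hardened usercopy rejects (or,
 * with usercopy_fallback, warns about) copies that touch 'flags' or 'lock'.
 * 'struct foo' and foo_cachep are made-up names for the example.
 */
#if 0	/* example only */
struct foo {
	u32 flags;
	char data[64];		/* the only region copied to/from user space */
	spinlock_t lock;
};

static struct kmem_cache *foo_cachep;

static int __init foo_cache_init(void)
{
	foo_cachep = kmem_cache_create_usercopy("foo",
					sizeof(struct foo), 0,
					SLAB_HWCACHE_ALIGN,
					offsetof(struct foo, data),
					FIELD_SIZEOF(struct foo, data),
					NULL);
	return foo_cachep ? 0 : -ENOMEM;
}
#endif
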
533 struct kmem_cache *
534 kmem_cache_create(const char *name, size_t size, size_t align,
535 		slab_flags_t flags, void (*ctor)(void *))
536 {
537 	return kmem_cache_create_usercopy(name, size, align, flags, 0, 0,
538 					  ctor);
539 }
540 EXPORT_SYMBOL(kmem_cache_create);
541 
542 static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
543 {
544 	LIST_HEAD(to_destroy);
545 	struct kmem_cache *s, *s2;
546 
547 	/*
548 	 * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
549 	 * @slab_caches_to_rcu_destroy list.  The slab pages are freed
550 	 * through RCU, and the associated kmem_cache is dereferenced
551 	 * while freeing the pages, so the kmem_caches should be freed only
552 	 * after the pending RCU operations are finished.  As rcu_barrier()
553 	 * is a pretty slow operation, we batch all pending destructions
554 	 * asynchronously.
555 	 */
556 	mutex_lock(&slab_mutex);
557 	list_splice_init(&slab_caches_to_rcu_destroy, &to_destroy);
558 	mutex_unlock(&slab_mutex);
559 
560 	if (list_empty(&to_destroy))
561 		return;
562 
563 	rcu_barrier();
564 
565 	list_for_each_entry_safe(s, s2, &to_destroy, list) {
566 #ifdef SLAB_SUPPORTS_SYSFS
567 		sysfs_slab_release(s);
568 #else
569 		slab_kmem_cache_release(s);
570 #endif
571 	}
572 }
573 
574 static int shutdown_cache(struct kmem_cache *s)
575 {
576 	/* free asan quarantined objects */
577 	kasan_cache_shutdown(s);
578 
579 	if (__kmem_cache_shutdown(s) != 0)
580 		return -EBUSY;
581 
582 	memcg_unlink_cache(s);
583 	list_del(&s->list);
584 
585 	if (s->flags & SLAB_TYPESAFE_BY_RCU) {
586 		list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
587 		schedule_work(&slab_caches_to_rcu_destroy_work);
588 	} else {
589 #ifdef SLAB_SUPPORTS_SYSFS
590 		sysfs_slab_release(s);
591 #else
592 		slab_kmem_cache_release(s);
593 #endif
594 	}
595 
596 	return 0;
597 }
598 
599 #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
600 /*
601  * memcg_create_kmem_cache - Create a cache for a memory cgroup.
602  * @memcg: The memory cgroup the new cache is for.
603  * @root_cache: The parent of the new cache.
604  *
605  * This function attempts to create a kmem cache that will serve allocation
606  * requests going from @memcg to @root_cache. The new cache inherits properties
607  * from its parent.
608  */
609 void memcg_create_kmem_cache(struct mem_cgroup *memcg,
610 			     struct kmem_cache *root_cache)
611 {
612 	static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
613 	struct cgroup_subsys_state *css = &memcg->css;
614 	struct memcg_cache_array *arr;
615 	struct kmem_cache *s = NULL;
616 	char *cache_name;
617 	int idx;
618 
619 	get_online_cpus();
620 	get_online_mems();
621 
622 	mutex_lock(&slab_mutex);
623 
624 	/*
625 	 * The memory cgroup could have been offlined while the cache
626 	 * creation work was pending.
627 	 */
628 	if (memcg->kmem_state != KMEM_ONLINE)
629 		goto out_unlock;
630 
631 	idx = memcg_cache_id(memcg);
632 	arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches,
633 					lockdep_is_held(&slab_mutex));
634 
635 	/*
636 	 * Since per-memcg caches are created asynchronously on first
637 	 * allocation (see memcg_kmem_get_cache()), several threads can try to
638 	 * create the same cache, but only one of them may succeed.
639 	 */
640 	if (arr->entries[idx])
641 		goto out_unlock;
642 
643 	cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
644 	cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name,
645 			       css->serial_nr, memcg_name_buf);
646 	if (!cache_name)
647 		goto out_unlock;
648 
649 	s = create_cache(cache_name, root_cache->object_size,
650 			 root_cache->size, root_cache->align,
651 			 root_cache->flags & CACHE_CREATE_MASK,
652 			 root_cache->useroffset, root_cache->usersize,
653 			 root_cache->ctor, memcg, root_cache);
654 	/*
655 	 * If we could not create a memcg cache, do not complain, because
656 	 * that's not critical at all as we can always proceed with the root
657 	 * cache.
658 	 */
659 	if (IS_ERR(s)) {
660 		kfree(cache_name);
661 		goto out_unlock;
662 	}
663 
664 	/*
665 	 * Since readers won't lock (see cache_from_memcg_idx()), we need a
666 	 * barrier here to ensure nobody will see the kmem_cache partially
667 	 * initialized.
668 	 */
669 	smp_wmb();
670 	arr->entries[idx] = s;
671 
672 out_unlock:
673 	mutex_unlock(&slab_mutex);
674 
675 	put_online_mems();
676 	put_online_cpus();
677 }
678 
679 static void kmemcg_deactivate_workfn(struct work_struct *work)
680 {
681 	struct kmem_cache *s = container_of(work, struct kmem_cache,
682 					    memcg_params.deact_work);
683 
684 	get_online_cpus();
685 	get_online_mems();
686 
687 	mutex_lock(&slab_mutex);
688 
689 	s->memcg_params.deact_fn(s);
690 
691 	mutex_unlock(&slab_mutex);
692 
693 	put_online_mems();
694 	put_online_cpus();
695 
696 	/* done, put the ref from slab_deactivate_memcg_cache_rcu_sched() */
697 	css_put(&s->memcg_params.memcg->css);
698 }
699 
700 static void kmemcg_deactivate_rcufn(struct rcu_head *head)
701 {
702 	struct kmem_cache *s = container_of(head, struct kmem_cache,
703 					    memcg_params.deact_rcu_head);
704 
705 	/*
706 	 * We need to grab blocking locks.  Bounce to ->deact_work.  The
707 	 * work item shares the space with the RCU head and can't be
708 	 * initialized earlier.
709 	 */
710 	INIT_WORK(&s->memcg_params.deact_work, kmemcg_deactivate_workfn);
711 	queue_work(memcg_kmem_cache_wq, &s->memcg_params.deact_work);
712 }
713 
714 /**
715  * slab_deactivate_memcg_cache_rcu_sched - schedule deactivation after a
716  *					   sched RCU grace period
717  * @s: target kmem_cache
718  * @deact_fn: deactivation function to call
719  *
720  * Schedule @deact_fn to be invoked with online cpus, mems and slab_mutex
721  * held after a sched RCU grace period.  The slab is guaranteed to stay
722  * alive until @deact_fn is finished.  This is to be used from
723  * __kmemcg_cache_deactivate().
724  */
725 void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
726 					   void (*deact_fn)(struct kmem_cache *))
727 {
728 	if (WARN_ON_ONCE(is_root_cache(s)) ||
729 	    WARN_ON_ONCE(s->memcg_params.deact_fn))
730 		return;
731 
732 	/* pin memcg so that @s doesn't get destroyed in the middle */
733 	css_get(&s->memcg_params.memcg->css);
734 
735 	s->memcg_params.deact_fn = deact_fn;
736 	call_rcu_sched(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn);
737 }
738 
739 void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
740 {
741 	int idx;
742 	struct memcg_cache_array *arr;
743 	struct kmem_cache *s, *c;
744 
745 	idx = memcg_cache_id(memcg);
746 
747 	get_online_cpus();
748 	get_online_mems();
749 
750 	mutex_lock(&slab_mutex);
751 	list_for_each_entry(s, &slab_root_caches, root_caches_node) {
752 		arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
753 						lockdep_is_held(&slab_mutex));
754 		c = arr->entries[idx];
755 		if (!c)
756 			continue;
757 
758 		__kmemcg_cache_deactivate(c);
759 		arr->entries[idx] = NULL;
760 	}
761 	mutex_unlock(&slab_mutex);
762 
763 	put_online_mems();
764 	put_online_cpus();
765 }
766 
767 void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
768 {
769 	struct kmem_cache *s, *s2;
770 
771 	get_online_cpus();
772 	get_online_mems();
773 
774 	mutex_lock(&slab_mutex);
775 	list_for_each_entry_safe(s, s2, &memcg->kmem_caches,
776 				 memcg_params.kmem_caches_node) {
777 		/*
778 		 * The cgroup is about to be freed and therefore has no charges
779 		 * left. Hence, all its caches must be empty by now.
780 		 */
781 		BUG_ON(shutdown_cache(s));
782 	}
783 	mutex_unlock(&slab_mutex);
784 
785 	put_online_mems();
786 	put_online_cpus();
787 }
788 
789 static int shutdown_memcg_caches(struct kmem_cache *s)
790 {
791 	struct memcg_cache_array *arr;
792 	struct kmem_cache *c, *c2;
793 	LIST_HEAD(busy);
794 	int i;
795 
796 	BUG_ON(!is_root_cache(s));
797 
798 	/*
799 	 * First, shut down active caches, i.e. caches that belong to online
800 	 * memory cgroups.
801 	 */
802 	arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
803 					lockdep_is_held(&slab_mutex));
804 	for_each_memcg_cache_index(i) {
805 		c = arr->entries[i];
806 		if (!c)
807 			continue;
808 		if (shutdown_cache(c))
809 			/*
810 			 * The cache still has objects. Move it to a temporary
811 			 * list so as not to try to destroy it for a second
812 			 * time while iterating over inactive caches below.
813 			 */
814 			list_move(&c->memcg_params.children_node, &busy);
815 		else
816 			/*
817 			 * The cache is empty and will be destroyed soon. Clear
818 			 * the pointer to it in the memcg_caches array so that
819 			 * it will never be accessed even if the root cache
820 			 * stays alive.
821 			 */
822 			arr->entries[i] = NULL;
823 	}
824 
825 	/*
826 	 * Second, shut down all caches left from memory cgroups that are now
827 	 * offline.
828 	 */
829 	list_for_each_entry_safe(c, c2, &s->memcg_params.children,
830 				 memcg_params.children_node)
831 		shutdown_cache(c);
832 
833 	list_splice(&busy, &s->memcg_params.children);
834 
835 	/*
836 	 * A cache being destroyed must be empty. In particular, this means
837 	 * that all per memcg caches attached to it must be empty too.
838 	 */
839 	if (!list_empty(&s->memcg_params.children))
840 		return -EBUSY;
841 	return 0;
842 }
843 #else
844 static inline int shutdown_memcg_caches(struct kmem_cache *s)
845 {
846 	return 0;
847 }
848 #endif /* CONFIG_MEMCG && !CONFIG_SLOB */
849 
850 void slab_kmem_cache_release(struct kmem_cache *s)
851 {
852 	__kmem_cache_release(s);
853 	destroy_memcg_params(s);
854 	kfree_const(s->name);
855 	kmem_cache_free(kmem_cache, s);
856 }
857 
858 void kmem_cache_destroy(struct kmem_cache *s)
859 {
860 	int err;
861 
862 	if (unlikely(!s))
863 		return;
864 
865 	get_online_cpus();
866 	get_online_mems();
867 
868 	mutex_lock(&slab_mutex);
869 
870 	s->refcount--;
871 	if (s->refcount)
872 		goto out_unlock;
873 
874 	err = shutdown_memcg_caches(s);
875 	if (!err)
876 		err = shutdown_cache(s);
877 
878 	if (err) {
879 		pr_err("kmem_cache_destroy %s: Slab cache still has objects\n",
880 		       s->name);
881 		dump_stack();
882 	}
883 out_unlock:
884 	mutex_unlock(&slab_mutex);
885 
886 	put_online_mems();
887 	put_online_cpus();
888 }
889 EXPORT_SYMBOL(kmem_cache_destroy);
890 
891 /**
892  * kmem_cache_shrink - Shrink a cache.
893  * @cachep: The cache to shrink.
894  *
895  * Releases as many slabs as possible for a cache.
896  * To help debugging, a zero exit status indicates all slabs were released.
897  */
898 int kmem_cache_shrink(struct kmem_cache *cachep)
899 {
900 	int ret;
901 
902 	get_online_cpus();
903 	get_online_mems();
904 	kasan_cache_shrink(cachep);
905 	ret = __kmem_cache_shrink(cachep);
906 	put_online_mems();
907 	put_online_cpus();
908 	return ret;
909 }
910 EXPORT_SYMBOL(kmem_cache_shrink);
911 
912 bool slab_is_available(void)
913 {
914 	return slab_state >= UP;
915 }
916 
917 #ifndef CONFIG_SLOB
918 /* Create a cache during boot when no slab services are available yet */
919 void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
920 		slab_flags_t flags, size_t useroffset, size_t usersize)
921 {
922 	int err;
923 
924 	s->name = name;
925 	s->size = s->object_size = size;
926 	s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
927 	s->useroffset = useroffset;
928 	s->usersize = usersize;
929 
930 	slab_init_memcg_params(s);
931 
932 	err = __kmem_cache_create(s, flags);
933 
934 	if (err)
935 		panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n",
936 					name, size, err);
937 
938 	s->refcount = -1;	/* Exempt from merging for now */
939 }
940 
941 struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
942 				slab_flags_t flags, size_t useroffset,
943 				size_t usersize)
944 {
945 	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
946 
947 	if (!s)
948 		panic("Out of memory when creating slab %s\n", name);
949 
950 	create_boot_cache(s, name, size, flags, useroffset, usersize);
951 	list_add(&s->list, &slab_caches);
952 	memcg_link_cache(s);
953 	s->refcount = 1;
954 	return s;
955 }
956 
957 struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
958 EXPORT_SYMBOL(kmalloc_caches);
959 
960 #ifdef CONFIG_ZONE_DMA
961 struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
962 EXPORT_SYMBOL(kmalloc_dma_caches);
963 #endif
964 
965 /*
966  * Conversion table for small slab sizes / 8 to the index in the kmalloc
967  * array. This is necessary for slabs < 192 since we have non-power-of-two
968  * cache sizes there. The size of larger slabs can be determined using
969  * fls().
970  */
971 static s8 size_index[24] = {
972 	3,	/* 8 */
973 	4,	/* 16 */
974 	5,	/* 24 */
975 	5,	/* 32 */
976 	6,	/* 40 */
977 	6,	/* 48 */
978 	6,	/* 56 */
979 	6,	/* 64 */
980 	1,	/* 72 */
981 	1,	/* 80 */
982 	1,	/* 88 */
983 	1,	/* 96 */
984 	7,	/* 104 */
985 	7,	/* 112 */
986 	7,	/* 120 */
987 	7,	/* 128 */
988 	2,	/* 136 */
989 	2,	/* 144 */
990 	2,	/* 152 */
991 	2,	/* 160 */
992 	2,	/* 168 */
993 	2,	/* 176 */
994 	2,	/* 184 */
995 	2	/* 192 */
996 };
997 
998 static inline int size_index_elem(size_t bytes)
999 {
1000 	return (bytes - 1) / 8;
1001 }
1002 
1003 /*
1004  * Find the kmem_cache structure that serves a given size of
1005  * allocation
1006  */
1007 struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
1008 {
1009 	int index;
1010 
1011 	if (unlikely(size > KMALLOC_MAX_SIZE)) {
1012 		WARN_ON_ONCE(!(flags & __GFP_NOWARN));
1013 		return NULL;
1014 	}
1015 
1016 	if (size <= 192) {
1017 		if (!size)
1018 			return ZERO_SIZE_PTR;
1019 
1020 		index = size_index[size_index_elem(size)];
1021 	} else
1022 		index = fls(size - 1);
1023 
1024 #ifdef CONFIG_ZONE_DMA
1025 	if (unlikely((flags & GFP_DMA)))
1026 		return kmalloc_dma_caches[index];
1027 
1028 #endif
1029 	return kmalloc_caches[index];
1030 }
1031 
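/*
 * Worked examples (illustrative, assuming the default size_index table with
 * KMALLOC_MIN_SIZE <= 8): kmalloc_slab(100, GFP_KERNEL) takes the small path,
 * size_index_elem(100) == (100 - 1) / 8 == 12, size_index[12] == 7, so the
 * kmalloc-128 cache is used.  kmalloc_slab(300, GFP_KERNEL) takes the large
 * path, fls(300 - 1) == 9, so the kmalloc-512 cache is used.
 */
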
1032 /*
1033  * kmalloc_info[] is used to make the slub_debug=,kmalloc-xx option work at boot time.
1034  * kmalloc_index() supports up to 2^26=64MB, so the final entry of the table is
1035  * kmalloc-67108864.
1036  */
1037 const struct kmalloc_info_struct kmalloc_info[] __initconst = {
1038 	{NULL,                      0},		{"kmalloc-96",             96},
1039 	{"kmalloc-192",           192},		{"kmalloc-8",               8},
1040 	{"kmalloc-16",             16},		{"kmalloc-32",             32},
1041 	{"kmalloc-64",             64},		{"kmalloc-128",           128},
1042 	{"kmalloc-256",           256},		{"kmalloc-512",           512},
1043 	{"kmalloc-1024",         1024},		{"kmalloc-2048",         2048},
1044 	{"kmalloc-4096",         4096},		{"kmalloc-8192",         8192},
1045 	{"kmalloc-16384",       16384},		{"kmalloc-32768",       32768},
1046 	{"kmalloc-65536",       65536},		{"kmalloc-131072",     131072},
1047 	{"kmalloc-262144",     262144},		{"kmalloc-524288",     524288},
1048 	{"kmalloc-1048576",   1048576},		{"kmalloc-2097152",   2097152},
1049 	{"kmalloc-4194304",   4194304},		{"kmalloc-8388608",   8388608},
1050 	{"kmalloc-16777216", 16777216},		{"kmalloc-33554432", 33554432},
1051 	{"kmalloc-67108864", 67108864}
1052 };
1053 
1054 /*
1055  * Patch up the size_index table if we have strange large alignment
1056  * requirements for the kmalloc array. This seems to be the case only
1057  * on MIPS. The standard arches will not generate any code here.
1058  *
1059  * Largest permitted alignment is 256 bytes due to the way we
1060  * handle the index determination for the smaller caches.
1061  *
1062  * Make sure that nothing crazy happens if someone starts tinkering
1063  * around with ARCH_KMALLOC_MINALIGN
1064  */
1065 void __init setup_kmalloc_cache_index_table(void)
1066 {
1067 	int i;
1068 
1069 	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
1070 		(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
1071 
1072 	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
1073 		int elem = size_index_elem(i);
1074 
1075 		if (elem >= ARRAY_SIZE(size_index))
1076 			break;
1077 		size_index[elem] = KMALLOC_SHIFT_LOW;
1078 	}
1079 
1080 	if (KMALLOC_MIN_SIZE >= 64) {
1081 		/*
1082 		 * The 96-byte cache is not used if the alignment
1083 		 * is 64 bytes.
1084 		 */
1085 		for (i = 64 + 8; i <= 96; i += 8)
1086 			size_index[size_index_elem(i)] = 7;
1087 
1088 	}
1089 
1090 	if (KMALLOC_MIN_SIZE >= 128) {
1091 		/*
1092 		 * The 192-byte cache is not used if the alignment
1093 		 * is 128 bytes. Redirect kmalloc to use the 256-byte
1094 		 * cache instead.
1095 		 */
1096 		for (i = 128 + 8; i <= 192; i += 8)
1097 			size_index[size_index_elem(i)] = 8;
1098 	}
1099 }
1100 
1101 static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
1102 {
1103 	kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name,
1104 					kmalloc_info[idx].size, flags, 0,
1105 					kmalloc_info[idx].size);
1106 }
1107 
1108 /*
1109  * Create the kmalloc array. Some of the regular kmalloc arrays
1110  * may already have been created because they were needed to
1111  * enable allocations for slab creation.
1112  */
1113 void __init create_kmalloc_caches(slab_flags_t flags)
1114 {
1115 	int i;
1116 
1117 	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
1118 		if (!kmalloc_caches[i])
1119 			new_kmalloc_cache(i, flags);
1120 
1121 		/*
1122 		 * Caches that are not a power-of-two size.  These have
1123 		 * to be created immediately after the earlier
1124 		 * power-of-two caches.
1125 		 */
1126 		if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
1127 			new_kmalloc_cache(1, flags);
1128 		if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
1129 			new_kmalloc_cache(2, flags);
1130 	}
1131 
1132 	/* Kmalloc array is now usable */
1133 	slab_state = UP;
1134 
1135 #ifdef CONFIG_ZONE_DMA
1136 	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
1137 		struct kmem_cache *s = kmalloc_caches[i];
1138 
1139 		if (s) {
1140 			int size = kmalloc_size(i);
1141 			char *n = kasprintf(GFP_NOWAIT,
1142 				 "dma-kmalloc-%d", size);
1143 
1144 			BUG_ON(!n);
1145 			kmalloc_dma_caches[i] = create_kmalloc_cache(n,
1146 				size, SLAB_CACHE_DMA | flags, 0, 0);
1147 		}
1148 	}
1149 #endif
1150 }
1151 #endif /* !CONFIG_SLOB */
1152 
1153 /*
1154  * To avoid unnecessary overhead, we pass through large allocation requests
1155  * directly to the page allocator. We use __GFP_COMP, because we will need to
1156  * know the allocation order to free the pages properly in kfree.
1157  */
1158 void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
1159 {
1160 	void *ret;
1161 	struct page *page;
1162 
1163 	flags |= __GFP_COMP;
1164 	page = alloc_pages(flags, order);
1165 	ret = page ? page_address(page) : NULL;
1166 	kmemleak_alloc(ret, size, 1, flags);
1167 	kasan_kmalloc_large(ret, size, flags);
1168 	return ret;
1169 }
1170 EXPORT_SYMBOL(kmalloc_order);
1171 
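/*
 * Worked example (illustrative, SLUB with 4 KiB pages): kmalloc(70 * 1024,
 * GFP_KERNEL) exceeds KMALLOC_MAX_CACHE_SIZE and ends up here with order 5,
 * i.e. one 128 KiB compound allocation.  kfree() later recovers the order
 * from the compound page head, which is why __GFP_COMP is required.
 */
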
1172 #ifdef CONFIG_TRACING
1173 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
1174 {
1175 	void *ret = kmalloc_order(size, flags, order);
1176 	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
1177 	return ret;
1178 }
1179 EXPORT_SYMBOL(kmalloc_order_trace);
1180 #endif
1181 
1182 #ifdef CONFIG_SLAB_FREELIST_RANDOM
1183 /* Randomize a generic freelist */
1184 static void freelist_randomize(struct rnd_state *state, unsigned int *list,
1185 			size_t count)
1186 {
1187 	size_t i;
1188 	unsigned int rand;
1189 
1190 	for (i = 0; i < count; i++)
1191 		list[i] = i;
1192 
1193 	/* Fisher-Yates shuffle */
1194 	for (i = count - 1; i > 0; i--) {
1195 		rand = prandom_u32_state(state);
1196 		rand %= (i + 1);
1197 		swap(list[i], list[rand]);
1198 	}
1199 }
1200 
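/*
 * Note (illustrative): for count == 4 the list starts as {0, 1, 2, 3} and
 * ends up as one of the 24 permutations, chosen uniformly up to the small
 * modulo bias introduced by 'rand % (i + 1)'.
 */
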
1201 /* Create a random sequence per cache */
1202 int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
1203 				    gfp_t gfp)
1204 {
1205 	struct rnd_state state;
1206 
1207 	if (count < 2 || cachep->random_seq)
1208 		return 0;
1209 
1210 	cachep->random_seq = kcalloc(count, sizeof(unsigned int), gfp);
1211 	if (!cachep->random_seq)
1212 		return -ENOMEM;
1213 
1214 	/* Get best entropy at this stage of boot */
1215 	prandom_seed_state(&state, get_random_long());
1216 
1217 	freelist_randomize(&state, cachep->random_seq, count);
1218 	return 0;
1219 }
1220 
1221 /* Destroy the per-cache random freelist sequence */
1222 void cache_random_seq_destroy(struct kmem_cache *cachep)
1223 {
1224 	kfree(cachep->random_seq);
1225 	cachep->random_seq = NULL;
1226 }
1227 #endif /* CONFIG_SLAB_FREELIST_RANDOM */
1228 
1229 #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
1230 #ifdef CONFIG_SLAB
1231 #define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR)
1232 #else
1233 #define SLABINFO_RIGHTS S_IRUSR
1234 #endif
1235 
1236 static void print_slabinfo_header(struct seq_file *m)
1237 {
1238 	/*
1239 	 * Output format version, so at least we can change it
1240 	 * without _too_ many complaints.
1241 	 */
1242 #ifdef CONFIG_DEBUG_SLAB
1243 	seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
1244 #else
1245 	seq_puts(m, "slabinfo - version: 2.1\n");
1246 #endif
1247 	seq_puts(m, "# name            <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
1248 	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
1249 	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
1250 #ifdef CONFIG_DEBUG_SLAB
1251 	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
1252 	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
1253 #endif
1254 	seq_putc(m, '\n');
1255 }
1256 
1257 void *slab_start(struct seq_file *m, loff_t *pos)
1258 {
1259 	mutex_lock(&slab_mutex);
1260 	return seq_list_start(&slab_root_caches, *pos);
1261 }
1262 
1263 void *slab_next(struct seq_file *m, void *p, loff_t *pos)
1264 {
1265 	return seq_list_next(p, &slab_root_caches, pos);
1266 }
1267 
1268 void slab_stop(struct seq_file *m, void *p)
1269 {
1270 	mutex_unlock(&slab_mutex);
1271 }
1272 
1273 static void
1274 memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
1275 {
1276 	struct kmem_cache *c;
1277 	struct slabinfo sinfo;
1278 
1279 	if (!is_root_cache(s))
1280 		return;
1281 
1282 	for_each_memcg_cache(c, s) {
1283 		memset(&sinfo, 0, sizeof(sinfo));
1284 		get_slabinfo(c, &sinfo);
1285 
1286 		info->active_slabs += sinfo.active_slabs;
1287 		info->num_slabs += sinfo.num_slabs;
1288 		info->shared_avail += sinfo.shared_avail;
1289 		info->active_objs += sinfo.active_objs;
1290 		info->num_objs += sinfo.num_objs;
1291 	}
1292 }
1293 
1294 static void cache_show(struct kmem_cache *s, struct seq_file *m)
1295 {
1296 	struct slabinfo sinfo;
1297 
1298 	memset(&sinfo, 0, sizeof(sinfo));
1299 	get_slabinfo(s, &sinfo);
1300 
1301 	memcg_accumulate_slabinfo(s, &sinfo);
1302 
1303 	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
1304 		   cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
1305 		   sinfo.objects_per_slab, (1 << sinfo.cache_order));
1306 
1307 	seq_printf(m, " : tunables %4u %4u %4u",
1308 		   sinfo.limit, sinfo.batchcount, sinfo.shared);
1309 	seq_printf(m, " : slabdata %6lu %6lu %6lu",
1310 		   sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
1311 	slabinfo_show_stats(m, s);
1312 	seq_putc(m, '\n');
1313 }
1314 
1315 static int slab_show(struct seq_file *m, void *p)
1316 {
1317 	struct kmem_cache *s = list_entry(p, struct kmem_cache, root_caches_node);
1318 
1319 	if (p == slab_root_caches.next)
1320 		print_slabinfo_header(m);
1321 	cache_show(s, m);
1322 	return 0;
1323 }
1324 
1325 void dump_unreclaimable_slab(void)
1326 {
1327 	struct kmem_cache *s, *s2;
1328 	struct slabinfo sinfo;
1329 
1330 	/*
1331 	 * Acquiring slab_mutex here is risky since we don't want to sleep
1332 	 * in the OOM path, but traversing the list without holding the
1333 	 * mutex risks a crash.
1334 	 * Use mutex_trylock to protect the traversal and dump nothing if
1335 	 * the mutex cannot be acquired.
1336 	 */
1337 	if (!mutex_trylock(&slab_mutex)) {
1338 		pr_warn("excessive unreclaimable slab but cannot dump stats\n");
1339 		return;
1340 	}
1341 
1342 	pr_info("Unreclaimable slab info:\n");
1343 	pr_info("Name                      Used          Total\n");
1344 
1345 	list_for_each_entry_safe(s, s2, &slab_caches, list) {
1346 		if (!is_root_cache(s) || (s->flags & SLAB_RECLAIM_ACCOUNT))
1347 			continue;
1348 
1349 		get_slabinfo(s, &sinfo);
1350 
1351 		if (sinfo.num_objs > 0)
1352 			pr_info("%-17s %10luKB %10luKB\n", cache_name(s),
1353 				(sinfo.active_objs * s->size) / 1024,
1354 				(sinfo.num_objs * s->size) / 1024);
1355 	}
1356 	mutex_unlock(&slab_mutex);
1357 }
1358 
1359 #if defined(CONFIG_MEMCG)
1360 void *memcg_slab_start(struct seq_file *m, loff_t *pos)
1361 {
1362 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
1363 
1364 	mutex_lock(&slab_mutex);
1365 	return seq_list_start(&memcg->kmem_caches, *pos);
1366 }
1367 
1368 void *memcg_slab_next(struct seq_file *m, void *p, loff_t *pos)
1369 {
1370 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
1371 
1372 	return seq_list_next(p, &memcg->kmem_caches, pos);
1373 }
1374 
1375 void memcg_slab_stop(struct seq_file *m, void *p)
1376 {
1377 	mutex_unlock(&slab_mutex);
1378 }
1379 
1380 int memcg_slab_show(struct seq_file *m, void *p)
1381 {
1382 	struct kmem_cache *s = list_entry(p, struct kmem_cache,
1383 					  memcg_params.kmem_caches_node);
1384 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
1385 
1386 	if (p == memcg->kmem_caches.next)
1387 		print_slabinfo_header(m);
1388 	cache_show(s, m);
1389 	return 0;
1390 }
1391 #endif
1392 
1393 /*
1394  * slabinfo_op - iterator that generates /proc/slabinfo
1395  *
1396  * Output layout:
1397  * cache-name
1398  * num-active-objs
1399  * total-objs
1400  * object size
1401  * num-active-slabs
1402  * total-slabs
1403  * num-pages-per-slab
1404  * + further values on SMP and with statistics enabled
1405  */
1406 static const struct seq_operations slabinfo_op = {
1407 	.start = slab_start,
1408 	.next = slab_next,
1409 	.stop = slab_stop,
1410 	.show = slab_show,
1411 };
1412 
1413 static int slabinfo_open(struct inode *inode, struct file *file)
1414 {
1415 	return seq_open(file, &slabinfo_op);
1416 }
1417 
1418 static const struct file_operations proc_slabinfo_operations = {
1419 	.open		= slabinfo_open,
1420 	.read		= seq_read,
1421 	.write          = slabinfo_write,
1422 	.llseek		= seq_lseek,
1423 	.release	= seq_release,
1424 };
1425 
1426 static int __init slab_proc_init(void)
1427 {
1428 	proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
1429 						&proc_slabinfo_operations);
1430 	return 0;
1431 }
1432 module_init(slab_proc_init);
1433 #endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */
1434 
1435 static __always_inline void *__do_krealloc(const void *p, size_t new_size,
1436 					   gfp_t flags)
1437 {
1438 	void *ret;
1439 	size_t ks = 0;
1440 
1441 	if (p)
1442 		ks = ksize(p);
1443 
1444 	if (ks >= new_size) {
1445 		kasan_krealloc((void *)p, new_size, flags);
1446 		return (void *)p;
1447 	}
1448 
1449 	ret = kmalloc_track_caller(new_size, flags);
1450 	if (ret && p)
1451 		memcpy(ret, p, ks);
1452 
1453 	return ret;
1454 }
1455 
1456 /**
1457  * __krealloc - like krealloc() but don't free @p.
1458  * @p: object to reallocate memory for.
1459  * @new_size: how many bytes of memory are required.
1460  * @flags: the type of memory to allocate.
1461  *
1462  * This function is like krealloc() except it never frees the originally
1463  * allocated buffer. Use this if you don't want to free the buffer
1464  * immediately, for example when the old buffer is still in use under RCU.
1465  */
1466 void *__krealloc(const void *p, size_t new_size, gfp_t flags)
1467 {
1468 	if (unlikely(!new_size))
1469 		return ZERO_SIZE_PTR;
1470 
1471 	return __do_krealloc(p, new_size, flags);
1472 
1473 }
1474 EXPORT_SYMBOL(__krealloc);
1475 
1476 /**
1477  * krealloc - reallocate memory. The contents will remain unchanged.
1478  * @p: object to reallocate memory for.
1479  * @new_size: how many bytes of memory are required.
1480  * @flags: the type of memory to allocate.
1481  *
1482  * The contents of the object pointed to are preserved up to the
1483  * lesser of the new and old sizes.  If @p is %NULL, krealloc()
1484  * behaves exactly like kmalloc().  If @new_size is 0 and @p is not a
1485  * %NULL pointer, the object pointed to is freed.
1486  */
1487 void *krealloc(const void *p, size_t new_size, gfp_t flags)
1488 {
1489 	void *ret;
1490 
1491 	if (unlikely(!new_size)) {
1492 		kfree(p);
1493 		return ZERO_SIZE_PTR;
1494 	}
1495 
1496 	ret = __do_krealloc(p, new_size, flags);
1497 	if (ret && p != ret)
1498 		kfree(p);
1499 
1500 	return ret;
1501 }
1502 EXPORT_SYMBOL(krealloc);
1503 
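/*
 * Illustrative sketch (hypothetical caller, not part of this file; wrapped in
 * #if 0 so it cannot affect compilation): growing a dynamically sized array
 * with krealloc().  On failure the old buffer is left untouched, so the
 * result must be checked before the old pointer is overwritten.
 */
#if 0	/* example only */
static int grow_table(u32 **table, size_t old_count, size_t new_count)
{
	u32 *tmp;

	tmp = krealloc(*table, new_count * sizeof(*tmp), GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;		/* *table is still valid and unchanged */

	memset(tmp + old_count, 0, (new_count - old_count) * sizeof(*tmp));
	*table = tmp;
	return 0;
}
#endif
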
1504 /**
1505  * kzfree - like kfree but zero memory
1506  * @p: object to free memory of
1507  *
1508  * The memory of the object @p points to is zeroed before it is freed.
1509  * If @p is %NULL, kzfree() does nothing.
1510  *
1511  * Note: this function zeroes the whole allocated buffer which can be a good
1512  * deal bigger than the requested buffer size passed to kmalloc(). So be
1513  * careful when using this function in performance sensitive code.
1514  */
1515 void kzfree(const void *p)
1516 {
1517 	size_t ks;
1518 	void *mem = (void *)p;
1519 
1520 	if (unlikely(ZERO_OR_NULL_PTR(mem)))
1521 		return;
1522 	ks = ksize(mem);
1523 	memset(mem, 0, ks);
1524 	kfree(mem);
1525 }
1526 EXPORT_SYMBOL(kzfree);
1527 
1528 /* Tracepoints definitions. */
1529 EXPORT_TRACEPOINT_SYMBOL(kmalloc);
1530 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
1531 EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
1532 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
1533 EXPORT_TRACEPOINT_SYMBOL(kfree);
1534 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
1535