xref: /openbmc/linux/mm/slab.c (revision c900529f3d9161bfde5cca0754f83b4d3c3e0220)
// SPDX-License-Identifier: GPL-2.0
/*
 * linux/mm/slab.c
 * Written by Mark Hemment, 1996/97.
 * (markhe@nextd.demon.co.uk)
 *
 * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
 *
 * Major cleanup, different bufctl logic, per-cpu arrays
 *	(c) 2000 Manfred Spraul
 *
 * Cleanup, make the head arrays unconditional, preparation for NUMA
 *	(c) 2002 Manfred Spraul
 *
 * An implementation of the Slab Allocator as described in outline in:
 *	UNIX Internals: The New Frontiers by Uresh Vahalia
 *	Pub: Prentice Hall	ISBN 0-13-101908-2
 * or with a little more detail in:
 *	The Slab Allocator: An Object-Caching Kernel Memory Allocator
 *	Jeff Bonwick (Sun Microsystems).
 *	Presented at: USENIX Summer 1994 Technical Conference
 *
 * The memory is organized in caches, one cache for each object type.
 * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
 * Each cache consists of many slabs (they are small (usually one
 * page long) and always contiguous), and each slab contains multiple
 * initialized objects.
 *
 * This means that your constructor is used only for newly allocated
 * slabs and you must pass objects with the same initializations to
 * kmem_cache_free.
 *
 * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
 * normal). If you need a special memory type, then you must create a new
 * cache for that memory type.
 *
 * In order to reduce fragmentation, the slabs are sorted in 3 groups:
 *   full slabs with 0 free objects
 *   partial slabs
 *   empty slabs with no allocated objects
 *
 * If partial slabs exist, then new allocations come from these slabs,
 * otherwise from empty slabs or new slabs are allocated.
 *
 * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
 * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
 *
 * Each cache has a short per-cpu head array, most allocs
 * and frees go into that array, and if that array overflows, then 1/2
 * of the entries in the array are given back into the global cache.
 * The head array is strictly LIFO and should improve the cache hit rates.
 * On SMP, it additionally reduces the spinlock operations.
 *
 * The c_cpuarray may not be read with enabled local interrupts -
 * it's changed with a smp_call_function().
 *
 * SMP synchronization:
 *  constructors and destructors are called without any locking.
 *  Several members in struct kmem_cache and struct slab never change, they
 *	are accessed without any locking.
 *  The per-cpu arrays are never accessed from the wrong cpu, no locking,
 *	and local interrupts are disabled so slab code is preempt-safe.
 *  The non-constant members are protected with a per-cache irq spinlock.
 *
 * Many thanks to Mark Hemment, who wrote another per-cpu slab patch
 * in 2000 - many ideas in the current implementation are derived from
 * his patch.
 *
 * Further notes from the original documentation:
 *
 * 11 April '97.  Started multi-threading - markhe
 *	The global cache-chain is protected by the mutex 'slab_mutex'.
 *	The mutex is only needed when accessing/extending the cache-chain,
 *	which can never happen inside an interrupt (kmem_cache_create(),
 *	kmem_cache_shrink() and kmem_cache_reap()).
 *
 *	At present, each engine can be growing a cache.  This should be blocked.
 *
 * 15 March 2005. NUMA slab allocator.
 *	Shai Fultheim <shai@scalex86.org>.
 *	Shobhit Dayal <shobhit@calsoftinc.com>
 *	Alok N Kataria <alokk@calsoftinc.com>
 *	Christoph Lameter <christoph@lameter.com>
 *
 *	Modified the slab allocator to be node aware on NUMA systems.
 *	Each node has its own list of partial, free and full slabs.
 *	All object allocations for a node occur from node specific slab lists.
 */

#include	<linux/slab.h>
#include	<linux/mm.h>
#include	<linux/poison.h>
#include	<linux/swap.h>
#include	<linux/cache.h>
#include	<linux/interrupt.h>
#include	<linux/init.h>
#include	<linux/compiler.h>
#include	<linux/cpuset.h>
#include	<linux/proc_fs.h>
#include	<linux/seq_file.h>
#include	<linux/notifier.h>
#include	<linux/kallsyms.h>
#include	<linux/kfence.h>
#include	<linux/cpu.h>
#include	<linux/sysctl.h>
#include	<linux/module.h>
#include	<linux/rcupdate.h>
#include	<linux/string.h>
#include	<linux/uaccess.h>
#include	<linux/nodemask.h>
#include	<linux/kmemleak.h>
#include	<linux/mempolicy.h>
#include	<linux/mutex.h>
#include	<linux/fault-inject.h>
#include	<linux/rtmutex.h>
#include	<linux/reciprocal_div.h>
#include	<linux/debugobjects.h>
#include	<linux/memory.h>
#include	<linux/prefetch.h>
#include	<linux/sched/task_stack.h>

#include	<net/sock.h>

#include	<asm/cacheflush.h>
#include	<asm/tlbflush.h>
#include	<asm/page.h>

#include <trace/events/kmem.h>

#include	"internal.h"

#include	"slab.h"

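/*
 * Illustrative sketch (not used by the allocator itself): how a client of
 * the interface documented in the comment at the top of this file typically
 * creates a cache, allocates from it and frees back to it.  The structure
 * and function names below are hypothetical.
 */
struct slab_example_obj {
	int refcount;
	void *payload;
};

/* Runs only when a new slab is grown, not on every kmem_cache_alloc(). */
static void slab_example_ctor(void *ptr)
{
	struct slab_example_obj *obj = ptr;

	obj->refcount = 0;
	obj->payload = NULL;
}

static __maybe_unused void slab_example_usage(void)
{
	struct kmem_cache *cache;
	struct slab_example_obj *obj;

	cache = kmem_cache_create("slab_example_obj",
				  sizeof(struct slab_example_obj), 0,
				  SLAB_HWCACHE_ALIGN, slab_example_ctor);
	if (!cache)
		return;

	obj = kmem_cache_alloc(cache, GFP_KERNEL);
	if (obj) {
		/* Objects must go back in a fully constructed state. */
		obj->refcount = 0;
		obj->payload = NULL;
		kmem_cache_free(cache, obj);
	}

	/* No allocation may race with the destruction of the cache. */
	kmem_cache_destroy(cache);
}
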
/*
 * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
 *		  0 for faster, smaller code (especially in the critical paths).
 *
 * STATS	- 1 to collect stats for /proc/slabinfo.
 *		  0 for faster, smaller code (especially in the critical paths).
 *
 * FORCED_DEBUG	- 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
 */

#ifdef CONFIG_DEBUG_SLAB
#define	DEBUG		1
#define	STATS		1
#define	FORCED_DEBUG	1
#else
#define	DEBUG		0
#define	STATS		0
#define	FORCED_DEBUG	0
#endif

/* Shouldn't this be in a header file somewhere? */
#define	BYTES_PER_WORD		sizeof(void *)
#define	REDZONE_ALIGN		max(BYTES_PER_WORD, __alignof__(unsigned long long))

#ifndef ARCH_KMALLOC_FLAGS
#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
#endif

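/*
 * Worked example (illustrative): with 4 KiB pages, PAGE_SIZE >> BITS_PER_BYTE
 * is 16, so the one-byte freelist index below is chosen only when the
 * smallest slab object is at least 16 bytes; SLAB_OBJ_MAX_NUM then evaluates
 * to 255 (65535 with the two-byte index type).
 */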
#define FREELIST_BYTE_INDEX (((PAGE_SIZE >> BITS_PER_BYTE) \
				<= SLAB_OBJ_MIN_SIZE) ? 1 : 0)

#if FREELIST_BYTE_INDEX
typedef unsigned char freelist_idx_t;
#else
typedef unsigned short freelist_idx_t;
#endif

#define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1)

/*
 * struct array_cache
 *
 * Purpose:
 * - LIFO ordering, to hand out cache-warm objects from _alloc
 * - reduce the number of linked list operations
 * - reduce spinlock operations
 *
 * The limit is stored in the per-cpu structure to reduce the data cache
 * footprint.
 *
 */
struct array_cache {
	unsigned int avail;
	unsigned int limit;
	unsigned int batchcount;
	unsigned int touched;
	void *entry[];	/*
			 * Must have this definition in here for the proper
			 * alignment of array_cache. Also simplifies accessing
			 * the entries.
			 */
};

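/*
 * Illustrative sketch of how entry[] is used as a LIFO stack by the
 * allocation and free fast paths (see e.g. __free_one() below):
 *
 *	push:	ac->entry[ac->avail++] = objp;
 *	pop:	objp = ac->entry[--ac->avail];
 *
 * The most recently freed object is handed out first, which keeps
 * allocations cache-warm.
 */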
struct alien_cache {
	spinlock_t lock;
	struct array_cache ac;
};

/*
 * Need this for bootstrapping a per node allocator.
 */
#define NUM_INIT_LISTS (2 * MAX_NUMNODES)
static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
#define	CACHE_CACHE 0
#define	SIZE_NODE (MAX_NUMNODES)

static int drain_freelist(struct kmem_cache *cache,
			struct kmem_cache_node *n, int tofree);
static void free_block(struct kmem_cache *cachep, void **objpp, int len,
			int node, struct list_head *list);
static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list);
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
static void cache_reap(struct work_struct *unused);

static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
						void **list);
static inline void fixup_slab_list(struct kmem_cache *cachep,
				struct kmem_cache_node *n, struct slab *slab,
				void **list);

#define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))

static void kmem_cache_node_init(struct kmem_cache_node *parent)
{
	INIT_LIST_HEAD(&parent->slabs_full);
	INIT_LIST_HEAD(&parent->slabs_partial);
	INIT_LIST_HEAD(&parent->slabs_free);
	parent->total_slabs = 0;
	parent->free_slabs = 0;
	parent->shared = NULL;
	parent->alien = NULL;
	parent->colour_next = 0;
	raw_spin_lock_init(&parent->list_lock);
	parent->free_objects = 0;
	parent->free_touched = 0;
}

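/*
 * Descriptive note: in MAKE_LIST() the 'slab' argument is the name of one of
 * the three per-node lists (slabs_full, slabs_partial or slabs_free), so
 * get_node(cachep, nodeid)->slab expands to that member.  The macro splices
 * the cache's current list for @nodeid onto @listp; MAKE_ALL_LISTS() does the
 * same for all three lists at once, which is what the bootstrap code uses to
 * migrate lists out of the static init_kmem_cache_node[] above.
 */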
#define MAKE_LIST(cachep, listp, slab, nodeid)				\
	do {								\
		INIT_LIST_HEAD(listp);					\
		list_splice(&get_node(cachep, nodeid)->slab, listp);	\
	} while (0)

#define	MAKE_ALL_LISTS(cachep, ptr, nodeid)				\
	do {								\
	MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid);	\
	MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
	MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);	\
	} while (0)

#define CFLGS_OBJFREELIST_SLAB	((slab_flags_t __force)0x40000000U)
#define CFLGS_OFF_SLAB		((slab_flags_t __force)0x80000000U)
#define	OBJFREELIST_SLAB(x)	((x)->flags & CFLGS_OBJFREELIST_SLAB)
#define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)

#define BATCHREFILL_LIMIT	16
/*
 * Optimization question: fewer reaps mean a lower probability of unnecessary
 * cpucache drain/refill cycles.
 *
 * OTOH the cpuarrays can contain lots of objects,
 * which could lock up otherwise freeable slabs.
 */
#define REAPTIMEOUT_AC		(2*HZ)
#define REAPTIMEOUT_NODE	(4*HZ)

#if STATS
#define	STATS_INC_ACTIVE(x)	((x)->num_active++)
#define	STATS_DEC_ACTIVE(x)	((x)->num_active--)
#define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
#define	STATS_INC_GROWN(x)	((x)->grown++)
#define	STATS_ADD_REAPED(x, y)	((x)->reaped += (y))
#define	STATS_SET_HIGH(x)						\
	do {								\
		if ((x)->num_active > (x)->high_mark)			\
			(x)->high_mark = (x)->num_active;		\
	} while (0)
#define	STATS_INC_ERR(x)	((x)->errors++)
#define	STATS_INC_NODEALLOCS(x)	((x)->node_allocs++)
#define	STATS_INC_NODEFREES(x)	((x)->node_frees++)
#define STATS_INC_ACOVERFLOW(x)   ((x)->node_overflow++)
#define	STATS_SET_FREEABLE(x, i)					\
	do {								\
		if ((x)->max_freeable < i)				\
			(x)->max_freeable = i;				\
	} while (0)
#define STATS_INC_ALLOCHIT(x)	atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x)	atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x)	atomic_inc(&(x)->freehit)
#define STATS_INC_FREEMISS(x)	atomic_inc(&(x)->freemiss)
#else
#define	STATS_INC_ACTIVE(x)	do { } while (0)
#define	STATS_DEC_ACTIVE(x)	do { } while (0)
#define	STATS_INC_ALLOCED(x)	do { } while (0)
#define	STATS_INC_GROWN(x)	do { } while (0)
#define	STATS_ADD_REAPED(x, y)	do { (void)(y); } while (0)
#define	STATS_SET_HIGH(x)	do { } while (0)
#define	STATS_INC_ERR(x)	do { } while (0)
#define	STATS_INC_NODEALLOCS(x)	do { } while (0)
#define	STATS_INC_NODEFREES(x)	do { } while (0)
#define STATS_INC_ACOVERFLOW(x)   do { } while (0)
#define	STATS_SET_FREEABLE(x, i) do { } while (0)
#define STATS_INC_ALLOCHIT(x)	do { } while (0)
#define STATS_INC_ALLOCMISS(x)	do { } while (0)
#define STATS_INC_FREEHIT(x)	do { } while (0)
#define STATS_INC_FREEMISS(x)	do { } while (0)
#endif

#if DEBUG

/*
 * memory layout of objects:
 * 0		: objp
 * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that
 * 		the end of an object is aligned with the end of the real
 * 		allocation. Catches writes behind the end of the allocation.
 * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1:
 * 		redzone word.
 * cachep->obj_offset: The real object.
 * cachep->size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
 * cachep->size - 1* BYTES_PER_WORD: last caller address
 *					[BYTES_PER_WORD long]
 */
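
/*
 * Worked example (illustrative, 64-bit build, BYTES_PER_WORD == 8): for a
 * cache created with SLAB_RED_ZONE | SLAB_STORE_USER, obj_offset == 8 and
 * size == 64, one allocation is laid out as:
 *
 *	bytes  0.. 7	first redzone word	(dbg_redzone1())
 *	bytes  8..47	the object itself
 *	bytes 48..55	second redzone word	(dbg_redzone2())
 *	bytes 56..63	last caller address	(dbg_userword())
 */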
static int obj_offset(struct kmem_cache *cachep)
{
	return cachep->obj_offset;
}

static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
{
	BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
	return (unsigned long long *) (objp + obj_offset(cachep) -
				      sizeof(unsigned long long));
}

static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
{
	BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
	if (cachep->flags & SLAB_STORE_USER)
		return (unsigned long long *)(objp + cachep->size -
					      sizeof(unsigned long long) -
					      REDZONE_ALIGN);
	return (unsigned long long *) (objp + cachep->size -
				       sizeof(unsigned long long));
}

static void **dbg_userword(struct kmem_cache *cachep, void *objp)
{
	BUG_ON(!(cachep->flags & SLAB_STORE_USER));
	return (void **)(objp + cachep->size - BYTES_PER_WORD);
}

#else

#define obj_offset(x)			0
#define dbg_redzone1(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_redzone2(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_userword(cachep, objp)	({BUG(); (void **)NULL;})

#endif

/*
 * Do not go above this order unless 0 objects fit into the slab or
 * overridden on the command line.
 */
#define	SLAB_MAX_ORDER_HI	1
#define	SLAB_MAX_ORDER_LO	0
static int slab_max_order = SLAB_MAX_ORDER_LO;
static bool slab_max_order_set __initdata;

static inline void *index_to_obj(struct kmem_cache *cache,
				 const struct slab *slab, unsigned int idx)
{
	return slab->s_mem + cache->size * idx;
}

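/*
 * Example (illustrative): with cache->size == 256, index_to_obj(cache, slab, 3)
 * returns slab->s_mem + 768, i.e. the fourth object in the slab's memory.
 */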
#define BOOT_CPUCACHE_ENTRIES	1
/* internal cache of cache description objs */
static struct kmem_cache kmem_cache_boot = {
	.batchcount = 1,
	.limit = BOOT_CPUCACHE_ENTRIES,
	.shared = 1,
	.size = sizeof(struct kmem_cache),
	.name = "kmem_cache",
};

static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);

static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
	return this_cpu_ptr(cachep->cpu_cache);
}

/*
 * Calculate the number of objects and left-over bytes for a given buffer size.
 */
static unsigned int cache_estimate(unsigned long gfporder, size_t buffer_size,
		slab_flags_t flags, size_t *left_over)
{
	unsigned int num;
	size_t slab_size = PAGE_SIZE << gfporder;

	/*
	 * The slab management structure can be either off the slab or
	 * on it. For the latter case, the memory allocated for a
	 * slab is used for:
	 *
	 * - @buffer_size bytes for each object
	 * - One freelist_idx_t for each object
	 *
	 * We don't need to consider alignment of freelist because
	 * freelist will be at the end of slab page. The objects will be
	 * at the correct alignment.
	 *
	 * If the slab management structure is off the slab, then the
	 * alignment will already be calculated into the size. Because
	 * the slabs are all pages aligned, the objects will be at the
	 * correct alignment when allocated.
	 */
	if (flags & (CFLGS_OBJFREELIST_SLAB | CFLGS_OFF_SLAB)) {
		num = slab_size / buffer_size;
		*left_over = slab_size % buffer_size;
	} else {
		num = slab_size / (buffer_size + sizeof(freelist_idx_t));
		*left_over = slab_size %
			(buffer_size + sizeof(freelist_idx_t));
	}

	return num;
}

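/*
 * Worked example (illustrative): PAGE_SIZE == 4096, gfporder == 0,
 * buffer_size == 256 and sizeof(freelist_idx_t) == 1.
 *
 * OBJFREELIST/OFF_SLAB:  num = 4096 / 256 = 16, *left_over = 0
 * on-slab freelist:      num = 4096 / 257 = 15, *left_over = 4096 - 15*257 = 241
 */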
#if DEBUG
#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)

static void __slab_error(const char *function, struct kmem_cache *cachep,
			char *msg)
{
	pr_err("slab error in %s(): cache `%s': %s\n",
	       function, cachep->name, msg);
	dump_stack();
	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}
#endif

/*
 * By default on NUMA we use alien caches to stage the freeing of
 * objects allocated from other nodes. This causes massive memory
 * inefficiencies when using fake NUMA setup to split memory into a
 * large number of small nodes, so it can be disabled on the command
 * line
 */

static int use_alien_caches __read_mostly = 1;
static int __init noaliencache_setup(char *s)
{
	use_alien_caches = 0;
	return 1;
}
__setup("noaliencache", noaliencache_setup);

static int __init slab_max_order_setup(char *str)
{
	get_option(&str, &slab_max_order);
	slab_max_order = slab_max_order < 0 ? 0 :
				min(slab_max_order, MAX_ORDER);
	slab_max_order_set = true;

	return 1;
}
__setup("slab_max_order=", slab_max_order_setup);

#ifdef CONFIG_NUMA
/*
 * Special reaping functions for NUMA systems called from cache_reap().
 * These take care of doing round robin flushing of alien caches (containing
 * objects freed on different nodes from which they were allocated) and the
 * flushing of remote pcps by calling drain_node_pages.
 */
static DEFINE_PER_CPU(unsigned long, slab_reap_node);

static void init_reap_node(int cpu)
{
	per_cpu(slab_reap_node, cpu) = next_node_in(cpu_to_mem(cpu),
						    node_online_map);
}

static void next_reap_node(void)
{
	int node = __this_cpu_read(slab_reap_node);

	node = next_node_in(node, node_online_map);
	__this_cpu_write(slab_reap_node, node);
}

#else
#define init_reap_node(cpu) do { } while (0)
#define next_reap_node(void) do { } while (0)
#endif

/*
 * Initiate the reap timer running on the target CPU.  We run at around 1 to 2Hz
 * via the workqueue/eventd.
 * Add the CPU number into the expiration time to minimize the possibility of
 * the CPUs getting into lockstep and contending for the global cache chain
 * lock.
 */
static void start_cpu_timer(int cpu)
{
	struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);

	if (reap_work->work.func == NULL) {
		init_reap_node(cpu);
		INIT_DEFERRABLE_WORK(reap_work, cache_reap);
		schedule_delayed_work_on(cpu, reap_work,
					__round_jiffies_relative(HZ, cpu));
	}
}

static void init_arraycache(struct array_cache *ac, int limit, int batch)
{
	if (ac) {
		ac->avail = 0;
		ac->limit = limit;
		ac->batchcount = batch;
		ac->touched = 0;
	}
}

static struct array_cache *alloc_arraycache(int node, int entries,
					    int batchcount, gfp_t gfp)
{
	size_t memsize = sizeof(void *) * entries + sizeof(struct array_cache);
	struct array_cache *ac = NULL;

	ac = kmalloc_node(memsize, gfp, node);
	/*
	 * The array_cache structures contain pointers to free object.
	 * However, when such objects are allocated or transferred to another
	 * cache the pointers are not cleared and they could be counted as
	 * valid references during a kmemleak scan. Therefore, kmemleak must
	 * not scan such objects.
	 */
	kmemleak_no_scan(ac);
	init_arraycache(ac, entries, batchcount);
	return ac;
}

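/*
 * Sizing note (illustrative): the descriptor and its object stack come from a
 * single kmalloc_node() allocation.  On a 64-bit build with entries == 120,
 * memsize == 120 * 8 + sizeof(struct array_cache) == 960 + 16 == 976 bytes.
 */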
static noinline void cache_free_pfmemalloc(struct kmem_cache *cachep,
					struct slab *slab, void *objp)
{
	struct kmem_cache_node *n;
	int slab_node;
	LIST_HEAD(list);

	slab_node = slab_nid(slab);
	n = get_node(cachep, slab_node);

	raw_spin_lock(&n->list_lock);
	free_block(cachep, &objp, 1, slab_node, &list);
	raw_spin_unlock(&n->list_lock);

	slabs_destroy(cachep, &list);
}

/*
 * Transfer objects in one arraycache to another.
 * Locking must be handled by the caller.
 *
 * Return the number of entries transferred.
 */
static int transfer_objects(struct array_cache *to,
		struct array_cache *from, unsigned int max)
{
	/* Figure out how many entries to transfer */
	int nr = min3(from->avail, max, to->limit - to->avail);

	if (!nr)
		return 0;

	memcpy(to->entry + to->avail, from->entry + from->avail - nr,
			sizeof(void *) *nr);

	from->avail -= nr;
	to->avail += nr;
	return nr;
}

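/*
 * Example (illustrative): with from->avail == 30, max == 16 and
 * to->limit - to->avail == 10, min3() gives nr == 10, so the ten most
 * recently added pointers are copied from 'from' and 'to' is filled up to
 * its limit.
 */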
/* &alien->lock must be held by alien callers. */
static __always_inline void __free_one(struct array_cache *ac, void *objp)
{
	/* Avoid trivial double-free. */
	if (IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) &&
	    WARN_ON_ONCE(ac->avail > 0 && ac->entry[ac->avail - 1] == objp))
		return;
	ac->entry[ac->avail++] = objp;
}

#ifndef CONFIG_NUMA

#define drain_alien_cache(cachep, alien) do { } while (0)
#define reap_alien(cachep, n) do { } while (0)

static inline struct alien_cache **alloc_alien_cache(int node,
						int limit, gfp_t gfp)
{
	return NULL;
}

static inline void free_alien_cache(struct alien_cache **ac_ptr)
{
}

static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
	return 0;
}

static inline gfp_t gfp_exact_node(gfp_t flags)
{
	return flags & ~__GFP_NOFAIL;
}

#else	/* CONFIG_NUMA */

static struct alien_cache *__alloc_alien_cache(int node, int entries,
						int batch, gfp_t gfp)
{
	size_t memsize = sizeof(void *) * entries + sizeof(struct alien_cache);
	struct alien_cache *alc = NULL;

	alc = kmalloc_node(memsize, gfp, node);
	if (alc) {
		kmemleak_no_scan(alc);
		init_arraycache(&alc->ac, entries, batch);
		spin_lock_init(&alc->lock);
	}
	return alc;
}

static struct alien_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
{
	struct alien_cache **alc_ptr;
	int i;

	if (limit > 1)
		limit = 12;
	alc_ptr = kcalloc_node(nr_node_ids, sizeof(void *), gfp, node);
	if (!alc_ptr)
		return NULL;

	for_each_node(i) {
		if (i == node || !node_online(i))
			continue;
		alc_ptr[i] = __alloc_alien_cache(node, limit, 0xbaadf00d, gfp);
		if (!alc_ptr[i]) {
			for (i--; i >= 0; i--)
				kfree(alc_ptr[i]);
			kfree(alc_ptr);
			return NULL;
		}
	}
	return alc_ptr;
}

static void free_alien_cache(struct alien_cache **alc_ptr)
{
	int i;

	if (!alc_ptr)
		return;
	for_each_node(i)
	    kfree(alc_ptr[i]);
	kfree(alc_ptr);
}

static void __drain_alien_cache(struct kmem_cache *cachep,
				struct array_cache *ac, int node,
				struct list_head *list)
{
	struct kmem_cache_node *n = get_node(cachep, node);

	if (ac->avail) {
		raw_spin_lock(&n->list_lock);
		/*
		 * Stuff objects into the remote nodes shared array first.
		 * That way we could avoid the overhead of putting the objects
		 * into the free lists and getting them back later.
		 */
		if (n->shared)
			transfer_objects(n->shared, ac, ac->limit);

		free_block(cachep, ac->entry, ac->avail, node, list);
		ac->avail = 0;
		raw_spin_unlock(&n->list_lock);
	}
}

/*
 * Called from cache_reap() to regularly drain alien caches round robin.
 */
static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
{
	int node = __this_cpu_read(slab_reap_node);

	if (n->alien) {
		struct alien_cache *alc = n->alien[node];
		struct array_cache *ac;

		if (alc) {
			ac = &alc->ac;
			if (ac->avail && spin_trylock_irq(&alc->lock)) {
				LIST_HEAD(list);

				__drain_alien_cache(cachep, ac, node, &list);
				spin_unlock_irq(&alc->lock);
				slabs_destroy(cachep, &list);
			}
		}
	}
}

static void drain_alien_cache(struct kmem_cache *cachep,
				struct alien_cache **alien)
{
	int i = 0;
	struct alien_cache *alc;
	struct array_cache *ac;
	unsigned long flags;

	for_each_online_node(i) {
		alc = alien[i];
		if (alc) {
			LIST_HEAD(list);

			ac = &alc->ac;
			spin_lock_irqsave(&alc->lock, flags);
			__drain_alien_cache(cachep, ac, i, &list);
			spin_unlock_irqrestore(&alc->lock, flags);
			slabs_destroy(cachep, &list);
		}
	}
}

static int __cache_free_alien(struct kmem_cache *cachep, void *objp,
				int node, int slab_node)
{
	struct kmem_cache_node *n;
	struct alien_cache *alien = NULL;
	struct array_cache *ac;
	LIST_HEAD(list);

	n = get_node(cachep, node);
	STATS_INC_NODEFREES(cachep);
	if (n->alien && n->alien[slab_node]) {
		alien = n->alien[slab_node];
		ac = &alien->ac;
		spin_lock(&alien->lock);
		if (unlikely(ac->avail == ac->limit)) {
			STATS_INC_ACOVERFLOW(cachep);
			__drain_alien_cache(cachep, ac, slab_node, &list);
		}
		__free_one(ac, objp);
		spin_unlock(&alien->lock);
		slabs_destroy(cachep, &list);
	} else {
		n = get_node(cachep, slab_node);
		raw_spin_lock(&n->list_lock);
		free_block(cachep, &objp, 1, slab_node, &list);
		raw_spin_unlock(&n->list_lock);
		slabs_destroy(cachep, &list);
	}
	return 1;
}

static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
	int slab_node = slab_nid(virt_to_slab(objp));
	int node = numa_mem_id();
	/*
	 * Make sure we are not freeing an object from another node to the array
	 * cache on this cpu.
	 */
	if (likely(node == slab_node))
		return 0;

	return __cache_free_alien(cachep, objp, node, slab_node);
}

/*
 * Construct gfp mask to allocate from a specific node but do not reclaim or
 * warn about failures.
 */
static inline gfp_t gfp_exact_node(gfp_t flags)
{
	return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
}
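
/*
 * Example (illustrative): gfp_exact_node(GFP_KERNEL) adds __GFP_THISNODE and
 * __GFP_NOWARN and clears __GFP_RECLAIM and __GFP_NOFAIL, so the page
 * allocator may only return memory from the requested node and fails fast
 * instead of reclaiming, warning or retrying forever.
 */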
#endif

static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
{
	struct kmem_cache_node *n;

	/*
	 * Set up the kmem_cache_node for cpu before we can
	 * begin anything. Make sure some other cpu on this
	 * node has not already allocated this
	 */
	n = get_node(cachep, node);
	if (n) {
		raw_spin_lock_irq(&n->list_lock);
		n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount +
				cachep->num;
		raw_spin_unlock_irq(&n->list_lock);

		return 0;
	}

	n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node);
	if (!n)
		return -ENOMEM;

	kmem_cache_node_init(n);
	n->next_reap = jiffies + REAPTIMEOUT_NODE +
		    ((unsigned long)cachep) % REAPTIMEOUT_NODE;

	n->free_limit =
		(1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num;

	/*
	 * The kmem_cache_nodes don't come and go as CPUs
	 * come and go.  slab_mutex provides sufficient
	 * protection here.
	 */
	cachep->node[node] = n;

	return 0;
}

#if defined(CONFIG_NUMA) || defined(CONFIG_SMP)
/*
 * Allocates and initializes the kmem_cache_node for a node on each slab
 * cache, used for either memory or cpu hotplug.  If memory is being
 * hot-added, the kmem_cache_node will be allocated off-node since memory is
 * not yet online for the new node.  When hotplugging memory or a cpu,
 * existing nodes are not replaced if already in use.
 *
 * Must hold slab_mutex.
 */
static int init_cache_node_node(int node)
{
	int ret;
	struct kmem_cache *cachep;

	list_for_each_entry(cachep, &slab_caches, list) {
		ret = init_cache_node(cachep, node, GFP_KERNEL);
		if (ret)
			return ret;
	}

	return 0;
}
#endif

static int setup_kmem_cache_node(struct kmem_cache *cachep,
				int node, gfp_t gfp, bool force_change)
{
	int ret = -ENOMEM;
	struct kmem_cache_node *n;
	struct array_cache *old_shared = NULL;
	struct array_cache *new_shared = NULL;
	struct alien_cache **new_alien = NULL;
	LIST_HEAD(list);

	if (use_alien_caches) {
		new_alien = alloc_alien_cache(node, cachep->limit, gfp);
		if (!new_alien)
			goto fail;
	}

	if (cachep->shared) {
		new_shared = alloc_arraycache(node,
			cachep->shared * cachep->batchcount, 0xbaadf00d, gfp);
		if (!new_shared)
			goto fail;
	}

	ret = init_cache_node(cachep, node, gfp);
	if (ret)
		goto fail;

	n = get_node(cachep, node);
	raw_spin_lock_irq(&n->list_lock);
	if (n->shared && force_change) {
		free_block(cachep, n->shared->entry,
				n->shared->avail, node, &list);
		n->shared->avail = 0;
	}

	if (!n->shared || force_change) {
		old_shared = n->shared;
		n->shared = new_shared;
		new_shared = NULL;
	}

	if (!n->alien) {
		n->alien = new_alien;
		new_alien = NULL;
	}

	raw_spin_unlock_irq(&n->list_lock);
	slabs_destroy(cachep, &list);

	/*
	 * To protect lockless access to n->shared during irq disabled context.
	 * If n->shared isn't NULL in irq disabled context, accessing it is
	 * guaranteed to be valid until irq is re-enabled, because it will be
	 * freed after synchronize_rcu().
	 */
92286d9f485SJoonsoo Kim 	if (old_shared && force_change)
9236564a25eSPaul E. McKenney 		synchronize_rcu();
924801faf0dSJoonsoo Kim 
925c3d332b6SJoonsoo Kim fail:
926c3d332b6SJoonsoo Kim 	kfree(old_shared);
927c3d332b6SJoonsoo Kim 	kfree(new_shared);
928c3d332b6SJoonsoo Kim 	free_alien_cache(new_alien);
929c3d332b6SJoonsoo Kim 
930c3d332b6SJoonsoo Kim 	return ret;
931c3d332b6SJoonsoo Kim }
932c3d332b6SJoonsoo Kim 
9336731d4f1SSebastian Andrzej Siewior #ifdef CONFIG_SMP
9346731d4f1SSebastian Andrzej Siewior 
cpuup_canceled(long cpu)9350db0628dSPaul Gortmaker static void cpuup_canceled(long cpu)
9361da177e4SLinus Torvalds {
937fbf1e473SAkinobu Mita 	struct kmem_cache *cachep;
938ce8eb6c4SChristoph Lameter 	struct kmem_cache_node *n = NULL;
9397d6e6d09SLee Schermerhorn 	int node = cpu_to_mem(cpu);
940a70f7302SRusty Russell 	const struct cpumask *mask = cpumask_of_node(node);
941fbf1e473SAkinobu Mita 
94218004c5dSChristoph Lameter 	list_for_each_entry(cachep, &slab_caches, list) {
943fbf1e473SAkinobu Mita 		struct array_cache *nc;
944fbf1e473SAkinobu Mita 		struct array_cache *shared;
945c8522a3aSJoonsoo Kim 		struct alien_cache **alien;
94697654dfaSJoonsoo Kim 		LIST_HEAD(list);
947fbf1e473SAkinobu Mita 
94818bf8541SChristoph Lameter 		n = get_node(cachep, node);
949ce8eb6c4SChristoph Lameter 		if (!n)
950bf0dea23SJoonsoo Kim 			continue;
951fbf1e473SAkinobu Mita 
952b539ce9fSJiri Kosina 		raw_spin_lock_irq(&n->list_lock);
953fbf1e473SAkinobu Mita 
954ce8eb6c4SChristoph Lameter 		/* Free limit for this kmem_cache_node */
955ce8eb6c4SChristoph Lameter 		n->free_limit -= cachep->batchcount;
956bf0dea23SJoonsoo Kim 
957bf0dea23SJoonsoo Kim 		/* cpu is dead; no one can alloc from it. */
958bf0dea23SJoonsoo Kim 		nc = per_cpu_ptr(cachep->cpu_cache, cpu);
95997654dfaSJoonsoo Kim 		free_block(cachep, nc->entry, nc->avail, node, &list);
960bf0dea23SJoonsoo Kim 		nc->avail = 0;
961fbf1e473SAkinobu Mita 
96258463c1fSRusty Russell 		if (!cpumask_empty(mask)) {
963b539ce9fSJiri Kosina 			raw_spin_unlock_irq(&n->list_lock);
964bf0dea23SJoonsoo Kim 			goto free_slab;
965fbf1e473SAkinobu Mita 		}
966fbf1e473SAkinobu Mita 
967ce8eb6c4SChristoph Lameter 		shared = n->shared;
968fbf1e473SAkinobu Mita 		if (shared) {
969fbf1e473SAkinobu Mita 			free_block(cachep, shared->entry,
97097654dfaSJoonsoo Kim 				   shared->avail, node, &list);
971ce8eb6c4SChristoph Lameter 			n->shared = NULL;
972fbf1e473SAkinobu Mita 		}
973fbf1e473SAkinobu Mita 
974ce8eb6c4SChristoph Lameter 		alien = n->alien;
975ce8eb6c4SChristoph Lameter 		n->alien = NULL;
976fbf1e473SAkinobu Mita 
977b539ce9fSJiri Kosina 		raw_spin_unlock_irq(&n->list_lock);
978fbf1e473SAkinobu Mita 
979fbf1e473SAkinobu Mita 		kfree(shared);
980fbf1e473SAkinobu Mita 		if (alien) {
981fbf1e473SAkinobu Mita 			drain_alien_cache(cachep, alien);
982fbf1e473SAkinobu Mita 			free_alien_cache(alien);
983fbf1e473SAkinobu Mita 		}
984bf0dea23SJoonsoo Kim 
985bf0dea23SJoonsoo Kim free_slab:
98697654dfaSJoonsoo Kim 		slabs_destroy(cachep, &list);
987fbf1e473SAkinobu Mita 	}
988fbf1e473SAkinobu Mita 	/*
989fbf1e473SAkinobu Mita 	 * In the previous loop, all the objects were freed to
990fbf1e473SAkinobu Mita 	 * the respective cache's slabs,  now we can go ahead and
991fbf1e473SAkinobu Mita 	 * shrink each nodelist to its limit.
992fbf1e473SAkinobu Mita 	 */
99318004c5dSChristoph Lameter 	list_for_each_entry(cachep, &slab_caches, list) {
99418bf8541SChristoph Lameter 		n = get_node(cachep, node);
995ce8eb6c4SChristoph Lameter 		if (!n)
996fbf1e473SAkinobu Mita 			continue;
997a5aa63a5SJoonsoo Kim 		drain_freelist(cachep, n, INT_MAX);
998fbf1e473SAkinobu Mita 	}
999fbf1e473SAkinobu Mita }
1000fbf1e473SAkinobu Mita 
cpuup_prepare(long cpu)10010db0628dSPaul Gortmaker static int cpuup_prepare(long cpu)
1002fbf1e473SAkinobu Mita {
1003343e0d7aSPekka Enberg 	struct kmem_cache *cachep;
10047d6e6d09SLee Schermerhorn 	int node = cpu_to_mem(cpu);
10058f9f8d9eSDavid Rientjes 	int err;
10061da177e4SLinus Torvalds 
1007a737b3e2SAndrew Morton 	/*
1008a737b3e2SAndrew Morton 	 * We need to do this right in the beginning since
1009e498be7dSChristoph Lameter 	 * alloc_arraycache's are going to use this list.
1010e498be7dSChristoph Lameter 	 * kmalloc_node allows us to add the slab to the right
1011ce8eb6c4SChristoph Lameter 	 * kmem_cache_node and not this cpu's kmem_cache_node
1012e498be7dSChristoph Lameter 	 */
10136a67368cSChristoph Lameter 	err = init_cache_node_node(node);
10148f9f8d9eSDavid Rientjes 	if (err < 0)
1015e498be7dSChristoph Lameter 		goto bad;
1016e498be7dSChristoph Lameter 
1017a737b3e2SAndrew Morton 	/*
1018a737b3e2SAndrew Morton 	 * Now we can go ahead with allocating the shared arrays and
1019a737b3e2SAndrew Morton 	 * array caches
1020a737b3e2SAndrew Morton 	 */
102118004c5dSChristoph Lameter 	list_for_each_entry(cachep, &slab_caches, list) {
1022c3d332b6SJoonsoo Kim 		err = setup_kmem_cache_node(cachep, node, GFP_KERNEL, false);
1023c3d332b6SJoonsoo Kim 		if (err)
10244484ebf1SRavikiran G Thirumalai 			goto bad;
102563109846SEric Dumazet 	}
1026ce79ddc8SPekka Enberg 
1027fbf1e473SAkinobu Mita 	return 0;
1028fbf1e473SAkinobu Mita bad:
102912d00f6aSAkinobu Mita 	cpuup_canceled(cpu);
1030fbf1e473SAkinobu Mita 	return -ENOMEM;
1031fbf1e473SAkinobu Mita }
1032fbf1e473SAkinobu Mita 
10336731d4f1SSebastian Andrzej Siewior int slab_prepare_cpu(unsigned int cpu)
1034fbf1e473SAkinobu Mita {
10356731d4f1SSebastian Andrzej Siewior 	int err;
1036fbf1e473SAkinobu Mita 
103718004c5dSChristoph Lameter 	mutex_lock(&slab_mutex);
1038fbf1e473SAkinobu Mita 	err = cpuup_prepare(cpu);
103918004c5dSChristoph Lameter 	mutex_unlock(&slab_mutex);
10406731d4f1SSebastian Andrzej Siewior 	return err;
10416731d4f1SSebastian Andrzej Siewior }
10426731d4f1SSebastian Andrzej Siewior 
10435830c590SChristoph Lameter /*
10446731d4f1SSebastian Andrzej Siewior  * This is called for a failed online attempt and for a successful
10456731d4f1SSebastian Andrzej Siewior  * offline.
10466731d4f1SSebastian Andrzej Siewior  *
10476731d4f1SSebastian Andrzej Siewior  * Even if all the cpus of a node are down, we don't free the
1048a8f23dd1SYixuan Cao  * kmem_cache_node of any cache. This is to avoid a race between cpu_down and
10496731d4f1SSebastian Andrzej Siewior  * a kmalloc allocation from another cpu for memory from the node of
105070b6d25eSChen Tao  * the cpu going down.  The kmem_cache_node structure is usually allocated from
10516731d4f1SSebastian Andrzej Siewior  * kmem_cache_create() and gets destroyed at kmem_cache_destroy().
10526731d4f1SSebastian Andrzej Siewior  */
10536731d4f1SSebastian Andrzej Siewior int slab_dead_cpu(unsigned int cpu)
10546731d4f1SSebastian Andrzej Siewior {
10556731d4f1SSebastian Andrzej Siewior 	mutex_lock(&slab_mutex);
10566731d4f1SSebastian Andrzej Siewior 	cpuup_canceled(cpu);
10576731d4f1SSebastian Andrzej Siewior 	mutex_unlock(&slab_mutex);
10586731d4f1SSebastian Andrzej Siewior 	return 0;
10596731d4f1SSebastian Andrzej Siewior }
10606731d4f1SSebastian Andrzej Siewior #endif
10616731d4f1SSebastian Andrzej Siewior 
10626731d4f1SSebastian Andrzej Siewior static int slab_online_cpu(unsigned int cpu)
10636731d4f1SSebastian Andrzej Siewior {
10646731d4f1SSebastian Andrzej Siewior 	start_cpu_timer(cpu);
10656731d4f1SSebastian Andrzej Siewior 	return 0;
10666731d4f1SSebastian Andrzej Siewior }
10676731d4f1SSebastian Andrzej Siewior 
10686731d4f1SSebastian Andrzej Siewior static int slab_offline_cpu(unsigned int cpu)
10696731d4f1SSebastian Andrzej Siewior {
10706731d4f1SSebastian Andrzej Siewior 	/*
10716731d4f1SSebastian Andrzej Siewior 	 * Shutdown cache reaper. Note that the slab_mutex is held so
10726731d4f1SSebastian Andrzej Siewior 	 * that if cache_reap() is invoked it cannot do anything
10736731d4f1SSebastian Andrzej Siewior 	 * expensive but will only modify reap_work and reschedule the
10746731d4f1SSebastian Andrzej Siewior 	 * timer.
10755830c590SChristoph Lameter 	 */
1076afe2c511STejun Heo 	cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
10775830c590SChristoph Lameter 	/* Now the cache_reaper is guaranteed to be not running. */
10781871e52cSTejun Heo 	per_cpu(slab_reap_work, cpu).work.func = NULL;
10796731d4f1SSebastian Andrzej Siewior 	return 0;
10801da177e4SLinus Torvalds }
10811da177e4SLinus Torvalds 
108276af6a05SDave Hansen #if defined(CONFIG_NUMA)
10838f9f8d9eSDavid Rientjes /*
10848f9f8d9eSDavid Rientjes  * Drains freelist for a node on each slab cache, used for memory hot-remove.
10858f9f8d9eSDavid Rientjes  * Returns -EBUSY if all objects cannot be drained so that the node is not
10868f9f8d9eSDavid Rientjes  * removed.
10878f9f8d9eSDavid Rientjes  *
108818004c5dSChristoph Lameter  * Must hold slab_mutex.
10898f9f8d9eSDavid Rientjes  */
10906a67368cSChristoph Lameter static int __meminit drain_cache_node_node(int node)
10918f9f8d9eSDavid Rientjes {
10928f9f8d9eSDavid Rientjes 	struct kmem_cache *cachep;
10938f9f8d9eSDavid Rientjes 	int ret = 0;
10948f9f8d9eSDavid Rientjes 
109518004c5dSChristoph Lameter 	list_for_each_entry(cachep, &slab_caches, list) {
1096ce8eb6c4SChristoph Lameter 		struct kmem_cache_node *n;
10978f9f8d9eSDavid Rientjes 
109818bf8541SChristoph Lameter 		n = get_node(cachep, node);
1099ce8eb6c4SChristoph Lameter 		if (!n)
11008f9f8d9eSDavid Rientjes 			continue;
11018f9f8d9eSDavid Rientjes 
1102a5aa63a5SJoonsoo Kim 		drain_freelist(cachep, n, INT_MAX);
11038f9f8d9eSDavid Rientjes 
1104ce8eb6c4SChristoph Lameter 		if (!list_empty(&n->slabs_full) ||
1105ce8eb6c4SChristoph Lameter 		    !list_empty(&n->slabs_partial)) {
11068f9f8d9eSDavid Rientjes 			ret = -EBUSY;
11078f9f8d9eSDavid Rientjes 			break;
11088f9f8d9eSDavid Rientjes 		}
11098f9f8d9eSDavid Rientjes 	}
11108f9f8d9eSDavid Rientjes 	return ret;
11118f9f8d9eSDavid Rientjes }
11128f9f8d9eSDavid Rientjes 
11138f9f8d9eSDavid Rientjes static int __meminit slab_memory_callback(struct notifier_block *self,
11148f9f8d9eSDavid Rientjes 					unsigned long action, void *arg)
11158f9f8d9eSDavid Rientjes {
11168f9f8d9eSDavid Rientjes 	struct memory_notify *mnb = arg;
11178f9f8d9eSDavid Rientjes 	int ret = 0;
11188f9f8d9eSDavid Rientjes 	int nid;
11198f9f8d9eSDavid Rientjes 
11208f9f8d9eSDavid Rientjes 	nid = mnb->status_change_nid;
11218f9f8d9eSDavid Rientjes 	if (nid < 0)
11228f9f8d9eSDavid Rientjes 		goto out;
11238f9f8d9eSDavid Rientjes 
11248f9f8d9eSDavid Rientjes 	switch (action) {
11258f9f8d9eSDavid Rientjes 	case MEM_GOING_ONLINE:
112618004c5dSChristoph Lameter 		mutex_lock(&slab_mutex);
11276a67368cSChristoph Lameter 		ret = init_cache_node_node(nid);
112818004c5dSChristoph Lameter 		mutex_unlock(&slab_mutex);
11298f9f8d9eSDavid Rientjes 		break;
11308f9f8d9eSDavid Rientjes 	case MEM_GOING_OFFLINE:
113118004c5dSChristoph Lameter 		mutex_lock(&slab_mutex);
11326a67368cSChristoph Lameter 		ret = drain_cache_node_node(nid);
113318004c5dSChristoph Lameter 		mutex_unlock(&slab_mutex);
11348f9f8d9eSDavid Rientjes 		break;
11358f9f8d9eSDavid Rientjes 	case MEM_ONLINE:
11368f9f8d9eSDavid Rientjes 	case MEM_OFFLINE:
11378f9f8d9eSDavid Rientjes 	case MEM_CANCEL_ONLINE:
11388f9f8d9eSDavid Rientjes 	case MEM_CANCEL_OFFLINE:
11398f9f8d9eSDavid Rientjes 		break;
11408f9f8d9eSDavid Rientjes 	}
11418f9f8d9eSDavid Rientjes out:
11425fda1bd5SPrarit Bhargava 	return notifier_from_errno(ret);
11438f9f8d9eSDavid Rientjes }
114476af6a05SDave Hansen #endif /* CONFIG_NUMA */
11458f9f8d9eSDavid Rientjes 
1146e498be7dSChristoph Lameter /*
1147ce8eb6c4SChristoph Lameter  * swap the static kmem_cache_node with kmalloced memory
1148e498be7dSChristoph Lameter  */
11496744f087SChristoph Lameter static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *list,
1150a737b3e2SAndrew Morton 				int nodeid)
1151e498be7dSChristoph Lameter {
11526744f087SChristoph Lameter 	struct kmem_cache_node *ptr;
1153e498be7dSChristoph Lameter 
11546744f087SChristoph Lameter 	ptr = kmalloc_node(sizeof(struct kmem_cache_node), GFP_NOWAIT, nodeid);
1155e498be7dSChristoph Lameter 	BUG_ON(!ptr);
1156e498be7dSChristoph Lameter 
11576744f087SChristoph Lameter 	memcpy(ptr, list, sizeof(struct kmem_cache_node));
11582b2d5493SIngo Molnar 	/*
11592b2d5493SIngo Molnar 	 * Do not assume that spinlocks can be initialized via memcpy:
11602b2d5493SIngo Molnar 	 */
1161b539ce9fSJiri Kosina 	raw_spin_lock_init(&ptr->list_lock);
11622b2d5493SIngo Molnar 
1163e498be7dSChristoph Lameter 	MAKE_ALL_LISTS(cachep, ptr, nodeid);
11646a67368cSChristoph Lameter 	cachep->node[nodeid] = ptr;
1165e498be7dSChristoph Lameter }
1166e498be7dSChristoph Lameter 
1167a737b3e2SAndrew Morton /*
1168ce8eb6c4SChristoph Lameter  * For setting up all the kmem_cache_node structures for a cache whose
1169ce8eb6c4SChristoph Lameter  * buffer_size is the same as the size of kmem_cache_node.
1170556a169dSPekka Enberg  */
1171ce8eb6c4SChristoph Lameter static void __init set_up_node(struct kmem_cache *cachep, int index)
1172556a169dSPekka Enberg {
1173556a169dSPekka Enberg 	int node;
1174556a169dSPekka Enberg 
1175556a169dSPekka Enberg 	for_each_online_node(node) {
1176ce8eb6c4SChristoph Lameter 		cachep->node[node] = &init_kmem_cache_node[index + node];
11776a67368cSChristoph Lameter 		cachep->node[node]->next_reap = jiffies +
11785f0985bbSJianyu Zhan 		    REAPTIMEOUT_NODE +
11795f0985bbSJianyu Zhan 		    ((unsigned long)cachep) % REAPTIMEOUT_NODE;
1180556a169dSPekka Enberg 	}
1181556a169dSPekka Enberg }
1182556a169dSPekka Enberg 
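/*
 * Illustrative note (a sketch, assuming the conventional definitions of
 * CACHE_CACHE == 0 and SIZE_NODE == MAX_NUMNODES earlier in this file):
 * set_up_node(kmem_cache, CACHE_CACHE) points node N of the boot kmem_cache
 * at init_kmem_cache_node[N], while the bootstrap kmalloc node cache later
 * uses init_kmem_cache_node[MAX_NUMNODES + N], so the two bootstrap users
 * never share a static kmem_cache_node.
 */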
1183556a169dSPekka Enberg /*
1184a737b3e2SAndrew Morton  * Initialisation.  Called after the page allocator has been initialised and
1185a737b3e2SAndrew Morton  * before smp_init().
11861da177e4SLinus Torvalds  */
11871da177e4SLinus Torvalds void __init kmem_cache_init(void)
11881da177e4SLinus Torvalds {
1189e498be7dSChristoph Lameter 	int i;
1190e498be7dSChristoph Lameter 
11919b030cb8SChristoph Lameter 	kmem_cache = &kmem_cache_boot;
11929b030cb8SChristoph Lameter 
11938888177eSJoonsoo Kim 	if (!IS_ENABLED(CONFIG_NUMA) || num_possible_nodes() == 1)
119462918a03SSiddha, Suresh B 		use_alien_caches = 0;
119562918a03SSiddha, Suresh B 
11963c583465SChristoph Lameter 	for (i = 0; i < NUM_INIT_LISTS; i++)
1197ce8eb6c4SChristoph Lameter 		kmem_cache_node_init(&init_kmem_cache_node[i]);
11983c583465SChristoph Lameter 
11991da177e4SLinus Torvalds 	/*
12001da177e4SLinus Torvalds 	 * Fragmentation resistance on low memory - only use bigger
12013df1cccdSDavid Rientjes 	 * page orders on machines with more than 32MB of memory if
12023df1cccdSDavid Rientjes 	 * not overridden on the command line.
12031da177e4SLinus Torvalds 	 */
1204ca79b0c2SArun KS 	if (!slab_max_order_set && totalram_pages() > (32 << 20) >> PAGE_SHIFT)
1205543585ccSDavid Rientjes 		slab_max_order = SLAB_MAX_ORDER_HI;
12061da177e4SLinus Torvalds 
12071da177e4SLinus Torvalds 	/* Bootstrap is tricky, because several objects are allocated
12081da177e4SLinus Torvalds 	 * from caches that do not exist yet:
12099b030cb8SChristoph Lameter 	 * 1) initialize the kmem_cache cache: it contains the struct
12109b030cb8SChristoph Lameter 	 *    kmem_cache structures of all caches, except kmem_cache itself:
12119b030cb8SChristoph Lameter 	 *    kmem_cache is statically allocated.
1212e498be7dSChristoph Lameter 	 *    Initially an __init data area is used for the head array and the
1213ce8eb6c4SChristoph Lameter 	 *    kmem_cache_node structures, it's replaced with a kmalloc allocated
1214e498be7dSChristoph Lameter 	 *    array at the end of the bootstrap.
12151da177e4SLinus Torvalds 	 * 2) Create the first kmalloc cache.
1216343e0d7aSPekka Enberg 	 *    The struct kmem_cache for the new cache is allocated normally.
1217e498be7dSChristoph Lameter 	 *    An __init data area is used for the head array.
1218e498be7dSChristoph Lameter 	 * 3) Create the remaining kmalloc caches, with minimally sized
1219e498be7dSChristoph Lameter 	 *    head arrays.
12209b030cb8SChristoph Lameter 	 * 4) Replace the __init data head arrays for kmem_cache and the first
12211da177e4SLinus Torvalds 	 *    kmalloc cache with kmalloc allocated arrays.
1222ce8eb6c4SChristoph Lameter 	 * 5) Replace the __init data for kmem_cache_node for kmem_cache and
1223e498be7dSChristoph Lameter 	 *    the other caches with kmalloc allocated memory.
1224e498be7dSChristoph Lameter 	 * 6) Resize the head arrays of the kmalloc caches to their final sizes.
12251da177e4SLinus Torvalds 	 */
12261da177e4SLinus Torvalds 
12279b030cb8SChristoph Lameter 	/* 1) create the kmem_cache */
12281da177e4SLinus Torvalds 
12298da3430dSEric Dumazet 	/*
1230b56efcf0SEric Dumazet 	 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
12318da3430dSEric Dumazet 	 */
12322f9baa9fSChristoph Lameter 	create_boot_cache(kmem_cache, "kmem_cache",
1233bf0dea23SJoonsoo Kim 		offsetof(struct kmem_cache, node) +
12346744f087SChristoph Lameter 				  nr_node_ids * sizeof(struct kmem_cache_node *),
12358eb8284bSDavid Windsor 				  SLAB_HWCACHE_ALIGN, 0, 0);
12362f9baa9fSChristoph Lameter 	list_add(&kmem_cache->list, &slab_caches);
1237bf0dea23SJoonsoo Kim 	slab_state = PARTIAL;
12381da177e4SLinus Torvalds 
1239a737b3e2SAndrew Morton 	/*
1240bf0dea23SJoonsoo Kim 	 * Initialize the caches that provide memory for the kmem_cache_node
1241bf0dea23SJoonsoo Kim 	 * structures first.  Without this, further allocations will bug.
1242e498be7dSChristoph Lameter 	 */
12430c474d31SCatalin Marinas 	new_kmalloc_cache(INDEX_NODE, KMALLOC_NORMAL, ARCH_KMALLOC_FLAGS);
1244bf0dea23SJoonsoo Kim 	slab_state = PARTIAL_NODE;
124534cc6990SDaniel Sanders 	setup_kmalloc_cache_index_table();
1246e498be7dSChristoph Lameter 
1247ce8eb6c4SChristoph Lameter 	/* 5) Replace the bootstrap kmem_cache_node */
1248e498be7dSChristoph Lameter 	{
12491ca4cb24SPekka Enberg 		int nid;
12501da177e4SLinus Torvalds 
12519c09a95cSMel Gorman 		for_each_online_node(nid) {
1252ce8eb6c4SChristoph Lameter 			init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid);
1253556a169dSPekka Enberg 
1254cc252eaeSVlastimil Babka 			init_list(kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE],
1255ce8eb6c4SChristoph Lameter 					  &init_kmem_cache_node[SIZE_NODE + nid], nid);
1256e498be7dSChristoph Lameter 		}
1257e498be7dSChristoph Lameter 	}
1258e498be7dSChristoph Lameter 
1259f97d5f63SChristoph Lameter 	create_kmalloc_caches(ARCH_KMALLOC_FLAGS);
12608429db5cSPekka Enberg }
12618429db5cSPekka Enberg 
12628429db5cSPekka Enberg void __init kmem_cache_init_late(void)
12631da177e4SLinus Torvalds {
1264343e0d7aSPekka Enberg 	struct kmem_cache *cachep;
12658429db5cSPekka Enberg 
12668429db5cSPekka Enberg 	/* 6) resize the head arrays to their final sizes */
126718004c5dSChristoph Lameter 	mutex_lock(&slab_mutex);
126818004c5dSChristoph Lameter 	list_for_each_entry(cachep, &slab_caches, list)
126983b519e8SPekka Enberg 		if (enable_cpucache(cachep, GFP_NOWAIT))
12702ed3a4efSChristoph Lameter 			BUG();
127118004c5dSChristoph Lameter 	mutex_unlock(&slab_mutex);
1272056c6241SRavikiran G Thirumalai 
127397d06609SChristoph Lameter 	/* Done! */
127497d06609SChristoph Lameter 	slab_state = FULL;
127597d06609SChristoph Lameter 
12768f9f8d9eSDavid Rientjes #ifdef CONFIG_NUMA
12778f9f8d9eSDavid Rientjes 	/*
12788f9f8d9eSDavid Rientjes 	 * Register a memory hotplug callback that initializes and frees
12796a67368cSChristoph Lameter 	 * node.
12808f9f8d9eSDavid Rientjes 	 */
12818f9f8d9eSDavid Rientjes 	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
12828f9f8d9eSDavid Rientjes #endif
12838f9f8d9eSDavid Rientjes 
1284a737b3e2SAndrew Morton 	/*
1285a737b3e2SAndrew Morton 	 * The reap timers are started later, with a module init call: That part
1286a737b3e2SAndrew Morton 	 * of the kernel is not yet operational.
12871da177e4SLinus Torvalds 	 */
12881da177e4SLinus Torvalds }
12891da177e4SLinus Torvalds 
12901da177e4SLinus Torvalds static int __init cpucache_init(void)
12911da177e4SLinus Torvalds {
12926731d4f1SSebastian Andrzej Siewior 	int ret;
12931da177e4SLinus Torvalds 
12941da177e4SLinus Torvalds 	/*
1295a737b3e2SAndrew Morton 	 * Register the timers that return unneeded pages to the page allocator
12961da177e4SLinus Torvalds 	 */
12976731d4f1SSebastian Andrzej Siewior 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "SLAB online",
12986731d4f1SSebastian Andrzej Siewior 				slab_online_cpu, slab_offline_cpu);
12996731d4f1SSebastian Andrzej Siewior 	WARN_ON(ret < 0);
1300a164f896SGlauber Costa 
13011da177e4SLinus Torvalds 	return 0;
13021da177e4SLinus Torvalds }
13031da177e4SLinus Torvalds __initcall(cpucache_init);
13041da177e4SLinus Torvalds 
13058bdec192SRafael Aquini static noinline void
13068bdec192SRafael Aquini slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
13078bdec192SRafael Aquini {
13089a02d699SDavid Rientjes #if DEBUG
1309ce8eb6c4SChristoph Lameter 	struct kmem_cache_node *n;
13108bdec192SRafael Aquini 	unsigned long flags;
13118bdec192SRafael Aquini 	int node;
13129a02d699SDavid Rientjes 	static DEFINE_RATELIMIT_STATE(slab_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
13139a02d699SDavid Rientjes 				      DEFAULT_RATELIMIT_BURST);
13149a02d699SDavid Rientjes 
13159a02d699SDavid Rientjes 	if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slab_oom_rs))
13169a02d699SDavid Rientjes 		return;
13178bdec192SRafael Aquini 
13185b3810e5SVlastimil Babka 	pr_warn("SLAB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
13195b3810e5SVlastimil Babka 		nodeid, gfpflags, &gfpflags);
13205b3810e5SVlastimil Babka 	pr_warn("  cache: %s, object size: %d, order: %d\n",
13213b0efdfaSChristoph Lameter 		cachep->name, cachep->size, cachep->gfporder);
13228bdec192SRafael Aquini 
132318bf8541SChristoph Lameter 	for_each_kmem_cache_node(cachep, node, n) {
1324bf00bd34SDavid Rientjes 		unsigned long total_slabs, free_slabs, free_objs;
13258bdec192SRafael Aquini 
1326b539ce9fSJiri Kosina 		raw_spin_lock_irqsave(&n->list_lock, flags);
1327bf00bd34SDavid Rientjes 		total_slabs = n->total_slabs;
1328bf00bd34SDavid Rientjes 		free_slabs = n->free_slabs;
1329bf00bd34SDavid Rientjes 		free_objs = n->free_objects;
1330b539ce9fSJiri Kosina 		raw_spin_unlock_irqrestore(&n->list_lock, flags);
13318bdec192SRafael Aquini 
1332bf00bd34SDavid Rientjes 		pr_warn("  node %d: slabs: %ld/%ld, objs: %ld/%ld\n",
1333bf00bd34SDavid Rientjes 			node, total_slabs - free_slabs, total_slabs,
1334bf00bd34SDavid Rientjes 			(total_slabs * cachep->num) - free_objs,
1335bf00bd34SDavid Rientjes 			total_slabs * cachep->num);
13368bdec192SRafael Aquini 	}
13379a02d699SDavid Rientjes #endif
13388bdec192SRafael Aquini }
13398bdec192SRafael Aquini 
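/*
 * For reference, a ratelimited report from the function above would look
 * roughly like this (hypothetical values, built only from the pr_warn()
 * format strings used there):
 *
 *	SLAB: Unable to allocate memory on node 0, gfp=0xcc0(GFP_KERNEL)
 *	  cache: dentry, object size: 192, order: 0
 *	  node 0: slabs: 1024/1024, objs: 21504/21504
 */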
13401da177e4SLinus Torvalds /*
13418a7d9b43SWang Sheng-Hui  * Interface to system's page allocator. No need to hold the
13428a7d9b43SWang Sheng-Hui  * kmem_cache_node ->list_lock.
13431da177e4SLinus Torvalds  *
13441da177e4SLinus Torvalds  * If we requested dmaable memory, we will get it. Even if we
13451da177e4SLinus Torvalds  * did not request dmaable memory, we might get it, but that
13461da177e4SLinus Torvalds  * would be relatively rare and ignorable.
13471da177e4SLinus Torvalds  */
134842c0faacSVlastimil Babka static struct slab *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
13490c3aa83eSJoonsoo Kim 								int nodeid)
13501da177e4SLinus Torvalds {
135142c0faacSVlastimil Babka 	struct folio *folio;
135242c0faacSVlastimil Babka 	struct slab *slab;
1353765c4507SChristoph Lameter 
1354a618e89fSGlauber Costa 	flags |= cachep->allocflags;
1355e1b6aa6fSChristoph Hellwig 
135642c0faacSVlastimil Babka 	folio = (struct folio *) __alloc_pages_node(nodeid, flags, cachep->gfporder);
135742c0faacSVlastimil Babka 	if (!folio) {
13588bdec192SRafael Aquini 		slab_out_of_memory(cachep, flags, nodeid);
13591da177e4SLinus Torvalds 		return NULL;
13608bdec192SRafael Aquini 	}
13611da177e4SLinus Torvalds 
136242c0faacSVlastimil Babka 	slab = folio_slab(folio);
1363072bb0aaSMel Gorman 
136442c0faacSVlastimil Babka 	account_slab(slab, cachep->gfporder, cachep, flags);
136542c0faacSVlastimil Babka 	__folio_set_slab(folio);
13668b881763SVlastimil Babka 	/* Make the flag visible before any changes to folio->mapping */
13678b881763SVlastimil Babka 	smp_wmb();
136842c0faacSVlastimil Babka 	/* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */
136902d65d6fSSidhartha Kumar 	if (sk_memalloc_socks() && folio_is_pfmemalloc(folio))
137042c0faacSVlastimil Babka 		slab_set_pfmemalloc(slab);
137142c0faacSVlastimil Babka 
137242c0faacSVlastimil Babka 	return slab;
13731da177e4SLinus Torvalds }
13741da177e4SLinus Torvalds 
13751da177e4SLinus Torvalds /*
13761da177e4SLinus Torvalds  * Interface to system's page release.
13771da177e4SLinus Torvalds  */
137942c0faacSVlastimil Babka static void kmem_freepages(struct kmem_cache *cachep, struct slab *slab)
13791da177e4SLinus Torvalds {
138027ee57c9SVladimir Davydov 	int order = cachep->gfporder;
138142c0faacSVlastimil Babka 	struct folio *folio = slab_folio(slab);
138273293c2fSJoonsoo Kim 
138342c0faacSVlastimil Babka 	BUG_ON(!folio_test_slab(folio));
138442c0faacSVlastimil Babka 	__slab_clear_pfmemalloc(slab);
1385c034c6a4SSeongJae Park 	page_mapcount_reset(&folio->page);
138642c0faacSVlastimil Babka 	folio->mapping = NULL;
13878b881763SVlastimil Babka 	/* Make the mapping reset visible before clearing the flag */
13888b881763SVlastimil Babka 	smp_wmb();
13898b881763SVlastimil Babka 	__folio_clear_slab(folio);
13901f458cbfSGlauber Costa 
1391c7b23b68SYosry Ahmed 	mm_account_reclaimed_pages(1 << order);
139242c0faacSVlastimil Babka 	unaccount_slab(slab, order, cachep);
1393c034c6a4SSeongJae Park 	__free_pages(&folio->page, order);
13941da177e4SLinus Torvalds }
13951da177e4SLinus Torvalds 
13961da177e4SLinus Torvalds static void kmem_rcu_free(struct rcu_head *head)
13971da177e4SLinus Torvalds {
139868126702SJoonsoo Kim 	struct kmem_cache *cachep;
139942c0faacSVlastimil Babka 	struct slab *slab;
14001da177e4SLinus Torvalds 
140142c0faacSVlastimil Babka 	slab = container_of(head, struct slab, rcu_head);
140242c0faacSVlastimil Babka 	cachep = slab->slab_cache;
140368126702SJoonsoo Kim 
140442c0faacSVlastimil Babka 	kmem_freepages(cachep, slab);
14051da177e4SLinus Torvalds }
14061da177e4SLinus Torvalds 
14071da177e4SLinus Torvalds #if DEBUG
140881ce2ebdSlvqian static inline bool is_debug_pagealloc_cache(struct kmem_cache *cachep)
140940b44137SJoonsoo Kim {
141081ce2ebdSlvqian 	return debug_pagealloc_enabled_static() && OFF_SLAB(cachep) &&
141181ce2ebdSlvqian 			((cachep->size % PAGE_SIZE) == 0);
141240b44137SJoonsoo Kim }
14131da177e4SLinus Torvalds 
14141da177e4SLinus Torvalds #ifdef CONFIG_DEBUG_PAGEALLOC
141580552f0fSQian Cai static void slab_kernel_map(struct kmem_cache *cachep, void *objp, int map)
141640b44137SJoonsoo Kim {
141740b44137SJoonsoo Kim 	if (!is_debug_pagealloc_cache(cachep))
141840b44137SJoonsoo Kim 		return;
141940b44137SJoonsoo Kim 
142077bc7fd6SMike Rapoport 	__kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map);
142140b44137SJoonsoo Kim }
142240b44137SJoonsoo Kim 
142340b44137SJoonsoo Kim #else
142440b44137SJoonsoo Kim static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp,
142580552f0fSQian Cai 				int map) {}
142640b44137SJoonsoo Kim 
14271da177e4SLinus Torvalds #endif
14281da177e4SLinus Torvalds 
1429343e0d7aSPekka Enberg static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
14301da177e4SLinus Torvalds {
14318c138bc0SChristoph Lameter 	int size = cachep->object_size;
14323dafccf2SManfred Spraul 	addr = &((char *)addr)[obj_offset(cachep)];
14331da177e4SLinus Torvalds 
14341da177e4SLinus Torvalds 	memset(addr, val, size);
14351da177e4SLinus Torvalds 	*(unsigned char *)(addr + size - 1) = POISON_END;
14361da177e4SLinus Torvalds }
14371da177e4SLinus Torvalds 
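/*
 * Example of the resulting layout (a sketch, assuming the conventional
 * poison values from include/linux/poison.h: POISON_FREE == 0x6b and
 * POISON_END == 0xa5): an 8-byte object poisoned with POISON_FREE ends up
 * as
 *
 *	6b 6b 6b 6b 6b 6b 6b a5
 *
 * so check_poison_obj() below can spot any byte that was written after the
 * object was freed.
 */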
14381da177e4SLinus Torvalds static void dump_line(char *data, int offset, int limit)
14391da177e4SLinus Torvalds {
14401da177e4SLinus Torvalds 	int i;
1441aa83aa40SDave Jones 	unsigned char error = 0;
1442aa83aa40SDave Jones 	int bad_count = 0;
1443aa83aa40SDave Jones 
14441170532bSJoe Perches 	pr_err("%03x: ", offset);
1445aa83aa40SDave Jones 	for (i = 0; i < limit; i++) {
1446aa83aa40SDave Jones 		if (data[offset + i] != POISON_FREE) {
1447aa83aa40SDave Jones 			error = data[offset + i];
1448aa83aa40SDave Jones 			bad_count++;
1449aa83aa40SDave Jones 		}
1450aa83aa40SDave Jones 	}
1451fdde6abbSSebastian Andrzej Siewior 	print_hex_dump(KERN_CONT, "", 0, 16, 1,
1452fdde6abbSSebastian Andrzej Siewior 			&data[offset], limit, 1);
1453aa83aa40SDave Jones 
1454aa83aa40SDave Jones 	if (bad_count == 1) {
1455aa83aa40SDave Jones 		error ^= POISON_FREE;
1456aa83aa40SDave Jones 		if (!(error & (error - 1))) {
14571170532bSJoe Perches 			pr_err("Single bit error detected. Probably bad RAM.\n");
1458aa83aa40SDave Jones #ifdef CONFIG_X86
14591170532bSJoe Perches 			pr_err("Run memtest86+ or a similar memory test tool.\n");
1460aa83aa40SDave Jones #else
14611170532bSJoe Perches 			pr_err("Run a memory test tool.\n");
1462aa83aa40SDave Jones #endif
1463aa83aa40SDave Jones 		}
1464aa83aa40SDave Jones 	}
14651da177e4SLinus Torvalds }
14661da177e4SLinus Torvalds #endif
14671da177e4SLinus Torvalds 
14681da177e4SLinus Torvalds #if DEBUG
14691da177e4SLinus Torvalds 
1470343e0d7aSPekka Enberg static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
14711da177e4SLinus Torvalds {
14721da177e4SLinus Torvalds 	int i, size;
14731da177e4SLinus Torvalds 	char *realobj;
14741da177e4SLinus Torvalds 
14751da177e4SLinus Torvalds 	if (cachep->flags & SLAB_RED_ZONE) {
14761170532bSJoe Perches 		pr_err("Redzone: 0x%llx/0x%llx\n",
14771da177e4SLinus Torvalds 		       *dbg_redzone1(cachep, objp),
14781da177e4SLinus Torvalds 		       *dbg_redzone2(cachep, objp));
14791da177e4SLinus Torvalds 	}
14801da177e4SLinus Torvalds 
148185c3e4a5SGeert Uytterhoeven 	if (cachep->flags & SLAB_STORE_USER)
148285c3e4a5SGeert Uytterhoeven 		pr_err("Last user: (%pSR)\n", *dbg_userword(cachep, objp));
14833dafccf2SManfred Spraul 	realobj = (char *)objp + obj_offset(cachep);
14848c138bc0SChristoph Lameter 	size = cachep->object_size;
14851da177e4SLinus Torvalds 	for (i = 0; i < size && lines; i += 16, lines--) {
14861da177e4SLinus Torvalds 		int limit;
14871da177e4SLinus Torvalds 		limit = 16;
14881da177e4SLinus Torvalds 		if (i + limit > size)
14891da177e4SLinus Torvalds 			limit = size - i;
14901da177e4SLinus Torvalds 		dump_line(realobj, i, limit);
14911da177e4SLinus Torvalds 	}
14921da177e4SLinus Torvalds }
14931da177e4SLinus Torvalds 
1494343e0d7aSPekka Enberg static void check_poison_obj(struct kmem_cache *cachep, void *objp)
14951da177e4SLinus Torvalds {
14961da177e4SLinus Torvalds 	char *realobj;
14971da177e4SLinus Torvalds 	int size, i;
14981da177e4SLinus Torvalds 	int lines = 0;
14991da177e4SLinus Torvalds 
150040b44137SJoonsoo Kim 	if (is_debug_pagealloc_cache(cachep))
150140b44137SJoonsoo Kim 		return;
150240b44137SJoonsoo Kim 
15033dafccf2SManfred Spraul 	realobj = (char *)objp + obj_offset(cachep);
15048c138bc0SChristoph Lameter 	size = cachep->object_size;
15051da177e4SLinus Torvalds 
15061da177e4SLinus Torvalds 	for (i = 0; i < size; i++) {
15071da177e4SLinus Torvalds 		char exp = POISON_FREE;
15081da177e4SLinus Torvalds 		if (i == size - 1)
15091da177e4SLinus Torvalds 			exp = POISON_END;
15101da177e4SLinus Torvalds 		if (realobj[i] != exp) {
15111da177e4SLinus Torvalds 			int limit;
15121da177e4SLinus Torvalds 			/* Mismatch ! */
15131da177e4SLinus Torvalds 			/* Print header */
15141da177e4SLinus Torvalds 			if (lines == 0) {
151585c3e4a5SGeert Uytterhoeven 				pr_err("Slab corruption (%s): %s start=%px, len=%d\n",
15161170532bSJoe Perches 				       print_tainted(), cachep->name,
15171170532bSJoe Perches 				       realobj, size);
15181da177e4SLinus Torvalds 				print_objinfo(cachep, objp, 0);
15191da177e4SLinus Torvalds 			}
15201da177e4SLinus Torvalds 			/* Hexdump the affected line */
15211da177e4SLinus Torvalds 			i = (i / 16) * 16;
15221da177e4SLinus Torvalds 			limit = 16;
15231da177e4SLinus Torvalds 			if (i + limit > size)
15241da177e4SLinus Torvalds 				limit = size - i;
15251da177e4SLinus Torvalds 			dump_line(realobj, i, limit);
15261da177e4SLinus Torvalds 			i += 16;
15271da177e4SLinus Torvalds 			lines++;
15281da177e4SLinus Torvalds 			/* Limit to 5 lines */
15291da177e4SLinus Torvalds 			if (lines > 5)
15301da177e4SLinus Torvalds 				break;
15311da177e4SLinus Torvalds 		}
15321da177e4SLinus Torvalds 	}
15331da177e4SLinus Torvalds 	if (lines != 0) {
15341da177e4SLinus Torvalds 		/* Print some data about the neighboring objects, if they
15351da177e4SLinus Torvalds 		 * exist:
15361da177e4SLinus Torvalds 		 */
15377981e67eSVlastimil Babka 		struct slab *slab = virt_to_slab(objp);
15388fea4e96SPekka Enberg 		unsigned int objnr;
15391da177e4SLinus Torvalds 
154040f3bf0cSVlastimil Babka 		objnr = obj_to_index(cachep, slab, objp);
15411da177e4SLinus Torvalds 		if (objnr) {
15427981e67eSVlastimil Babka 			objp = index_to_obj(cachep, slab, objnr - 1);
15433dafccf2SManfred Spraul 			realobj = (char *)objp + obj_offset(cachep);
154485c3e4a5SGeert Uytterhoeven 			pr_err("Prev obj: start=%px, len=%d\n", realobj, size);
15451da177e4SLinus Torvalds 			print_objinfo(cachep, objp, 2);
15461da177e4SLinus Torvalds 		}
15471da177e4SLinus Torvalds 		if (objnr + 1 < cachep->num) {
15487981e67eSVlastimil Babka 			objp = index_to_obj(cachep, slab, objnr + 1);
15493dafccf2SManfred Spraul 			realobj = (char *)objp + obj_offset(cachep);
155085c3e4a5SGeert Uytterhoeven 			pr_err("Next obj: start=%px, len=%d\n", realobj, size);
15511da177e4SLinus Torvalds 			print_objinfo(cachep, objp, 2);
15521da177e4SLinus Torvalds 		}
15531da177e4SLinus Torvalds 	}
15541da177e4SLinus Torvalds }
15551da177e4SLinus Torvalds #endif
15561da177e4SLinus Torvalds 
15571da177e4SLinus Torvalds #if DEBUG
15588456a648SJoonsoo Kim static void slab_destroy_debugcheck(struct kmem_cache *cachep,
15597981e67eSVlastimil Babka 						struct slab *slab)
156012dd36faSMatthew Dobson {
15611da177e4SLinus Torvalds 	int i;
1562b03a017bSJoonsoo Kim 
1563b03a017bSJoonsoo Kim 	if (OBJFREELIST_SLAB(cachep) && cachep->flags & SLAB_POISON) {
15647981e67eSVlastimil Babka 		poison_obj(cachep, slab->freelist - obj_offset(cachep),
1565b03a017bSJoonsoo Kim 			POISON_FREE);
1566b03a017bSJoonsoo Kim 	}
1567b03a017bSJoonsoo Kim 
15681da177e4SLinus Torvalds 	for (i = 0; i < cachep->num; i++) {
15697981e67eSVlastimil Babka 		void *objp = index_to_obj(cachep, slab, i);
15701da177e4SLinus Torvalds 
15711da177e4SLinus Torvalds 		if (cachep->flags & SLAB_POISON) {
15721da177e4SLinus Torvalds 			check_poison_obj(cachep, objp);
157380552f0fSQian Cai 			slab_kernel_map(cachep, objp, 1);
15741da177e4SLinus Torvalds 		}
15751da177e4SLinus Torvalds 		if (cachep->flags & SLAB_RED_ZONE) {
15761da177e4SLinus Torvalds 			if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
1577756a025fSJoe Perches 				slab_error(cachep, "start of a freed object was overwritten");
15781da177e4SLinus Torvalds 			if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
1579756a025fSJoe Perches 				slab_error(cachep, "end of a freed object was overwritten");
15801da177e4SLinus Torvalds 		}
15811da177e4SLinus Torvalds 	}
158212dd36faSMatthew Dobson }
15831da177e4SLinus Torvalds #else
15848456a648SJoonsoo Kim static void slab_destroy_debugcheck(struct kmem_cache *cachep,
15857981e67eSVlastimil Babka 						struct slab *slab)
158612dd36faSMatthew Dobson {
158712dd36faSMatthew Dobson }
15881da177e4SLinus Torvalds #endif
15891da177e4SLinus Torvalds 
1590911851e6SRandy Dunlap /**
1591911851e6SRandy Dunlap  * slab_destroy - destroy and release all objects in a slab
1592911851e6SRandy Dunlap  * @cachep: cache pointer being destroyed
1593dd35f71aSVlastimil Babka  * @slab: slab being destroyed
1594911851e6SRandy Dunlap  *
1595dd35f71aSVlastimil Babka  * Destroy all the objs in a slab, and release the mem back to the system.
1596dd35f71aSVlastimil Babka  * Before calling, the slab must have been unlinked from the cache. The
15978a7d9b43SWang Sheng-Hui  * kmem_cache_node ->list_lock is not held/needed.
159812dd36faSMatthew Dobson  */
15997981e67eSVlastimil Babka static void slab_destroy(struct kmem_cache *cachep, struct slab *slab)
160012dd36faSMatthew Dobson {
16017e007355SJoonsoo Kim 	void *freelist;
160212dd36faSMatthew Dobson 
16037981e67eSVlastimil Babka 	freelist = slab->freelist;
16047981e67eSVlastimil Babka 	slab_destroy_debugcheck(cachep, slab);
16055f0d5a3aSPaul E. McKenney 	if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU))
16067981e67eSVlastimil Babka 		call_rcu(&slab->rcu_head, kmem_rcu_free);
1607bc4f610dSKirill A. Shutemov 	else
16087981e67eSVlastimil Babka 		kmem_freepages(cachep, slab);
160968126702SJoonsoo Kim 
161068126702SJoonsoo Kim 	/*
16118456a648SJoonsoo Kim 	 * From now on, we don't use the freelist,
161268126702SJoonsoo Kim 	 * although the actual pages can be freed in RCU context.
161368126702SJoonsoo Kim 	 */
1614873623dfSIngo Molnar 	if (OFF_SLAB(cachep))
1615e36ce448SHyeonggon Yoo 		kfree(freelist);
16161da177e4SLinus Torvalds }
16171da177e4SLinus Torvalds 
1618678ff6a7SShakeel Butt /*
1619678ff6a7SShakeel Butt  * Update the size of the caches before calling slabs_destroy as it may
1620678ff6a7SShakeel Butt  * recursively call kfree.
1621678ff6a7SShakeel Butt  */
162297654dfaSJoonsoo Kim static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list)
162397654dfaSJoonsoo Kim {
16247981e67eSVlastimil Babka 	struct slab *slab, *n;
162597654dfaSJoonsoo Kim 
16267981e67eSVlastimil Babka 	list_for_each_entry_safe(slab, n, list, slab_list) {
16277981e67eSVlastimil Babka 		list_del(&slab->slab_list);
16287981e67eSVlastimil Babka 		slab_destroy(cachep, slab);
162997654dfaSJoonsoo Kim 	}
163097654dfaSJoonsoo Kim }
163197654dfaSJoonsoo Kim 
16321da177e4SLinus Torvalds /**
1633a70773ddSRandy.Dunlap  * calculate_slab_order - calculate size (page order) of slabs
1634a70773ddSRandy.Dunlap  * @cachep: pointer to the cache that is being created
1635a70773ddSRandy.Dunlap  * @size: size of objects to be created in this cache.
1636a70773ddSRandy.Dunlap  * @flags: slab allocation flags
1637a70773ddSRandy.Dunlap  *
1638a70773ddSRandy.Dunlap  * Also calculates the number of objects per slab.
16394d268ebaSPekka Enberg  *
16404d268ebaSPekka Enberg  * This could be made much more intelligent.  For now, try to avoid using
16414d268ebaSPekka Enberg  * high order pages for slabs.  When the gfp() functions are more friendly
16424d268ebaSPekka Enberg  * towards high-order requests, this should be changed.
1643a862f68aSMike Rapoport  *
1644a862f68aSMike Rapoport  * Return: number of left-over bytes in a slab
16454d268ebaSPekka Enberg  */
1646a737b3e2SAndrew Morton static size_t calculate_slab_order(struct kmem_cache *cachep,
1647d50112edSAlexey Dobriyan 				size_t size, slab_flags_t flags)
16484d268ebaSPekka Enberg {
16494d268ebaSPekka Enberg 	size_t left_over = 0;
16509888e6faSLinus Torvalds 	int gfporder;
16514d268ebaSPekka Enberg 
16520aa817f0SChristoph Lameter 	for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
16534d268ebaSPekka Enberg 		unsigned int num;
16544d268ebaSPekka Enberg 		size_t remainder;
16554d268ebaSPekka Enberg 
165670f75067SJoonsoo Kim 		num = cache_estimate(gfporder, size, flags, &remainder);
16574d268ebaSPekka Enberg 		if (!num)
16584d268ebaSPekka Enberg 			continue;
16599888e6faSLinus Torvalds 
1660f315e3faSJoonsoo Kim 		/* Can't handle number of objects more than SLAB_OBJ_MAX_NUM */
1661f315e3faSJoonsoo Kim 		if (num > SLAB_OBJ_MAX_NUM)
1662f315e3faSJoonsoo Kim 			break;
1663f315e3faSJoonsoo Kim 
1664b1ab41c4SIngo Molnar 		if (flags & CFLGS_OFF_SLAB) {
16653217fd9bSJoonsoo Kim 			struct kmem_cache *freelist_cache;
16663217fd9bSJoonsoo Kim 			size_t freelist_size;
1667e36ce448SHyeonggon Yoo 			size_t freelist_cache_size;
1668b1ab41c4SIngo Molnar 
16693217fd9bSJoonsoo Kim 			freelist_size = num * sizeof(freelist_idx_t);
1670e36ce448SHyeonggon Yoo 			if (freelist_size > KMALLOC_MAX_CACHE_SIZE) {
1671e36ce448SHyeonggon Yoo 				freelist_cache_size = PAGE_SIZE << get_order(freelist_size);
1672e36ce448SHyeonggon Yoo 			} else {
1673*3c615294SGONG, Ruiqi 				freelist_cache = kmalloc_slab(freelist_size, 0u, _RET_IP_);
16743217fd9bSJoonsoo Kim 				if (!freelist_cache)
16753217fd9bSJoonsoo Kim 					continue;
1676e36ce448SHyeonggon Yoo 				freelist_cache_size = freelist_cache->size;
16773217fd9bSJoonsoo Kim 
16783217fd9bSJoonsoo Kim 				/*
16793217fd9bSJoonsoo Kim 				 * Needed to avoid possible looping condition
168076b342bdSJoonsoo Kim 				 * in cache_grow_begin()
16813217fd9bSJoonsoo Kim 				 */
16823217fd9bSJoonsoo Kim 				if (OFF_SLAB(freelist_cache))
16833217fd9bSJoonsoo Kim 					continue;
1684e36ce448SHyeonggon Yoo 			}
16853217fd9bSJoonsoo Kim 
16863217fd9bSJoonsoo Kim 			/* check if off slab has enough benefit */
1687e36ce448SHyeonggon Yoo 			if (freelist_cache_size > cachep->size / 2)
16883217fd9bSJoonsoo Kim 				continue;
1689b1ab41c4SIngo Molnar 		}
16904d268ebaSPekka Enberg 
16919888e6faSLinus Torvalds 		/* Found something acceptable - save it away */
16924d268ebaSPekka Enberg 		cachep->num = num;
16939888e6faSLinus Torvalds 		cachep->gfporder = gfporder;
16944d268ebaSPekka Enberg 		left_over = remainder;
16954d268ebaSPekka Enberg 
16964d268ebaSPekka Enberg 		/*
1697f78bb8adSLinus Torvalds 		 * A VFS-reclaimable slab tends to have most allocations
1698f78bb8adSLinus Torvalds 		 * as GFP_NOFS and we really don't want to have to be allocating
1699f78bb8adSLinus Torvalds 		 * higher-order pages when we are unable to shrink dcache.
1700f78bb8adSLinus Torvalds 		 */
1701f78bb8adSLinus Torvalds 		if (flags & SLAB_RECLAIM_ACCOUNT)
1702f78bb8adSLinus Torvalds 			break;
1703f78bb8adSLinus Torvalds 
1704f78bb8adSLinus Torvalds 		/*
17054d268ebaSPekka Enberg 		 * Large number of objects is good, but very large slabs are
17064d268ebaSPekka Enberg 		 * currently bad for the gfp()s.
17074d268ebaSPekka Enberg 		 */
1708543585ccSDavid Rientjes 		if (gfporder >= slab_max_order)
17094d268ebaSPekka Enberg 			break;
17104d268ebaSPekka Enberg 
17119888e6faSLinus Torvalds 		/*
17129888e6faSLinus Torvalds 		 * Acceptable internal fragmentation?
17139888e6faSLinus Torvalds 		 */
1714a737b3e2SAndrew Morton 		if (left_over * 8 <= (PAGE_SIZE << gfporder))
17154d268ebaSPekka Enberg 			break;
17164d268ebaSPekka Enberg 	}
17174d268ebaSPekka Enberg 	return left_over;
17184d268ebaSPekka Enberg }
17194d268ebaSPekka Enberg 
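/*
 * Worked example (a sketch, assuming 4K pages, an on-slab freelist and
 * sizeof(freelist_idx_t) == 1): for a 256 byte object at gfporder 0,
 * cache_estimate() can fit num = 4096 / 257 = 15 objects with
 * left_over = 4096 - 15 * 257 = 241 bytes.  Since 241 * 8 <= 4096, the
 * internal fragmentation test above accepts order 0 and the loop stops.
 */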
1720bf0dea23SJoonsoo Kim static struct array_cache __percpu *alloc_kmem_cache_cpus(
1721bf0dea23SJoonsoo Kim 		struct kmem_cache *cachep, int entries, int batchcount)
1722bf0dea23SJoonsoo Kim {
1723bf0dea23SJoonsoo Kim 	int cpu;
1724bf0dea23SJoonsoo Kim 	size_t size;
1725bf0dea23SJoonsoo Kim 	struct array_cache __percpu *cpu_cache;
1726bf0dea23SJoonsoo Kim 
1727bf0dea23SJoonsoo Kim 	size = sizeof(void *) * entries + sizeof(struct array_cache);
172885c9f4b0SJoonsoo Kim 	cpu_cache = __alloc_percpu(size, sizeof(void *));
1729bf0dea23SJoonsoo Kim 
1730bf0dea23SJoonsoo Kim 	if (!cpu_cache)
1731bf0dea23SJoonsoo Kim 		return NULL;
1732bf0dea23SJoonsoo Kim 
1733bf0dea23SJoonsoo Kim 	for_each_possible_cpu(cpu) {
1734bf0dea23SJoonsoo Kim 		init_arraycache(per_cpu_ptr(cpu_cache, cpu),
1735bf0dea23SJoonsoo Kim 				entries, batchcount);
1736bf0dea23SJoonsoo Kim 	}
1737bf0dea23SJoonsoo Kim 
1738bf0dea23SJoonsoo Kim 	return cpu_cache;
1739bf0dea23SJoonsoo Kim }
1740bf0dea23SJoonsoo Kim 
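/*
 * Note: during bootstrap, setup_cpu_cache() below asks for entries == 1 and
 * batchcount == 1, so each per-cpu array starts with room for a single
 * cached object; enable_cpucache() later resizes it to its real limit once
 * the allocator is fully up.
 */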
1741bd721ea7SFabian Frederick static int __ref setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
1742f30cf7d1SPekka Enberg {
174397d06609SChristoph Lameter 	if (slab_state >= FULL)
174483b519e8SPekka Enberg 		return enable_cpucache(cachep, gfp);
17452ed3a4efSChristoph Lameter 
1746bf0dea23SJoonsoo Kim 	cachep->cpu_cache = alloc_kmem_cache_cpus(cachep, 1, 1);
1747bf0dea23SJoonsoo Kim 	if (!cachep->cpu_cache)
1748bf0dea23SJoonsoo Kim 		return 1;
1749bf0dea23SJoonsoo Kim 
175097d06609SChristoph Lameter 	if (slab_state == DOWN) {
1751bf0dea23SJoonsoo Kim 		/* Creation of first cache (kmem_cache). */
1752bf0dea23SJoonsoo Kim 		set_up_node(kmem_cache, CACHE_CACHE);
17532f9baa9fSChristoph Lameter 	} else if (slab_state == PARTIAL) {
1754bf0dea23SJoonsoo Kim 		/* For kmem_cache_node */
1755ce8eb6c4SChristoph Lameter 		set_up_node(cachep, SIZE_NODE);
1756f30cf7d1SPekka Enberg 	} else {
1757f30cf7d1SPekka Enberg 		int node;
1758bf0dea23SJoonsoo Kim 
1759556a169dSPekka Enberg 		for_each_online_node(node) {
1760bf0dea23SJoonsoo Kim 			cachep->node[node] = kmalloc_node(
1761bf0dea23SJoonsoo Kim 				sizeof(struct kmem_cache_node), gfp, node);
17626a67368cSChristoph Lameter 			BUG_ON(!cachep->node[node]);
1763ce8eb6c4SChristoph Lameter 			kmem_cache_node_init(cachep->node[node]);
1764f30cf7d1SPekka Enberg 		}
1765f30cf7d1SPekka Enberg 	}
1766bf0dea23SJoonsoo Kim 
17676a67368cSChristoph Lameter 	cachep->node[numa_mem_id()]->next_reap =
17685f0985bbSJianyu Zhan 			jiffies + REAPTIMEOUT_NODE +
17695f0985bbSJianyu Zhan 			((unsigned long)cachep) % REAPTIMEOUT_NODE;
1770f30cf7d1SPekka Enberg 
1771f30cf7d1SPekka Enberg 	cpu_cache_get(cachep)->avail = 0;
1772f30cf7d1SPekka Enberg 	cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
1773f30cf7d1SPekka Enberg 	cpu_cache_get(cachep)->batchcount = 1;
1774f30cf7d1SPekka Enberg 	cpu_cache_get(cachep)->touched = 0;
1775f30cf7d1SPekka Enberg 	cachep->batchcount = 1;
1776f30cf7d1SPekka Enberg 	cachep->limit = BOOT_CPUCACHE_ENTRIES;
17772ed3a4efSChristoph Lameter 	return 0;
1778f30cf7d1SPekka Enberg }
1779f30cf7d1SPekka Enberg 
17800293d1fdSAlexey Dobriyan slab_flags_t kmem_cache_flags(unsigned int object_size,
178137540008SNikolay Borisov 	slab_flags_t flags, const char *name)
178212220deaSJoonsoo Kim {
178312220deaSJoonsoo Kim 	return flags;
178412220deaSJoonsoo Kim }
178512220deaSJoonsoo Kim 
178612220deaSJoonsoo Kim struct kmem_cache *
1787f4957d5bSAlexey Dobriyan __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
1788d50112edSAlexey Dobriyan 		   slab_flags_t flags, void (*ctor)(void *))
178912220deaSJoonsoo Kim {
179012220deaSJoonsoo Kim 	struct kmem_cache *cachep;
179112220deaSJoonsoo Kim 
179212220deaSJoonsoo Kim 	cachep = find_mergeable(size, align, flags, name, ctor);
179312220deaSJoonsoo Kim 	if (cachep) {
179412220deaSJoonsoo Kim 		cachep->refcount++;
179512220deaSJoonsoo Kim 
179612220deaSJoonsoo Kim 		/*
179712220deaSJoonsoo Kim 		 * Adjust the object sizes so that we clear
179812220deaSJoonsoo Kim 		 * the complete object on kzalloc.
179912220deaSJoonsoo Kim 		 */
180012220deaSJoonsoo Kim 		cachep->object_size = max_t(int, cachep->object_size, size);
180112220deaSJoonsoo Kim 	}
180212220deaSJoonsoo Kim 	return cachep;
180312220deaSJoonsoo Kim }
180412220deaSJoonsoo Kim 
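/*
 * Illustrative sketch of the aliasing above: two hypothetical callers such
 * as
 *
 *	kmem_cache_create("foo_cache", 192, 0, 0, NULL);
 *	kmem_cache_create("bar_cache", 192, 0, 0, NULL);
 *
 * may be handed the same merged kmem_cache, provided find_mergeable()
 * considers their size, alignment and flags compatible and neither has a
 * constructor.
 */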
1805b03a017bSJoonsoo Kim static bool set_objfreelist_slab_cache(struct kmem_cache *cachep,
1806d50112edSAlexey Dobriyan 			size_t size, slab_flags_t flags)
1807b03a017bSJoonsoo Kim {
1808b03a017bSJoonsoo Kim 	size_t left;
1809b03a017bSJoonsoo Kim 
1810b03a017bSJoonsoo Kim 	cachep->num = 0;
1811b03a017bSJoonsoo Kim 
18126471384aSAlexander Potapenko 	/*
18136471384aSAlexander Potapenko 	 * If slab auto-initialization on free is enabled, store the freelist
18146471384aSAlexander Potapenko 	 * off-slab, so that its contents don't end up in one of the allocated
18156471384aSAlexander Potapenko 	 * objects.
18166471384aSAlexander Potapenko 	 */
18176471384aSAlexander Potapenko 	if (unlikely(slab_want_init_on_free(cachep)))
18186471384aSAlexander Potapenko 		return false;
18196471384aSAlexander Potapenko 
18205f0d5a3aSPaul E. McKenney 	if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU)
1821b03a017bSJoonsoo Kim 		return false;
1822b03a017bSJoonsoo Kim 
1823b03a017bSJoonsoo Kim 	left = calculate_slab_order(cachep, size,
1824b03a017bSJoonsoo Kim 			flags | CFLGS_OBJFREELIST_SLAB);
1825b03a017bSJoonsoo Kim 	if (!cachep->num)
1826b03a017bSJoonsoo Kim 		return false;
1827b03a017bSJoonsoo Kim 
1828b03a017bSJoonsoo Kim 	if (cachep->num * sizeof(freelist_idx_t) > cachep->object_size)
1829b03a017bSJoonsoo Kim 		return false;
1830b03a017bSJoonsoo Kim 
1831b03a017bSJoonsoo Kim 	cachep->colour = left / cachep->colour_off;
1832b03a017bSJoonsoo Kim 
1833b03a017bSJoonsoo Kim 	return true;
1834b03a017bSJoonsoo Kim }
1835b03a017bSJoonsoo Kim 
1836158e319bSJoonsoo Kim static bool set_off_slab_cache(struct kmem_cache *cachep,
1837d50112edSAlexey Dobriyan 			size_t size, slab_flags_t flags)
1838158e319bSJoonsoo Kim {
1839158e319bSJoonsoo Kim 	size_t left;
1840158e319bSJoonsoo Kim 
1841158e319bSJoonsoo Kim 	cachep->num = 0;
1842158e319bSJoonsoo Kim 
1843158e319bSJoonsoo Kim 	/*
18443217fd9bSJoonsoo Kim 	 * Always use on-slab management when SLAB_NOLEAKTRACE
18453217fd9bSJoonsoo Kim 	 * to avoid recursive calls into kmemleak.
1846158e319bSJoonsoo Kim 	 */
1847158e319bSJoonsoo Kim 	if (flags & SLAB_NOLEAKTRACE)
1848158e319bSJoonsoo Kim 		return false;
1849158e319bSJoonsoo Kim 
1850158e319bSJoonsoo Kim 	/*
1851158e319bSJoonsoo Kim 	 * Size is large, assume best to place the slab management obj
1852158e319bSJoonsoo Kim 	 * off-slab (should allow better packing of objs).
1853158e319bSJoonsoo Kim 	 */
1854158e319bSJoonsoo Kim 	left = calculate_slab_order(cachep, size, flags | CFLGS_OFF_SLAB);
1855158e319bSJoonsoo Kim 	if (!cachep->num)
1856158e319bSJoonsoo Kim 		return false;
1857158e319bSJoonsoo Kim 
1858158e319bSJoonsoo Kim 	/*
1859158e319bSJoonsoo Kim 	 * If the slab has been placed off-slab and we have enough space, then
1860158e319bSJoonsoo Kim 	 * move it on-slab. This is at the expense of any extra colouring.
1861158e319bSJoonsoo Kim 	 */
1862158e319bSJoonsoo Kim 	if (left >= cachep->num * sizeof(freelist_idx_t))
1863158e319bSJoonsoo Kim 		return false;
1864158e319bSJoonsoo Kim 
1865158e319bSJoonsoo Kim 	cachep->colour = left / cachep->colour_off;
1866158e319bSJoonsoo Kim 
1867158e319bSJoonsoo Kim 	return true;
1868158e319bSJoonsoo Kim }
1869158e319bSJoonsoo Kim 
1870158e319bSJoonsoo Kim static bool set_on_slab_cache(struct kmem_cache *cachep,
1871d50112edSAlexey Dobriyan 			size_t size, slab_flags_t flags)
1872158e319bSJoonsoo Kim {
1873158e319bSJoonsoo Kim 	size_t left;
1874158e319bSJoonsoo Kim 
1875158e319bSJoonsoo Kim 	cachep->num = 0;
1876158e319bSJoonsoo Kim 
1877158e319bSJoonsoo Kim 	left = calculate_slab_order(cachep, size, flags);
1878158e319bSJoonsoo Kim 	if (!cachep->num)
1879158e319bSJoonsoo Kim 		return false;
1880158e319bSJoonsoo Kim 
1881158e319bSJoonsoo Kim 	cachep->colour = left / cachep->colour_off;
1882158e319bSJoonsoo Kim 
1883158e319bSJoonsoo Kim 	return true;
1884158e319bSJoonsoo Kim }
1885158e319bSJoonsoo Kim 
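/*
 * Colouring example (a sketch, assuming cache_line_size() == 64): with
 * left_over == 200 unused bytes per slab, cachep->colour becomes
 * 200 / 64 == 3, so successive slabs offset their first object by 0, 64
 * and 128 bytes before the cycle repeats, spreading objects of different
 * slabs across cache lines.
 */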
1886444f20c2Szhaoxinchao /*
1887039363f3SChristoph Lameter  * __kmem_cache_create - Create a cache.
1888a755b76aSRandy Dunlap  * @cachep: cache management descriptor
18891da177e4SLinus Torvalds  * @flags: SLAB flags
18901da177e4SLinus Torvalds  *
1891444f20c2Szhaoxinchao  * Returns zero on success, nonzero on failure.
18921da177e4SLinus Torvalds  *
18931da177e4SLinus Torvalds  * The flags are
18941da177e4SLinus Torvalds  *
18951da177e4SLinus Torvalds  * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
18961da177e4SLinus Torvalds  * to catch references to uninitialised memory.
18971da177e4SLinus Torvalds  *
18981da177e4SLinus Torvalds  * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
18991da177e4SLinus Torvalds  * for buffer overruns.
19001da177e4SLinus Torvalds  *
19011da177e4SLinus Torvalds  * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
19021da177e4SLinus Torvalds  * cacheline.  This can be beneficial if you're counting cycles as closely
19031da177e4SLinus Torvalds  * as davem.
19041da177e4SLinus Torvalds  */
1905d50112edSAlexey Dobriyan int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags)
19061da177e4SLinus Torvalds {
1907d4a5fca5SDavid Rientjes 	size_t ralign = BYTES_PER_WORD;
190883b519e8SPekka Enberg 	gfp_t gfp;
1909278b1bb1SChristoph Lameter 	int err;
1910be4a7988SAlexey Dobriyan 	unsigned int size = cachep->size;
19111da177e4SLinus Torvalds 
19121da177e4SLinus Torvalds #if DEBUG
19131da177e4SLinus Torvalds #if FORCED_DEBUG
19141da177e4SLinus Torvalds 	/*
19151da177e4SLinus Torvalds 	 * Enable redzoning and last user accounting, except for caches with
19161da177e4SLinus Torvalds 	 * large objects, if the increased size would increase the object size
19171da177e4SLinus Torvalds 	 * above the next power of two: caches with object sizes just above a
19181da177e4SLinus Torvalds 	 * power of two have a significant amount of internal fragmentation.
19191da177e4SLinus Torvalds 	 */
192087a927c7SDavid Woodhouse 	if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
192187a927c7SDavid Woodhouse 						2 * sizeof(unsigned long long)))
19221da177e4SLinus Torvalds 		flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
19235f0d5a3aSPaul E. McKenney 	if (!(flags & SLAB_TYPESAFE_BY_RCU))
19241da177e4SLinus Torvalds 		flags |= SLAB_POISON;
19251da177e4SLinus Torvalds #endif
19261da177e4SLinus Torvalds #endif
19271da177e4SLinus Torvalds 
1928a737b3e2SAndrew Morton 	/*
1929a737b3e2SAndrew Morton 	 * Check that size is in terms of words.  This is needed to avoid
19301da177e4SLinus Torvalds 	 * unaligned accesses for some archs when redzoning is used, and makes
19311da177e4SLinus Torvalds 	 * sure any on-slab bufctl's are also correctly aligned.
19321da177e4SLinus Torvalds 	 */
1933e0771950SCanjiang Lu 	size = ALIGN(size, BYTES_PER_WORD);
19341da177e4SLinus Torvalds 
193587a927c7SDavid Woodhouse 	if (flags & SLAB_RED_ZONE) {
193687a927c7SDavid Woodhouse 		ralign = REDZONE_ALIGN;
193787a927c7SDavid Woodhouse 		/* If redzoning, ensure that the second redzone is suitably
193887a927c7SDavid Woodhouse 		 * aligned, by adjusting the object size accordingly. */
1939e0771950SCanjiang Lu 		size = ALIGN(size, REDZONE_ALIGN);
194087a927c7SDavid Woodhouse 	}
1941ca5f9703SPekka Enberg 
1942a44b56d3SKevin Hilman 	/* 3) caller mandated alignment */
19438a13a4ccSChristoph Lameter 	if (ralign < cachep->align) {
19448a13a4ccSChristoph Lameter 		ralign = cachep->align;
1945a44b56d3SKevin Hilman 	}
19463ff84a7fSPekka Enberg 	/* disable debug if necessary */
19473ff84a7fSPekka Enberg 	if (ralign > __alignof__(unsigned long long))
19481da177e4SLinus Torvalds 		flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
1949a737b3e2SAndrew Morton 	/*
1950ca5f9703SPekka Enberg 	 * 4) Store it.
19511da177e4SLinus Torvalds 	 */
19528a13a4ccSChristoph Lameter 	cachep->align = ralign;
1953158e319bSJoonsoo Kim 	cachep->colour_off = cache_line_size();
1954158e319bSJoonsoo Kim 	/* Offset must be a multiple of the alignment. */
1955158e319bSJoonsoo Kim 	if (cachep->colour_off < cachep->align)
1956158e319bSJoonsoo Kim 		cachep->colour_off = cachep->align;
19571da177e4SLinus Torvalds 
195883b519e8SPekka Enberg 	if (slab_is_available())
195983b519e8SPekka Enberg 		gfp = GFP_KERNEL;
196083b519e8SPekka Enberg 	else
196183b519e8SPekka Enberg 		gfp = GFP_NOWAIT;
196283b519e8SPekka Enberg 
19631da177e4SLinus Torvalds #if DEBUG
19641da177e4SLinus Torvalds 
1965ca5f9703SPekka Enberg 	/*
1966ca5f9703SPekka Enberg 	 * Both debugging options require word-alignment which is calculated
1967ca5f9703SPekka Enberg 	 * into align above.
1968ca5f9703SPekka Enberg 	 */
19691da177e4SLinus Torvalds 	if (flags & SLAB_RED_ZONE) {
19701da177e4SLinus Torvalds 		/* add space for red zone words */
19713ff84a7fSPekka Enberg 		cachep->obj_offset += sizeof(unsigned long long);
19723ff84a7fSPekka Enberg 		size += 2 * sizeof(unsigned long long);
19731da177e4SLinus Torvalds 	}
19741da177e4SLinus Torvalds 	if (flags & SLAB_STORE_USER) {
1975ca5f9703SPekka Enberg 		/* user store requires one word storage behind the end of
197687a927c7SDavid Woodhouse 		 * the real object. But if the second red zone needs to be
197787a927c7SDavid Woodhouse 		 * aligned to 64 bits, we must allow that much space.
19781da177e4SLinus Torvalds 		 */
197987a927c7SDavid Woodhouse 		if (flags & SLAB_RED_ZONE)
198087a927c7SDavid Woodhouse 			size += REDZONE_ALIGN;
198187a927c7SDavid Woodhouse 		else
19821da177e4SLinus Torvalds 			size += BYTES_PER_WORD;
19831da177e4SLinus Torvalds 	}
1984832a15d2SJoonsoo Kim #endif
1985832a15d2SJoonsoo Kim 
19867ed2f9e6SAlexander Potapenko 	kasan_cache_create(cachep, &size, &flags);
19877ed2f9e6SAlexander Potapenko 
1988832a15d2SJoonsoo Kim 	size = ALIGN(size, cachep->align);
1989832a15d2SJoonsoo Kim 	/*
1990832a15d2SJoonsoo Kim 	 * We should restrict the number of objects in a slab to implement
1991832a15d2SJoonsoo Kim 	 * byte sized index. Refer comment on SLAB_OBJ_MIN_SIZE definition.
1992832a15d2SJoonsoo Kim 	 */
1993832a15d2SJoonsoo Kim 	if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE)
1994832a15d2SJoonsoo Kim 		size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align);
1995832a15d2SJoonsoo Kim 
1996832a15d2SJoonsoo Kim #if DEBUG
199703a2d2a3SJoonsoo Kim 	/*
199803a2d2a3SJoonsoo Kim 	 * To activate debug pagealloc, off-slab management is a necessary
199903a2d2a3SJoonsoo Kim 	 * requirement. In the early phase of initialization the small sized
200003a2d2a3SJoonsoo Kim 	 * caches don't exist yet, so off-slab management would not be
200103a2d2a3SJoonsoo Kim 	 * possible. Checking size >= 256 guarantees that every small sized
200203a2d2a3SJoonsoo Kim 	 * cache needed here was set up earlier in the initialization sequence.
200303a2d2a3SJoonsoo Kim 	 */
20048e57f8acSVlastimil Babka 	if (debug_pagealloc_enabled_static() && (flags & SLAB_POISON) &&
2005f3a3c320SJoonsoo Kim 		size >= 256 && cachep->object_size > cache_line_size()) {
2006f3a3c320SJoonsoo Kim 		if (size < PAGE_SIZE || size % PAGE_SIZE == 0) {
2007f3a3c320SJoonsoo Kim 			size_t tmp_size = ALIGN(size, PAGE_SIZE);
2008f3a3c320SJoonsoo Kim 
2009f3a3c320SJoonsoo Kim 			if (set_off_slab_cache(cachep, tmp_size, flags)) {
2010f3a3c320SJoonsoo Kim 				flags |= CFLGS_OFF_SLAB;
2011f3a3c320SJoonsoo Kim 				cachep->obj_offset += tmp_size - size;
2012f3a3c320SJoonsoo Kim 				size = tmp_size;
2013f3a3c320SJoonsoo Kim 				goto done;
2014f3a3c320SJoonsoo Kim 			}
2015f3a3c320SJoonsoo Kim 		}
20161da177e4SLinus Torvalds 	}
20171da177e4SLinus Torvalds #endif
20181da177e4SLinus Torvalds 
2019b03a017bSJoonsoo Kim 	if (set_objfreelist_slab_cache(cachep, size, flags)) {
2020b03a017bSJoonsoo Kim 		flags |= CFLGS_OBJFREELIST_SLAB;
2021b03a017bSJoonsoo Kim 		goto done;
2022b03a017bSJoonsoo Kim 	}
2023b03a017bSJoonsoo Kim 
2024158e319bSJoonsoo Kim 	if (set_off_slab_cache(cachep, size, flags)) {
20251da177e4SLinus Torvalds 		flags |= CFLGS_OFF_SLAB;
2026158e319bSJoonsoo Kim 		goto done;
2027832a15d2SJoonsoo Kim 	}
20281da177e4SLinus Torvalds 
2029158e319bSJoonsoo Kim 	if (set_on_slab_cache(cachep, size, flags))
2030158e319bSJoonsoo Kim 		goto done;
20311da177e4SLinus Torvalds 
2032278b1bb1SChristoph Lameter 	return -E2BIG;
20338a13a4ccSChristoph Lameter 
2034158e319bSJoonsoo Kim done:
2035158e319bSJoonsoo Kim 	cachep->freelist_size = cachep->num * sizeof(freelist_idx_t);
20361da177e4SLinus Torvalds 	cachep->flags = flags;
2037a57a4988SJoonsoo Kim 	cachep->allocflags = __GFP_COMP;
2038a3187e43SYang Shi 	if (flags & SLAB_CACHE_DMA)
2039a618e89fSGlauber Costa 		cachep->allocflags |= GFP_DMA;
20406d6ea1e9SNicolas Boichat 	if (flags & SLAB_CACHE_DMA32)
20416d6ea1e9SNicolas Boichat 		cachep->allocflags |= GFP_DMA32;
2042a3ba0744SDavid Rientjes 	if (flags & SLAB_RECLAIM_ACCOUNT)
2043a3ba0744SDavid Rientjes 		cachep->allocflags |= __GFP_RECLAIMABLE;
20443b0efdfaSChristoph Lameter 	cachep->size = size;
20456a2d7a95SEric Dumazet 	cachep->reciprocal_buffer_size = reciprocal_value(size);
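	/*
	 * Note: reciprocal_buffer_size lets obj_to_index() turn the
	 * "offset / size" division into a multiply and shift via
	 * reciprocal_divide(). E.g. with size == 192, an object that
	 * starts 384 bytes past s_mem maps to index 384 / 192 == 2
	 * without a hardware divide on the hot path.
	 */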
20461da177e4SLinus Torvalds 
204740b44137SJoonsoo Kim #if DEBUG
204840b44137SJoonsoo Kim 	/*
204940b44137SJoonsoo Kim 	 * If we're going to use the generic kernel_map_pages()
205040b44137SJoonsoo Kim 	 * poisoning, then it's going to smash the contents of
205140b44137SJoonsoo Kim 	 * the redzone and userword anyhow, so switch them off.
205240b44137SJoonsoo Kim 	 */
205340b44137SJoonsoo Kim 	if (IS_ENABLED(CONFIG_PAGE_POISONING) &&
205440b44137SJoonsoo Kim 		(cachep->flags & SLAB_POISON) &&
205540b44137SJoonsoo Kim 		is_debug_pagealloc_cache(cachep))
205640b44137SJoonsoo Kim 		cachep->flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
205740b44137SJoonsoo Kim #endif
205840b44137SJoonsoo Kim 
2059278b1bb1SChristoph Lameter 	err = setup_cpu_cache(cachep, gfp);
2060278b1bb1SChristoph Lameter 	if (err) {
206152b4b950SDmitry Safonov 		__kmem_cache_release(cachep);
2062278b1bb1SChristoph Lameter 		return err;
20632ed3a4efSChristoph Lameter 	}
20641da177e4SLinus Torvalds 
2065278b1bb1SChristoph Lameter 	return 0;
20661da177e4SLinus Torvalds }
20671da177e4SLinus Torvalds 
20681da177e4SLinus Torvalds #if DEBUG
20691da177e4SLinus Torvalds static void check_irq_off(void)
20701da177e4SLinus Torvalds {
20711da177e4SLinus Torvalds 	BUG_ON(!irqs_disabled());
20721da177e4SLinus Torvalds }
20731da177e4SLinus Torvalds 
20741da177e4SLinus Torvalds static void check_irq_on(void)
20751da177e4SLinus Torvalds {
20761da177e4SLinus Torvalds 	BUG_ON(irqs_disabled());
20771da177e4SLinus Torvalds }
20781da177e4SLinus Torvalds 
207418726ca8SJoonsoo Kim static void check_mutex_acquired(void)
208018726ca8SJoonsoo Kim {
208118726ca8SJoonsoo Kim 	BUG_ON(!mutex_is_locked(&slab_mutex));
208218726ca8SJoonsoo Kim }
208318726ca8SJoonsoo Kim 
2084343e0d7aSPekka Enberg static void check_spinlock_acquired(struct kmem_cache *cachep)
20851da177e4SLinus Torvalds {
20861da177e4SLinus Torvalds #ifdef CONFIG_SMP
20871da177e4SLinus Torvalds 	check_irq_off();
2088b539ce9fSJiri Kosina 	assert_raw_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
20891da177e4SLinus Torvalds #endif
20901da177e4SLinus Torvalds }
2091e498be7dSChristoph Lameter 
2092343e0d7aSPekka Enberg static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
2093e498be7dSChristoph Lameter {
2094e498be7dSChristoph Lameter #ifdef CONFIG_SMP
2095e498be7dSChristoph Lameter 	check_irq_off();
2096b539ce9fSJiri Kosina 	assert_raw_spin_locked(&get_node(cachep, node)->list_lock);
2097e498be7dSChristoph Lameter #endif
2098e498be7dSChristoph Lameter }
2099e498be7dSChristoph Lameter 
21001da177e4SLinus Torvalds #else
21011da177e4SLinus Torvalds #define check_irq_off()	do { } while(0)
21021da177e4SLinus Torvalds #define check_irq_on()	do { } while(0)
210318726ca8SJoonsoo Kim #define check_mutex_acquired()	do { } while(0)
21041da177e4SLinus Torvalds #define check_spinlock_acquired(x) do { } while(0)
2105e498be7dSChristoph Lameter #define check_spinlock_acquired_node(x, y) do { } while(0)
21061da177e4SLinus Torvalds #endif
21071da177e4SLinus Torvalds 
210818726ca8SJoonsoo Kim static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac,
210918726ca8SJoonsoo Kim 				int node, bool free_all, struct list_head *list)
211018726ca8SJoonsoo Kim {
211118726ca8SJoonsoo Kim 	int tofree;
211218726ca8SJoonsoo Kim 
211318726ca8SJoonsoo Kim 	if (!ac || !ac->avail)
211418726ca8SJoonsoo Kim 		return;
211518726ca8SJoonsoo Kim 
211618726ca8SJoonsoo Kim 	tofree = free_all ? ac->avail : (ac->limit + 4) / 5;
211718726ca8SJoonsoo Kim 	if (tofree > ac->avail)
211818726ca8SJoonsoo Kim 		tofree = (ac->avail + 1) / 2;
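	/*
	 * E.g. with limit == 120 and free_all == false this drains
	 * (120 + 4) / 5 == 24 objects per call; if only 10 objects are
	 * currently available it is capped to (10 + 1) / 2 == 5, so a
	 * partial drain never empties more than about half of the array.
	 */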
211918726ca8SJoonsoo Kim 
212018726ca8SJoonsoo Kim 	free_block(cachep, ac->entry, tofree, node, list);
212118726ca8SJoonsoo Kim 	ac->avail -= tofree;
212218726ca8SJoonsoo Kim 	memmove(ac->entry, &(ac->entry[tofree]), sizeof(void *) * ac->avail);
212318726ca8SJoonsoo Kim }
2124aab2207cSChristoph Lameter 
21251da177e4SLinus Torvalds static void do_drain(void *arg)
21261da177e4SLinus Torvalds {
2127a737b3e2SAndrew Morton 	struct kmem_cache *cachep = arg;
21281da177e4SLinus Torvalds 	struct array_cache *ac;
21297d6e6d09SLee Schermerhorn 	int node = numa_mem_id();
213018bf8541SChristoph Lameter 	struct kmem_cache_node *n;
213197654dfaSJoonsoo Kim 	LIST_HEAD(list);
21321da177e4SLinus Torvalds 
21331da177e4SLinus Torvalds 	check_irq_off();
21349a2dba4bSPekka Enberg 	ac = cpu_cache_get(cachep);
213518bf8541SChristoph Lameter 	n = get_node(cachep, node);
2136b539ce9fSJiri Kosina 	raw_spin_lock(&n->list_lock);
213797654dfaSJoonsoo Kim 	free_block(cachep, ac->entry, ac->avail, node, &list);
2138b539ce9fSJiri Kosina 	raw_spin_unlock(&n->list_lock);
21391da177e4SLinus Torvalds 	ac->avail = 0;
2140678ff6a7SShakeel Butt 	slabs_destroy(cachep, &list);
21411da177e4SLinus Torvalds }
21421da177e4SLinus Torvalds 
2143343e0d7aSPekka Enberg static void drain_cpu_caches(struct kmem_cache *cachep)
21441da177e4SLinus Torvalds {
2145ce8eb6c4SChristoph Lameter 	struct kmem_cache_node *n;
2146e498be7dSChristoph Lameter 	int node;
214718726ca8SJoonsoo Kim 	LIST_HEAD(list);
2148e498be7dSChristoph Lameter 
214915c8b6c1SJens Axboe 	on_each_cpu(do_drain, cachep, 1);
21501da177e4SLinus Torvalds 	check_irq_on();
215118bf8541SChristoph Lameter 	for_each_kmem_cache_node(cachep, node, n)
215218bf8541SChristoph Lameter 		if (n->alien)
2153ce8eb6c4SChristoph Lameter 			drain_alien_cache(cachep, n->alien);
2154a4523a8bSRoland Dreier 
215518726ca8SJoonsoo Kim 	for_each_kmem_cache_node(cachep, node, n) {
2156b539ce9fSJiri Kosina 		raw_spin_lock_irq(&n->list_lock);
215718726ca8SJoonsoo Kim 		drain_array_locked(cachep, n->shared, node, true, &list);
2158b539ce9fSJiri Kosina 		raw_spin_unlock_irq(&n->list_lock);
215918726ca8SJoonsoo Kim 
216018726ca8SJoonsoo Kim 		slabs_destroy(cachep, &list);
216118726ca8SJoonsoo Kim 	}
2162e498be7dSChristoph Lameter }
21631da177e4SLinus Torvalds 
2164ed11d9ebSChristoph Lameter /*
2165ed11d9ebSChristoph Lameter  * Remove slabs from the list of free slabs.
2166ed11d9ebSChristoph Lameter  * Specify the number of slabs to drain in tofree.
2167ed11d9ebSChristoph Lameter  *
2168ed11d9ebSChristoph Lameter  * Returns the actual number of slabs released.
2169ed11d9ebSChristoph Lameter  */
2170ed11d9ebSChristoph Lameter static int drain_freelist(struct kmem_cache *cache,
2171ce8eb6c4SChristoph Lameter 			struct kmem_cache_node *n, int tofree)
21721da177e4SLinus Torvalds {
21731da177e4SLinus Torvalds 	struct list_head *p;
2174ed11d9ebSChristoph Lameter 	int nr_freed;
21757981e67eSVlastimil Babka 	struct slab *slab;
21761da177e4SLinus Torvalds 
2177ed11d9ebSChristoph Lameter 	nr_freed = 0;
2178ce8eb6c4SChristoph Lameter 	while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
2179ed11d9ebSChristoph Lameter 
2180b539ce9fSJiri Kosina 		raw_spin_lock_irq(&n->list_lock);
2181ce8eb6c4SChristoph Lameter 		p = n->slabs_free.prev;
2182ce8eb6c4SChristoph Lameter 		if (p == &n->slabs_free) {
2183b539ce9fSJiri Kosina 			raw_spin_unlock_irq(&n->list_lock);
2184ed11d9ebSChristoph Lameter 			goto out;
2185ed11d9ebSChristoph Lameter 		}
21861da177e4SLinus Torvalds 
21877981e67eSVlastimil Babka 		slab = list_entry(p, struct slab, slab_list);
21887981e67eSVlastimil Babka 		list_del(&slab->slab_list);
2189f728b0a5SGreg Thelen 		n->free_slabs--;
2190bf00bd34SDavid Rientjes 		n->total_slabs--;
2191ed11d9ebSChristoph Lameter 		/*
2192ed11d9ebSChristoph Lameter 		 * Safe to drop the lock. The slab is no longer linked
2193ed11d9ebSChristoph Lameter 		 * to the cache.
2194ed11d9ebSChristoph Lameter 		 */
2195ce8eb6c4SChristoph Lameter 		n->free_objects -= cache->num;
2196b539ce9fSJiri Kosina 		raw_spin_unlock_irq(&n->list_lock);
21977981e67eSVlastimil Babka 		slab_destroy(cache, slab);
2198ed11d9ebSChristoph Lameter 		nr_freed++;
2199cc2e9d2bSDavid Rientjes 
2200cc2e9d2bSDavid Rientjes 		cond_resched();
22011da177e4SLinus Torvalds 	}
2202ed11d9ebSChristoph Lameter out:
2203ed11d9ebSChristoph Lameter 	return nr_freed;
22041da177e4SLinus Torvalds }
22051da177e4SLinus Torvalds 
2206f9e13c0aSShakeel Butt bool __kmem_cache_empty(struct kmem_cache *s)
2207f9e13c0aSShakeel Butt {
2208f9e13c0aSShakeel Butt 	int node;
2209f9e13c0aSShakeel Butt 	struct kmem_cache_node *n;
2210f9e13c0aSShakeel Butt 
2211f9e13c0aSShakeel Butt 	for_each_kmem_cache_node(s, node, n)
2212f9e13c0aSShakeel Butt 		if (!list_empty(&n->slabs_full) ||
2213f9e13c0aSShakeel Butt 		    !list_empty(&n->slabs_partial))
2214f9e13c0aSShakeel Butt 			return false;
2215f9e13c0aSShakeel Butt 	return true;
2216f9e13c0aSShakeel Butt }
2217f9e13c0aSShakeel Butt 
2218c9fc5864STejun Heo int __kmem_cache_shrink(struct kmem_cache *cachep)
2219e498be7dSChristoph Lameter {
222018bf8541SChristoph Lameter 	int ret = 0;
222118bf8541SChristoph Lameter 	int node;
2222ce8eb6c4SChristoph Lameter 	struct kmem_cache_node *n;
2223e498be7dSChristoph Lameter 
2224e498be7dSChristoph Lameter 	drain_cpu_caches(cachep);
2225e498be7dSChristoph Lameter 
2226e498be7dSChristoph Lameter 	check_irq_on();
222718bf8541SChristoph Lameter 	for_each_kmem_cache_node(cachep, node, n) {
2228a5aa63a5SJoonsoo Kim 		drain_freelist(cachep, n, INT_MAX);
2229ed11d9ebSChristoph Lameter 
2230ce8eb6c4SChristoph Lameter 		ret += !list_empty(&n->slabs_full) ||
2231ce8eb6c4SChristoph Lameter 			!list_empty(&n->slabs_partial);
2232e498be7dSChristoph Lameter 	}
2233e498be7dSChristoph Lameter 	return (ret ? 1 : 0);
2234e498be7dSChristoph Lameter }
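/*
 * Roughly: a zero return above means every node ended up with empty
 * slabs_full and slabs_partial lists. __kmem_cache_shutdown() below
 * simply passes this on; a non-zero value is treated by the common
 * slab code as "objects still in use".
 */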
2235e498be7dSChristoph Lameter 
2236945cf2b6SChristoph Lameter int __kmem_cache_shutdown(struct kmem_cache *cachep)
22371da177e4SLinus Torvalds {
2238c9fc5864STejun Heo 	return __kmem_cache_shrink(cachep);
223952b4b950SDmitry Safonov }
224052b4b950SDmitry Safonov 
224152b4b950SDmitry Safonov void __kmem_cache_release(struct kmem_cache *cachep)
224252b4b950SDmitry Safonov {
224312c3667fSChristoph Lameter 	int i;
2244ce8eb6c4SChristoph Lameter 	struct kmem_cache_node *n;
224512c3667fSChristoph Lameter 
2246c7ce4f60SThomas Garnier 	cache_random_seq_destroy(cachep);
2247c7ce4f60SThomas Garnier 
2248bf0dea23SJoonsoo Kim 	free_percpu(cachep->cpu_cache);
224912c3667fSChristoph Lameter 
2250ce8eb6c4SChristoph Lameter 	/* NUMA: free the node structures */
225118bf8541SChristoph Lameter 	for_each_kmem_cache_node(cachep, i, n) {
2252ce8eb6c4SChristoph Lameter 		kfree(n->shared);
2253ce8eb6c4SChristoph Lameter 		free_alien_cache(n->alien);
2254ce8eb6c4SChristoph Lameter 		kfree(n);
225518bf8541SChristoph Lameter 		cachep->node[i] = NULL;
22561da177e4SLinus Torvalds 	}
22571da177e4SLinus Torvalds }
22581da177e4SLinus Torvalds 
2259e5ac9c5aSRavikiran G Thirumalai /*
2260e5ac9c5aSRavikiran G Thirumalai  * Get the memory for a slab management obj.
22615f0985bbSJianyu Zhan  *
22625f0985bbSJianyu Zhan  * For a slab cache when the slab descriptor is off-slab, the
22635f0985bbSJianyu Zhan  * slab descriptor can't come from the same cache which is being created,
22645f0985bbSJianyu Zhan  * because if it were, that would mean we defer the creation of
22655f0985bbSJianyu Zhan  * the kmalloc_{dma,}_cache of size sizeof(slab descriptor) to this point.
22665f0985bbSJianyu Zhan  * And we eventually call down to __kmem_cache_create(), which
226780d01558SColin Ian King  * in turn looks up in the kmalloc_{dma,}_caches for the desired-size one.
22685f0985bbSJianyu Zhan  * This is a "chicken-and-egg" problem.
22695f0985bbSJianyu Zhan  *
22705f0985bbSJianyu Zhan  * So the off-slab slab descriptor shall come from the kmalloc_{dma,}_caches,
22715f0985bbSJianyu Zhan  * which are all initialized during kmem_cache_init().
2272e5ac9c5aSRavikiran G Thirumalai  */
22737e007355SJoonsoo Kim static void *alloc_slabmgmt(struct kmem_cache *cachep,
22747981e67eSVlastimil Babka 				   struct slab *slab, int colour_off,
22750c3aa83eSJoonsoo Kim 				   gfp_t local_flags, int nodeid)
22761da177e4SLinus Torvalds {
22777e007355SJoonsoo Kim 	void *freelist;
22787981e67eSVlastimil Babka 	void *addr = slab_address(slab);
22791da177e4SLinus Torvalds 
22807981e67eSVlastimil Babka 	slab->s_mem = addr + colour_off;
22817981e67eSVlastimil Babka 	slab->active = 0;
22822e6b3602SJoonsoo Kim 
2283b03a017bSJoonsoo Kim 	if (OBJFREELIST_SLAB(cachep))
2284b03a017bSJoonsoo Kim 		freelist = NULL;
2285b03a017bSJoonsoo Kim 	else if (OFF_SLAB(cachep)) {
22861da177e4SLinus Torvalds 		/* Slab management obj is off-slab. */
2287e36ce448SHyeonggon Yoo 		freelist = kmalloc_node(cachep->freelist_size,
22888759ec50SPekka Enberg 					      local_flags, nodeid);
22891da177e4SLinus Torvalds 	} else {
22902e6b3602SJoonsoo Kim 		/* We will use the last bytes of the slab for the freelist */
22912e6b3602SJoonsoo Kim 		freelist = addr + (PAGE_SIZE << cachep->gfporder) -
22922e6b3602SJoonsoo Kim 				cachep->freelist_size;
22931da177e4SLinus Torvalds 	}
22942e6b3602SJoonsoo Kim 
22958456a648SJoonsoo Kim 	return freelist;
22961da177e4SLinus Torvalds }
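/*
 * In short, the freelist for a slab lives in one of three places (the
 * choice is made at cache creation time by the set_*_slab_cache() calls):
 *   - OBJFREELIST_SLAB: inside one of the slab's own objects, so no extra
 *     space is reserved; it is filled in later by cache_init_objs() /
 *     shuffle_freelist(), hence NULL here.
 *   - OFF_SLAB: in a separately kmalloc'ed buffer of freelist_size bytes.
 *   - otherwise: in the last freelist_size bytes of the slab itself.
 */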
22971da177e4SLinus Torvalds 
22987981e67eSVlastimil Babka static inline freelist_idx_t get_free_obj(struct slab *slab, unsigned int idx)
22991da177e4SLinus Torvalds {
23007981e67eSVlastimil Babka 	return ((freelist_idx_t *) slab->freelist)[idx];
2301e5c58dfdSJoonsoo Kim }
2302e5c58dfdSJoonsoo Kim 
23037981e67eSVlastimil Babka static inline void set_free_obj(struct slab *slab,
23047cc68973SJoonsoo Kim 					unsigned int idx, freelist_idx_t val)
2305e5c58dfdSJoonsoo Kim {
23067981e67eSVlastimil Babka 	((freelist_idx_t *)(slab->freelist))[idx] = val;
23071da177e4SLinus Torvalds }
23081da177e4SLinus Torvalds 
23097981e67eSVlastimil Babka static void cache_init_objs_debug(struct kmem_cache *cachep, struct slab *slab)
23101da177e4SLinus Torvalds {
231110b2e9e8SJoonsoo Kim #if DEBUG
23121da177e4SLinus Torvalds 	int i;
23131da177e4SLinus Torvalds 
23141da177e4SLinus Torvalds 	for (i = 0; i < cachep->num; i++) {
23157981e67eSVlastimil Babka 		void *objp = index_to_obj(cachep, slab, i);
231610b2e9e8SJoonsoo Kim 
23171da177e4SLinus Torvalds 		if (cachep->flags & SLAB_STORE_USER)
23181da177e4SLinus Torvalds 			*dbg_userword(cachep, objp) = NULL;
23191da177e4SLinus Torvalds 
23201da177e4SLinus Torvalds 		if (cachep->flags & SLAB_RED_ZONE) {
23211da177e4SLinus Torvalds 			*dbg_redzone1(cachep, objp) = RED_INACTIVE;
23221da177e4SLinus Torvalds 			*dbg_redzone2(cachep, objp) = RED_INACTIVE;
23231da177e4SLinus Torvalds 		}
23241da177e4SLinus Torvalds 		/*
2325a737b3e2SAndrew Morton 		 * Constructors are not allowed to allocate memory from the same
2326a737b3e2SAndrew Morton 		 * cache which they are a constructor for.  Otherwise, deadlock.
2327a737b3e2SAndrew Morton 		 * They must also be threaded.
23281da177e4SLinus Torvalds 		 */
23297ed2f9e6SAlexander Potapenko 		if (cachep->ctor && !(cachep->flags & SLAB_POISON)) {
23307ed2f9e6SAlexander Potapenko 			kasan_unpoison_object_data(cachep,
23317ed2f9e6SAlexander Potapenko 						   objp + obj_offset(cachep));
233251cc5068SAlexey Dobriyan 			cachep->ctor(objp + obj_offset(cachep));
23337ed2f9e6SAlexander Potapenko 			kasan_poison_object_data(
23347ed2f9e6SAlexander Potapenko 				cachep, objp + obj_offset(cachep));
23357ed2f9e6SAlexander Potapenko 		}
23361da177e4SLinus Torvalds 
23371da177e4SLinus Torvalds 		if (cachep->flags & SLAB_RED_ZONE) {
23381da177e4SLinus Torvalds 			if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
2339756a025fSJoe Perches 				slab_error(cachep, "constructor overwrote the end of an object");
23401da177e4SLinus Torvalds 			if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
2341756a025fSJoe Perches 				slab_error(cachep, "constructor overwrote the start of an object");
23421da177e4SLinus Torvalds 		}
234340b44137SJoonsoo Kim 		/* need to poison the objs? */
234440b44137SJoonsoo Kim 		if (cachep->flags & SLAB_POISON) {
234540b44137SJoonsoo Kim 			poison_obj(cachep, objp, POISON_FREE);
234680552f0fSQian Cai 			slab_kernel_map(cachep, objp, 0);
234740b44137SJoonsoo Kim 		}
234810b2e9e8SJoonsoo Kim 	}
23491da177e4SLinus Torvalds #endif
235010b2e9e8SJoonsoo Kim }
235110b2e9e8SJoonsoo Kim 
2352c7ce4f60SThomas Garnier #ifdef CONFIG_SLAB_FREELIST_RANDOM
2353c7ce4f60SThomas Garnier /* Hold information during a freelist initialization */
2354f7e466e9SDavid Keisar Schmidt struct freelist_init_state {
2355c7ce4f60SThomas Garnier 	unsigned int pos;
23567c00fce9SThomas Garnier 	unsigned int *list;
2357c7ce4f60SThomas Garnier 	unsigned int count;
2358c7ce4f60SThomas Garnier };
2359c7ce4f60SThomas Garnier 
2360c7ce4f60SThomas Garnier /*
2361f0953a1bSIngo Molnar  * Initialize the state based on the randomization method available.
2362f0953a1bSIngo Molnar  * Return true if the pre-computed list is available, false otherwise.
2363c7ce4f60SThomas Garnier  */
2364f7e466e9SDavid Keisar Schmidt static bool freelist_state_initialize(struct freelist_init_state *state,
2365c7ce4f60SThomas Garnier 				struct kmem_cache *cachep,
2366c7ce4f60SThomas Garnier 				unsigned int count)
2367c7ce4f60SThomas Garnier {
2368c7ce4f60SThomas Garnier 	bool ret;
2369c7ce4f60SThomas Garnier 	if (!cachep->random_seq) {
2370c7ce4f60SThomas Garnier 		ret = false;
2371c7ce4f60SThomas Garnier 	} else {
2372c7ce4f60SThomas Garnier 		state->list = cachep->random_seq;
2373c7ce4f60SThomas Garnier 		state->count = count;
2374f7e466e9SDavid Keisar Schmidt 		state->pos = get_random_u32_below(count);
2375c7ce4f60SThomas Garnier 		ret = true;
2376c7ce4f60SThomas Garnier 	}
2377c7ce4f60SThomas Garnier 	return ret;
2378c7ce4f60SThomas Garnier }
2379c7ce4f60SThomas Garnier 
2380c7ce4f60SThomas Garnier /* Get the next entry on the list and randomize it using a random shift */
2381f7e466e9SDavid Keisar Schmidt static freelist_idx_t next_random_slot(struct freelist_init_state *state)
2382c7ce4f60SThomas Garnier {
2383c4e490cfSJohn Sperbeck 	if (state->pos >= state->count)
2384c4e490cfSJohn Sperbeck 		state->pos = 0;
2385c4e490cfSJohn Sperbeck 	return state->list[state->pos++];
2386c7ce4f60SThomas Garnier }
2387c7ce4f60SThomas Garnier 
23887c00fce9SThomas Garnier /* Swap two freelist entries */
23897981e67eSVlastimil Babka static void swap_free_obj(struct slab *slab, unsigned int a, unsigned int b)
23907c00fce9SThomas Garnier {
23917981e67eSVlastimil Babka 	swap(((freelist_idx_t *) slab->freelist)[a],
23927981e67eSVlastimil Babka 		((freelist_idx_t *) slab->freelist)[b]);
23937c00fce9SThomas Garnier }
23947c00fce9SThomas Garnier 
2395c7ce4f60SThomas Garnier /*
2396c7ce4f60SThomas Garnier  * Shuffle the freelist initialization state based on pre-computed lists.
2397c7ce4f60SThomas Garnier  * Return true if the list was successfully shuffled, false otherwise.
2398c7ce4f60SThomas Garnier  */
23997981e67eSVlastimil Babka static bool shuffle_freelist(struct kmem_cache *cachep, struct slab *slab)
2400c7ce4f60SThomas Garnier {
24017c00fce9SThomas Garnier 	unsigned int objfreelist = 0, i, rand, count = cachep->num;
2402f7e466e9SDavid Keisar Schmidt 	struct freelist_init_state state;
2403c7ce4f60SThomas Garnier 	bool precomputed;
2404c7ce4f60SThomas Garnier 
2405c7ce4f60SThomas Garnier 	if (count < 2)
2406c7ce4f60SThomas Garnier 		return false;
2407c7ce4f60SThomas Garnier 
2408c7ce4f60SThomas Garnier 	precomputed = freelist_state_initialize(&state, cachep, count);
2409c7ce4f60SThomas Garnier 
2410c7ce4f60SThomas Garnier 	/* Take a random entry as the objfreelist */
2411c7ce4f60SThomas Garnier 	if (OBJFREELIST_SLAB(cachep)) {
2412c7ce4f60SThomas Garnier 		if (!precomputed)
2413c7ce4f60SThomas Garnier 			objfreelist = count - 1;
2414c7ce4f60SThomas Garnier 		else
2415c7ce4f60SThomas Garnier 			objfreelist = next_random_slot(&state);
24167981e67eSVlastimil Babka 		slab->freelist = index_to_obj(cachep, slab, objfreelist) +
2417c7ce4f60SThomas Garnier 						obj_offset(cachep);
2418c7ce4f60SThomas Garnier 		count--;
2419c7ce4f60SThomas Garnier 	}
2420c7ce4f60SThomas Garnier 
2421c7ce4f60SThomas Garnier 	/*
2422c7ce4f60SThomas Garnier 	 * On early boot, generate the list dynamically.
2423c7ce4f60SThomas Garnier 	 * Later use a pre-computed list for speed.
2424c7ce4f60SThomas Garnier 	 */
2425c7ce4f60SThomas Garnier 	if (!precomputed) {
24267c00fce9SThomas Garnier 		for (i = 0; i < count; i++)
24277981e67eSVlastimil Babka 			set_free_obj(slab, i, i);
24287c00fce9SThomas Garnier 
24297c00fce9SThomas Garnier 		/* Fisher-Yates shuffle */
24307c00fce9SThomas Garnier 		for (i = count - 1; i > 0; i--) {
2431f7e466e9SDavid Keisar Schmidt 			rand = get_random_u32_below(i + 1);
24327981e67eSVlastimil Babka 			swap_free_obj(slab, i, rand);
24337c00fce9SThomas Garnier 		}
2434c7ce4f60SThomas Garnier 	} else {
2435c7ce4f60SThomas Garnier 		for (i = 0; i < count; i++)
24367981e67eSVlastimil Babka 			set_free_obj(slab, i, next_random_slot(&state));
2437c7ce4f60SThomas Garnier 	}
2438c7ce4f60SThomas Garnier 
2439c7ce4f60SThomas Garnier 	if (OBJFREELIST_SLAB(cachep))
24407981e67eSVlastimil Babka 		set_free_obj(slab, cachep->num - 1, objfreelist);
2441c7ce4f60SThomas Garnier 
2442c7ce4f60SThomas Garnier 	return true;
2443c7ce4f60SThomas Garnier }
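/*
 * A sketch of the dynamic (boot-time) path above, with made-up numbers:
 * for count == 4 the freelist starts as [0, 1, 2, 3]; if the successive
 * get_random_u32_below() draws happen to be 1, 0 and 1, the swaps yield
 * [0, 3, 2, 1] -> [2, 3, 0, 1] -> [2, 3, 0, 1], i.e. a uniformly random
 * permutation of the object indices (standard Fisher-Yates).
 */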
2444c7ce4f60SThomas Garnier #else
2445c7ce4f60SThomas Garnier static inline bool shuffle_freelist(struct kmem_cache *cachep,
24467981e67eSVlastimil Babka 				struct slab *slab)
2447c7ce4f60SThomas Garnier {
2448c7ce4f60SThomas Garnier 	return false;
2449c7ce4f60SThomas Garnier }
2450c7ce4f60SThomas Garnier #endif /* CONFIG_SLAB_FREELIST_RANDOM */
2451c7ce4f60SThomas Garnier 
245210b2e9e8SJoonsoo Kim static void cache_init_objs(struct kmem_cache *cachep,
24537981e67eSVlastimil Babka 			    struct slab *slab)
245410b2e9e8SJoonsoo Kim {
245510b2e9e8SJoonsoo Kim 	int i;
24567ed2f9e6SAlexander Potapenko 	void *objp;
2457c7ce4f60SThomas Garnier 	bool shuffled;
245810b2e9e8SJoonsoo Kim 
24597981e67eSVlastimil Babka 	cache_init_objs_debug(cachep, slab);
246010b2e9e8SJoonsoo Kim 
2461c7ce4f60SThomas Garnier 	/* Try to randomize the freelist if enabled */
24627981e67eSVlastimil Babka 	shuffled = shuffle_freelist(cachep, slab);
2463c7ce4f60SThomas Garnier 
2464c7ce4f60SThomas Garnier 	if (!shuffled && OBJFREELIST_SLAB(cachep)) {
24657981e67eSVlastimil Babka 		slab->freelist = index_to_obj(cachep, slab, cachep->num - 1) +
2466b03a017bSJoonsoo Kim 						obj_offset(cachep);
2467b03a017bSJoonsoo Kim 	}
2468b03a017bSJoonsoo Kim 
246910b2e9e8SJoonsoo Kim 	for (i = 0; i < cachep->num; i++) {
24707981e67eSVlastimil Babka 		objp = index_to_obj(cachep, slab, i);
24714d176711SAndrey Konovalov 		objp = kasan_init_slab_obj(cachep, objp);
2472b3cbd9bfSAndrey Ryabinin 
247310b2e9e8SJoonsoo Kim 		/* constructor could break poison info */
24747ed2f9e6SAlexander Potapenko 		if (DEBUG == 0 && cachep->ctor) {
24757ed2f9e6SAlexander Potapenko 			kasan_unpoison_object_data(cachep, objp);
24767ed2f9e6SAlexander Potapenko 			cachep->ctor(objp);
24777ed2f9e6SAlexander Potapenko 			kasan_poison_object_data(cachep, objp);
24787ed2f9e6SAlexander Potapenko 		}
247910b2e9e8SJoonsoo Kim 
2480c7ce4f60SThomas Garnier 		if (!shuffled)
24817981e67eSVlastimil Babka 			set_free_obj(slab, i, i);
24821da177e4SLinus Torvalds 	}
24831da177e4SLinus Torvalds }
24841da177e4SLinus Torvalds 
24857981e67eSVlastimil Babka static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slab)
248678d382d7SMatthew Dobson {
2487b1cb0982SJoonsoo Kim 	void *objp;
248878d382d7SMatthew Dobson 
24897981e67eSVlastimil Babka 	objp = index_to_obj(cachep, slab, get_free_obj(slab, slab->active));
24907981e67eSVlastimil Babka 	slab->active++;
249178d382d7SMatthew Dobson 
249278d382d7SMatthew Dobson 	return objp;
249378d382d7SMatthew Dobson }
249478d382d7SMatthew Dobson 
2495260b61ddSJoonsoo Kim static void slab_put_obj(struct kmem_cache *cachep,
24967981e67eSVlastimil Babka 			struct slab *slab, void *objp)
249778d382d7SMatthew Dobson {
249840f3bf0cSVlastimil Babka 	unsigned int objnr = obj_to_index(cachep, slab, objp);
249978d382d7SMatthew Dobson #if DEBUG
250016025177SJoonsoo Kim 	unsigned int i;
250178d382d7SMatthew Dobson 
2502b1cb0982SJoonsoo Kim 	/* Check whether the object is already on the freelist (double free) */
25037981e67eSVlastimil Babka 	for (i = slab->active; i < cachep->num; i++) {
25047981e67eSVlastimil Babka 		if (get_free_obj(slab, i) == objnr) {
250585c3e4a5SGeert Uytterhoeven 			pr_err("slab: double free detected in cache '%s', objp %px\n",
2506756a025fSJoe Perches 			       cachep->name, objp);
250778d382d7SMatthew Dobson 			BUG();
250878d382d7SMatthew Dobson 		}
2509b1cb0982SJoonsoo Kim 	}
251078d382d7SMatthew Dobson #endif
25117981e67eSVlastimil Babka 	slab->active--;
25127981e67eSVlastimil Babka 	if (!slab->freelist)
25137981e67eSVlastimil Babka 		slab->freelist = objp + obj_offset(cachep);
2514b03a017bSJoonsoo Kim 
25157981e67eSVlastimil Babka 	set_free_obj(slab, slab->active, objnr);
251678d382d7SMatthew Dobson }
251778d382d7SMatthew Dobson 
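/*
 * slab->freelist together with slab->active behaves like a small stack of
 * object indices: slab_get_obj() hands out the index at position "active"
 * and increments it, slab_put_obj() decrements "active" and stores the
 * freed object's index back at that slot, so the most recently freed
 * object is handed out again first.
 */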
25184776874fSPekka Enberg /*
25191da177e4SLinus Torvalds  * Grow (by 1) the number of slabs within a cache.  This is called by
25201da177e4SLinus Torvalds  * kmem_cache_alloc() when there are no active objs left in a cache.
25211da177e4SLinus Torvalds  */
25227981e67eSVlastimil Babka static struct slab *cache_grow_begin(struct kmem_cache *cachep,
252376b342bdSJoonsoo Kim 				gfp_t flags, int nodeid)
25241da177e4SLinus Torvalds {
25257e007355SJoonsoo Kim 	void *freelist;
25261da177e4SLinus Torvalds 	size_t offset;
25276daa0e28SAl Viro 	gfp_t local_flags;
2528dd35f71aSVlastimil Babka 	int slab_node;
2529ce8eb6c4SChristoph Lameter 	struct kmem_cache_node *n;
25307981e67eSVlastimil Babka 	struct slab *slab;
25311da177e4SLinus Torvalds 
2532a737b3e2SAndrew Morton 	/*
2533a737b3e2SAndrew Morton 	 * Be lazy and only check for valid flags here,  keeping it out of the
2534a737b3e2SAndrew Morton 	 * critical path in kmem_cache_alloc().
25351da177e4SLinus Torvalds 	 */
253644405099SLong Li 	if (unlikely(flags & GFP_SLAB_BUG_MASK))
253744405099SLong Li 		flags = kmalloc_fix_flags(flags);
253844405099SLong Li 
2539128227e7SMatthew Wilcox 	WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
25406cb06229SChristoph Lameter 	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
25411da177e4SLinus Torvalds 
25421da177e4SLinus Torvalds 	check_irq_off();
2543d0164adcSMel Gorman 	if (gfpflags_allow_blocking(local_flags))
25441da177e4SLinus Torvalds 		local_irq_enable();
25451da177e4SLinus Torvalds 
25461da177e4SLinus Torvalds 	/*
2547a737b3e2SAndrew Morton 	 * Get mem for the objs.  Attempt to allocate a physical page from
2548a737b3e2SAndrew Morton 	 * 'nodeid'.
2549e498be7dSChristoph Lameter 	 */
25507981e67eSVlastimil Babka 	slab = kmem_getpages(cachep, local_flags, nodeid);
25517981e67eSVlastimil Babka 	if (!slab)
25521da177e4SLinus Torvalds 		goto failed;
25531da177e4SLinus Torvalds 
2554dd35f71aSVlastimil Babka 	slab_node = slab_nid(slab);
2555dd35f71aSVlastimil Babka 	n = get_node(cachep, slab_node);
255603d1d43aSJoonsoo Kim 
255703d1d43aSJoonsoo Kim 	/* Get the colour for the slab, and calculate the next value. */
255803d1d43aSJoonsoo Kim 	n->colour_next++;
255903d1d43aSJoonsoo Kim 	if (n->colour_next >= cachep->colour)
256003d1d43aSJoonsoo Kim 		n->colour_next = 0;
256103d1d43aSJoonsoo Kim 
256203d1d43aSJoonsoo Kim 	offset = n->colour_next;
256303d1d43aSJoonsoo Kim 	if (offset >= cachep->colour)
256403d1d43aSJoonsoo Kim 		offset = 0;
256503d1d43aSJoonsoo Kim 
256603d1d43aSJoonsoo Kim 	offset *= cachep->colour_off;
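	/*
	 * E.g. with colour == 4 and colour_off == 64 (a typical cache line
	 * size), successively grown slabs place s_mem at byte offsets 64,
	 * 128, 192, 0, 64, ... so equal-index objects in different slabs
	 * don't all compete for the same cache lines.
	 */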
256703d1d43aSJoonsoo Kim 
256851dedad0SAndrey Konovalov 	/*
256951dedad0SAndrey Konovalov 	 * Call kasan_poison_slab() before calling alloc_slabmgmt(), so
257051dedad0SAndrey Konovalov 	 * page_address() in the latter returns a non-tagged pointer,
257151dedad0SAndrey Konovalov 	 * as it should be for slab pages.
257251dedad0SAndrey Konovalov 	 */
25736e48a966SMatthew Wilcox (Oracle) 	kasan_poison_slab(slab);
257451dedad0SAndrey Konovalov 
25751da177e4SLinus Torvalds 	/* Get slab management. */
25767981e67eSVlastimil Babka 	freelist = alloc_slabmgmt(cachep, slab, offset,
2577dd35f71aSVlastimil Babka 			local_flags & ~GFP_CONSTRAINT_MASK, slab_node);
2578b03a017bSJoonsoo Kim 	if (OFF_SLAB(cachep) && !freelist)
25791da177e4SLinus Torvalds 		goto opps1;
25801da177e4SLinus Torvalds 
25817981e67eSVlastimil Babka 	slab->slab_cache = cachep;
25827981e67eSVlastimil Babka 	slab->freelist = freelist;
25831da177e4SLinus Torvalds 
25847981e67eSVlastimil Babka 	cache_init_objs(cachep, slab);
25851da177e4SLinus Torvalds 
2586d0164adcSMel Gorman 	if (gfpflags_allow_blocking(local_flags))
25871da177e4SLinus Torvalds 		local_irq_disable();
25881da177e4SLinus Torvalds 
25897981e67eSVlastimil Babka 	return slab;
259076b342bdSJoonsoo Kim 
25911da177e4SLinus Torvalds opps1:
25927981e67eSVlastimil Babka 	kmem_freepages(cachep, slab);
25931da177e4SLinus Torvalds failed:
2594d0164adcSMel Gorman 	if (gfpflags_allow_blocking(local_flags))
25951da177e4SLinus Torvalds 		local_irq_disable();
259676b342bdSJoonsoo Kim 	return NULL;
259776b342bdSJoonsoo Kim }
259876b342bdSJoonsoo Kim 
25997981e67eSVlastimil Babka static void cache_grow_end(struct kmem_cache *cachep, struct slab *slab)
260076b342bdSJoonsoo Kim {
260176b342bdSJoonsoo Kim 	struct kmem_cache_node *n;
260276b342bdSJoonsoo Kim 	void *list = NULL;
260376b342bdSJoonsoo Kim 
260476b342bdSJoonsoo Kim 	check_irq_off();
260576b342bdSJoonsoo Kim 
26067981e67eSVlastimil Babka 	if (!slab)
260776b342bdSJoonsoo Kim 		return;
260876b342bdSJoonsoo Kim 
26097981e67eSVlastimil Babka 	INIT_LIST_HEAD(&slab->slab_list);
26107981e67eSVlastimil Babka 	n = get_node(cachep, slab_nid(slab));
261176b342bdSJoonsoo Kim 
2612b539ce9fSJiri Kosina 	raw_spin_lock(&n->list_lock);
2613bf00bd34SDavid Rientjes 	n->total_slabs++;
26147981e67eSVlastimil Babka 	if (!slab->active) {
26157981e67eSVlastimil Babka 		list_add_tail(&slab->slab_list, &n->slabs_free);
2616f728b0a5SGreg Thelen 		n->free_slabs++;
2617bf00bd34SDavid Rientjes 	} else
26187981e67eSVlastimil Babka 		fixup_slab_list(cachep, n, slab, &list);
261907a63c41SAruna Ramakrishna 
262076b342bdSJoonsoo Kim 	STATS_INC_GROWN(cachep);
26217981e67eSVlastimil Babka 	n->free_objects += cachep->num - slab->active;
2622b539ce9fSJiri Kosina 	raw_spin_unlock(&n->list_lock);
262376b342bdSJoonsoo Kim 
262476b342bdSJoonsoo Kim 	fixup_objfreelist_debug(cachep, &list);
26251da177e4SLinus Torvalds }
26261da177e4SLinus Torvalds 
26271da177e4SLinus Torvalds #if DEBUG
26281da177e4SLinus Torvalds 
26291da177e4SLinus Torvalds /*
26301da177e4SLinus Torvalds  * Perform extra freeing checks:
26311da177e4SLinus Torvalds  * - detect bad pointers.
26321da177e4SLinus Torvalds  * - POISON/RED_ZONE checking
26331da177e4SLinus Torvalds  */
26341da177e4SLinus Torvalds static void kfree_debugcheck(const void *objp)
26351da177e4SLinus Torvalds {
26361da177e4SLinus Torvalds 	if (!virt_addr_valid(objp)) {
26371170532bSJoe Perches 		pr_err("kfree_debugcheck: out of range ptr %lxh\n",
26381da177e4SLinus Torvalds 		       (unsigned long)objp);
26391da177e4SLinus Torvalds 		BUG();
26401da177e4SLinus Torvalds 	}
26411da177e4SLinus Torvalds }
26421da177e4SLinus Torvalds 
264358ce1fd5SPekka Enberg static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
264458ce1fd5SPekka Enberg {
2645b46b8f19SDavid Woodhouse 	unsigned long long redzone1, redzone2;
264658ce1fd5SPekka Enberg 
264758ce1fd5SPekka Enberg 	redzone1 = *dbg_redzone1(cache, obj);
264858ce1fd5SPekka Enberg 	redzone2 = *dbg_redzone2(cache, obj);
264958ce1fd5SPekka Enberg 
265058ce1fd5SPekka Enberg 	/*
265158ce1fd5SPekka Enberg 	 * Redzone is ok.
265258ce1fd5SPekka Enberg 	 */
265358ce1fd5SPekka Enberg 	if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
265458ce1fd5SPekka Enberg 		return;
265558ce1fd5SPekka Enberg 
265658ce1fd5SPekka Enberg 	if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
265758ce1fd5SPekka Enberg 		slab_error(cache, "double free detected");
265858ce1fd5SPekka Enberg 	else
265958ce1fd5SPekka Enberg 		slab_error(cache, "memory outside object was overwritten");
266058ce1fd5SPekka Enberg 
266185c3e4a5SGeert Uytterhoeven 	pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx\n",
266258ce1fd5SPekka Enberg 	       obj, redzone1, redzone2);
266358ce1fd5SPekka Enberg }
266458ce1fd5SPekka Enberg 
2665343e0d7aSPekka Enberg static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
26667c0cb9c6SEzequiel Garcia 				   unsigned long caller)
26671da177e4SLinus Torvalds {
26681da177e4SLinus Torvalds 	unsigned int objnr;
26697981e67eSVlastimil Babka 	struct slab *slab;
26701da177e4SLinus Torvalds 
267180cbd911SMatthew Wilcox 	BUG_ON(virt_to_cache(objp) != cachep);
267280cbd911SMatthew Wilcox 
26733dafccf2SManfred Spraul 	objp -= obj_offset(cachep);
26741da177e4SLinus Torvalds 	kfree_debugcheck(objp);
26757981e67eSVlastimil Babka 	slab = virt_to_slab(objp);
26761da177e4SLinus Torvalds 
26771da177e4SLinus Torvalds 	if (cachep->flags & SLAB_RED_ZONE) {
267858ce1fd5SPekka Enberg 		verify_redzone_free(cachep, objp);
26791da177e4SLinus Torvalds 		*dbg_redzone1(cachep, objp) = RED_INACTIVE;
26801da177e4SLinus Torvalds 		*dbg_redzone2(cachep, objp) = RED_INACTIVE;
26811da177e4SLinus Torvalds 	}
26827878c231SQian Cai 	if (cachep->flags & SLAB_STORE_USER)
26837c0cb9c6SEzequiel Garcia 		*dbg_userword(cachep, objp) = (void *)caller;
26841da177e4SLinus Torvalds 
268540f3bf0cSVlastimil Babka 	objnr = obj_to_index(cachep, slab, objp);
26861da177e4SLinus Torvalds 
26871da177e4SLinus Torvalds 	BUG_ON(objnr >= cachep->num);
26887981e67eSVlastimil Babka 	BUG_ON(objp != index_to_obj(cachep, slab, objnr));
26891da177e4SLinus Torvalds 
26901da177e4SLinus Torvalds 	if (cachep->flags & SLAB_POISON) {
26911da177e4SLinus Torvalds 		poison_obj(cachep, objp, POISON_FREE);
269280552f0fSQian Cai 		slab_kernel_map(cachep, objp, 0);
26931da177e4SLinus Torvalds 	}
26941da177e4SLinus Torvalds 	return objp;
26951da177e4SLinus Torvalds }
26961da177e4SLinus Torvalds 
26971da177e4SLinus Torvalds #else
26981da177e4SLinus Torvalds #define kfree_debugcheck(x) do { } while(0)
26991da177e4SLinus Torvalds #define cache_free_debugcheck(x, objp, z) (objp)
27001da177e4SLinus Torvalds #endif
27011da177e4SLinus Torvalds 
2702b03a017bSJoonsoo Kim static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
2703b03a017bSJoonsoo Kim 						void **list)
2704b03a017bSJoonsoo Kim {
2705b03a017bSJoonsoo Kim #if DEBUG
2706b03a017bSJoonsoo Kim 	void *next = *list;
2707b03a017bSJoonsoo Kim 	void *objp;
2708b03a017bSJoonsoo Kim 
2709b03a017bSJoonsoo Kim 	while (next) {
2710b03a017bSJoonsoo Kim 		objp = next - obj_offset(cachep);
2711b03a017bSJoonsoo Kim 		next = *(void **)next;
2712b03a017bSJoonsoo Kim 		poison_obj(cachep, objp, POISON_FREE);
2713b03a017bSJoonsoo Kim 	}
2714b03a017bSJoonsoo Kim #endif
2715b03a017bSJoonsoo Kim }
2716b03a017bSJoonsoo Kim 
2717d8410234SJoonsoo Kim static inline void fixup_slab_list(struct kmem_cache *cachep,
27187981e67eSVlastimil Babka 				struct kmem_cache_node *n, struct slab *slab,
2719b03a017bSJoonsoo Kim 				void **list)
2720d8410234SJoonsoo Kim {
2721d8410234SJoonsoo Kim 	/* move slab to the correct slab list: */
27227981e67eSVlastimil Babka 	list_del(&slab->slab_list);
27237981e67eSVlastimil Babka 	if (slab->active == cachep->num) {
27247981e67eSVlastimil Babka 		list_add(&slab->slab_list, &n->slabs_full);
2725b03a017bSJoonsoo Kim 		if (OBJFREELIST_SLAB(cachep)) {
2726b03a017bSJoonsoo Kim #if DEBUG
2727b03a017bSJoonsoo Kim 			/* Poisoning will be done without holding the lock */
2728b03a017bSJoonsoo Kim 			if (cachep->flags & SLAB_POISON) {
27297981e67eSVlastimil Babka 				void **objp = slab->freelist;
2730b03a017bSJoonsoo Kim 
2731b03a017bSJoonsoo Kim 				*objp = *list;
2732b03a017bSJoonsoo Kim 				*list = objp;
2733b03a017bSJoonsoo Kim 			}
2734b03a017bSJoonsoo Kim #endif
27357981e67eSVlastimil Babka 			slab->freelist = NULL;
2736b03a017bSJoonsoo Kim 		}
2737b03a017bSJoonsoo Kim 	} else
27387981e67eSVlastimil Babka 		list_add(&slab->slab_list, &n->slabs_partial);
2739d8410234SJoonsoo Kim }
2740d8410234SJoonsoo Kim 
2741f68f8dddSJoonsoo Kim /* Try to find non-pfmemalloc slab if needed */
27427981e67eSVlastimil Babka static noinline struct slab *get_valid_first_slab(struct kmem_cache_node *n,
27437981e67eSVlastimil Babka 					struct slab *slab, bool pfmemalloc)
2744f68f8dddSJoonsoo Kim {
27457981e67eSVlastimil Babka 	if (!slab)
2746f68f8dddSJoonsoo Kim 		return NULL;
2747f68f8dddSJoonsoo Kim 
2748f68f8dddSJoonsoo Kim 	if (pfmemalloc)
27497981e67eSVlastimil Babka 		return slab;
2750f68f8dddSJoonsoo Kim 
27517981e67eSVlastimil Babka 	if (!slab_test_pfmemalloc(slab))
27527981e67eSVlastimil Babka 		return slab;
2753f68f8dddSJoonsoo Kim 
2754f68f8dddSJoonsoo Kim 	/* No need to keep pfmemalloc slab if we have enough free objects */
2755f68f8dddSJoonsoo Kim 	if (n->free_objects > n->free_limit) {
27567981e67eSVlastimil Babka 		slab_clear_pfmemalloc(slab);
27577981e67eSVlastimil Babka 		return slab;
2758f68f8dddSJoonsoo Kim 	}
2759f68f8dddSJoonsoo Kim 
2760f68f8dddSJoonsoo Kim 	/* Move pfmemalloc slab to the end of list to speed up next search */
27617981e67eSVlastimil Babka 	list_del(&slab->slab_list);
27627981e67eSVlastimil Babka 	if (!slab->active) {
27637981e67eSVlastimil Babka 		list_add_tail(&slab->slab_list, &n->slabs_free);
2764bf00bd34SDavid Rientjes 		n->free_slabs++;
2765f728b0a5SGreg Thelen 	} else
27667981e67eSVlastimil Babka 		list_add_tail(&slab->slab_list, &n->slabs_partial);
2767f68f8dddSJoonsoo Kim 
27687981e67eSVlastimil Babka 	list_for_each_entry(slab, &n->slabs_partial, slab_list) {
27697981e67eSVlastimil Babka 		if (!slab_test_pfmemalloc(slab))
27707981e67eSVlastimil Babka 			return slab;
2771f68f8dddSJoonsoo Kim 	}
2772f68f8dddSJoonsoo Kim 
2773f728b0a5SGreg Thelen 	n->free_touched = 1;
27747981e67eSVlastimil Babka 	list_for_each_entry(slab, &n->slabs_free, slab_list) {
27757981e67eSVlastimil Babka 		if (!slab_test_pfmemalloc(slab)) {
2776bf00bd34SDavid Rientjes 			n->free_slabs--;
27777981e67eSVlastimil Babka 			return slab;
2778f68f8dddSJoonsoo Kim 		}
2779f728b0a5SGreg Thelen 	}
2780f68f8dddSJoonsoo Kim 
2781f68f8dddSJoonsoo Kim 	return NULL;
2782f68f8dddSJoonsoo Kim }
2783f68f8dddSJoonsoo Kim 
27847981e67eSVlastimil Babka static struct slab *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
27857aa0d227SGeliang Tang {
27867981e67eSVlastimil Babka 	struct slab *slab;
27877aa0d227SGeliang Tang 
2788b539ce9fSJiri Kosina 	assert_raw_spin_locked(&n->list_lock);
27897981e67eSVlastimil Babka 	slab = list_first_entry_or_null(&n->slabs_partial, struct slab,
279016cb0ec7STobin C. Harding 					slab_list);
27917981e67eSVlastimil Babka 	if (!slab) {
27927aa0d227SGeliang Tang 		n->free_touched = 1;
27937981e67eSVlastimil Babka 		slab = list_first_entry_or_null(&n->slabs_free, struct slab,
279416cb0ec7STobin C. Harding 						slab_list);
27957981e67eSVlastimil Babka 		if (slab)
2796bf00bd34SDavid Rientjes 			n->free_slabs--;
27977aa0d227SGeliang Tang 	}
27987aa0d227SGeliang Tang 
2799f68f8dddSJoonsoo Kim 	if (sk_memalloc_socks())
28007981e67eSVlastimil Babka 		slab = get_valid_first_slab(n, slab, pfmemalloc);
2801f68f8dddSJoonsoo Kim 
28027981e67eSVlastimil Babka 	return slab;
28037aa0d227SGeliang Tang }
28047aa0d227SGeliang Tang 
2805f68f8dddSJoonsoo Kim static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
2806f68f8dddSJoonsoo Kim 				struct kmem_cache_node *n, gfp_t flags)
2807f68f8dddSJoonsoo Kim {
28087981e67eSVlastimil Babka 	struct slab *slab;
2809f68f8dddSJoonsoo Kim 	void *obj;
2810f68f8dddSJoonsoo Kim 	void *list = NULL;
2811f68f8dddSJoonsoo Kim 
2812f68f8dddSJoonsoo Kim 	if (!gfp_pfmemalloc_allowed(flags))
2813f68f8dddSJoonsoo Kim 		return NULL;
2814f68f8dddSJoonsoo Kim 
2815b539ce9fSJiri Kosina 	raw_spin_lock(&n->list_lock);
28167981e67eSVlastimil Babka 	slab = get_first_slab(n, true);
28177981e67eSVlastimil Babka 	if (!slab) {
2818b539ce9fSJiri Kosina 		raw_spin_unlock(&n->list_lock);
2819f68f8dddSJoonsoo Kim 		return NULL;
2820f68f8dddSJoonsoo Kim 	}
2821f68f8dddSJoonsoo Kim 
28227981e67eSVlastimil Babka 	obj = slab_get_obj(cachep, slab);
2823f68f8dddSJoonsoo Kim 	n->free_objects--;
2824f68f8dddSJoonsoo Kim 
28257981e67eSVlastimil Babka 	fixup_slab_list(cachep, n, slab, &list);
2826f68f8dddSJoonsoo Kim 
2827b539ce9fSJiri Kosina 	raw_spin_unlock(&n->list_lock);
2828f68f8dddSJoonsoo Kim 	fixup_objfreelist_debug(cachep, &list);
2829f68f8dddSJoonsoo Kim 
2830f68f8dddSJoonsoo Kim 	return obj;
2831f68f8dddSJoonsoo Kim }
2832f68f8dddSJoonsoo Kim 
2833213b4695SJoonsoo Kim /*
2834213b4695SJoonsoo Kim  * Slab list should be fixed up by fixup_slab_list() for existing slab
2835213b4695SJoonsoo Kim  * or cache_grow_end() for new slab
2836213b4695SJoonsoo Kim  */
2837213b4695SJoonsoo Kim static __always_inline int alloc_block(struct kmem_cache *cachep,
28387981e67eSVlastimil Babka 		struct array_cache *ac, struct slab *slab, int batchcount)
2839213b4695SJoonsoo Kim {
2840213b4695SJoonsoo Kim 	/*
2841213b4695SJoonsoo Kim 	 * There must be at least one object available for
2842213b4695SJoonsoo Kim 	 * allocation.
2843213b4695SJoonsoo Kim 	 */
28447981e67eSVlastimil Babka 	BUG_ON(slab->active >= cachep->num);
2845213b4695SJoonsoo Kim 
28467981e67eSVlastimil Babka 	while (slab->active < cachep->num && batchcount--) {
2847213b4695SJoonsoo Kim 		STATS_INC_ALLOCED(cachep);
2848213b4695SJoonsoo Kim 		STATS_INC_ACTIVE(cachep);
2849213b4695SJoonsoo Kim 		STATS_SET_HIGH(cachep);
2850213b4695SJoonsoo Kim 
28517981e67eSVlastimil Babka 		ac->entry[ac->avail++] = slab_get_obj(cachep, slab);
2852213b4695SJoonsoo Kim 	}
2853213b4695SJoonsoo Kim 
2854213b4695SJoonsoo Kim 	return batchcount;
2855213b4695SJoonsoo Kim }
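/*
 * The return value is the still-unfilled part of the batch:
 * cache_alloc_refill() below keeps calling alloc_block() on further slabs
 * until either batchcount objects have been stashed in the per-cpu array
 * or the node runs out of partial/free slabs.
 */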
2856213b4695SJoonsoo Kim 
2857f68f8dddSJoonsoo Kim static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
28581da177e4SLinus Torvalds {
28591da177e4SLinus Torvalds 	int batchcount;
2860ce8eb6c4SChristoph Lameter 	struct kmem_cache_node *n;
2861801faf0dSJoonsoo Kim 	struct array_cache *ac, *shared;
28621ca4cb24SPekka Enberg 	int node;
2863b03a017bSJoonsoo Kim 	void *list = NULL;
28647981e67eSVlastimil Babka 	struct slab *slab;
28651ca4cb24SPekka Enberg 
28666d2144d3SJoe Korty 	check_irq_off();
28677d6e6d09SLee Schermerhorn 	node = numa_mem_id();
2868f68f8dddSJoonsoo Kim 
28696d2144d3SJoe Korty 	ac = cpu_cache_get(cachep);
28701da177e4SLinus Torvalds 	batchcount = ac->batchcount;
28711da177e4SLinus Torvalds 	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
2872a737b3e2SAndrew Morton 		/*
2873a737b3e2SAndrew Morton 		 * If there was little recent activity on this cache, then
2874a737b3e2SAndrew Morton 		 * perform only a partial refill.  Otherwise we could generate
2875a737b3e2SAndrew Morton 		 * refill bouncing.
28761da177e4SLinus Torvalds 		 */
28771da177e4SLinus Torvalds 		batchcount = BATCHREFILL_LIMIT;
28781da177e4SLinus Torvalds 	}
287918bf8541SChristoph Lameter 	n = get_node(cachep, node);
28801da177e4SLinus Torvalds 
2881ce8eb6c4SChristoph Lameter 	BUG_ON(ac->avail > 0 || !n);
2882801faf0dSJoonsoo Kim 	shared = READ_ONCE(n->shared);
2883801faf0dSJoonsoo Kim 	if (!n->free_objects && (!shared || !shared->avail))
2884801faf0dSJoonsoo Kim 		goto direct_grow;
2885801faf0dSJoonsoo Kim 
2886b539ce9fSJiri Kosina 	raw_spin_lock(&n->list_lock);
2887801faf0dSJoonsoo Kim 	shared = READ_ONCE(n->shared);
2888e498be7dSChristoph Lameter 
28893ded175aSChristoph Lameter 	/* See if we can refill from the shared array */
2890801faf0dSJoonsoo Kim 	if (shared && transfer_objects(ac, shared, batchcount)) {
2891801faf0dSJoonsoo Kim 		shared->touched = 1;
28921da177e4SLinus Torvalds 		goto alloc_done;
289344b57f1cSNick Piggin 	}
28943ded175aSChristoph Lameter 
28951da177e4SLinus Torvalds 	while (batchcount > 0) {
28961da177e4SLinus Torvalds 		/* Get the slab the allocation is to come from. */
28977981e67eSVlastimil Babka 		slab = get_first_slab(n, false);
28987981e67eSVlastimil Babka 		if (!slab)
28991da177e4SLinus Torvalds 			goto must_grow;
29001da177e4SLinus Torvalds 
29011da177e4SLinus Torvalds 		check_spinlock_acquired(cachep);
2902714b8171SPekka Enberg 
29037981e67eSVlastimil Babka 		batchcount = alloc_block(cachep, ac, slab, batchcount);
29047981e67eSVlastimil Babka 		fixup_slab_list(cachep, n, slab, &list);
29051da177e4SLinus Torvalds 	}
29061da177e4SLinus Torvalds 
29071da177e4SLinus Torvalds must_grow:
2908ce8eb6c4SChristoph Lameter 	n->free_objects -= ac->avail;
29091da177e4SLinus Torvalds alloc_done:
2910b539ce9fSJiri Kosina 	raw_spin_unlock(&n->list_lock);
2911b03a017bSJoonsoo Kim 	fixup_objfreelist_debug(cachep, &list);
29121da177e4SLinus Torvalds 
2913801faf0dSJoonsoo Kim direct_grow:
29141da177e4SLinus Torvalds 	if (unlikely(!ac->avail)) {
2915f68f8dddSJoonsoo Kim 		/* Check if we can use obj in pfmemalloc slab */
2916f68f8dddSJoonsoo Kim 		if (sk_memalloc_socks()) {
2917f68f8dddSJoonsoo Kim 			void *obj = cache_alloc_pfmemalloc(cachep, n, flags);
2918f68f8dddSJoonsoo Kim 
2919f68f8dddSJoonsoo Kim 			if (obj)
2920f68f8dddSJoonsoo Kim 				return obj;
2921f68f8dddSJoonsoo Kim 		}
2922f68f8dddSJoonsoo Kim 
29237981e67eSVlastimil Babka 		slab = cache_grow_begin(cachep, gfp_exact_node(flags), node);
29241da177e4SLinus Torvalds 
292576b342bdSJoonsoo Kim 		/*
292676b342bdSJoonsoo Kim 		 * cache_grow_begin() can reenable interrupts,
292776b342bdSJoonsoo Kim 		 * then ac could change.
292876b342bdSJoonsoo Kim 		 */
29299a2dba4bSPekka Enberg 		ac = cpu_cache_get(cachep);
29307981e67eSVlastimil Babka 		if (!ac->avail && slab)
29317981e67eSVlastimil Babka 			alloc_block(cachep, ac, slab, batchcount);
29327981e67eSVlastimil Babka 		cache_grow_end(cachep, slab);
2933072bb0aaSMel Gorman 
2934213b4695SJoonsoo Kim 		if (!ac->avail)
29351da177e4SLinus Torvalds 			return NULL;
29361da177e4SLinus Torvalds 	}
29371da177e4SLinus Torvalds 	ac->touched = 1;
2938072bb0aaSMel Gorman 
2939f68f8dddSJoonsoo Kim 	return ac->entry[--ac->avail];
29401da177e4SLinus Torvalds }
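/*
 * Summary of the refill order above: the per-node shared array (if any),
 * then objects pulled off the node's partial/free slab lists, and only if
 * the node is completely empty do we fall back to a pfmemalloc slab (for
 * memalloc sockets) or grow a brand new slab via cache_grow_begin().
 */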
29411da177e4SLinus Torvalds 
29421da177e4SLinus Torvalds #if DEBUG
2943a737b3e2SAndrew Morton static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
29447c0cb9c6SEzequiel Garcia 				gfp_t flags, void *objp, unsigned long caller)
29451da177e4SLinus Torvalds {
2946128227e7SMatthew Wilcox 	WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
2947df3ae2c9SMarco Elver 	if (!objp || is_kfence_address(objp))
29481da177e4SLinus Torvalds 		return objp;
29491da177e4SLinus Torvalds 	if (cachep->flags & SLAB_POISON) {
29501da177e4SLinus Torvalds 		check_poison_obj(cachep, objp);
295180552f0fSQian Cai 		slab_kernel_map(cachep, objp, 1);
29521da177e4SLinus Torvalds 		poison_obj(cachep, objp, POISON_INUSE);
29531da177e4SLinus Torvalds 	}
29541da177e4SLinus Torvalds 	if (cachep->flags & SLAB_STORE_USER)
29557c0cb9c6SEzequiel Garcia 		*dbg_userword(cachep, objp) = (void *)caller;
29561da177e4SLinus Torvalds 
29571da177e4SLinus Torvalds 	if (cachep->flags & SLAB_RED_ZONE) {
2958a737b3e2SAndrew Morton 		if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
2959a737b3e2SAndrew Morton 				*dbg_redzone2(cachep, objp) != RED_INACTIVE) {
2960756a025fSJoe Perches 			slab_error(cachep, "double free, or memory outside object was overwritten");
296185c3e4a5SGeert Uytterhoeven 			pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx\n",
2962b28a02deSPekka Enberg 			       objp, *dbg_redzone1(cachep, objp),
2963b28a02deSPekka Enberg 			       *dbg_redzone2(cachep, objp));
29641da177e4SLinus Torvalds 		}
29651da177e4SLinus Torvalds 		*dbg_redzone1(cachep, objp) = RED_ACTIVE;
29661da177e4SLinus Torvalds 		*dbg_redzone2(cachep, objp) = RED_ACTIVE;
29671da177e4SLinus Torvalds 	}
296803787301SJoonsoo Kim 
29693dafccf2SManfred Spraul 	objp += obj_offset(cachep);
29704f104934SChristoph Lameter 	if (cachep->ctor && cachep->flags & SLAB_POISON)
297151cc5068SAlexey Dobriyan 		cachep->ctor(objp);
2972d949a815SPeter Collingbourne 	if ((unsigned long)objp & (arch_slab_minalign() - 1)) {
2973d949a815SPeter Collingbourne 		pr_err("0x%px: not aligned to arch_slab_minalign()=%u\n", objp,
2974d949a815SPeter Collingbourne 		       arch_slab_minalign());
2975a44b56d3SKevin Hilman 	}
29761da177e4SLinus Torvalds 	return objp;
29771da177e4SLinus Torvalds }
29781da177e4SLinus Torvalds #else
29791da177e4SLinus Torvalds #define cache_alloc_debugcheck_after(a, b, objp, d) (objp)
29801da177e4SLinus Torvalds #endif
29811da177e4SLinus Torvalds 
2982343e0d7aSPekka Enberg static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
29831da177e4SLinus Torvalds {
29841da177e4SLinus Torvalds 	void *objp;
29851da177e4SLinus Torvalds 	struct array_cache *ac;
29861da177e4SLinus Torvalds 
29875c382300SAlok N Kataria 	check_irq_off();
29888a8b6502SAkinobu Mita 
29899a2dba4bSPekka Enberg 	ac = cpu_cache_get(cachep);
29901da177e4SLinus Torvalds 	if (likely(ac->avail)) {
29911da177e4SLinus Torvalds 		ac->touched = 1;
2992f68f8dddSJoonsoo Kim 		objp = ac->entry[--ac->avail];
2993072bb0aaSMel Gorman 
2994072bb0aaSMel Gorman 		STATS_INC_ALLOCHIT(cachep);
2995072bb0aaSMel Gorman 		goto out;
2996072bb0aaSMel Gorman 	}
2997072bb0aaSMel Gorman 
29981da177e4SLinus Torvalds 	STATS_INC_ALLOCMISS(cachep);
2999f68f8dddSJoonsoo Kim 	objp = cache_alloc_refill(cachep, flags);
3000ddbf2e83SJ. R. Okajima 	/*
3001ddbf2e83SJ. R. Okajima 	 * the 'ac' may be updated by cache_alloc_refill(),
3002ddbf2e83SJ. R. Okajima 	 * and kmemleak_erase() requires its correct value.
3003ddbf2e83SJ. R. Okajima 	 */
3004ddbf2e83SJ. R. Okajima 	ac = cpu_cache_get(cachep);
3005072bb0aaSMel Gorman 
3006072bb0aaSMel Gorman out:
3007d5cff635SCatalin Marinas 	/*
3008d5cff635SCatalin Marinas 	 * To avoid a false negative, if an object that is in one of the
3009d5cff635SCatalin Marinas 	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
3010d5cff635SCatalin Marinas 	 * treat the array pointers as a reference to the object.
3011d5cff635SCatalin Marinas 	 */
3012f3d8b53aSJ. R. Okajima 	if (objp)
3013d5cff635SCatalin Marinas 		kmemleak_erase(&ac->entry[ac->avail]);
30145c382300SAlok N Kataria 	return objp;
30155c382300SAlok N Kataria }
30165c382300SAlok N Kataria 
3017e498be7dSChristoph Lameter #ifdef CONFIG_NUMA
30181e703d05SMiaohe Lin static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
30191e703d05SMiaohe Lin 
30201da177e4SLinus Torvalds /*
30212ad654bcSZefan Li  * Try allocating on another node if PFA_SPREAD_SLAB or a mempolicy is set.
3022c61afb18SPaul Jackson  *
3023c61afb18SPaul Jackson  * If we are in_interrupt, then process context, including cpusets and
3024c61afb18SPaul Jackson  * mempolicy, may not apply and should not be used for allocation policy.
3025c61afb18SPaul Jackson  */
3026c61afb18SPaul Jackson static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3027c61afb18SPaul Jackson {
3028c61afb18SPaul Jackson 	int nid_alloc, nid_here;
3029c61afb18SPaul Jackson 
3030765c4507SChristoph Lameter 	if (in_interrupt() || (flags & __GFP_THISNODE))
3031c61afb18SPaul Jackson 		return NULL;
30327d6e6d09SLee Schermerhorn 	nid_alloc = nid_here = numa_mem_id();
3033c61afb18SPaul Jackson 	if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
30346adef3ebSJack Steiner 		nid_alloc = cpuset_slab_spread_node();
3035c61afb18SPaul Jackson 	else if (current->mempolicy)
30362a389610SDavid Rientjes 		nid_alloc = mempolicy_slab_node();
3037c61afb18SPaul Jackson 	if (nid_alloc != nid_here)
30388b98c169SChristoph Hellwig 		return ____cache_alloc_node(cachep, flags, nid_alloc);
3039c61afb18SPaul Jackson 	return NULL;
3040c61afb18SPaul Jackson }
3041c61afb18SPaul Jackson 
3042c61afb18SPaul Jackson /*
3043765c4507SChristoph Lameter  * Fallback function if there was no memory available and no objects on a
30443c517a61SChristoph Lameter  * certain node and falling back is permitted. First we scan all the
30456a67368cSChristoph Lameter  * available nodes for available objects. If that fails then we
30463c517a61SChristoph Lameter  * perform an allocation without specifying a node. This allows the page
30473c517a61SChristoph Lameter  * allocator to do its reclaim / fallback magic. We then insert the
30483c517a61SChristoph Lameter  * slab into the proper nodelist and then allocate from it.
3049765c4507SChristoph Lameter  */
30508c8cc2c1SPekka Enberg static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3051765c4507SChristoph Lameter {
30528c8cc2c1SPekka Enberg 	struct zonelist *zonelist;
3053dd1a239fSMel Gorman 	struct zoneref *z;
305454a6eb5cSMel Gorman 	struct zone *zone;
305597a225e6SJoonsoo Kim 	enum zone_type highest_zoneidx = gfp_zone(flags);
3056765c4507SChristoph Lameter 	void *obj = NULL;
30577981e67eSVlastimil Babka 	struct slab *slab;
30583c517a61SChristoph Lameter 	int nid;
3059cc9a6c87SMel Gorman 	unsigned int cpuset_mems_cookie;
30608c8cc2c1SPekka Enberg 
30618c8cc2c1SPekka Enberg 	if (flags & __GFP_THISNODE)
30628c8cc2c1SPekka Enberg 		return NULL;
30638c8cc2c1SPekka Enberg 
3064cc9a6c87SMel Gorman retry_cpuset:
3065d26914d1SMel Gorman 	cpuset_mems_cookie = read_mems_allowed_begin();
30662a389610SDavid Rientjes 	zonelist = node_zonelist(mempolicy_slab_node(), flags);
3067cc9a6c87SMel Gorman 
30683c517a61SChristoph Lameter retry:
30693c517a61SChristoph Lameter 	/*
30703c517a61SChristoph Lameter 	 * Look through allowed nodes for objects available
30713c517a61SChristoph Lameter 	 * from existing per node queues.
30723c517a61SChristoph Lameter 	 */
307397a225e6SJoonsoo Kim 	for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
307454a6eb5cSMel Gorman 		nid = zone_to_nid(zone);
3075aedb0eb1SChristoph Lameter 
3076061d7074SVladimir Davydov 		if (cpuset_zone_allowed(zone, flags) &&
307718bf8541SChristoph Lameter 			get_node(cache, nid) &&
307818bf8541SChristoph Lameter 			get_node(cache, nid)->free_objects) {
30798b98c169SChristoph Hellwig 				obj = ____cache_alloc_node(cache,
30804167e9b2SDavid Rientjes 					gfp_exact_node(flags), nid);
3081481c5346SChristoph Lameter 				if (obj)
3082481c5346SChristoph Lameter 					break;
3083481c5346SChristoph Lameter 		}
30843c517a61SChristoph Lameter 	}
30853c517a61SChristoph Lameter 
3086cfce6604SChristoph Lameter 	if (!obj) {
30873c517a61SChristoph Lameter 		/*
30883c517a61SChristoph Lameter 		 * This allocation will be performed within the constraints
30893c517a61SChristoph Lameter 		 * of the current cpuset / memory policy requirements.
30903c517a61SChristoph Lameter 		 * We may trigger various forms of reclaim on the allowed
30913c517a61SChristoph Lameter 		 * set and go into memory reserves if necessary.
30923c517a61SChristoph Lameter 		 */
30937981e67eSVlastimil Babka 		slab = cache_grow_begin(cache, flags, numa_mem_id());
30947981e67eSVlastimil Babka 		cache_grow_end(cache, slab);
30957981e67eSVlastimil Babka 		if (slab) {
30967981e67eSVlastimil Babka 			nid = slab_nid(slab);
30973c517a61SChristoph Lameter 			obj = ____cache_alloc_node(cache,
30984167e9b2SDavid Rientjes 				gfp_exact_node(flags), nid);
3099511e3a05SJoonsoo Kim 
31003c517a61SChristoph Lameter 			/*
3101511e3a05SJoonsoo Kim 			 * Another processor may allocate the objects in
3102511e3a05SJoonsoo Kim 			 * the slab since we are not holding any locks.
31033c517a61SChristoph Lameter 			 */
3104511e3a05SJoonsoo Kim 			if (!obj)
31053c517a61SChristoph Lameter 				goto retry;
31063c517a61SChristoph Lameter 		}
3107aedb0eb1SChristoph Lameter 	}
3108cc9a6c87SMel Gorman 
3109d26914d1SMel Gorman 	if (unlikely(!obj && read_mems_allowed_retry(cpuset_mems_cookie)))
3110cc9a6c87SMel Gorman 		goto retry_cpuset;
3111765c4507SChristoph Lameter 	return obj;
3112765c4507SChristoph Lameter }
3113765c4507SChristoph Lameter 
3114765c4507SChristoph Lameter /*
3115a8f23dd1SYixuan Cao  * Allocate an object on the given nodeid, growing a new slab there if needed.
31161da177e4SLinus Torvalds  */
31178b98c169SChristoph Hellwig static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3118a737b3e2SAndrew Morton 				int nodeid)
3119e498be7dSChristoph Lameter {
31207981e67eSVlastimil Babka 	struct slab *slab;
3121ce8eb6c4SChristoph Lameter 	struct kmem_cache_node *n;
3122213b4695SJoonsoo Kim 	void *obj = NULL;
3123b03a017bSJoonsoo Kim 	void *list = NULL;
31241da177e4SLinus Torvalds 
31257c3fbbddSPaul Mackerras 	VM_BUG_ON(nodeid < 0 || nodeid >= MAX_NUMNODES);
312618bf8541SChristoph Lameter 	n = get_node(cachep, nodeid);
3127ce8eb6c4SChristoph Lameter 	BUG_ON(!n);
3128e498be7dSChristoph Lameter 
3129ca3b9b91SRavikiran G Thirumalai 	check_irq_off();
3130b539ce9fSJiri Kosina 	raw_spin_lock(&n->list_lock);
31317981e67eSVlastimil Babka 	slab = get_first_slab(n, false);
31327981e67eSVlastimil Babka 	if (!slab)
3133e498be7dSChristoph Lameter 		goto must_grow;
3134e498be7dSChristoph Lameter 
3135e498be7dSChristoph Lameter 	check_spinlock_acquired_node(cachep, nodeid);
3136e498be7dSChristoph Lameter 
3137e498be7dSChristoph Lameter 	STATS_INC_NODEALLOCS(cachep);
3138e498be7dSChristoph Lameter 	STATS_INC_ACTIVE(cachep);
3139e498be7dSChristoph Lameter 	STATS_SET_HIGH(cachep);
3140e498be7dSChristoph Lameter 
31417981e67eSVlastimil Babka 	BUG_ON(slab->active == cachep->num);
3142e498be7dSChristoph Lameter 
31437981e67eSVlastimil Babka 	obj = slab_get_obj(cachep, slab);
3144ce8eb6c4SChristoph Lameter 	n->free_objects--;
3145e498be7dSChristoph Lameter 
31467981e67eSVlastimil Babka 	fixup_slab_list(cachep, n, slab, &list);
3147e498be7dSChristoph Lameter 
3148b539ce9fSJiri Kosina 	raw_spin_unlock(&n->list_lock);
3149b03a017bSJoonsoo Kim 	fixup_objfreelist_debug(cachep, &list);
3150213b4695SJoonsoo Kim 	return obj;
3151e498be7dSChristoph Lameter 
3152e498be7dSChristoph Lameter must_grow:
3153b539ce9fSJiri Kosina 	raw_spin_unlock(&n->list_lock);
31547981e67eSVlastimil Babka 	slab = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
31557981e67eSVlastimil Babka 	if (slab) {
3156213b4695SJoonsoo Kim 		/* This slab isn't counted yet so don't update free_objects */
31577981e67eSVlastimil Babka 		obj = slab_get_obj(cachep, slab);
3158213b4695SJoonsoo Kim 	}
31597981e67eSVlastimil Babka 	cache_grow_end(cachep, slab);
3160e498be7dSChristoph Lameter 
3161213b4695SJoonsoo Kim 	return obj ? obj : fallback_alloc(cachep, flags);
3162e498be7dSChristoph Lameter }
31638c8cc2c1SPekka Enberg 
31648c8cc2c1SPekka Enberg static __always_inline void *
3165c31a910cSHyeonggon Yoo __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags, int nodeid)
31668c8cc2c1SPekka Enberg {
3167c31a910cSHyeonggon Yoo 	void *objp = NULL;
3168c31a910cSHyeonggon Yoo 	int slab_node = numa_mem_id();
31698c8cc2c1SPekka Enberg 
3170c31a910cSHyeonggon Yoo 	if (nodeid == NUMA_NO_NODE) {
31712ad654bcSZefan Li 		if (current->mempolicy || cpuset_do_slab_mem_spread()) {
3172c31a910cSHyeonggon Yoo 			objp = alternate_node_alloc(cachep, flags);
31738c8cc2c1SPekka Enberg 			if (objp)
31748c8cc2c1SPekka Enberg 				goto out;
31758c8cc2c1SPekka Enberg 		}
3176c31a910cSHyeonggon Yoo 		/*
3177c31a910cSHyeonggon Yoo 		 * Use the locally cached objects if possible.
3178c31a910cSHyeonggon Yoo 		 * However ____cache_alloc does not allow fallback
3179c31a910cSHyeonggon Yoo 		 * to other nodes. It may fail while we still have
3180c31a910cSHyeonggon Yoo 		 * objects on other nodes available.
3181c31a910cSHyeonggon Yoo 		 */
3182c31a910cSHyeonggon Yoo 		objp = ____cache_alloc(cachep, flags);
3183c31a910cSHyeonggon Yoo 		nodeid = slab_node;
3184c31a910cSHyeonggon Yoo 	} else if (nodeid == slab_node) {
3185c31a910cSHyeonggon Yoo 		objp = ____cache_alloc(cachep, flags);
3186c31a910cSHyeonggon Yoo 	} else if (!get_node(cachep, nodeid)) {
3187c31a910cSHyeonggon Yoo 		/* Node not bootstrapped yet */
3188c31a910cSHyeonggon Yoo 		objp = fallback_alloc(cachep, flags);
3189c31a910cSHyeonggon Yoo 		goto out;
3190c31a910cSHyeonggon Yoo 	}
31918c8cc2c1SPekka Enberg 
31928c8cc2c1SPekka Enberg 	/*
31938c8cc2c1SPekka Enberg 	 * We may just have run out of memory on the local node.
31948c8cc2c1SPekka Enberg 	 * ____cache_alloc_node() knows how to locate memory on other nodes
31958c8cc2c1SPekka Enberg 	 */
31968c8cc2c1SPekka Enberg 	if (!objp)
3197c31a910cSHyeonggon Yoo 		objp = ____cache_alloc_node(cachep, flags, nodeid);
31988c8cc2c1SPekka Enberg out:
31998c8cc2c1SPekka Enberg 	return objp;
32008c8cc2c1SPekka Enberg }
32018c8cc2c1SPekka Enberg #else
32028c8cc2c1SPekka Enberg 
32038c8cc2c1SPekka Enberg static __always_inline void *
3204c31a910cSHyeonggon Yoo __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags, int nodeid __maybe_unused)
32058c8cc2c1SPekka Enberg {
32068c8cc2c1SPekka Enberg 	return ____cache_alloc(cachep, flags);
32078c8cc2c1SPekka Enberg }
32088c8cc2c1SPekka Enberg 
32098c8cc2c1SPekka Enberg #endif /* CONFIG_NUMA */
32108c8cc2c1SPekka Enberg 
32118c8cc2c1SPekka Enberg static __always_inline void *
321207588d72SHyeonggon Yoo slab_alloc_node(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags,
321307588d72SHyeonggon Yoo 		int nodeid, size_t orig_size, unsigned long caller)
32148c8cc2c1SPekka Enberg {
32158c8cc2c1SPekka Enberg 	unsigned long save_flags;
32168c8cc2c1SPekka Enberg 	void *objp;
3217964d4bd3SRoman Gushchin 	struct obj_cgroup *objcg = NULL;
3218da844b78SAndrey Konovalov 	bool init = false;
32198c8cc2c1SPekka Enberg 
3220dcce284aSBenjamin Herrenschmidt 	flags &= gfp_allowed_mask;
322188f2ef73SMuchun Song 	cachep = slab_pre_alloc_hook(cachep, lru, &objcg, 1, flags);
3222011eceafSJesper Dangaard Brouer 	if (unlikely(!cachep))
3223824ebef1SAkinobu Mita 		return NULL;
3224824ebef1SAkinobu Mita 
3225d3fb45f3SAlexander Potapenko 	objp = kfence_alloc(cachep, orig_size, flags);
3226d3fb45f3SAlexander Potapenko 	if (unlikely(objp))
3227d3fb45f3SAlexander Potapenko 		goto out;
3228d3fb45f3SAlexander Potapenko 
32298c8cc2c1SPekka Enberg 	local_irq_save(save_flags);
323007588d72SHyeonggon Yoo 	objp = __do_cache_alloc(cachep, flags, nodeid);
32318c8cc2c1SPekka Enberg 	local_irq_restore(save_flags);
32328c8cc2c1SPekka Enberg 	objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
32338c8cc2c1SPekka Enberg 	prefetchw(objp);
3234da844b78SAndrey Konovalov 	init = slab_want_init_on_alloc(flags, cachep);
3235d07dbea4SChristoph Lameter 
3236d3fb45f3SAlexander Potapenko out:
32379ce67395SFeng Tang 	slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init,
32389ce67395SFeng Tang 				cachep->object_size);
32398c8cc2c1SPekka Enberg 	return objp;
32408c8cc2c1SPekka Enberg }
3241e498be7dSChristoph Lameter 
324207588d72SHyeonggon Yoo static __always_inline void *
324307588d72SHyeonggon Yoo slab_alloc(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags,
324407588d72SHyeonggon Yoo 	   size_t orig_size, unsigned long caller)
324507588d72SHyeonggon Yoo {
324607588d72SHyeonggon Yoo 	return slab_alloc_node(cachep, lru, flags, NUMA_NO_NODE, orig_size,
324707588d72SHyeonggon Yoo 			       caller);
324807588d72SHyeonggon Yoo }
324907588d72SHyeonggon Yoo 
3250e498be7dSChristoph Lameter /*
32515f0985bbSJianyu Zhan  * Caller needs to acquire correct kmem_cache_node's list_lock
325297654dfaSJoonsoo Kim  * @list: List of detached free slabs should be freed by caller
3253e498be7dSChristoph Lameter  */
325497654dfaSJoonsoo Kim static void free_block(struct kmem_cache *cachep, void **objpp,
325597654dfaSJoonsoo Kim 			int nr_objects, int node, struct list_head *list)
32561da177e4SLinus Torvalds {
32571da177e4SLinus Torvalds 	int i;
325825c063fbSJoonsoo Kim 	struct kmem_cache_node *n = get_node(cachep, node);
32597981e67eSVlastimil Babka 	struct slab *slab;
32606052b788SJoonsoo Kim 
32616052b788SJoonsoo Kim 	n->free_objects += nr_objects;
32621da177e4SLinus Torvalds 
32631da177e4SLinus Torvalds 	for (i = 0; i < nr_objects; i++) {
3264072bb0aaSMel Gorman 		void *objp;
32657981e67eSVlastimil Babka 		struct slab *slab;
32661da177e4SLinus Torvalds 
3267072bb0aaSMel Gorman 		objp = objpp[i];
3268072bb0aaSMel Gorman 
32697981e67eSVlastimil Babka 		slab = virt_to_slab(objp);
32707981e67eSVlastimil Babka 		list_del(&slab->slab_list);
3271ff69416eSChristoph Lameter 		check_spinlock_acquired_node(cachep, node);
32727981e67eSVlastimil Babka 		slab_put_obj(cachep, slab, objp);
32731da177e4SLinus Torvalds 		STATS_DEC_ACTIVE(cachep);
32741da177e4SLinus Torvalds 
32751da177e4SLinus Torvalds 		/* fixup slab chains */
32767981e67eSVlastimil Babka 		if (slab->active == 0) {
32777981e67eSVlastimil Babka 			list_add(&slab->slab_list, &n->slabs_free);
3278f728b0a5SGreg Thelen 			n->free_slabs++;
3279f728b0a5SGreg Thelen 		} else {
32801da177e4SLinus Torvalds 			/* Unconditionally move a slab to the end of the
32811da177e4SLinus Torvalds 			 * partial list on free - maximum time for the
32821da177e4SLinus Torvalds 			 * other objects to be freed, too.
32831da177e4SLinus Torvalds 			 */
32847981e67eSVlastimil Babka 			list_add_tail(&slab->slab_list, &n->slabs_partial);
32851da177e4SLinus Torvalds 		}
32861da177e4SLinus Torvalds 	}
32876052b788SJoonsoo Kim 
32886052b788SJoonsoo Kim 	while (n->free_objects > n->free_limit && !list_empty(&n->slabs_free)) {
32896052b788SJoonsoo Kim 		n->free_objects -= cachep->num;
32906052b788SJoonsoo Kim 
32917981e67eSVlastimil Babka 		slab = list_last_entry(&n->slabs_free, struct slab, slab_list);
32927981e67eSVlastimil Babka 		list_move(&slab->slab_list, list);
3293f728b0a5SGreg Thelen 		n->free_slabs--;
3294bf00bd34SDavid Rientjes 		n->total_slabs--;
32956052b788SJoonsoo Kim 	}
32961da177e4SLinus Torvalds }
32971da177e4SLinus Torvalds 
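/*
 * Worked example of the trimming loop above (a sketch with made-up numbers):
 * with cachep->num = 30 objects per slab and n->free_limit = 120, a batch of
 * frees that raises n->free_objects to 150 detaches completely free slabs
 * from n->slabs_free onto @list, 30 objects at a time, until free_objects is
 * no longer above free_limit (here a single slab, 150 -> 120) or no empty
 * slab remains. The caller then destroys the detached slabs with
 * slabs_destroy() after dropping the list_lock.
 */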
3298343e0d7aSPekka Enberg static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
32991da177e4SLinus Torvalds {
33001da177e4SLinus Torvalds 	int batchcount;
3301ce8eb6c4SChristoph Lameter 	struct kmem_cache_node *n;
33027d6e6d09SLee Schermerhorn 	int node = numa_mem_id();
330397654dfaSJoonsoo Kim 	LIST_HEAD(list);
33041da177e4SLinus Torvalds 
33051da177e4SLinus Torvalds 	batchcount = ac->batchcount;
3306260b61ddSJoonsoo Kim 
33071da177e4SLinus Torvalds 	check_irq_off();
330818bf8541SChristoph Lameter 	n = get_node(cachep, node);
3309b539ce9fSJiri Kosina 	raw_spin_lock(&n->list_lock);
3310ce8eb6c4SChristoph Lameter 	if (n->shared) {
3311ce8eb6c4SChristoph Lameter 		struct array_cache *shared_array = n->shared;
33121da177e4SLinus Torvalds 		int max = shared_array->limit - shared_array->avail;
33131da177e4SLinus Torvalds 		if (max) {
33141da177e4SLinus Torvalds 			if (batchcount > max)
33151da177e4SLinus Torvalds 				batchcount = max;
3316e498be7dSChristoph Lameter 			memcpy(&(shared_array->entry[shared_array->avail]),
3317b28a02deSPekka Enberg 			       ac->entry, sizeof(void *) * batchcount);
33181da177e4SLinus Torvalds 			shared_array->avail += batchcount;
33191da177e4SLinus Torvalds 			goto free_done;
33201da177e4SLinus Torvalds 		}
33211da177e4SLinus Torvalds 	}
33221da177e4SLinus Torvalds 
332397654dfaSJoonsoo Kim 	free_block(cachep, ac->entry, batchcount, node, &list);
33241da177e4SLinus Torvalds free_done:
33251da177e4SLinus Torvalds #if STATS
33261da177e4SLinus Torvalds 	{
33271da177e4SLinus Torvalds 		int i = 0;
33287981e67eSVlastimil Babka 		struct slab *slab;
33291da177e4SLinus Torvalds 
33307981e67eSVlastimil Babka 		list_for_each_entry(slab, &n->slabs_free, slab_list) {
33317981e67eSVlastimil Babka 			BUG_ON(slab->active);
33321da177e4SLinus Torvalds 
33331da177e4SLinus Torvalds 			i++;
33341da177e4SLinus Torvalds 		}
33351da177e4SLinus Torvalds 		STATS_SET_FREEABLE(cachep, i);
33361da177e4SLinus Torvalds 	}
33371da177e4SLinus Torvalds #endif
3338b539ce9fSJiri Kosina 	raw_spin_unlock(&n->list_lock);
33391da177e4SLinus Torvalds 	ac->avail -= batchcount;
3340a737b3e2SAndrew Morton 	memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
3341678ff6a7SShakeel Butt 	slabs_destroy(cachep, &list);
33421da177e4SLinus Torvalds }
33431da177e4SLinus Torvalds 
33441da177e4SLinus Torvalds /*
3345a737b3e2SAndrew Morton  * Release an obj back to its cache. If the obj has a constructed state, it must
3346a737b3e2SAndrew Morton  * be in this state _before_ it is released. Called with interrupts disabled.
33471da177e4SLinus Torvalds  */
3348ee3ce779SDmitry Vyukov static __always_inline void __cache_free(struct kmem_cache *cachep, void *objp,
33497c0cb9c6SEzequiel Garcia 					 unsigned long caller)
33501da177e4SLinus Torvalds {
3351d57a964eSAndrey Konovalov 	bool init;
3352d57a964eSAndrey Konovalov 
3353b77d5b1bSMuchun Song 	memcg_slab_free_hook(cachep, virt_to_slab(objp), &objp, 1);
3354b77d5b1bSMuchun Song 
3355d3fb45f3SAlexander Potapenko 	if (is_kfence_address(objp)) {
3356d3fb45f3SAlexander Potapenko 		kmemleak_free_recursive(objp, cachep->flags);
3357d3fb45f3SAlexander Potapenko 		__kfence_free(objp);
3358d3fb45f3SAlexander Potapenko 		return;
3359d3fb45f3SAlexander Potapenko 	}
3360d3fb45f3SAlexander Potapenko 
3361d57a964eSAndrey Konovalov 	/*
3362d57a964eSAndrey Konovalov 	 * As memory initialization might be integrated into KASAN,
3363d57a964eSAndrey Konovalov 	 * kasan_slab_free and initialization memset must be
3364d57a964eSAndrey Konovalov 	 * kept together to avoid discrepancies in behavior.
3365d57a964eSAndrey Konovalov 	 */
3366d57a964eSAndrey Konovalov 	init = slab_want_init_on_free(cachep);
3367d57a964eSAndrey Konovalov 	if (init && !kasan_has_integrated_init())
3368a32d654dSAlexander Popov 		memset(objp, 0, cachep->object_size);
3369d57a964eSAndrey Konovalov 	/* KASAN might put objp into memory quarantine, delaying its reuse. */
3370d57a964eSAndrey Konovalov 	if (kasan_slab_free(cachep, objp, init))
337155834c59SAlexander Potapenko 		return;
33721da177e4SLinus Torvalds 
3373cfbe1636SMarco Elver 	/* Use KCSAN to help debug racy use-after-free. */
3374cfbe1636SMarco Elver 	if (!(cachep->flags & SLAB_TYPESAFE_BY_RCU))
3375cfbe1636SMarco Elver 		__kcsan_check_access(objp, cachep->object_size,
3376cfbe1636SMarco Elver 				     KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
3377cfbe1636SMarco Elver 
337855834c59SAlexander Potapenko 	___cache_free(cachep, objp, caller);
337955834c59SAlexander Potapenko }
338055834c59SAlexander Potapenko 
338155834c59SAlexander Potapenko void ___cache_free(struct kmem_cache *cachep, void *objp,
338255834c59SAlexander Potapenko 		unsigned long caller)
338355834c59SAlexander Potapenko {
338455834c59SAlexander Potapenko 	struct array_cache *ac = cpu_cache_get(cachep);
33857ed2f9e6SAlexander Potapenko 
33861da177e4SLinus Torvalds 	check_irq_off();
3387d5cff635SCatalin Marinas 	kmemleak_free_recursive(objp, cachep->flags);
3388a947eb95SSuleiman Souhlal 	objp = cache_free_debugcheck(cachep, objp, caller);
33891da177e4SLinus Torvalds 
33901807a1aaSSiddha, Suresh B 	/*
33911807a1aaSSiddha, Suresh B 	 * Skip calling cache_free_alien() when the platform is not NUMA.
33921807a1aaSSiddha, Suresh B 	 * This avoids the cache misses incurred while accessing slabp (a
33931807a1aaSSiddha, Suresh B 	 * per-page memory reference) to get the nodeid. Instead use a global
33941807a1aaSSiddha, Suresh B 	 * variable to skip the call, which is most likely to be present in
33951807a1aaSSiddha, Suresh B 	 * the cache.
33961807a1aaSSiddha, Suresh B 	 */
3397b6e68bc1SMel Gorman 	if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
3398e498be7dSChristoph Lameter 		return;
3399729bd0b7SPekka Enberg 
34003d880194SJoonsoo Kim 	if (ac->avail < ac->limit) {
34011da177e4SLinus Torvalds 		STATS_INC_FREEHIT(cachep);
34021da177e4SLinus Torvalds 	} else {
34031da177e4SLinus Torvalds 		STATS_INC_FREEMISS(cachep);
34041da177e4SLinus Torvalds 		cache_flusharray(cachep, ac);
34051da177e4SLinus Torvalds 	}
340642c8c99cSZhao Jin 
3407f68f8dddSJoonsoo Kim 	if (sk_memalloc_socks()) {
34087981e67eSVlastimil Babka 		struct slab *slab = virt_to_slab(objp);
3409f68f8dddSJoonsoo Kim 
34107981e67eSVlastimil Babka 		if (unlikely(slab_test_pfmemalloc(slab))) {
34117981e67eSVlastimil Babka 			cache_free_pfmemalloc(cachep, slab, objp);
3412f68f8dddSJoonsoo Kim 			return;
3413f68f8dddSJoonsoo Kim 		}
3414f68f8dddSJoonsoo Kim 	}
3415f68f8dddSJoonsoo Kim 
3416dabc3e29SKees Cook 	__free_one(ac, objp);
34171da177e4SLinus Torvalds }
34181da177e4SLinus Torvalds 
341988f2ef73SMuchun Song static __always_inline
342088f2ef73SMuchun Song void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
342188f2ef73SMuchun Song 			     gfp_t flags)
342288f2ef73SMuchun Song {
342388f2ef73SMuchun Song 	void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);
342488f2ef73SMuchun Song 
34252c1d697fSHyeonggon Yoo 	trace_kmem_cache_alloc(_RET_IP_, ret, cachep, flags, NUMA_NO_NODE);
342688f2ef73SMuchun Song 
342788f2ef73SMuchun Song 	return ret;
342888f2ef73SMuchun Song }
342988f2ef73SMuchun Song 
3430343e0d7aSPekka Enberg void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
34311da177e4SLinus Torvalds {
343288f2ef73SMuchun Song 	return __kmem_cache_alloc_lru(cachep, NULL, flags);
34331da177e4SLinus Torvalds }
34341da177e4SLinus Torvalds EXPORT_SYMBOL(kmem_cache_alloc);
34351da177e4SLinus Torvalds 
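/*
 * Illustrative usage sketch (not part of slab.c, wrapped in #if 0 so it is
 * never built): how a caller might pair kmem_cache_create() with
 * kmem_cache_alloc()/kmem_cache_free(). The struct name "example_obj" and
 * the cache pointer are hypothetical.
 */
#if 0
struct example_obj {
	int id;
	char name[16];
};

static struct kmem_cache *example_cachep;

static int example_cache_use(void)
{
	struct example_obj *obj;

	/* One cache per object type, as described at the top of this file. */
	example_cachep = kmem_cache_create("example_obj",
					   sizeof(struct example_obj), 0,
					   SLAB_HWCACHE_ALIGN, NULL);
	if (!example_cachep)
		return -ENOMEM;

	/* Fast path: the object comes from the per-cpu head array if possible. */
	obj = kmem_cache_alloc(example_cachep, GFP_KERNEL);
	if (!obj) {
		kmem_cache_destroy(example_cachep);
		return -ENOMEM;
	}

	obj->id = 1;
	kmem_cache_free(example_cachep, obj);
	kmem_cache_destroy(example_cachep);
	return 0;
}
#endif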
343688f2ef73SMuchun Song void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
343788f2ef73SMuchun Song 			   gfp_t flags)
343888f2ef73SMuchun Song {
343988f2ef73SMuchun Song 	return __kmem_cache_alloc_lru(cachep, lru, flags);
344088f2ef73SMuchun Song }
344188f2ef73SMuchun Song EXPORT_SYMBOL(kmem_cache_alloc_lru);
344288f2ef73SMuchun Song 
34437b0501ddSJesper Dangaard Brouer static __always_inline void
34447b0501ddSJesper Dangaard Brouer cache_alloc_debugcheck_after_bulk(struct kmem_cache *s, gfp_t flags,
34457b0501ddSJesper Dangaard Brouer 				  size_t size, void **p, unsigned long caller)
34467b0501ddSJesper Dangaard Brouer {
34477b0501ddSJesper Dangaard Brouer 	size_t i;
34487b0501ddSJesper Dangaard Brouer 
34497b0501ddSJesper Dangaard Brouer 	for (i = 0; i < size; i++)
34507b0501ddSJesper Dangaard Brouer 		p[i] = cache_alloc_debugcheck_after(s, flags, p[i], caller);
34517b0501ddSJesper Dangaard Brouer }
34527b0501ddSJesper Dangaard Brouer 
3453865762a8SJesper Dangaard Brouer int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
3454484748f0SChristoph Lameter 			  void **p)
3455484748f0SChristoph Lameter {
3456964d4bd3SRoman Gushchin 	struct obj_cgroup *objcg = NULL;
3457f5451547SThomas Gleixner 	unsigned long irqflags;
3458f5451547SThomas Gleixner 	size_t i;
34592a777eacSJesper Dangaard Brouer 
346088f2ef73SMuchun Song 	s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
34612a777eacSJesper Dangaard Brouer 	if (!s)
34622a777eacSJesper Dangaard Brouer 		return 0;
34632a777eacSJesper Dangaard Brouer 
3464f5451547SThomas Gleixner 	local_irq_save(irqflags);
34652a777eacSJesper Dangaard Brouer 	for (i = 0; i < size; i++) {
3466c31a910cSHyeonggon Yoo 		void *objp = kfence_alloc(s, s->object_size, flags) ?:
3467c31a910cSHyeonggon Yoo 			     __do_cache_alloc(s, flags, NUMA_NO_NODE);
34682a777eacSJesper Dangaard Brouer 
34692a777eacSJesper Dangaard Brouer 		if (unlikely(!objp))
34702a777eacSJesper Dangaard Brouer 			goto error;
34712a777eacSJesper Dangaard Brouer 		p[i] = objp;
34722a777eacSJesper Dangaard Brouer 	}
3473f5451547SThomas Gleixner 	local_irq_restore(irqflags);
34742a777eacSJesper Dangaard Brouer 
34757b0501ddSJesper Dangaard Brouer 	cache_alloc_debugcheck_after_bulk(s, flags, size, p, _RET_IP_);
34767b0501ddSJesper Dangaard Brouer 
3477da844b78SAndrey Konovalov 	/*
3478da844b78SAndrey Konovalov 	 * memcg and kmem_cache debug support and memory initialization.
3479da844b78SAndrey Konovalov 	 * Done outside of the IRQ disabled section.
3480da844b78SAndrey Konovalov 	 */
3481da844b78SAndrey Konovalov 	slab_post_alloc_hook(s, objcg, flags, size, p,
34829ce67395SFeng Tang 			slab_want_init_on_alloc(flags, s), s->object_size);
34832a777eacSJesper Dangaard Brouer 	/* FIXME: Trace call missing. Christoph would like a bulk variant */
34842a777eacSJesper Dangaard Brouer 	return size;
34852a777eacSJesper Dangaard Brouer error:
3486f5451547SThomas Gleixner 	local_irq_restore(irqflags);
34877b0501ddSJesper Dangaard Brouer 	cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_);
34889ce67395SFeng Tang 	slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
34892055e67bSHyeonggon Yoo 	kmem_cache_free_bulk(s, i, p);
34902a777eacSJesper Dangaard Brouer 	return 0;
3491484748f0SChristoph Lameter }
3492484748f0SChristoph Lameter EXPORT_SYMBOL(kmem_cache_alloc_bulk);
3493484748f0SChristoph Lameter 
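/*
 * Illustrative usage sketch (not part of slab.c, never built): bulk
 * allocation and release. As the code above shows, kmem_cache_alloc_bulk()
 * is all-or-nothing here: it returns @size on success and 0 on failure,
 * releasing any partially allocated objects itself. The cache pointer
 * passed in is hypothetical.
 */
#if 0
static int example_bulk_use(struct kmem_cache *example_cachep)
{
	void *objs[16];

	if (!kmem_cache_alloc_bulk(example_cachep, GFP_KERNEL,
				   ARRAY_SIZE(objs), objs))
		return -ENOMEM;

	/* ... use objs[0] .. objs[15] ... */

	kmem_cache_free_bulk(example_cachep, ARRAY_SIZE(objs), objs);
	return 0;
}
#endif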
3494d0d04b78SZhouping Liu /**
3495d0d04b78SZhouping Liu  * kmem_cache_alloc_node - Allocate an object on the specified node
3496d0d04b78SZhouping Liu  * @cachep: The cache to allocate from.
3497d0d04b78SZhouping Liu  * @flags: See kmalloc().
3498d0d04b78SZhouping Liu  * @nodeid: node number of the target node.
3499d0d04b78SZhouping Liu  *
3500d0d04b78SZhouping Liu  * Identical to kmem_cache_alloc but it will allocate memory on the given
3501d0d04b78SZhouping Liu  * node, which can improve the performance for CPU-bound structures.
3502d0d04b78SZhouping Liu  *
3503d0d04b78SZhouping Liu  * Falling back to another node is possible if __GFP_THISNODE is not set.
3504a862f68aSMike Rapoport  *
3505a862f68aSMike Rapoport  * Return: pointer to the new object or %NULL in case of error
3506d0d04b78SZhouping Liu  */
35078b98c169SChristoph Hellwig void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
35088b98c169SChristoph Hellwig {
350907588d72SHyeonggon Yoo 	void *ret = slab_alloc_node(cachep, NULL, flags, nodeid, cachep->object_size, _RET_IP_);
351036555751SEduard - Gabriel Munteanu 
35112c1d697fSHyeonggon Yoo 	trace_kmem_cache_alloc(_RET_IP_, ret, cachep, flags, nodeid);
351236555751SEduard - Gabriel Munteanu 
351336555751SEduard - Gabriel Munteanu 	return ret;
351436555751SEduard - Gabriel Munteanu }
351536555751SEduard - Gabriel Munteanu EXPORT_SYMBOL(kmem_cache_alloc_node);
351636555751SEduard - Gabriel Munteanu 
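/*
 * Illustrative usage sketch (not part of slab.c, never built): allocating a
 * per-node structure close to the CPUs that will touch it. Without
 * __GFP_THISNODE the allocation may fall back to another node via
 * fallback_alloc(); with it, failure is returned instead. The cache pointer
 * passed in is hypothetical.
 */
#if 0
static void *example_alloc_on_node(struct kmem_cache *example_cachep, int node)
{
	/* Prefer @node but allow cross-node fallback. */
	void *obj = kmem_cache_alloc_node(example_cachep, GFP_KERNEL, node);

	/*
	 * Alternatively, insist on @node and accept failure:
	 * kmem_cache_alloc_node(example_cachep,
	 *			 GFP_KERNEL | __GFP_THISNODE, node);
	 */
	return obj;
}
#endif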
3517ed4cd17eSHyeonggon Yoo void *__kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3518ed4cd17eSHyeonggon Yoo 			     int nodeid, size_t orig_size,
3519ed4cd17eSHyeonggon Yoo 			     unsigned long caller)
3520ed4cd17eSHyeonggon Yoo {
3521ed4cd17eSHyeonggon Yoo 	return slab_alloc_node(cachep, NULL, flags, nodeid,
3522ed4cd17eSHyeonggon Yoo 			       orig_size, caller);
3523ed4cd17eSHyeonggon Yoo }
3524ed4cd17eSHyeonggon Yoo 
35255bb1bb35SPaul E. McKenney #ifdef CONFIG_PRINTK
35262dfe63e6SMarco Elver void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
35278e7f37f2SPaul E. McKenney {
35288e7f37f2SPaul E. McKenney 	struct kmem_cache *cachep;
35298e7f37f2SPaul E. McKenney 	unsigned int objnr;
35308e7f37f2SPaul E. McKenney 	void *objp;
35318e7f37f2SPaul E. McKenney 
35328e7f37f2SPaul E. McKenney 	kpp->kp_ptr = object;
35337213230aSMatthew Wilcox (Oracle) 	kpp->kp_slab = slab;
35347213230aSMatthew Wilcox (Oracle) 	cachep = slab->slab_cache;
35358e7f37f2SPaul E. McKenney 	kpp->kp_slab_cache = cachep;
35368e7f37f2SPaul E. McKenney 	objp = object - obj_offset(cachep);
35378e7f37f2SPaul E. McKenney 	kpp->kp_data_offset = obj_offset(cachep);
35387213230aSMatthew Wilcox (Oracle) 	slab = virt_to_slab(objp);
353940f3bf0cSVlastimil Babka 	objnr = obj_to_index(cachep, slab, objp);
35407981e67eSVlastimil Babka 	objp = index_to_obj(cachep, slab, objnr);
35418e7f37f2SPaul E. McKenney 	kpp->kp_objp = objp;
35428e7f37f2SPaul E. McKenney 	if (DEBUG && cachep->flags & SLAB_STORE_USER)
35438e7f37f2SPaul E. McKenney 		kpp->kp_ret = *dbg_userword(cachep, objp);
35448e7f37f2SPaul E. McKenney }
35455bb1bb35SPaul E. McKenney #endif
35468e7f37f2SPaul E. McKenney 
3547ed4cd17eSHyeonggon Yoo static __always_inline
3548ed4cd17eSHyeonggon Yoo void __do_kmem_cache_free(struct kmem_cache *cachep, void *objp,
3549ed4cd17eSHyeonggon Yoo 			  unsigned long caller)
3550ed4cd17eSHyeonggon Yoo {
3551ed4cd17eSHyeonggon Yoo 	unsigned long flags;
3552ed4cd17eSHyeonggon Yoo 
3553ed4cd17eSHyeonggon Yoo 	local_irq_save(flags);
3554ed4cd17eSHyeonggon Yoo 	debug_check_no_locks_freed(objp, cachep->object_size);
3555ed4cd17eSHyeonggon Yoo 	if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
3556ed4cd17eSHyeonggon Yoo 		debug_check_no_obj_freed(objp, cachep->object_size);
3557ed4cd17eSHyeonggon Yoo 	__cache_free(cachep, objp, caller);
3558ed4cd17eSHyeonggon Yoo 	local_irq_restore(flags);
3559ed4cd17eSHyeonggon Yoo }
3560ed4cd17eSHyeonggon Yoo 
3561ed4cd17eSHyeonggon Yoo void __kmem_cache_free(struct kmem_cache *cachep, void *objp,
3562ed4cd17eSHyeonggon Yoo 		       unsigned long caller)
3563ed4cd17eSHyeonggon Yoo {
3564ed4cd17eSHyeonggon Yoo 	__do_kmem_cache_free(cachep, objp, caller);
3565ed4cd17eSHyeonggon Yoo }
3566ed4cd17eSHyeonggon Yoo 
35671da177e4SLinus Torvalds /**
35681da177e4SLinus Torvalds  * kmem_cache_free - Deallocate an object
35691da177e4SLinus Torvalds  * @cachep: The cache the allocation was from.
35701da177e4SLinus Torvalds  * @objp: The previously allocated object.
35711da177e4SLinus Torvalds  *
35721da177e4SLinus Torvalds  * Free an object which was previously allocated from this
35731da177e4SLinus Torvalds  * cache.
35741da177e4SLinus Torvalds  */
3575343e0d7aSPekka Enberg void kmem_cache_free(struct kmem_cache *cachep, void *objp)
35761da177e4SLinus Torvalds {
3577b9ce5ef4SGlauber Costa 	cachep = cache_from_obj(cachep, objp);
3578b9ce5ef4SGlauber Costa 	if (!cachep)
3579b9ce5ef4SGlauber Costa 		return;
35801da177e4SLinus Torvalds 
35812c1d697fSHyeonggon Yoo 	trace_kmem_cache_free(_RET_IP_, objp, cachep);
3582ed4cd17eSHyeonggon Yoo 	__do_kmem_cache_free(cachep, objp, _RET_IP_);
35831da177e4SLinus Torvalds }
35841da177e4SLinus Torvalds EXPORT_SYMBOL(kmem_cache_free);
35851da177e4SLinus Torvalds 
3586e6cdb58dSJesper Dangaard Brouer void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
3587e6cdb58dSJesper Dangaard Brouer {
3588f5451547SThomas Gleixner 	unsigned long flags;
3589e6cdb58dSJesper Dangaard Brouer 
3590f5451547SThomas Gleixner 	local_irq_save(flags);
3591d6a71648SHyeonggon Yoo 	for (int i = 0; i < size; i++) {
3592e6cdb58dSJesper Dangaard Brouer 		void *objp = p[i];
3593d6a71648SHyeonggon Yoo 		struct kmem_cache *s;
3594e6cdb58dSJesper Dangaard Brouer 
3595d6a71648SHyeonggon Yoo 		if (!orig_s) {
3596d6a71648SHyeonggon Yoo 			struct folio *folio = virt_to_folio(objp);
3597d6a71648SHyeonggon Yoo 
3598d6a71648SHyeonggon Yoo 			/* called via kfree_bulk */
3599d6a71648SHyeonggon Yoo 			if (!folio_test_slab(folio)) {
3600f5451547SThomas Gleixner 				local_irq_restore(flags);
3601d6a71648SHyeonggon Yoo 				free_large_kmalloc(folio, objp);
3602f5451547SThomas Gleixner 				local_irq_save(flags);
3603d6a71648SHyeonggon Yoo 				continue;
3604d6a71648SHyeonggon Yoo 			}
3605d6a71648SHyeonggon Yoo 			s = folio_slab(folio)->slab_cache;
3606d6a71648SHyeonggon Yoo 		} else {
3607e6cdb58dSJesper Dangaard Brouer 			s = cache_from_obj(orig_s, objp);
3608d6a71648SHyeonggon Yoo 		}
3609d6a71648SHyeonggon Yoo 
3610a64b5378SKees Cook 		if (!s)
3611a64b5378SKees Cook 			continue;
3612e6cdb58dSJesper Dangaard Brouer 
3613e6cdb58dSJesper Dangaard Brouer 		debug_check_no_locks_freed(objp, s->object_size);
3614e6cdb58dSJesper Dangaard Brouer 		if (!(s->flags & SLAB_DEBUG_OBJECTS))
3615e6cdb58dSJesper Dangaard Brouer 			debug_check_no_obj_freed(objp, s->object_size);
3616e6cdb58dSJesper Dangaard Brouer 
3617e6cdb58dSJesper Dangaard Brouer 		__cache_free(s, objp, _RET_IP_);
3618e6cdb58dSJesper Dangaard Brouer 	}
3619f5451547SThomas Gleixner 	local_irq_restore(flags);
3620e6cdb58dSJesper Dangaard Brouer 
3621e6cdb58dSJesper Dangaard Brouer 	/* FIXME: add tracing */
3622e6cdb58dSJesper Dangaard Brouer }
3623e6cdb58dSJesper Dangaard Brouer EXPORT_SYMBOL(kmem_cache_free_bulk);
3624e6cdb58dSJesper Dangaard Brouer 
3625e498be7dSChristoph Lameter /*
3626ce8eb6c4SChristoph Lameter  * This initializes kmem_cache_node or resizes various caches for all nodes.
3627e498be7dSChristoph Lameter  */
3628c3d332b6SJoonsoo Kim static int setup_kmem_cache_nodes(struct kmem_cache *cachep, gfp_t gfp)
3629e498be7dSChristoph Lameter {
3630c3d332b6SJoonsoo Kim 	int ret;
3631e498be7dSChristoph Lameter 	int node;
3632ce8eb6c4SChristoph Lameter 	struct kmem_cache_node *n;
3633e498be7dSChristoph Lameter 
36349c09a95cSMel Gorman 	for_each_online_node(node) {
3635c3d332b6SJoonsoo Kim 		ret = setup_kmem_cache_node(cachep, node, gfp, true);
3636c3d332b6SJoonsoo Kim 		if (ret)
3637e498be7dSChristoph Lameter 			goto fail;
3638c3d332b6SJoonsoo Kim 
36393395ee05SPaul Menage 	}
3640cafeb02eSChristoph Lameter 
3641cafeb02eSChristoph Lameter 	return 0;
36420718dc2aSChristoph Lameter 
3643e498be7dSChristoph Lameter fail:
36443b0efdfaSChristoph Lameter 	if (!cachep->list.next) {
36450718dc2aSChristoph Lameter 		/* Cache is not active yet. Roll back what we did */
36460718dc2aSChristoph Lameter 		node--;
36470718dc2aSChristoph Lameter 		while (node >= 0) {
364818bf8541SChristoph Lameter 			n = get_node(cachep, node);
364918bf8541SChristoph Lameter 			if (n) {
3650ce8eb6c4SChristoph Lameter 				kfree(n->shared);
3651ce8eb6c4SChristoph Lameter 				free_alien_cache(n->alien);
3652ce8eb6c4SChristoph Lameter 				kfree(n);
36536a67368cSChristoph Lameter 				cachep->node[node] = NULL;
36540718dc2aSChristoph Lameter 			}
36550718dc2aSChristoph Lameter 			node--;
36560718dc2aSChristoph Lameter 		}
36570718dc2aSChristoph Lameter 	}
3658cafeb02eSChristoph Lameter 	return -ENOMEM;
3659e498be7dSChristoph Lameter }
3660e498be7dSChristoph Lameter 
366118004c5dSChristoph Lameter /* Always called with the slab_mutex held */
366210befea9SRoman Gushchin static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
366383b519e8SPekka Enberg 			    int batchcount, int shared, gfp_t gfp)
36641da177e4SLinus Torvalds {
3665bf0dea23SJoonsoo Kim 	struct array_cache __percpu *cpu_cache, *prev;
3666bf0dea23SJoonsoo Kim 	int cpu;
36671da177e4SLinus Torvalds 
3668bf0dea23SJoonsoo Kim 	cpu_cache = alloc_kmem_cache_cpus(cachep, limit, batchcount);
3669bf0dea23SJoonsoo Kim 	if (!cpu_cache)
3670d2e7b7d0SSiddha, Suresh B 		return -ENOMEM;
3671d2e7b7d0SSiddha, Suresh B 
3672bf0dea23SJoonsoo Kim 	prev = cachep->cpu_cache;
3673bf0dea23SJoonsoo Kim 	cachep->cpu_cache = cpu_cache;
3674a87c75fbSGreg Thelen 	/*
3675a87c75fbSGreg Thelen 	 * Without a previous cpu_cache there's no need to synchronize remote
3676a87c75fbSGreg Thelen 	 * cpus, so skip the IPIs.
3677a87c75fbSGreg Thelen 	 */
3678a87c75fbSGreg Thelen 	if (prev)
3679bf0dea23SJoonsoo Kim 		kick_all_cpus_sync();
36801da177e4SLinus Torvalds 
36811da177e4SLinus Torvalds 	check_irq_on();
36821da177e4SLinus Torvalds 	cachep->batchcount = batchcount;
36831da177e4SLinus Torvalds 	cachep->limit = limit;
3684e498be7dSChristoph Lameter 	cachep->shared = shared;
36851da177e4SLinus Torvalds 
3686bf0dea23SJoonsoo Kim 	if (!prev)
3687c3d332b6SJoonsoo Kim 		goto setup_node;
3688bf0dea23SJoonsoo Kim 
3689bf0dea23SJoonsoo Kim 	for_each_online_cpu(cpu) {
369097654dfaSJoonsoo Kim 		LIST_HEAD(list);
369118bf8541SChristoph Lameter 		int node;
369218bf8541SChristoph Lameter 		struct kmem_cache_node *n;
3693bf0dea23SJoonsoo Kim 		struct array_cache *ac = per_cpu_ptr(prev, cpu);
369418bf8541SChristoph Lameter 
3695bf0dea23SJoonsoo Kim 		node = cpu_to_mem(cpu);
369618bf8541SChristoph Lameter 		n = get_node(cachep, node);
3697b539ce9fSJiri Kosina 		raw_spin_lock_irq(&n->list_lock);
3698bf0dea23SJoonsoo Kim 		free_block(cachep, ac->entry, ac->avail, node, &list);
3699b539ce9fSJiri Kosina 		raw_spin_unlock_irq(&n->list_lock);
370097654dfaSJoonsoo Kim 		slabs_destroy(cachep, &list);
37011da177e4SLinus Torvalds 	}
3702bf0dea23SJoonsoo Kim 	free_percpu(prev);
3703bf0dea23SJoonsoo Kim 
3704c3d332b6SJoonsoo Kim setup_node:
3705c3d332b6SJoonsoo Kim 	return setup_kmem_cache_nodes(cachep, gfp);
37061da177e4SLinus Torvalds }
37071da177e4SLinus Torvalds 
370818004c5dSChristoph Lameter /* Called with slab_mutex held always */
370983b519e8SPekka Enberg static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
37101da177e4SLinus Torvalds {
37111da177e4SLinus Torvalds 	int err;
3712943a451aSGlauber Costa 	int limit = 0;
3713943a451aSGlauber Costa 	int shared = 0;
3714943a451aSGlauber Costa 	int batchcount = 0;
37151da177e4SLinus Torvalds 
37167c00fce9SThomas Garnier 	err = cache_random_seq_create(cachep, cachep->num, gfp);
3717c7ce4f60SThomas Garnier 	if (err)
3718c7ce4f60SThomas Garnier 		goto end;
3719c7ce4f60SThomas Garnier 
3720a737b3e2SAndrew Morton 	/*
3721a737b3e2SAndrew Morton 	 * The head array serves three purposes:
37221da177e4SLinus Torvalds 	 * - create a LIFO ordering, i.e. return objects that are cache-warm
37231da177e4SLinus Torvalds 	 * - reduce the number of spinlock operations.
37241da177e4SLinus Torvalds 	 * - reduce the number of linked list operations on the slab and
37251da177e4SLinus Torvalds 	 *   bufctl chains: array operations are cheaper.
37261da177e4SLinus Torvalds 	 * The numbers are guessed, we should auto-tune as described by
37271da177e4SLinus Torvalds 	 * Bonwick.
37281da177e4SLinus Torvalds 	 */
37293b0efdfaSChristoph Lameter 	if (cachep->size > 131072)
37301da177e4SLinus Torvalds 		limit = 1;
37313b0efdfaSChristoph Lameter 	else if (cachep->size > PAGE_SIZE)
37321da177e4SLinus Torvalds 		limit = 8;
37333b0efdfaSChristoph Lameter 	else if (cachep->size > 1024)
37341da177e4SLinus Torvalds 		limit = 24;
37353b0efdfaSChristoph Lameter 	else if (cachep->size > 256)
37361da177e4SLinus Torvalds 		limit = 54;
37371da177e4SLinus Torvalds 	else
37381da177e4SLinus Torvalds 		limit = 120;
37391da177e4SLinus Torvalds 
3740a737b3e2SAndrew Morton 	/*
3741a737b3e2SAndrew Morton 	 * CPU bound tasks (e.g. network routing) can exhibit cpu bound
37421da177e4SLinus Torvalds 	 * allocation behaviour: Most allocs on one cpu, most free operations
37431da177e4SLinus Torvalds 	 * on another cpu. For these cases, an efficient object passing between
37441da177e4SLinus Torvalds 	 * cpus is necessary. This is provided by a shared array. The array
37451da177e4SLinus Torvalds 	 * replaces Bonwick's magazine layer.
37461da177e4SLinus Torvalds 	 * On uniprocessor, it's functionally equivalent (but less efficient)
37471da177e4SLinus Torvalds 	 * to a larger limit. Thus disabled by default.
37481da177e4SLinus Torvalds 	 */
37491da177e4SLinus Torvalds 	shared = 0;
37503b0efdfaSChristoph Lameter 	if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1)
37511da177e4SLinus Torvalds 		shared = 8;
37521da177e4SLinus Torvalds 
37531da177e4SLinus Torvalds #if DEBUG
3754a737b3e2SAndrew Morton 	/*
3755a737b3e2SAndrew Morton 	 * With debugging enabled, a large batchcount leads to excessively long
3756a737b3e2SAndrew Morton 	 * periods with local interrupts disabled. Limit the batchcount.
37571da177e4SLinus Torvalds 	 */
37581da177e4SLinus Torvalds 	if (limit > 32)
37591da177e4SLinus Torvalds 		limit = 32;
37601da177e4SLinus Torvalds #endif
3761943a451aSGlauber Costa 	batchcount = (limit + 1) / 2;
3762943a451aSGlauber Costa 	err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
3763c7ce4f60SThomas Garnier end:
37641da177e4SLinus Torvalds 	if (err)
37651170532bSJoe Perches 		pr_err("enable_cpucache failed for %s, error %d\n",
37661da177e4SLinus Torvalds 		       cachep->name, -err);
37672ed3a4efSChristoph Lameter 	return err;
37681da177e4SLinus Torvalds }
37691da177e4SLinus Torvalds 
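/*
 * Worked example of the heuristics above (non-DEBUG case): for a cache of
 * 512-byte objects on an SMP machine, 256 < size <= 1024 gives a per-cpu
 * array limit of 54; shared = 8 because size <= PAGE_SIZE and more than one
 * CPU is possible; and batchcount = (54 + 1) / 2 = 27 objects are moved per
 * refill or flush of the head array.
 */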
37701b55253aSChristoph Lameter /*
3771ce8eb6c4SChristoph Lameter  * Drain an array if it contains any elements, taking the node lock only if
3772ce8eb6c4SChristoph Lameter  * necessary. Note that the node listlock also protects the array_cache
3773b18e7e65SChristoph Lameter  * if drain_array() is used on the shared array.
37741b55253aSChristoph Lameter  */
3775ce8eb6c4SChristoph Lameter static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
377618726ca8SJoonsoo Kim 			 struct array_cache *ac, int node)
37771da177e4SLinus Torvalds {
377897654dfaSJoonsoo Kim 	LIST_HEAD(list);
377918726ca8SJoonsoo Kim 
378018726ca8SJoonsoo Kim 	/* ac from n->shared can be freed if we don't hold the slab_mutex. */
378118726ca8SJoonsoo Kim 	check_mutex_acquired();
37821da177e4SLinus Torvalds 
37831b55253aSChristoph Lameter 	if (!ac || !ac->avail)
37841b55253aSChristoph Lameter 		return;
378518726ca8SJoonsoo Kim 
378618726ca8SJoonsoo Kim 	if (ac->touched) {
37871da177e4SLinus Torvalds 		ac->touched = 0;
378818726ca8SJoonsoo Kim 		return;
378918726ca8SJoonsoo Kim 	}
379018726ca8SJoonsoo Kim 
3791b539ce9fSJiri Kosina 	raw_spin_lock_irq(&n->list_lock);
379218726ca8SJoonsoo Kim 	drain_array_locked(cachep, ac, node, false, &list);
3793b539ce9fSJiri Kosina 	raw_spin_unlock_irq(&n->list_lock);
379418726ca8SJoonsoo Kim 
379597654dfaSJoonsoo Kim 	slabs_destroy(cachep, &list);
3796b18e7e65SChristoph Lameter }
37971da177e4SLinus Torvalds 
37981da177e4SLinus Torvalds /**
37991da177e4SLinus Torvalds  * cache_reap - Reclaim memory from caches.
380005fb6bf0SRandy Dunlap  * @w: work descriptor
38011da177e4SLinus Torvalds  *
38021da177e4SLinus Torvalds  * Called from workqueue/eventd every few seconds.
38031da177e4SLinus Torvalds  * Purpose:
38041da177e4SLinus Torvalds  * - clear the per-cpu caches for this CPU.
38051da177e4SLinus Torvalds  * - return freeable pages to the main free memory pool.
38061da177e4SLinus Torvalds  *
3807a737b3e2SAndrew Morton  * If we cannot acquire the cache chain mutex then just give up - we'll try
3808a737b3e2SAndrew Morton  * again on the next iteration.
38091da177e4SLinus Torvalds  */
38107c5cae36SChristoph Lameter static void cache_reap(struct work_struct *w)
38111da177e4SLinus Torvalds {
38127a7c381dSChristoph Hellwig 	struct kmem_cache *searchp;
3813ce8eb6c4SChristoph Lameter 	struct kmem_cache_node *n;
38147d6e6d09SLee Schermerhorn 	int node = numa_mem_id();
3815bf6aede7SJean Delvare 	struct delayed_work *work = to_delayed_work(w);
38161da177e4SLinus Torvalds 
381718004c5dSChristoph Lameter 	if (!mutex_trylock(&slab_mutex))
38181da177e4SLinus Torvalds 		/* Give up. Setup the next iteration. */
38197c5cae36SChristoph Lameter 		goto out;
38201da177e4SLinus Torvalds 
382118004c5dSChristoph Lameter 	list_for_each_entry(searchp, &slab_caches, list) {
38221da177e4SLinus Torvalds 		check_irq_on();
38231da177e4SLinus Torvalds 
382435386e3bSChristoph Lameter 		/*
3825ce8eb6c4SChristoph Lameter 		 * We only take the node lock if absolutely necessary and we
382635386e3bSChristoph Lameter 		 * have established with reasonable certainty that
382735386e3bSChristoph Lameter 		 * we can do some work if the lock was obtained.
382835386e3bSChristoph Lameter 		 */
382918bf8541SChristoph Lameter 		n = get_node(searchp, node);
383035386e3bSChristoph Lameter 
3831ce8eb6c4SChristoph Lameter 		reap_alien(searchp, n);
38321da177e4SLinus Torvalds 
383318726ca8SJoonsoo Kim 		drain_array(searchp, n, cpu_cache_get(searchp), node);
38341da177e4SLinus Torvalds 
383535386e3bSChristoph Lameter 		/*
383635386e3bSChristoph Lameter 		 * These are racy checks but it does not matter
383735386e3bSChristoph Lameter 		 * if we skip one check or scan twice.
383835386e3bSChristoph Lameter 		 */
3839ce8eb6c4SChristoph Lameter 		if (time_after(n->next_reap, jiffies))
384035386e3bSChristoph Lameter 			goto next;
38411da177e4SLinus Torvalds 
38425f0985bbSJianyu Zhan 		n->next_reap = jiffies + REAPTIMEOUT_NODE;
38431da177e4SLinus Torvalds 
384418726ca8SJoonsoo Kim 		drain_array(searchp, n, n->shared, node);
38451da177e4SLinus Torvalds 
3846ce8eb6c4SChristoph Lameter 		if (n->free_touched)
3847ce8eb6c4SChristoph Lameter 			n->free_touched = 0;
3848ed11d9ebSChristoph Lameter 		else {
3849ed11d9ebSChristoph Lameter 			int freed;
3850ed11d9ebSChristoph Lameter 
3851ce8eb6c4SChristoph Lameter 			freed = drain_freelist(searchp, n, (n->free_limit +
3852ed11d9ebSChristoph Lameter 				5 * searchp->num - 1) / (5 * searchp->num));
3853ed11d9ebSChristoph Lameter 			STATS_ADD_REAPED(searchp, freed);
38541da177e4SLinus Torvalds 		}
385535386e3bSChristoph Lameter next:
38561da177e4SLinus Torvalds 		cond_resched();
38571da177e4SLinus Torvalds 	}
38581da177e4SLinus Torvalds 	check_irq_on();
385918004c5dSChristoph Lameter 	mutex_unlock(&slab_mutex);
38608fce4d8eSChristoph Lameter 	next_reap_node();
38617c5cae36SChristoph Lameter out:
38621da177e4SLinus Torvalds 	/* Set up the next iteration */
3863a9f2a846SVlastimil Babka 	schedule_delayed_work_on(smp_processor_id(), work,
3864a9f2a846SVlastimil Babka 				round_jiffies_relative(REAPTIMEOUT_AC));
38651da177e4SLinus Torvalds }
38661da177e4SLinus Torvalds 
38670d7561c6SGlauber Costa void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
38681da177e4SLinus Torvalds {
3869f728b0a5SGreg Thelen 	unsigned long active_objs, num_objs, active_slabs;
3870bf00bd34SDavid Rientjes 	unsigned long total_slabs = 0, free_objs = 0, shared_avail = 0;
3871bf00bd34SDavid Rientjes 	unsigned long free_slabs = 0;
3872e498be7dSChristoph Lameter 	int node;
3873ce8eb6c4SChristoph Lameter 	struct kmem_cache_node *n;
38741da177e4SLinus Torvalds 
387518bf8541SChristoph Lameter 	for_each_kmem_cache_node(cachep, node, n) {
3876ca3b9b91SRavikiran G Thirumalai 		check_irq_on();
3877b539ce9fSJiri Kosina 		raw_spin_lock_irq(&n->list_lock);
3878e498be7dSChristoph Lameter 
3879bf00bd34SDavid Rientjes 		total_slabs += n->total_slabs;
3880bf00bd34SDavid Rientjes 		free_slabs += n->free_slabs;
3881f728b0a5SGreg Thelen 		free_objs += n->free_objects;
388207a63c41SAruna Ramakrishna 
3883ce8eb6c4SChristoph Lameter 		if (n->shared)
3884ce8eb6c4SChristoph Lameter 			shared_avail += n->shared->avail;
3885e498be7dSChristoph Lameter 
3886b539ce9fSJiri Kosina 		raw_spin_unlock_irq(&n->list_lock);
3887e498be7dSChristoph Lameter 	}
3888bf00bd34SDavid Rientjes 	num_objs = total_slabs * cachep->num;
3889bf00bd34SDavid Rientjes 	active_slabs = total_slabs - free_slabs;
3890f728b0a5SGreg Thelen 	active_objs = num_objs - free_objs;
38911da177e4SLinus Torvalds 
38920d7561c6SGlauber Costa 	sinfo->active_objs = active_objs;
38930d7561c6SGlauber Costa 	sinfo->num_objs = num_objs;
38940d7561c6SGlauber Costa 	sinfo->active_slabs = active_slabs;
3895bf00bd34SDavid Rientjes 	sinfo->num_slabs = total_slabs;
38960d7561c6SGlauber Costa 	sinfo->shared_avail = shared_avail;
38970d7561c6SGlauber Costa 	sinfo->limit = cachep->limit;
38980d7561c6SGlauber Costa 	sinfo->batchcount = cachep->batchcount;
38990d7561c6SGlauber Costa 	sinfo->shared = cachep->shared;
39000d7561c6SGlauber Costa 	sinfo->objects_per_slab = cachep->num;
39010d7561c6SGlauber Costa 	sinfo->cache_order = cachep->gfporder;
39020d7561c6SGlauber Costa }
39030d7561c6SGlauber Costa 
39040d7561c6SGlauber Costa void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *cachep)
39050d7561c6SGlauber Costa {
39061da177e4SLinus Torvalds #if STATS
3907ce8eb6c4SChristoph Lameter 	{			/* node stats */
39081da177e4SLinus Torvalds 		unsigned long high = cachep->high_mark;
39091da177e4SLinus Torvalds 		unsigned long allocs = cachep->num_allocations;
39101da177e4SLinus Torvalds 		unsigned long grown = cachep->grown;
39111da177e4SLinus Torvalds 		unsigned long reaped = cachep->reaped;
39121da177e4SLinus Torvalds 		unsigned long errors = cachep->errors;
39131da177e4SLinus Torvalds 		unsigned long max_freeable = cachep->max_freeable;
39141da177e4SLinus Torvalds 		unsigned long node_allocs = cachep->node_allocs;
3915e498be7dSChristoph Lameter 		unsigned long node_frees = cachep->node_frees;
3916fb7faf33SRavikiran G Thirumalai 		unsigned long overflows = cachep->node_overflow;
39171da177e4SLinus Torvalds 
3918756a025fSJoe Perches 		seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu %4lu %4lu %4lu %4lu %4lu",
3919e92dd4fdSJoe Perches 			   allocs, high, grown,
3920a737b3e2SAndrew Morton 			   reaped, errors, max_freeable, node_allocs,
3921fb7faf33SRavikiran G Thirumalai 			   node_frees, overflows);
39221da177e4SLinus Torvalds 	}
39231da177e4SLinus Torvalds 	/* cpu stats */
39241da177e4SLinus Torvalds 	{
39251da177e4SLinus Torvalds 		unsigned long allochit = atomic_read(&cachep->allochit);
39261da177e4SLinus Torvalds 		unsigned long allocmiss = atomic_read(&cachep->allocmiss);
39271da177e4SLinus Torvalds 		unsigned long freehit = atomic_read(&cachep->freehit);
39281da177e4SLinus Torvalds 		unsigned long freemiss = atomic_read(&cachep->freemiss);
39291da177e4SLinus Torvalds 
39301da177e4SLinus Torvalds 		seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
39311da177e4SLinus Torvalds 			   allochit, allocmiss, freehit, freemiss);
39321da177e4SLinus Torvalds 	}
39331da177e4SLinus Torvalds #endif
39341da177e4SLinus Torvalds }
39351da177e4SLinus Torvalds 
39361da177e4SLinus Torvalds #define MAX_SLABINFO_WRITE 128
39371da177e4SLinus Torvalds /**
39381da177e4SLinus Torvalds  * slabinfo_write - Tuning for the slab allocator
39391da177e4SLinus Torvalds  * @file: unused
39401da177e4SLinus Torvalds  * @buffer: user buffer
39411da177e4SLinus Torvalds  * @count: data length
39421da177e4SLinus Torvalds  * @ppos: unused
3943a862f68aSMike Rapoport  *
3944a862f68aSMike Rapoport  * Return: %0 on success, negative error code otherwise.
39451da177e4SLinus Torvalds  */
3946b7454ad3SGlauber Costa ssize_t slabinfo_write(struct file *file, const char __user *buffer,
39471da177e4SLinus Torvalds 		       size_t count, loff_t *ppos)
39481da177e4SLinus Torvalds {
39491da177e4SLinus Torvalds 	char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
39501da177e4SLinus Torvalds 	int limit, batchcount, shared, res;
39517a7c381dSChristoph Hellwig 	struct kmem_cache *cachep;
39521da177e4SLinus Torvalds 
39531da177e4SLinus Torvalds 	if (count > MAX_SLABINFO_WRITE)
39541da177e4SLinus Torvalds 		return -EINVAL;
39551da177e4SLinus Torvalds 	if (copy_from_user(&kbuf, buffer, count))
39561da177e4SLinus Torvalds 		return -EFAULT;
39571da177e4SLinus Torvalds 	kbuf[MAX_SLABINFO_WRITE] = '\0';
39581da177e4SLinus Torvalds 
39591da177e4SLinus Torvalds 	tmp = strchr(kbuf, ' ');
39601da177e4SLinus Torvalds 	if (!tmp)
39611da177e4SLinus Torvalds 		return -EINVAL;
39621da177e4SLinus Torvalds 	*tmp = '\0';
39631da177e4SLinus Torvalds 	tmp++;
39641da177e4SLinus Torvalds 	if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
39651da177e4SLinus Torvalds 		return -EINVAL;
39661da177e4SLinus Torvalds 
39671da177e4SLinus Torvalds 	/* Find the cache in the chain of caches. */
396818004c5dSChristoph Lameter 	mutex_lock(&slab_mutex);
39691da177e4SLinus Torvalds 	res = -EINVAL;
397018004c5dSChristoph Lameter 	list_for_each_entry(cachep, &slab_caches, list) {
39711da177e4SLinus Torvalds 		if (!strcmp(cachep->name, kbuf)) {
3972a737b3e2SAndrew Morton 			if (limit < 1 || batchcount < 1 ||
3973b28a02deSPekka Enberg 					batchcount > limit || shared < 0) {
3974e498be7dSChristoph Lameter 				res = 0;
39751da177e4SLinus Torvalds 			} else {
3976e498be7dSChristoph Lameter 				res = do_tune_cpucache(cachep, limit,
397783b519e8SPekka Enberg 						       batchcount, shared,
397883b519e8SPekka Enberg 						       GFP_KERNEL);
39791da177e4SLinus Torvalds 			}
39801da177e4SLinus Torvalds 			break;
39811da177e4SLinus Torvalds 		}
39821da177e4SLinus Torvalds 	}
398318004c5dSChristoph Lameter 	mutex_unlock(&slab_mutex);
39841da177e4SLinus Torvalds 	if (res >= 0)
39851da177e4SLinus Torvalds 		res = count;
39861da177e4SLinus Torvalds 	return res;
39871da177e4SLinus Torvalds }
3988871751e2SAl Viro 
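/*
 * Illustrative userspace sketch (not part of slab.c, never built): tuning a
 * cache through /proc/slabinfo. The line format matches the parsing above:
 * "<cache name> <limit> <batchcount> <shared>". The cache name "dentry" and
 * the numbers are only examples; out-of-range values are silently ignored.
 */
#if 0
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/slabinfo", "w");

	if (!f)
		return 1;
	/* limit=120, batchcount=60, shared=8 for the "dentry" cache */
	fprintf(f, "dentry 120 60 8\n");
	fclose(f);
	return 0;
}
#endif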
398904385fc5SKees Cook #ifdef CONFIG_HARDENED_USERCOPY
399004385fc5SKees Cook /*
3991afcc90f8SKees Cook  * Rejects incorrectly sized objects and objects that are to be copied
3992afcc90f8SKees Cook  * to/from userspace but do not fall entirely within the containing slab
3993afcc90f8SKees Cook  * cache's usercopy region.
399404385fc5SKees Cook  *
399504385fc5SKees Cook  * Returns NULL if check passes, otherwise const char * to name of cache
399604385fc5SKees Cook  * to indicate an error.
399704385fc5SKees Cook  */
39980b3eb091SMatthew Wilcox (Oracle) void __check_heap_object(const void *ptr, unsigned long n,
39990b3eb091SMatthew Wilcox (Oracle) 			 const struct slab *slab, bool to_user)
400004385fc5SKees Cook {
400104385fc5SKees Cook 	struct kmem_cache *cachep;
400204385fc5SKees Cook 	unsigned int objnr;
400304385fc5SKees Cook 	unsigned long offset;
400404385fc5SKees Cook 
4005219667c2SAndrey Konovalov 	ptr = kasan_reset_tag(ptr);
4006219667c2SAndrey Konovalov 
400704385fc5SKees Cook 	/* Find and validate object. */
40080b3eb091SMatthew Wilcox (Oracle) 	cachep = slab->slab_cache;
400940f3bf0cSVlastimil Babka 	objnr = obj_to_index(cachep, slab, (void *)ptr);
401004385fc5SKees Cook 	BUG_ON(objnr >= cachep->num);
401104385fc5SKees Cook 
401204385fc5SKees Cook 	/* Find offset within object. */
4013d3fb45f3SAlexander Potapenko 	if (is_kfence_address(ptr))
4014d3fb45f3SAlexander Potapenko 		offset = ptr - kfence_object_start(ptr);
4015d3fb45f3SAlexander Potapenko 	else
40167981e67eSVlastimil Babka 		offset = ptr - index_to_obj(cachep, slab, objnr) - obj_offset(cachep);
401704385fc5SKees Cook 
4018afcc90f8SKees Cook 	/* Allow address range falling entirely within usercopy region. */
4019afcc90f8SKees Cook 	if (offset >= cachep->useroffset &&
4020afcc90f8SKees Cook 	    offset - cachep->useroffset <= cachep->usersize &&
4021afcc90f8SKees Cook 	    n <= cachep->useroffset - offset + cachep->usersize)
4022f4e6e289SKees Cook 		return;
402304385fc5SKees Cook 
4024f4e6e289SKees Cook 	usercopy_abort("SLAB object", cachep->name, to_user, offset, n);
402504385fc5SKees Cook }
402604385fc5SKees Cook #endif /* CONFIG_HARDENED_USERCOPY */
4027