1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds * linux/mm/slab.c
41da177e4SLinus Torvalds * Written by Mark Hemment, 1996/97.
51da177e4SLinus Torvalds * (markhe@nextd.demon.co.uk)
61da177e4SLinus Torvalds *
71da177e4SLinus Torvalds * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
81da177e4SLinus Torvalds *
91da177e4SLinus Torvalds * Major cleanup, different bufctl logic, per-cpu arrays
101da177e4SLinus Torvalds * (c) 2000 Manfred Spraul
111da177e4SLinus Torvalds *
121da177e4SLinus Torvalds * Cleanup, make the head arrays unconditional, preparation for NUMA
131da177e4SLinus Torvalds * (c) 2002 Manfred Spraul
141da177e4SLinus Torvalds *
151da177e4SLinus Torvalds * An implementation of the Slab Allocator as described in outline in;
161da177e4SLinus Torvalds * UNIX Internals: The New Frontiers by Uresh Vahalia
171da177e4SLinus Torvalds * Pub: Prentice Hall ISBN 0-13-101908-2
181da177e4SLinus Torvalds * or with a little more detail in;
191da177e4SLinus Torvalds * The Slab Allocator: An Object-Caching Kernel Memory Allocator
201da177e4SLinus Torvalds * Jeff Bonwick (Sun Microsystems).
211da177e4SLinus Torvalds * Presented at: USENIX Summer 1994 Technical Conference
221da177e4SLinus Torvalds *
231da177e4SLinus Torvalds * The memory is organized in caches, one cache for each object type.
241da177e4SLinus Torvalds * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
251da177e4SLinus Torvalds * Each cache consists of many slabs (they are small (usually one
261da177e4SLinus Torvalds * page long) and always contiguous), and each slab contains multiple
271da177e4SLinus Torvalds * initialized objects.
281da177e4SLinus Torvalds *
291da177e4SLinus Torvalds * This means that your constructor is used only for newly allocated
30183ff22bSSimon Arlott * slabs and you must pass objects with the same initializations to
311da177e4SLinus Torvalds * kmem_cache_free.
321da177e4SLinus Torvalds *
331da177e4SLinus Torvalds * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
341da177e4SLinus Torvalds * normal). If you need a special memory type, then you must create a new
351da177e4SLinus Torvalds * cache for that memory type.
361da177e4SLinus Torvalds *
371da177e4SLinus Torvalds * In order to reduce fragmentation, the slabs are sorted in 3 groups:
381da177e4SLinus Torvalds * full slabs with 0 free objects
391da177e4SLinus Torvalds * partial slabs
401da177e4SLinus Torvalds * empty slabs with no allocated objects
411da177e4SLinus Torvalds *
421da177e4SLinus Torvalds * If partial slabs exist, then new allocations come from these slabs,
431da177e4SLinus Torvalds * otherwise they come from empty slabs, or new slabs are allocated.
441da177e4SLinus Torvalds *
451da177e4SLinus Torvalds * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
461da177e4SLinus Torvalds * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
471da177e4SLinus Torvalds *
481da177e4SLinus Torvalds * Each cache has a short per-cpu head array; most allocs
491da177e4SLinus Torvalds * and frees go into that array, and if that array overflows, then 1/2
501da177e4SLinus Torvalds * of the entries in the array are given back into the global cache.
511da177e4SLinus Torvalds * The head array is strictly LIFO and should improve the cache hit rates.
521da177e4SLinus Torvalds * On SMP, it additionally reduces the spinlock operations.
531da177e4SLinus Torvalds *
541da177e4SLinus Torvalds * The c_cpuarray may not be read with local interrupts enabled -
551da177e4SLinus Torvalds * it's changed with a smp_call_function().
561da177e4SLinus Torvalds *
571da177e4SLinus Torvalds * SMP synchronization:
581da177e4SLinus Torvalds * constructors and destructors are called without any locking.
59343e0d7aSPekka Enberg * Several members in struct kmem_cache and struct slab never change, they
601da177e4SLinus Torvalds * are accessed without any locking.
611da177e4SLinus Torvalds * The per-cpu arrays are never accessed from the wrong cpu, no locking,
621da177e4SLinus Torvalds * and local interrupts are disabled so slab code is preempt-safe.
631da177e4SLinus Torvalds * The non-constant members are protected with a per-cache irq spinlock.
641da177e4SLinus Torvalds *
651da177e4SLinus Torvalds * Many thanks to Mark Hemment, who wrote another per-cpu slab patch
661da177e4SLinus Torvalds * in 2000 - many ideas in the current implementation are derived from
671da177e4SLinus Torvalds * his patch.
681da177e4SLinus Torvalds *
691da177e4SLinus Torvalds * Further notes from the original documentation:
701da177e4SLinus Torvalds *
711da177e4SLinus Torvalds * 11 April '97. Started multi-threading - markhe
7218004c5dSChristoph Lameter * The global cache-chain is protected by the mutex 'slab_mutex'.
731da177e4SLinus Torvalds * The mutex is only needed when accessing/extending the cache-chain, which
741da177e4SLinus Torvalds * can never happen inside an interrupt (kmem_cache_create(),
751da177e4SLinus Torvalds * kmem_cache_shrink() and kmem_cache_reap()).
761da177e4SLinus Torvalds *
771da177e4SLinus Torvalds * At present, each engine can be growing a cache. This should be blocked.
781da177e4SLinus Torvalds *
79e498be7dSChristoph Lameter * 15 March 2005. NUMA slab allocator.
80e498be7dSChristoph Lameter * Shai Fultheim <shai@scalex86.org>.
81e498be7dSChristoph Lameter * Shobhit Dayal <shobhit@calsoftinc.com>
82e498be7dSChristoph Lameter * Alok N Kataria <alokk@calsoftinc.com>
83e498be7dSChristoph Lameter * Christoph Lameter <christoph@lameter.com>
84e498be7dSChristoph Lameter *
85e498be7dSChristoph Lameter * Modified the slab allocator to be node aware on NUMA systems.
86e498be7dSChristoph Lameter * Each node has its own list of partial, free and full slabs.
87e498be7dSChristoph Lameter * All object allocations for a node occur from node specific slab lists.
881da177e4SLinus Torvalds */
891da177e4SLinus Torvalds
901da177e4SLinus Torvalds #include <linux/slab.h>
911da177e4SLinus Torvalds #include <linux/mm.h>
92c9cf5528SRandy Dunlap #include <linux/poison.h>
931da177e4SLinus Torvalds #include <linux/swap.h>
941da177e4SLinus Torvalds #include <linux/cache.h>
951da177e4SLinus Torvalds #include <linux/interrupt.h>
961da177e4SLinus Torvalds #include <linux/init.h>
971da177e4SLinus Torvalds #include <linux/compiler.h>
98101a5001SPaul Jackson #include <linux/cpuset.h>
99a0ec95a8SAlexey Dobriyan #include <linux/proc_fs.h>
1001da177e4SLinus Torvalds #include <linux/seq_file.h>
1011da177e4SLinus Torvalds #include <linux/notifier.h>
1021da177e4SLinus Torvalds #include <linux/kallsyms.h>
103d3fb45f3SAlexander Potapenko #include <linux/kfence.h>
1041da177e4SLinus Torvalds #include <linux/cpu.h>
1051da177e4SLinus Torvalds #include <linux/sysctl.h>
1061da177e4SLinus Torvalds #include <linux/module.h>
1071da177e4SLinus Torvalds #include <linux/rcupdate.h>
108543537bdSPaulo Marques #include <linux/string.h>
109138ae663SAndrew Morton #include <linux/uaccess.h>
110e498be7dSChristoph Lameter #include <linux/nodemask.h>
111d5cff635SCatalin Marinas #include <linux/kmemleak.h>
112dc85da15SChristoph Lameter #include <linux/mempolicy.h>
113fc0abb14SIngo Molnar #include <linux/mutex.h>
1148a8b6502SAkinobu Mita #include <linux/fault-inject.h>
115e7eebaf6SIngo Molnar #include <linux/rtmutex.h>
1166a2d7a95SEric Dumazet #include <linux/reciprocal_div.h>
1173ac7fe5aSThomas Gleixner #include <linux/debugobjects.h>
1188f9f8d9eSDavid Rientjes #include <linux/memory.h>
119268bb0ceSLinus Torvalds #include <linux/prefetch.h>
1203f8c2452SIngo Molnar #include <linux/sched/task_stack.h>
1211da177e4SLinus Torvalds
122381760eaSMel Gorman #include <net/sock.h>
123381760eaSMel Gorman
1241da177e4SLinus Torvalds #include <asm/cacheflush.h>
1251da177e4SLinus Torvalds #include <asm/tlbflush.h>
1261da177e4SLinus Torvalds #include <asm/page.h>
1271da177e4SLinus Torvalds
1284dee6b64SSteven Rostedt #include <trace/events/kmem.h>
1294dee6b64SSteven Rostedt
130072bb0aaSMel Gorman #include "internal.h"
131072bb0aaSMel Gorman
132b9ce5ef4SGlauber Costa #include "slab.h"
133b9ce5ef4SGlauber Costa
1341da177e4SLinus Torvalds /*
13550953fe9SChristoph Lameter * DEBUG - 1 for kmem_cache_create() to honour SLAB_RED_ZONE & SLAB_POISON.
1361da177e4SLinus Torvalds * 0 for faster, smaller code (especially in the critical paths).
1371da177e4SLinus Torvalds *
1381da177e4SLinus Torvalds * STATS - 1 to collect stats for /proc/slabinfo.
1391da177e4SLinus Torvalds * 0 for faster, smaller code (especially in the critical paths).
1401da177e4SLinus Torvalds *
1411da177e4SLinus Torvalds * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
1421da177e4SLinus Torvalds */
1431da177e4SLinus Torvalds
1441da177e4SLinus Torvalds #ifdef CONFIG_DEBUG_SLAB
1451da177e4SLinus Torvalds #define DEBUG 1
1461da177e4SLinus Torvalds #define STATS 1
1471da177e4SLinus Torvalds #define FORCED_DEBUG 1
1481da177e4SLinus Torvalds #else
1491da177e4SLinus Torvalds #define DEBUG 0
1501da177e4SLinus Torvalds #define STATS 0
1511da177e4SLinus Torvalds #define FORCED_DEBUG 0
1521da177e4SLinus Torvalds #endif
1531da177e4SLinus Torvalds
1541da177e4SLinus Torvalds /* Shouldn't this be in a header file somewhere? */
1551da177e4SLinus Torvalds #define BYTES_PER_WORD sizeof(void *)
15687a927c7SDavid Woodhouse #define REDZONE_ALIGN max(BYTES_PER_WORD, __alignof__(unsigned long long))
1571da177e4SLinus Torvalds
1581da177e4SLinus Torvalds #ifndef ARCH_KMALLOC_FLAGS
1591da177e4SLinus Torvalds #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
1601da177e4SLinus Torvalds #endif
1611da177e4SLinus Torvalds
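/*
 * Freelist entries only need to index objects within a single slab. If the
 * smallest permitted object is at least PAGE_SIZE >> BITS_PER_BYTE bytes
 * (PAGE_SIZE / 256), no more than 256 objects can fit in a page-sized slab
 * and a byte-wide index suffices; otherwise an unsigned short is used.
 * SLAB_OBJ_MAX_NUM below caps the per-slab object count to what
 * freelist_idx_t can represent.
 */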
162f315e3faSJoonsoo Kim #define FREELIST_BYTE_INDEX (((PAGE_SIZE >> BITS_PER_BYTE) \
163f315e3faSJoonsoo Kim <= SLAB_OBJ_MIN_SIZE) ? 1 : 0)
164f315e3faSJoonsoo Kim
165f315e3faSJoonsoo Kim #if FREELIST_BYTE_INDEX
166f315e3faSJoonsoo Kim typedef unsigned char freelist_idx_t;
167f315e3faSJoonsoo Kim #else
168f315e3faSJoonsoo Kim typedef unsigned short freelist_idx_t;
169f315e3faSJoonsoo Kim #endif
170f315e3faSJoonsoo Kim
17130321c7bSDavid Miller #define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1)
172f315e3faSJoonsoo Kim
173072bb0aaSMel Gorman /*
1741da177e4SLinus Torvalds * struct array_cache
1751da177e4SLinus Torvalds *
1761da177e4SLinus Torvalds * Purpose:
1771da177e4SLinus Torvalds * - LIFO ordering, to hand out cache-warm objects from _alloc
1781da177e4SLinus Torvalds * - reduce the number of linked list operations
1791da177e4SLinus Torvalds * - reduce spinlock operations
1801da177e4SLinus Torvalds *
1811da177e4SLinus Torvalds * The limit is stored in the per-cpu structure to reduce the data cache
1821da177e4SLinus Torvalds * footprint.
1831da177e4SLinus Torvalds *
1841da177e4SLinus Torvalds */
1851da177e4SLinus Torvalds struct array_cache {
1861da177e4SLinus Torvalds unsigned int avail; /* number of objects currently cached */
1871da177e4SLinus Torvalds unsigned int limit; /* maximum number of cached objects */
1881da177e4SLinus Torvalds unsigned int batchcount; /* objects transferred per refill/drain */
1891da177e4SLinus Torvalds unsigned int touched; /* recently used; checked by the reaper */
190bda5b655SRobert P. J. Day void *entry[]; /*
191e498be7dSChristoph Lameter * Must have this definition in here for the proper
192e498be7dSChristoph Lameter * alignment of array_cache. Also simplifies accessing
193e498be7dSChristoph Lameter * the entries.
194e498be7dSChristoph Lameter */
1951da177e4SLinus Torvalds };
1961da177e4SLinus Torvalds
197c8522a3aSJoonsoo Kim struct alien_cache {
198c8522a3aSJoonsoo Kim spinlock_t lock;
199c8522a3aSJoonsoo Kim struct array_cache ac;
200c8522a3aSJoonsoo Kim };
201c8522a3aSJoonsoo Kim
202a737b3e2SAndrew Morton /*
203e498be7dSChristoph Lameter * Need this for bootstrapping a per node allocator.
204e498be7dSChristoph Lameter */
205bf0dea23SJoonsoo Kim #define NUM_INIT_LISTS (2 * MAX_NUMNODES)
206ce8eb6c4SChristoph Lameter static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
207e498be7dSChristoph Lameter #define CACHE_CACHE 0
208bf0dea23SJoonsoo Kim #define SIZE_NODE (MAX_NUMNODES)
2091da177e4SLinus Torvalds
210ed11d9ebSChristoph Lameter static int drain_freelist(struct kmem_cache *cache,
211ce8eb6c4SChristoph Lameter struct kmem_cache_node *n, int tofree);
212ed11d9ebSChristoph Lameter static void free_block(struct kmem_cache *cachep, void **objpp, int len,
21397654dfaSJoonsoo Kim int node, struct list_head *list);
21497654dfaSJoonsoo Kim static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list);
21583b519e8SPekka Enberg static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
21665f27f38SDavid Howells static void cache_reap(struct work_struct *unused);
217ed11d9ebSChristoph Lameter
21876b342bdSJoonsoo Kim static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
21976b342bdSJoonsoo Kim void **list);
22076b342bdSJoonsoo Kim static inline void fixup_slab_list(struct kmem_cache *cachep,
2217981e67eSVlastimil Babka struct kmem_cache_node *n, struct slab *slab,
22276b342bdSJoonsoo Kim void **list);
223e0a42726SIngo Molnar
224ce8eb6c4SChristoph Lameter #define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))
225e498be7dSChristoph Lameter
226ce8eb6c4SChristoph Lameter static void kmem_cache_node_init(struct kmem_cache_node *parent)
227e498be7dSChristoph Lameter {
228e498be7dSChristoph Lameter INIT_LIST_HEAD(&parent->slabs_full);
229e498be7dSChristoph Lameter INIT_LIST_HEAD(&parent->slabs_partial);
230e498be7dSChristoph Lameter INIT_LIST_HEAD(&parent->slabs_free);
231bf00bd34SDavid Rientjes parent->total_slabs = 0;
232f728b0a5SGreg Thelen parent->free_slabs = 0;
233e498be7dSChristoph Lameter parent->shared = NULL;
234e498be7dSChristoph Lameter parent->alien = NULL;
2352e1217cfSRavikiran G Thirumalai parent->colour_next = 0;
236b539ce9fSJiri Kosina raw_spin_lock_init(&parent->list_lock);
237e498be7dSChristoph Lameter parent->free_objects = 0;
238e498be7dSChristoph Lameter parent->free_touched = 0;
239e498be7dSChristoph Lameter }
240e498be7dSChristoph Lameter
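/*
 * Bootstrap helpers: splice the slab lists of an early (__initdata)
 * kmem_cache_node into its permanent, kmalloc()ed replacement. A plain
 * structure copy would leave the list_heads pointing at the old memory.
 */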
241e498be7dSChristoph Lameter #define MAKE_LIST(cachep, listp, slab, nodeid) \
242e498be7dSChristoph Lameter do { \
243e498be7dSChristoph Lameter INIT_LIST_HEAD(listp); \
24418bf8541SChristoph Lameter list_splice(&get_node(cachep, nodeid)->slab, listp); \
245e498be7dSChristoph Lameter } while (0)
246e498be7dSChristoph Lameter
247e498be7dSChristoph Lameter #define MAKE_ALL_LISTS(cachep, ptr, nodeid) \
248e498be7dSChristoph Lameter do { \
249e498be7dSChristoph Lameter MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \
250e498be7dSChristoph Lameter MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
251e498be7dSChristoph Lameter MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \
252e498be7dSChristoph Lameter } while (0)
2531da177e4SLinus Torvalds
2544fd0b46eSAlexey Dobriyan #define CFLGS_OBJFREELIST_SLAB ((slab_flags_t __force)0x40000000U)
2554fd0b46eSAlexey Dobriyan #define CFLGS_OFF_SLAB ((slab_flags_t __force)0x80000000U)
256b03a017bSJoonsoo Kim #define OBJFREELIST_SLAB(x) ((x)->flags & CFLGS_OBJFREELIST_SLAB)
2571da177e4SLinus Torvalds #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
2581da177e4SLinus Torvalds
2591da177e4SLinus Torvalds #define BATCHREFILL_LIMIT 16
260a737b3e2SAndrew Morton /*
261f0953a1bSIngo Molnar * Optimization question: fewer reaps mean a lower probability of unnecessary
262a737b3e2SAndrew Morton * cpucache drain/refill cycles.
2631da177e4SLinus Torvalds *
264dc6f3f27SAdrian Bunk * OTOH the cpuarrays can contain lots of objects,
2651da177e4SLinus Torvalds * which could lock up otherwise freeable slabs.
2661da177e4SLinus Torvalds */
2675f0985bbSJianyu Zhan #define REAPTIMEOUT_AC (2*HZ)
2685f0985bbSJianyu Zhan #define REAPTIMEOUT_NODE (4*HZ)
2691da177e4SLinus Torvalds
2701da177e4SLinus Torvalds #if STATS
2711da177e4SLinus Torvalds #define STATS_INC_ACTIVE(x) ((x)->num_active++)
2721da177e4SLinus Torvalds #define STATS_DEC_ACTIVE(x) ((x)->num_active--)
2731da177e4SLinus Torvalds #define STATS_INC_ALLOCED(x) ((x)->num_allocations++)
2741da177e4SLinus Torvalds #define STATS_INC_GROWN(x) ((x)->grown++)
275ed11d9ebSChristoph Lameter #define STATS_ADD_REAPED(x, y) ((x)->reaped += (y))
276a737b3e2SAndrew Morton #define STATS_SET_HIGH(x) \
277a737b3e2SAndrew Morton do { \
278a737b3e2SAndrew Morton if ((x)->num_active > (x)->high_mark) \
2791da177e4SLinus Torvalds (x)->high_mark = (x)->num_active; \
2801da177e4SLinus Torvalds } while (0)
2811da177e4SLinus Torvalds #define STATS_INC_ERR(x) ((x)->errors++)
2821da177e4SLinus Torvalds #define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++)
283e498be7dSChristoph Lameter #define STATS_INC_NODEFREES(x) ((x)->node_frees++)
284fb7faf33SRavikiran G Thirumalai #define STATS_INC_ACOVERFLOW(x) ((x)->node_overflow++)
2851da177e4SLinus Torvalds #define STATS_SET_FREEABLE(x, i) \
286a737b3e2SAndrew Morton do { \
287a737b3e2SAndrew Morton if ((x)->max_freeable < i) \
2881da177e4SLinus Torvalds (x)->max_freeable = i; \
2891da177e4SLinus Torvalds } while (0)
2901da177e4SLinus Torvalds #define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit)
2911da177e4SLinus Torvalds #define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss)
2921da177e4SLinus Torvalds #define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit)
2931da177e4SLinus Torvalds #define STATS_INC_FREEMISS(x) atomic_inc(&(x)->freemiss)
2941da177e4SLinus Torvalds #else
2951da177e4SLinus Torvalds #define STATS_INC_ACTIVE(x) do { } while (0)
2961da177e4SLinus Torvalds #define STATS_DEC_ACTIVE(x) do { } while (0)
2971da177e4SLinus Torvalds #define STATS_INC_ALLOCED(x) do { } while (0)
2981da177e4SLinus Torvalds #define STATS_INC_GROWN(x) do { } while (0)
2994e60c86bSAndi Kleen #define STATS_ADD_REAPED(x, y) do { (void)(y); } while (0)
3001da177e4SLinus Torvalds #define STATS_SET_HIGH(x) do { } while (0)
3011da177e4SLinus Torvalds #define STATS_INC_ERR(x) do { } while (0)
3021da177e4SLinus Torvalds #define STATS_INC_NODEALLOCS(x) do { } while (0)
303e498be7dSChristoph Lameter #define STATS_INC_NODEFREES(x) do { } while (0)
304fb7faf33SRavikiran G Thirumalai #define STATS_INC_ACOVERFLOW(x) do { } while (0)
305a737b3e2SAndrew Morton #define STATS_SET_FREEABLE(x, i) do { } while (0)
3061da177e4SLinus Torvalds #define STATS_INC_ALLOCHIT(x) do { } while (0)
3071da177e4SLinus Torvalds #define STATS_INC_ALLOCMISS(x) do { } while (0)
3081da177e4SLinus Torvalds #define STATS_INC_FREEHIT(x) do { } while (0)
3091da177e4SLinus Torvalds #define STATS_INC_FREEMISS(x) do { } while (0)
3101da177e4SLinus Torvalds #endif
3111da177e4SLinus Torvalds
3121da177e4SLinus Torvalds #if DEBUG
3131da177e4SLinus Torvalds
314a737b3e2SAndrew Morton /*
315a737b3e2SAndrew Morton * memory layout of objects:
3161da177e4SLinus Torvalds * 0 : objp
3173dafccf2SManfred Spraul * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that
3181da177e4SLinus Torvalds * the end of an object is aligned with the end of the real
3191da177e4SLinus Torvalds * allocation. Catches writes behind the end of the allocation.
3203dafccf2SManfred Spraul * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1:
3211da177e4SLinus Torvalds * redzone word.
3223dafccf2SManfred Spraul * cachep->obj_offset: The real object.
3233b0efdfaSChristoph Lameter * cachep->size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
3243b0efdfaSChristoph Lameter * cachep->size - 1* BYTES_PER_WORD: last caller address
325a737b3e2SAndrew Morton * [BYTES_PER_WORD long]
3261da177e4SLinus Torvalds */
327343e0d7aSPekka Enberg static int obj_offset(struct kmem_cache *cachep)
3281da177e4SLinus Torvalds {
3293dafccf2SManfred Spraul return cachep->obj_offset;
3301da177e4SLinus Torvalds }
3311da177e4SLinus Torvalds
332b46b8f19SDavid Woodhouse static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
3331da177e4SLinus Torvalds {
3341da177e4SLinus Torvalds BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
335b46b8f19SDavid Woodhouse return (unsigned long long *) (objp + obj_offset(cachep) -
336b46b8f19SDavid Woodhouse sizeof(unsigned long long));
3371da177e4SLinus Torvalds }
3381da177e4SLinus Torvalds
339b46b8f19SDavid Woodhouse static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
3401da177e4SLinus Torvalds {
3411da177e4SLinus Torvalds BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
3421da177e4SLinus Torvalds if (cachep->flags & SLAB_STORE_USER)
3433b0efdfaSChristoph Lameter return (unsigned long long *)(objp + cachep->size -
344b46b8f19SDavid Woodhouse sizeof(unsigned long long) -
34587a927c7SDavid Woodhouse REDZONE_ALIGN);
3463b0efdfaSChristoph Lameter return (unsigned long long *) (objp + cachep->size -
347b46b8f19SDavid Woodhouse sizeof(unsigned long long));
3481da177e4SLinus Torvalds }
3491da177e4SLinus Torvalds
350343e0d7aSPekka Enberg static void **dbg_userword(struct kmem_cache *cachep, void *objp)
3511da177e4SLinus Torvalds {
3521da177e4SLinus Torvalds BUG_ON(!(cachep->flags & SLAB_STORE_USER));
3533b0efdfaSChristoph Lameter return (void **)(objp + cachep->size - BYTES_PER_WORD);
3541da177e4SLinus Torvalds }
3551da177e4SLinus Torvalds
3561da177e4SLinus Torvalds #else
3571da177e4SLinus Torvalds
3583dafccf2SManfred Spraul #define obj_offset(x) 0
359b46b8f19SDavid Woodhouse #define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
360b46b8f19SDavid Woodhouse #define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
3611da177e4SLinus Torvalds #define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;})
3621da177e4SLinus Torvalds
3631da177e4SLinus Torvalds #endif
3641da177e4SLinus Torvalds
3651da177e4SLinus Torvalds /*
3663df1cccdSDavid Rientjes * Do not go above this order unless 0 objects fit into the slab or
3673df1cccdSDavid Rientjes * the limit is overridden on the command line.
3681da177e4SLinus Torvalds */
369543585ccSDavid Rientjes #define SLAB_MAX_ORDER_HI 1
370543585ccSDavid Rientjes #define SLAB_MAX_ORDER_LO 0
371543585ccSDavid Rientjes static int slab_max_order = SLAB_MAX_ORDER_LO;
3723df1cccdSDavid Rientjes static bool slab_max_order_set __initdata;
3731da177e4SLinus Torvalds
3740b3eb091SMatthew Wilcox (Oracle) static inline void *index_to_obj(struct kmem_cache *cache,
3757981e67eSVlastimil Babka const struct slab *slab, unsigned int idx)
3768fea4e96SPekka Enberg {
3777981e67eSVlastimil Babka return slab->s_mem + cache->size * idx;
3788fea4e96SPekka Enberg }
3798fea4e96SPekka Enberg
3806fb92430SJoonsoo Kim #define BOOT_CPUCACHE_ENTRIES 1
3811da177e4SLinus Torvalds /* internal cache of cache description objs */
3829b030cb8SChristoph Lameter static struct kmem_cache kmem_cache_boot = {
3831da177e4SLinus Torvalds .batchcount = 1,
3841da177e4SLinus Torvalds .limit = BOOT_CPUCACHE_ENTRIES,
385e498be7dSChristoph Lameter .shared = 1,
3863b0efdfaSChristoph Lameter .size = sizeof(struct kmem_cache),
3871da177e4SLinus Torvalds .name = "kmem_cache",
3881da177e4SLinus Torvalds };
3891da177e4SLinus Torvalds
3901871e52cSTejun Heo static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
3911da177e4SLinus Torvalds
392343e0d7aSPekka Enberg static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
3931da177e4SLinus Torvalds {
394bf0dea23SJoonsoo Kim return this_cpu_ptr(cachep->cpu_cache);
3951da177e4SLinus Torvalds }
3961da177e4SLinus Torvalds
397a737b3e2SAndrew Morton /*
398a737b3e2SAndrew Morton * Calculate the number of objects and left-over bytes for a given buffer size.
399a737b3e2SAndrew Morton */
40070f75067SJoonsoo Kim static unsigned int cache_estimate(unsigned long gfporder, size_t buffer_size,
401d50112edSAlexey Dobriyan slab_flags_t flags, size_t *left_over)
402fbaccacfSSteven Rostedt {
40370f75067SJoonsoo Kim unsigned int num;
404fbaccacfSSteven Rostedt size_t slab_size = PAGE_SIZE << gfporder;
4051da177e4SLinus Torvalds
406fbaccacfSSteven Rostedt /*
407fbaccacfSSteven Rostedt * The slab management structure can be either off the slab or
408fbaccacfSSteven Rostedt * on it. For the latter case, the memory allocated for a
409fbaccacfSSteven Rostedt * slab is used for:
410fbaccacfSSteven Rostedt *
411fbaccacfSSteven Rostedt * - @buffer_size bytes for each object
4122e6b3602SJoonsoo Kim * - One freelist_idx_t for each object
4132e6b3602SJoonsoo Kim *
4142e6b3602SJoonsoo Kim * We don't need to consider alignment of freelist because
4152e6b3602SJoonsoo Kim * freelist will be at the end of slab page. The objects will be
4162e6b3602SJoonsoo Kim * at the correct alignment.
417fbaccacfSSteven Rostedt *
418fbaccacfSSteven Rostedt * If the slab management structure is off the slab, then the
419fbaccacfSSteven Rostedt * alignment will already be calculated into the size. Because
420fbaccacfSSteven Rostedt * the slabs are all pages aligned, the objects will be at the
421fbaccacfSSteven Rostedt * correct alignment when allocated.
422fbaccacfSSteven Rostedt */
423b03a017bSJoonsoo Kim if (flags & (CFLGS_OBJFREELIST_SLAB | CFLGS_OFF_SLAB)) {
42470f75067SJoonsoo Kim num = slab_size / buffer_size;
4252e6b3602SJoonsoo Kim *left_over = slab_size % buffer_size;
426fbaccacfSSteven Rostedt } else {
42770f75067SJoonsoo Kim num = slab_size / (buffer_size + sizeof(freelist_idx_t));
4282e6b3602SJoonsoo Kim *left_over = slab_size %
4292e6b3602SJoonsoo Kim (buffer_size + sizeof(freelist_idx_t));
430fbaccacfSSteven Rostedt }
43170f75067SJoonsoo Kim
43270f75067SJoonsoo Kim return num;
4331da177e4SLinus Torvalds }
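/*
 * Example (assuming 4KB pages and a one-byte freelist_idx_t): for 256-byte
 * objects with an on-slab freelist, cache_estimate(0, 256, 0, &left) yields
 * num = 4096 / (256 + 1) = 15 objects and left_over = 4096 - 15 * 257 = 241
 * bytes; with CFLGS_OFF_SLAB it would be 4096 / 256 = 16 objects and no
 * leftover.
 */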
4341da177e4SLinus Torvalds
435f28510d3SChristoph Lameter #if DEBUG
436d40cee24SHarvey Harrison #define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)
4371da177e4SLinus Torvalds
438a737b3e2SAndrew Morton static void __slab_error(const char *function, struct kmem_cache *cachep,
439a737b3e2SAndrew Morton char *msg)
4401da177e4SLinus Torvalds {
4411170532bSJoe Perches pr_err("slab error in %s(): cache `%s': %s\n",
4421da177e4SLinus Torvalds function, cachep->name, msg);
4431da177e4SLinus Torvalds dump_stack();
444373d4d09SRusty Russell add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
4451da177e4SLinus Torvalds }
446f28510d3SChristoph Lameter #endif
4471da177e4SLinus Torvalds
4483395ee05SPaul Menage /*
4493395ee05SPaul Menage * By default on NUMA we use alien caches to stage the freeing of
4503395ee05SPaul Menage * objects allocated from other nodes. This causes massive memory
4513395ee05SPaul Menage * inefficiencies when using a fake NUMA setup to split memory into a
4523395ee05SPaul Menage * large number of small nodes, so it can be disabled on the command
4533395ee05SPaul Menage * line.
4543395ee05SPaul Menage */
4553395ee05SPaul Menage
4563395ee05SPaul Menage static int use_alien_caches __read_mostly = 1;
4573395ee05SPaul Menage static int __init noaliencache_setup(char *s)
4583395ee05SPaul Menage {
4593395ee05SPaul Menage use_alien_caches = 0;
4603395ee05SPaul Menage return 1;
4613395ee05SPaul Menage }
4623395ee05SPaul Menage __setup("noaliencache", noaliencache_setup);
4633395ee05SPaul Menage
4643df1cccdSDavid Rientjes static int __init slab_max_order_setup(char *str)
4653df1cccdSDavid Rientjes {
4663df1cccdSDavid Rientjes get_option(&str, &slab_max_order);
4673df1cccdSDavid Rientjes slab_max_order = slab_max_order < 0 ? 0 :
46823baf831SKirill A. Shutemov min(slab_max_order, MAX_ORDER);
4693df1cccdSDavid Rientjes slab_max_order_set = true;
4703df1cccdSDavid Rientjes
4713df1cccdSDavid Rientjes return 1;
4723df1cccdSDavid Rientjes }
4733df1cccdSDavid Rientjes __setup("slab_max_order=", slab_max_order_setup);
4743df1cccdSDavid Rientjes
4758fce4d8eSChristoph Lameter #ifdef CONFIG_NUMA
4768fce4d8eSChristoph Lameter /*
4778fce4d8eSChristoph Lameter * Special reaping functions for NUMA systems called from cache_reap().
4788fce4d8eSChristoph Lameter * These take care of doing round robin flushing of alien caches (containing
4798fce4d8eSChristoph Lameter * objects freed on different nodes from which they were allocated) and the
4808fce4d8eSChristoph Lameter * flushing of remote pcps by calling drain_node_pages.
4818fce4d8eSChristoph Lameter */
4821871e52cSTejun Heo static DEFINE_PER_CPU(unsigned long, slab_reap_node);
4838fce4d8eSChristoph Lameter
4848fce4d8eSChristoph Lameter static void init_reap_node(int cpu)
4858fce4d8eSChristoph Lameter {
4860edaf86cSAndrew Morton per_cpu(slab_reap_node, cpu) = next_node_in(cpu_to_mem(cpu),
4870edaf86cSAndrew Morton node_online_map);
4888fce4d8eSChristoph Lameter }
4898fce4d8eSChristoph Lameter
4908fce4d8eSChristoph Lameter static void next_reap_node(void)
4918fce4d8eSChristoph Lameter {
492909ea964SChristoph Lameter int node = __this_cpu_read(slab_reap_node);
4938fce4d8eSChristoph Lameter
4940edaf86cSAndrew Morton node = next_node_in(node, node_online_map);
495909ea964SChristoph Lameter __this_cpu_write(slab_reap_node, node);
4968fce4d8eSChristoph Lameter }
4978fce4d8eSChristoph Lameter
4988fce4d8eSChristoph Lameter #else
4998fce4d8eSChristoph Lameter #define init_reap_node(cpu) do { } while (0)
5008fce4d8eSChristoph Lameter #define next_reap_node(void) do { } while (0)
5018fce4d8eSChristoph Lameter #endif
5028fce4d8eSChristoph Lameter
5031da177e4SLinus Torvalds /*
5041da177e4SLinus Torvalds * Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz
5051da177e4SLinus Torvalds * via the workqueue/eventd.
5061da177e4SLinus Torvalds * Add the CPU number into the expiration time to minimize the possibility of
5071da177e4SLinus Torvalds * the CPUs getting into lockstep and contending for the global cache chain
5081da177e4SLinus Torvalds * lock.
5091da177e4SLinus Torvalds */
5100db0628dSPaul Gortmaker static void start_cpu_timer(int cpu)
5111da177e4SLinus Torvalds {
5121871e52cSTejun Heo struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);
5131da177e4SLinus Torvalds
514eac0337aSTejun Heo if (reap_work->work.func == NULL) {
5158fce4d8eSChristoph Lameter init_reap_node(cpu);
516203b42f7STejun Heo INIT_DEFERRABLE_WORK(reap_work, cache_reap);
5172b284214SArjan van de Ven schedule_delayed_work_on(cpu, reap_work,
5182b284214SArjan van de Ven __round_jiffies_relative(HZ, cpu));
5191da177e4SLinus Torvalds }
5201da177e4SLinus Torvalds }
5211da177e4SLinus Torvalds
5221fe00d50SJoonsoo Kim static void init_arraycache(struct array_cache *ac, int limit, int batch)
5231da177e4SLinus Torvalds {
5241fe00d50SJoonsoo Kim if (ac) {
5251fe00d50SJoonsoo Kim ac->avail = 0;
5261fe00d50SJoonsoo Kim ac->limit = limit;
5271fe00d50SJoonsoo Kim ac->batchcount = batch;
5281fe00d50SJoonsoo Kim ac->touched = 0;
5291da177e4SLinus Torvalds }
5301fe00d50SJoonsoo Kim }
5311fe00d50SJoonsoo Kim
5321fe00d50SJoonsoo Kim static struct array_cache *alloc_arraycache(int node, int entries,
5331fe00d50SJoonsoo Kim int batchcount, gfp_t gfp)
5341fe00d50SJoonsoo Kim {
5355e804789SJoonsoo Kim size_t memsize = sizeof(void *) * entries + sizeof(struct array_cache);
5361fe00d50SJoonsoo Kim struct array_cache *ac = NULL;
5371fe00d50SJoonsoo Kim
5381fe00d50SJoonsoo Kim ac = kmalloc_node(memsize, gfp, node);
53992d1d07dSQian Cai /*
54092d1d07dSQian Cai * The array_cache structures contain pointers to free objects.
54192d1d07dSQian Cai * However, when such objects are allocated or transferred to another
54292d1d07dSQian Cai * cache the pointers are not cleared and they could be counted as
54392d1d07dSQian Cai * valid references during a kmemleak scan. Therefore, kmemleak must
54492d1d07dSQian Cai * not scan such objects.
54592d1d07dSQian Cai */
54692d1d07dSQian Cai kmemleak_no_scan(ac);
5471fe00d50SJoonsoo Kim init_arraycache(ac, entries, batchcount);
5481fe00d50SJoonsoo Kim return ac;
5491da177e4SLinus Torvalds }
5501da177e4SLinus Torvalds
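/*
 * Free an object that sits on a pfmemalloc (memory reserve) slab straight
 * back to its node's slab lists instead of caching it in the per-cpu array,
 * so that reserve memory is not handed out to ordinary allocations.
 */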
551f68f8dddSJoonsoo Kim static noinline void cache_free_pfmemalloc(struct kmem_cache *cachep,
5527981e67eSVlastimil Babka struct slab *slab, void *objp)
553072bb0aaSMel Gorman {
554ce8eb6c4SChristoph Lameter struct kmem_cache_node *n;
5557981e67eSVlastimil Babka int slab_node;
556f68f8dddSJoonsoo Kim LIST_HEAD(list);
557072bb0aaSMel Gorman
5587981e67eSVlastimil Babka slab_node = slab_nid(slab);
5597981e67eSVlastimil Babka n = get_node(cachep, slab_node);
560072bb0aaSMel Gorman
561b539ce9fSJiri Kosina raw_spin_lock(&n->list_lock);
5627981e67eSVlastimil Babka free_block(cachep, &objp, 1, slab_node, &list);
563b539ce9fSJiri Kosina raw_spin_unlock(&n->list_lock);
564072bb0aaSMel Gorman
565f68f8dddSJoonsoo Kim slabs_destroy(cachep, &list);
566072bb0aaSMel Gorman }
567072bb0aaSMel Gorman
5683ded175aSChristoph Lameter /*
5693ded175aSChristoph Lameter * Transfer objects in one arraycache to another.
5703ded175aSChristoph Lameter * Locking must be handled by the caller.
5713ded175aSChristoph Lameter *
5723ded175aSChristoph Lameter * Return the number of entries transferred.
5733ded175aSChristoph Lameter */
5743ded175aSChristoph Lameter static int transfer_objects(struct array_cache *to,
5753ded175aSChristoph Lameter struct array_cache *from, unsigned int max)
5763ded175aSChristoph Lameter {
5773ded175aSChristoph Lameter /* Figure out how many entries to transfer */
578732eacc0SHagen Paul Pfeifer int nr = min3(from->avail, max, to->limit - to->avail);
5793ded175aSChristoph Lameter
5803ded175aSChristoph Lameter if (!nr)
5813ded175aSChristoph Lameter return 0;
5823ded175aSChristoph Lameter
5833ded175aSChristoph Lameter memcpy(to->entry + to->avail, from->entry + from->avail - nr,
5843ded175aSChristoph Lameter sizeof(void *) *nr);
5853ded175aSChristoph Lameter
5863ded175aSChristoph Lameter from->avail -= nr;
5873ded175aSChristoph Lameter to->avail += nr;
5883ded175aSChristoph Lameter return nr;
5893ded175aSChristoph Lameter }
5903ded175aSChristoph Lameter
591dabc3e29SKees Cook /* &alien->lock must be held by alien callers. */
592dabc3e29SKees Cook static __always_inline void __free_one(struct array_cache *ac, void *objp)
593dabc3e29SKees Cook {
594dabc3e29SKees Cook /* Avoid trivial double-free. */
595dabc3e29SKees Cook if (IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) &&
596dabc3e29SKees Cook WARN_ON_ONCE(ac->avail > 0 && ac->entry[ac->avail - 1] == objp))
597dabc3e29SKees Cook return;
598dabc3e29SKees Cook ac->entry[ac->avail++] = objp;
599dabc3e29SKees Cook }
600dabc3e29SKees Cook
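/*
 * With !CONFIG_NUMA there is only one node, so alien caches are never
 * needed: the helpers below reduce to stubs that do nothing or report
 * that no alien cache exists.
 */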
601765c4507SChristoph Lameter #ifndef CONFIG_NUMA
602765c4507SChristoph Lameter
603765c4507SChristoph Lameter #define drain_alien_cache(cachep, alien) do { } while (0)
604ce8eb6c4SChristoph Lameter #define reap_alien(cachep, n) do { } while (0)
605765c4507SChristoph Lameter
606c8522a3aSJoonsoo Kim static inline struct alien_cache **alloc_alien_cache(int node,
607c8522a3aSJoonsoo Kim int limit, gfp_t gfp)
608765c4507SChristoph Lameter {
6098888177eSJoonsoo Kim return NULL;
610765c4507SChristoph Lameter }
611765c4507SChristoph Lameter
612c8522a3aSJoonsoo Kim static inline void free_alien_cache(struct alien_cache **ac_ptr)
613765c4507SChristoph Lameter {
614765c4507SChristoph Lameter }
615765c4507SChristoph Lameter
616765c4507SChristoph Lameter static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
617765c4507SChristoph Lameter {
618765c4507SChristoph Lameter return 0;
619765c4507SChristoph Lameter }
620765c4507SChristoph Lameter
6214167e9b2SDavid Rientjes static inline gfp_t gfp_exact_node(gfp_t flags)
6224167e9b2SDavid Rientjes {
623444eb2a4SMel Gorman return flags & ~__GFP_NOFAIL;
6244167e9b2SDavid Rientjes }
6254167e9b2SDavid Rientjes
626765c4507SChristoph Lameter #else /* CONFIG_NUMA */
627765c4507SChristoph Lameter
628c8522a3aSJoonsoo Kim static struct alien_cache *__alloc_alien_cache(int node, int entries,
629c8522a3aSJoonsoo Kim int batch, gfp_t gfp)
630e498be7dSChristoph Lameter {
6315e804789SJoonsoo Kim size_t memsize = sizeof(void *) * entries + sizeof(struct alien_cache);
632c8522a3aSJoonsoo Kim struct alien_cache *alc = NULL;
633c8522a3aSJoonsoo Kim
634c8522a3aSJoonsoo Kim alc = kmalloc_node(memsize, gfp, node);
63509c2e76eSChristoph Lameter if (alc) {
63692d1d07dSQian Cai kmemleak_no_scan(alc);
637c8522a3aSJoonsoo Kim init_arraycache(&alc->ac, entries, batch);
63849dfc304SJoonsoo Kim spin_lock_init(&alc->lock);
63909c2e76eSChristoph Lameter }
640c8522a3aSJoonsoo Kim return alc;
641c8522a3aSJoonsoo Kim }
642c8522a3aSJoonsoo Kim
643c8522a3aSJoonsoo Kim static struct alien_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
644c8522a3aSJoonsoo Kim {
645c8522a3aSJoonsoo Kim struct alien_cache **alc_ptr;
646e498be7dSChristoph Lameter int i;
647e498be7dSChristoph Lameter
648e498be7dSChristoph Lameter if (limit > 1)
649e498be7dSChristoph Lameter limit = 12;
650b9726c26SAlexey Dobriyan alc_ptr = kcalloc_node(nr_node_ids, sizeof(void *), gfp, node);
651c8522a3aSJoonsoo Kim if (!alc_ptr)
652c8522a3aSJoonsoo Kim return NULL;
653c8522a3aSJoonsoo Kim
654e498be7dSChristoph Lameter for_each_node(i) {
655f3186a9cSHaicheng Li if (i == node || !node_online(i))
656e498be7dSChristoph Lameter continue;
657c8522a3aSJoonsoo Kim alc_ptr[i] = __alloc_alien_cache(node, limit, 0xbaadf00d, gfp);
658c8522a3aSJoonsoo Kim if (!alc_ptr[i]) {
659cc550defSAkinobu Mita for (i--; i >= 0; i--)
660c8522a3aSJoonsoo Kim kfree(alc_ptr[i]);
661c8522a3aSJoonsoo Kim kfree(alc_ptr);
662e498be7dSChristoph Lameter return NULL;
663e498be7dSChristoph Lameter }
664e498be7dSChristoph Lameter }
665c8522a3aSJoonsoo Kim return alc_ptr;
666e498be7dSChristoph Lameter }
667e498be7dSChristoph Lameter
668c8522a3aSJoonsoo Kim static void free_alien_cache(struct alien_cache **alc_ptr)
669e498be7dSChristoph Lameter {
670e498be7dSChristoph Lameter int i;
671e498be7dSChristoph Lameter
672c8522a3aSJoonsoo Kim if (!alc_ptr)
673e498be7dSChristoph Lameter return;
674e498be7dSChristoph Lameter for_each_node(i)
675c8522a3aSJoonsoo Kim kfree(alc_ptr[i]);
676c8522a3aSJoonsoo Kim kfree(alc_ptr);
677e498be7dSChristoph Lameter }
678e498be7dSChristoph Lameter
679343e0d7aSPekka Enberg static void __drain_alien_cache(struct kmem_cache *cachep,
680833b706cSJoonsoo Kim struct array_cache *ac, int node,
681833b706cSJoonsoo Kim struct list_head *list)
682e498be7dSChristoph Lameter {
68318bf8541SChristoph Lameter struct kmem_cache_node *n = get_node(cachep, node);
684e498be7dSChristoph Lameter
685e498be7dSChristoph Lameter if (ac->avail) {
686b539ce9fSJiri Kosina raw_spin_lock(&n->list_lock);
687e00946feSChristoph Lameter /*
688e00946feSChristoph Lameter * Stuff objects into the remote node's shared array first.
689e00946feSChristoph Lameter * That way we could avoid the overhead of putting the objects
690e00946feSChristoph Lameter * into the free lists and getting them back later.
691e00946feSChristoph Lameter */
692ce8eb6c4SChristoph Lameter if (n->shared)
693ce8eb6c4SChristoph Lameter transfer_objects(n->shared, ac, ac->limit);
694e00946feSChristoph Lameter
695833b706cSJoonsoo Kim free_block(cachep, ac->entry, ac->avail, node, list);
696e498be7dSChristoph Lameter ac->avail = 0;
697b539ce9fSJiri Kosina raw_spin_unlock(&n->list_lock);
698e498be7dSChristoph Lameter }
699e498be7dSChristoph Lameter }
700e498be7dSChristoph Lameter
7018fce4d8eSChristoph Lameter /*
7028fce4d8eSChristoph Lameter * Called from cache_reap() to regularly drain alien caches round robin.
7038fce4d8eSChristoph Lameter */
704ce8eb6c4SChristoph Lameter static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
7058fce4d8eSChristoph Lameter {
706909ea964SChristoph Lameter int node = __this_cpu_read(slab_reap_node);
7078fce4d8eSChristoph Lameter
708ce8eb6c4SChristoph Lameter if (n->alien) {
709c8522a3aSJoonsoo Kim struct alien_cache *alc = n->alien[node];
710c8522a3aSJoonsoo Kim struct array_cache *ac;
711e00946feSChristoph Lameter
712c8522a3aSJoonsoo Kim if (alc) {
713c8522a3aSJoonsoo Kim ac = &alc->ac;
71449dfc304SJoonsoo Kim if (ac->avail && spin_trylock_irq(&alc->lock)) {
715833b706cSJoonsoo Kim LIST_HEAD(list);
716833b706cSJoonsoo Kim
717833b706cSJoonsoo Kim __drain_alien_cache(cachep, ac, node, &list);
71849dfc304SJoonsoo Kim spin_unlock_irq(&alc->lock);
719833b706cSJoonsoo Kim slabs_destroy(cachep, &list);
7208fce4d8eSChristoph Lameter }
7218fce4d8eSChristoph Lameter }
7228fce4d8eSChristoph Lameter }
723c8522a3aSJoonsoo Kim }
7248fce4d8eSChristoph Lameter
725a737b3e2SAndrew Morton static void drain_alien_cache(struct kmem_cache *cachep,
726c8522a3aSJoonsoo Kim struct alien_cache **alien)
727e498be7dSChristoph Lameter {
728e498be7dSChristoph Lameter int i = 0;
729c8522a3aSJoonsoo Kim struct alien_cache *alc;
730e498be7dSChristoph Lameter struct array_cache *ac;
731e498be7dSChristoph Lameter unsigned long flags;
732e498be7dSChristoph Lameter
733e498be7dSChristoph Lameter for_each_online_node(i) {
734c8522a3aSJoonsoo Kim alc = alien[i];
735c8522a3aSJoonsoo Kim if (alc) {
736833b706cSJoonsoo Kim LIST_HEAD(list);
737833b706cSJoonsoo Kim
738c8522a3aSJoonsoo Kim ac = &alc->ac;
73949dfc304SJoonsoo Kim spin_lock_irqsave(&alc->lock, flags);
740833b706cSJoonsoo Kim __drain_alien_cache(cachep, ac, i, &list);
74149dfc304SJoonsoo Kim spin_unlock_irqrestore(&alc->lock, flags);
742833b706cSJoonsoo Kim slabs_destroy(cachep, &list);
743e498be7dSChristoph Lameter }
744e498be7dSChristoph Lameter }
745e498be7dSChristoph Lameter }
746729bd0b7SPekka Enberg
74725c4f304SJoonsoo Kim static int __cache_free_alien(struct kmem_cache *cachep, void *objp,
7487981e67eSVlastimil Babka int node, int slab_node)
749729bd0b7SPekka Enberg {
750ce8eb6c4SChristoph Lameter struct kmem_cache_node *n;
751c8522a3aSJoonsoo Kim struct alien_cache *alien = NULL;
752c8522a3aSJoonsoo Kim struct array_cache *ac;
75397654dfaSJoonsoo Kim LIST_HEAD(list);
7541ca4cb24SPekka Enberg
75518bf8541SChristoph Lameter n = get_node(cachep, node);
756729bd0b7SPekka Enberg STATS_INC_NODEFREES(cachep);
7577981e67eSVlastimil Babka if (n->alien && n->alien[slab_node]) {
7587981e67eSVlastimil Babka alien = n->alien[slab_node];
759c8522a3aSJoonsoo Kim ac = &alien->ac;
76049dfc304SJoonsoo Kim spin_lock(&alien->lock);
761c8522a3aSJoonsoo Kim if (unlikely(ac->avail == ac->limit)) {
762729bd0b7SPekka Enberg STATS_INC_ACOVERFLOW(cachep);
7637981e67eSVlastimil Babka __drain_alien_cache(cachep, ac, slab_node, &list);
764729bd0b7SPekka Enberg }
765dabc3e29SKees Cook __free_one(ac, objp);
76649dfc304SJoonsoo Kim spin_unlock(&alien->lock);
767833b706cSJoonsoo Kim slabs_destroy(cachep, &list);
768729bd0b7SPekka Enberg } else {
7697981e67eSVlastimil Babka n = get_node(cachep, slab_node);
770b539ce9fSJiri Kosina raw_spin_lock(&n->list_lock);
7717981e67eSVlastimil Babka free_block(cachep, &objp, 1, slab_node, &list);
772b539ce9fSJiri Kosina raw_spin_unlock(&n->list_lock);
77397654dfaSJoonsoo Kim slabs_destroy(cachep, &list);
774729bd0b7SPekka Enberg }
775729bd0b7SPekka Enberg return 1;
776729bd0b7SPekka Enberg }
77725c4f304SJoonsoo Kim
77825c4f304SJoonsoo Kim static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
77925c4f304SJoonsoo Kim {
780dd35f71aSVlastimil Babka int slab_node = slab_nid(virt_to_slab(objp));
78125c4f304SJoonsoo Kim int node = numa_mem_id();
78225c4f304SJoonsoo Kim /*
783a8f23dd1SYixuan Cao * Make sure we are not freeing an object from another node to the array
78425c4f304SJoonsoo Kim * cache on this cpu.
78525c4f304SJoonsoo Kim */
786dd35f71aSVlastimil Babka if (likely(node == slab_node))
78725c4f304SJoonsoo Kim return 0;
78825c4f304SJoonsoo Kim
789dd35f71aSVlastimil Babka return __cache_free_alien(cachep, objp, node, slab_node);
79025c4f304SJoonsoo Kim }
7914167e9b2SDavid Rientjes
7924167e9b2SDavid Rientjes /*
793444eb2a4SMel Gorman * Construct gfp mask to allocate from a specific node but do not reclaim or
794444eb2a4SMel Gorman * warn about failures.
7954167e9b2SDavid Rientjes */
7964167e9b2SDavid Rientjes static inline gfp_t gfp_exact_node(gfp_t flags)
7974167e9b2SDavid Rientjes {
798444eb2a4SMel Gorman return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
7994167e9b2SDavid Rientjes }
800e498be7dSChristoph Lameter #endif
801e498be7dSChristoph Lameter
802ded0ecf6SJoonsoo Kim static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
803ded0ecf6SJoonsoo Kim {
804ded0ecf6SJoonsoo Kim struct kmem_cache_node *n;
805ded0ecf6SJoonsoo Kim
806ded0ecf6SJoonsoo Kim /*
807ded0ecf6SJoonsoo Kim * Set up the kmem_cache_node for cpu before we can
808ded0ecf6SJoonsoo Kim * begin anything. Make sure some other cpu on this
809ded0ecf6SJoonsoo Kim * node has not already allocated this
810ded0ecf6SJoonsoo Kim */
811ded0ecf6SJoonsoo Kim n = get_node(cachep, node);
812ded0ecf6SJoonsoo Kim if (n) {
813b539ce9fSJiri Kosina raw_spin_lock_irq(&n->list_lock);
814ded0ecf6SJoonsoo Kim n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount +
815ded0ecf6SJoonsoo Kim cachep->num;
816b539ce9fSJiri Kosina raw_spin_unlock_irq(&n->list_lock);
817ded0ecf6SJoonsoo Kim
818ded0ecf6SJoonsoo Kim return 0;
819ded0ecf6SJoonsoo Kim }
820ded0ecf6SJoonsoo Kim
821ded0ecf6SJoonsoo Kim n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node);
822ded0ecf6SJoonsoo Kim if (!n)
823ded0ecf6SJoonsoo Kim return -ENOMEM;
824ded0ecf6SJoonsoo Kim
825ded0ecf6SJoonsoo Kim kmem_cache_node_init(n);
826ded0ecf6SJoonsoo Kim n->next_reap = jiffies + REAPTIMEOUT_NODE +
827ded0ecf6SJoonsoo Kim ((unsigned long)cachep) % REAPTIMEOUT_NODE;
828ded0ecf6SJoonsoo Kim
829ded0ecf6SJoonsoo Kim n->free_limit =
830ded0ecf6SJoonsoo Kim (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num;
831ded0ecf6SJoonsoo Kim
832ded0ecf6SJoonsoo Kim /*
833ded0ecf6SJoonsoo Kim * The kmem_cache_nodes don't come and go as CPUs
834a8f23dd1SYixuan Cao * come and go. slab_mutex provides sufficient
835ded0ecf6SJoonsoo Kim * protection here.
836ded0ecf6SJoonsoo Kim */
837ded0ecf6SJoonsoo Kim cachep->node[node] = n;
838ded0ecf6SJoonsoo Kim
839ded0ecf6SJoonsoo Kim return 0;
840ded0ecf6SJoonsoo Kim }
841ded0ecf6SJoonsoo Kim
84266a1c22bSGeert Uytterhoeven #if defined(CONFIG_NUMA) || defined(CONFIG_SMP)
8438f9f8d9eSDavid Rientjes /*
8446a67368cSChristoph Lameter * Allocates and initializes the kmem_cache_node for a node on each slab cache, used for
845ce8eb6c4SChristoph Lameter * either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node
8468f9f8d9eSDavid Rientjes * will be allocated off-node since memory is not yet online for the new node.
847a8f23dd1SYixuan Cao * When hotplugging memory or a cpu, existing nodes are not replaced if
8488f9f8d9eSDavid Rientjes * already in use.
8498f9f8d9eSDavid Rientjes *
85018004c5dSChristoph Lameter * Must hold slab_mutex.
8518f9f8d9eSDavid Rientjes */
8526a67368cSChristoph Lameter static int init_cache_node_node(int node)
8538f9f8d9eSDavid Rientjes {
854ded0ecf6SJoonsoo Kim int ret;
8558f9f8d9eSDavid Rientjes struct kmem_cache *cachep;
8568f9f8d9eSDavid Rientjes
85718004c5dSChristoph Lameter list_for_each_entry(cachep, &slab_caches, list) {
858ded0ecf6SJoonsoo Kim ret = init_cache_node(cachep, node, GFP_KERNEL);
859ded0ecf6SJoonsoo Kim if (ret)
860ded0ecf6SJoonsoo Kim return ret;
8618f9f8d9eSDavid Rientjes }
8628f9f8d9eSDavid Rientjes
8638f9f8d9eSDavid Rientjes return 0;
8648f9f8d9eSDavid Rientjes }
8656731d4f1SSebastian Andrzej Siewior #endif
8668f9f8d9eSDavid Rientjes
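/*
 * Set up (or resize) the per-node structures of @cachep for @node: the
 * kmem_cache_node itself, the node-shared array_cache and, if enabled, the
 * alien caches. With @force_change an existing shared array is flushed and
 * replaced, and freeing of the old one is deferred via synchronize_rcu().
 */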
867c3d332b6SJoonsoo Kim static int setup_kmem_cache_node(struct kmem_cache *cachep,
868c3d332b6SJoonsoo Kim int node, gfp_t gfp, bool force_change)
869c3d332b6SJoonsoo Kim {
870c3d332b6SJoonsoo Kim int ret = -ENOMEM;
871c3d332b6SJoonsoo Kim struct kmem_cache_node *n;
872c3d332b6SJoonsoo Kim struct array_cache *old_shared = NULL;
873c3d332b6SJoonsoo Kim struct array_cache *new_shared = NULL;
874c3d332b6SJoonsoo Kim struct alien_cache **new_alien = NULL;
875c3d332b6SJoonsoo Kim LIST_HEAD(list);
876c3d332b6SJoonsoo Kim
877c3d332b6SJoonsoo Kim if (use_alien_caches) {
878c3d332b6SJoonsoo Kim new_alien = alloc_alien_cache(node, cachep->limit, gfp);
879c3d332b6SJoonsoo Kim if (!new_alien)
880c3d332b6SJoonsoo Kim goto fail;
881c3d332b6SJoonsoo Kim }
882c3d332b6SJoonsoo Kim
883c3d332b6SJoonsoo Kim if (cachep->shared) {
884c3d332b6SJoonsoo Kim new_shared = alloc_arraycache(node,
885c3d332b6SJoonsoo Kim cachep->shared * cachep->batchcount, 0xbaadf00d, gfp);
886c3d332b6SJoonsoo Kim if (!new_shared)
887c3d332b6SJoonsoo Kim goto fail;
888c3d332b6SJoonsoo Kim }
889c3d332b6SJoonsoo Kim
890c3d332b6SJoonsoo Kim ret = init_cache_node(cachep, node, gfp);
891c3d332b6SJoonsoo Kim if (ret)
892c3d332b6SJoonsoo Kim goto fail;
893c3d332b6SJoonsoo Kim
894c3d332b6SJoonsoo Kim n = get_node(cachep, node);
895b539ce9fSJiri Kosina raw_spin_lock_irq(&n->list_lock);
896c3d332b6SJoonsoo Kim if (n->shared && force_change) {
897c3d332b6SJoonsoo Kim free_block(cachep, n->shared->entry,
898c3d332b6SJoonsoo Kim n->shared->avail, node, &list);
899c3d332b6SJoonsoo Kim n->shared->avail = 0;
900c3d332b6SJoonsoo Kim }
901c3d332b6SJoonsoo Kim
902c3d332b6SJoonsoo Kim if (!n->shared || force_change) {
903c3d332b6SJoonsoo Kim old_shared = n->shared;
904c3d332b6SJoonsoo Kim n->shared = new_shared;
905c3d332b6SJoonsoo Kim new_shared = NULL;
906c3d332b6SJoonsoo Kim }
907c3d332b6SJoonsoo Kim
908c3d332b6SJoonsoo Kim if (!n->alien) {
909c3d332b6SJoonsoo Kim n->alien = new_alien;
910c3d332b6SJoonsoo Kim new_alien = NULL;
911c3d332b6SJoonsoo Kim }
912c3d332b6SJoonsoo Kim
913b539ce9fSJiri Kosina raw_spin_unlock_irq(&n->list_lock);
914c3d332b6SJoonsoo Kim slabs_destroy(cachep, &list);
915c3d332b6SJoonsoo Kim
916801faf0dSJoonsoo Kim /*
917801faf0dSJoonsoo Kim * To protect lockless access to n->shared during irq disabled context.
918801faf0dSJoonsoo Kim * If n->shared isn't NULL in irq disabled context, accessing it is
919801faf0dSJoonsoo Kim * guaranteed to be valid until irq is re-enabled, because it will be
9206564a25eSPaul E. McKenney * freed after synchronize_rcu().
921801faf0dSJoonsoo Kim */
92286d9f485SJoonsoo Kim if (old_shared && force_change)
9236564a25eSPaul E. McKenney synchronize_rcu();
924801faf0dSJoonsoo Kim
925c3d332b6SJoonsoo Kim fail:
926c3d332b6SJoonsoo Kim kfree(old_shared);
927c3d332b6SJoonsoo Kim kfree(new_shared);
928c3d332b6SJoonsoo Kim free_alien_cache(new_alien);
929c3d332b6SJoonsoo Kim
930c3d332b6SJoonsoo Kim return ret;
931c3d332b6SJoonsoo Kim }
932c3d332b6SJoonsoo Kim
9336731d4f1SSebastian Andrzej Siewior #ifdef CONFIG_SMP
9346731d4f1SSebastian Andrzej Siewior
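/*
 * Undo cpuup_prepare() when a CPU fails to come online, or tear things down
 * when it goes offline: flush the CPU's array caches back to the node lists
 * and, once the node has no online CPUs left, release the node's shared and
 * alien caches and trim its free slab lists.
 */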
9350db0628dSPaul Gortmaker static void cpuup_canceled(long cpu)
9361da177e4SLinus Torvalds {
937fbf1e473SAkinobu Mita struct kmem_cache *cachep;
938ce8eb6c4SChristoph Lameter struct kmem_cache_node *n = NULL;
9397d6e6d09SLee Schermerhorn int node = cpu_to_mem(cpu);
940a70f7302SRusty Russell const struct cpumask *mask = cpumask_of_node(node);
941fbf1e473SAkinobu Mita
94218004c5dSChristoph Lameter list_for_each_entry(cachep, &slab_caches, list) {
943fbf1e473SAkinobu Mita struct array_cache *nc;
944fbf1e473SAkinobu Mita struct array_cache *shared;
945c8522a3aSJoonsoo Kim struct alien_cache **alien;
94697654dfaSJoonsoo Kim LIST_HEAD(list);
947fbf1e473SAkinobu Mita
94818bf8541SChristoph Lameter n = get_node(cachep, node);
949ce8eb6c4SChristoph Lameter if (!n)
950bf0dea23SJoonsoo Kim continue;
951fbf1e473SAkinobu Mita
952b539ce9fSJiri Kosina raw_spin_lock_irq(&n->list_lock);
953fbf1e473SAkinobu Mita
954ce8eb6c4SChristoph Lameter /* Free limit for this kmem_cache_node */
955ce8eb6c4SChristoph Lameter n->free_limit -= cachep->batchcount;
956bf0dea23SJoonsoo Kim
957bf0dea23SJoonsoo Kim /* cpu is dead; no one can alloc from it. */
958bf0dea23SJoonsoo Kim nc = per_cpu_ptr(cachep->cpu_cache, cpu);
95997654dfaSJoonsoo Kim free_block(cachep, nc->entry, nc->avail, node, &list);
960bf0dea23SJoonsoo Kim nc->avail = 0;
961fbf1e473SAkinobu Mita
96258463c1fSRusty Russell if (!cpumask_empty(mask)) {
963b539ce9fSJiri Kosina raw_spin_unlock_irq(&n->list_lock);
964bf0dea23SJoonsoo Kim goto free_slab;
965fbf1e473SAkinobu Mita }
966fbf1e473SAkinobu Mita
967ce8eb6c4SChristoph Lameter shared = n->shared;
968fbf1e473SAkinobu Mita if (shared) {
969fbf1e473SAkinobu Mita free_block(cachep, shared->entry,
97097654dfaSJoonsoo Kim shared->avail, node, &list);
971ce8eb6c4SChristoph Lameter n->shared = NULL;
972fbf1e473SAkinobu Mita }
973fbf1e473SAkinobu Mita
974ce8eb6c4SChristoph Lameter alien = n->alien;
975ce8eb6c4SChristoph Lameter n->alien = NULL;
976fbf1e473SAkinobu Mita
977b539ce9fSJiri Kosina raw_spin_unlock_irq(&n->list_lock);
978fbf1e473SAkinobu Mita
979fbf1e473SAkinobu Mita kfree(shared);
980fbf1e473SAkinobu Mita if (alien) {
981fbf1e473SAkinobu Mita drain_alien_cache(cachep, alien);
982fbf1e473SAkinobu Mita free_alien_cache(alien);
983fbf1e473SAkinobu Mita }
984bf0dea23SJoonsoo Kim
985bf0dea23SJoonsoo Kim free_slab:
98697654dfaSJoonsoo Kim slabs_destroy(cachep, &list);
987fbf1e473SAkinobu Mita }
988fbf1e473SAkinobu Mita /*
989fbf1e473SAkinobu Mita * In the previous loop, all the objects were freed to
990fbf1e473SAkinobu Mita * the respective cache's slabs; now we can go ahead and
991fbf1e473SAkinobu Mita * shrink each nodelist to its limit.
992fbf1e473SAkinobu Mita */
99318004c5dSChristoph Lameter list_for_each_entry(cachep, &slab_caches, list) {
99418bf8541SChristoph Lameter n = get_node(cachep, node);
995ce8eb6c4SChristoph Lameter if (!n)
996fbf1e473SAkinobu Mita continue;
997a5aa63a5SJoonsoo Kim drain_freelist(cachep, n, INT_MAX);
998fbf1e473SAkinobu Mita }
999fbf1e473SAkinobu Mita }
1000fbf1e473SAkinobu Mita
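/*
 * Prepare a CPU that is about to come online: make sure its memory node has
 * a kmem_cache_node for every cache, then allocate the node-shared and alien
 * array caches so the CPU can start allocating immediately.
 */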
10010db0628dSPaul Gortmaker static int cpuup_prepare(long cpu)
1002fbf1e473SAkinobu Mita {
1003343e0d7aSPekka Enberg struct kmem_cache *cachep;
10047d6e6d09SLee Schermerhorn int node = cpu_to_mem(cpu);
10058f9f8d9eSDavid Rientjes int err;
10061da177e4SLinus Torvalds
1007a737b3e2SAndrew Morton /*
1008a737b3e2SAndrew Morton * We need to do this right in the beginning since
1009e498be7dSChristoph Lameter * alloc_arraycache's are going to use this list.
1010e498be7dSChristoph Lameter * kmalloc_node allows us to add the slab to the right
1011ce8eb6c4SChristoph Lameter * kmem_cache_node and not this cpu's kmem_cache_node
1012e498be7dSChristoph Lameter */
10136a67368cSChristoph Lameter err = init_cache_node_node(node);
10148f9f8d9eSDavid Rientjes if (err < 0)
1015e498be7dSChristoph Lameter goto bad;
1016e498be7dSChristoph Lameter
1017a737b3e2SAndrew Morton /*
1018a737b3e2SAndrew Morton * Now we can go ahead with allocating the shared arrays and
1019a737b3e2SAndrew Morton * array caches
1020a737b3e2SAndrew Morton */
102118004c5dSChristoph Lameter list_for_each_entry(cachep, &slab_caches, list) {
1022c3d332b6SJoonsoo Kim err = setup_kmem_cache_node(cachep, node, GFP_KERNEL, false);
1023c3d332b6SJoonsoo Kim if (err)
10244484ebf1SRavikiran G Thirumalai goto bad;
102563109846SEric Dumazet }
1026ce79ddc8SPekka Enberg
1027fbf1e473SAkinobu Mita return 0;
1028fbf1e473SAkinobu Mita bad:
102912d00f6aSAkinobu Mita cpuup_canceled(cpu);
1030fbf1e473SAkinobu Mita return -ENOMEM;
1031fbf1e473SAkinobu Mita }
1032fbf1e473SAkinobu Mita
10336731d4f1SSebastian Andrzej Siewior int slab_prepare_cpu(unsigned int cpu)
1034fbf1e473SAkinobu Mita {
10356731d4f1SSebastian Andrzej Siewior int err;
1036fbf1e473SAkinobu Mita
103718004c5dSChristoph Lameter mutex_lock(&slab_mutex);
1038fbf1e473SAkinobu Mita err = cpuup_prepare(cpu);
103918004c5dSChristoph Lameter mutex_unlock(&slab_mutex);
10406731d4f1SSebastian Andrzej Siewior return err;
10416731d4f1SSebastian Andrzej Siewior }
10426731d4f1SSebastian Andrzej Siewior
10435830c590SChristoph Lameter /*
10446731d4f1SSebastian Andrzej Siewior * This is called for a failed online attempt and for a successful
10456731d4f1SSebastian Andrzej Siewior * offline.
10466731d4f1SSebastian Andrzej Siewior *
10476731d4f1SSebastian Andrzej Siewior * Even if all the cpus of a node are down, we don't free the
1048a8f23dd1SYixuan Cao * kmem_cache_node of any cache. This is to avoid a race between cpu_down and
10496731d4f1SSebastian Andrzej Siewior * a kmalloc allocation from another cpu for memory from the node of
105070b6d25eSChen Tao * the cpu going down. The kmem_cache_node structure is usually allocated from
10516731d4f1SSebastian Andrzej Siewior * kmem_cache_create() and gets destroyed at kmem_cache_destroy().
10526731d4f1SSebastian Andrzej Siewior */
10536731d4f1SSebastian Andrzej Siewior int slab_dead_cpu(unsigned int cpu)
10546731d4f1SSebastian Andrzej Siewior {
10556731d4f1SSebastian Andrzej Siewior mutex_lock(&slab_mutex);
10566731d4f1SSebastian Andrzej Siewior cpuup_canceled(cpu);
10576731d4f1SSebastian Andrzej Siewior mutex_unlock(&slab_mutex);
10586731d4f1SSebastian Andrzej Siewior return 0;
10596731d4f1SSebastian Andrzej Siewior }
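/*
 * Note (added for clarity): slab_prepare_cpu() and slab_dead_cpu() are not
 * registered in this file; the CPU hotplug core is expected to invoke them,
 * in mainline via the CPUHP_SLAB_PREPARE entry in kernel/cpu.c.
 */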
10606731d4f1SSebastian Andrzej Siewior #endif
10616731d4f1SSebastian Andrzej Siewior
10626731d4f1SSebastian Andrzej Siewior static int slab_online_cpu(unsigned int cpu)
10636731d4f1SSebastian Andrzej Siewior {
10646731d4f1SSebastian Andrzej Siewior start_cpu_timer(cpu);
10656731d4f1SSebastian Andrzej Siewior return 0;
10666731d4f1SSebastian Andrzej Siewior }
10676731d4f1SSebastian Andrzej Siewior
10686731d4f1SSebastian Andrzej Siewior static int slab_offline_cpu(unsigned int cpu)
10696731d4f1SSebastian Andrzej Siewior {
10706731d4f1SSebastian Andrzej Siewior /*
10716731d4f1SSebastian Andrzej Siewior * Shutdown cache reaper. Note that the slab_mutex is held so
10726731d4f1SSebastian Andrzej Siewior * that if cache_reap() is invoked it cannot do anything
10736731d4f1SSebastian Andrzej Siewior * expensive but will only modify reap_work and reschedule the
10746731d4f1SSebastian Andrzej Siewior * timer.
10755830c590SChristoph Lameter */
1076afe2c511STejun Heo cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
10775830c590SChristoph Lameter /* Now the cache_reaper is guaranteed to be not running. */
10781871e52cSTejun Heo per_cpu(slab_reap_work, cpu).work.func = NULL;
10796731d4f1SSebastian Andrzej Siewior return 0;
10801da177e4SLinus Torvalds }
10811da177e4SLinus Torvalds
108276af6a05SDave Hansen #if defined(CONFIG_NUMA)
10838f9f8d9eSDavid Rientjes /*
10848f9f8d9eSDavid Rientjes * Drains freelist for a node on each slab cache, used for memory hot-remove.
10858f9f8d9eSDavid Rientjes * Returns -EBUSY if all objects cannot be drained so that the node is not
10868f9f8d9eSDavid Rientjes * removed.
10878f9f8d9eSDavid Rientjes *
108818004c5dSChristoph Lameter * Must hold slab_mutex.
10898f9f8d9eSDavid Rientjes */
10906a67368cSChristoph Lameter static int __meminit drain_cache_node_node(int node)
10918f9f8d9eSDavid Rientjes {
10928f9f8d9eSDavid Rientjes struct kmem_cache *cachep;
10938f9f8d9eSDavid Rientjes int ret = 0;
10948f9f8d9eSDavid Rientjes
109518004c5dSChristoph Lameter list_for_each_entry(cachep, &slab_caches, list) {
1096ce8eb6c4SChristoph Lameter struct kmem_cache_node *n;
10978f9f8d9eSDavid Rientjes
109818bf8541SChristoph Lameter n = get_node(cachep, node);
1099ce8eb6c4SChristoph Lameter if (!n)
11008f9f8d9eSDavid Rientjes continue;
11018f9f8d9eSDavid Rientjes
1102a5aa63a5SJoonsoo Kim drain_freelist(cachep, n, INT_MAX);
11038f9f8d9eSDavid Rientjes
1104ce8eb6c4SChristoph Lameter if (!list_empty(&n->slabs_full) ||
1105ce8eb6c4SChristoph Lameter !list_empty(&n->slabs_partial)) {
11068f9f8d9eSDavid Rientjes ret = -EBUSY;
11078f9f8d9eSDavid Rientjes break;
11088f9f8d9eSDavid Rientjes }
11098f9f8d9eSDavid Rientjes }
11108f9f8d9eSDavid Rientjes return ret;
11118f9f8d9eSDavid Rientjes }
11128f9f8d9eSDavid Rientjes
11138f9f8d9eSDavid Rientjes static int __meminit slab_memory_callback(struct notifier_block *self,
11148f9f8d9eSDavid Rientjes unsigned long action, void *arg)
11158f9f8d9eSDavid Rientjes {
11168f9f8d9eSDavid Rientjes struct memory_notify *mnb = arg;
11178f9f8d9eSDavid Rientjes int ret = 0;
11188f9f8d9eSDavid Rientjes int nid;
11198f9f8d9eSDavid Rientjes
11208f9f8d9eSDavid Rientjes nid = mnb->status_change_nid;
11218f9f8d9eSDavid Rientjes if (nid < 0)
11228f9f8d9eSDavid Rientjes goto out;
11238f9f8d9eSDavid Rientjes
11248f9f8d9eSDavid Rientjes switch (action) {
11258f9f8d9eSDavid Rientjes case MEM_GOING_ONLINE:
112618004c5dSChristoph Lameter mutex_lock(&slab_mutex);
11276a67368cSChristoph Lameter ret = init_cache_node_node(nid);
112818004c5dSChristoph Lameter mutex_unlock(&slab_mutex);
11298f9f8d9eSDavid Rientjes break;
11308f9f8d9eSDavid Rientjes case MEM_GOING_OFFLINE:
113118004c5dSChristoph Lameter mutex_lock(&slab_mutex);
11326a67368cSChristoph Lameter ret = drain_cache_node_node(nid);
113318004c5dSChristoph Lameter mutex_unlock(&slab_mutex);
11348f9f8d9eSDavid Rientjes break;
11358f9f8d9eSDavid Rientjes case MEM_ONLINE:
11368f9f8d9eSDavid Rientjes case MEM_OFFLINE:
11378f9f8d9eSDavid Rientjes case MEM_CANCEL_ONLINE:
11388f9f8d9eSDavid Rientjes case MEM_CANCEL_OFFLINE:
11398f9f8d9eSDavid Rientjes break;
11408f9f8d9eSDavid Rientjes }
11418f9f8d9eSDavid Rientjes out:
11425fda1bd5SPrarit Bhargava return notifier_from_errno(ret);
11438f9f8d9eSDavid Rientjes }
114476af6a05SDave Hansen #endif /* CONFIG_NUMA */
11458f9f8d9eSDavid Rientjes
1146e498be7dSChristoph Lameter /*
1147ce8eb6c4SChristoph Lameter * swap the static kmem_cache_node with kmalloced memory
1148e498be7dSChristoph Lameter */
11496744f087SChristoph Lameter static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *list,
1150a737b3e2SAndrew Morton int nodeid)
1151e498be7dSChristoph Lameter {
11526744f087SChristoph Lameter struct kmem_cache_node *ptr;
1153e498be7dSChristoph Lameter
11546744f087SChristoph Lameter ptr = kmalloc_node(sizeof(struct kmem_cache_node), GFP_NOWAIT, nodeid);
1155e498be7dSChristoph Lameter BUG_ON(!ptr);
1156e498be7dSChristoph Lameter
11576744f087SChristoph Lameter memcpy(ptr, list, sizeof(struct kmem_cache_node));
11582b2d5493SIngo Molnar /*
11592b2d5493SIngo Molnar * Do not assume that spinlocks can be initialized via memcpy:
11602b2d5493SIngo Molnar */
1161b539ce9fSJiri Kosina raw_spin_lock_init(&ptr->list_lock);
11622b2d5493SIngo Molnar
1163e498be7dSChristoph Lameter MAKE_ALL_LISTS(cachep, ptr, nodeid);
11646a67368cSChristoph Lameter cachep->node[nodeid] = ptr;
1165e498be7dSChristoph Lameter }
1166e498be7dSChristoph Lameter
1167a737b3e2SAndrew Morton /*
1168ce8eb6c4SChristoph Lameter * For setting up all the kmem_cache_node structures for a cache whose
1169ce8eb6c4SChristoph Lameter * buffer_size is the same as the size of kmem_cache_node.
1170556a169dSPekka Enberg */
1171ce8eb6c4SChristoph Lameter static void __init set_up_node(struct kmem_cache *cachep, int index)
1172556a169dSPekka Enberg {
1173556a169dSPekka Enberg int node;
1174556a169dSPekka Enberg
1175556a169dSPekka Enberg for_each_online_node(node) {
1176ce8eb6c4SChristoph Lameter cachep->node[node] = &init_kmem_cache_node[index + node];
11776a67368cSChristoph Lameter cachep->node[node]->next_reap = jiffies +
11785f0985bbSJianyu Zhan REAPTIMEOUT_NODE +
11795f0985bbSJianyu Zhan ((unsigned long)cachep) % REAPTIMEOUT_NODE;
1180556a169dSPekka Enberg }
1181556a169dSPekka Enberg }
1182556a169dSPekka Enberg
1183556a169dSPekka Enberg /*
1184a737b3e2SAndrew Morton * Initialisation. Called after the page allocator has been initialised and
1185a737b3e2SAndrew Morton * before smp_init().
11861da177e4SLinus Torvalds */
11871da177e4SLinus Torvalds void __init kmem_cache_init(void)
11881da177e4SLinus Torvalds {
1189e498be7dSChristoph Lameter int i;
1190e498be7dSChristoph Lameter
11919b030cb8SChristoph Lameter kmem_cache = &kmem_cache_boot;
11929b030cb8SChristoph Lameter
11938888177eSJoonsoo Kim if (!IS_ENABLED(CONFIG_NUMA) || num_possible_nodes() == 1)
119462918a03SSiddha, Suresh B use_alien_caches = 0;
119562918a03SSiddha, Suresh B
11963c583465SChristoph Lameter for (i = 0; i < NUM_INIT_LISTS; i++)
1197ce8eb6c4SChristoph Lameter kmem_cache_node_init(&init_kmem_cache_node[i]);
11983c583465SChristoph Lameter
11991da177e4SLinus Torvalds /*
12001da177e4SLinus Torvalds * Fragmentation resistance on low memory - only use bigger
12013df1cccdSDavid Rientjes * page orders on machines with more than 32MB of memory if
12023df1cccdSDavid Rientjes * not overridden on the command line.
12031da177e4SLinus Torvalds */
1204ca79b0c2SArun KS if (!slab_max_order_set && totalram_pages() > (32 << 20) >> PAGE_SHIFT)
1205543585ccSDavid Rientjes slab_max_order = SLAB_MAX_ORDER_HI;
12061da177e4SLinus Torvalds
12071da177e4SLinus Torvalds /* Bootstrap is tricky, because several objects are allocated
12081da177e4SLinus Torvalds * from caches that do not exist yet:
12099b030cb8SChristoph Lameter * 1) initialize the kmem_cache cache: it contains the struct
12109b030cb8SChristoph Lameter * kmem_cache structures of all caches, except kmem_cache itself:
12119b030cb8SChristoph Lameter * kmem_cache is statically allocated.
1212e498be7dSChristoph Lameter * Initially an __init data area is used for the head array and the
1213ce8eb6c4SChristoph Lameter * kmem_cache_node structures; it's replaced with a kmalloc allocated
1214e498be7dSChristoph Lameter * array at the end of the bootstrap.
12151da177e4SLinus Torvalds * 2) Create the first kmalloc cache.
1216343e0d7aSPekka Enberg * The struct kmem_cache for the new cache is allocated normally.
1217e498be7dSChristoph Lameter * An __init data area is used for the head array.
1218e498be7dSChristoph Lameter * 3) Create the remaining kmalloc caches, with minimally sized
1219e498be7dSChristoph Lameter * head arrays.
12209b030cb8SChristoph Lameter * 4) Replace the __init data head arrays for kmem_cache and the first
12211da177e4SLinus Torvalds * kmalloc cache with kmalloc allocated arrays.
1222ce8eb6c4SChristoph Lameter * 5) Replace the __init data for kmem_cache_node for kmem_cache and
1223e498be7dSChristoph Lameter * the other caches with kmalloc allocated memory.
1224e498be7dSChristoph Lameter * 6) Resize the head arrays of the kmalloc caches to their final sizes.
12251da177e4SLinus Torvalds */
12261da177e4SLinus Torvalds
12279b030cb8SChristoph Lameter /* 1) create the kmem_cache */
12281da177e4SLinus Torvalds
12298da3430dSEric Dumazet /*
1230b56efcf0SEric Dumazet * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
12318da3430dSEric Dumazet */
12322f9baa9fSChristoph Lameter create_boot_cache(kmem_cache, "kmem_cache",
1233bf0dea23SJoonsoo Kim offsetof(struct kmem_cache, node) +
12346744f087SChristoph Lameter nr_node_ids * sizeof(struct kmem_cache_node *),
12358eb8284bSDavid Windsor SLAB_HWCACHE_ALIGN, 0, 0);
12362f9baa9fSChristoph Lameter list_add(&kmem_cache->list, &slab_caches);
1237bf0dea23SJoonsoo Kim slab_state = PARTIAL;
12381da177e4SLinus Torvalds
1239a737b3e2SAndrew Morton /*
1240bf0dea23SJoonsoo Kim * Initialize the caches that provide memory for the kmem_cache_node
1241bf0dea23SJoonsoo Kim * structures first. Without this, further allocations will bug.
1242e498be7dSChristoph Lameter */
12430c474d31SCatalin Marinas new_kmalloc_cache(INDEX_NODE, KMALLOC_NORMAL, ARCH_KMALLOC_FLAGS);
1244bf0dea23SJoonsoo Kim slab_state = PARTIAL_NODE;
124534cc6990SDaniel Sanders setup_kmalloc_cache_index_table();
1246e498be7dSChristoph Lameter
1247ce8eb6c4SChristoph Lameter /* 5) Replace the bootstrap kmem_cache_node */
1248e498be7dSChristoph Lameter {
12491ca4cb24SPekka Enberg int nid;
12501da177e4SLinus Torvalds
12519c09a95cSMel Gorman for_each_online_node(nid) {
1252ce8eb6c4SChristoph Lameter init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid);
1253556a169dSPekka Enberg
1254cc252eaeSVlastimil Babka init_list(kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE],
1255ce8eb6c4SChristoph Lameter &init_kmem_cache_node[SIZE_NODE + nid], nid);
1256e498be7dSChristoph Lameter }
1257e498be7dSChristoph Lameter }
1258e498be7dSChristoph Lameter
1259f97d5f63SChristoph Lameter create_kmalloc_caches(ARCH_KMALLOC_FLAGS);
12608429db5cSPekka Enberg }
12618429db5cSPekka Enberg
12628429db5cSPekka Enberg void __init kmem_cache_init_late(void)
12631da177e4SLinus Torvalds {
1264343e0d7aSPekka Enberg struct kmem_cache *cachep;
12658429db5cSPekka Enberg
12668429db5cSPekka Enberg /* 6) resize the head arrays to their final sizes */
126718004c5dSChristoph Lameter mutex_lock(&slab_mutex);
126818004c5dSChristoph Lameter list_for_each_entry(cachep, &slab_caches, list)
126983b519e8SPekka Enberg if (enable_cpucache(cachep, GFP_NOWAIT))
12702ed3a4efSChristoph Lameter BUG();
127118004c5dSChristoph Lameter mutex_unlock(&slab_mutex);
1272056c6241SRavikiran G Thirumalai
127397d06609SChristoph Lameter /* Done! */
127497d06609SChristoph Lameter slab_state = FULL;
127597d06609SChristoph Lameter
12768f9f8d9eSDavid Rientjes #ifdef CONFIG_NUMA
12778f9f8d9eSDavid Rientjes /*
12788f9f8d9eSDavid Rientjes * Register a memory hotplug callback that initializes and frees
12796a67368cSChristoph Lameter * node.
12808f9f8d9eSDavid Rientjes */
12818f9f8d9eSDavid Rientjes hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
12828f9f8d9eSDavid Rientjes #endif
12838f9f8d9eSDavid Rientjes
1284a737b3e2SAndrew Morton /*
1285a737b3e2SAndrew Morton * The reap timers are started later, with a module init call: that part
1286a737b3e2SAndrew Morton * of the kernel is not yet operational.
12871da177e4SLinus Torvalds */
12881da177e4SLinus Torvalds }
12891da177e4SLinus Torvalds
12901da177e4SLinus Torvalds static int __init cpucache_init(void)
12911da177e4SLinus Torvalds {
12926731d4f1SSebastian Andrzej Siewior int ret;
12931da177e4SLinus Torvalds
12941da177e4SLinus Torvalds /*
1295a737b3e2SAndrew Morton * Register the timers that return unneeded pages to the page allocator
12961da177e4SLinus Torvalds */
12976731d4f1SSebastian Andrzej Siewior ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "SLAB online",
12986731d4f1SSebastian Andrzej Siewior slab_online_cpu, slab_offline_cpu);
12996731d4f1SSebastian Andrzej Siewior WARN_ON(ret < 0);
1300a164f896SGlauber Costa
13011da177e4SLinus Torvalds return 0;
13021da177e4SLinus Torvalds }
13031da177e4SLinus Torvalds __initcall(cpucache_init);
13041da177e4SLinus Torvalds
13058bdec192SRafael Aquini static noinline void
13068bdec192SRafael Aquini slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
13078bdec192SRafael Aquini {
13089a02d699SDavid Rientjes #if DEBUG
1309ce8eb6c4SChristoph Lameter struct kmem_cache_node *n;
13108bdec192SRafael Aquini unsigned long flags;
13118bdec192SRafael Aquini int node;
13129a02d699SDavid Rientjes static DEFINE_RATELIMIT_STATE(slab_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
13139a02d699SDavid Rientjes DEFAULT_RATELIMIT_BURST);
13149a02d699SDavid Rientjes
13159a02d699SDavid Rientjes if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slab_oom_rs))
13169a02d699SDavid Rientjes return;
13178bdec192SRafael Aquini
13185b3810e5SVlastimil Babka pr_warn("SLAB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
13195b3810e5SVlastimil Babka nodeid, gfpflags, &gfpflags);
13205b3810e5SVlastimil Babka pr_warn(" cache: %s, object size: %d, order: %d\n",
13213b0efdfaSChristoph Lameter cachep->name, cachep->size, cachep->gfporder);
13228bdec192SRafael Aquini
132318bf8541SChristoph Lameter for_each_kmem_cache_node(cachep, node, n) {
1324bf00bd34SDavid Rientjes unsigned long total_slabs, free_slabs, free_objs;
13258bdec192SRafael Aquini
1326b539ce9fSJiri Kosina raw_spin_lock_irqsave(&n->list_lock, flags);
1327bf00bd34SDavid Rientjes total_slabs = n->total_slabs;
1328bf00bd34SDavid Rientjes free_slabs = n->free_slabs;
1329bf00bd34SDavid Rientjes free_objs = n->free_objects;
1330b539ce9fSJiri Kosina raw_spin_unlock_irqrestore(&n->list_lock, flags);
13318bdec192SRafael Aquini
1332bf00bd34SDavid Rientjes pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld\n",
1333bf00bd34SDavid Rientjes node, total_slabs - free_slabs, total_slabs,
1334bf00bd34SDavid Rientjes (total_slabs * cachep->num) - free_objs,
1335bf00bd34SDavid Rientjes total_slabs * cachep->num);
13368bdec192SRafael Aquini }
13379a02d699SDavid Rientjes #endif
13388bdec192SRafael Aquini }
13398bdec192SRafael Aquini
13401da177e4SLinus Torvalds /*
13418a7d9b43SWang Sheng-Hui * Interface to system's page allocator. No need to hold the
13428a7d9b43SWang Sheng-Hui * kmem_cache_node ->list_lock.
13431da177e4SLinus Torvalds *
13441da177e4SLinus Torvalds * If we requested dmaable memory, we will get it. Even if we
13451da177e4SLinus Torvalds * did not request dmaable memory, we might get it, but that
13461da177e4SLinus Torvalds * would be relatively rare and ignorable.
13471da177e4SLinus Torvalds */
134842c0faacSVlastimil Babka static struct slab *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
13490c3aa83eSJoonsoo Kim int nodeid)
13501da177e4SLinus Torvalds {
135142c0faacSVlastimil Babka struct folio *folio;
135242c0faacSVlastimil Babka struct slab *slab;
1353765c4507SChristoph Lameter
1354a618e89fSGlauber Costa flags |= cachep->allocflags;
1355e1b6aa6fSChristoph Hellwig
135642c0faacSVlastimil Babka folio = (struct folio *) __alloc_pages_node(nodeid, flags, cachep->gfporder);
135742c0faacSVlastimil Babka if (!folio) {
13588bdec192SRafael Aquini slab_out_of_memory(cachep, flags, nodeid);
13591da177e4SLinus Torvalds return NULL;
13608bdec192SRafael Aquini }
13611da177e4SLinus Torvalds
136242c0faacSVlastimil Babka slab = folio_slab(folio);
1363072bb0aaSMel Gorman
136442c0faacSVlastimil Babka account_slab(slab, cachep->gfporder, cachep, flags);
136542c0faacSVlastimil Babka __folio_set_slab(folio);
13668b881763SVlastimil Babka /* Make the flag visible before any changes to folio->mapping */
13678b881763SVlastimil Babka smp_wmb();
136842c0faacSVlastimil Babka /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */
136902d65d6fSSidhartha Kumar if (sk_memalloc_socks() && folio_is_pfmemalloc(folio))
137042c0faacSVlastimil Babka slab_set_pfmemalloc(slab);
137142c0faacSVlastimil Babka
137242c0faacSVlastimil Babka return slab;
13731da177e4SLinus Torvalds }
13741da177e4SLinus Torvalds
13751da177e4SLinus Torvalds /*
13761da177e4SLinus Torvalds * Interface to system's page release.
13771da177e4SLinus Torvalds */
137842c0faacSVlastimil Babka static void kmem_freepages(struct kmem_cache *cachep, struct slab *slab)
13791da177e4SLinus Torvalds {
138027ee57c9SVladimir Davydov int order = cachep->gfporder;
138142c0faacSVlastimil Babka struct folio *folio = slab_folio(slab);
138273293c2fSJoonsoo Kim
138342c0faacSVlastimil Babka BUG_ON(!folio_test_slab(folio));
138442c0faacSVlastimil Babka __slab_clear_pfmemalloc(slab);
1385c034c6a4SSeongJae Park page_mapcount_reset(&folio->page);
138642c0faacSVlastimil Babka folio->mapping = NULL;
13878b881763SVlastimil Babka /* Make the mapping reset visible before clearing the flag */
13888b881763SVlastimil Babka smp_wmb();
13898b881763SVlastimil Babka __folio_clear_slab(folio);
13901f458cbfSGlauber Costa
1391c7b23b68SYosry Ahmed mm_account_reclaimed_pages(1 << order);
139242c0faacSVlastimil Babka unaccount_slab(slab, order, cachep);
1393c034c6a4SSeongJae Park __free_pages(&folio->page, order);
13941da177e4SLinus Torvalds }
13951da177e4SLinus Torvalds
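/*
 * RCU callback used by slab_destroy() for SLAB_TYPESAFE_BY_RCU caches: the
 * rcu_head is embedded in struct slab, so the backing pages are only handed
 * back to the page allocator after a grace period has elapsed.
 */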
13961da177e4SLinus Torvalds static void kmem_rcu_free(struct rcu_head *head)
13971da177e4SLinus Torvalds {
139868126702SJoonsoo Kim struct kmem_cache *cachep;
139942c0faacSVlastimil Babka struct slab *slab;
14001da177e4SLinus Torvalds
140142c0faacSVlastimil Babka slab = container_of(head, struct slab, rcu_head);
140242c0faacSVlastimil Babka cachep = slab->slab_cache;
140368126702SJoonsoo Kim
140442c0faacSVlastimil Babka kmem_freepages(cachep, slab);
14051da177e4SLinus Torvalds }
14061da177e4SLinus Torvalds
14071da177e4SLinus Torvalds #if DEBUG
140881ce2ebdSlvqian static inline bool is_debug_pagealloc_cache(struct kmem_cache *cachep)
140940b44137SJoonsoo Kim {
141081ce2ebdSlvqian return debug_pagealloc_enabled_static() && OFF_SLAB(cachep) &&
141181ce2ebdSlvqian ((cachep->size % PAGE_SIZE) == 0);
141240b44137SJoonsoo Kim }
14131da177e4SLinus Torvalds
14141da177e4SLinus Torvalds #ifdef CONFIG_DEBUG_PAGEALLOC
141580552f0fSQian Cai static void slab_kernel_map(struct kmem_cache *cachep, void *objp, int map)
141640b44137SJoonsoo Kim {
141740b44137SJoonsoo Kim if (!is_debug_pagealloc_cache(cachep))
141840b44137SJoonsoo Kim return;
141940b44137SJoonsoo Kim
142077bc7fd6SMike Rapoport __kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map);
142140b44137SJoonsoo Kim }
142240b44137SJoonsoo Kim
142340b44137SJoonsoo Kim #else
142440b44137SJoonsoo Kim static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp,
142580552f0fSQian Cai int map) {}
142640b44137SJoonsoo Kim
14271da177e4SLinus Torvalds #endif
14281da177e4SLinus Torvalds
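/*
 * Poisoning layout (added for clarity): poison_obj() fills the object with
 * 'val' (POISON_FREE on free, POISON_INUSE on allocation) and overwrites the
 * last byte with POISON_END; check_poison_obj() below expects this pattern.
 */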
1429343e0d7aSPekka Enberg static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
14301da177e4SLinus Torvalds {
14318c138bc0SChristoph Lameter int size = cachep->object_size;
14323dafccf2SManfred Spraul addr = &((char *)addr)[obj_offset(cachep)];
14331da177e4SLinus Torvalds
14341da177e4SLinus Torvalds memset(addr, val, size);
14351da177e4SLinus Torvalds *(unsigned char *)(addr + size - 1) = POISON_END;
14361da177e4SLinus Torvalds }
14371da177e4SLinus Torvalds
14381da177e4SLinus Torvalds static void dump_line(char *data, int offset, int limit)
14391da177e4SLinus Torvalds {
14401da177e4SLinus Torvalds int i;
1441aa83aa40SDave Jones unsigned char error = 0;
1442aa83aa40SDave Jones int bad_count = 0;
1443aa83aa40SDave Jones
14441170532bSJoe Perches pr_err("%03x: ", offset);
1445aa83aa40SDave Jones for (i = 0; i < limit; i++) {
1446aa83aa40SDave Jones if (data[offset + i] != POISON_FREE) {
1447aa83aa40SDave Jones error = data[offset + i];
1448aa83aa40SDave Jones bad_count++;
1449aa83aa40SDave Jones }
1450aa83aa40SDave Jones }
1451fdde6abbSSebastian Andrzej Siewior print_hex_dump(KERN_CONT, "", 0, 16, 1,
1452fdde6abbSSebastian Andrzej Siewior &data[offset], limit, 1);
1453aa83aa40SDave Jones
1454aa83aa40SDave Jones if (bad_count == 1) {
1455aa83aa40SDave Jones error ^= POISON_FREE;
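/*
 * After XOR-ing with the expected poison byte, a value with only a
 * single bit set means exactly one bit was flipped, which points at
 * bad RAM rather than a software overwrite.
 */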
1456aa83aa40SDave Jones if (!(error & (error - 1))) {
14571170532bSJoe Perches pr_err("Single bit error detected. Probably bad RAM.\n");
1458aa83aa40SDave Jones #ifdef CONFIG_X86
14591170532bSJoe Perches pr_err("Run memtest86+ or a similar memory test tool.\n");
1460aa83aa40SDave Jones #else
14611170532bSJoe Perches pr_err("Run a memory test tool.\n");
1462aa83aa40SDave Jones #endif
1463aa83aa40SDave Jones }
1464aa83aa40SDave Jones }
14651da177e4SLinus Torvalds }
14661da177e4SLinus Torvalds #endif
14671da177e4SLinus Torvalds
14681da177e4SLinus Torvalds #if DEBUG
14691da177e4SLinus Torvalds
1470343e0d7aSPekka Enberg static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
14711da177e4SLinus Torvalds {
14721da177e4SLinus Torvalds int i, size;
14731da177e4SLinus Torvalds char *realobj;
14741da177e4SLinus Torvalds
14751da177e4SLinus Torvalds if (cachep->flags & SLAB_RED_ZONE) {
14761170532bSJoe Perches pr_err("Redzone: 0x%llx/0x%llx\n",
14771da177e4SLinus Torvalds *dbg_redzone1(cachep, objp),
14781da177e4SLinus Torvalds *dbg_redzone2(cachep, objp));
14791da177e4SLinus Torvalds }
14801da177e4SLinus Torvalds
148185c3e4a5SGeert Uytterhoeven if (cachep->flags & SLAB_STORE_USER)
148285c3e4a5SGeert Uytterhoeven pr_err("Last user: (%pSR)\n", *dbg_userword(cachep, objp));
14833dafccf2SManfred Spraul realobj = (char *)objp + obj_offset(cachep);
14848c138bc0SChristoph Lameter size = cachep->object_size;
14851da177e4SLinus Torvalds for (i = 0; i < size && lines; i += 16, lines--) {
14861da177e4SLinus Torvalds int limit;
14871da177e4SLinus Torvalds limit = 16;
14881da177e4SLinus Torvalds if (i + limit > size)
14891da177e4SLinus Torvalds limit = size - i;
14901da177e4SLinus Torvalds dump_line(realobj, i, limit);
14911da177e4SLinus Torvalds }
14921da177e4SLinus Torvalds }
14931da177e4SLinus Torvalds
1494343e0d7aSPekka Enberg static void check_poison_obj(struct kmem_cache *cachep, void *objp)
14951da177e4SLinus Torvalds {
14961da177e4SLinus Torvalds char *realobj;
14971da177e4SLinus Torvalds int size, i;
14981da177e4SLinus Torvalds int lines = 0;
14991da177e4SLinus Torvalds
150040b44137SJoonsoo Kim if (is_debug_pagealloc_cache(cachep))
150140b44137SJoonsoo Kim return;
150240b44137SJoonsoo Kim
15033dafccf2SManfred Spraul realobj = (char *)objp + obj_offset(cachep);
15048c138bc0SChristoph Lameter size = cachep->object_size;
15051da177e4SLinus Torvalds
15061da177e4SLinus Torvalds for (i = 0; i < size; i++) {
15071da177e4SLinus Torvalds char exp = POISON_FREE;
15081da177e4SLinus Torvalds if (i == size - 1)
15091da177e4SLinus Torvalds exp = POISON_END;
15101da177e4SLinus Torvalds if (realobj[i] != exp) {
15111da177e4SLinus Torvalds int limit;
15121da177e4SLinus Torvalds /* Mismatch ! */
15131da177e4SLinus Torvalds /* Print header */
15141da177e4SLinus Torvalds if (lines == 0) {
151585c3e4a5SGeert Uytterhoeven pr_err("Slab corruption (%s): %s start=%px, len=%d\n",
15161170532bSJoe Perches print_tainted(), cachep->name,
15171170532bSJoe Perches realobj, size);
15181da177e4SLinus Torvalds print_objinfo(cachep, objp, 0);
15191da177e4SLinus Torvalds }
15201da177e4SLinus Torvalds /* Hexdump the affected line */
15211da177e4SLinus Torvalds i = (i / 16) * 16;
15221da177e4SLinus Torvalds limit = 16;
15231da177e4SLinus Torvalds if (i + limit > size)
15241da177e4SLinus Torvalds limit = size - i;
15251da177e4SLinus Torvalds dump_line(realobj, i, limit);
15261da177e4SLinus Torvalds i += 16;
15271da177e4SLinus Torvalds lines++;
15281da177e4SLinus Torvalds /* Limit to 5 lines */
15291da177e4SLinus Torvalds if (lines > 5)
15301da177e4SLinus Torvalds break;
15311da177e4SLinus Torvalds }
15321da177e4SLinus Torvalds }
15331da177e4SLinus Torvalds if (lines != 0) {
15341da177e4SLinus Torvalds /* Print some data about the neighboring objects, if they
15351da177e4SLinus Torvalds * exist:
15361da177e4SLinus Torvalds */
15377981e67eSVlastimil Babka struct slab *slab = virt_to_slab(objp);
15388fea4e96SPekka Enberg unsigned int objnr;
15391da177e4SLinus Torvalds
154040f3bf0cSVlastimil Babka objnr = obj_to_index(cachep, slab, objp);
15411da177e4SLinus Torvalds if (objnr) {
15427981e67eSVlastimil Babka objp = index_to_obj(cachep, slab, objnr - 1);
15433dafccf2SManfred Spraul realobj = (char *)objp + obj_offset(cachep);
154485c3e4a5SGeert Uytterhoeven pr_err("Prev obj: start=%px, len=%d\n", realobj, size);
15451da177e4SLinus Torvalds print_objinfo(cachep, objp, 2);
15461da177e4SLinus Torvalds }
15471da177e4SLinus Torvalds if (objnr + 1 < cachep->num) {
15487981e67eSVlastimil Babka objp = index_to_obj(cachep, slab, objnr + 1);
15493dafccf2SManfred Spraul realobj = (char *)objp + obj_offset(cachep);
155085c3e4a5SGeert Uytterhoeven pr_err("Next obj: start=%px, len=%d\n", realobj, size);
15511da177e4SLinus Torvalds print_objinfo(cachep, objp, 2);
15521da177e4SLinus Torvalds }
15531da177e4SLinus Torvalds }
15541da177e4SLinus Torvalds }
15551da177e4SLinus Torvalds #endif
15561da177e4SLinus Torvalds
15571da177e4SLinus Torvalds #if DEBUG
15588456a648SJoonsoo Kim static void slab_destroy_debugcheck(struct kmem_cache *cachep,
15597981e67eSVlastimil Babka struct slab *slab)
156012dd36faSMatthew Dobson {
15611da177e4SLinus Torvalds int i;
1562b03a017bSJoonsoo Kim
1563b03a017bSJoonsoo Kim if (OBJFREELIST_SLAB(cachep) && cachep->flags & SLAB_POISON) {
15647981e67eSVlastimil Babka poison_obj(cachep, slab->freelist - obj_offset(cachep),
1565b03a017bSJoonsoo Kim POISON_FREE);
1566b03a017bSJoonsoo Kim }
1567b03a017bSJoonsoo Kim
15681da177e4SLinus Torvalds for (i = 0; i < cachep->num; i++) {
15697981e67eSVlastimil Babka void *objp = index_to_obj(cachep, slab, i);
15701da177e4SLinus Torvalds
15711da177e4SLinus Torvalds if (cachep->flags & SLAB_POISON) {
15721da177e4SLinus Torvalds check_poison_obj(cachep, objp);
157380552f0fSQian Cai slab_kernel_map(cachep, objp, 1);
15741da177e4SLinus Torvalds }
15751da177e4SLinus Torvalds if (cachep->flags & SLAB_RED_ZONE) {
15761da177e4SLinus Torvalds if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
1577756a025fSJoe Perches slab_error(cachep, "start of a freed object was overwritten");
15781da177e4SLinus Torvalds if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
1579756a025fSJoe Perches slab_error(cachep, "end of a freed object was overwritten");
15801da177e4SLinus Torvalds }
15811da177e4SLinus Torvalds }
158212dd36faSMatthew Dobson }
15831da177e4SLinus Torvalds #else
15848456a648SJoonsoo Kim static void slab_destroy_debugcheck(struct kmem_cache *cachep,
15857981e67eSVlastimil Babka struct slab *slab)
158612dd36faSMatthew Dobson {
158712dd36faSMatthew Dobson }
15881da177e4SLinus Torvalds #endif
15891da177e4SLinus Torvalds
1590911851e6SRandy Dunlap /**
1591911851e6SRandy Dunlap * slab_destroy - destroy and release all objects in a slab
1592911851e6SRandy Dunlap * @cachep: cache pointer being destroyed
1593dd35f71aSVlastimil Babka * @slab: slab being destroyed
1594911851e6SRandy Dunlap *
1595dd35f71aSVlastimil Babka * Destroy all the objs in a slab, and release the mem back to the system.
1596dd35f71aSVlastimil Babka * Before calling the slab must have been unlinked from the cache. The
15978a7d9b43SWang Sheng-Hui * kmem_cache_node ->list_lock is not held/needed.
159812dd36faSMatthew Dobson */
15997981e67eSVlastimil Babka static void slab_destroy(struct kmem_cache *cachep, struct slab *slab)
160012dd36faSMatthew Dobson {
16017e007355SJoonsoo Kim void *freelist;
160212dd36faSMatthew Dobson
16037981e67eSVlastimil Babka freelist = slab->freelist;
16047981e67eSVlastimil Babka slab_destroy_debugcheck(cachep, slab);
16055f0d5a3aSPaul E. McKenney if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU))
16067981e67eSVlastimil Babka call_rcu(&slab->rcu_head, kmem_rcu_free);
1607bc4f610dSKirill A. Shutemov else
16087981e67eSVlastimil Babka kmem_freepages(cachep, slab);
160968126702SJoonsoo Kim
161068126702SJoonsoo Kim /*
16118456a648SJoonsoo Kim * From now on, we don't use the freelist,
161268126702SJoonsoo Kim * although the actual page can be freed in rcu context.
161368126702SJoonsoo Kim */
1614873623dfSIngo Molnar if (OFF_SLAB(cachep))
1615e36ce448SHyeonggon Yoo kfree(freelist);
16161da177e4SLinus Torvalds }
16171da177e4SLinus Torvalds
1618678ff6a7SShakeel Butt /*
1619678ff6a7SShakeel Butt * Update the size of the caches before calling slabs_destroy as it may
1620678ff6a7SShakeel Butt * recursively call kfree.
1621678ff6a7SShakeel Butt */
162297654dfaSJoonsoo Kim static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list)
162397654dfaSJoonsoo Kim {
16247981e67eSVlastimil Babka struct slab *slab, *n;
162597654dfaSJoonsoo Kim
16267981e67eSVlastimil Babka list_for_each_entry_safe(slab, n, list, slab_list) {
16277981e67eSVlastimil Babka list_del(&slab->slab_list);
16287981e67eSVlastimil Babka slab_destroy(cachep, slab);
162997654dfaSJoonsoo Kim }
163097654dfaSJoonsoo Kim }
163197654dfaSJoonsoo Kim
16321da177e4SLinus Torvalds /**
1633a70773ddSRandy.Dunlap * calculate_slab_order - calculate size (page order) of slabs
1634a70773ddSRandy.Dunlap * @cachep: pointer to the cache that is being created
1635a70773ddSRandy.Dunlap * @size: size of objects to be created in this cache.
1636a70773ddSRandy.Dunlap * @flags: slab allocation flags
1637a70773ddSRandy.Dunlap *
1638a70773ddSRandy.Dunlap * Also calculates the number of objects per slab.
16394d268ebaSPekka Enberg *
16404d268ebaSPekka Enberg * This could be made much more intelligent. For now, try to avoid using
16414d268ebaSPekka Enberg * high order pages for slabs. When the gfp() functions are more friendly
16424d268ebaSPekka Enberg * towards high-order requests, this should be changed.
1643a862f68aSMike Rapoport *
1644a862f68aSMike Rapoport * Return: number of left-over bytes in a slab
16454d268ebaSPekka Enberg */
1646a737b3e2SAndrew Morton static size_t calculate_slab_order(struct kmem_cache *cachep,
1647d50112edSAlexey Dobriyan size_t size, slab_flags_t flags)
16484d268ebaSPekka Enberg {
16494d268ebaSPekka Enberg size_t left_over = 0;
16509888e6faSLinus Torvalds int gfporder;
16514d268ebaSPekka Enberg
16520aa817f0SChristoph Lameter for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
16534d268ebaSPekka Enberg unsigned int num;
16544d268ebaSPekka Enberg size_t remainder;
16554d268ebaSPekka Enberg
165670f75067SJoonsoo Kim num = cache_estimate(gfporder, size, flags, &remainder);
16574d268ebaSPekka Enberg if (!num)
16584d268ebaSPekka Enberg continue;
16599888e6faSLinus Torvalds
1660f315e3faSJoonsoo Kim /* Can't handle number of objects more than SLAB_OBJ_MAX_NUM */
1661f315e3faSJoonsoo Kim if (num > SLAB_OBJ_MAX_NUM)
1662f315e3faSJoonsoo Kim break;
1663f315e3faSJoonsoo Kim
1664b1ab41c4SIngo Molnar if (flags & CFLGS_OFF_SLAB) {
16653217fd9bSJoonsoo Kim struct kmem_cache *freelist_cache;
16663217fd9bSJoonsoo Kim size_t freelist_size;
1667e36ce448SHyeonggon Yoo size_t freelist_cache_size;
1668b1ab41c4SIngo Molnar
16693217fd9bSJoonsoo Kim freelist_size = num * sizeof(freelist_idx_t);
1670e36ce448SHyeonggon Yoo if (freelist_size > KMALLOC_MAX_CACHE_SIZE) {
1671e36ce448SHyeonggon Yoo freelist_cache_size = PAGE_SIZE << get_order(freelist_size);
1672e36ce448SHyeonggon Yoo } else {
1673*3c615294SGONG, Ruiqi freelist_cache = kmalloc_slab(freelist_size, 0u, _RET_IP_);
16743217fd9bSJoonsoo Kim if (!freelist_cache)
16753217fd9bSJoonsoo Kim continue;
1676e36ce448SHyeonggon Yoo freelist_cache_size = freelist_cache->size;
16773217fd9bSJoonsoo Kim
16783217fd9bSJoonsoo Kim /*
16793217fd9bSJoonsoo Kim * Needed to avoid possible looping condition
168076b342bdSJoonsoo Kim * in cache_grow_begin()
16813217fd9bSJoonsoo Kim */
16823217fd9bSJoonsoo Kim if (OFF_SLAB(freelist_cache))
16833217fd9bSJoonsoo Kim continue;
1684e36ce448SHyeonggon Yoo }
16853217fd9bSJoonsoo Kim
16863217fd9bSJoonsoo Kim /* check if off slab has enough benefit */
1687e36ce448SHyeonggon Yoo if (freelist_cache_size > cachep->size / 2)
16883217fd9bSJoonsoo Kim continue;
1689b1ab41c4SIngo Molnar }
16904d268ebaSPekka Enberg
16919888e6faSLinus Torvalds /* Found something acceptable - save it away */
16924d268ebaSPekka Enberg cachep->num = num;
16939888e6faSLinus Torvalds cachep->gfporder = gfporder;
16944d268ebaSPekka Enberg left_over = remainder;
16954d268ebaSPekka Enberg
16964d268ebaSPekka Enberg /*
1697f78bb8adSLinus Torvalds * A VFS-reclaimable slab tends to have most allocations
1698f78bb8adSLinus Torvalds * as GFP_NOFS and we really don't want to have to be allocating
1699f78bb8adSLinus Torvalds * higher-order pages when we are unable to shrink dcache.
1700f78bb8adSLinus Torvalds */
1701f78bb8adSLinus Torvalds if (flags & SLAB_RECLAIM_ACCOUNT)
1702f78bb8adSLinus Torvalds break;
1703f78bb8adSLinus Torvalds
1704f78bb8adSLinus Torvalds /*
17054d268ebaSPekka Enberg * Large number of objects is good, but very large slabs are
17064d268ebaSPekka Enberg * currently bad for the gfp()s.
17074d268ebaSPekka Enberg */
1708543585ccSDavid Rientjes if (gfporder >= slab_max_order)
17094d268ebaSPekka Enberg break;
17104d268ebaSPekka Enberg
17119888e6faSLinus Torvalds /*
17129888e6faSLinus Torvalds * Acceptable internal fragmentation?
17139888e6faSLinus Torvalds */
1714a737b3e2SAndrew Morton if (left_over * 8 <= (PAGE_SIZE << gfporder))
17154d268ebaSPekka Enberg break;
17164d268ebaSPekka Enberg }
17174d268ebaSPekka Enberg return left_over;
17184d268ebaSPekka Enberg }
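/*
 * Worked example (illustrative only, assuming 4K pages, an on-slab freelist
 * and a one-byte freelist_idx_t): for 256-byte objects at gfporder 0 the
 * estimate is 4096 / (256 + 1) = 15 objects per slab, with the remaining
 * 4096 - 15 * 257 = 241 bytes returned as left-over space used for colouring.
 */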
17194d268ebaSPekka Enberg
1720bf0dea23SJoonsoo Kim static struct array_cache __percpu *alloc_kmem_cache_cpus(
1721bf0dea23SJoonsoo Kim struct kmem_cache *cachep, int entries, int batchcount)
1722bf0dea23SJoonsoo Kim {
1723bf0dea23SJoonsoo Kim int cpu;
1724bf0dea23SJoonsoo Kim size_t size;
1725bf0dea23SJoonsoo Kim struct array_cache __percpu *cpu_cache;
1726bf0dea23SJoonsoo Kim
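/*
 * struct array_cache ends in a flexible void *entry[] array, so each
 * per-cpu copy needs the header plus room for 'entries' object pointers.
 */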
1727bf0dea23SJoonsoo Kim size = sizeof(void *) * entries + sizeof(struct array_cache);
172885c9f4b0SJoonsoo Kim cpu_cache = __alloc_percpu(size, sizeof(void *));
1729bf0dea23SJoonsoo Kim
1730bf0dea23SJoonsoo Kim if (!cpu_cache)
1731bf0dea23SJoonsoo Kim return NULL;
1732bf0dea23SJoonsoo Kim
1733bf0dea23SJoonsoo Kim for_each_possible_cpu(cpu) {
1734bf0dea23SJoonsoo Kim init_arraycache(per_cpu_ptr(cpu_cache, cpu),
1735bf0dea23SJoonsoo Kim entries, batchcount);
1736bf0dea23SJoonsoo Kim }
1737bf0dea23SJoonsoo Kim
1738bf0dea23SJoonsoo Kim return cpu_cache;
1739bf0dea23SJoonsoo Kim }
1740bf0dea23SJoonsoo Kim
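/*
 * Set up the per-cpu array caches and per-node structures for a cache.
 * Once slab_state reaches FULL this simply calls enable_cpucache(); during
 * bootstrap the kmem_cache_node structures come either from the static
 * init_kmem_cache_node[] array or from kmalloc_node(), depending on how far
 * initialisation has progressed.
 */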
1741bd721ea7SFabian Frederick static int __ref setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
1742f30cf7d1SPekka Enberg {
174397d06609SChristoph Lameter if (slab_state >= FULL)
174483b519e8SPekka Enberg return enable_cpucache(cachep, gfp);
17452ed3a4efSChristoph Lameter
1746bf0dea23SJoonsoo Kim cachep->cpu_cache = alloc_kmem_cache_cpus(cachep, 1, 1);
1747bf0dea23SJoonsoo Kim if (!cachep->cpu_cache)
1748bf0dea23SJoonsoo Kim return 1;
1749bf0dea23SJoonsoo Kim
175097d06609SChristoph Lameter if (slab_state == DOWN) {
1751bf0dea23SJoonsoo Kim /* Creation of first cache (kmem_cache). */
1752bf0dea23SJoonsoo Kim set_up_node(kmem_cache, CACHE_CACHE);
17532f9baa9fSChristoph Lameter } else if (slab_state == PARTIAL) {
1754bf0dea23SJoonsoo Kim /* For kmem_cache_node */
1755ce8eb6c4SChristoph Lameter set_up_node(cachep, SIZE_NODE);
1756f30cf7d1SPekka Enberg } else {
1757f30cf7d1SPekka Enberg int node;
1758bf0dea23SJoonsoo Kim
1759556a169dSPekka Enberg for_each_online_node(node) {
1760bf0dea23SJoonsoo Kim cachep->node[node] = kmalloc_node(
1761bf0dea23SJoonsoo Kim sizeof(struct kmem_cache_node), gfp, node);
17626a67368cSChristoph Lameter BUG_ON(!cachep->node[node]);
1763ce8eb6c4SChristoph Lameter kmem_cache_node_init(cachep->node[node]);
1764f30cf7d1SPekka Enberg }
1765f30cf7d1SPekka Enberg }
1766bf0dea23SJoonsoo Kim
17676a67368cSChristoph Lameter cachep->node[numa_mem_id()]->next_reap =
17685f0985bbSJianyu Zhan jiffies + REAPTIMEOUT_NODE +
17695f0985bbSJianyu Zhan ((unsigned long)cachep) % REAPTIMEOUT_NODE;
1770f30cf7d1SPekka Enberg
1771f30cf7d1SPekka Enberg cpu_cache_get(cachep)->avail = 0;
1772f30cf7d1SPekka Enberg cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
1773f30cf7d1SPekka Enberg cpu_cache_get(cachep)->batchcount = 1;
1774f30cf7d1SPekka Enberg cpu_cache_get(cachep)->touched = 0;
1775f30cf7d1SPekka Enberg cachep->batchcount = 1;
1776f30cf7d1SPekka Enberg cachep->limit = BOOT_CPUCACHE_ENTRIES;
17772ed3a4efSChristoph Lameter return 0;
1778f30cf7d1SPekka Enberg }
1779f30cf7d1SPekka Enberg
17800293d1fdSAlexey Dobriyan slab_flags_t kmem_cache_flags(unsigned int object_size,
178137540008SNikolay Borisov slab_flags_t flags, const char *name)
178212220deaSJoonsoo Kim {
178312220deaSJoonsoo Kim return flags;
178412220deaSJoonsoo Kim }
178512220deaSJoonsoo Kim
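/*
 * Try to reuse an existing, compatible cache instead of creating a new one.
 * On a successful merge the refcount is bumped and object_size is widened so
 * that kzalloc() clears the whole merged object.
 */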
178612220deaSJoonsoo Kim struct kmem_cache *
1787f4957d5bSAlexey Dobriyan __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
1788d50112edSAlexey Dobriyan slab_flags_t flags, void (*ctor)(void *))
178912220deaSJoonsoo Kim {
179012220deaSJoonsoo Kim struct kmem_cache *cachep;
179112220deaSJoonsoo Kim
179212220deaSJoonsoo Kim cachep = find_mergeable(size, align, flags, name, ctor);
179312220deaSJoonsoo Kim if (cachep) {
179412220deaSJoonsoo Kim cachep->refcount++;
179512220deaSJoonsoo Kim
179612220deaSJoonsoo Kim /*
179712220deaSJoonsoo Kim * Adjust the object sizes so that we clear
179812220deaSJoonsoo Kim * the complete object on kzalloc.
179912220deaSJoonsoo Kim */
180012220deaSJoonsoo Kim cachep->object_size = max_t(int, cachep->object_size, size);
180112220deaSJoonsoo Kim }
180212220deaSJoonsoo Kim return cachep;
180312220deaSJoonsoo Kim }
180412220deaSJoonsoo Kim
1805b03a017bSJoonsoo Kim static bool set_objfreelist_slab_cache(struct kmem_cache *cachep,
1806d50112edSAlexey Dobriyan size_t size, slab_flags_t flags)
1807b03a017bSJoonsoo Kim {
1808b03a017bSJoonsoo Kim size_t left;
1809b03a017bSJoonsoo Kim
1810b03a017bSJoonsoo Kim cachep->num = 0;
1811b03a017bSJoonsoo Kim
18126471384aSAlexander Potapenko /*
18136471384aSAlexander Potapenko * If slab auto-initialization on free is enabled, store the freelist
18146471384aSAlexander Potapenko * off-slab, so that its contents don't end up in one of the allocated
18156471384aSAlexander Potapenko * objects.
18166471384aSAlexander Potapenko */
18176471384aSAlexander Potapenko if (unlikely(slab_want_init_on_free(cachep)))
18186471384aSAlexander Potapenko return false;
18196471384aSAlexander Potapenko
18205f0d5a3aSPaul E. McKenney if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU)
1821b03a017bSJoonsoo Kim return false;
1822b03a017bSJoonsoo Kim
1823b03a017bSJoonsoo Kim left = calculate_slab_order(cachep, size,
1824b03a017bSJoonsoo Kim flags | CFLGS_OBJFREELIST_SLAB);
1825b03a017bSJoonsoo Kim if (!cachep->num)
1826b03a017bSJoonsoo Kim return false;
1827b03a017bSJoonsoo Kim
1828b03a017bSJoonsoo Kim if (cachep->num * sizeof(freelist_idx_t) > cachep->object_size)
1829b03a017bSJoonsoo Kim return false;
1830b03a017bSJoonsoo Kim
1831b03a017bSJoonsoo Kim cachep->colour = left / cachep->colour_off;
1832b03a017bSJoonsoo Kim
1833b03a017bSJoonsoo Kim return true;
1834b03a017bSJoonsoo Kim }
1835b03a017bSJoonsoo Kim
1836158e319bSJoonsoo Kim static bool set_off_slab_cache(struct kmem_cache *cachep,
1837d50112edSAlexey Dobriyan size_t size, slab_flags_t flags)
1838158e319bSJoonsoo Kim {
1839158e319bSJoonsoo Kim size_t left;
1840158e319bSJoonsoo Kim
1841158e319bSJoonsoo Kim cachep->num = 0;
1842158e319bSJoonsoo Kim
1843158e319bSJoonsoo Kim /*
18443217fd9bSJoonsoo Kim * Always use on-slab management when SLAB_NOLEAKTRACE is set,
18453217fd9bSJoonsoo Kim * to avoid recursive calls into kmemleak.
1846158e319bSJoonsoo Kim */
1847158e319bSJoonsoo Kim if (flags & SLAB_NOLEAKTRACE)
1848158e319bSJoonsoo Kim return false;
1849158e319bSJoonsoo Kim
1850158e319bSJoonsoo Kim /*
1851158e319bSJoonsoo Kim * Size is large, assume best to place the slab management obj
1852158e319bSJoonsoo Kim * off-slab (should allow better packing of objs).
1853158e319bSJoonsoo Kim */
1854158e319bSJoonsoo Kim left = calculate_slab_order(cachep, size, flags | CFLGS_OFF_SLAB);
1855158e319bSJoonsoo Kim if (!cachep->num)
1856158e319bSJoonsoo Kim return false;
1857158e319bSJoonsoo Kim
1858158e319bSJoonsoo Kim /*
1859158e319bSJoonsoo Kim * If the slab has been placed off-slab, and we have enough space then
1860158e319bSJoonsoo Kim * move it on-slab. This is at the expense of any extra colouring.
1861158e319bSJoonsoo Kim */
1862158e319bSJoonsoo Kim if (left >= cachep->num * sizeof(freelist_idx_t))
1863158e319bSJoonsoo Kim return false;
1864158e319bSJoonsoo Kim
1865158e319bSJoonsoo Kim cachep->colour = left / cachep->colour_off;
1866158e319bSJoonsoo Kim
1867158e319bSJoonsoo Kim return true;
1868158e319bSJoonsoo Kim }
1869158e319bSJoonsoo Kim
1870158e319bSJoonsoo Kim static bool set_on_slab_cache(struct kmem_cache *cachep,
1871d50112edSAlexey Dobriyan size_t size, slab_flags_t flags)
1872158e319bSJoonsoo Kim {
1873158e319bSJoonsoo Kim size_t left;
1874158e319bSJoonsoo Kim
1875158e319bSJoonsoo Kim cachep->num = 0;
1876158e319bSJoonsoo Kim
1877158e319bSJoonsoo Kim left = calculate_slab_order(cachep, size, flags);
1878158e319bSJoonsoo Kim if (!cachep->num)
1879158e319bSJoonsoo Kim return false;
1880158e319bSJoonsoo Kim
1881158e319bSJoonsoo Kim cachep->colour = left / cachep->colour_off;
1882158e319bSJoonsoo Kim
1883158e319bSJoonsoo Kim return true;
1884158e319bSJoonsoo Kim }
1885158e319bSJoonsoo Kim
1886444f20c2Szhaoxinchao /*
1887039363f3SChristoph Lameter * __kmem_cache_create - Create a cache.
1888a755b76aSRandy Dunlap * @cachep: cache management descriptor
18891da177e4SLinus Torvalds * @flags: SLAB flags
18901da177e4SLinus Torvalds *
1891444f20c2Szhaoxinchao * Returns zero on success, nonzero on failure.
18921da177e4SLinus Torvalds *
18931da177e4SLinus Torvalds * The flags are
18941da177e4SLinus Torvalds *
18951da177e4SLinus Torvalds * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
18961da177e4SLinus Torvalds * to catch references to uninitialised memory.
18971da177e4SLinus Torvalds *
18981da177e4SLinus Torvalds * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
18991da177e4SLinus Torvalds * for buffer overruns.
19001da177e4SLinus Torvalds *
19011da177e4SLinus Torvalds * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
19021da177e4SLinus Torvalds * cacheline. This can be beneficial if you're counting cycles as closely
19031da177e4SLinus Torvalds * as davem.
19041da177e4SLinus Torvalds */
1905d50112edSAlexey Dobriyan int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags)
19061da177e4SLinus Torvalds {
1907d4a5fca5SDavid Rientjes size_t ralign = BYTES_PER_WORD;
190883b519e8SPekka Enberg gfp_t gfp;
1909278b1bb1SChristoph Lameter int err;
1910be4a7988SAlexey Dobriyan unsigned int size = cachep->size;
19111da177e4SLinus Torvalds
19121da177e4SLinus Torvalds #if DEBUG
19131da177e4SLinus Torvalds #if FORCED_DEBUG
19141da177e4SLinus Torvalds /*
19151da177e4SLinus Torvalds * Enable redzoning and last user accounting, except for caches with
19161da177e4SLinus Torvalds * large objects, if the increased size would increase the object size
19171da177e4SLinus Torvalds * above the next power of two: caches with object sizes just above a
19181da177e4SLinus Torvalds * power of two have a significant amount of internal fragmentation.
19191da177e4SLinus Torvalds */
192087a927c7SDavid Woodhouse if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
192187a927c7SDavid Woodhouse 2 * sizeof(unsigned long long)))
19221da177e4SLinus Torvalds flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
19235f0d5a3aSPaul E. McKenney if (!(flags & SLAB_TYPESAFE_BY_RCU))
19241da177e4SLinus Torvalds flags |= SLAB_POISON;
19251da177e4SLinus Torvalds #endif
19261da177e4SLinus Torvalds #endif
19271da177e4SLinus Torvalds
1928a737b3e2SAndrew Morton /*
1929a737b3e2SAndrew Morton * Check that size is in terms of words. This is needed to avoid
19301da177e4SLinus Torvalds * unaligned accesses for some archs when redzoning is used, and makes
19311da177e4SLinus Torvalds * sure any on-slab bufctls are also correctly aligned.
19321da177e4SLinus Torvalds */
1933e0771950SCanjiang Lu size = ALIGN(size, BYTES_PER_WORD);
19341da177e4SLinus Torvalds
193587a927c7SDavid Woodhouse if (flags & SLAB_RED_ZONE) {
193687a927c7SDavid Woodhouse ralign = REDZONE_ALIGN;
193787a927c7SDavid Woodhouse /* If redzoning, ensure that the second redzone is suitably
193887a927c7SDavid Woodhouse * aligned, by adjusting the object size accordingly. */
1939e0771950SCanjiang Lu size = ALIGN(size, REDZONE_ALIGN);
194087a927c7SDavid Woodhouse }
1941ca5f9703SPekka Enberg
1942a44b56d3SKevin Hilman /* 3) caller mandated alignment */
19438a13a4ccSChristoph Lameter if (ralign < cachep->align) {
19448a13a4ccSChristoph Lameter ralign = cachep->align;
1945a44b56d3SKevin Hilman }
19463ff84a7fSPekka Enberg /* disable debug if necessary */
19473ff84a7fSPekka Enberg if (ralign > __alignof__(unsigned long long))
19481da177e4SLinus Torvalds flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
1949a737b3e2SAndrew Morton /*
1950ca5f9703SPekka Enberg * 4) Store it.
19511da177e4SLinus Torvalds */
19528a13a4ccSChristoph Lameter cachep->align = ralign;
1953158e319bSJoonsoo Kim cachep->colour_off = cache_line_size();
1954158e319bSJoonsoo Kim /* Offset must be a multiple of the alignment. */
1955158e319bSJoonsoo Kim if (cachep->colour_off < cachep->align)
1956158e319bSJoonsoo Kim cachep->colour_off = cachep->align;
19571da177e4SLinus Torvalds
195883b519e8SPekka Enberg if (slab_is_available())
195983b519e8SPekka Enberg gfp = GFP_KERNEL;
196083b519e8SPekka Enberg else
196183b519e8SPekka Enberg gfp = GFP_NOWAIT;
196283b519e8SPekka Enberg
19631da177e4SLinus Torvalds #if DEBUG
19641da177e4SLinus Torvalds
1965ca5f9703SPekka Enberg /*
1966ca5f9703SPekka Enberg * Both debugging options require word-alignment which is calculated
1967ca5f9703SPekka Enberg * into align above.
1968ca5f9703SPekka Enberg */
19691da177e4SLinus Torvalds if (flags & SLAB_RED_ZONE) {
19701da177e4SLinus Torvalds /* add space for red zone words */
19713ff84a7fSPekka Enberg cachep->obj_offset += sizeof(unsigned long long);
19723ff84a7fSPekka Enberg size += 2 * sizeof(unsigned long long);
19731da177e4SLinus Torvalds }
19741da177e4SLinus Torvalds if (flags & SLAB_STORE_USER) {
1975ca5f9703SPekka Enberg /* user store requires one word storage behind the end of
197687a927c7SDavid Woodhouse * the real object. But if the second red zone needs to be
197787a927c7SDavid Woodhouse * aligned to 64 bits, we must allow that much space.
19781da177e4SLinus Torvalds */
197987a927c7SDavid Woodhouse if (flags & SLAB_RED_ZONE)
198087a927c7SDavid Woodhouse size += REDZONE_ALIGN;
198187a927c7SDavid Woodhouse else
19821da177e4SLinus Torvalds size += BYTES_PER_WORD;
19831da177e4SLinus Torvalds }
1984832a15d2SJoonsoo Kim #endif
1985832a15d2SJoonsoo Kim
19867ed2f9e6SAlexander Potapenko kasan_cache_create(cachep, &size, &flags);
19877ed2f9e6SAlexander Potapenko
1988832a15d2SJoonsoo Kim size = ALIGN(size, cachep->align);
1989832a15d2SJoonsoo Kim /*
1990832a15d2SJoonsoo Kim * We should restrict the number of objects in a slab to implement
1991832a15d2SJoonsoo Kim * a byte sized index. Refer to the comment on the SLAB_OBJ_MIN_SIZE definition.
1992832a15d2SJoonsoo Kim */
1993832a15d2SJoonsoo Kim if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE)
1994832a15d2SJoonsoo Kim size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align);
1995832a15d2SJoonsoo Kim
1996832a15d2SJoonsoo Kim #if DEBUG
199703a2d2a3SJoonsoo Kim /*
199803a2d2a3SJoonsoo Kim * To activate debug pagealloc, off-slab management is a necessary
199903a2d2a3SJoonsoo Kim * requirement. In the early phase of initialization, small sized slabs
200003a2d2a3SJoonsoo Kim * don't get initialized yet, so it would not be possible then. Hence we
200103a2d2a3SJoonsoo Kim * need to check size >= 256; it guarantees that all the necessary small
200203a2d2a3SJoonsoo Kim * sized slabs are initialized in the current slab initialization sequence.
200303a2d2a3SJoonsoo Kim */
20048e57f8acSVlastimil Babka if (debug_pagealloc_enabled_static() && (flags & SLAB_POISON) &&
2005f3a3c320SJoonsoo Kim size >= 256 && cachep->object_size > cache_line_size()) {
2006f3a3c320SJoonsoo Kim if (size < PAGE_SIZE || size % PAGE_SIZE == 0) {
2007f3a3c320SJoonsoo Kim size_t tmp_size = ALIGN(size, PAGE_SIZE);
2008f3a3c320SJoonsoo Kim
2009f3a3c320SJoonsoo Kim if (set_off_slab_cache(cachep, tmp_size, flags)) {
2010f3a3c320SJoonsoo Kim flags |= CFLGS_OFF_SLAB;
2011f3a3c320SJoonsoo Kim cachep->obj_offset += tmp_size - size;
2012f3a3c320SJoonsoo Kim size = tmp_size;
2013f3a3c320SJoonsoo Kim goto done;
2014f3a3c320SJoonsoo Kim }
2015f3a3c320SJoonsoo Kim }
20161da177e4SLinus Torvalds }
20171da177e4SLinus Torvalds #endif
20181da177e4SLinus Torvalds
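/*
 * Try the three freelist layouts in order of preference: an
 * object-embedded freelist, then an off-slab freelist, and finally
 * the plain on-slab layout as the fallback.
 */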
2019b03a017bSJoonsoo Kim if (set_objfreelist_slab_cache(cachep, size, flags)) {
2020b03a017bSJoonsoo Kim flags |= CFLGS_OBJFREELIST_SLAB;
2021b03a017bSJoonsoo Kim goto done;
2022b03a017bSJoonsoo Kim }
2023b03a017bSJoonsoo Kim
2024158e319bSJoonsoo Kim if (set_off_slab_cache(cachep, size, flags)) {
20251da177e4SLinus Torvalds flags |= CFLGS_OFF_SLAB;
2026158e319bSJoonsoo Kim goto done;
2027832a15d2SJoonsoo Kim }
20281da177e4SLinus Torvalds
2029158e319bSJoonsoo Kim if (set_on_slab_cache(cachep, size, flags))
2030158e319bSJoonsoo Kim goto done;
20311da177e4SLinus Torvalds
2032278b1bb1SChristoph Lameter return -E2BIG;
20338a13a4ccSChristoph Lameter
2034158e319bSJoonsoo Kim done:
2035158e319bSJoonsoo Kim cachep->freelist_size = cachep->num * sizeof(freelist_idx_t);
20361da177e4SLinus Torvalds cachep->flags = flags;
2037a57a4988SJoonsoo Kim cachep->allocflags = __GFP_COMP;
2038a3187e43SYang Shi if (flags & SLAB_CACHE_DMA)
2039a618e89fSGlauber Costa cachep->allocflags |= GFP_DMA;
20406d6ea1e9SNicolas Boichat if (flags & SLAB_CACHE_DMA32)
20416d6ea1e9SNicolas Boichat cachep->allocflags |= GFP_DMA32;
2042a3ba0744SDavid Rientjes if (flags & SLAB_RECLAIM_ACCOUNT)
2043a3ba0744SDavid Rientjes cachep->allocflags |= __GFP_RECLAIMABLE;
20443b0efdfaSChristoph Lameter cachep->size = size;
20456a2d7a95SEric Dumazet cachep->reciprocal_buffer_size = reciprocal_value(size);
20461da177e4SLinus Torvalds
204740b44137SJoonsoo Kim #if DEBUG
204840b44137SJoonsoo Kim /*
204940b44137SJoonsoo Kim * If we're going to use the generic kernel_map_pages()
205040b44137SJoonsoo Kim * poisoning, then it's going to smash the contents of
205140b44137SJoonsoo Kim * the redzone and userword anyhow, so switch them off.
205240b44137SJoonsoo Kim */
205340b44137SJoonsoo Kim if (IS_ENABLED(CONFIG_PAGE_POISONING) &&
205440b44137SJoonsoo Kim (cachep->flags & SLAB_POISON) &&
205540b44137SJoonsoo Kim is_debug_pagealloc_cache(cachep))
205640b44137SJoonsoo Kim cachep->flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
205740b44137SJoonsoo Kim #endif
205840b44137SJoonsoo Kim
2059278b1bb1SChristoph Lameter err = setup_cpu_cache(cachep, gfp);
2060278b1bb1SChristoph Lameter if (err) {
206152b4b950SDmitry Safonov __kmem_cache_release(cachep);
2062278b1bb1SChristoph Lameter return err;
20632ed3a4efSChristoph Lameter }
20641da177e4SLinus Torvalds
2065278b1bb1SChristoph Lameter return 0;
20661da177e4SLinus Torvalds }
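/*
 * Illustrative usage sketch (not part of this file): callers go through the
 * generic kmem_cache_create() wrapper rather than __kmem_cache_create()
 * directly. 'struct foo' below is a made-up example type.
 *
 *	struct kmem_cache *c = kmem_cache_create("foo_cache",
 *			sizeof(struct foo), 0, SLAB_HWCACHE_ALIGN, NULL);
 *	struct foo *obj = kmem_cache_alloc(c, GFP_KERNEL);
 *	...
 *	kmem_cache_free(c, obj);
 *	kmem_cache_destroy(c);
 */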
20671da177e4SLinus Torvalds
20681da177e4SLinus Torvalds #if DEBUG
20691da177e4SLinus Torvalds static void check_irq_off(void)
20701da177e4SLinus Torvalds {
20711da177e4SLinus Torvalds BUG_ON(!irqs_disabled());
20721da177e4SLinus Torvalds }
20731da177e4SLinus Torvalds
20741da177e4SLinus Torvalds static void check_irq_on(void)
20751da177e4SLinus Torvalds {
20761da177e4SLinus Torvalds BUG_ON(irqs_disabled());
20771da177e4SLinus Torvalds }
20781da177e4SLinus Torvalds
207918726ca8SJoonsoo Kim static void check_mutex_acquired(void)
208018726ca8SJoonsoo Kim {
208118726ca8SJoonsoo Kim BUG_ON(!mutex_is_locked(&slab_mutex));
208218726ca8SJoonsoo Kim }
208318726ca8SJoonsoo Kim
2084343e0d7aSPekka Enberg static void check_spinlock_acquired(struct kmem_cache *cachep)
20851da177e4SLinus Torvalds {
20861da177e4SLinus Torvalds #ifdef CONFIG_SMP
20871da177e4SLinus Torvalds check_irq_off();
2088b539ce9fSJiri Kosina assert_raw_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
20891da177e4SLinus Torvalds #endif
20901da177e4SLinus Torvalds }
2091e498be7dSChristoph Lameter
2092343e0d7aSPekka Enberg static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
2093e498be7dSChristoph Lameter {
2094e498be7dSChristoph Lameter #ifdef CONFIG_SMP
2095e498be7dSChristoph Lameter check_irq_off();
2096b539ce9fSJiri Kosina assert_raw_spin_locked(&get_node(cachep, node)->list_lock);
2097e498be7dSChristoph Lameter #endif
2098e498be7dSChristoph Lameter }
2099e498be7dSChristoph Lameter
21001da177e4SLinus Torvalds #else
21011da177e4SLinus Torvalds #define check_irq_off() do { } while(0)
21021da177e4SLinus Torvalds #define check_irq_on() do { } while(0)
210318726ca8SJoonsoo Kim #define check_mutex_acquired() do { } while(0)
21041da177e4SLinus Torvalds #define check_spinlock_acquired(x) do { } while(0)
2105e498be7dSChristoph Lameter #define check_spinlock_acquired_node(x, y) do { } while(0)
21061da177e4SLinus Torvalds #endif
21071da177e4SLinus Torvalds
210818726ca8SJoonsoo Kim static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac,
210918726ca8SJoonsoo Kim int node, bool free_all, struct list_head *list)
211018726ca8SJoonsoo Kim {
211118726ca8SJoonsoo Kim int tofree;
211218726ca8SJoonsoo Kim
211318726ca8SJoonsoo Kim if (!ac || !ac->avail)
211418726ca8SJoonsoo Kim return;
211518726ca8SJoonsoo Kim
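/*
 * Flush everything when free_all is set; otherwise trim roughly a
 * fifth of the array's limit, but never more than half of what is
 * currently cached: e.g. with limit == 120 and only 10 cached
 * entries, tofree is first (120 + 4) / 5 = 24, then capped to
 * (10 + 1) / 2 = 5.
 */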
211618726ca8SJoonsoo Kim tofree = free_all ? ac->avail : (ac->limit + 4) / 5;
211718726ca8SJoonsoo Kim if (tofree > ac->avail)
211818726ca8SJoonsoo Kim tofree = (ac->avail + 1) / 2;
211918726ca8SJoonsoo Kim
212018726ca8SJoonsoo Kim free_block(cachep, ac->entry, tofree, node, list);
212118726ca8SJoonsoo Kim ac->avail -= tofree;
212218726ca8SJoonsoo Kim memmove(ac->entry, &(ac->entry[tofree]), sizeof(void *) * ac->avail);
212318726ca8SJoonsoo Kim }
2124aab2207cSChristoph Lameter
21251da177e4SLinus Torvalds static void do_drain(void *arg)
21261da177e4SLinus Torvalds {
2127a737b3e2SAndrew Morton struct kmem_cache *cachep = arg;
21281da177e4SLinus Torvalds struct array_cache *ac;
21297d6e6d09SLee Schermerhorn int node = numa_mem_id();
213018bf8541SChristoph Lameter struct kmem_cache_node *n;
213197654dfaSJoonsoo Kim LIST_HEAD(list);
21321da177e4SLinus Torvalds
21331da177e4SLinus Torvalds check_irq_off();
21349a2dba4bSPekka Enberg ac = cpu_cache_get(cachep);
213518bf8541SChristoph Lameter n = get_node(cachep, node);
2136b539ce9fSJiri Kosina raw_spin_lock(&n->list_lock);
213797654dfaSJoonsoo Kim free_block(cachep, ac->entry, ac->avail, node, &list);
2138b539ce9fSJiri Kosina raw_spin_unlock(&n->list_lock);
21391da177e4SLinus Torvalds ac->avail = 0;
2140678ff6a7SShakeel Butt slabs_destroy(cachep, &list);
21411da177e4SLinus Torvalds }
21421da177e4SLinus Torvalds
2143343e0d7aSPekka Enberg static void drain_cpu_caches(struct kmem_cache *cachep)
21441da177e4SLinus Torvalds {
2145ce8eb6c4SChristoph Lameter struct kmem_cache_node *n;
2146e498be7dSChristoph Lameter int node;
214718726ca8SJoonsoo Kim LIST_HEAD(list);
2148e498be7dSChristoph Lameter
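/*
 * Flush the per-CPU array on every CPU first, then any alien caches,
 * and finally each node's shared array.
 */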
214915c8b6c1SJens Axboe on_each_cpu(do_drain, cachep, 1);
21501da177e4SLinus Torvalds check_irq_on();
215118bf8541SChristoph Lameter for_each_kmem_cache_node(cachep, node, n)
215218bf8541SChristoph Lameter if (n->alien)
2153ce8eb6c4SChristoph Lameter drain_alien_cache(cachep, n->alien);
2154a4523a8bSRoland Dreier
215518726ca8SJoonsoo Kim for_each_kmem_cache_node(cachep, node, n) {
2156b539ce9fSJiri Kosina raw_spin_lock_irq(&n->list_lock);
215718726ca8SJoonsoo Kim drain_array_locked(cachep, n->shared, node, true, &list);
2158b539ce9fSJiri Kosina raw_spin_unlock_irq(&n->list_lock);
215918726ca8SJoonsoo Kim
216018726ca8SJoonsoo Kim slabs_destroy(cachep, &list);
216118726ca8SJoonsoo Kim }
2162e498be7dSChristoph Lameter }
21631da177e4SLinus Torvalds
2164ed11d9ebSChristoph Lameter /*
2165ed11d9ebSChristoph Lameter * Remove slabs from the list of free slabs.
2166ed11d9ebSChristoph Lameter * Specify the number of slabs to drain in tofree.
2167ed11d9ebSChristoph Lameter *
2168ed11d9ebSChristoph Lameter * Returns the actual number of slabs released.
2169ed11d9ebSChristoph Lameter */
2170ed11d9ebSChristoph Lameter static int drain_freelist(struct kmem_cache *cache,
2171ce8eb6c4SChristoph Lameter struct kmem_cache_node *n, int tofree)
21721da177e4SLinus Torvalds {
21731da177e4SLinus Torvalds struct list_head *p;
2174ed11d9ebSChristoph Lameter int nr_freed;
21757981e67eSVlastimil Babka struct slab *slab;
21761da177e4SLinus Torvalds
2177ed11d9ebSChristoph Lameter nr_freed = 0;
2178ce8eb6c4SChristoph Lameter while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
2179ed11d9ebSChristoph Lameter
2180b539ce9fSJiri Kosina raw_spin_lock_irq(&n->list_lock);
2181ce8eb6c4SChristoph Lameter p = n->slabs_free.prev;
2182ce8eb6c4SChristoph Lameter if (p == &n->slabs_free) {
2183b539ce9fSJiri Kosina raw_spin_unlock_irq(&n->list_lock);
2184ed11d9ebSChristoph Lameter goto out;
2185ed11d9ebSChristoph Lameter }
21861da177e4SLinus Torvalds
21877981e67eSVlastimil Babka slab = list_entry(p, struct slab, slab_list);
21887981e67eSVlastimil Babka list_del(&slab->slab_list);
2189f728b0a5SGreg Thelen n->free_slabs--;
2190bf00bd34SDavid Rientjes n->total_slabs--;
2191ed11d9ebSChristoph Lameter /*
2192ed11d9ebSChristoph Lameter * Safe to drop the lock. The slab is no longer linked
2193ed11d9ebSChristoph Lameter * to the cache.
2194ed11d9ebSChristoph Lameter */
2195ce8eb6c4SChristoph Lameter n->free_objects -= cache->num;
2196b539ce9fSJiri Kosina raw_spin_unlock_irq(&n->list_lock);
21977981e67eSVlastimil Babka slab_destroy(cache, slab);
2198ed11d9ebSChristoph Lameter nr_freed++;
2199cc2e9d2bSDavid Rientjes
2200cc2e9d2bSDavid Rientjes cond_resched();
22011da177e4SLinus Torvalds }
2202ed11d9ebSChristoph Lameter out:
2203ed11d9ebSChristoph Lameter return nr_freed;
22041da177e4SLinus Torvalds }
22051da177e4SLinus Torvalds
2206f9e13c0aSShakeel Butt bool __kmem_cache_empty(struct kmem_cache *s)
2207f9e13c0aSShakeel Butt {
2208f9e13c0aSShakeel Butt int node;
2209f9e13c0aSShakeel Butt struct kmem_cache_node *n;
2210f9e13c0aSShakeel Butt
2211f9e13c0aSShakeel Butt for_each_kmem_cache_node(s, node, n)
2212f9e13c0aSShakeel Butt if (!list_empty(&n->slabs_full) ||
2213f9e13c0aSShakeel Butt !list_empty(&n->slabs_partial))
2214f9e13c0aSShakeel Butt return false;
2215f9e13c0aSShakeel Butt return true;
2216f9e13c0aSShakeel Butt }
2217f9e13c0aSShakeel Butt
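/*
 * Drain all per-CPU and shared arrays and release every free slab.
 * Returns 0 if the cache is now empty, 1 if full or partial slabs
 * still hold live objects on some node.
 */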
2218c9fc5864STejun Heo int __kmem_cache_shrink(struct kmem_cache *cachep)
2219e498be7dSChristoph Lameter {
222018bf8541SChristoph Lameter int ret = 0;
222118bf8541SChristoph Lameter int node;
2222ce8eb6c4SChristoph Lameter struct kmem_cache_node *n;
2223e498be7dSChristoph Lameter
2224e498be7dSChristoph Lameter drain_cpu_caches(cachep);
2225e498be7dSChristoph Lameter
2226e498be7dSChristoph Lameter check_irq_on();
222718bf8541SChristoph Lameter for_each_kmem_cache_node(cachep, node, n) {
2228a5aa63a5SJoonsoo Kim drain_freelist(cachep, n, INT_MAX);
2229ed11d9ebSChristoph Lameter
2230ce8eb6c4SChristoph Lameter ret += !list_empty(&n->slabs_full) ||
2231ce8eb6c4SChristoph Lameter !list_empty(&n->slabs_partial);
2232e498be7dSChristoph Lameter }
2233e498be7dSChristoph Lameter return (ret ? 1 : 0);
2234e498be7dSChristoph Lameter }
2235e498be7dSChristoph Lameter
2236945cf2b6SChristoph Lameter int __kmem_cache_shutdown(struct kmem_cache *cachep)
22371da177e4SLinus Torvalds {
2238c9fc5864STejun Heo return __kmem_cache_shrink(cachep);
223952b4b950SDmitry Safonov }
224052b4b950SDmitry Safonov
224152b4b950SDmitry Safonov void __kmem_cache_release(struct kmem_cache *cachep)
224252b4b950SDmitry Safonov {
224312c3667fSChristoph Lameter int i;
2244ce8eb6c4SChristoph Lameter struct kmem_cache_node *n;
224512c3667fSChristoph Lameter
2246c7ce4f60SThomas Garnier cache_random_seq_destroy(cachep);
2247c7ce4f60SThomas Garnier
2248bf0dea23SJoonsoo Kim free_percpu(cachep->cpu_cache);
224912c3667fSChristoph Lameter
2250ce8eb6c4SChristoph Lameter /* NUMA: free the node structures */
225118bf8541SChristoph Lameter for_each_kmem_cache_node(cachep, i, n) {
2252ce8eb6c4SChristoph Lameter kfree(n->shared);
2253ce8eb6c4SChristoph Lameter free_alien_cache(n->alien);
2254ce8eb6c4SChristoph Lameter kfree(n);
225518bf8541SChristoph Lameter cachep->node[i] = NULL;
22561da177e4SLinus Torvalds }
22571da177e4SLinus Torvalds }
22581da177e4SLinus Torvalds
2259e5ac9c5aSRavikiran G Thirumalai /*
2260e5ac9c5aSRavikiran G Thirumalai * Get the memory for a slab management obj.
22615f0985bbSJianyu Zhan *
22625f0985bbSJianyu Zhan * For a slab cache when the slab descriptor is off-slab, the
22635f0985bbSJianyu Zhan * slab descriptor can't come from the same cache which is being created,
22645f0985bbSJianyu Zhan * because that would mean deferring the creation of
22655f0985bbSJianyu Zhan * the kmalloc_{dma,}_cache of size sizeof(slab descriptor) to this point,
22665f0985bbSJianyu Zhan * where we would eventually call down to __kmem_cache_create(), which
226780d01558SColin Ian King * in turn looks up in the kmalloc_{dma,}_caches for the desired-size one.
22685f0985bbSJianyu Zhan * This is a "chicken-and-egg" problem.
22695f0985bbSJianyu Zhan *
22705f0985bbSJianyu Zhan * So the off-slab slab descriptor shall come from the kmalloc_{dma,}_caches,
22715f0985bbSJianyu Zhan * which are all initialized during kmem_cache_init().
2272e5ac9c5aSRavikiran G Thirumalai */
22737e007355SJoonsoo Kim static void *alloc_slabmgmt(struct kmem_cache *cachep,
22747981e67eSVlastimil Babka struct slab *slab, int colour_off,
22750c3aa83eSJoonsoo Kim gfp_t local_flags, int nodeid)
22761da177e4SLinus Torvalds {
22777e007355SJoonsoo Kim void *freelist;
22787981e67eSVlastimil Babka void *addr = slab_address(slab);
22791da177e4SLinus Torvalds
22807981e67eSVlastimil Babka slab->s_mem = addr + colour_off;
22817981e67eSVlastimil Babka slab->active = 0;
22822e6b3602SJoonsoo Kim
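/*
 * OBJFREELIST_SLAB caches reserve no separate freelist area at all:
 * the index array is stored inside one of the objects, chosen later
 * by cache_init_objs() or shuffle_freelist().
 */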
2283b03a017bSJoonsoo Kim if (OBJFREELIST_SLAB(cachep))
2284b03a017bSJoonsoo Kim freelist = NULL;
2285b03a017bSJoonsoo Kim else if (OFF_SLAB(cachep)) {
22861da177e4SLinus Torvalds /* Slab management obj is off-slab. */
2287e36ce448SHyeonggon Yoo freelist = kmalloc_node(cachep->freelist_size,
22888759ec50SPekka Enberg local_flags, nodeid);
22891da177e4SLinus Torvalds } else {
22902e6b3602SJoonsoo Kim /* We will use the last bytes of the slab for the freelist */
22912e6b3602SJoonsoo Kim freelist = addr + (PAGE_SIZE << cachep->gfporder) -
22922e6b3602SJoonsoo Kim cachep->freelist_size;
22931da177e4SLinus Torvalds }
22942e6b3602SJoonsoo Kim
22958456a648SJoonsoo Kim return freelist;
22961da177e4SLinus Torvalds }
22971da177e4SLinus Torvalds
22987981e67eSVlastimil Babka static inline freelist_idx_t get_free_obj(struct slab *slab, unsigned int idx)
22991da177e4SLinus Torvalds {
23007981e67eSVlastimil Babka return ((freelist_idx_t *) slab->freelist)[idx];
2301e5c58dfdSJoonsoo Kim }
2302e5c58dfdSJoonsoo Kim
23037981e67eSVlastimil Babka static inline void set_free_obj(struct slab *slab,
23047cc68973SJoonsoo Kim unsigned int idx, freelist_idx_t val)
2305e5c58dfdSJoonsoo Kim {
23067981e67eSVlastimil Babka ((freelist_idx_t *)(slab->freelist))[idx] = val;
23071da177e4SLinus Torvalds }
23081da177e4SLinus Torvalds
23097981e67eSVlastimil Babka static void cache_init_objs_debug(struct kmem_cache *cachep, struct slab *slab)
23101da177e4SLinus Torvalds {
231110b2e9e8SJoonsoo Kim #if DEBUG
23121da177e4SLinus Torvalds int i;
23131da177e4SLinus Torvalds
23141da177e4SLinus Torvalds for (i = 0; i < cachep->num; i++) {
23157981e67eSVlastimil Babka void *objp = index_to_obj(cachep, slab, i);
231610b2e9e8SJoonsoo Kim
23171da177e4SLinus Torvalds if (cachep->flags & SLAB_STORE_USER)
23181da177e4SLinus Torvalds *dbg_userword(cachep, objp) = NULL;
23191da177e4SLinus Torvalds
23201da177e4SLinus Torvalds if (cachep->flags & SLAB_RED_ZONE) {
23211da177e4SLinus Torvalds *dbg_redzone1(cachep, objp) = RED_INACTIVE;
23221da177e4SLinus Torvalds *dbg_redzone2(cachep, objp) = RED_INACTIVE;
23231da177e4SLinus Torvalds }
23241da177e4SLinus Torvalds /*
2325a737b3e2SAndrew Morton * Constructors are not allowed to allocate memory from the same
2326a737b3e2SAndrew Morton * cache which they are a constructor for. Otherwise, deadlock.
2327a737b3e2SAndrew Morton * They must also be threaded.
23281da177e4SLinus Torvalds */
23297ed2f9e6SAlexander Potapenko if (cachep->ctor && !(cachep->flags & SLAB_POISON)) {
23307ed2f9e6SAlexander Potapenko kasan_unpoison_object_data(cachep,
23317ed2f9e6SAlexander Potapenko objp + obj_offset(cachep));
233251cc5068SAlexey Dobriyan cachep->ctor(objp + obj_offset(cachep));
23337ed2f9e6SAlexander Potapenko kasan_poison_object_data(
23347ed2f9e6SAlexander Potapenko cachep, objp + obj_offset(cachep));
23357ed2f9e6SAlexander Potapenko }
23361da177e4SLinus Torvalds
23371da177e4SLinus Torvalds if (cachep->flags & SLAB_RED_ZONE) {
23381da177e4SLinus Torvalds if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
2339756a025fSJoe Perches slab_error(cachep, "constructor overwrote the end of an object");
23401da177e4SLinus Torvalds if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
2341756a025fSJoe Perches slab_error(cachep, "constructor overwrote the start of an object");
23421da177e4SLinus Torvalds }
234340b44137SJoonsoo Kim /* need to poison the objs? */
234440b44137SJoonsoo Kim if (cachep->flags & SLAB_POISON) {
234540b44137SJoonsoo Kim poison_obj(cachep, objp, POISON_FREE);
234680552f0fSQian Cai slab_kernel_map(cachep, objp, 0);
234740b44137SJoonsoo Kim }
234810b2e9e8SJoonsoo Kim }
23491da177e4SLinus Torvalds #endif
235010b2e9e8SJoonsoo Kim }
235110b2e9e8SJoonsoo Kim
2352c7ce4f60SThomas Garnier #ifdef CONFIG_SLAB_FREELIST_RANDOM
2353c7ce4f60SThomas Garnier /* Hold information during a freelist initialization */
2354f7e466e9SDavid Keisar Schmidt struct freelist_init_state {
2355c7ce4f60SThomas Garnier unsigned int pos;
23567c00fce9SThomas Garnier unsigned int *list;
2357c7ce4f60SThomas Garnier unsigned int count;
2358c7ce4f60SThomas Garnier };
2359c7ce4f60SThomas Garnier
2360c7ce4f60SThomas Garnier /*
2361f0953a1bSIngo Molnar * Initialize the state based on the randomization method available.
2362f0953a1bSIngo Molnar * Return true if the pre-computed list is available, false otherwise.
2363c7ce4f60SThomas Garnier */
2364f7e466e9SDavid Keisar Schmidt static bool freelist_state_initialize(struct freelist_init_state *state,
2365c7ce4f60SThomas Garnier struct kmem_cache *cachep,
2366c7ce4f60SThomas Garnier unsigned int count)
2367c7ce4f60SThomas Garnier {
2368c7ce4f60SThomas Garnier bool ret;
2369c7ce4f60SThomas Garnier if (!cachep->random_seq) {
2370c7ce4f60SThomas Garnier ret = false;
2371c7ce4f60SThomas Garnier } else {
2372c7ce4f60SThomas Garnier state->list = cachep->random_seq;
2373c7ce4f60SThomas Garnier state->count = count;
2374f7e466e9SDavid Keisar Schmidt state->pos = get_random_u32_below(count);
2375c7ce4f60SThomas Garnier ret = true;
2376c7ce4f60SThomas Garnier }
2377c7ce4f60SThomas Garnier return ret;
2378c7ce4f60SThomas Garnier }
2379c7ce4f60SThomas Garnier
2380c7ce4f60SThomas Garnier /* Get the next entry from the randomized pre-computed list, wrapping at the end */
2381f7e466e9SDavid Keisar Schmidt static freelist_idx_t next_random_slot(struct freelist_init_state *state)
2382c7ce4f60SThomas Garnier {
2383c4e490cfSJohn Sperbeck if (state->pos >= state->count)
2384c4e490cfSJohn Sperbeck state->pos = 0;
2385c4e490cfSJohn Sperbeck return state->list[state->pos++];
2386c7ce4f60SThomas Garnier }
2387c7ce4f60SThomas Garnier
23887c00fce9SThomas Garnier /* Swap two freelist entries */
23897981e67eSVlastimil Babka static void swap_free_obj(struct slab *slab, unsigned int a, unsigned int b)
23907c00fce9SThomas Garnier {
23917981e67eSVlastimil Babka swap(((freelist_idx_t *) slab->freelist)[a],
23927981e67eSVlastimil Babka ((freelist_idx_t *) slab->freelist)[b]);
23937c00fce9SThomas Garnier }
23947c00fce9SThomas Garnier
2395c7ce4f60SThomas Garnier /*
2396c7ce4f60SThomas Garnier * Shuffle the freelist initialization state based on pre-computed lists.
2397c7ce4f60SThomas Garnier * Return true if the list was successfully shuffled, false otherwise.
2398c7ce4f60SThomas Garnier */
23997981e67eSVlastimil Babka static bool shuffle_freelist(struct kmem_cache *cachep, struct slab *slab)
2400c7ce4f60SThomas Garnier {
24017c00fce9SThomas Garnier unsigned int objfreelist = 0, i, rand, count = cachep->num;
2402f7e466e9SDavid Keisar Schmidt struct freelist_init_state state;
2403c7ce4f60SThomas Garnier bool precomputed;
2404c7ce4f60SThomas Garnier
2405c7ce4f60SThomas Garnier if (count < 2)
2406c7ce4f60SThomas Garnier return false;
2407c7ce4f60SThomas Garnier
2408c7ce4f60SThomas Garnier precomputed = freelist_state_initialize(&state, cachep, count);
2409c7ce4f60SThomas Garnier
2410c7ce4f60SThomas Garnier /* Take a random entry as the objfreelist */
2411c7ce4f60SThomas Garnier if (OBJFREELIST_SLAB(cachep)) {
2412c7ce4f60SThomas Garnier if (!precomputed)
2413c7ce4f60SThomas Garnier objfreelist = count - 1;
2414c7ce4f60SThomas Garnier else
2415c7ce4f60SThomas Garnier objfreelist = next_random_slot(&state);
24167981e67eSVlastimil Babka slab->freelist = index_to_obj(cachep, slab, objfreelist) +
2417c7ce4f60SThomas Garnier obj_offset(cachep);
2418c7ce4f60SThomas Garnier count--;
2419c7ce4f60SThomas Garnier }
2420c7ce4f60SThomas Garnier
2421c7ce4f60SThomas Garnier /*
2422c7ce4f60SThomas Garnier * On early boot, generate the list dynamically.
2423c7ce4f60SThomas Garnier * Later use a pre-computed list for speed.
2424c7ce4f60SThomas Garnier */
2425c7ce4f60SThomas Garnier if (!precomputed) {
24267c00fce9SThomas Garnier for (i = 0; i < count; i++)
24277981e67eSVlastimil Babka set_free_obj(slab, i, i);
24287c00fce9SThomas Garnier
24297c00fce9SThomas Garnier /* Fisher-Yates shuffle */
24307c00fce9SThomas Garnier for (i = count - 1; i > 0; i--) {
2431f7e466e9SDavid Keisar Schmidt rand = get_random_u32_below(i + 1);
24327981e67eSVlastimil Babka swap_free_obj(slab, i, rand);
24337c00fce9SThomas Garnier }
2434c7ce4f60SThomas Garnier } else {
2435c7ce4f60SThomas Garnier for (i = 0; i < count; i++)
24367981e67eSVlastimil Babka set_free_obj(slab, i, next_random_slot(&state));
2437c7ce4f60SThomas Garnier }
2438c7ce4f60SThomas Garnier
2439c7ce4f60SThomas Garnier if (OBJFREELIST_SLAB(cachep))
24407981e67eSVlastimil Babka set_free_obj(slab, cachep->num - 1, objfreelist);
2441c7ce4f60SThomas Garnier
2442c7ce4f60SThomas Garnier return true;
2443c7ce4f60SThomas Garnier }
2444c7ce4f60SThomas Garnier #else
2445c7ce4f60SThomas Garnier static inline bool shuffle_freelist(struct kmem_cache *cachep,
24467981e67eSVlastimil Babka struct slab *slab)
2447c7ce4f60SThomas Garnier {
2448c7ce4f60SThomas Garnier return false;
2449c7ce4f60SThomas Garnier }
2450c7ce4f60SThomas Garnier #endif /* CONFIG_SLAB_FREELIST_RANDOM */
2451c7ce4f60SThomas Garnier
245210b2e9e8SJoonsoo Kim static void cache_init_objs(struct kmem_cache *cachep,
24537981e67eSVlastimil Babka struct slab *slab)
245410b2e9e8SJoonsoo Kim {
245510b2e9e8SJoonsoo Kim int i;
24567ed2f9e6SAlexander Potapenko void *objp;
2457c7ce4f60SThomas Garnier bool shuffled;
245810b2e9e8SJoonsoo Kim
24597981e67eSVlastimil Babka cache_init_objs_debug(cachep, slab);
246010b2e9e8SJoonsoo Kim
2461c7ce4f60SThomas Garnier /* Try to randomize the freelist if enabled */
24627981e67eSVlastimil Babka shuffled = shuffle_freelist(cachep, slab);
2463c7ce4f60SThomas Garnier
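/*
 * Without randomization the embedded freelist goes into the last
 * object of the slab; that object is handed out last, so its space
 * can hold the index array for as long as the slab is not full.
 */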
2464c7ce4f60SThomas Garnier if (!shuffled && OBJFREELIST_SLAB(cachep)) {
24657981e67eSVlastimil Babka slab->freelist = index_to_obj(cachep, slab, cachep->num - 1) +
2466b03a017bSJoonsoo Kim obj_offset(cachep);
2467b03a017bSJoonsoo Kim }
2468b03a017bSJoonsoo Kim
246910b2e9e8SJoonsoo Kim for (i = 0; i < cachep->num; i++) {
24707981e67eSVlastimil Babka objp = index_to_obj(cachep, slab, i);
24714d176711SAndrey Konovalov objp = kasan_init_slab_obj(cachep, objp);
2472b3cbd9bfSAndrey Ryabinin
247310b2e9e8SJoonsoo Kim /* constructor could break poison info */
24747ed2f9e6SAlexander Potapenko if (DEBUG == 0 && cachep->ctor) {
24757ed2f9e6SAlexander Potapenko kasan_unpoison_object_data(cachep, objp);
24767ed2f9e6SAlexander Potapenko cachep->ctor(objp);
24777ed2f9e6SAlexander Potapenko kasan_poison_object_data(cachep, objp);
24787ed2f9e6SAlexander Potapenko }
247910b2e9e8SJoonsoo Kim
2480c7ce4f60SThomas Garnier if (!shuffled)
24817981e67eSVlastimil Babka set_free_obj(slab, i, i);
24821da177e4SLinus Torvalds }
24831da177e4SLinus Torvalds }
24841da177e4SLinus Torvalds
24857981e67eSVlastimil Babka static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slab)
248678d382d7SMatthew Dobson {
2487b1cb0982SJoonsoo Kim void *objp;
248878d382d7SMatthew Dobson
24897981e67eSVlastimil Babka objp = index_to_obj(cachep, slab, get_free_obj(slab, slab->active));
24907981e67eSVlastimil Babka slab->active++;
249178d382d7SMatthew Dobson
249278d382d7SMatthew Dobson return objp;
249378d382d7SMatthew Dobson }
249478d382d7SMatthew Dobson
2495260b61ddSJoonsoo Kim static void slab_put_obj(struct kmem_cache *cachep,
24967981e67eSVlastimil Babka struct slab *slab, void *objp)
249778d382d7SMatthew Dobson {
249840f3bf0cSVlastimil Babka unsigned int objnr = obj_to_index(cachep, slab, objp);
249978d382d7SMatthew Dobson #if DEBUG
250016025177SJoonsoo Kim unsigned int i;
250178d382d7SMatthew Dobson
2502b1cb0982SJoonsoo Kim /* Verify double free bug */
25037981e67eSVlastimil Babka for (i = slab->active; i < cachep->num; i++) {
25047981e67eSVlastimil Babka if (get_free_obj(slab, i) == objnr) {
250585c3e4a5SGeert Uytterhoeven pr_err("slab: double free detected in cache '%s', objp %px\n",
2506756a025fSJoe Perches cachep->name, objp);
250778d382d7SMatthew Dobson BUG();
250878d382d7SMatthew Dobson }
2509b1cb0982SJoonsoo Kim }
251078d382d7SMatthew Dobson #endif
25117981e67eSVlastimil Babka slab->active--;
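/*
 * A full OBJFREELIST slab has no freelist pointer; the first object
 * freed donates its own space to host the index array again.
 */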
25127981e67eSVlastimil Babka if (!slab->freelist)
25137981e67eSVlastimil Babka slab->freelist = objp + obj_offset(cachep);
2514b03a017bSJoonsoo Kim
25157981e67eSVlastimil Babka set_free_obj(slab, slab->active, objnr);
251678d382d7SMatthew Dobson }
251778d382d7SMatthew Dobson
25184776874fSPekka Enberg /*
25191da177e4SLinus Torvalds * Grow (by 1) the number of slabs within a cache. This is called by
25201da177e4SLinus Torvalds * kmem_cache_alloc() when there are no active objs left in a cache.
25211da177e4SLinus Torvalds */
25227981e67eSVlastimil Babka static struct slab *cache_grow_begin(struct kmem_cache *cachep,
252376b342bdSJoonsoo Kim gfp_t flags, int nodeid)
25241da177e4SLinus Torvalds {
25257e007355SJoonsoo Kim void *freelist;
25261da177e4SLinus Torvalds size_t offset;
25276daa0e28SAl Viro gfp_t local_flags;
2528dd35f71aSVlastimil Babka int slab_node;
2529ce8eb6c4SChristoph Lameter struct kmem_cache_node *n;
25307981e67eSVlastimil Babka struct slab *slab;
25311da177e4SLinus Torvalds
2532a737b3e2SAndrew Morton /*
2533a737b3e2SAndrew Morton * Be lazy and only check for valid flags here, keeping it out of the
2534a737b3e2SAndrew Morton * critical path in kmem_cache_alloc().
25351da177e4SLinus Torvalds */
253644405099SLong Li if (unlikely(flags & GFP_SLAB_BUG_MASK))
253744405099SLong Li flags = kmalloc_fix_flags(flags);
253844405099SLong Li
2539128227e7SMatthew Wilcox WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
25406cb06229SChristoph Lameter local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
25411da177e4SLinus Torvalds
25421da177e4SLinus Torvalds check_irq_off();
2543d0164adcSMel Gorman if (gfpflags_allow_blocking(local_flags))
25441da177e4SLinus Torvalds local_irq_enable();
25451da177e4SLinus Torvalds
25461da177e4SLinus Torvalds /*
2547a737b3e2SAndrew Morton * Get mem for the objs. Attempt to allocate a physical page from
2548a737b3e2SAndrew Morton * 'nodeid'.
2549e498be7dSChristoph Lameter */
25507981e67eSVlastimil Babka slab = kmem_getpages(cachep, local_flags, nodeid);
25517981e67eSVlastimil Babka if (!slab)
25521da177e4SLinus Torvalds goto failed;
25531da177e4SLinus Torvalds
2554dd35f71aSVlastimil Babka slab_node = slab_nid(slab);
2555dd35f71aSVlastimil Babka n = get_node(cachep, slab_node);
255603d1d43aSJoonsoo Kim
255703d1d43aSJoonsoo Kim /* Get colour for the slab, and calculate the next value. */
255803d1d43aSJoonsoo Kim n->colour_next++;
255903d1d43aSJoonsoo Kim if (n->colour_next >= cachep->colour)
256003d1d43aSJoonsoo Kim n->colour_next = 0;
256103d1d43aSJoonsoo Kim
256203d1d43aSJoonsoo Kim offset = n->colour_next;
256303d1d43aSJoonsoo Kim if (offset >= cachep->colour)
256403d1d43aSJoonsoo Kim offset = 0;
256503d1d43aSJoonsoo Kim
256603d1d43aSJoonsoo Kim offset *= cachep->colour_off;
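/*
 * The colour offset just computed cycles through the cache's colour
 * range so that objects in successive slabs do not all start on the
 * same cache line: for example, with colour == 4 and colour_off == 64
 * slabs are coloured at byte offsets 0, 64, 128, 192, 0, ...
 */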
256703d1d43aSJoonsoo Kim
256851dedad0SAndrey Konovalov /*
256951dedad0SAndrey Konovalov * Call kasan_poison_slab() before calling alloc_slabmgmt(), so
257051dedad0SAndrey Konovalov * page_address() in the latter returns a non-tagged pointer,
257151dedad0SAndrey Konovalov * as it should be for slab pages.
257251dedad0SAndrey Konovalov */
25736e48a966SMatthew Wilcox (Oracle) kasan_poison_slab(slab);
257451dedad0SAndrey Konovalov
25751da177e4SLinus Torvalds /* Get slab management. */
25767981e67eSVlastimil Babka freelist = alloc_slabmgmt(cachep, slab, offset,
2577dd35f71aSVlastimil Babka local_flags & ~GFP_CONSTRAINT_MASK, slab_node);
2578b03a017bSJoonsoo Kim if (OFF_SLAB(cachep) && !freelist)
25791da177e4SLinus Torvalds goto opps1;
25801da177e4SLinus Torvalds
25817981e67eSVlastimil Babka slab->slab_cache = cachep;
25827981e67eSVlastimil Babka slab->freelist = freelist;
25831da177e4SLinus Torvalds
25847981e67eSVlastimil Babka cache_init_objs(cachep, slab);
25851da177e4SLinus Torvalds
2586d0164adcSMel Gorman if (gfpflags_allow_blocking(local_flags))
25871da177e4SLinus Torvalds local_irq_disable();
25881da177e4SLinus Torvalds
25897981e67eSVlastimil Babka return slab;
259076b342bdSJoonsoo Kim
25911da177e4SLinus Torvalds opps1:
25927981e67eSVlastimil Babka kmem_freepages(cachep, slab);
25931da177e4SLinus Torvalds failed:
2594d0164adcSMel Gorman if (gfpflags_allow_blocking(local_flags))
25951da177e4SLinus Torvalds local_irq_disable();
259676b342bdSJoonsoo Kim return NULL;
259776b342bdSJoonsoo Kim }
259876b342bdSJoonsoo Kim
25997981e67eSVlastimil Babka static void cache_grow_end(struct kmem_cache *cachep, struct slab *slab)
260076b342bdSJoonsoo Kim {
260176b342bdSJoonsoo Kim struct kmem_cache_node *n;
260276b342bdSJoonsoo Kim void *list = NULL;
260376b342bdSJoonsoo Kim
260476b342bdSJoonsoo Kim check_irq_off();
260576b342bdSJoonsoo Kim
26067981e67eSVlastimil Babka if (!slab)
260776b342bdSJoonsoo Kim return;
260876b342bdSJoonsoo Kim
26097981e67eSVlastimil Babka INIT_LIST_HEAD(&slab->slab_list);
26107981e67eSVlastimil Babka n = get_node(cachep, slab_nid(slab));
261176b342bdSJoonsoo Kim
2612b539ce9fSJiri Kosina raw_spin_lock(&n->list_lock);
2613bf00bd34SDavid Rientjes n->total_slabs++;
26147981e67eSVlastimil Babka if (!slab->active) {
26157981e67eSVlastimil Babka list_add_tail(&slab->slab_list, &n->slabs_free);
2616f728b0a5SGreg Thelen n->free_slabs++;
2617bf00bd34SDavid Rientjes } else
26187981e67eSVlastimil Babka fixup_slab_list(cachep, n, slab, &list);
261907a63c41SAruna Ramakrishna
262076b342bdSJoonsoo Kim STATS_INC_GROWN(cachep);
26217981e67eSVlastimil Babka n->free_objects += cachep->num - slab->active;
2622b539ce9fSJiri Kosina raw_spin_unlock(&n->list_lock);
262376b342bdSJoonsoo Kim
262476b342bdSJoonsoo Kim fixup_objfreelist_debug(cachep, &list);
26251da177e4SLinus Torvalds }
26261da177e4SLinus Torvalds
26271da177e4SLinus Torvalds #if DEBUG
26281da177e4SLinus Torvalds
26291da177e4SLinus Torvalds /*
26301da177e4SLinus Torvalds * Perform extra freeing checks:
26311da177e4SLinus Torvalds * - detect bad pointers.
26321da177e4SLinus Torvalds * - POISON/RED_ZONE checking
26331da177e4SLinus Torvalds */
26341da177e4SLinus Torvalds static void kfree_debugcheck(const void *objp)
26351da177e4SLinus Torvalds {
26361da177e4SLinus Torvalds if (!virt_addr_valid(objp)) {
26371170532bSJoe Perches pr_err("kfree_debugcheck: out of range ptr %lxh\n",
26381da177e4SLinus Torvalds (unsigned long)objp);
26391da177e4SLinus Torvalds BUG();
26401da177e4SLinus Torvalds }
26411da177e4SLinus Torvalds }
26421da177e4SLinus Torvalds
264358ce1fd5SPekka Enberg static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
264458ce1fd5SPekka Enberg {
2645b46b8f19SDavid Woodhouse unsigned long long redzone1, redzone2;
264658ce1fd5SPekka Enberg
264758ce1fd5SPekka Enberg redzone1 = *dbg_redzone1(cache, obj);
264858ce1fd5SPekka Enberg redzone2 = *dbg_redzone2(cache, obj);
264958ce1fd5SPekka Enberg
265058ce1fd5SPekka Enberg /*
265158ce1fd5SPekka Enberg * Redzone is ok: both words still hold RED_ACTIVE.
265258ce1fd5SPekka Enberg */
265358ce1fd5SPekka Enberg if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
265458ce1fd5SPekka Enberg return;
265558ce1fd5SPekka Enberg
265658ce1fd5SPekka Enberg if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
265758ce1fd5SPekka Enberg slab_error(cache, "double free detected");
265858ce1fd5SPekka Enberg else
265958ce1fd5SPekka Enberg slab_error(cache, "memory outside object was overwritten");
266058ce1fd5SPekka Enberg
266185c3e4a5SGeert Uytterhoeven pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx\n",
266258ce1fd5SPekka Enberg obj, redzone1, redzone2);
266358ce1fd5SPekka Enberg }
266458ce1fd5SPekka Enberg
2665343e0d7aSPekka Enberg static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
26667c0cb9c6SEzequiel Garcia unsigned long caller)
26671da177e4SLinus Torvalds {
26681da177e4SLinus Torvalds unsigned int objnr;
26697981e67eSVlastimil Babka struct slab *slab;
26701da177e4SLinus Torvalds
267180cbd911SMatthew Wilcox BUG_ON(virt_to_cache(objp) != cachep);
267280cbd911SMatthew Wilcox
26733dafccf2SManfred Spraul objp -= obj_offset(cachep);
26741da177e4SLinus Torvalds kfree_debugcheck(objp);
26757981e67eSVlastimil Babka slab = virt_to_slab(objp);
26761da177e4SLinus Torvalds
26771da177e4SLinus Torvalds if (cachep->flags & SLAB_RED_ZONE) {
267858ce1fd5SPekka Enberg verify_redzone_free(cachep, objp);
26791da177e4SLinus Torvalds *dbg_redzone1(cachep, objp) = RED_INACTIVE;
26801da177e4SLinus Torvalds *dbg_redzone2(cachep, objp) = RED_INACTIVE;
26811da177e4SLinus Torvalds }
26827878c231SQian Cai if (cachep->flags & SLAB_STORE_USER)
26837c0cb9c6SEzequiel Garcia *dbg_userword(cachep, objp) = (void *)caller;
26841da177e4SLinus Torvalds
268540f3bf0cSVlastimil Babka objnr = obj_to_index(cachep, slab, objp);
26861da177e4SLinus Torvalds
26871da177e4SLinus Torvalds BUG_ON(objnr >= cachep->num);
26887981e67eSVlastimil Babka BUG_ON(objp != index_to_obj(cachep, slab, objnr));
26891da177e4SLinus Torvalds
26901da177e4SLinus Torvalds if (cachep->flags & SLAB_POISON) {
26911da177e4SLinus Torvalds poison_obj(cachep, objp, POISON_FREE);
269280552f0fSQian Cai slab_kernel_map(cachep, objp, 0);
26931da177e4SLinus Torvalds }
26941da177e4SLinus Torvalds return objp;
26951da177e4SLinus Torvalds }
26961da177e4SLinus Torvalds
26971da177e4SLinus Torvalds #else
26981da177e4SLinus Torvalds #define kfree_debugcheck(x) do { } while(0)
26991da177e4SLinus Torvalds #define cache_free_debugcheck(x, objp, z) (objp)
27001da177e4SLinus Torvalds #endif
27011da177e4SLinus Torvalds
2702b03a017bSJoonsoo Kim static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
2703b03a017bSJoonsoo Kim void **list)
2704b03a017bSJoonsoo Kim {
2705b03a017bSJoonsoo Kim #if DEBUG
2706b03a017bSJoonsoo Kim void *next = *list;
2707b03a017bSJoonsoo Kim void *objp;
2708b03a017bSJoonsoo Kim
2709b03a017bSJoonsoo Kim while (next) {
2710b03a017bSJoonsoo Kim objp = next - obj_offset(cachep);
2711b03a017bSJoonsoo Kim next = *(void **)next;
2712b03a017bSJoonsoo Kim poison_obj(cachep, objp, POISON_FREE);
2713b03a017bSJoonsoo Kim }
2714b03a017bSJoonsoo Kim #endif
2715b03a017bSJoonsoo Kim }
2716b03a017bSJoonsoo Kim
2717d8410234SJoonsoo Kim static inline void fixup_slab_list(struct kmem_cache *cachep,
27187981e67eSVlastimil Babka struct kmem_cache_node *n, struct slab *slab,
2719b03a017bSJoonsoo Kim void **list)
2720d8410234SJoonsoo Kim {
2721d8410234SJoonsoo Kim /* move slabp to correct slabp list: */
27227981e67eSVlastimil Babka list_del(&slab->slab_list);
27237981e67eSVlastimil Babka if (slab->active == cachep->num) {
27247981e67eSVlastimil Babka list_add(&slab->slab_list, &n->slabs_full);
2725b03a017bSJoonsoo Kim if (OBJFREELIST_SLAB(cachep)) {
2726b03a017bSJoonsoo Kim #if DEBUG
2727b03a017bSJoonsoo Kim /* Poisoning will be done without holding the lock */
2728b03a017bSJoonsoo Kim if (cachep->flags & SLAB_POISON) {
27297981e67eSVlastimil Babka void **objp = slab->freelist;
2730b03a017bSJoonsoo Kim
2731b03a017bSJoonsoo Kim *objp = *list;
2732b03a017bSJoonsoo Kim *list = objp;
2733b03a017bSJoonsoo Kim }
2734b03a017bSJoonsoo Kim #endif
27357981e67eSVlastimil Babka slab->freelist = NULL;
2736b03a017bSJoonsoo Kim }
2737b03a017bSJoonsoo Kim } else
27387981e67eSVlastimil Babka list_add(&slab->slab_list, &n->slabs_partial);
2739d8410234SJoonsoo Kim }
2740d8410234SJoonsoo Kim
2741f68f8dddSJoonsoo Kim /* Try to find non-pfmemalloc slab if needed */
27427981e67eSVlastimil Babka static noinline struct slab *get_valid_first_slab(struct kmem_cache_node *n,
27437981e67eSVlastimil Babka struct slab *slab, bool pfmemalloc)
2744f68f8dddSJoonsoo Kim {
27457981e67eSVlastimil Babka if (!slab)
2746f68f8dddSJoonsoo Kim return NULL;
2747f68f8dddSJoonsoo Kim
2748f68f8dddSJoonsoo Kim if (pfmemalloc)
27497981e67eSVlastimil Babka return slab;
2750f68f8dddSJoonsoo Kim
27517981e67eSVlastimil Babka if (!slab_test_pfmemalloc(slab))
27527981e67eSVlastimil Babka return slab;
2753f68f8dddSJoonsoo Kim
2754f68f8dddSJoonsoo Kim /* No need to keep pfmemalloc slab if we have enough free objects */
2755f68f8dddSJoonsoo Kim if (n->free_objects > n->free_limit) {
27567981e67eSVlastimil Babka slab_clear_pfmemalloc(slab);
27577981e67eSVlastimil Babka return slab;
2758f68f8dddSJoonsoo Kim }
2759f68f8dddSJoonsoo Kim
2760f68f8dddSJoonsoo Kim /* Move pfmemalloc slab to the end of list to speed up next search */
27617981e67eSVlastimil Babka list_del(&slab->slab_list);
27627981e67eSVlastimil Babka if (!slab->active) {
27637981e67eSVlastimil Babka list_add_tail(&slab->slab_list, &n->slabs_free);
2764bf00bd34SDavid Rientjes n->free_slabs++;
2765f728b0a5SGreg Thelen } else
27667981e67eSVlastimil Babka list_add_tail(&slab->slab_list, &n->slabs_partial);
2767f68f8dddSJoonsoo Kim
27687981e67eSVlastimil Babka list_for_each_entry(slab, &n->slabs_partial, slab_list) {
27697981e67eSVlastimil Babka if (!slab_test_pfmemalloc(slab))
27707981e67eSVlastimil Babka return slab;
2771f68f8dddSJoonsoo Kim }
2772f68f8dddSJoonsoo Kim
2773f728b0a5SGreg Thelen n->free_touched = 1;
27747981e67eSVlastimil Babka list_for_each_entry(slab, &n->slabs_free, slab_list) {
27757981e67eSVlastimil Babka if (!slab_test_pfmemalloc(slab)) {
2776bf00bd34SDavid Rientjes n->free_slabs--;
27777981e67eSVlastimil Babka return slab;
2778f68f8dddSJoonsoo Kim }
2779f728b0a5SGreg Thelen }
2780f68f8dddSJoonsoo Kim
2781f68f8dddSJoonsoo Kim return NULL;
2782f68f8dddSJoonsoo Kim }
2783f68f8dddSJoonsoo Kim
27847981e67eSVlastimil Babka static struct slab *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
27857aa0d227SGeliang Tang {
27867981e67eSVlastimil Babka struct slab *slab;
27877aa0d227SGeliang Tang
2788b539ce9fSJiri Kosina assert_raw_spin_locked(&n->list_lock);
27897981e67eSVlastimil Babka slab = list_first_entry_or_null(&n->slabs_partial, struct slab,
279016cb0ec7STobin C. Harding slab_list);
27917981e67eSVlastimil Babka if (!slab) {
27927aa0d227SGeliang Tang n->free_touched = 1;
27937981e67eSVlastimil Babka slab = list_first_entry_or_null(&n->slabs_free, struct slab,
279416cb0ec7STobin C. Harding slab_list);
27957981e67eSVlastimil Babka if (slab)
2796bf00bd34SDavid Rientjes n->free_slabs--;
27977aa0d227SGeliang Tang }
27987aa0d227SGeliang Tang
2799f68f8dddSJoonsoo Kim if (sk_memalloc_socks())
28007981e67eSVlastimil Babka slab = get_valid_first_slab(n, slab, pfmemalloc);
2801f68f8dddSJoonsoo Kim
28027981e67eSVlastimil Babka return slab;
28037aa0d227SGeliang Tang }
28047aa0d227SGeliang Tang
2805f68f8dddSJoonsoo Kim static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
2806f68f8dddSJoonsoo Kim struct kmem_cache_node *n, gfp_t flags)
2807f68f8dddSJoonsoo Kim {
28087981e67eSVlastimil Babka struct slab *slab;
2809f68f8dddSJoonsoo Kim void *obj;
2810f68f8dddSJoonsoo Kim void *list = NULL;
2811f68f8dddSJoonsoo Kim
2812f68f8dddSJoonsoo Kim if (!gfp_pfmemalloc_allowed(flags))
2813f68f8dddSJoonsoo Kim return NULL;
2814f68f8dddSJoonsoo Kim
2815b539ce9fSJiri Kosina raw_spin_lock(&n->list_lock);
28167981e67eSVlastimil Babka slab = get_first_slab(n, true);
28177981e67eSVlastimil Babka if (!slab) {
2818b539ce9fSJiri Kosina raw_spin_unlock(&n->list_lock);
2819f68f8dddSJoonsoo Kim return NULL;
2820f68f8dddSJoonsoo Kim }
2821f68f8dddSJoonsoo Kim
28227981e67eSVlastimil Babka obj = slab_get_obj(cachep, slab);
2823f68f8dddSJoonsoo Kim n->free_objects--;
2824f68f8dddSJoonsoo Kim
28257981e67eSVlastimil Babka fixup_slab_list(cachep, n, slab, &list);
2826f68f8dddSJoonsoo Kim
2827b539ce9fSJiri Kosina raw_spin_unlock(&n->list_lock);
2828f68f8dddSJoonsoo Kim fixup_objfreelist_debug(cachep, &list);
2829f68f8dddSJoonsoo Kim
2830f68f8dddSJoonsoo Kim return obj;
2831f68f8dddSJoonsoo Kim }
2832f68f8dddSJoonsoo Kim
2833213b4695SJoonsoo Kim /*
2834213b4695SJoonsoo Kim * Slab list should be fixed up by fixup_slab_list() for existing slab
2835213b4695SJoonsoo Kim * or cache_grow_end() for new slab
2836213b4695SJoonsoo Kim */
2837213b4695SJoonsoo Kim static __always_inline int alloc_block(struct kmem_cache *cachep,
28387981e67eSVlastimil Babka struct array_cache *ac, struct slab *slab, int batchcount)
2839213b4695SJoonsoo Kim {
2840213b4695SJoonsoo Kim /*
2841213b4695SJoonsoo Kim * There must be at least one object available for
2842213b4695SJoonsoo Kim * allocation.
2843213b4695SJoonsoo Kim */
28447981e67eSVlastimil Babka BUG_ON(slab->active >= cachep->num);
2845213b4695SJoonsoo Kim
28467981e67eSVlastimil Babka while (slab->active < cachep->num && batchcount--) {
2847213b4695SJoonsoo Kim STATS_INC_ALLOCED(cachep);
2848213b4695SJoonsoo Kim STATS_INC_ACTIVE(cachep);
2849213b4695SJoonsoo Kim STATS_SET_HIGH(cachep);
2850213b4695SJoonsoo Kim
28517981e67eSVlastimil Babka ac->entry[ac->avail++] = slab_get_obj(cachep, slab);
2852213b4695SJoonsoo Kim }
2853213b4695SJoonsoo Kim
2854213b4695SJoonsoo Kim return batchcount;
2855213b4695SJoonsoo Kim }
2856213b4695SJoonsoo Kim
2857f68f8dddSJoonsoo Kim static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
28581da177e4SLinus Torvalds {
28591da177e4SLinus Torvalds int batchcount;
2860ce8eb6c4SChristoph Lameter struct kmem_cache_node *n;
2861801faf0dSJoonsoo Kim struct array_cache *ac, *shared;
28621ca4cb24SPekka Enberg int node;
2863b03a017bSJoonsoo Kim void *list = NULL;
28647981e67eSVlastimil Babka struct slab *slab;
28651ca4cb24SPekka Enberg
28666d2144d3SJoe Korty check_irq_off();
28677d6e6d09SLee Schermerhorn node = numa_mem_id();
2868f68f8dddSJoonsoo Kim
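/*
 * Refill order: first the per-node shared array, then objects taken
 * directly from partial/free slabs on the local node, and finally a
 * freshly grown slab when nothing else is available.
 */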
28696d2144d3SJoe Korty ac = cpu_cache_get(cachep);
28701da177e4SLinus Torvalds batchcount = ac->batchcount;
28711da177e4SLinus Torvalds if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
2872a737b3e2SAndrew Morton /*
2873a737b3e2SAndrew Morton * If there was little recent activity on this cache, then
2874a737b3e2SAndrew Morton * perform only a partial refill. Otherwise we could generate
2875a737b3e2SAndrew Morton * refill bouncing.
28761da177e4SLinus Torvalds */
28771da177e4SLinus Torvalds batchcount = BATCHREFILL_LIMIT;
28781da177e4SLinus Torvalds }
287918bf8541SChristoph Lameter n = get_node(cachep, node);
28801da177e4SLinus Torvalds
2881ce8eb6c4SChristoph Lameter BUG_ON(ac->avail > 0 || !n);
2882801faf0dSJoonsoo Kim shared = READ_ONCE(n->shared);
2883801faf0dSJoonsoo Kim if (!n->free_objects && (!shared || !shared->avail))
2884801faf0dSJoonsoo Kim goto direct_grow;
2885801faf0dSJoonsoo Kim
2886b539ce9fSJiri Kosina raw_spin_lock(&n->list_lock);
2887801faf0dSJoonsoo Kim shared = READ_ONCE(n->shared);
2888e498be7dSChristoph Lameter
28893ded175aSChristoph Lameter /* See if we can refill from the shared array */
2890801faf0dSJoonsoo Kim if (shared && transfer_objects(ac, shared, batchcount)) {
2891801faf0dSJoonsoo Kim shared->touched = 1;
28921da177e4SLinus Torvalds goto alloc_done;
289344b57f1cSNick Piggin }
28943ded175aSChristoph Lameter
28951da177e4SLinus Torvalds while (batchcount > 0) {
28961da177e4SLinus Torvalds /* Get the slab that allocations are to come from. */
28977981e67eSVlastimil Babka slab = get_first_slab(n, false);
28987981e67eSVlastimil Babka if (!slab)
28991da177e4SLinus Torvalds goto must_grow;
29001da177e4SLinus Torvalds
29011da177e4SLinus Torvalds check_spinlock_acquired(cachep);
2902714b8171SPekka Enberg
29037981e67eSVlastimil Babka batchcount = alloc_block(cachep, ac, slab, batchcount);
29047981e67eSVlastimil Babka fixup_slab_list(cachep, n, slab, &list);
29051da177e4SLinus Torvalds }
29061da177e4SLinus Torvalds
29071da177e4SLinus Torvalds must_grow:
2908ce8eb6c4SChristoph Lameter n->free_objects -= ac->avail;
29091da177e4SLinus Torvalds alloc_done:
2910b539ce9fSJiri Kosina raw_spin_unlock(&n->list_lock);
2911b03a017bSJoonsoo Kim fixup_objfreelist_debug(cachep, &list);
29121da177e4SLinus Torvalds
2913801faf0dSJoonsoo Kim direct_grow:
29141da177e4SLinus Torvalds if (unlikely(!ac->avail)) {
2915f68f8dddSJoonsoo Kim /* Check if we can use obj in pfmemalloc slab */
2916f68f8dddSJoonsoo Kim if (sk_memalloc_socks()) {
2917f68f8dddSJoonsoo Kim void *obj = cache_alloc_pfmemalloc(cachep, n, flags);
2918f68f8dddSJoonsoo Kim
2919f68f8dddSJoonsoo Kim if (obj)
2920f68f8dddSJoonsoo Kim return obj;
2921f68f8dddSJoonsoo Kim }
2922f68f8dddSJoonsoo Kim
29237981e67eSVlastimil Babka slab = cache_grow_begin(cachep, gfp_exact_node(flags), node);
29241da177e4SLinus Torvalds
292576b342bdSJoonsoo Kim /*
292676b342bdSJoonsoo Kim * cache_grow_begin() can reenable interrupts,
292776b342bdSJoonsoo Kim * then ac could change.
292876b342bdSJoonsoo Kim */
29299a2dba4bSPekka Enberg ac = cpu_cache_get(cachep);
29307981e67eSVlastimil Babka if (!ac->avail && slab)
29317981e67eSVlastimil Babka alloc_block(cachep, ac, slab, batchcount);
29327981e67eSVlastimil Babka cache_grow_end(cachep, slab);
2933072bb0aaSMel Gorman
2934213b4695SJoonsoo Kim if (!ac->avail)
29351da177e4SLinus Torvalds return NULL;
29361da177e4SLinus Torvalds }
29371da177e4SLinus Torvalds ac->touched = 1;
2938072bb0aaSMel Gorman
2939f68f8dddSJoonsoo Kim return ac->entry[--ac->avail];
29401da177e4SLinus Torvalds }
29411da177e4SLinus Torvalds
29421da177e4SLinus Torvalds #if DEBUG
2943a737b3e2SAndrew Morton static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
29447c0cb9c6SEzequiel Garcia gfp_t flags, void *objp, unsigned long caller)
29451da177e4SLinus Torvalds {
2946128227e7SMatthew Wilcox WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
2947df3ae2c9SMarco Elver if (!objp || is_kfence_address(objp))
29481da177e4SLinus Torvalds return objp;
29491da177e4SLinus Torvalds if (cachep->flags & SLAB_POISON) {
29501da177e4SLinus Torvalds check_poison_obj(cachep, objp);
295180552f0fSQian Cai slab_kernel_map(cachep, objp, 1);
29521da177e4SLinus Torvalds poison_obj(cachep, objp, POISON_INUSE);
29531da177e4SLinus Torvalds }
29541da177e4SLinus Torvalds if (cachep->flags & SLAB_STORE_USER)
29557c0cb9c6SEzequiel Garcia *dbg_userword(cachep, objp) = (void *)caller;
29561da177e4SLinus Torvalds
29571da177e4SLinus Torvalds if (cachep->flags & SLAB_RED_ZONE) {
2958a737b3e2SAndrew Morton if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
2959a737b3e2SAndrew Morton *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
2960756a025fSJoe Perches slab_error(cachep, "double free, or memory outside object was overwritten");
296185c3e4a5SGeert Uytterhoeven pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx\n",
2962b28a02deSPekka Enberg objp, *dbg_redzone1(cachep, objp),
2963b28a02deSPekka Enberg *dbg_redzone2(cachep, objp));
29641da177e4SLinus Torvalds }
29651da177e4SLinus Torvalds *dbg_redzone1(cachep, objp) = RED_ACTIVE;
29661da177e4SLinus Torvalds *dbg_redzone2(cachep, objp) = RED_ACTIVE;
29671da177e4SLinus Torvalds }
296803787301SJoonsoo Kim
29693dafccf2SManfred Spraul objp += obj_offset(cachep);
29704f104934SChristoph Lameter if (cachep->ctor && cachep->flags & SLAB_POISON)
297151cc5068SAlexey Dobriyan cachep->ctor(objp);
2972d949a815SPeter Collingbourne if ((unsigned long)objp & (arch_slab_minalign() - 1)) {
2973d949a815SPeter Collingbourne pr_err("0x%px: not aligned to arch_slab_minalign()=%u\n", objp,
2974d949a815SPeter Collingbourne arch_slab_minalign());
2975a44b56d3SKevin Hilman }
29761da177e4SLinus Torvalds return objp;
29771da177e4SLinus Torvalds }
29781da177e4SLinus Torvalds #else
29791da177e4SLinus Torvalds #define cache_alloc_debugcheck_after(a, b, objp, d) (objp)
29801da177e4SLinus Torvalds #endif
29811da177e4SLinus Torvalds
2982343e0d7aSPekka Enberg static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
29831da177e4SLinus Torvalds {
29841da177e4SLinus Torvalds void *objp;
29851da177e4SLinus Torvalds struct array_cache *ac;
29861da177e4SLinus Torvalds
29875c382300SAlok N Kataria check_irq_off();
29888a8b6502SAkinobu Mita
29899a2dba4bSPekka Enberg ac = cpu_cache_get(cachep);
29901da177e4SLinus Torvalds if (likely(ac->avail)) {
29911da177e4SLinus Torvalds ac->touched = 1;
2992f68f8dddSJoonsoo Kim objp = ac->entry[--ac->avail];
2993072bb0aaSMel Gorman
2994072bb0aaSMel Gorman STATS_INC_ALLOCHIT(cachep);
2995072bb0aaSMel Gorman goto out;
2996072bb0aaSMel Gorman }
2997072bb0aaSMel Gorman
29981da177e4SLinus Torvalds STATS_INC_ALLOCMISS(cachep);
2999f68f8dddSJoonsoo Kim objp = cache_alloc_refill(cachep, flags);
3000ddbf2e83SJ. R. Okajima /*
3001ddbf2e83SJ. R. Okajima * the 'ac' may be updated by cache_alloc_refill(),
3002ddbf2e83SJ. R. Okajima * and kmemleak_erase() requires its correct value.
3003ddbf2e83SJ. R. Okajima */
3004ddbf2e83SJ. R. Okajima ac = cpu_cache_get(cachep);
3005072bb0aaSMel Gorman
3006072bb0aaSMel Gorman out:
3007d5cff635SCatalin Marinas /*
3008d5cff635SCatalin Marinas * To avoid a false negative, if an object that is in one of the
3009d5cff635SCatalin Marinas * per-CPU caches is leaked, we need to make sure kmemleak doesn't
3010d5cff635SCatalin Marinas * treat the array pointers as a reference to the object.
3011d5cff635SCatalin Marinas */
3012f3d8b53aSJ. R. Okajima if (objp)
3013d5cff635SCatalin Marinas kmemleak_erase(&ac->entry[ac->avail]);
30145c382300SAlok N Kataria return objp;
30155c382300SAlok N Kataria }
30165c382300SAlok N Kataria
3017e498be7dSChristoph Lameter #ifdef CONFIG_NUMA
30181e703d05SMiaohe Lin static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
30191e703d05SMiaohe Lin
30201da177e4SLinus Torvalds /*
30212ad654bcSZefan Li * Try allocating on another node if PFA_SPREAD_SLAB is set or a mempolicy is in effect.
3022c61afb18SPaul Jackson *
3023c61afb18SPaul Jackson * If we are in_interrupt, then process context, including cpusets and
3024c61afb18SPaul Jackson * mempolicy, may not apply and should not be used for allocation policy.
3025c61afb18SPaul Jackson */
3026c61afb18SPaul Jackson static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3027c61afb18SPaul Jackson {
3028c61afb18SPaul Jackson int nid_alloc, nid_here;
3029c61afb18SPaul Jackson
3030765c4507SChristoph Lameter if (in_interrupt() || (flags & __GFP_THISNODE))
3031c61afb18SPaul Jackson return NULL;
30327d6e6d09SLee Schermerhorn nid_alloc = nid_here = numa_mem_id();
3033c61afb18SPaul Jackson if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
30346adef3ebSJack Steiner nid_alloc = cpuset_slab_spread_node();
3035c61afb18SPaul Jackson else if (current->mempolicy)
30362a389610SDavid Rientjes nid_alloc = mempolicy_slab_node();
3037c61afb18SPaul Jackson if (nid_alloc != nid_here)
30388b98c169SChristoph Hellwig return ____cache_alloc_node(cachep, flags, nid_alloc);
3039c61afb18SPaul Jackson return NULL;
3040c61afb18SPaul Jackson }
3041c61afb18SPaul Jackson
3042c61afb18SPaul Jackson /*
3043765c4507SChristoph Lameter * Fallback function if there was no memory available and no objects on a
30443c517a61SChristoph Lameter * certain node and falling back is permitted. First we scan all the
30456a67368cSChristoph Lameter * available nodes for available objects. If that fails then we
30463c517a61SChristoph Lameter * perform an allocation without specifying a node. This allows the page
30473c517a61SChristoph Lameter * allocator to do its reclaim / fallback magic. We then insert the
30483c517a61SChristoph Lameter * slab into the proper nodelist and then allocate from it.
3049765c4507SChristoph Lameter */
30508c8cc2c1SPekka Enberg static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3051765c4507SChristoph Lameter {
30528c8cc2c1SPekka Enberg struct zonelist *zonelist;
3053dd1a239fSMel Gorman struct zoneref *z;
305454a6eb5cSMel Gorman struct zone *zone;
305597a225e6SJoonsoo Kim enum zone_type highest_zoneidx = gfp_zone(flags);
3056765c4507SChristoph Lameter void *obj = NULL;
30577981e67eSVlastimil Babka struct slab *slab;
30583c517a61SChristoph Lameter int nid;
3059cc9a6c87SMel Gorman unsigned int cpuset_mems_cookie;
30608c8cc2c1SPekka Enberg
30618c8cc2c1SPekka Enberg if (flags & __GFP_THISNODE)
30628c8cc2c1SPekka Enberg return NULL;
30638c8cc2c1SPekka Enberg
3064cc9a6c87SMel Gorman retry_cpuset:
3065d26914d1SMel Gorman cpuset_mems_cookie = read_mems_allowed_begin();
30662a389610SDavid Rientjes zonelist = node_zonelist(mempolicy_slab_node(), flags);
3067cc9a6c87SMel Gorman
30683c517a61SChristoph Lameter retry:
30693c517a61SChristoph Lameter /*
30703c517a61SChristoph Lameter * Look through allowed nodes for objects available
30713c517a61SChristoph Lameter * from existing per node queues.
30723c517a61SChristoph Lameter */
307397a225e6SJoonsoo Kim for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
307454a6eb5cSMel Gorman nid = zone_to_nid(zone);
3075aedb0eb1SChristoph Lameter
3076061d7074SVladimir Davydov if (cpuset_zone_allowed(zone, flags) &&
307718bf8541SChristoph Lameter get_node(cache, nid) &&
307818bf8541SChristoph Lameter get_node(cache, nid)->free_objects) {
30798b98c169SChristoph Hellwig obj = ____cache_alloc_node(cache,
30804167e9b2SDavid Rientjes gfp_exact_node(flags), nid);
3081481c5346SChristoph Lameter if (obj)
3082481c5346SChristoph Lameter break;
3083481c5346SChristoph Lameter }
30843c517a61SChristoph Lameter }
30853c517a61SChristoph Lameter
3086cfce6604SChristoph Lameter if (!obj) {
30873c517a61SChristoph Lameter /*
30883c517a61SChristoph Lameter * This allocation will be performed within the constraints
30893c517a61SChristoph Lameter * of the current cpuset / memory policy requirements.
30903c517a61SChristoph Lameter * We may trigger various forms of reclaim on the allowed
30913c517a61SChristoph Lameter * set and go into memory reserves if necessary.
30923c517a61SChristoph Lameter */
30937981e67eSVlastimil Babka slab = cache_grow_begin(cache, flags, numa_mem_id());
30947981e67eSVlastimil Babka cache_grow_end(cache, slab);
30957981e67eSVlastimil Babka if (slab) {
30967981e67eSVlastimil Babka nid = slab_nid(slab);
30973c517a61SChristoph Lameter obj = ____cache_alloc_node(cache,
30984167e9b2SDavid Rientjes gfp_exact_node(flags), nid);
3099511e3a05SJoonsoo Kim
31003c517a61SChristoph Lameter /*
3101511e3a05SJoonsoo Kim * Another processor may allocate the objects in
3102511e3a05SJoonsoo Kim * the slab since we are not holding any locks.
31033c517a61SChristoph Lameter */
3104511e3a05SJoonsoo Kim if (!obj)
31053c517a61SChristoph Lameter goto retry;
31063c517a61SChristoph Lameter }
3107aedb0eb1SChristoph Lameter }
3108cc9a6c87SMel Gorman
3109d26914d1SMel Gorman if (unlikely(!obj && read_mems_allowed_retry(cpuset_mems_cookie)))
3110cc9a6c87SMel Gorman goto retry_cpuset;
3111765c4507SChristoph Lameter return obj;
3112765c4507SChristoph Lameter }
3113765c4507SChristoph Lameter
3114765c4507SChristoph Lameter /*
3115a8f23dd1SYixuan Cao * An interface to enable slab creation on nodeid
31161da177e4SLinus Torvalds */
31178b98c169SChristoph Hellwig static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3118a737b3e2SAndrew Morton int nodeid)
3119e498be7dSChristoph Lameter {
31207981e67eSVlastimil Babka struct slab *slab;
3121ce8eb6c4SChristoph Lameter struct kmem_cache_node *n;
3122213b4695SJoonsoo Kim void *obj = NULL;
3123b03a017bSJoonsoo Kim void *list = NULL;
31241da177e4SLinus Torvalds
31257c3fbbddSPaul Mackerras VM_BUG_ON(nodeid < 0 || nodeid >= MAX_NUMNODES);
312618bf8541SChristoph Lameter n = get_node(cachep, nodeid);
3127ce8eb6c4SChristoph Lameter BUG_ON(!n);
3128e498be7dSChristoph Lameter
3129ca3b9b91SRavikiran G Thirumalai check_irq_off();
3130b539ce9fSJiri Kosina raw_spin_lock(&n->list_lock);
31317981e67eSVlastimil Babka slab = get_first_slab(n, false);
31327981e67eSVlastimil Babka if (!slab)
3133e498be7dSChristoph Lameter goto must_grow;
3134e498be7dSChristoph Lameter
3135e498be7dSChristoph Lameter check_spinlock_acquired_node(cachep, nodeid);
3136e498be7dSChristoph Lameter
3137e498be7dSChristoph Lameter STATS_INC_NODEALLOCS(cachep);
3138e498be7dSChristoph Lameter STATS_INC_ACTIVE(cachep);
3139e498be7dSChristoph Lameter STATS_SET_HIGH(cachep);
3140e498be7dSChristoph Lameter
31417981e67eSVlastimil Babka BUG_ON(slab->active == cachep->num);
3142e498be7dSChristoph Lameter
31437981e67eSVlastimil Babka obj = slab_get_obj(cachep, slab);
3144ce8eb6c4SChristoph Lameter n->free_objects--;
3145e498be7dSChristoph Lameter
31467981e67eSVlastimil Babka fixup_slab_list(cachep, n, slab, &list);
3147e498be7dSChristoph Lameter
3148b539ce9fSJiri Kosina raw_spin_unlock(&n->list_lock);
3149b03a017bSJoonsoo Kim fixup_objfreelist_debug(cachep, &list);
3150213b4695SJoonsoo Kim return obj;
3151e498be7dSChristoph Lameter
3152e498be7dSChristoph Lameter must_grow:
3153b539ce9fSJiri Kosina raw_spin_unlock(&n->list_lock);
31547981e67eSVlastimil Babka slab = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
31557981e67eSVlastimil Babka if (slab) {
3156213b4695SJoonsoo Kim /* This slab isn't counted yet so don't update free_objects */
31577981e67eSVlastimil Babka obj = slab_get_obj(cachep, slab);
3158213b4695SJoonsoo Kim }
31597981e67eSVlastimil Babka cache_grow_end(cachep, slab);
3160e498be7dSChristoph Lameter
3161213b4695SJoonsoo Kim return obj ? obj : fallback_alloc(cachep, flags);
3162e498be7dSChristoph Lameter }
31638c8cc2c1SPekka Enberg
31648c8cc2c1SPekka Enberg static __always_inline void *
3165c31a910cSHyeonggon Yoo __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags, int nodeid)
31668c8cc2c1SPekka Enberg {
3167c31a910cSHyeonggon Yoo void *objp = NULL;
3168c31a910cSHyeonggon Yoo int slab_node = numa_mem_id();
31698c8cc2c1SPekka Enberg
3170c31a910cSHyeonggon Yoo if (nodeid == NUMA_NO_NODE) {
31712ad654bcSZefan Li if (current->mempolicy || cpuset_do_slab_mem_spread()) {
3172c31a910cSHyeonggon Yoo objp = alternate_node_alloc(cachep, flags);
31738c8cc2c1SPekka Enberg if (objp)
31748c8cc2c1SPekka Enberg goto out;
31758c8cc2c1SPekka Enberg }
3176c31a910cSHyeonggon Yoo /*
3177c31a910cSHyeonggon Yoo * Use the locally cached objects if possible.
3178c31a910cSHyeonggon Yoo * However ____cache_alloc does not allow fallback
3179c31a910cSHyeonggon Yoo * to other nodes. It may fail while we still have
3180c31a910cSHyeonggon Yoo * objects on other nodes available.
3181c31a910cSHyeonggon Yoo */
3182c31a910cSHyeonggon Yoo objp = ____cache_alloc(cachep, flags);
3183c31a910cSHyeonggon Yoo nodeid = slab_node;
3184c31a910cSHyeonggon Yoo } else if (nodeid == slab_node) {
3185c31a910cSHyeonggon Yoo objp = ____cache_alloc(cachep, flags);
3186c31a910cSHyeonggon Yoo } else if (!get_node(cachep, nodeid)) {
3187c31a910cSHyeonggon Yoo /* Node not bootstrapped yet */
3188c31a910cSHyeonggon Yoo objp = fallback_alloc(cachep, flags);
3189c31a910cSHyeonggon Yoo goto out;
3190c31a910cSHyeonggon Yoo }
31918c8cc2c1SPekka Enberg
31928c8cc2c1SPekka Enberg /*
31938c8cc2c1SPekka Enberg * We may just have run out of memory on the local node.
31948c8cc2c1SPekka Enberg * ____cache_alloc_node() knows how to locate memory on other nodes.
31958c8cc2c1SPekka Enberg */
31968c8cc2c1SPekka Enberg if (!objp)
3197c31a910cSHyeonggon Yoo objp = ____cache_alloc_node(cachep, flags, nodeid);
31988c8cc2c1SPekka Enberg out:
31998c8cc2c1SPekka Enberg return objp;
32008c8cc2c1SPekka Enberg }
32018c8cc2c1SPekka Enberg #else
32028c8cc2c1SPekka Enberg
32038c8cc2c1SPekka Enberg static __always_inline void *
3204c31a910cSHyeonggon Yoo __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags, int nodeid __maybe_unused)
32058c8cc2c1SPekka Enberg {
32068c8cc2c1SPekka Enberg return ____cache_alloc(cachep, flags);
32078c8cc2c1SPekka Enberg }
32088c8cc2c1SPekka Enberg
32098c8cc2c1SPekka Enberg #endif /* CONFIG_NUMA */
32108c8cc2c1SPekka Enberg
32118c8cc2c1SPekka Enberg static __always_inline void *
321207588d72SHyeonggon Yoo slab_alloc_node(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags,
321307588d72SHyeonggon Yoo int nodeid, size_t orig_size, unsigned long caller)
32148c8cc2c1SPekka Enberg {
32158c8cc2c1SPekka Enberg unsigned long save_flags;
32168c8cc2c1SPekka Enberg void *objp;
3217964d4bd3SRoman Gushchin struct obj_cgroup *objcg = NULL;
3218da844b78SAndrey Konovalov bool init = false;
32198c8cc2c1SPekka Enberg
3220dcce284aSBenjamin Herrenschmidt flags &= gfp_allowed_mask;
322188f2ef73SMuchun Song cachep = slab_pre_alloc_hook(cachep, lru, &objcg, 1, flags);
3222011eceafSJesper Dangaard Brouer if (unlikely(!cachep))
3223824ebef1SAkinobu Mita return NULL;
3224824ebef1SAkinobu Mita
3225d3fb45f3SAlexander Potapenko objp = kfence_alloc(cachep, orig_size, flags);
3226d3fb45f3SAlexander Potapenko if (unlikely(objp))
3227d3fb45f3SAlexander Potapenko goto out;
3228d3fb45f3SAlexander Potapenko
32298c8cc2c1SPekka Enberg local_irq_save(save_flags);
323007588d72SHyeonggon Yoo objp = __do_cache_alloc(cachep, flags, nodeid);
32318c8cc2c1SPekka Enberg local_irq_restore(save_flags);
32328c8cc2c1SPekka Enberg objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
32338c8cc2c1SPekka Enberg prefetchw(objp);
3234da844b78SAndrey Konovalov init = slab_want_init_on_alloc(flags, cachep);
3235d07dbea4SChristoph Lameter
3236d3fb45f3SAlexander Potapenko out:
32379ce67395SFeng Tang slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init,
32389ce67395SFeng Tang cachep->object_size);
32398c8cc2c1SPekka Enberg return objp;
32408c8cc2c1SPekka Enberg }
3241e498be7dSChristoph Lameter
324207588d72SHyeonggon Yoo static __always_inline void *
324307588d72SHyeonggon Yoo slab_alloc(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags,
324407588d72SHyeonggon Yoo size_t orig_size, unsigned long caller)
324507588d72SHyeonggon Yoo {
324607588d72SHyeonggon Yoo return slab_alloc_node(cachep, lru, flags, NUMA_NO_NODE, orig_size,
324707588d72SHyeonggon Yoo caller);
324807588d72SHyeonggon Yoo }
324907588d72SHyeonggon Yoo
3250e498be7dSChristoph Lameter /*
32515f0985bbSJianyu Zhan * Caller needs to acquire the correct kmem_cache_node's list_lock
325297654dfaSJoonsoo Kim * @list: list of detached free slabs that should be freed by the caller
3253e498be7dSChristoph Lameter */
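/*
 * Illustrative call pattern only (a sketch mirroring the real call sites in
 * cache_flusharray() and do_tune_cpucache() below; "objpp" and "nr" are
 * placeholders):
 *
 *	LIST_HEAD(list);
 *
 *	raw_spin_lock(&n->list_lock);
 *	free_block(cachep, objpp, nr, node, &list);
 *	raw_spin_unlock(&n->list_lock);
 *	slabs_destroy(cachep, &list);
 */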
325497654dfaSJoonsoo Kim static void free_block(struct kmem_cache *cachep, void **objpp,
325597654dfaSJoonsoo Kim int nr_objects, int node, struct list_head *list)
32561da177e4SLinus Torvalds {
32571da177e4SLinus Torvalds int i;
325825c063fbSJoonsoo Kim struct kmem_cache_node *n = get_node(cachep, node);
32597981e67eSVlastimil Babka struct slab *slab;
32606052b788SJoonsoo Kim
32616052b788SJoonsoo Kim n->free_objects += nr_objects;
32621da177e4SLinus Torvalds
32631da177e4SLinus Torvalds for (i = 0; i < nr_objects; i++) {
3264072bb0aaSMel Gorman void *objp;
32657981e67eSVlastimil Babka struct slab *slab;
32661da177e4SLinus Torvalds
3267072bb0aaSMel Gorman objp = objpp[i];
3268072bb0aaSMel Gorman
32697981e67eSVlastimil Babka slab = virt_to_slab(objp);
32707981e67eSVlastimil Babka list_del(&slab->slab_list);
3271ff69416eSChristoph Lameter check_spinlock_acquired_node(cachep, node);
32727981e67eSVlastimil Babka slab_put_obj(cachep, slab, objp);
32731da177e4SLinus Torvalds STATS_DEC_ACTIVE(cachep);
32741da177e4SLinus Torvalds
32751da177e4SLinus Torvalds /* fixup slab chains */
32767981e67eSVlastimil Babka if (slab->active == 0) {
32777981e67eSVlastimil Babka list_add(&slab->slab_list, &n->slabs_free);
3278f728b0a5SGreg Thelen n->free_slabs++;
3279f728b0a5SGreg Thelen } else {
32801da177e4SLinus Torvalds /* Unconditionally move a slab to the end of the
32811da177e4SLinus Torvalds * partial list on free - maximum time for the
32821da177e4SLinus Torvalds * other objects to be freed, too.
32831da177e4SLinus Torvalds */
32847981e67eSVlastimil Babka list_add_tail(&slab->slab_list, &n->slabs_partial);
32851da177e4SLinus Torvalds }
32861da177e4SLinus Torvalds }
32876052b788SJoonsoo Kim
32886052b788SJoonsoo Kim while (n->free_objects > n->free_limit && !list_empty(&n->slabs_free)) {
32896052b788SJoonsoo Kim n->free_objects -= cachep->num;
32906052b788SJoonsoo Kim
32917981e67eSVlastimil Babka slab = list_last_entry(&n->slabs_free, struct slab, slab_list);
32927981e67eSVlastimil Babka list_move(&slab->slab_list, list);
3293f728b0a5SGreg Thelen n->free_slabs--;
3294bf00bd34SDavid Rientjes n->total_slabs--;
32956052b788SJoonsoo Kim }
32961da177e4SLinus Torvalds }
32971da177e4SLinus Torvalds
3298343e0d7aSPekka Enberg static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
32991da177e4SLinus Torvalds {
33001da177e4SLinus Torvalds int batchcount;
3301ce8eb6c4SChristoph Lameter struct kmem_cache_node *n;
33027d6e6d09SLee Schermerhorn int node = numa_mem_id();
330397654dfaSJoonsoo Kim LIST_HEAD(list);
33041da177e4SLinus Torvalds
33051da177e4SLinus Torvalds batchcount = ac->batchcount;
3306260b61ddSJoonsoo Kim
33071da177e4SLinus Torvalds check_irq_off();
330818bf8541SChristoph Lameter n = get_node(cachep, node);
3309b539ce9fSJiri Kosina raw_spin_lock(&n->list_lock);
3310ce8eb6c4SChristoph Lameter if (n->shared) {
3311ce8eb6c4SChristoph Lameter struct array_cache *shared_array = n->shared;
33121da177e4SLinus Torvalds int max = shared_array->limit - shared_array->avail;
33131da177e4SLinus Torvalds if (max) {
33141da177e4SLinus Torvalds if (batchcount > max)
33151da177e4SLinus Torvalds batchcount = max;
3316e498be7dSChristoph Lameter memcpy(&(shared_array->entry[shared_array->avail]),
3317b28a02deSPekka Enberg ac->entry, sizeof(void *) * batchcount);
33181da177e4SLinus Torvalds shared_array->avail += batchcount;
33191da177e4SLinus Torvalds goto free_done;
33201da177e4SLinus Torvalds }
33211da177e4SLinus Torvalds }
33221da177e4SLinus Torvalds
332397654dfaSJoonsoo Kim free_block(cachep, ac->entry, batchcount, node, &list);
33241da177e4SLinus Torvalds free_done:
33251da177e4SLinus Torvalds #if STATS
33261da177e4SLinus Torvalds {
33271da177e4SLinus Torvalds int i = 0;
33287981e67eSVlastimil Babka struct slab *slab;
33291da177e4SLinus Torvalds
33307981e67eSVlastimil Babka list_for_each_entry(slab, &n->slabs_free, slab_list) {
33317981e67eSVlastimil Babka BUG_ON(slab->active);
33321da177e4SLinus Torvalds
33331da177e4SLinus Torvalds i++;
33341da177e4SLinus Torvalds }
33351da177e4SLinus Torvalds STATS_SET_FREEABLE(cachep, i);
33361da177e4SLinus Torvalds }
33371da177e4SLinus Torvalds #endif
3338b539ce9fSJiri Kosina raw_spin_unlock(&n->list_lock);
33391da177e4SLinus Torvalds ac->avail -= batchcount;
3340a737b3e2SAndrew Morton memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
3341678ff6a7SShakeel Butt slabs_destroy(cachep, &list);
33421da177e4SLinus Torvalds }
33431da177e4SLinus Torvalds
33441da177e4SLinus Torvalds /*
3345a737b3e2SAndrew Morton * Release an obj back to its cache. If the obj has a constructed state, it must
3346a737b3e2SAndrew Morton * be in this state _before_ it is released. Called with interrupts disabled.
33471da177e4SLinus Torvalds */
3348ee3ce779SDmitry Vyukov static __always_inline void __cache_free(struct kmem_cache *cachep, void *objp,
33497c0cb9c6SEzequiel Garcia unsigned long caller)
33501da177e4SLinus Torvalds {
3351d57a964eSAndrey Konovalov bool init;
3352d57a964eSAndrey Konovalov
3353b77d5b1bSMuchun Song memcg_slab_free_hook(cachep, virt_to_slab(objp), &objp, 1);
3354b77d5b1bSMuchun Song
3355d3fb45f3SAlexander Potapenko if (is_kfence_address(objp)) {
3356d3fb45f3SAlexander Potapenko kmemleak_free_recursive(objp, cachep->flags);
3357d3fb45f3SAlexander Potapenko __kfence_free(objp);
3358d3fb45f3SAlexander Potapenko return;
3359d3fb45f3SAlexander Potapenko }
3360d3fb45f3SAlexander Potapenko
3361d57a964eSAndrey Konovalov /*
3362d57a964eSAndrey Konovalov * As memory initialization might be integrated into KASAN,
3363d57a964eSAndrey Konovalov * kasan_slab_free and initialization memset must be
3364d57a964eSAndrey Konovalov * kept together to avoid discrepancies in behavior.
3365d57a964eSAndrey Konovalov */
3366d57a964eSAndrey Konovalov init = slab_want_init_on_free(cachep);
3367d57a964eSAndrey Konovalov if (init && !kasan_has_integrated_init())
3368a32d654dSAlexander Popov memset(objp, 0, cachep->object_size);
3369d57a964eSAndrey Konovalov /* KASAN might put objp into memory quarantine, delaying its reuse. */
3370d57a964eSAndrey Konovalov if (kasan_slab_free(cachep, objp, init))
337155834c59SAlexander Potapenko return;
33721da177e4SLinus Torvalds
3373cfbe1636SMarco Elver /* Use KCSAN to help debug racy use-after-free. */
3374cfbe1636SMarco Elver if (!(cachep->flags & SLAB_TYPESAFE_BY_RCU))
3375cfbe1636SMarco Elver __kcsan_check_access(objp, cachep->object_size,
3376cfbe1636SMarco Elver KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
3377cfbe1636SMarco Elver
337855834c59SAlexander Potapenko ___cache_free(cachep, objp, caller);
337955834c59SAlexander Potapenko }
338055834c59SAlexander Potapenko
338155834c59SAlexander Potapenko void ___cache_free(struct kmem_cache *cachep, void *objp,
338255834c59SAlexander Potapenko unsigned long caller)
338355834c59SAlexander Potapenko {
338455834c59SAlexander Potapenko struct array_cache *ac = cpu_cache_get(cachep);
33857ed2f9e6SAlexander Potapenko
33861da177e4SLinus Torvalds check_irq_off();
3387d5cff635SCatalin Marinas kmemleak_free_recursive(objp, cachep->flags);
3388a947eb95SSuleiman Souhlal objp = cache_free_debugcheck(cachep, objp, caller);
33891da177e4SLinus Torvalds
33901807a1aaSSiddha, Suresh B /*
33911807a1aaSSiddha, Suresh B * Skip calling cache_free_alien() when the platform is not NUMA.
33921807a1aaSSiddha, Suresh B * This will avoid cache misses that happen while accessing slabp (which
33931807a1aaSSiddha, Suresh B * is a per-page memory reference) to get nodeid. Instead use a global
33941807a1aaSSiddha, Suresh B * variable to skip the call, which is most likely to be present in
33951807a1aaSSiddha, Suresh B * the cache.
33961807a1aaSSiddha, Suresh B */
3397b6e68bc1SMel Gorman if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
3398e498be7dSChristoph Lameter return;
3399729bd0b7SPekka Enberg
34003d880194SJoonsoo Kim if (ac->avail < ac->limit) {
34011da177e4SLinus Torvalds STATS_INC_FREEHIT(cachep);
34021da177e4SLinus Torvalds } else {
34031da177e4SLinus Torvalds STATS_INC_FREEMISS(cachep);
34041da177e4SLinus Torvalds cache_flusharray(cachep, ac);
34051da177e4SLinus Torvalds }
340642c8c99cSZhao Jin
3407f68f8dddSJoonsoo Kim if (sk_memalloc_socks()) {
34087981e67eSVlastimil Babka struct slab *slab = virt_to_slab(objp);
3409f68f8dddSJoonsoo Kim
34107981e67eSVlastimil Babka if (unlikely(slab_test_pfmemalloc(slab))) {
34117981e67eSVlastimil Babka cache_free_pfmemalloc(cachep, slab, objp);
3412f68f8dddSJoonsoo Kim return;
3413f68f8dddSJoonsoo Kim }
3414f68f8dddSJoonsoo Kim }
3415f68f8dddSJoonsoo Kim
3416dabc3e29SKees Cook __free_one(ac, objp);
34171da177e4SLinus Torvalds }
34181da177e4SLinus Torvalds
341988f2ef73SMuchun Song static __always_inline
342088f2ef73SMuchun Song void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
342188f2ef73SMuchun Song gfp_t flags)
342288f2ef73SMuchun Song {
342388f2ef73SMuchun Song void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);
342488f2ef73SMuchun Song
34252c1d697fSHyeonggon Yoo trace_kmem_cache_alloc(_RET_IP_, ret, cachep, flags, NUMA_NO_NODE);
342688f2ef73SMuchun Song
342788f2ef73SMuchun Song return ret;
342888f2ef73SMuchun Song }
342988f2ef73SMuchun Song
3430343e0d7aSPekka Enberg void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
34311da177e4SLinus Torvalds {
343288f2ef73SMuchun Song return __kmem_cache_alloc_lru(cachep, NULL, flags);
34331da177e4SLinus Torvalds }
34341da177e4SLinus Torvalds EXPORT_SYMBOL(kmem_cache_alloc);
34351da177e4SLinus Torvalds
343688f2ef73SMuchun Song void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
343788f2ef73SMuchun Song gfp_t flags)
343888f2ef73SMuchun Song {
343988f2ef73SMuchun Song return __kmem_cache_alloc_lru(cachep, lru, flags);
344088f2ef73SMuchun Song }
344188f2ef73SMuchun Song EXPORT_SYMBOL(kmem_cache_alloc_lru);
344288f2ef73SMuchun Song
34437b0501ddSJesper Dangaard Brouer static __always_inline void
34447b0501ddSJesper Dangaard Brouer cache_alloc_debugcheck_after_bulk(struct kmem_cache *s, gfp_t flags,
34457b0501ddSJesper Dangaard Brouer size_t size, void **p, unsigned long caller)
34467b0501ddSJesper Dangaard Brouer {
34477b0501ddSJesper Dangaard Brouer size_t i;
34487b0501ddSJesper Dangaard Brouer
34497b0501ddSJesper Dangaard Brouer for (i = 0; i < size; i++)
34507b0501ddSJesper Dangaard Brouer p[i] = cache_alloc_debugcheck_after(s, flags, p[i], caller);
34517b0501ddSJesper Dangaard Brouer }
34527b0501ddSJesper Dangaard Brouer
3453865762a8SJesper Dangaard Brouer int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
3454484748f0SChristoph Lameter void **p)
3455484748f0SChristoph Lameter {
3456964d4bd3SRoman Gushchin struct obj_cgroup *objcg = NULL;
3457f5451547SThomas Gleixner unsigned long irqflags;
3458f5451547SThomas Gleixner size_t i;
34592a777eacSJesper Dangaard Brouer
346088f2ef73SMuchun Song s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
34612a777eacSJesper Dangaard Brouer if (!s)
34622a777eacSJesper Dangaard Brouer return 0;
34632a777eacSJesper Dangaard Brouer
3464f5451547SThomas Gleixner local_irq_save(irqflags);
34652a777eacSJesper Dangaard Brouer for (i = 0; i < size; i++) {
3466c31a910cSHyeonggon Yoo void *objp = kfence_alloc(s, s->object_size, flags) ?:
3467c31a910cSHyeonggon Yoo __do_cache_alloc(s, flags, NUMA_NO_NODE);
34682a777eacSJesper Dangaard Brouer
34692a777eacSJesper Dangaard Brouer if (unlikely(!objp))
34702a777eacSJesper Dangaard Brouer goto error;
34712a777eacSJesper Dangaard Brouer p[i] = objp;
34722a777eacSJesper Dangaard Brouer }
3473f5451547SThomas Gleixner local_irq_restore(irqflags);
34742a777eacSJesper Dangaard Brouer
34757b0501ddSJesper Dangaard Brouer cache_alloc_debugcheck_after_bulk(s, flags, size, p, _RET_IP_);
34767b0501ddSJesper Dangaard Brouer
3477da844b78SAndrey Konovalov /*
3478da844b78SAndrey Konovalov * memcg and kmem_cache debug support and memory initialization.
3479da844b78SAndrey Konovalov * Done outside of the IRQ disabled section.
3480da844b78SAndrey Konovalov */
3481da844b78SAndrey Konovalov slab_post_alloc_hook(s, objcg, flags, size, p,
34829ce67395SFeng Tang slab_want_init_on_alloc(flags, s), s->object_size);
34832a777eacSJesper Dangaard Brouer /* FIXME: Trace call missing. Christoph would like a bulk variant */
34842a777eacSJesper Dangaard Brouer return size;
34852a777eacSJesper Dangaard Brouer error:
3486f5451547SThomas Gleixner local_irq_restore(irqflags);
34877b0501ddSJesper Dangaard Brouer cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_);
34889ce67395SFeng Tang slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
34892055e67bSHyeonggon Yoo kmem_cache_free_bulk(s, i, p);
34902a777eacSJesper Dangaard Brouer return 0;
3491484748f0SChristoph Lameter }
3492484748f0SChristoph Lameter EXPORT_SYMBOL(kmem_cache_alloc_bulk);
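/*
 * Minimal usage sketch (hypothetical cache and object array, not taken from
 * this file): kmem_cache_alloc_bulk() returns @size on success and 0 on
 * failure, in which case any objects allocated so far have already been
 * freed; it pairs with kmem_cache_free_bulk().
 *
 *	void *objs[16];
 *
 *	if (!kmem_cache_alloc_bulk(my_cachep, GFP_KERNEL, ARRAY_SIZE(objs), objs))
 *		return -ENOMEM;
 *	...
 *	kmem_cache_free_bulk(my_cachep, ARRAY_SIZE(objs), objs);
 */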
3493484748f0SChristoph Lameter
3494d0d04b78SZhouping Liu /**
3495d0d04b78SZhouping Liu * kmem_cache_alloc_node - Allocate an object on the specified node
3496d0d04b78SZhouping Liu * @cachep: The cache to allocate from.
3497d0d04b78SZhouping Liu * @flags: See kmalloc().
3498d0d04b78SZhouping Liu * @nodeid: node number of the target node.
3499d0d04b78SZhouping Liu *
3500d0d04b78SZhouping Liu * Identical to kmem_cache_alloc but it will allocate memory on the given
3501d0d04b78SZhouping Liu * node, which can improve the performance for cpu bound structures.
3502d0d04b78SZhouping Liu * node, which can improve the performance for CPU-bound structures.
3503d0d04b78SZhouping Liu * Fallback to other nodes is possible if __GFP_THISNODE is not set.
3504a862f68aSMike Rapoport *
3505a862f68aSMike Rapoport * Return: pointer to the new object or %NULL in case of error
3506d0d04b78SZhouping Liu */
35078b98c169SChristoph Hellwig void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
35088b98c169SChristoph Hellwig {
350907588d72SHyeonggon Yoo void *ret = slab_alloc_node(cachep, NULL, flags, nodeid, cachep->object_size, _RET_IP_);
351036555751SEduard - Gabriel Munteanu
35112c1d697fSHyeonggon Yoo trace_kmem_cache_alloc(_RET_IP_, ret, cachep, flags, nodeid);
351236555751SEduard - Gabriel Munteanu
351336555751SEduard - Gabriel Munteanu return ret;
351436555751SEduard - Gabriel Munteanu }
351536555751SEduard - Gabriel Munteanu EXPORT_SYMBOL(kmem_cache_alloc_node);
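/*
 * Minimal usage sketch (hypothetical cache, object type and node id, not
 * taken from this file):
 *
 *	struct my_obj *p;
 *
 *	p = kmem_cache_alloc_node(my_cachep, GFP_KERNEL, nid);
 *	if (!p)
 *		return -ENOMEM;
 */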
351636555751SEduard - Gabriel Munteanu
3517ed4cd17eSHyeonggon Yoo void *__kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3518ed4cd17eSHyeonggon Yoo int nodeid, size_t orig_size,
3519ed4cd17eSHyeonggon Yoo unsigned long caller)
3520ed4cd17eSHyeonggon Yoo {
3521ed4cd17eSHyeonggon Yoo return slab_alloc_node(cachep, NULL, flags, nodeid,
3522ed4cd17eSHyeonggon Yoo orig_size, caller);
3523ed4cd17eSHyeonggon Yoo }
3524ed4cd17eSHyeonggon Yoo
35255bb1bb35SPaul E. McKenney #ifdef CONFIG_PRINTK
35262dfe63e6SMarco Elver void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
35278e7f37f2SPaul E. McKenney {
35288e7f37f2SPaul E. McKenney struct kmem_cache *cachep;
35298e7f37f2SPaul E. McKenney unsigned int objnr;
35308e7f37f2SPaul E. McKenney void *objp;
35318e7f37f2SPaul E. McKenney
35328e7f37f2SPaul E. McKenney kpp->kp_ptr = object;
35337213230aSMatthew Wilcox (Oracle) kpp->kp_slab = slab;
35347213230aSMatthew Wilcox (Oracle) cachep = slab->slab_cache;
35358e7f37f2SPaul E. McKenney kpp->kp_slab_cache = cachep;
35368e7f37f2SPaul E. McKenney objp = object - obj_offset(cachep);
35378e7f37f2SPaul E. McKenney kpp->kp_data_offset = obj_offset(cachep);
35387213230aSMatthew Wilcox (Oracle) slab = virt_to_slab(objp);
353940f3bf0cSVlastimil Babka objnr = obj_to_index(cachep, slab, objp);
35407981e67eSVlastimil Babka objp = index_to_obj(cachep, slab, objnr);
35418e7f37f2SPaul E. McKenney kpp->kp_objp = objp;
35428e7f37f2SPaul E. McKenney if (DEBUG && cachep->flags & SLAB_STORE_USER)
35438e7f37f2SPaul E. McKenney kpp->kp_ret = *dbg_userword(cachep, objp);
35448e7f37f2SPaul E. McKenney }
35455bb1bb35SPaul E. McKenney #endif
35468e7f37f2SPaul E. McKenney
3547ed4cd17eSHyeonggon Yoo static __always_inline
3548ed4cd17eSHyeonggon Yoo void __do_kmem_cache_free(struct kmem_cache *cachep, void *objp,
3549ed4cd17eSHyeonggon Yoo unsigned long caller)
3550ed4cd17eSHyeonggon Yoo {
3551ed4cd17eSHyeonggon Yoo unsigned long flags;
3552ed4cd17eSHyeonggon Yoo
3553ed4cd17eSHyeonggon Yoo local_irq_save(flags);
3554ed4cd17eSHyeonggon Yoo debug_check_no_locks_freed(objp, cachep->object_size);
3555ed4cd17eSHyeonggon Yoo if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
3556ed4cd17eSHyeonggon Yoo debug_check_no_obj_freed(objp, cachep->object_size);
3557ed4cd17eSHyeonggon Yoo __cache_free(cachep, objp, caller);
3558ed4cd17eSHyeonggon Yoo local_irq_restore(flags);
3559ed4cd17eSHyeonggon Yoo }
3560ed4cd17eSHyeonggon Yoo
3561ed4cd17eSHyeonggon Yoo void __kmem_cache_free(struct kmem_cache *cachep, void *objp,
3562ed4cd17eSHyeonggon Yoo unsigned long caller)
3563ed4cd17eSHyeonggon Yoo {
3564ed4cd17eSHyeonggon Yoo __do_kmem_cache_free(cachep, objp, caller);
3565ed4cd17eSHyeonggon Yoo }
3566ed4cd17eSHyeonggon Yoo
35671da177e4SLinus Torvalds /**
35681da177e4SLinus Torvalds * kmem_cache_free - Deallocate an object
35691da177e4SLinus Torvalds * @cachep: The cache the allocation was from.
35701da177e4SLinus Torvalds * @objp: The previously allocated object.
35711da177e4SLinus Torvalds *
35721da177e4SLinus Torvalds * Free an object which was previously allocated from this
35731da177e4SLinus Torvalds * cache.
35741da177e4SLinus Torvalds */
3575343e0d7aSPekka Enberg void kmem_cache_free(struct kmem_cache *cachep, void *objp)
35761da177e4SLinus Torvalds {
3577b9ce5ef4SGlauber Costa cachep = cache_from_obj(cachep, objp);
3578b9ce5ef4SGlauber Costa if (!cachep)
3579b9ce5ef4SGlauber Costa return;
35801da177e4SLinus Torvalds
35812c1d697fSHyeonggon Yoo trace_kmem_cache_free(_RET_IP_, objp, cachep);
3582ed4cd17eSHyeonggon Yoo __do_kmem_cache_free(cachep, objp, _RET_IP_);
35831da177e4SLinus Torvalds }
35841da177e4SLinus Torvalds EXPORT_SYMBOL(kmem_cache_free);
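/*
 * Minimal usage sketch (hypothetical cache, not taken from this file): an
 * object must be released to the same cache it was allocated from.
 *
 *	obj = kmem_cache_alloc(my_cachep, GFP_KERNEL);
 *	if (obj)
 *		kmem_cache_free(my_cachep, obj);
 */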
35851da177e4SLinus Torvalds
3586e6cdb58dSJesper Dangaard Brouer void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
3587e6cdb58dSJesper Dangaard Brouer {
3588f5451547SThomas Gleixner unsigned long flags;
3589e6cdb58dSJesper Dangaard Brouer
3590f5451547SThomas Gleixner local_irq_save(flags);
3591d6a71648SHyeonggon Yoo for (int i = 0; i < size; i++) {
3592e6cdb58dSJesper Dangaard Brouer void *objp = p[i];
3593d6a71648SHyeonggon Yoo struct kmem_cache *s;
3594e6cdb58dSJesper Dangaard Brouer
3595d6a71648SHyeonggon Yoo if (!orig_s) {
3596d6a71648SHyeonggon Yoo struct folio *folio = virt_to_folio(objp);
3597d6a71648SHyeonggon Yoo
3598d6a71648SHyeonggon Yoo /* called via kfree_bulk */
3599d6a71648SHyeonggon Yoo if (!folio_test_slab(folio)) {
3600f5451547SThomas Gleixner local_irq_restore(flags);
3601d6a71648SHyeonggon Yoo free_large_kmalloc(folio, objp);
3602f5451547SThomas Gleixner local_irq_save(flags);
3603d6a71648SHyeonggon Yoo continue;
3604d6a71648SHyeonggon Yoo }
3605d6a71648SHyeonggon Yoo s = folio_slab(folio)->slab_cache;
3606d6a71648SHyeonggon Yoo } else {
3607e6cdb58dSJesper Dangaard Brouer s = cache_from_obj(orig_s, objp);
3608d6a71648SHyeonggon Yoo }
3609d6a71648SHyeonggon Yoo
3610a64b5378SKees Cook if (!s)
3611a64b5378SKees Cook continue;
3612e6cdb58dSJesper Dangaard Brouer
3613e6cdb58dSJesper Dangaard Brouer debug_check_no_locks_freed(objp, s->object_size);
3614e6cdb58dSJesper Dangaard Brouer if (!(s->flags & SLAB_DEBUG_OBJECTS))
3615e6cdb58dSJesper Dangaard Brouer debug_check_no_obj_freed(objp, s->object_size);
3616e6cdb58dSJesper Dangaard Brouer
3617e6cdb58dSJesper Dangaard Brouer __cache_free(s, objp, _RET_IP_);
3618e6cdb58dSJesper Dangaard Brouer }
3619f5451547SThomas Gleixner local_irq_restore(flags);
3620e6cdb58dSJesper Dangaard Brouer
3621e6cdb58dSJesper Dangaard Brouer /* FIXME: add tracing */
3622e6cdb58dSJesper Dangaard Brouer }
3623e6cdb58dSJesper Dangaard Brouer EXPORT_SYMBOL(kmem_cache_free_bulk);
3624e6cdb58dSJesper Dangaard Brouer
3625e498be7dSChristoph Lameter /*
3626ce8eb6c4SChristoph Lameter * This initializes kmem_cache_node or resizes various caches for all nodes.
3627e498be7dSChristoph Lameter */
3628c3d332b6SJoonsoo Kim static int setup_kmem_cache_nodes(struct kmem_cache *cachep, gfp_t gfp)
3629e498be7dSChristoph Lameter {
3630c3d332b6SJoonsoo Kim int ret;
3631e498be7dSChristoph Lameter int node;
3632ce8eb6c4SChristoph Lameter struct kmem_cache_node *n;
3633e498be7dSChristoph Lameter
36349c09a95cSMel Gorman for_each_online_node(node) {
3635c3d332b6SJoonsoo Kim ret = setup_kmem_cache_node(cachep, node, gfp, true);
3636c3d332b6SJoonsoo Kim if (ret)
3637e498be7dSChristoph Lameter goto fail;
3638c3d332b6SJoonsoo Kim
36393395ee05SPaul Menage }
3640cafeb02eSChristoph Lameter
3641cafeb02eSChristoph Lameter return 0;
36420718dc2aSChristoph Lameter
3643e498be7dSChristoph Lameter fail:
36443b0efdfaSChristoph Lameter if (!cachep->list.next) {
36450718dc2aSChristoph Lameter /* Cache is not active yet. Roll back what we did */
36460718dc2aSChristoph Lameter node--;
36470718dc2aSChristoph Lameter while (node >= 0) {
364818bf8541SChristoph Lameter n = get_node(cachep, node);
364918bf8541SChristoph Lameter if (n) {
3650ce8eb6c4SChristoph Lameter kfree(n->shared);
3651ce8eb6c4SChristoph Lameter free_alien_cache(n->alien);
3652ce8eb6c4SChristoph Lameter kfree(n);
36536a67368cSChristoph Lameter cachep->node[node] = NULL;
36540718dc2aSChristoph Lameter }
36550718dc2aSChristoph Lameter node--;
36560718dc2aSChristoph Lameter }
36570718dc2aSChristoph Lameter }
3658cafeb02eSChristoph Lameter return -ENOMEM;
3659e498be7dSChristoph Lameter }
3660e498be7dSChristoph Lameter
366118004c5dSChristoph Lameter /* Always called with the slab_mutex held */
366210befea9SRoman Gushchin static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
366383b519e8SPekka Enberg int batchcount, int shared, gfp_t gfp)
36641da177e4SLinus Torvalds {
3665bf0dea23SJoonsoo Kim struct array_cache __percpu *cpu_cache, *prev;
3666bf0dea23SJoonsoo Kim int cpu;
36671da177e4SLinus Torvalds
3668bf0dea23SJoonsoo Kim cpu_cache = alloc_kmem_cache_cpus(cachep, limit, batchcount);
3669bf0dea23SJoonsoo Kim if (!cpu_cache)
3670d2e7b7d0SSiddha, Suresh B return -ENOMEM;
3671d2e7b7d0SSiddha, Suresh B
3672bf0dea23SJoonsoo Kim prev = cachep->cpu_cache;
3673bf0dea23SJoonsoo Kim cachep->cpu_cache = cpu_cache;
3674a87c75fbSGreg Thelen /*
3675a87c75fbSGreg Thelen * Without a previous cpu_cache there's no need to synchronize remote
3676a87c75fbSGreg Thelen * cpus, so skip the IPIs.
3677a87c75fbSGreg Thelen */
3678a87c75fbSGreg Thelen if (prev)
3679bf0dea23SJoonsoo Kim kick_all_cpus_sync();
36801da177e4SLinus Torvalds
36811da177e4SLinus Torvalds check_irq_on();
36821da177e4SLinus Torvalds cachep->batchcount = batchcount;
36831da177e4SLinus Torvalds cachep->limit = limit;
3684e498be7dSChristoph Lameter cachep->shared = shared;
36851da177e4SLinus Torvalds
3686bf0dea23SJoonsoo Kim if (!prev)
3687c3d332b6SJoonsoo Kim goto setup_node;
3688bf0dea23SJoonsoo Kim
3689bf0dea23SJoonsoo Kim for_each_online_cpu(cpu) {
369097654dfaSJoonsoo Kim LIST_HEAD(list);
369118bf8541SChristoph Lameter int node;
369218bf8541SChristoph Lameter struct kmem_cache_node *n;
3693bf0dea23SJoonsoo Kim struct array_cache *ac = per_cpu_ptr(prev, cpu);
369418bf8541SChristoph Lameter
3695bf0dea23SJoonsoo Kim node = cpu_to_mem(cpu);
369618bf8541SChristoph Lameter n = get_node(cachep, node);
3697b539ce9fSJiri Kosina raw_spin_lock_irq(&n->list_lock);
3698bf0dea23SJoonsoo Kim free_block(cachep, ac->entry, ac->avail, node, &list);
3699b539ce9fSJiri Kosina raw_spin_unlock_irq(&n->list_lock);
370097654dfaSJoonsoo Kim slabs_destroy(cachep, &list);
37011da177e4SLinus Torvalds }
3702bf0dea23SJoonsoo Kim free_percpu(prev);
3703bf0dea23SJoonsoo Kim
3704c3d332b6SJoonsoo Kim setup_node:
3705c3d332b6SJoonsoo Kim return setup_kmem_cache_nodes(cachep, gfp);
37061da177e4SLinus Torvalds }
37071da177e4SLinus Torvalds
370818004c5dSChristoph Lameter /* Called with slab_mutex held always */
370983b519e8SPekka Enberg static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
37101da177e4SLinus Torvalds {
37111da177e4SLinus Torvalds int err;
3712943a451aSGlauber Costa int limit = 0;
3713943a451aSGlauber Costa int shared = 0;
3714943a451aSGlauber Costa int batchcount = 0;
37151da177e4SLinus Torvalds
37167c00fce9SThomas Garnier err = cache_random_seq_create(cachep, cachep->num, gfp);
3717c7ce4f60SThomas Garnier if (err)
3718c7ce4f60SThomas Garnier goto end;
3719c7ce4f60SThomas Garnier
3720a737b3e2SAndrew Morton /*
3721a737b3e2SAndrew Morton * The head array serves three purposes:
37221da177e4SLinus Torvalds * - create a LIFO ordering, i.e. return objects that are cache-warm
37231da177e4SLinus Torvalds * - reduce the number of spinlock operations.
37241da177e4SLinus Torvalds * - reduce the number of linked list operations on the slab and
37251da177e4SLinus Torvalds * bufctl chains: array operations are cheaper.
37261da177e4SLinus Torvalds * The numbers are guessed, we should auto-tune as described by
37271da177e4SLinus Torvalds * Bonwick.
37281da177e4SLinus Torvalds */
37293b0efdfaSChristoph Lameter if (cachep->size > 131072)
37301da177e4SLinus Torvalds limit = 1;
37313b0efdfaSChristoph Lameter else if (cachep->size > PAGE_SIZE)
37321da177e4SLinus Torvalds limit = 8;
37333b0efdfaSChristoph Lameter else if (cachep->size > 1024)
37341da177e4SLinus Torvalds limit = 24;
37353b0efdfaSChristoph Lameter else if (cachep->size > 256)
37361da177e4SLinus Torvalds limit = 54;
37371da177e4SLinus Torvalds else
37381da177e4SLinus Torvalds limit = 120;
37391da177e4SLinus Torvalds
3740a737b3e2SAndrew Morton /*
3741a737b3e2SAndrew Morton * CPU bound tasks (e.g. network routing) can exhibit cpu bound
37421da177e4SLinus Torvalds * allocation behaviour: Most allocs on one cpu, most free operations
37431da177e4SLinus Torvalds * on another cpu. For these cases, an efficient object passing between
37441da177e4SLinus Torvalds * cpus is necessary. This is provided by a shared array. The array
37451da177e4SLinus Torvalds * replaces Bonwick's magazine layer.
37461da177e4SLinus Torvalds * On uniprocessor, it's functionally equivalent (but less efficient)
37471da177e4SLinus Torvalds * to a larger limit. Thus disabled by default.
37481da177e4SLinus Torvalds */
37491da177e4SLinus Torvalds shared = 0;
37503b0efdfaSChristoph Lameter if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1)
37511da177e4SLinus Torvalds shared = 8;
37521da177e4SLinus Torvalds
37531da177e4SLinus Torvalds #if DEBUG
3754a737b3e2SAndrew Morton /*
3755a737b3e2SAndrew Morton * With debugging enabled, a large batchcount leads to excessively long
3756a737b3e2SAndrew Morton * periods with disabled local interrupts. Limit the batchcount.
37571da177e4SLinus Torvalds */
37581da177e4SLinus Torvalds if (limit > 32)
37591da177e4SLinus Torvalds limit = 32;
37601da177e4SLinus Torvalds #endif
3761943a451aSGlauber Costa batchcount = (limit + 1) / 2;
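/*
 * Worked example (assuming DEBUG is off and num_possible_cpus() > 1): a
 * cache with 512-byte objects gets limit = 54, shared = 8 and
 * batchcount = (54 + 1) / 2 = 27.
 */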
3762943a451aSGlauber Costa err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
3763c7ce4f60SThomas Garnier end:
37641da177e4SLinus Torvalds if (err)
37651170532bSJoe Perches pr_err("enable_cpucache failed for %s, error %d\n",
37661da177e4SLinus Torvalds cachep->name, -err);
37672ed3a4efSChristoph Lameter return err;
37681da177e4SLinus Torvalds }
37691da177e4SLinus Torvalds
37701b55253aSChristoph Lameter /*
3771ce8eb6c4SChristoph Lameter * Drain an array if it contains any elements, taking the node lock only if
3772ce8eb6c4SChristoph Lameter * necessary. Note that the node listlock also protects the array_cache
3773b18e7e65SChristoph Lameter * if drain_array() is used on the shared array.
37741b55253aSChristoph Lameter */
3775ce8eb6c4SChristoph Lameter static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
377618726ca8SJoonsoo Kim struct array_cache *ac, int node)
37771da177e4SLinus Torvalds {
377897654dfaSJoonsoo Kim LIST_HEAD(list);
377918726ca8SJoonsoo Kim
378018726ca8SJoonsoo Kim /* ac from n->shared can be freed if we don't hold the slab_mutex. */
378118726ca8SJoonsoo Kim check_mutex_acquired();
37821da177e4SLinus Torvalds
37831b55253aSChristoph Lameter if (!ac || !ac->avail)
37841b55253aSChristoph Lameter return;
378518726ca8SJoonsoo Kim
378618726ca8SJoonsoo Kim if (ac->touched) {
37871da177e4SLinus Torvalds ac->touched = 0;
378818726ca8SJoonsoo Kim return;
378918726ca8SJoonsoo Kim }
379018726ca8SJoonsoo Kim
3791b539ce9fSJiri Kosina raw_spin_lock_irq(&n->list_lock);
379218726ca8SJoonsoo Kim drain_array_locked(cachep, ac, node, false, &list);
3793b539ce9fSJiri Kosina raw_spin_unlock_irq(&n->list_lock);
379418726ca8SJoonsoo Kim
379597654dfaSJoonsoo Kim slabs_destroy(cachep, &list);
3796b18e7e65SChristoph Lameter }
37971da177e4SLinus Torvalds
37981da177e4SLinus Torvalds /**
37991da177e4SLinus Torvalds * cache_reap - Reclaim memory from caches.
380005fb6bf0SRandy Dunlap * @w: work descriptor
38011da177e4SLinus Torvalds *
38021da177e4SLinus Torvalds * Called from workqueue/eventd every few seconds.
38031da177e4SLinus Torvalds * Purpose:
38041da177e4SLinus Torvalds * - clear the per-cpu caches for this CPU.
38051da177e4SLinus Torvalds * - return freeable pages to the main free memory pool.
38061da177e4SLinus Torvalds *
3807a737b3e2SAndrew Morton * If we cannot acquire the cache chain mutex then just give up - we'll try
3808a737b3e2SAndrew Morton * again on the next iteration.
38091da177e4SLinus Torvalds */
38107c5cae36SChristoph Lameter static void cache_reap(struct work_struct *w)
38111da177e4SLinus Torvalds {
38127a7c381dSChristoph Hellwig struct kmem_cache *searchp;
3813ce8eb6c4SChristoph Lameter struct kmem_cache_node *n;
38147d6e6d09SLee Schermerhorn int node = numa_mem_id();
3815bf6aede7SJean Delvare struct delayed_work *work = to_delayed_work(w);
38161da177e4SLinus Torvalds
381718004c5dSChristoph Lameter if (!mutex_trylock(&slab_mutex))
38181da177e4SLinus Torvalds /* Give up. Setup the next iteration. */
38197c5cae36SChristoph Lameter goto out;
38201da177e4SLinus Torvalds
382118004c5dSChristoph Lameter list_for_each_entry(searchp, &slab_caches, list) {
38221da177e4SLinus Torvalds check_irq_on();
38231da177e4SLinus Torvalds
382435386e3bSChristoph Lameter /*
3825ce8eb6c4SChristoph Lameter * We only take the node lock if absolutely necessary and we
382635386e3bSChristoph Lameter * have established with reasonable certainty that
382735386e3bSChristoph Lameter * we can do some work if the lock was obtained.
382835386e3bSChristoph Lameter */
382918bf8541SChristoph Lameter n = get_node(searchp, node);
383035386e3bSChristoph Lameter
3831ce8eb6c4SChristoph Lameter reap_alien(searchp, n);
38321da177e4SLinus Torvalds
383318726ca8SJoonsoo Kim drain_array(searchp, n, cpu_cache_get(searchp), node);
38341da177e4SLinus Torvalds
383535386e3bSChristoph Lameter /*
383635386e3bSChristoph Lameter * These are racy checks but it does not matter
383735386e3bSChristoph Lameter * if we skip one check or scan twice.
383835386e3bSChristoph Lameter */
3839ce8eb6c4SChristoph Lameter if (time_after(n->next_reap, jiffies))
384035386e3bSChristoph Lameter goto next;
38411da177e4SLinus Torvalds
38425f0985bbSJianyu Zhan n->next_reap = jiffies + REAPTIMEOUT_NODE;
38431da177e4SLinus Torvalds
384418726ca8SJoonsoo Kim drain_array(searchp, n, n->shared, node);
38451da177e4SLinus Torvalds
3846ce8eb6c4SChristoph Lameter if (n->free_touched)
3847ce8eb6c4SChristoph Lameter n->free_touched = 0;
3848ed11d9ebSChristoph Lameter else {
3849ed11d9ebSChristoph Lameter int freed;
3850ed11d9ebSChristoph Lameter
3851ce8eb6c4SChristoph Lameter freed = drain_freelist(searchp, n, (n->free_limit +
3852ed11d9ebSChristoph Lameter 5 * searchp->num - 1) / (5 * searchp->num));
3853ed11d9ebSChristoph Lameter STATS_ADD_REAPED(searchp, freed);
38541da177e4SLinus Torvalds }
385535386e3bSChristoph Lameter next:
38561da177e4SLinus Torvalds cond_resched();
38571da177e4SLinus Torvalds }
38581da177e4SLinus Torvalds check_irq_on();
385918004c5dSChristoph Lameter mutex_unlock(&slab_mutex);
38608fce4d8eSChristoph Lameter next_reap_node();
38617c5cae36SChristoph Lameter out:
38621da177e4SLinus Torvalds /* Set up the next iteration */
3863a9f2a846SVlastimil Babka schedule_delayed_work_on(smp_processor_id(), work,
3864a9f2a846SVlastimil Babka round_jiffies_relative(REAPTIMEOUT_AC));
38651da177e4SLinus Torvalds }
38661da177e4SLinus Torvalds
38670d7561c6SGlauber Costa void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
38681da177e4SLinus Torvalds {
3869f728b0a5SGreg Thelen unsigned long active_objs, num_objs, active_slabs;
3870bf00bd34SDavid Rientjes unsigned long total_slabs = 0, free_objs = 0, shared_avail = 0;
3871bf00bd34SDavid Rientjes unsigned long free_slabs = 0;
3872e498be7dSChristoph Lameter int node;
3873ce8eb6c4SChristoph Lameter struct kmem_cache_node *n;
38741da177e4SLinus Torvalds
387518bf8541SChristoph Lameter for_each_kmem_cache_node(cachep, node, n) {
3876ca3b9b91SRavikiran G Thirumalai check_irq_on();
3877b539ce9fSJiri Kosina raw_spin_lock_irq(&n->list_lock);
3878e498be7dSChristoph Lameter
3879bf00bd34SDavid Rientjes total_slabs += n->total_slabs;
3880bf00bd34SDavid Rientjes free_slabs += n->free_slabs;
3881f728b0a5SGreg Thelen free_objs += n->free_objects;
388207a63c41SAruna Ramakrishna
3883ce8eb6c4SChristoph Lameter if (n->shared)
3884ce8eb6c4SChristoph Lameter shared_avail += n->shared->avail;
3885e498be7dSChristoph Lameter
3886b539ce9fSJiri Kosina raw_spin_unlock_irq(&n->list_lock);
3887e498be7dSChristoph Lameter }
3888bf00bd34SDavid Rientjes num_objs = total_slabs * cachep->num;
3889bf00bd34SDavid Rientjes active_slabs = total_slabs - free_slabs;
3890f728b0a5SGreg Thelen active_objs = num_objs - free_objs;
38911da177e4SLinus Torvalds
38920d7561c6SGlauber Costa sinfo->active_objs = active_objs;
38930d7561c6SGlauber Costa sinfo->num_objs = num_objs;
38940d7561c6SGlauber Costa sinfo->active_slabs = active_slabs;
3895bf00bd34SDavid Rientjes sinfo->num_slabs = total_slabs;
38960d7561c6SGlauber Costa sinfo->shared_avail = shared_avail;
38970d7561c6SGlauber Costa sinfo->limit = cachep->limit;
38980d7561c6SGlauber Costa sinfo->batchcount = cachep->batchcount;
38990d7561c6SGlauber Costa sinfo->shared = cachep->shared;
39000d7561c6SGlauber Costa sinfo->objects_per_slab = cachep->num;
39010d7561c6SGlauber Costa sinfo->cache_order = cachep->gfporder;
39020d7561c6SGlauber Costa }
39030d7561c6SGlauber Costa
39040d7561c6SGlauber Costa void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *cachep)
39050d7561c6SGlauber Costa {
39061da177e4SLinus Torvalds #if STATS
3907ce8eb6c4SChristoph Lameter { /* node stats */
39081da177e4SLinus Torvalds unsigned long high = cachep->high_mark;
39091da177e4SLinus Torvalds unsigned long allocs = cachep->num_allocations;
39101da177e4SLinus Torvalds unsigned long grown = cachep->grown;
39111da177e4SLinus Torvalds unsigned long reaped = cachep->reaped;
39121da177e4SLinus Torvalds unsigned long errors = cachep->errors;
39131da177e4SLinus Torvalds unsigned long max_freeable = cachep->max_freeable;
39141da177e4SLinus Torvalds unsigned long node_allocs = cachep->node_allocs;
3915e498be7dSChristoph Lameter unsigned long node_frees = cachep->node_frees;
3916fb7faf33SRavikiran G Thirumalai unsigned long overflows = cachep->node_overflow;
39171da177e4SLinus Torvalds
3918756a025fSJoe Perches seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu %4lu %4lu %4lu %4lu %4lu",
3919e92dd4fdSJoe Perches allocs, high, grown,
3920a737b3e2SAndrew Morton reaped, errors, max_freeable, node_allocs,
3921fb7faf33SRavikiran G Thirumalai node_frees, overflows);
39221da177e4SLinus Torvalds }
39231da177e4SLinus Torvalds /* cpu stats */
39241da177e4SLinus Torvalds {
39251da177e4SLinus Torvalds unsigned long allochit = atomic_read(&cachep->allochit);
39261da177e4SLinus Torvalds unsigned long allocmiss = atomic_read(&cachep->allocmiss);
39271da177e4SLinus Torvalds unsigned long freehit = atomic_read(&cachep->freehit);
39281da177e4SLinus Torvalds unsigned long freemiss = atomic_read(&cachep->freemiss);
39291da177e4SLinus Torvalds
39301da177e4SLinus Torvalds seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
39311da177e4SLinus Torvalds allochit, allocmiss, freehit, freemiss);
39321da177e4SLinus Torvalds }
39331da177e4SLinus Torvalds #endif
39341da177e4SLinus Torvalds }
39351da177e4SLinus Torvalds
39361da177e4SLinus Torvalds #define MAX_SLABINFO_WRITE 128
39371da177e4SLinus Torvalds /**
39381da177e4SLinus Torvalds * slabinfo_write - Tuning for the slab allocator
39391da177e4SLinus Torvalds * @file: unused
39401da177e4SLinus Torvalds * @buffer: user buffer
39411da177e4SLinus Torvalds * @count: data length
39421da177e4SLinus Torvalds * @ppos: unused
3943a862f68aSMike Rapoport *
3944a862f68aSMike Rapoport * Return: %0 on success, negative error code otherwise.
39451da177e4SLinus Torvalds */
3946b7454ad3SGlauber Costa ssize_t slabinfo_write(struct file *file, const char __user *buffer,
39471da177e4SLinus Torvalds size_t count, loff_t *ppos)
39481da177e4SLinus Torvalds {
39491da177e4SLinus Torvalds char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
39501da177e4SLinus Torvalds int limit, batchcount, shared, res;
39517a7c381dSChristoph Hellwig struct kmem_cache *cachep;
39521da177e4SLinus Torvalds
39531da177e4SLinus Torvalds if (count > MAX_SLABINFO_WRITE)
39541da177e4SLinus Torvalds return -EINVAL;
39551da177e4SLinus Torvalds if (copy_from_user(&kbuf, buffer, count))
39561da177e4SLinus Torvalds return -EFAULT;
39571da177e4SLinus Torvalds kbuf[MAX_SLABINFO_WRITE] = '\0';
39581da177e4SLinus Torvalds
39591da177e4SLinus Torvalds tmp = strchr(kbuf, ' ');
39601da177e4SLinus Torvalds if (!tmp)
39611da177e4SLinus Torvalds return -EINVAL;
39621da177e4SLinus Torvalds *tmp = '\0';
39631da177e4SLinus Torvalds tmp++;
39641da177e4SLinus Torvalds if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
39651da177e4SLinus Torvalds return -EINVAL;
39661da177e4SLinus Torvalds
39671da177e4SLinus Torvalds /* Find the cache in the chain of caches. */
396818004c5dSChristoph Lameter mutex_lock(&slab_mutex);
39691da177e4SLinus Torvalds res = -EINVAL;
397018004c5dSChristoph Lameter list_for_each_entry(cachep, &slab_caches, list) {
39711da177e4SLinus Torvalds if (!strcmp(cachep->name, kbuf)) {
3972a737b3e2SAndrew Morton if (limit < 1 || batchcount < 1 ||
3973b28a02deSPekka Enberg batchcount > limit || shared < 0) {
3974e498be7dSChristoph Lameter res = 0;
39751da177e4SLinus Torvalds } else {
3976e498be7dSChristoph Lameter res = do_tune_cpucache(cachep, limit,
397783b519e8SPekka Enberg batchcount, shared,
397883b519e8SPekka Enberg GFP_KERNEL);
39791da177e4SLinus Torvalds }
39801da177e4SLinus Torvalds break;
39811da177e4SLinus Torvalds }
39821da177e4SLinus Torvalds }
398318004c5dSChristoph Lameter mutex_unlock(&slab_mutex);
39841da177e4SLinus Torvalds if (res >= 0)
39851da177e4SLinus Torvalds res = count;
39861da177e4SLinus Torvalds return res;
39871da177e4SLinus Torvalds }
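/*
 * Illustrative tuning write (a sketch; the cache name and values are
 * examples only): the buffer must hold
 * "<cache-name> <limit> <batchcount> <shared>", e.g. from userspace:
 *
 *	echo "dentry 120 60 8" > /proc/slabinfo
 *
 * The new settings are applied via do_tune_cpucache() only when limit and
 * batchcount are at least 1, batchcount does not exceed limit, and shared
 * is not negative.
 */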
3988871751e2SAl Viro
398904385fc5SKees Cook #ifdef CONFIG_HARDENED_USERCOPY
399004385fc5SKees Cook /*
3991afcc90f8SKees Cook * Rejects incorrectly sized objects and objects that are to be copied
3992afcc90f8SKees Cook * to/from userspace but do not fall entirely within the containing slab
3993afcc90f8SKees Cook * cache's usercopy region.
399404385fc5SKees Cook *
399504385fc5SKees Cook * Returns NULL if check passes, otherwise const char * to name of cache
399604385fc5SKees Cook * to indicate an error.
399704385fc5SKees Cook */
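/*
 * Worked example (hypothetical numbers): with cachep->useroffset == 16 and
 * cachep->usersize == 64, a copy of n bytes at offset within the object is
 * allowed only if offset >= 16 and offset + n <= 80.
 */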
39980b3eb091SMatthew Wilcox (Oracle) void __check_heap_object(const void *ptr, unsigned long n,
39990b3eb091SMatthew Wilcox (Oracle) const struct slab *slab, bool to_user)
400004385fc5SKees Cook {
400104385fc5SKees Cook struct kmem_cache *cachep;
400204385fc5SKees Cook unsigned int objnr;
400304385fc5SKees Cook unsigned long offset;
400404385fc5SKees Cook
4005219667c2SAndrey Konovalov ptr = kasan_reset_tag(ptr);
4006219667c2SAndrey Konovalov
400704385fc5SKees Cook /* Find and validate object. */
40080b3eb091SMatthew Wilcox (Oracle) cachep = slab->slab_cache;
400940f3bf0cSVlastimil Babka objnr = obj_to_index(cachep, slab, (void *)ptr);
401004385fc5SKees Cook BUG_ON(objnr >= cachep->num);
401104385fc5SKees Cook
401204385fc5SKees Cook /* Find offset within object. */
4013d3fb45f3SAlexander Potapenko if (is_kfence_address(ptr))
4014d3fb45f3SAlexander Potapenko offset = ptr - kfence_object_start(ptr);
4015d3fb45f3SAlexander Potapenko else
40167981e67eSVlastimil Babka offset = ptr - index_to_obj(cachep, slab, objnr) - obj_offset(cachep);
401704385fc5SKees Cook
4018afcc90f8SKees Cook /* Allow address range falling entirely within usercopy region. */
4019afcc90f8SKees Cook if (offset >= cachep->useroffset &&
4020afcc90f8SKees Cook offset - cachep->useroffset <= cachep->usersize &&
4021afcc90f8SKees Cook n <= cachep->useroffset - offset + cachep->usersize)
4022f4e6e289SKees Cook return;
402304385fc5SKees Cook
4024f4e6e289SKees Cook usercopy_abort("SLAB object", cachep->name, to_user, offset, n);
402504385fc5SKees Cook }
402604385fc5SKees Cook #endif /* CONFIG_HARDENED_USERCOPY */
4027