// SPDX-License-Identifier: GPL-2.0
/*
 * linux/mm/mempool.c
 *
 * memory buffer pool support. Such pools are mostly used
 * for guaranteed, deadlock-free memory allocations during
 * extreme VM load.
 *
 * started by Ingo Molnar, Copyright (C) 2001
 * debugging by David Rientjes, Copyright (C) 2015
 */

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/kasan.h>
#include <linux/kmemleak.h>
#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
#include "slab.h"

#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
static void poison_error(mempool_t *pool, void *element, size_t size,
                         size_t byte)
{
        const int nr = pool->curr_nr;
        const int start = max_t(int, byte - (BITS_PER_LONG / 8), 0);
        const int end = min_t(int, byte + (BITS_PER_LONG / 8), size);
        int i;

        pr_err("BUG: mempool element poison mismatch\n");
        pr_err("Mempool %p size %zu\n", pool, size);
        pr_err(" nr=%d @ %p: %s0x", nr, element, start > 0 ? "... " : "");
        for (i = start; i < end; i++)
                pr_cont("%x ", *(u8 *)(element + i));
        pr_cont("%s\n", end < size ? "..." : "");
        dump_stack();
}
static void __check_element(mempool_t *pool, void *element, size_t size)
{
        u8 *obj = element;
        size_t i;

        for (i = 0; i < size; i++) {
                u8 exp = (i < size - 1) ? POISON_FREE : POISON_END;

                if (obj[i] != exp) {
                        poison_error(pool, element, size, i);
                        return;
                }
        }
        memset(obj, POISON_INUSE, size);
}

static void check_element(mempool_t *pool, void *element)
{
        /* Mempools backed by slab allocator */
        if (pool->free == mempool_free_slab || pool->free == mempool_kfree)
                __check_element(pool, element, ksize(element));

        /* Mempools backed by page allocator */
        if (pool->free == mempool_free_pages) {
                int order = (int)(long)pool->pool_data;
                void *addr = kmap_atomic((struct page *)element);

                __check_element(pool, addr, 1UL << (PAGE_SHIFT + order));
                kunmap_atomic(addr);
        }
}

static void __poison_element(void *element, size_t size)
{
        u8 *obj = element;

        memset(obj, POISON_FREE, size - 1);
        obj[size - 1] = POISON_END;
}

static void poison_element(mempool_t *pool, void *element)
{
        /* Mempools backed by slab allocator */
        if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
                __poison_element(element, ksize(element));

        /* Mempools backed by page allocator */
        if (pool->alloc == mempool_alloc_pages) {
                int order = (int)(long)pool->pool_data;
                void *addr = kmap_atomic((struct page *)element);

                __poison_element(addr, 1UL << (PAGE_SHIFT + order));
                kunmap_atomic(addr);
        }
}
#else /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
static inline void check_element(mempool_t *pool, void *element)
{
}
static inline void poison_element(mempool_t *pool, void *element)
{
}
#endif /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
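/*
 * For reference, the poison bytes above come from <linux/poison.h>:
 * POISON_FREE is 0x6b, POISON_END is 0xa5 and POISON_INUSE is 0x5a.
 * An illustrative sketch of an eight-byte slab-backed element, first
 * while it sits poisoned in the pool and then after __check_element()
 * has verified and re-marked it on removal:
 *
 *      poisoned in pool:   6b 6b 6b 6b 6b 6b 6b a5
 *      handed to caller:   5a 5a 5a 5a 5a 5a 5a 5a
 *
 * Any mismatch in the first pattern is reported via poison_error().
 */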
static __always_inline void kasan_poison_element(mempool_t *pool, void *element)
{
        if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
                kasan_poison_kfree(element, _RET_IP_);
        if (pool->alloc == mempool_alloc_pages)
                kasan_free_pages(element, (unsigned long)pool->pool_data);
}

static void kasan_unpoison_element(mempool_t *pool, void *element)
{
        if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
                kasan_unpoison_slab(element);
        if (pool->alloc == mempool_alloc_pages)
                kasan_alloc_pages(element, (unsigned long)pool->pool_data);
}

static __always_inline void add_element(mempool_t *pool, void *element)
{
        BUG_ON(pool->curr_nr >= pool->min_nr);
        poison_element(pool, element);
        kasan_poison_element(pool, element);
        pool->elements[pool->curr_nr++] = element;
}

static void *remove_element(mempool_t *pool)
{
        void *element = pool->elements[--pool->curr_nr];

        BUG_ON(pool->curr_nr < 0);
        kasan_unpoison_element(pool, element);
        check_element(pool, element);
        return element;
}

/**
 * mempool_exit - exit a mempool initialized with mempool_init()
 * @pool:      pointer to the memory pool which was initialized with
 *             mempool_init().
 *
 * Free all reserved elements in @pool (but not @pool itself, which may be
 * embedded in another structure). This function only sleeps if the
 * free_fn() function sleeps.
 *
 * May be called on a zeroed but uninitialized mempool (i.e. allocated with
 * kzalloc()).
 */
void mempool_exit(mempool_t *pool)
{
        while (pool->curr_nr) {
                void *element = remove_element(pool);
                pool->free(element, pool->pool_data);
        }
        kfree(pool->elements);
        pool->elements = NULL;
}
EXPORT_SYMBOL(mempool_exit);
/**
 * mempool_destroy - deallocate a memory pool
 * @pool:      pointer to the memory pool which was allocated via
 *             mempool_create().
 *
 * Free all reserved elements in @pool and @pool itself. This function
 * only sleeps if the free_fn() function sleeps.
 */
void mempool_destroy(mempool_t *pool)
{
        if (unlikely(!pool))
                return;

        mempool_exit(pool);
        kfree(pool);
}
EXPORT_SYMBOL(mempool_destroy);

int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
                      mempool_free_t *free_fn, void *pool_data,
                      gfp_t gfp_mask, int node_id)
{
        spin_lock_init(&pool->lock);
        pool->min_nr = min_nr;
        pool->pool_data = pool_data;
        pool->alloc = alloc_fn;
        pool->free = free_fn;
        init_waitqueue_head(&pool->wait);

        pool->elements = kmalloc_array_node(min_nr, sizeof(void *),
                                            gfp_mask, node_id);
        if (!pool->elements)
                return -ENOMEM;

        /*
         * First pre-allocate the guaranteed number of buffers.
         */
        while (pool->curr_nr < pool->min_nr) {
                void *element;

                element = pool->alloc(gfp_mask, pool->pool_data);
                if (unlikely(!element)) {
                        mempool_exit(pool);
                        return -ENOMEM;
                }
                add_element(pool, element);
        }

        return 0;
}
EXPORT_SYMBOL(mempool_init_node);

/**
 * mempool_init - initialize a memory pool
 * @pool:      pointer to the memory pool that should be initialized
 * @min_nr:    the minimum number of elements guaranteed to be
 *             allocated for this pool.
 * @alloc_fn:  user-defined element-allocation function.
 * @free_fn:   user-defined element-freeing function.
 * @pool_data: optional private data available to the user-defined functions.
 *
 * Like mempool_create(), but initializes the pool in place (i.e. embedded
 * in another structure).
 *
 * Return: %0 on success, negative error code otherwise.
 */
int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
                 mempool_free_t *free_fn, void *pool_data)
{
        return mempool_init_node(pool, min_nr, alloc_fn, free_fn,
                                 pool_data, GFP_KERNEL, NUMA_NO_NODE);
}
EXPORT_SYMBOL(mempool_init);
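/*
 * An illustrative sketch, not part of this file ("struct foo_dev",
 * "foo_cache" and the element count are made up): embedding a mempool in
 * a driver structure and pairing mempool_init() with mempool_exit().
 * The backing cache must be constructor-less, see mempool_alloc_slab().
 *
 *      struct foo_dev {
 *              mempool_t pool;
 *      };
 *
 *      static struct kmem_cache *foo_cache;
 *
 *      static int foo_dev_setup(struct foo_dev *dev)
 *      {
 *              return mempool_init(&dev->pool, 16, mempool_alloc_slab,
 *                                  mempool_free_slab, foo_cache);
 *      }
 *
 *      static void foo_dev_teardown(struct foo_dev *dev)
 *      {
 *              mempool_exit(&dev->pool);
 *      }
 */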
/**
 * mempool_create - create a memory pool
 * @min_nr:    the minimum number of elements guaranteed to be
 *             allocated for this pool.
 * @alloc_fn:  user-defined element-allocation function.
 * @free_fn:   user-defined element-freeing function.
 * @pool_data: optional private data available to the user-defined functions.
 *
 * This function creates and allocates a guaranteed size, preallocated
 * memory pool. The pool can be used from the mempool_alloc() and
 * mempool_free() functions. This function might sleep. Both the alloc_fn()
 * and the free_fn() functions might sleep - as long as the mempool_alloc()
 * function is not called from IRQ contexts.
 *
 * Return: pointer to the created memory pool object or %NULL on error.
 */
mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
                          mempool_free_t *free_fn, void *pool_data)
{
        return mempool_create_node(min_nr, alloc_fn, free_fn, pool_data,
                                   GFP_KERNEL, NUMA_NO_NODE);
}
EXPORT_SYMBOL(mempool_create);

mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
                               mempool_free_t *free_fn, void *pool_data,
                               gfp_t gfp_mask, int node_id)
{
        mempool_t *pool;

        pool = kzalloc_node(sizeof(*pool), gfp_mask, node_id);
        if (!pool)
                return NULL;

        if (mempool_init_node(pool, min_nr, alloc_fn, free_fn, pool_data,
                              gfp_mask, node_id)) {
                kfree(pool);
                return NULL;
        }

        return pool;
}
EXPORT_SYMBOL(mempool_create_node);
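/*
 * An illustrative sketch of user-defined element functions (all names
 * made up): anything matching the mempool_alloc_t/mempool_free_t
 * signatures works, and @pool_data is passed through verbatim.
 *
 *      static void *foo_elem_alloc(gfp_t gfp_mask, void *pool_data)
 *      {
 *              return kmalloc((size_t)pool_data, gfp_mask);
 *      }
 *
 *      static void foo_elem_free(void *element, void *pool_data)
 *      {
 *              kfree(element);
 *      }
 *
 *      mempool_t *pool = mempool_create(4, foo_elem_alloc, foo_elem_free,
 *                                       (void *)(size_t)128);
 *      ...
 *      mempool_destroy(pool);          (NULL-safe, see above)
 */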
/**
 * mempool_resize - resize an existing memory pool
 * @pool:       pointer to the memory pool which was allocated via
 *              mempool_create().
 * @new_min_nr: the new minimum number of elements guaranteed to be
 *              allocated for this pool.
 *
 * This function shrinks/grows the pool. In the case of growing,
 * it cannot be guaranteed that the pool will be grown to the new
 * size immediately, but new mempool_free() calls will refill it.
 * This function may sleep.
 *
 * Note, the caller must guarantee that no mempool_destroy() is called
 * while this function is running. mempool_alloc() & mempool_free()
 * might be called (e.g. from IRQ contexts) while this function executes.
 *
 * Return: %0 on success, negative error code otherwise.
 */
int mempool_resize(mempool_t *pool, int new_min_nr)
{
        void *element;
        void **new_elements;
        unsigned long flags;

        BUG_ON(new_min_nr <= 0);
        might_sleep();

        spin_lock_irqsave(&pool->lock, flags);
        if (new_min_nr <= pool->min_nr) {
                while (new_min_nr < pool->curr_nr) {
                        element = remove_element(pool);
                        spin_unlock_irqrestore(&pool->lock, flags);
                        pool->free(element, pool->pool_data);
                        spin_lock_irqsave(&pool->lock, flags);
                }
                pool->min_nr = new_min_nr;
                goto out_unlock;
        }
        spin_unlock_irqrestore(&pool->lock, flags);

        /* Grow the pool */
        new_elements = kmalloc_array(new_min_nr, sizeof(*new_elements),
                                     GFP_KERNEL);
        if (!new_elements)
                return -ENOMEM;

        spin_lock_irqsave(&pool->lock, flags);
        if (unlikely(new_min_nr <= pool->min_nr)) {
                /* Raced, other resize will do our work */
                spin_unlock_irqrestore(&pool->lock, flags);
                kfree(new_elements);
                goto out;
        }
        memcpy(new_elements, pool->elements,
               pool->curr_nr * sizeof(*new_elements));
        kfree(pool->elements);
        pool->elements = new_elements;
        pool->min_nr = new_min_nr;

        while (pool->curr_nr < pool->min_nr) {
                spin_unlock_irqrestore(&pool->lock, flags);
                element = pool->alloc(GFP_KERNEL, pool->pool_data);
                if (!element)
                        goto out;
                spin_lock_irqsave(&pool->lock, flags);
                if (pool->curr_nr < pool->min_nr) {
                        add_element(pool, element);
                } else {
                        spin_unlock_irqrestore(&pool->lock, flags);
                        pool->free(element, pool->pool_data);   /* Raced */
                        goto out;
                }
        }
out_unlock:
        spin_unlock_irqrestore(&pool->lock, flags);
out:
        return 0;
}
EXPORT_SYMBOL(mempool_resize);
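/*
 * An illustrative sketch ("new_depth" is made up): growing the reserve
 * from process context, e.g. after a device reports a deeper queue.
 * Only failure to allocate the new element array is reported; a
 * partially refilled pool is topped up by later mempool_free() calls.
 *
 *      if (mempool_resize(pool, new_depth))
 *              pr_warn("foo: could not resize reserve to %d\n", new_depth);
 */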
/**
 * mempool_alloc - allocate an element from a specific memory pool
 * @pool:     pointer to the memory pool which was allocated via
 *            mempool_create().
 * @gfp_mask: the usual allocation bitmask.
 *
 * This function only sleeps if the alloc_fn() function sleeps or
 * returns NULL. Note that due to preallocation, this function
 * *never* fails when called from process contexts (it might fail
 * if called from an IRQ context).
 * Note: using __GFP_ZERO is not supported.
 *
 * Return: pointer to the allocated element or %NULL on error.
 */
void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
{
        void *element;
        unsigned long flags;
        wait_queue_entry_t wait;
        gfp_t gfp_temp;

        VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
        might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);

        gfp_mask |= __GFP_NOMEMALLOC;   /* don't allocate emergency reserves */
        gfp_mask |= __GFP_NORETRY;      /* don't loop in __alloc_pages */
        gfp_mask |= __GFP_NOWARN;       /* failures are OK */

        gfp_temp = gfp_mask & ~(__GFP_DIRECT_RECLAIM|__GFP_IO);

repeat_alloc:

        element = pool->alloc(gfp_temp, pool->pool_data);
        if (likely(element != NULL))
                return element;

        spin_lock_irqsave(&pool->lock, flags);
        if (likely(pool->curr_nr)) {
                element = remove_element(pool);
                spin_unlock_irqrestore(&pool->lock, flags);
                /* paired with rmb in mempool_free(), read comment there */
                smp_wmb();
                /*
                 * Update the allocation stack trace as this is more useful
                 * for debugging.
                 */
                kmemleak_update_trace(element);
                return element;
        }

        /*
         * We use gfp mask w/o direct reclaim or IO for the first round. If
         * alloc failed with that and @pool was empty, retry immediately.
         */
        if (gfp_temp != gfp_mask) {
                spin_unlock_irqrestore(&pool->lock, flags);
                gfp_temp = gfp_mask;
                goto repeat_alloc;
        }

        /* We must not sleep if !__GFP_DIRECT_RECLAIM */
        if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {
                spin_unlock_irqrestore(&pool->lock, flags);
                return NULL;
        }

        /* Let's wait for someone else to return an element to @pool */
        init_wait(&wait);
        prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);

        spin_unlock_irqrestore(&pool->lock, flags);

        /*
         * FIXME: this should be io_schedule(). The timeout is there as a
         * workaround for some DM problems in 2.6.18.
         */
        io_schedule_timeout(5*HZ);

        finish_wait(&pool->wait, &wait);
        goto repeat_alloc;
}
EXPORT_SYMBOL(mempool_alloc);
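/*
 * An illustrative sketch of the usual pairing in an I/O submission path
 * (names made up). GFP_NOIO allows direct reclaim, so the allocation may
 * sleep, but - per the guarantee above - it will not fail:
 *
 *      struct foo_req *req = mempool_alloc(req_pool, GFP_NOIO);
 *
 *      ... submit the request; its completion handler eventually runs ...
 *
 *      mempool_free(req, req_pool);
 *
 * The guarantee only holds if elements are returned reasonably promptly:
 * forward progress of waiters in mempool_alloc() depends on mempool_free().
 */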
/**
 * mempool_free - return an element to the pool.
 * @element: pool element pointer.
 * @pool:    pointer to the memory pool which was allocated via
 *           mempool_create().
 *
 * This function only sleeps if the free_fn() function sleeps.
 */
void mempool_free(void *element, mempool_t *pool)
{
        unsigned long flags;

        if (unlikely(element == NULL))
                return;

        /*
         * Paired with the wmb in mempool_alloc(). The preceding read is
         * for @element and the following @pool->curr_nr. This ensures
         * that the visible value of @pool->curr_nr is from after the
         * allocation of @element. This is necessary for fringe cases
         * where @element was passed to this task without going through
         * barriers.
         *
         * For example, assume @p is %NULL at the beginning and one task
         * performs "p = mempool_alloc(...);" while another task is doing
         * "while (!p) cpu_relax(); mempool_free(p, ...);". This function
         * may end up using curr_nr value which is from before allocation
         * of @p without the following rmb.
         */
        smp_rmb();

        /*
         * For correctness, we need a test which is guaranteed to trigger
         * if curr_nr + #allocated == min_nr. Testing curr_nr < min_nr
         * without locking achieves that and refilling as soon as possible
         * is desirable.
         *
         * Because curr_nr visible here is always a value after the
         * allocation of @element, any task which decremented curr_nr below
         * min_nr is guaranteed to see curr_nr < min_nr unless curr_nr gets
         * incremented to min_nr afterwards. If curr_nr gets incremented
         * to min_nr after the allocation of @element, the elements
         * allocated after that are subject to the same guarantee.
         *
         * Waiters happen iff curr_nr is 0 and the above guarantee also
         * ensures that there will be frees which return elements to the
         * pool waking up the waiters.
         */
        if (unlikely(READ_ONCE(pool->curr_nr) < pool->min_nr)) {
                spin_lock_irqsave(&pool->lock, flags);
                if (likely(pool->curr_nr < pool->min_nr)) {
                        add_element(pool, element);
                        spin_unlock_irqrestore(&pool->lock, flags);
                        wake_up(&pool->wait);
                        return;
                }
                spin_unlock_irqrestore(&pool->lock, flags);
        }
        pool->free(element, pool->pool_data);
}
EXPORT_SYMBOL(mempool_free);

/*
 * A commonly used alloc and free fn.
 */
void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data)
{
        struct kmem_cache *mem = pool_data;
        VM_BUG_ON(mem->ctor);
        return kmem_cache_alloc(mem, gfp_mask);
}
EXPORT_SYMBOL(mempool_alloc_slab);

void mempool_free_slab(void *element, void *pool_data)
{
        struct kmem_cache *mem = pool_data;
        kmem_cache_free(mem, element);
}
EXPORT_SYMBOL(mempool_free_slab);
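/*
 * A sketch of the matching shorthand from <linux/mempool.h>, which plugs
 * the two helpers above into mempool_create() ("foo_cache" is made up):
 *
 *      pool = mempool_create_slab_pool(16, foo_cache);
 *
 * Note the VM_BUG_ON() above: the backing cache must be constructor-less,
 * since pooled elements are recycled without re-running object
 * initialization (and may be poisoned while they sit in the pool).
 */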
/*
 * A commonly used alloc and free fn that kmallocs/kfrees the amount of
 * memory specified by pool_data.
 */
void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data)
{
        size_t size = (size_t)pool_data;
        return kmalloc(size, gfp_mask);
}
EXPORT_SYMBOL(mempool_kmalloc);

void mempool_kfree(void *element, void *pool_data)
{
        kfree(element);
}
EXPORT_SYMBOL(mempool_kfree);
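/*
 * Sketch of the corresponding <linux/mempool.h> shorthand (sizes made up):
 *
 *      pool = mempool_create_kmalloc_pool(8, 256);
 *
 * which is equivalent to
 *
 *      pool = mempool_create(8, mempool_kmalloc, mempool_kfree,
 *                            (void *)(size_t)256);
 */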
/*
 * A simple mempool-backed page allocator that allocates pages
 * of the order specified by pool_data.
 */
void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data)
{
        int order = (int)(long)pool_data;
        return alloc_pages(gfp_mask, order);
}
EXPORT_SYMBOL(mempool_alloc_pages);

void mempool_free_pages(void *element, void *pool_data)
{
        int order = (int)(long)pool_data;
        __free_pages(element, order);
}
EXPORT_SYMBOL(mempool_free_pages);
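/*
 * Sketch of a page-backed reserve via the <linux/mempool.h> shorthand:
 * eight order-1 (two-page) blocks. Elements are struct page pointers:
 *
 *      pool = mempool_create_page_pool(8, 1);
 *
 *      struct page *page = mempool_alloc(pool, GFP_NOIO);
 *      ...
 *      mempool_free(page, pool);
 */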