11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * linux/mm/mempool.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * memory buffer pool support. Such pools are mostly used 51da177e4SLinus Torvalds * for guaranteed, deadlock-free memory allocations during 61da177e4SLinus Torvalds * extreme VM load. 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * started by Ingo Molnar, Copyright (C) 2001 91da177e4SLinus Torvalds */ 101da177e4SLinus Torvalds 111da177e4SLinus Torvalds #include <linux/mm.h> 121da177e4SLinus Torvalds #include <linux/slab.h> 13b95f1b31SPaul Gortmaker #include <linux/export.h> 141da177e4SLinus Torvalds #include <linux/mempool.h> 151da177e4SLinus Torvalds #include <linux/blkdev.h> 161da177e4SLinus Torvalds #include <linux/writeback.h> 171da177e4SLinus Torvalds 181da177e4SLinus Torvalds static void add_element(mempool_t *pool, void *element) 191da177e4SLinus Torvalds { 201da177e4SLinus Torvalds BUG_ON(pool->curr_nr >= pool->min_nr); 211da177e4SLinus Torvalds pool->elements[pool->curr_nr++] = element; 221da177e4SLinus Torvalds } 231da177e4SLinus Torvalds 241da177e4SLinus Torvalds static void *remove_element(mempool_t *pool) 251da177e4SLinus Torvalds { 261da177e4SLinus Torvalds BUG_ON(pool->curr_nr <= 0); 271da177e4SLinus Torvalds return pool->elements[--pool->curr_nr]; 281da177e4SLinus Torvalds } 291da177e4SLinus Torvalds 300565d317STejun Heo /** 310565d317STejun Heo * mempool_destroy - deallocate a memory pool 320565d317STejun Heo * @pool: pointer to the memory pool which was allocated via 330565d317STejun Heo * mempool_create(). 340565d317STejun Heo * 350565d317STejun Heo * Free all reserved elements in @pool and @pool itself. This function 360565d317STejun Heo * only sleeps if the free_fn() function sleeps. 
370565d317STejun Heo */ 380565d317STejun Heo void mempool_destroy(mempool_t *pool) 391da177e4SLinus Torvalds { 401da177e4SLinus Torvalds while (pool->curr_nr) { 411da177e4SLinus Torvalds void *element = remove_element(pool); 421da177e4SLinus Torvalds pool->free(element, pool->pool_data); 431da177e4SLinus Torvalds } 441da177e4SLinus Torvalds kfree(pool->elements); 451da177e4SLinus Torvalds kfree(pool); 461da177e4SLinus Torvalds } 470565d317STejun Heo EXPORT_SYMBOL(mempool_destroy); 481da177e4SLinus Torvalds 491da177e4SLinus Torvalds /** 501da177e4SLinus Torvalds * mempool_create - create a memory pool 511da177e4SLinus Torvalds * @min_nr: the minimum number of elements guaranteed to be 521da177e4SLinus Torvalds * allocated for this pool. 531da177e4SLinus Torvalds * @alloc_fn: user-defined element-allocation function. 541da177e4SLinus Torvalds * @free_fn: user-defined element-freeing function. 551da177e4SLinus Torvalds * @pool_data: optional private data available to the user-defined functions. 561da177e4SLinus Torvalds * 571da177e4SLinus Torvalds * this function creates and allocates a guaranteed size, preallocated 5872fd4a35SRobert P. J. Day * memory pool. The pool can be used from the mempool_alloc() and mempool_free() 591da177e4SLinus Torvalds * functions. This function might sleep. Both the alloc_fn() and the free_fn() 6072fd4a35SRobert P. J. Day * functions might sleep - as long as the mempool_alloc() function is not called 611da177e4SLinus Torvalds * from IRQ contexts. 
621da177e4SLinus Torvalds */ 631da177e4SLinus Torvalds mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, 641da177e4SLinus Torvalds mempool_free_t *free_fn, void *pool_data) 651da177e4SLinus Torvalds { 66a91a5ac6STejun Heo return mempool_create_node(min_nr,alloc_fn,free_fn, pool_data, 67a91a5ac6STejun Heo GFP_KERNEL, NUMA_NO_NODE); 681946089aSChristoph Lameter } 691946089aSChristoph Lameter EXPORT_SYMBOL(mempool_create); 701da177e4SLinus Torvalds 711946089aSChristoph Lameter mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, 72a91a5ac6STejun Heo mempool_free_t *free_fn, void *pool_data, 73a91a5ac6STejun Heo gfp_t gfp_mask, int node_id) 741946089aSChristoph Lameter { 751946089aSChristoph Lameter mempool_t *pool; 767b5219dbSJoe Perches pool = kzalloc_node(sizeof(*pool), gfp_mask, node_id); 771da177e4SLinus Torvalds if (!pool) 781da177e4SLinus Torvalds return NULL; 791946089aSChristoph Lameter pool->elements = kmalloc_node(min_nr * sizeof(void *), 80a91a5ac6STejun Heo gfp_mask, node_id); 811da177e4SLinus Torvalds if (!pool->elements) { 821da177e4SLinus Torvalds kfree(pool); 831da177e4SLinus Torvalds return NULL; 841da177e4SLinus Torvalds } 851da177e4SLinus Torvalds spin_lock_init(&pool->lock); 861da177e4SLinus Torvalds pool->min_nr = min_nr; 871da177e4SLinus Torvalds pool->pool_data = pool_data; 881da177e4SLinus Torvalds init_waitqueue_head(&pool->wait); 891da177e4SLinus Torvalds pool->alloc = alloc_fn; 901da177e4SLinus Torvalds pool->free = free_fn; 911da177e4SLinus Torvalds 921da177e4SLinus Torvalds /* 931da177e4SLinus Torvalds * First pre-allocate the guaranteed number of buffers. 
941da177e4SLinus Torvalds */ 951da177e4SLinus Torvalds while (pool->curr_nr < pool->min_nr) { 961da177e4SLinus Torvalds void *element; 971da177e4SLinus Torvalds 98a91a5ac6STejun Heo element = pool->alloc(gfp_mask, pool->pool_data); 991da177e4SLinus Torvalds if (unlikely(!element)) { 1000565d317STejun Heo mempool_destroy(pool); 1011da177e4SLinus Torvalds return NULL; 1021da177e4SLinus Torvalds } 1031da177e4SLinus Torvalds add_element(pool, element); 1041da177e4SLinus Torvalds } 1051da177e4SLinus Torvalds return pool; 1061da177e4SLinus Torvalds } 1071946089aSChristoph Lameter EXPORT_SYMBOL(mempool_create_node); 1081da177e4SLinus Torvalds 1091da177e4SLinus Torvalds /** 1101da177e4SLinus Torvalds * mempool_resize - resize an existing memory pool 1111da177e4SLinus Torvalds * @pool: pointer to the memory pool which was allocated via 1121da177e4SLinus Torvalds * mempool_create(). 1131da177e4SLinus Torvalds * @new_min_nr: the new minimum number of elements guaranteed to be 1141da177e4SLinus Torvalds * allocated for this pool. 1151da177e4SLinus Torvalds * @gfp_mask: the usual allocation bitmask. 1161da177e4SLinus Torvalds * 1171da177e4SLinus Torvalds * This function shrinks/grows the pool. In the case of growing, 1181da177e4SLinus Torvalds * it cannot be guaranteed that the pool will be grown to the new 1191da177e4SLinus Torvalds * size immediately, but new mempool_free() calls will refill it. 1201da177e4SLinus Torvalds * 1211da177e4SLinus Torvalds * Note, the caller must guarantee that no mempool_destroy is called 1221da177e4SLinus Torvalds * while this function is running. mempool_alloc() & mempool_free() 1231da177e4SLinus Torvalds * might be called (eg. from IRQ contexts) while this function executes. 
 */
int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask)
{
	void *element;
	void **new_elements;
	unsigned long flags;

	BUG_ON(new_min_nr <= 0);

	spin_lock_irqsave(&pool->lock, flags);
	if (new_min_nr <= pool->min_nr) {
		/*
		 * Shrinking: release surplus reserved elements.  The lock
		 * is dropped around each free_fn() call since it may
		 * sleep; curr_nr is re-evaluated after re-acquiring it.
		 */
		while (new_min_nr < pool->curr_nr) {
			element = remove_element(pool);
			spin_unlock_irqrestore(&pool->lock, flags);
			pool->free(element, pool->pool_data);
			spin_lock_irqsave(&pool->lock, flags);
		}
		pool->min_nr = new_min_nr;
		goto out_unlock;
	}
	spin_unlock_irqrestore(&pool->lock, flags);

	/* Grow the pool */
	new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask);
	if (!new_elements)
		return -ENOMEM;

	spin_lock_irqsave(&pool->lock, flags);
	if (unlikely(new_min_nr <= pool->min_nr)) {
		/* Raced, other resize will do our work */
		spin_unlock_irqrestore(&pool->lock, flags);
		kfree(new_elements);
		goto out;
	}
	/* Carry the current reserve over into the larger array. */
	memcpy(new_elements, pool->elements,
			pool->curr_nr * sizeof(*new_elements));
	kfree(pool->elements);
	pool->elements = new_elements;
	pool->min_nr = new_min_nr;

	/*
	 * Top the reserve up to the new minimum.  The lock is dropped
	 * around each alloc; a concurrent mempool_free() may refill the
	 * pool first, in which case our extra element is handed back to
	 * the backing allocator.
	 */
	while (pool->curr_nr < pool->min_nr) {
		spin_unlock_irqrestore(&pool->lock, flags);
		element = pool->alloc(gfp_mask, pool->pool_data);
		if (!element)
			goto out;
		spin_lock_irqsave(&pool->lock, flags);
		if (pool->curr_nr < pool->min_nr) {
			add_element(pool, element);
		} else {
			spin_unlock_irqrestore(&pool->lock, flags);
			pool->free(element, pool->pool_data);	/* Raced */
			goto out;
		}
	}
out_unlock:
	spin_unlock_irqrestore(&pool->lock, flags);
out:
	return 0;
}
EXPORT_SYMBOL(mempool_resize);

/**
 * mempool_alloc - allocate an element from a specific memory pool
 * @pool: pointer to the memory pool which was allocated via
 *	mempool_create().
 * @gfp_mask: the usual allocation bitmask.
 *
 * this function only sleeps if the alloc_fn() function sleeps or
 * returns NULL. Note that due to preallocation, this function
 * *never* fails when called from process contexts. (it might
 * fail if called from an IRQ context.)
 */
void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
{
	void *element;
	unsigned long flags;
	wait_queue_t wait;
	gfp_t gfp_temp;

	might_sleep_if(gfp_mask & __GFP_WAIT);

	gfp_mask |= __GFP_NOMEMALLOC;	/* don't allocate emergency reserves */
	gfp_mask |= __GFP_NORETRY;	/* don't loop in __alloc_pages */
	gfp_mask |= __GFP_NOWARN;	/* failures are OK */

	/*
	 * First attempt is made without waiting or IO so we fall back to
	 * the pool's reserve quickly instead of stalling in the allocator.
	 */
	gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO);

repeat_alloc:

	element = pool->alloc(gfp_temp, pool->pool_data);
	if (likely(element != NULL))
		return element;

	/* Backing allocator failed; try the preallocated reserve. */
	spin_lock_irqsave(&pool->lock, flags);
	if (likely(pool->curr_nr)) {
		element = remove_element(pool);
		spin_unlock_irqrestore(&pool->lock, flags);
		/* paired with rmb in mempool_free(), read comment there */
		smp_wmb();
		return element;
	}

	/*
	 * We use gfp mask w/o __GFP_WAIT or IO for the first round.  If
	 * alloc failed with that and @pool was empty, retry immediately.
	 */
	if (gfp_temp != gfp_mask) {
		spin_unlock_irqrestore(&pool->lock, flags);
		gfp_temp = gfp_mask;
		goto repeat_alloc;
	}

	/* We must not sleep if !__GFP_WAIT */
	if (!(gfp_mask & __GFP_WAIT)) {
		spin_unlock_irqrestore(&pool->lock, flags);
		return NULL;
	}

	/* Let's wait for someone else to return an element to @pool */
	init_wait(&wait);
	prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);

	/*
	 * The lock is dropped only after we are queued on pool->wait, so
	 * a mempool_free() between here and the schedule wakes us up.
	 */
	spin_unlock_irqrestore(&pool->lock, flags);

	/*
	 * FIXME: this should be io_schedule().  The timeout is there as a
	 * workaround for some DM problems in 2.6.18.
	 */
	io_schedule_timeout(5*HZ);

	finish_wait(&pool->wait, &wait);
	goto repeat_alloc;
}
EXPORT_SYMBOL(mempool_alloc);

/**
 * mempool_free - return an element to the pool.
 * @element: pool element pointer.
 * @pool: pointer to the memory pool which was allocated via
 *	mempool_create().
 *
 * this function only sleeps if the free_fn() function sleeps.
 */
void mempool_free(void *element, mempool_t *pool)
{
	unsigned long flags;

	/* Mirror kfree(): freeing NULL is a no-op. */
	if (unlikely(element == NULL))
		return;

	/*
	 * Paired with the wmb in mempool_alloc().  The preceding read is
	 * for @element and the following @pool->curr_nr.  This ensures
	 * that the visible value of @pool->curr_nr is from after the
	 * allocation of @element.  This is necessary for fringe cases
	 * where @element was passed to this task without going through
	 * barriers.
	 *
	 * For example, assume @p is %NULL at the beginning and one task
	 * performs "p = mempool_alloc(...);" while another task is doing
	 * "while (!p) cpu_relax(); mempool_free(p, ...);".  This function
	 * may end up using curr_nr value which is from before allocation
	 * of @p without the following rmb.
	 */
	smp_rmb();

	/*
	 * For correctness, we need a test which is guaranteed to trigger
	 * if curr_nr + #allocated == min_nr.  Testing curr_nr < min_nr
	 * without locking achieves that and refilling as soon as possible
	 * is desirable.
	 *
	 * Because curr_nr visible here is always a value after the
	 * allocation of @element, any task which decremented curr_nr below
	 * min_nr is guaranteed to see curr_nr < min_nr unless curr_nr gets
	 * incremented to min_nr afterwards.  If curr_nr gets incremented
	 * to min_nr after the allocation of @element, the elements
	 * allocated after that are subject to the same guarantee.
	 *
	 * Waiters happen iff curr_nr is 0 and the above guarantee also
	 * ensures that there will be frees which return elements to the
	 * pool waking up the waiters.
	 */
	if (unlikely(pool->curr_nr < pool->min_nr)) {
		spin_lock_irqsave(&pool->lock, flags);
		/* Re-check under the lock: a concurrent free may have refilled. */
		if (likely(pool->curr_nr < pool->min_nr)) {
			add_element(pool, element);
			spin_unlock_irqrestore(&pool->lock, flags);
			wake_up(&pool->wait);
			return;
		}
		spin_unlock_irqrestore(&pool->lock, flags);
	}
	/* Reserve is full -- hand the element back to the backing allocator. */
	pool->free(element, pool->pool_data);
}
EXPORT_SYMBOL(mempool_free);

/*
 * A commonly used alloc and free fn.
3231da177e4SLinus Torvalds */ 324dd0fc66fSAl Viro void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data) 3251da177e4SLinus Torvalds { 326fcc234f8SPekka Enberg struct kmem_cache *mem = pool_data; 3271da177e4SLinus Torvalds return kmem_cache_alloc(mem, gfp_mask); 3281da177e4SLinus Torvalds } 3291da177e4SLinus Torvalds EXPORT_SYMBOL(mempool_alloc_slab); 3301da177e4SLinus Torvalds 3311da177e4SLinus Torvalds void mempool_free_slab(void *element, void *pool_data) 3321da177e4SLinus Torvalds { 333fcc234f8SPekka Enberg struct kmem_cache *mem = pool_data; 3341da177e4SLinus Torvalds kmem_cache_free(mem, element); 3351da177e4SLinus Torvalds } 3361da177e4SLinus Torvalds EXPORT_SYMBOL(mempool_free_slab); 3376e0678f3SMatthew Dobson 3386e0678f3SMatthew Dobson /* 33953184082SMatthew Dobson * A commonly used alloc and free fn that kmalloc/kfrees the amount of memory 340183ff22bSSimon Arlott * specified by pool_data 34153184082SMatthew Dobson */ 34253184082SMatthew Dobson void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data) 34353184082SMatthew Dobson { 3445e2f89b5SFigo.zhang size_t size = (size_t)pool_data; 34553184082SMatthew Dobson return kmalloc(size, gfp_mask); 34653184082SMatthew Dobson } 34753184082SMatthew Dobson EXPORT_SYMBOL(mempool_kmalloc); 34853184082SMatthew Dobson 34953184082SMatthew Dobson void mempool_kfree(void *element, void *pool_data) 35053184082SMatthew Dobson { 35153184082SMatthew Dobson kfree(element); 35253184082SMatthew Dobson } 35353184082SMatthew Dobson EXPORT_SYMBOL(mempool_kfree); 35453184082SMatthew Dobson 35553184082SMatthew Dobson /* 3566e0678f3SMatthew Dobson * A simple mempool-backed page allocator that allocates pages 3576e0678f3SMatthew Dobson * of the order specified by pool_data. 
3586e0678f3SMatthew Dobson */ 3596e0678f3SMatthew Dobson void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data) 3606e0678f3SMatthew Dobson { 3616e0678f3SMatthew Dobson int order = (int)(long)pool_data; 3626e0678f3SMatthew Dobson return alloc_pages(gfp_mask, order); 3636e0678f3SMatthew Dobson } 3646e0678f3SMatthew Dobson EXPORT_SYMBOL(mempool_alloc_pages); 3656e0678f3SMatthew Dobson 3666e0678f3SMatthew Dobson void mempool_free_pages(void *element, void *pool_data) 3676e0678f3SMatthew Dobson { 3686e0678f3SMatthew Dobson int order = (int)(long)pool_data; 3696e0678f3SMatthew Dobson __free_pages(element, order); 3706e0678f3SMatthew Dobson } 3716e0678f3SMatthew Dobson EXPORT_SYMBOL(mempool_free_pages); 372