11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * linux/mm/mempool.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * memory buffer pool support. Such pools are mostly used 51da177e4SLinus Torvalds * for guaranteed, deadlock-free memory allocations during 61da177e4SLinus Torvalds * extreme VM load. 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * started by Ingo Molnar, Copyright (C) 2001 91da177e4SLinus Torvalds */ 101da177e4SLinus Torvalds 111da177e4SLinus Torvalds #include <linux/mm.h> 121da177e4SLinus Torvalds #include <linux/slab.h> 13b95f1b31SPaul Gortmaker #include <linux/export.h> 141da177e4SLinus Torvalds #include <linux/mempool.h> 151da177e4SLinus Torvalds #include <linux/blkdev.h> 161da177e4SLinus Torvalds #include <linux/writeback.h> 171da177e4SLinus Torvalds 181da177e4SLinus Torvalds static void add_element(mempool_t *pool, void *element) 191da177e4SLinus Torvalds { 201da177e4SLinus Torvalds BUG_ON(pool->curr_nr >= pool->min_nr); 211da177e4SLinus Torvalds pool->elements[pool->curr_nr++] = element; 221da177e4SLinus Torvalds } 231da177e4SLinus Torvalds 241da177e4SLinus Torvalds static void *remove_element(mempool_t *pool) 251da177e4SLinus Torvalds { 261da177e4SLinus Torvalds BUG_ON(pool->curr_nr <= 0); 271da177e4SLinus Torvalds return pool->elements[--pool->curr_nr]; 281da177e4SLinus Torvalds } 291da177e4SLinus Torvalds 301da177e4SLinus Torvalds static void free_pool(mempool_t *pool) 311da177e4SLinus Torvalds { 321da177e4SLinus Torvalds while (pool->curr_nr) { 331da177e4SLinus Torvalds void *element = remove_element(pool); 341da177e4SLinus Torvalds pool->free(element, pool->pool_data); 351da177e4SLinus Torvalds } 361da177e4SLinus Torvalds kfree(pool->elements); 371da177e4SLinus Torvalds kfree(pool); 381da177e4SLinus Torvalds } 391da177e4SLinus Torvalds 401da177e4SLinus Torvalds /** 411da177e4SLinus Torvalds * mempool_create - create a memory pool 421da177e4SLinus Torvalds * @min_nr: the minimum number of elements guaranteed to be 431da177e4SLinus Torvalds * allocated for this pool. 441da177e4SLinus Torvalds * @alloc_fn: user-defined element-allocation function. 451da177e4SLinus Torvalds * @free_fn: user-defined element-freeing function. 461da177e4SLinus Torvalds * @pool_data: optional private data available to the user-defined functions. 471da177e4SLinus Torvalds * 481da177e4SLinus Torvalds * this function creates and allocates a guaranteed size, preallocated 4972fd4a35SRobert P. J. Day * memory pool. The pool can be used from the mempool_alloc() and mempool_free() 501da177e4SLinus Torvalds * functions. This function might sleep. Both the alloc_fn() and the free_fn() 5172fd4a35SRobert P. J. Day * functions might sleep - as long as the mempool_alloc() function is not called 521da177e4SLinus Torvalds * from IRQ contexts. 531da177e4SLinus Torvalds */ 541da177e4SLinus Torvalds mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, 551da177e4SLinus Torvalds mempool_free_t *free_fn, void *pool_data) 561da177e4SLinus Torvalds { 571946089aSChristoph Lameter return mempool_create_node(min_nr,alloc_fn,free_fn, pool_data,-1); 581946089aSChristoph Lameter } 591946089aSChristoph Lameter EXPORT_SYMBOL(mempool_create); 601da177e4SLinus Torvalds 611946089aSChristoph Lameter mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, 621946089aSChristoph Lameter mempool_free_t *free_fn, void *pool_data, int node_id) 631946089aSChristoph Lameter { 641946089aSChristoph Lameter mempool_t *pool; 6594f6030cSChristoph Lameter pool = kmalloc_node(sizeof(*pool), GFP_KERNEL | __GFP_ZERO, node_id); 661da177e4SLinus Torvalds if (!pool) 671da177e4SLinus Torvalds return NULL; 681946089aSChristoph Lameter pool->elements = kmalloc_node(min_nr * sizeof(void *), 691946089aSChristoph Lameter GFP_KERNEL, node_id); 701da177e4SLinus Torvalds if (!pool->elements) { 711da177e4SLinus Torvalds kfree(pool); 721da177e4SLinus Torvalds return NULL; 731da177e4SLinus Torvalds } 741da177e4SLinus Torvalds spin_lock_init(&pool->lock); 751da177e4SLinus Torvalds pool->min_nr = min_nr; 761da177e4SLinus Torvalds pool->pool_data = pool_data; 771da177e4SLinus Torvalds init_waitqueue_head(&pool->wait); 781da177e4SLinus Torvalds pool->alloc = alloc_fn; 791da177e4SLinus Torvalds pool->free = free_fn; 801da177e4SLinus Torvalds 811da177e4SLinus Torvalds /* 821da177e4SLinus Torvalds * First pre-allocate the guaranteed number of buffers. 831da177e4SLinus Torvalds */ 841da177e4SLinus Torvalds while (pool->curr_nr < pool->min_nr) { 851da177e4SLinus Torvalds void *element; 861da177e4SLinus Torvalds 871da177e4SLinus Torvalds element = pool->alloc(GFP_KERNEL, pool->pool_data); 881da177e4SLinus Torvalds if (unlikely(!element)) { 891da177e4SLinus Torvalds free_pool(pool); 901da177e4SLinus Torvalds return NULL; 911da177e4SLinus Torvalds } 921da177e4SLinus Torvalds add_element(pool, element); 931da177e4SLinus Torvalds } 941da177e4SLinus Torvalds return pool; 951da177e4SLinus Torvalds } 961946089aSChristoph Lameter EXPORT_SYMBOL(mempool_create_node); 971da177e4SLinus Torvalds 981da177e4SLinus Torvalds /** 991da177e4SLinus Torvalds * mempool_resize - resize an existing memory pool 1001da177e4SLinus Torvalds * @pool: pointer to the memory pool which was allocated via 1011da177e4SLinus Torvalds * mempool_create(). 1021da177e4SLinus Torvalds * @new_min_nr: the new minimum number of elements guaranteed to be 1031da177e4SLinus Torvalds * allocated for this pool. 1041da177e4SLinus Torvalds * @gfp_mask: the usual allocation bitmask. 1051da177e4SLinus Torvalds * 1061da177e4SLinus Torvalds * This function shrinks/grows the pool. In the case of growing, 1071da177e4SLinus Torvalds * it cannot be guaranteed that the pool will be grown to the new 1081da177e4SLinus Torvalds * size immediately, but new mempool_free() calls will refill it. 1091da177e4SLinus Torvalds * 1101da177e4SLinus Torvalds * Note, the caller must guarantee that no mempool_destroy is called 1111da177e4SLinus Torvalds * while this function is running. mempool_alloc() & mempool_free() 1121da177e4SLinus Torvalds * might be called (eg. from IRQ contexts) while this function executes. 1131da177e4SLinus Torvalds */ 114dd0fc66fSAl Viro int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask) 1151da177e4SLinus Torvalds { 1161da177e4SLinus Torvalds void *element; 1171da177e4SLinus Torvalds void **new_elements; 1181da177e4SLinus Torvalds unsigned long flags; 1191da177e4SLinus Torvalds 1201da177e4SLinus Torvalds BUG_ON(new_min_nr <= 0); 1211da177e4SLinus Torvalds 1221da177e4SLinus Torvalds spin_lock_irqsave(&pool->lock, flags); 1231da177e4SLinus Torvalds if (new_min_nr <= pool->min_nr) { 1241da177e4SLinus Torvalds while (new_min_nr < pool->curr_nr) { 1251da177e4SLinus Torvalds element = remove_element(pool); 1261da177e4SLinus Torvalds spin_unlock_irqrestore(&pool->lock, flags); 1271da177e4SLinus Torvalds pool->free(element, pool->pool_data); 1281da177e4SLinus Torvalds spin_lock_irqsave(&pool->lock, flags); 1291da177e4SLinus Torvalds } 1301da177e4SLinus Torvalds pool->min_nr = new_min_nr; 1311da177e4SLinus Torvalds goto out_unlock; 1321da177e4SLinus Torvalds } 1331da177e4SLinus Torvalds spin_unlock_irqrestore(&pool->lock, flags); 1341da177e4SLinus Torvalds 1351da177e4SLinus Torvalds /* Grow the pool */ 1361da177e4SLinus Torvalds new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask); 1371da177e4SLinus Torvalds if (!new_elements) 1381da177e4SLinus Torvalds return -ENOMEM; 1391da177e4SLinus Torvalds 1401da177e4SLinus Torvalds spin_lock_irqsave(&pool->lock, flags); 1411da177e4SLinus Torvalds if (unlikely(new_min_nr <= pool->min_nr)) { 1421da177e4SLinus Torvalds /* Raced, other resize will do our work */ 1431da177e4SLinus Torvalds spin_unlock_irqrestore(&pool->lock, flags); 1441da177e4SLinus Torvalds kfree(new_elements); 1451da177e4SLinus Torvalds goto out; 1461da177e4SLinus Torvalds } 1471da177e4SLinus Torvalds memcpy(new_elements, pool->elements, 1481da177e4SLinus Torvalds pool->curr_nr * sizeof(*new_elements)); 1491da177e4SLinus Torvalds kfree(pool->elements); 1501da177e4SLinus Torvalds pool->elements = new_elements; 1511da177e4SLinus Torvalds pool->min_nr = new_min_nr; 1521da177e4SLinus Torvalds 1531da177e4SLinus Torvalds while (pool->curr_nr < pool->min_nr) { 1541da177e4SLinus Torvalds spin_unlock_irqrestore(&pool->lock, flags); 1551da177e4SLinus Torvalds element = pool->alloc(gfp_mask, pool->pool_data); 1561da177e4SLinus Torvalds if (!element) 1571da177e4SLinus Torvalds goto out; 1581da177e4SLinus Torvalds spin_lock_irqsave(&pool->lock, flags); 1591da177e4SLinus Torvalds if (pool->curr_nr < pool->min_nr) { 1601da177e4SLinus Torvalds add_element(pool, element); 1611da177e4SLinus Torvalds } else { 1621da177e4SLinus Torvalds spin_unlock_irqrestore(&pool->lock, flags); 1631da177e4SLinus Torvalds pool->free(element, pool->pool_data); /* Raced */ 1641da177e4SLinus Torvalds goto out; 1651da177e4SLinus Torvalds } 1661da177e4SLinus Torvalds } 1671da177e4SLinus Torvalds out_unlock: 1681da177e4SLinus Torvalds spin_unlock_irqrestore(&pool->lock, flags); 1691da177e4SLinus Torvalds out: 1701da177e4SLinus Torvalds return 0; 1711da177e4SLinus Torvalds } 1721da177e4SLinus Torvalds EXPORT_SYMBOL(mempool_resize); 1731da177e4SLinus Torvalds 1741da177e4SLinus Torvalds /** 1751da177e4SLinus Torvalds * mempool_destroy - deallocate a memory pool 1761da177e4SLinus Torvalds * @pool: pointer to the memory pool which was allocated via 1771da177e4SLinus Torvalds * mempool_create(). 1781da177e4SLinus Torvalds * 1791da177e4SLinus Torvalds * this function only sleeps if the free_fn() function sleeps. The caller 1801da177e4SLinus Torvalds * has to guarantee that all elements have been returned to the pool (ie: 1811da177e4SLinus Torvalds * freed) prior to calling mempool_destroy(). 1821da177e4SLinus Torvalds */ 1831da177e4SLinus Torvalds void mempool_destroy(mempool_t *pool) 1841da177e4SLinus Torvalds { 185f02e1fafSEric Sesterhenn /* Check for outstanding elements */ 186f02e1fafSEric Sesterhenn BUG_ON(pool->curr_nr != pool->min_nr); 1871da177e4SLinus Torvalds free_pool(pool); 1881da177e4SLinus Torvalds } 1891da177e4SLinus Torvalds EXPORT_SYMBOL(mempool_destroy); 1901da177e4SLinus Torvalds 1911da177e4SLinus Torvalds /** 1921da177e4SLinus Torvalds * mempool_alloc - allocate an element from a specific memory pool 1931da177e4SLinus Torvalds * @pool: pointer to the memory pool which was allocated via 1941da177e4SLinus Torvalds * mempool_create(). 1951da177e4SLinus Torvalds * @gfp_mask: the usual allocation bitmask. 1961da177e4SLinus Torvalds * 19772fd4a35SRobert P. J. Day * this function only sleeps if the alloc_fn() function sleeps or 1981da177e4SLinus Torvalds * returns NULL. Note that due to preallocation, this function 1991da177e4SLinus Torvalds * *never* fails when called from process contexts. (it might 2001da177e4SLinus Torvalds * fail if called from an IRQ context.) 2011da177e4SLinus Torvalds */ 202dd0fc66fSAl Viro void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask) 2031da177e4SLinus Torvalds { 2041da177e4SLinus Torvalds void *element; 2051da177e4SLinus Torvalds unsigned long flags; 20601890a4cSBenjamin LaHaise wait_queue_t wait; 2076daa0e28SAl Viro gfp_t gfp_temp; 20820a77776SNick Piggin 20920a77776SNick Piggin might_sleep_if(gfp_mask & __GFP_WAIT); 210b84a35beSNick Piggin 211b84a35beSNick Piggin gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */ 212b84a35beSNick Piggin gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */ 213b84a35beSNick Piggin gfp_mask |= __GFP_NOWARN; /* failures are OK */ 2141da177e4SLinus Torvalds 21520a77776SNick Piggin gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO); 21620a77776SNick Piggin 2171da177e4SLinus Torvalds repeat_alloc: 21820a77776SNick Piggin 21920a77776SNick Piggin element = pool->alloc(gfp_temp, pool->pool_data); 2201da177e4SLinus Torvalds if (likely(element != NULL)) 2211da177e4SLinus Torvalds return element; 2221da177e4SLinus Torvalds 2231da177e4SLinus Torvalds spin_lock_irqsave(&pool->lock, flags); 2241da177e4SLinus Torvalds if (likely(pool->curr_nr)) { 2251da177e4SLinus Torvalds element = remove_element(pool); 2261da177e4SLinus Torvalds spin_unlock_irqrestore(&pool->lock, flags); 227*5b990546STejun Heo /* paired with rmb in mempool_free(), read comment there */ 228*5b990546STejun Heo smp_wmb(); 2291da177e4SLinus Torvalds return element; 2301da177e4SLinus Torvalds } 2311da177e4SLinus Torvalds 2321da177e4SLinus Torvalds /* We must not sleep in the GFP_ATOMIC case */ 233*5b990546STejun Heo if (!(gfp_mask & __GFP_WAIT)) { 234*5b990546STejun Heo spin_unlock_irqrestore(&pool->lock, flags); 2351da177e4SLinus Torvalds return NULL; 236*5b990546STejun Heo } 2371da177e4SLinus Torvalds 238*5b990546STejun Heo /* Let's wait for someone else to return an element to @pool */ 23920a77776SNick Piggin gfp_temp = gfp_mask; 24001890a4cSBenjamin LaHaise init_wait(&wait); 2411da177e4SLinus Torvalds prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); 242*5b990546STejun Heo 243*5b990546STejun Heo spin_unlock_irqrestore(&pool->lock, flags); 244*5b990546STejun Heo 2450b1d647aSPavel Mironchik /* 246*5b990546STejun Heo * FIXME: this should be io_schedule(). The timeout is there as a 247*5b990546STejun Heo * workaround for some DM problems in 2.6.18. 2480b1d647aSPavel Mironchik */ 2490b1d647aSPavel Mironchik io_schedule_timeout(5*HZ); 2501da177e4SLinus Torvalds 251*5b990546STejun Heo finish_wait(&pool->wait, &wait); 2521da177e4SLinus Torvalds goto repeat_alloc; 2531da177e4SLinus Torvalds } 2541da177e4SLinus Torvalds EXPORT_SYMBOL(mempool_alloc); 2551da177e4SLinus Torvalds 2561da177e4SLinus Torvalds /** 2571da177e4SLinus Torvalds * mempool_free - return an element to the pool. 2581da177e4SLinus Torvalds * @element: pool element pointer. 2591da177e4SLinus Torvalds * @pool: pointer to the memory pool which was allocated via 2601da177e4SLinus Torvalds * mempool_create(). 2611da177e4SLinus Torvalds * 2621da177e4SLinus Torvalds * this function only sleeps if the free_fn() function sleeps. 2631da177e4SLinus Torvalds */ 2641da177e4SLinus Torvalds void mempool_free(void *element, mempool_t *pool) 2651da177e4SLinus Torvalds { 2661da177e4SLinus Torvalds unsigned long flags; 2671da177e4SLinus Torvalds 268c80e7a82SRusty Russell if (unlikely(element == NULL)) 269c80e7a82SRusty Russell return; 270c80e7a82SRusty Russell 271*5b990546STejun Heo /* 272*5b990546STejun Heo * Paired with the wmb in mempool_alloc(). The preceding read is 273*5b990546STejun Heo * for @element and the following @pool->curr_nr. This ensures 274*5b990546STejun Heo * that the visible value of @pool->curr_nr is from after the 275*5b990546STejun Heo * allocation of @element. This is necessary for fringe cases 276*5b990546STejun Heo * where @element was passed to this task without going through 277*5b990546STejun Heo * barriers. 278*5b990546STejun Heo * 279*5b990546STejun Heo * For example, assume @p is %NULL at the beginning and one task 280*5b990546STejun Heo * performs "p = mempool_alloc(...);" while another task is doing 281*5b990546STejun Heo * "while (!p) cpu_relax(); mempool_free(p, ...);". This function 282*5b990546STejun Heo * may end up using curr_nr value which is from before allocation 283*5b990546STejun Heo * of @p without the following rmb. 284*5b990546STejun Heo */ 285*5b990546STejun Heo smp_rmb(); 286*5b990546STejun Heo 287*5b990546STejun Heo /* 288*5b990546STejun Heo * For correctness, we need a test which is guaranteed to trigger 289*5b990546STejun Heo * if curr_nr + #allocated == min_nr. Testing curr_nr < min_nr 290*5b990546STejun Heo * without locking achieves that and refilling as soon as possible 291*5b990546STejun Heo * is desirable. 292*5b990546STejun Heo * 293*5b990546STejun Heo * Because curr_nr visible here is always a value after the 294*5b990546STejun Heo * allocation of @element, any task which decremented curr_nr below 295*5b990546STejun Heo * min_nr is guaranteed to see curr_nr < min_nr unless curr_nr gets 296*5b990546STejun Heo * incremented to min_nr afterwards. If curr_nr gets incremented 297*5b990546STejun Heo * to min_nr after the allocation of @element, the elements 298*5b990546STejun Heo * allocated after that are subject to the same guarantee. 299*5b990546STejun Heo * 300*5b990546STejun Heo * Waiters happen iff curr_nr is 0 and the above guarantee also 301*5b990546STejun Heo * ensures that there will be frees which return elements to the 302*5b990546STejun Heo * pool waking up the waiters. 303*5b990546STejun Heo */ 3041da177e4SLinus Torvalds if (pool->curr_nr < pool->min_nr) { 3051da177e4SLinus Torvalds spin_lock_irqsave(&pool->lock, flags); 3061da177e4SLinus Torvalds if (pool->curr_nr < pool->min_nr) { 3071da177e4SLinus Torvalds add_element(pool, element); 3081da177e4SLinus Torvalds spin_unlock_irqrestore(&pool->lock, flags); 3091da177e4SLinus Torvalds wake_up(&pool->wait); 3101da177e4SLinus Torvalds return; 3111da177e4SLinus Torvalds } 3121da177e4SLinus Torvalds spin_unlock_irqrestore(&pool->lock, flags); 3131da177e4SLinus Torvalds } 3141da177e4SLinus Torvalds pool->free(element, pool->pool_data); 3151da177e4SLinus Torvalds } 3161da177e4SLinus Torvalds EXPORT_SYMBOL(mempool_free); 3171da177e4SLinus Torvalds 3181da177e4SLinus Torvalds /* 3191da177e4SLinus Torvalds * A commonly used alloc and free fn. 3201da177e4SLinus Torvalds */ 321dd0fc66fSAl Viro void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data) 3221da177e4SLinus Torvalds { 323fcc234f8SPekka Enberg struct kmem_cache *mem = pool_data; 3241da177e4SLinus Torvalds return kmem_cache_alloc(mem, gfp_mask); 3251da177e4SLinus Torvalds } 3261da177e4SLinus Torvalds EXPORT_SYMBOL(mempool_alloc_slab); 3271da177e4SLinus Torvalds 3281da177e4SLinus Torvalds void mempool_free_slab(void *element, void *pool_data) 3291da177e4SLinus Torvalds { 330fcc234f8SPekka Enberg struct kmem_cache *mem = pool_data; 3311da177e4SLinus Torvalds kmem_cache_free(mem, element); 3321da177e4SLinus Torvalds } 3331da177e4SLinus Torvalds EXPORT_SYMBOL(mempool_free_slab); 3346e0678f3SMatthew Dobson 3356e0678f3SMatthew Dobson /* 33653184082SMatthew Dobson * A commonly used alloc and free fn that kmalloc/kfrees the amount of memory 337183ff22bSSimon Arlott * specified by pool_data 33853184082SMatthew Dobson */ 33953184082SMatthew Dobson void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data) 34053184082SMatthew Dobson { 3415e2f89b5SFigo.zhang size_t size = (size_t)pool_data; 34253184082SMatthew Dobson return kmalloc(size, gfp_mask); 34353184082SMatthew Dobson } 34453184082SMatthew Dobson EXPORT_SYMBOL(mempool_kmalloc); 34553184082SMatthew Dobson 34653184082SMatthew Dobson void mempool_kfree(void *element, void *pool_data) 34753184082SMatthew Dobson { 34853184082SMatthew Dobson kfree(element); 34953184082SMatthew Dobson } 35053184082SMatthew Dobson EXPORT_SYMBOL(mempool_kfree); 35153184082SMatthew Dobson 35253184082SMatthew Dobson /* 3536e0678f3SMatthew Dobson * A simple mempool-backed page allocator that allocates pages 3546e0678f3SMatthew Dobson * of the order specified by pool_data. 3556e0678f3SMatthew Dobson */ 3566e0678f3SMatthew Dobson void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data) 3576e0678f3SMatthew Dobson { 3586e0678f3SMatthew Dobson int order = (int)(long)pool_data; 3596e0678f3SMatthew Dobson return alloc_pages(gfp_mask, order); 3606e0678f3SMatthew Dobson } 3616e0678f3SMatthew Dobson EXPORT_SYMBOL(mempool_alloc_pages); 3626e0678f3SMatthew Dobson 3636e0678f3SMatthew Dobson void mempool_free_pages(void *element, void *pool_data) 3646e0678f3SMatthew Dobson { 3656e0678f3SMatthew Dobson int order = (int)(long)pool_data; 3666e0678f3SMatthew Dobson __free_pages(element, order); 3676e0678f3SMatthew Dobson } 3686e0678f3SMatthew Dobson EXPORT_SYMBOL(mempool_free_pages); 369