11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * linux/mm/mempool.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * memory buffer pool support. Such pools are mostly used 51da177e4SLinus Torvalds * for guaranteed, deadlock-free memory allocations during 61da177e4SLinus Torvalds * extreme VM load. 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * started by Ingo Molnar, Copyright (C) 2001 91da177e4SLinus Torvalds */ 101da177e4SLinus Torvalds 111da177e4SLinus Torvalds #include <linux/mm.h> 121da177e4SLinus Torvalds #include <linux/slab.h> 13*b95f1b31SPaul Gortmaker #include <linux/export.h> 141da177e4SLinus Torvalds #include <linux/mempool.h> 151da177e4SLinus Torvalds #include <linux/blkdev.h> 161da177e4SLinus Torvalds #include <linux/writeback.h> 171da177e4SLinus Torvalds 181da177e4SLinus Torvalds static void add_element(mempool_t *pool, void *element) 191da177e4SLinus Torvalds { 201da177e4SLinus Torvalds BUG_ON(pool->curr_nr >= pool->min_nr); 211da177e4SLinus Torvalds pool->elements[pool->curr_nr++] = element; 221da177e4SLinus Torvalds } 231da177e4SLinus Torvalds 241da177e4SLinus Torvalds static void *remove_element(mempool_t *pool) 251da177e4SLinus Torvalds { 261da177e4SLinus Torvalds BUG_ON(pool->curr_nr <= 0); 271da177e4SLinus Torvalds return pool->elements[--pool->curr_nr]; 281da177e4SLinus Torvalds } 291da177e4SLinus Torvalds 301da177e4SLinus Torvalds static void free_pool(mempool_t *pool) 311da177e4SLinus Torvalds { 321da177e4SLinus Torvalds while (pool->curr_nr) { 331da177e4SLinus Torvalds void *element = remove_element(pool); 341da177e4SLinus Torvalds pool->free(element, pool->pool_data); 351da177e4SLinus Torvalds } 361da177e4SLinus Torvalds kfree(pool->elements); 371da177e4SLinus Torvalds kfree(pool); 381da177e4SLinus Torvalds } 391da177e4SLinus Torvalds 401da177e4SLinus Torvalds /** 411da177e4SLinus Torvalds * mempool_create - create a memory pool 421da177e4SLinus Torvalds * @min_nr: the minimum number of elements guaranteed to be 431da177e4SLinus Torvalds * allocated for this pool. 441da177e4SLinus Torvalds * @alloc_fn: user-defined element-allocation function. 451da177e4SLinus Torvalds * @free_fn: user-defined element-freeing function. 461da177e4SLinus Torvalds * @pool_data: optional private data available to the user-defined functions. 471da177e4SLinus Torvalds * 481da177e4SLinus Torvalds * this function creates and allocates a guaranteed size, preallocated 4972fd4a35SRobert P. J. Day * memory pool. The pool can be used from the mempool_alloc() and mempool_free() 501da177e4SLinus Torvalds * functions. This function might sleep. Both the alloc_fn() and the free_fn() 5172fd4a35SRobert P. J. Day * functions might sleep - as long as the mempool_alloc() function is not called 521da177e4SLinus Torvalds * from IRQ contexts. 531da177e4SLinus Torvalds */ 541da177e4SLinus Torvalds mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, 551da177e4SLinus Torvalds mempool_free_t *free_fn, void *pool_data) 561da177e4SLinus Torvalds { 571946089aSChristoph Lameter return mempool_create_node(min_nr,alloc_fn,free_fn, pool_data,-1); 581946089aSChristoph Lameter } 591946089aSChristoph Lameter EXPORT_SYMBOL(mempool_create); 601da177e4SLinus Torvalds 611946089aSChristoph Lameter mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, 621946089aSChristoph Lameter mempool_free_t *free_fn, void *pool_data, int node_id) 631946089aSChristoph Lameter { 641946089aSChristoph Lameter mempool_t *pool; 6594f6030cSChristoph Lameter pool = kmalloc_node(sizeof(*pool), GFP_KERNEL | __GFP_ZERO, node_id); 661da177e4SLinus Torvalds if (!pool) 671da177e4SLinus Torvalds return NULL; 681946089aSChristoph Lameter pool->elements = kmalloc_node(min_nr * sizeof(void *), 691946089aSChristoph Lameter GFP_KERNEL, node_id); 701da177e4SLinus Torvalds if (!pool->elements) { 711da177e4SLinus Torvalds kfree(pool); 721da177e4SLinus Torvalds return NULL; 731da177e4SLinus Torvalds } 741da177e4SLinus Torvalds spin_lock_init(&pool->lock); 751da177e4SLinus Torvalds pool->min_nr = min_nr; 761da177e4SLinus Torvalds pool->pool_data = pool_data; 771da177e4SLinus Torvalds init_waitqueue_head(&pool->wait); 781da177e4SLinus Torvalds pool->alloc = alloc_fn; 791da177e4SLinus Torvalds pool->free = free_fn; 801da177e4SLinus Torvalds 811da177e4SLinus Torvalds /* 821da177e4SLinus Torvalds * First pre-allocate the guaranteed number of buffers. 831da177e4SLinus Torvalds */ 841da177e4SLinus Torvalds while (pool->curr_nr < pool->min_nr) { 851da177e4SLinus Torvalds void *element; 861da177e4SLinus Torvalds 871da177e4SLinus Torvalds element = pool->alloc(GFP_KERNEL, pool->pool_data); 881da177e4SLinus Torvalds if (unlikely(!element)) { 891da177e4SLinus Torvalds free_pool(pool); 901da177e4SLinus Torvalds return NULL; 911da177e4SLinus Torvalds } 921da177e4SLinus Torvalds add_element(pool, element); 931da177e4SLinus Torvalds } 941da177e4SLinus Torvalds return pool; 951da177e4SLinus Torvalds } 961946089aSChristoph Lameter EXPORT_SYMBOL(mempool_create_node); 971da177e4SLinus Torvalds 981da177e4SLinus Torvalds /** 991da177e4SLinus Torvalds * mempool_resize - resize an existing memory pool 1001da177e4SLinus Torvalds * @pool: pointer to the memory pool which was allocated via 1011da177e4SLinus Torvalds * mempool_create(). 1021da177e4SLinus Torvalds * @new_min_nr: the new minimum number of elements guaranteed to be 1031da177e4SLinus Torvalds * allocated for this pool. 1041da177e4SLinus Torvalds * @gfp_mask: the usual allocation bitmask. 1051da177e4SLinus Torvalds * 1061da177e4SLinus Torvalds * This function shrinks/grows the pool. In the case of growing, 1071da177e4SLinus Torvalds * it cannot be guaranteed that the pool will be grown to the new 1081da177e4SLinus Torvalds * size immediately, but new mempool_free() calls will refill it. 1091da177e4SLinus Torvalds * 1101da177e4SLinus Torvalds * Note, the caller must guarantee that no mempool_destroy is called 1111da177e4SLinus Torvalds * while this function is running. mempool_alloc() & mempool_free() 1121da177e4SLinus Torvalds * might be called (eg. from IRQ contexts) while this function executes. 1131da177e4SLinus Torvalds */ 114dd0fc66fSAl Viro int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask) 1151da177e4SLinus Torvalds { 1161da177e4SLinus Torvalds void *element; 1171da177e4SLinus Torvalds void **new_elements; 1181da177e4SLinus Torvalds unsigned long flags; 1191da177e4SLinus Torvalds 1201da177e4SLinus Torvalds BUG_ON(new_min_nr <= 0); 1211da177e4SLinus Torvalds 1221da177e4SLinus Torvalds spin_lock_irqsave(&pool->lock, flags); 1231da177e4SLinus Torvalds if (new_min_nr <= pool->min_nr) { 1241da177e4SLinus Torvalds while (new_min_nr < pool->curr_nr) { 1251da177e4SLinus Torvalds element = remove_element(pool); 1261da177e4SLinus Torvalds spin_unlock_irqrestore(&pool->lock, flags); 1271da177e4SLinus Torvalds pool->free(element, pool->pool_data); 1281da177e4SLinus Torvalds spin_lock_irqsave(&pool->lock, flags); 1291da177e4SLinus Torvalds } 1301da177e4SLinus Torvalds pool->min_nr = new_min_nr; 1311da177e4SLinus Torvalds goto out_unlock; 1321da177e4SLinus Torvalds } 1331da177e4SLinus Torvalds spin_unlock_irqrestore(&pool->lock, flags); 1341da177e4SLinus Torvalds 1351da177e4SLinus Torvalds /* Grow the pool */ 1361da177e4SLinus Torvalds new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask); 1371da177e4SLinus Torvalds if (!new_elements) 1381da177e4SLinus Torvalds return -ENOMEM; 1391da177e4SLinus Torvalds 1401da177e4SLinus Torvalds spin_lock_irqsave(&pool->lock, flags); 1411da177e4SLinus Torvalds if (unlikely(new_min_nr <= pool->min_nr)) { 1421da177e4SLinus Torvalds /* Raced, other resize will do our work */ 1431da177e4SLinus Torvalds spin_unlock_irqrestore(&pool->lock, flags); 1441da177e4SLinus Torvalds kfree(new_elements); 1451da177e4SLinus Torvalds goto out; 1461da177e4SLinus Torvalds } 1471da177e4SLinus Torvalds memcpy(new_elements, pool->elements, 1481da177e4SLinus Torvalds pool->curr_nr * sizeof(*new_elements)); 1491da177e4SLinus Torvalds kfree(pool->elements); 1501da177e4SLinus Torvalds pool->elements = new_elements; 1511da177e4SLinus Torvalds pool->min_nr = new_min_nr; 1521da177e4SLinus Torvalds 1531da177e4SLinus Torvalds while (pool->curr_nr < pool->min_nr) { 1541da177e4SLinus Torvalds spin_unlock_irqrestore(&pool->lock, flags); 1551da177e4SLinus Torvalds element = pool->alloc(gfp_mask, pool->pool_data); 1561da177e4SLinus Torvalds if (!element) 1571da177e4SLinus Torvalds goto out; 1581da177e4SLinus Torvalds spin_lock_irqsave(&pool->lock, flags); 1591da177e4SLinus Torvalds if (pool->curr_nr < pool->min_nr) { 1601da177e4SLinus Torvalds add_element(pool, element); 1611da177e4SLinus Torvalds } else { 1621da177e4SLinus Torvalds spin_unlock_irqrestore(&pool->lock, flags); 1631da177e4SLinus Torvalds pool->free(element, pool->pool_data); /* Raced */ 1641da177e4SLinus Torvalds goto out; 1651da177e4SLinus Torvalds } 1661da177e4SLinus Torvalds } 1671da177e4SLinus Torvalds out_unlock: 1681da177e4SLinus Torvalds spin_unlock_irqrestore(&pool->lock, flags); 1691da177e4SLinus Torvalds out: 1701da177e4SLinus Torvalds return 0; 1711da177e4SLinus Torvalds } 1721da177e4SLinus Torvalds EXPORT_SYMBOL(mempool_resize); 1731da177e4SLinus Torvalds 1741da177e4SLinus Torvalds /** 1751da177e4SLinus Torvalds * mempool_destroy - deallocate a memory pool 1761da177e4SLinus Torvalds * @pool: pointer to the memory pool which was allocated via 1771da177e4SLinus Torvalds * mempool_create(). 1781da177e4SLinus Torvalds * 1791da177e4SLinus Torvalds * this function only sleeps if the free_fn() function sleeps. The caller 1801da177e4SLinus Torvalds * has to guarantee that all elements have been returned to the pool (ie: 1811da177e4SLinus Torvalds * freed) prior to calling mempool_destroy(). 1821da177e4SLinus Torvalds */ 1831da177e4SLinus Torvalds void mempool_destroy(mempool_t *pool) 1841da177e4SLinus Torvalds { 185f02e1fafSEric Sesterhenn /* Check for outstanding elements */ 186f02e1fafSEric Sesterhenn BUG_ON(pool->curr_nr != pool->min_nr); 1871da177e4SLinus Torvalds free_pool(pool); 1881da177e4SLinus Torvalds } 1891da177e4SLinus Torvalds EXPORT_SYMBOL(mempool_destroy); 1901da177e4SLinus Torvalds 1911da177e4SLinus Torvalds /** 1921da177e4SLinus Torvalds * mempool_alloc - allocate an element from a specific memory pool 1931da177e4SLinus Torvalds * @pool: pointer to the memory pool which was allocated via 1941da177e4SLinus Torvalds * mempool_create(). 1951da177e4SLinus Torvalds * @gfp_mask: the usual allocation bitmask. 1961da177e4SLinus Torvalds * 19772fd4a35SRobert P. J. Day * this function only sleeps if the alloc_fn() function sleeps or 1981da177e4SLinus Torvalds * returns NULL. Note that due to preallocation, this function 1991da177e4SLinus Torvalds * *never* fails when called from process contexts. (it might 2001da177e4SLinus Torvalds * fail if called from an IRQ context.) 2011da177e4SLinus Torvalds */ 202dd0fc66fSAl Viro void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask) 2031da177e4SLinus Torvalds { 2041da177e4SLinus Torvalds void *element; 2051da177e4SLinus Torvalds unsigned long flags; 20601890a4cSBenjamin LaHaise wait_queue_t wait; 2076daa0e28SAl Viro gfp_t gfp_temp; 20820a77776SNick Piggin 20920a77776SNick Piggin might_sleep_if(gfp_mask & __GFP_WAIT); 210b84a35beSNick Piggin 211b84a35beSNick Piggin gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */ 212b84a35beSNick Piggin gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */ 213b84a35beSNick Piggin gfp_mask |= __GFP_NOWARN; /* failures are OK */ 2141da177e4SLinus Torvalds 21520a77776SNick Piggin gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO); 21620a77776SNick Piggin 2171da177e4SLinus Torvalds repeat_alloc: 21820a77776SNick Piggin 21920a77776SNick Piggin element = pool->alloc(gfp_temp, pool->pool_data); 2201da177e4SLinus Torvalds if (likely(element != NULL)) 2211da177e4SLinus Torvalds return element; 2221da177e4SLinus Torvalds 2231da177e4SLinus Torvalds spin_lock_irqsave(&pool->lock, flags); 2241da177e4SLinus Torvalds if (likely(pool->curr_nr)) { 2251da177e4SLinus Torvalds element = remove_element(pool); 2261da177e4SLinus Torvalds spin_unlock_irqrestore(&pool->lock, flags); 2271da177e4SLinus Torvalds return element; 2281da177e4SLinus Torvalds } 2291da177e4SLinus Torvalds spin_unlock_irqrestore(&pool->lock, flags); 2301da177e4SLinus Torvalds 2311da177e4SLinus Torvalds /* We must not sleep in the GFP_ATOMIC case */ 2321da177e4SLinus Torvalds if (!(gfp_mask & __GFP_WAIT)) 2331da177e4SLinus Torvalds return NULL; 2341da177e4SLinus Torvalds 23520a77776SNick Piggin /* Now start performing page reclaim */ 23620a77776SNick Piggin gfp_temp = gfp_mask; 23701890a4cSBenjamin LaHaise init_wait(&wait); 2381da177e4SLinus Torvalds prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); 239d59dd462Sakpm@osdl.org smp_mb(); 2400b1d647aSPavel Mironchik if (!pool->curr_nr) { 2410b1d647aSPavel Mironchik /* 2420b1d647aSPavel Mironchik * FIXME: this should be io_schedule(). The timeout is there 2430b1d647aSPavel Mironchik * as a workaround for some DM problems in 2.6.18. 2440b1d647aSPavel Mironchik */ 2450b1d647aSPavel Mironchik io_schedule_timeout(5*HZ); 2460b1d647aSPavel Mironchik } 2471da177e4SLinus Torvalds finish_wait(&pool->wait, &wait); 2481da177e4SLinus Torvalds 2491da177e4SLinus Torvalds goto repeat_alloc; 2501da177e4SLinus Torvalds } 2511da177e4SLinus Torvalds EXPORT_SYMBOL(mempool_alloc); 2521da177e4SLinus Torvalds 2531da177e4SLinus Torvalds /** 2541da177e4SLinus Torvalds * mempool_free - return an element to the pool. 2551da177e4SLinus Torvalds * @element: pool element pointer. 2561da177e4SLinus Torvalds * @pool: pointer to the memory pool which was allocated via 2571da177e4SLinus Torvalds * mempool_create(). 2581da177e4SLinus Torvalds * 2591da177e4SLinus Torvalds * this function only sleeps if the free_fn() function sleeps. 2601da177e4SLinus Torvalds */ 2611da177e4SLinus Torvalds void mempool_free(void *element, mempool_t *pool) 2621da177e4SLinus Torvalds { 2631da177e4SLinus Torvalds unsigned long flags; 2641da177e4SLinus Torvalds 265c80e7a82SRusty Russell if (unlikely(element == NULL)) 266c80e7a82SRusty Russell return; 267c80e7a82SRusty Russell 268d59dd462Sakpm@osdl.org smp_mb(); 2691da177e4SLinus Torvalds if (pool->curr_nr < pool->min_nr) { 2701da177e4SLinus Torvalds spin_lock_irqsave(&pool->lock, flags); 2711da177e4SLinus Torvalds if (pool->curr_nr < pool->min_nr) { 2721da177e4SLinus Torvalds add_element(pool, element); 2731da177e4SLinus Torvalds spin_unlock_irqrestore(&pool->lock, flags); 2741da177e4SLinus Torvalds wake_up(&pool->wait); 2751da177e4SLinus Torvalds return; 2761da177e4SLinus Torvalds } 2771da177e4SLinus Torvalds spin_unlock_irqrestore(&pool->lock, flags); 2781da177e4SLinus Torvalds } 2791da177e4SLinus Torvalds pool->free(element, pool->pool_data); 2801da177e4SLinus Torvalds } 2811da177e4SLinus Torvalds EXPORT_SYMBOL(mempool_free); 2821da177e4SLinus Torvalds 2831da177e4SLinus Torvalds /* 2841da177e4SLinus Torvalds * A commonly used alloc and free fn. 2851da177e4SLinus Torvalds */ 286dd0fc66fSAl Viro void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data) 2871da177e4SLinus Torvalds { 288fcc234f8SPekka Enberg struct kmem_cache *mem = pool_data; 2891da177e4SLinus Torvalds return kmem_cache_alloc(mem, gfp_mask); 2901da177e4SLinus Torvalds } 2911da177e4SLinus Torvalds EXPORT_SYMBOL(mempool_alloc_slab); 2921da177e4SLinus Torvalds 2931da177e4SLinus Torvalds void mempool_free_slab(void *element, void *pool_data) 2941da177e4SLinus Torvalds { 295fcc234f8SPekka Enberg struct kmem_cache *mem = pool_data; 2961da177e4SLinus Torvalds kmem_cache_free(mem, element); 2971da177e4SLinus Torvalds } 2981da177e4SLinus Torvalds EXPORT_SYMBOL(mempool_free_slab); 2996e0678f3SMatthew Dobson 3006e0678f3SMatthew Dobson /* 30153184082SMatthew Dobson * A commonly used alloc and free fn that kmalloc/kfrees the amount of memory 302183ff22bSSimon Arlott * specified by pool_data 30353184082SMatthew Dobson */ 30453184082SMatthew Dobson void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data) 30553184082SMatthew Dobson { 3065e2f89b5SFigo.zhang size_t size = (size_t)pool_data; 30753184082SMatthew Dobson return kmalloc(size, gfp_mask); 30853184082SMatthew Dobson } 30953184082SMatthew Dobson EXPORT_SYMBOL(mempool_kmalloc); 31053184082SMatthew Dobson 31153184082SMatthew Dobson void mempool_kfree(void *element, void *pool_data) 31253184082SMatthew Dobson { 31353184082SMatthew Dobson kfree(element); 31453184082SMatthew Dobson } 31553184082SMatthew Dobson EXPORT_SYMBOL(mempool_kfree); 31653184082SMatthew Dobson 31753184082SMatthew Dobson /* 3186e0678f3SMatthew Dobson * A simple mempool-backed page allocator that allocates pages 3196e0678f3SMatthew Dobson * of the order specified by pool_data. 3206e0678f3SMatthew Dobson */ 3216e0678f3SMatthew Dobson void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data) 3226e0678f3SMatthew Dobson { 3236e0678f3SMatthew Dobson int order = (int)(long)pool_data; 3246e0678f3SMatthew Dobson return alloc_pages(gfp_mask, order); 3256e0678f3SMatthew Dobson } 3266e0678f3SMatthew Dobson EXPORT_SYMBOL(mempool_alloc_pages); 3276e0678f3SMatthew Dobson 3286e0678f3SMatthew Dobson void mempool_free_pages(void *element, void *pool_data) 3296e0678f3SMatthew Dobson { 3306e0678f3SMatthew Dobson int order = (int)(long)pool_data; 3316e0678f3SMatthew Dobson __free_pages(element, order); 3326e0678f3SMatthew Dobson } 3336e0678f3SMatthew Dobson EXPORT_SYMBOL(mempool_free_pages); 334