/*
 *  linux/mm/mempool.c
 *
 *  memory buffer pool support. Such pools are mostly used
 *  for guaranteed, deadlock-free memory allocations during
 *  extreme VM load.
 *
 *  started by Ingo Molnar, Copyright (C) 2001
 */

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>

/* Push a preallocated element onto the pool's reserve stack. */
static void add_element(mempool_t *pool, void *element)
{
	BUG_ON(pool->curr_nr >= pool->min_nr);
	pool->elements[pool->curr_nr++] = element;
}

/* Pop the most recently added element off the reserve stack. */
static void *remove_element(mempool_t *pool)
{
	BUG_ON(pool->curr_nr <= 0);
	return pool->elements[--pool->curr_nr];
}

/* Hand every reserved element back to free_fn(), then free the pool itself. */
static void free_pool(mempool_t *pool)
{
	while (pool->curr_nr) {
		void *element = remove_element(pool);
		pool->free(element, pool->pool_data);
	}
	kfree(pool->elements);
	kfree(pool);
}
/**
 * mempool_create - create a memory pool
 * @min_nr:    the minimum number of elements guaranteed to be
 *             allocated for this pool.
 * @alloc_fn:  user-defined element-allocation function.
 * @free_fn:   user-defined element-freeing function.
 * @pool_data: optional private data available to the user-defined functions.
 *
 * this function creates and allocates a guaranteed size, preallocated
 * memory pool. The pool can be used from the mempool_alloc and mempool_free
 * functions. This function might sleep. Both the alloc_fn() and the free_fn()
 * functions might sleep - as long as the mempool_alloc function is not called
 * from IRQ contexts.
 */
mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
				mempool_free_t *free_fn, void *pool_data)
{
	mempool_t *pool;

	pool = kmalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return NULL;
	memset(pool, 0, sizeof(*pool));
	pool->elements = kmalloc(min_nr * sizeof(void *), GFP_KERNEL);
	if (!pool->elements) {
		kfree(pool);
		return NULL;
	}
	spin_lock_init(&pool->lock);
	pool->min_nr = min_nr;
	pool->pool_data = pool_data;
	init_waitqueue_head(&pool->wait);
	pool->alloc = alloc_fn;
	pool->free = free_fn;

	/*
	 * First pre-allocate the guaranteed number of buffers.
	 */
	while (pool->curr_nr < pool->min_nr) {
		void *element;

		element = pool->alloc(GFP_KERNEL, pool->pool_data);
		if (unlikely(!element)) {
			free_pool(pool);
			return NULL;
		}
		add_element(pool, element);
	}
	return pool;
}
EXPORT_SYMBOL(mempool_create);
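
/*
 * Example (an illustrative sketch, not part of this file): a driver that
 * must always be able to allocate request structures, even under heavy
 * memory pressure, can back a mempool with its own slab cache.  The names
 * "my_req_cache", "my_req_pool" and the reserve of 16 elements below are
 * hypothetical.
 *
 *	static kmem_cache_t *my_req_cache;	(from kmem_cache_create())
 *	static mempool_t *my_req_pool;
 *
 *	my_req_pool = mempool_create(16, mempool_alloc_slab,
 *					mempool_free_slab, my_req_cache);
 *	if (!my_req_pool)
 *		return -ENOMEM;
 *
 * mempool_create() may sleep, so this belongs in process context,
 * e.g. module or driver initialisation.
 */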

/**
 * mempool_resize - resize an existing memory pool
 * @pool:       pointer to the memory pool which was allocated via
 *              mempool_create().
 * @new_min_nr: the new minimum number of elements guaranteed to be
 *              allocated for this pool.
 * @gfp_mask:   the usual allocation bitmask.
 *
 * This function shrinks/grows the pool. In the case of growing,
 * it cannot be guaranteed that the pool will be grown to the new
 * size immediately, but new mempool_free() calls will refill it.
 *
 * Note, the caller must guarantee that no mempool_destroy is called
 * while this function is running. mempool_alloc() & mempool_free()
 * might be called (eg. from IRQ contexts) while this function executes.
 */
int mempool_resize(mempool_t *pool, int new_min_nr, unsigned int __nocast gfp_mask)
{
	void *element;
	void **new_elements;
	unsigned long flags;

	BUG_ON(new_min_nr <= 0);

	spin_lock_irqsave(&pool->lock, flags);
	if (new_min_nr <= pool->min_nr) {
		while (new_min_nr < pool->curr_nr) {
			element = remove_element(pool);
			spin_unlock_irqrestore(&pool->lock, flags);
			pool->free(element, pool->pool_data);
			spin_lock_irqsave(&pool->lock, flags);
		}
		pool->min_nr = new_min_nr;
		goto out_unlock;
	}
	spin_unlock_irqrestore(&pool->lock, flags);

	/* Grow the pool */
	new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask);
	if (!new_elements)
		return -ENOMEM;

	spin_lock_irqsave(&pool->lock, flags);
	if (unlikely(new_min_nr <= pool->min_nr)) {
		/* Raced, other resize will do our work */
		spin_unlock_irqrestore(&pool->lock, flags);
		kfree(new_elements);
		goto out;
	}
	memcpy(new_elements, pool->elements,
			pool->curr_nr * sizeof(*new_elements));
	kfree(pool->elements);
	pool->elements = new_elements;
	pool->min_nr = new_min_nr;

	while (pool->curr_nr < pool->min_nr) {
		spin_unlock_irqrestore(&pool->lock, flags);
		element = pool->alloc(gfp_mask, pool->pool_data);
		if (!element)
			goto out;
		spin_lock_irqsave(&pool->lock, flags);
		if (pool->curr_nr < pool->min_nr) {
			add_element(pool, element);
		} else {
			spin_unlock_irqrestore(&pool->lock, flags);
			pool->free(element, pool->pool_data);	/* Raced */
			goto out;
		}
	}
out_unlock:
	spin_unlock_irqrestore(&pool->lock, flags);
out:
	return 0;
}
EXPORT_SYMBOL(mempool_resize);
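
/*
 * Example (an illustrative sketch, not part of this file): a driver whose
 * worst-case number of in-flight requests grows as devices are attached
 * could enlarge its reserve accordingly.  "my_req_pool" and the sizing
 * below are hypothetical.
 *
 *	if (mempool_resize(my_req_pool, 16 * nr_devices, GFP_KERNEL) < 0)
 *		printk(KERN_WARNING "could not grow request pool\n");
 *
 * Shrinking (passing a smaller new_min_nr) releases the surplus
 * preallocated elements to free_fn() right away.
 */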

/**
 * mempool_destroy - deallocate a memory pool
 * @pool: pointer to the memory pool which was allocated via
 *        mempool_create().
 *
 * this function only sleeps if the free_fn() function sleeps. The caller
 * has to guarantee that all elements have been returned to the pool (ie:
 * freed) prior to calling mempool_destroy().
 */
void mempool_destroy(mempool_t *pool)
{
	if (pool->curr_nr != pool->min_nr)
		BUG();		/* There were outstanding elements */
	free_pool(pool);
}
EXPORT_SYMBOL(mempool_destroy);

/**
 * mempool_alloc - allocate an element from a specific memory pool
 * @pool:     pointer to the memory pool which was allocated via
 *            mempool_create().
 * @gfp_mask: the usual allocation bitmask.
 *
 * this function only sleeps if the alloc_fn function sleeps or
 * returns NULL. Note that due to preallocation, this function
 * *never* fails when called from process contexts. (it might
 * fail if called from an IRQ context.)
 */
void * mempool_alloc(mempool_t *pool, unsigned int __nocast gfp_mask)
{
	void *element;
	unsigned long flags;
	DEFINE_WAIT(wait);
	int gfp_nowait;

	gfp_mask |= __GFP_NOMEMALLOC;	/* don't allocate emergency reserves */
	gfp_mask |= __GFP_NORETRY;	/* don't loop in __alloc_pages */
	gfp_mask |= __GFP_NOWARN;	/* failures are OK */
	gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO);

	might_sleep_if(gfp_mask & __GFP_WAIT);
repeat_alloc:
	element = pool->alloc(gfp_nowait, pool->pool_data);
	if (likely(element != NULL))
		return element;

	/*
	 * If the pool is less than 50% full and we can perform effective
	 * page reclaim then try harder to allocate an element.
	 */
	mb();
	if ((gfp_mask & __GFP_FS) && (gfp_mask != gfp_nowait) &&
			(pool->curr_nr <= pool->min_nr/2)) {
		element = pool->alloc(gfp_mask, pool->pool_data);
		if (likely(element != NULL))
			return element;
	}

	/*
	 * Kick the VM at this point.
	 */
	wakeup_bdflush(0);

	spin_lock_irqsave(&pool->lock, flags);
	if (likely(pool->curr_nr)) {
		element = remove_element(pool);
		spin_unlock_irqrestore(&pool->lock, flags);
		return element;
	}
	spin_unlock_irqrestore(&pool->lock, flags);

	/* We must not sleep in the GFP_ATOMIC case */
	if (!(gfp_mask & __GFP_WAIT))
		return NULL;

	prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
	mb();
	if (!pool->curr_nr)
		io_schedule();
	finish_wait(&pool->wait, &wait);

	goto repeat_alloc;
}
EXPORT_SYMBOL(mempool_alloc);
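
/*
 * Example (an illustrative sketch, not part of this file): the usual
 * allocate/submit/complete pattern.  As long as __GFP_WAIT is part of the
 * mask (as in GFP_NOIO or GFP_KERNEL), the allocation cannot fail from
 * process context; it sleeps until another user returns an element with
 * mempool_free().  "my_req_pool" and "struct my_request" are hypothetical.
 *
 *	struct my_request *req;
 *
 *	req = mempool_alloc(my_req_pool, GFP_NOIO);
 *	...
 *	mempool_free(req, my_req_pool);		(safe from IRQ context too)
 */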

/**
 * mempool_free - return an element to the pool.
 * @element: pool element pointer.
 * @pool:    pointer to the memory pool which was allocated via
 *           mempool_create().
 *
 * this function only sleeps if the free_fn() function sleeps.
 */
void mempool_free(void *element, mempool_t *pool)
{
	unsigned long flags;

	mb();
	if (pool->curr_nr < pool->min_nr) {
		spin_lock_irqsave(&pool->lock, flags);
		if (pool->curr_nr < pool->min_nr) {
			add_element(pool, element);
			spin_unlock_irqrestore(&pool->lock, flags);
			wake_up(&pool->wait);
			return;
		}
		spin_unlock_irqrestore(&pool->lock, flags);
	}
	pool->free(element, pool->pool_data);
}
EXPORT_SYMBOL(mempool_free);

/*
 * A commonly used alloc and free fn.
 */
void *mempool_alloc_slab(unsigned int __nocast gfp_mask, void *pool_data)
{
	kmem_cache_t *mem = (kmem_cache_t *) pool_data;
	return kmem_cache_alloc(mem, gfp_mask);
}
EXPORT_SYMBOL(mempool_alloc_slab);

void mempool_free_slab(void *element, void *pool_data)
{
	kmem_cache_t *mem = (kmem_cache_t *) pool_data;
	kmem_cache_free(mem, element);
}
EXPORT_SYMBOL(mempool_free_slab);
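
/*
 * Example (an illustrative sketch, not part of this file): pools are not
 * limited to slab objects.  Any pair of functions matching
 * mempool_alloc_t/mempool_free_t can be supplied to mempool_create();
 * the page-backed helpers below are hypothetical.
 *
 *	static void *my_alloc_page(unsigned int __nocast gfp_mask, void *data)
 *	{
 *		return (void *)__get_free_page(gfp_mask);
 *	}
 *
 *	static void my_free_page(void *element, void *data)
 *	{
 *		free_page((unsigned long)element);
 *	}
 *
 *	pool = mempool_create(8, my_alloc_page, my_free_page, NULL);
 */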