/*
 *  linux/mm/mempool.c
 *
 *  memory buffer pool support. Such pools are mostly used
 *  for guaranteed, deadlock-free memory allocations during
 *  extreme VM load.
 *
 *  started by Ingo Molnar, Copyright (C) 2001
 */

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>

static void add_element(mempool_t *pool, void *element)
{
	BUG_ON(pool->curr_nr >= pool->min_nr);
	pool->elements[pool->curr_nr++] = element;
}

static void *remove_element(mempool_t *pool)
{
	BUG_ON(pool->curr_nr <= 0);
	return pool->elements[--pool->curr_nr];
}

/**
 * mempool_destroy - deallocate a memory pool
 * @pool:      pointer to the memory pool which was allocated via
 *             mempool_create().
 *
 * Free all reserved elements in @pool and @pool itself.  This function
 * only sleeps if the free_fn() function sleeps.
 */
void mempool_destroy(mempool_t *pool)
{
	while (pool->curr_nr) {
		void *element = remove_element(pool);
		pool->free(element, pool->pool_data);
	}
	kfree(pool->elements);
	kfree(pool);
}
EXPORT_SYMBOL(mempool_destroy);

/**
 * mempool_create - create a memory pool
 * @min_nr:    the minimum number of elements guaranteed to be
 *             allocated for this pool.
 * @alloc_fn:  user-defined element-allocation function.
 * @free_fn:   user-defined element-freeing function.
 * @pool_data: optional private data available to the user-defined functions.
 *
 * this function creates and allocates a guaranteed size, preallocated
 * memory pool. The pool can be used from the mempool_alloc() and mempool_free()
 * functions. This function might sleep. Both the alloc_fn() and the free_fn()
 * functions might sleep - as long as the mempool_alloc() function is not called
 * from IRQ contexts.
 */
mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
			  mempool_free_t *free_fn, void *pool_data)
{
	return mempool_create_node(min_nr, alloc_fn, free_fn, pool_data,
				   GFP_KERNEL, NUMA_NO_NODE);
}
EXPORT_SYMBOL(mempool_create);

mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
			       mempool_free_t *free_fn, void *pool_data,
			       gfp_t gfp_mask, int node_id)
{
	mempool_t *pool;
	pool = kzalloc_node(sizeof(*pool), gfp_mask, node_id);
	if (!pool)
		return NULL;
	pool->elements = kmalloc_node(min_nr * sizeof(void *),
				      gfp_mask, node_id);
	if (!pool->elements) {
		kfree(pool);
		return NULL;
	}
	spin_lock_init(&pool->lock);
	pool->min_nr = min_nr;
	pool->pool_data = pool_data;
	init_waitqueue_head(&pool->wait);
	pool->alloc = alloc_fn;
	pool->free = free_fn;

	/*
	 * First pre-allocate the guaranteed number of buffers.
	 */
	while (pool->curr_nr < pool->min_nr) {
		void *element;

		element = pool->alloc(gfp_mask, pool->pool_data);
		if (unlikely(!element)) {
			mempool_destroy(pool);
			return NULL;
		}
		add_element(pool, element);
	}
	return pool;
}
EXPORT_SYMBOL(mempool_create_node);

/**
 * mempool_resize - resize an existing memory pool
 * @pool:       pointer to the memory pool which was allocated via
 *              mempool_create().
 * @new_min_nr: the new minimum number of elements guaranteed to be
 *              allocated for this pool.
 * @gfp_mask:   the usual allocation bitmask.
 *
 * This function shrinks/grows the pool. In the case of growing,
 * it cannot be guaranteed that the pool will be grown to the new
 * size immediately, but new mempool_free() calls will refill it.
 *
 * Note, the caller must guarantee that no mempool_destroy is called
 * while this function is running. mempool_alloc() & mempool_free()
 * might be called (eg. from IRQ contexts) while this function executes.
 */
int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask)
{
	void *element;
	void **new_elements;
	unsigned long flags;

	BUG_ON(new_min_nr <= 0);

	spin_lock_irqsave(&pool->lock, flags);
	if (new_min_nr <= pool->min_nr) {
		while (new_min_nr < pool->curr_nr) {
			element = remove_element(pool);
			spin_unlock_irqrestore(&pool->lock, flags);
			pool->free(element, pool->pool_data);
			spin_lock_irqsave(&pool->lock, flags);
		}
		pool->min_nr = new_min_nr;
		goto out_unlock;
	}
	spin_unlock_irqrestore(&pool->lock, flags);

	/* Grow the pool */
	new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask);
	if (!new_elements)
		return -ENOMEM;

	spin_lock_irqsave(&pool->lock, flags);
	if (unlikely(new_min_nr <= pool->min_nr)) {
		/* Raced, other resize will do our work */
		spin_unlock_irqrestore(&pool->lock, flags);
		kfree(new_elements);
		goto out;
	}
	memcpy(new_elements, pool->elements,
			pool->curr_nr * sizeof(*new_elements));
	kfree(pool->elements);
	pool->elements = new_elements;
	pool->min_nr = new_min_nr;

	while (pool->curr_nr < pool->min_nr) {
		spin_unlock_irqrestore(&pool->lock, flags);
		element = pool->alloc(gfp_mask, pool->pool_data);
		if (!element)
			goto out;
		spin_lock_irqsave(&pool->lock, flags);
		if (pool->curr_nr < pool->min_nr) {
			add_element(pool, element);
		} else {
			spin_unlock_irqrestore(&pool->lock, flags);
			pool->free(element, pool->pool_data);	/* Raced */
			goto out;
		}
	}
out_unlock:
	spin_unlock_irqrestore(&pool->lock, flags);
out:
	return 0;
}
EXPORT_SYMBOL(mempool_resize);

/**
 * mempool_alloc - allocate an element from a specific memory pool
 * @pool:      pointer to the memory pool which was allocated via
 *             mempool_create().
 * @gfp_mask:  the usual allocation bitmask.
 *
 * this function only sleeps if the alloc_fn() function sleeps or
 * returns NULL. Note that due to preallocation, this function
 * *never* fails when called from process contexts. (it might
 * fail if called from an IRQ context.)
 */
void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
{
	void *element;
	unsigned long flags;
	wait_queue_t wait;
	gfp_t gfp_temp;

	might_sleep_if(gfp_mask & __GFP_WAIT);

	gfp_mask |= __GFP_NOMEMALLOC;	/* don't allocate emergency reserves */
	gfp_mask |= __GFP_NORETRY;	/* don't loop in __alloc_pages */
	gfp_mask |= __GFP_NOWARN;	/* failures are OK */

	gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO);

repeat_alloc:

	element = pool->alloc(gfp_temp, pool->pool_data);
	if (likely(element != NULL))
		return element;

	spin_lock_irqsave(&pool->lock, flags);
	if (likely(pool->curr_nr)) {
		element = remove_element(pool);
		spin_unlock_irqrestore(&pool->lock, flags);
		/* paired with rmb in mempool_free(), read comment there */
		smp_wmb();
		return element;
	}

	/*
	 * We use gfp mask w/o __GFP_WAIT or IO for the first round.  If
	 * alloc failed with that and @pool was empty, retry immediately.
	 */
	if (gfp_temp != gfp_mask) {
		spin_unlock_irqrestore(&pool->lock, flags);
		gfp_temp = gfp_mask;
		goto repeat_alloc;
	}

	/* We must not sleep if !__GFP_WAIT */
	if (!(gfp_mask & __GFP_WAIT)) {
		spin_unlock_irqrestore(&pool->lock, flags);
		return NULL;
	}

	/* Let's wait for someone else to return an element to @pool */
	init_wait(&wait);
	prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);

	spin_unlock_irqrestore(&pool->lock, flags);

	/*
	 * FIXME: this should be io_schedule().  The timeout is there as a
	 * workaround for some DM problems in 2.6.18.
	 */
	io_schedule_timeout(5*HZ);

	finish_wait(&pool->wait, &wait);
	goto repeat_alloc;
}
EXPORT_SYMBOL(mempool_alloc);

/**
 * mempool_free - return an element to the pool.
 * @element:   pool element pointer.
 * @pool:      pointer to the memory pool which was allocated via
 *             mempool_create().
 *
 * this function only sleeps if the free_fn() function sleeps.
 */
void mempool_free(void *element, mempool_t *pool)
{
	unsigned long flags;

	if (unlikely(element == NULL))
		return;

	/*
	 * Paired with the wmb in mempool_alloc().  The preceding read is
	 * for @element and the following @pool->curr_nr.  This ensures
	 * that the visible value of @pool->curr_nr is from after the
	 * allocation of @element.  This is necessary for fringe cases
	 * where @element was passed to this task without going through
	 * barriers.
	 *
	 * For example, assume @p is %NULL at the beginning and one task
	 * performs "p = mempool_alloc(...);" while another task is doing
	 * "while (!p) cpu_relax(); mempool_free(p, ...);".  This function
	 * may end up using curr_nr value which is from before allocation
	 * of @p without the following rmb.
	 */
	smp_rmb();

	/*
	 * For correctness, we need a test which is guaranteed to trigger
	 * if curr_nr + #allocated == min_nr.  Testing curr_nr < min_nr
	 * without locking achieves that and refilling as soon as possible
	 * is desirable.
	 *
	 * Because curr_nr visible here is always a value after the
	 * allocation of @element, any task which decremented curr_nr below
	 * min_nr is guaranteed to see curr_nr < min_nr unless curr_nr gets
	 * incremented to min_nr afterwards.  If curr_nr gets incremented
	 * to min_nr after the allocation of @element, the elements
	 * allocated after that are subject to the same guarantee.
	 *
	 * Waiters happen iff curr_nr is 0 and the above guarantee also
	 * ensures that there will be frees which return elements to the
	 * pool waking up the waiters.
	 */
	if (unlikely(pool->curr_nr < pool->min_nr)) {
		spin_lock_irqsave(&pool->lock, flags);
		if (likely(pool->curr_nr < pool->min_nr)) {
			add_element(pool, element);
			spin_unlock_irqrestore(&pool->lock, flags);
			wake_up(&pool->wait);
			return;
		}
		spin_unlock_irqrestore(&pool->lock, flags);
	}
	pool->free(element, pool->pool_data);
}
EXPORT_SYMBOL(mempool_free);

/*
 * A commonly used alloc and free fn.
 */
void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data)
{
	struct kmem_cache *mem = pool_data;
	return kmem_cache_alloc(mem, gfp_mask);
}
EXPORT_SYMBOL(mempool_alloc_slab);

void mempool_free_slab(void *element, void *pool_data)
{
	struct kmem_cache *mem = pool_data;
	kmem_cache_free(mem, element);
}
EXPORT_SYMBOL(mempool_free_slab);

/*
 * A commonly used alloc and free fn that kmalloc/kfrees the amount of memory
 * specified by pool_data
 */
void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data)
{
	size_t size = (size_t)pool_data;
	return kmalloc(size, gfp_mask);
}
EXPORT_SYMBOL(mempool_kmalloc);

void mempool_kfree(void *element, void *pool_data)
{
	kfree(element);
}
EXPORT_SYMBOL(mempool_kfree);

/*
 * A simple mempool-backed page allocator that allocates pages
 * of the order specified by pool_data.
 */
void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data)
{
	int order = (int)(long)pool_data;
	return alloc_pages(gfp_mask, order);
}
EXPORT_SYMBOL(mempool_alloc_pages);

void mempool_free_pages(void *element, void *pool_data)
{
	int order = (int)(long)pool_data;
	__free_pages(element, order);
}
EXPORT_SYMBOL(mempool_free_pages);
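
/*
 * Illustrative usage sketch (not part of this file's API): a minimal
 * example of how a caller might combine mempool_create() with the
 * slab-backed helpers above, then allocate and free elements.  The
 * struct my_io and the my_* identifiers are hypothetical and error
 * handling is kept minimal; the block is guarded by "#if 0" so it is
 * never compiled.
 */
#if 0
struct my_io {
	/* per-request bookkeeping would live here */
	int dummy;
};

static struct kmem_cache *my_io_cache;
static mempool_t *my_io_pool;

static int my_driver_init(void)
{
	my_io_cache = kmem_cache_create("my_io", sizeof(struct my_io),
					0, 0, NULL);
	if (!my_io_cache)
		return -ENOMEM;

	/*
	 * Reserve at least 4 elements so allocations can make forward
	 * progress even when the slab allocator is under pressure.
	 */
	my_io_pool = mempool_create(4, mempool_alloc_slab,
				    mempool_free_slab, my_io_cache);
	if (!my_io_pool) {
		kmem_cache_destroy(my_io_cache);
		return -ENOMEM;
	}
	return 0;
}

static void my_driver_one_request(void)
{
	/* A sleeping mask means this cannot fail in process context. */
	struct my_io *io = mempool_alloc(my_io_pool, GFP_NOIO);

	/* ... use io to service the request ... */

	mempool_free(io, my_io_pool);
}

static void my_driver_exit(void)
{
	mempool_destroy(my_io_pool);
	kmem_cache_destroy(my_io_cache);
}
#endif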