/*
 * linux/mm/mempool.c
 *
 * memory buffer pool support. Such pools are mostly used
 * for guaranteed, deadlock-free memory allocations during
 * extreme VM load.
 *
 * started by Ingo Molnar, Copyright (C) 2001
 */

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/kmemleak.h>
#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>

static void add_element(mempool_t *pool, void *element)
{
	BUG_ON(pool->curr_nr >= pool->min_nr);
	pool->elements[pool->curr_nr++] = element;
}

static void *remove_element(mempool_t *pool)
{
	BUG_ON(pool->curr_nr <= 0);
	return pool->elements[--pool->curr_nr];
}

/**
 * mempool_destroy - deallocate a memory pool
 * @pool: pointer to the memory pool which was allocated via
 *	mempool_create().
 *
 * Free all reserved elements in @pool and @pool itself. This function
 * only sleeps if the free_fn() function sleeps.
 */
void mempool_destroy(mempool_t *pool)
{
	while (pool->curr_nr) {
		void *element = remove_element(pool);
		pool->free(element, pool->pool_data);
	}
	kfree(pool->elements);
	kfree(pool);
}
EXPORT_SYMBOL(mempool_destroy);

/**
 * mempool_create - create a memory pool
 * @min_nr:    the minimum number of elements guaranteed to be
 *             allocated for this pool.
 * @alloc_fn:  user-defined element-allocation function.
 * @free_fn:   user-defined element-freeing function.
 * @pool_data: optional private data available to the user-defined functions.
 *
 * this function creates and allocates a guaranteed size, preallocated
 * memory pool. The pool can be used from the mempool_alloc() and mempool_free()
 * functions. This function might sleep. Both the alloc_fn() and the free_fn()
 * functions might sleep - as long as the mempool_alloc() function is not called
 * from IRQ contexts.
 */
mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
				mempool_free_t *free_fn, void *pool_data)
{
	return mempool_create_node(min_nr, alloc_fn, free_fn, pool_data,
				   GFP_KERNEL, NUMA_NO_NODE);
}
EXPORT_SYMBOL(mempool_create);

mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
			       mempool_free_t *free_fn, void *pool_data,
			       gfp_t gfp_mask, int node_id)
{
	mempool_t *pool;

	pool = kzalloc_node(sizeof(*pool), gfp_mask, node_id);
	if (!pool)
		return NULL;
	pool->elements = kmalloc_node(min_nr * sizeof(void *),
				      gfp_mask, node_id);
	if (!pool->elements) {
		kfree(pool);
		return NULL;
	}
	spin_lock_init(&pool->lock);
	pool->min_nr = min_nr;
	pool->pool_data = pool_data;
	init_waitqueue_head(&pool->wait);
	pool->alloc = alloc_fn;
	pool->free = free_fn;

	/*
	 * First pre-allocate the guaranteed number of buffers.
	 */
	while (pool->curr_nr < pool->min_nr) {
		void *element;

		element = pool->alloc(gfp_mask, pool->pool_data);
		if (unlikely(!element)) {
			mempool_destroy(pool);
			return NULL;
		}
		add_element(pool, element);
	}
	return pool;
}
EXPORT_SYMBOL(mempool_create_node);
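
/*
 * Illustrative sketch (not part of this file): a typical caller pairs
 * mempool_create() with the slab helpers declared in <linux/mempool.h>
 * so that a fixed number of objects is always available.  The struct,
 * cache and pool names below are hypothetical.
 */
struct example_req {
	struct list_head list;
	int tag;
};

static struct kmem_cache *example_req_cache;
static mempool_t *example_req_pool;

static int example_create_req_pool(void)
{
	/* Back the pool with a dedicated slab cache for the object type. */
	example_req_cache = kmem_cache_create("example_req",
					      sizeof(struct example_req),
					      0, 0, NULL);
	if (!example_req_cache)
		return -ENOMEM;

	/* Guarantee that at least 4 requests can always be allocated. */
	example_req_pool = mempool_create(4, mempool_alloc_slab,
					  mempool_free_slab,
					  example_req_cache);
	if (!example_req_pool) {
		kmem_cache_destroy(example_req_cache);
		return -ENOMEM;
	}
	return 0;
}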

/**
 * mempool_resize - resize an existing memory pool
 * @pool:       pointer to the memory pool which was allocated via
 *              mempool_create().
 * @new_min_nr: the new minimum number of elements guaranteed to be
 *              allocated for this pool.
 * @gfp_mask:   the usual allocation bitmask.
 *
 * This function shrinks/grows the pool. In the case of growing,
 * it cannot be guaranteed that the pool will be grown to the new
 * size immediately, but new mempool_free() calls will refill it.
 *
 * Note, the caller must guarantee that no mempool_destroy is called
 * while this function is running. mempool_alloc() & mempool_free()
 * might be called (eg. from IRQ contexts) while this function executes.
 */
int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask)
{
	void *element;
	void **new_elements;
	unsigned long flags;

	BUG_ON(new_min_nr <= 0);

	spin_lock_irqsave(&pool->lock, flags);
	if (new_min_nr <= pool->min_nr) {
		while (new_min_nr < pool->curr_nr) {
			element = remove_element(pool);
			spin_unlock_irqrestore(&pool->lock, flags);
			pool->free(element, pool->pool_data);
			spin_lock_irqsave(&pool->lock, flags);
		}
		pool->min_nr = new_min_nr;
		goto out_unlock;
	}
	spin_unlock_irqrestore(&pool->lock, flags);

	/* Grow the pool */
	new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask);
	if (!new_elements)
		return -ENOMEM;

	spin_lock_irqsave(&pool->lock, flags);
	if (unlikely(new_min_nr <= pool->min_nr)) {
		/* Raced, other resize will do our work */
		spin_unlock_irqrestore(&pool->lock, flags);
		kfree(new_elements);
		goto out;
	}
	memcpy(new_elements, pool->elements,
			pool->curr_nr * sizeof(*new_elements));
	kfree(pool->elements);
	pool->elements = new_elements;
	pool->min_nr = new_min_nr;

	while (pool->curr_nr < pool->min_nr) {
		spin_unlock_irqrestore(&pool->lock, flags);
		element = pool->alloc(gfp_mask, pool->pool_data);
		if (!element)
			goto out;
		spin_lock_irqsave(&pool->lock, flags);
		if (pool->curr_nr < pool->min_nr) {
			add_element(pool, element);
		} else {
			spin_unlock_irqrestore(&pool->lock, flags);
			pool->free(element, pool->pool_data);	/* Raced */
			goto out;
		}
	}
out_unlock:
	spin_unlock_irqrestore(&pool->lock, flags);
out:
	return 0;
}
EXPORT_SYMBOL(mempool_resize);
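
/*
 * Illustrative sketch (not part of this file): growing an existing pool
 * when the caller learns it needs a deeper reserve, e.g. after a device
 * reports a larger queue depth.  "example_req_pool" is the hypothetical
 * pool from the sketch above.
 */
static int example_grow_req_pool(int new_depth)
{
	/*
	 * May return -ENOMEM, in which case the pool keeps its previous
	 * reserve.  With GFP_KERNEL this may sleep, so it must not be
	 * called from IRQ context.
	 */
	return mempool_resize(example_req_pool, new_depth, GFP_KERNEL);
}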

/**
 * mempool_alloc - allocate an element from a specific memory pool
 * @pool:     pointer to the memory pool which was allocated via
 *            mempool_create().
 * @gfp_mask: the usual allocation bitmask.
 *
 * this function only sleeps if the alloc_fn() function sleeps or
 * returns NULL. Note that due to preallocation, this function
 * *never* fails when called from process contexts. (it might
 * fail if called from an IRQ context.)
 * Note: using __GFP_ZERO is not supported.
 */
void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
{
	void *element;
	unsigned long flags;
	wait_queue_t wait;
	gfp_t gfp_temp;

	VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
	might_sleep_if(gfp_mask & __GFP_WAIT);

	gfp_mask |= __GFP_NOMEMALLOC;	/* don't allocate emergency reserves */
	gfp_mask |= __GFP_NORETRY;	/* don't loop in __alloc_pages */
	gfp_mask |= __GFP_NOWARN;	/* failures are OK */

	gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO);

repeat_alloc:

	element = pool->alloc(gfp_temp, pool->pool_data);
	if (likely(element != NULL))
		return element;

	spin_lock_irqsave(&pool->lock, flags);
	if (likely(pool->curr_nr)) {
		element = remove_element(pool);
		spin_unlock_irqrestore(&pool->lock, flags);
		/* paired with rmb in mempool_free(), read comment there */
		smp_wmb();
		/*
		 * Update the allocation stack trace as this is more useful
		 * for debugging.
		 */
		kmemleak_update_trace(element);
		return element;
	}

	/*
	 * We use gfp mask w/o __GFP_WAIT or IO for the first round.  If
	 * alloc failed with that and @pool was empty, retry immediately.
	 */
	if (gfp_temp != gfp_mask) {
		spin_unlock_irqrestore(&pool->lock, flags);
		gfp_temp = gfp_mask;
		goto repeat_alloc;
	}

	/* We must not sleep if !__GFP_WAIT */
	if (!(gfp_mask & __GFP_WAIT)) {
		spin_unlock_irqrestore(&pool->lock, flags);
		return NULL;
	}

	/* Let's wait for someone else to return an element to @pool */
	init_wait(&wait);
	prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);

	spin_unlock_irqrestore(&pool->lock, flags);

	/*
	 * FIXME: this should be io_schedule().  The timeout is there as a
	 * workaround for some DM problems in 2.6.18.
	 */
	io_schedule_timeout(5*HZ);

	finish_wait(&pool->wait, &wait);
	goto repeat_alloc;
}
EXPORT_SYMBOL(mempool_alloc);
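
/*
 * Illustrative sketch (not part of this file): allocating from the pool
 * on an I/O submission path and returning the element once the request
 * completes.  "example_req_pool" and the surrounding helpers are
 * hypothetical.
 */
static void example_submit_req(void)
{
	struct example_req *req;

	/*
	 * GFP_NOIO allows sleeping but forbids recursing into the I/O
	 * path.  With a sleeping mask the allocation cannot fail: when
	 * the underlying allocator and the preallocated reserve are both
	 * exhausted, mempool_alloc() waits for a mempool_free() call.
	 */
	req = mempool_alloc(example_req_pool, GFP_NOIO);

	/* ... fill in @req and drive the request ... */

	/* On completion, refill the reserve (or free via free_fn). */
	mempool_free(req, example_req_pool);
}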

/**
 * mempool_free - return an element to the pool.
 * @element:   pool element pointer.
 * @pool:      pointer to the memory pool which was allocated via
 *             mempool_create().
 *
 * this function only sleeps if the free_fn() function sleeps.
 */
void mempool_free(void *element, mempool_t *pool)
{
	unsigned long flags;

	if (unlikely(element == NULL))
		return;

	/*
	 * Paired with the wmb in mempool_alloc().  The preceding read is
	 * for @element and the following @pool->curr_nr.  This ensures
	 * that the visible value of @pool->curr_nr is from after the
	 * allocation of @element.  This is necessary for fringe cases
	 * where @element was passed to this task without going through
	 * barriers.
	 *
	 * For example, assume @p is %NULL at the beginning and one task
	 * performs "p = mempool_alloc(...);" while another task is doing
	 * "while (!p) cpu_relax(); mempool_free(p, ...);".  This function
	 * may end up using curr_nr value which is from before allocation
	 * of @p without the following rmb.
	 */
	smp_rmb();

	/*
	 * For correctness, we need a test which is guaranteed to trigger
	 * if curr_nr + #allocated == min_nr.  Testing curr_nr < min_nr
	 * without locking achieves that and refilling as soon as possible
	 * is desirable.
	 *
	 * Because curr_nr visible here is always a value after the
	 * allocation of @element, any task which decremented curr_nr below
	 * min_nr is guaranteed to see curr_nr < min_nr unless curr_nr gets
	 * incremented to min_nr afterwards.  If curr_nr gets incremented
	 * to min_nr after the allocation of @element, the elements
	 * allocated after that are subject to the same guarantee.
	 *
	 * Waiters happen iff curr_nr is 0 and the above guarantee also
	 * ensures that there will be frees which return elements to the
	 * pool waking up the waiters.
	 */
	if (unlikely(pool->curr_nr < pool->min_nr)) {
		spin_lock_irqsave(&pool->lock, flags);
		if (likely(pool->curr_nr < pool->min_nr)) {
			add_element(pool, element);
			spin_unlock_irqrestore(&pool->lock, flags);
			wake_up(&pool->wait);
			return;
		}
		spin_unlock_irqrestore(&pool->lock, flags);
	}
	pool->free(element, pool->pool_data);
}
EXPORT_SYMBOL(mempool_free);

/*
 * A commonly used alloc and free fn.
 */
void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data)
{
	struct kmem_cache *mem = pool_data;
	return kmem_cache_alloc(mem, gfp_mask);
}
EXPORT_SYMBOL(mempool_alloc_slab);

void mempool_free_slab(void *element, void *pool_data)
{
	struct kmem_cache *mem = pool_data;
	kmem_cache_free(mem, element);
}
EXPORT_SYMBOL(mempool_free_slab);

/*
 * A commonly used alloc and free fn that kmalloc/kfrees the amount of memory
 * specified by pool_data
 */
void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data)
{
	size_t size = (size_t)pool_data;
	return kmalloc(size, gfp_mask);
}
EXPORT_SYMBOL(mempool_kmalloc);

void mempool_kfree(void *element, void *pool_data)
{
	kfree(element);
}
EXPORT_SYMBOL(mempool_kfree);

/*
 * A simple mempool-backed page allocator that allocates pages
 * of the order specified by pool_data.
 */
void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data)
{
	int order = (int)(long)pool_data;
	return alloc_pages(gfp_mask, order);
}
EXPORT_SYMBOL(mempool_alloc_pages);

void mempool_free_pages(void *element, void *pool_data)
{
	int order = (int)(long)pool_data;
	__free_pages(element, order);
}
EXPORT_SYMBOL(mempool_free_pages);
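
/*
 * Illustrative sketch (not part of this file): the page helpers above
 * expect the allocation order cast into @pool_data, so an order-0 pool
 * with a reserve of 8 pages could be set up as below.  The pool name is
 * hypothetical.
 */
static mempool_t *example_page_pool;

static int example_create_page_pool(void)
{
	/* pool_data carries the page order (0 == single pages). */
	example_page_pool = mempool_create(8, mempool_alloc_pages,
					   mempool_free_pages,
					   (void *)(long)0);
	return example_page_pool ? 0 : -ENOMEM;
}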