xref: /openbmc/linux/mm/mempool.c (revision 93d90ad7)
1 /*
2  *  linux/mm/mempool.c
3  *
4  *  memory buffer pool support. Such pools are mostly used
5  *  for guaranteed, deadlock-free memory allocations during
6  *  extreme VM load.
7  *
8  *  started by Ingo Molnar, Copyright (C) 2001
9  */
10 
11 #include <linux/mm.h>
12 #include <linux/slab.h>
13 #include <linux/kmemleak.h>
14 #include <linux/export.h>
15 #include <linux/mempool.h>
16 #include <linux/blkdev.h>
17 #include <linux/writeback.h>
18 
19 static void add_element(mempool_t *pool, void *element)
20 {
21 	BUG_ON(pool->curr_nr >= pool->min_nr);
22 	pool->elements[pool->curr_nr++] = element;
23 }
24 
25 static void *remove_element(mempool_t *pool)
26 {
27 	BUG_ON(pool->curr_nr <= 0);
28 	return pool->elements[--pool->curr_nr];
29 }
30 
31 /**
32  * mempool_destroy - deallocate a memory pool
33  * @pool:      pointer to the memory pool which was allocated via
34  *             mempool_create().
35  *
36  * Free all reserved elements in @pool and @pool itself.  This function
37  * only sleeps if the free_fn() function sleeps.
38  */
39 void mempool_destroy(mempool_t *pool)
40 {
41 	while (pool->curr_nr) {
42 		void *element = remove_element(pool);
43 		pool->free(element, pool->pool_data);
44 	}
45 	kfree(pool->elements);
46 	kfree(pool);
47 }
48 EXPORT_SYMBOL(mempool_destroy);
49 
50 /**
51  * mempool_create - create a memory pool
52  * @min_nr:    the minimum number of elements guaranteed to be
53  *             allocated for this pool.
54  * @alloc_fn:  user-defined element-allocation function.
55  * @free_fn:   user-defined element-freeing function.
56  * @pool_data: optional private data available to the user-defined functions.
57  *
58  * this function creates and allocates a guaranteed size, preallocated
59  * memory pool. The pool can be used from the mempool_alloc() and mempool_free()
60  * functions. This function might sleep. Both the alloc_fn() and the free_fn()
61  * functions might sleep - as long as the mempool_alloc() function is not called
62  * from IRQ contexts.
63  */
64 mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
65 				mempool_free_t *free_fn, void *pool_data)
66 {
67 	return mempool_create_node(min_nr,alloc_fn,free_fn, pool_data,
68 				   GFP_KERNEL, NUMA_NO_NODE);
69 }
70 EXPORT_SYMBOL(mempool_create);
71 
72 mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
73 			       mempool_free_t *free_fn, void *pool_data,
74 			       gfp_t gfp_mask, int node_id)
75 {
76 	mempool_t *pool;
77 	pool = kzalloc_node(sizeof(*pool), gfp_mask, node_id);
78 	if (!pool)
79 		return NULL;
80 	pool->elements = kmalloc_node(min_nr * sizeof(void *),
81 				      gfp_mask, node_id);
82 	if (!pool->elements) {
83 		kfree(pool);
84 		return NULL;
85 	}
86 	spin_lock_init(&pool->lock);
87 	pool->min_nr = min_nr;
88 	pool->pool_data = pool_data;
89 	init_waitqueue_head(&pool->wait);
90 	pool->alloc = alloc_fn;
91 	pool->free = free_fn;
92 
93 	/*
94 	 * First pre-allocate the guaranteed number of buffers.
95 	 */
96 	while (pool->curr_nr < pool->min_nr) {
97 		void *element;
98 
99 		element = pool->alloc(gfp_mask, pool->pool_data);
100 		if (unlikely(!element)) {
101 			mempool_destroy(pool);
102 			return NULL;
103 		}
104 		add_element(pool, element);
105 	}
106 	return pool;
107 }
108 EXPORT_SYMBOL(mempool_create_node);
109 
110 /**
111  * mempool_resize - resize an existing memory pool
112  * @pool:       pointer to the memory pool which was allocated via
113  *              mempool_create().
114  * @new_min_nr: the new minimum number of elements guaranteed to be
115  *              allocated for this pool.
116  * @gfp_mask:   the usual allocation bitmask.
117  *
118  * This function shrinks/grows the pool. In the case of growing,
119  * it cannot be guaranteed that the pool will be grown to the new
120  * size immediately, but new mempool_free() calls will refill it.
121  *
122  * Note, the caller must guarantee that no mempool_destroy is called
123  * while this function is running. mempool_alloc() & mempool_free()
124  * might be called (eg. from IRQ contexts) while this function executes.
125  */
126 int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask)
127 {
128 	void *element;
129 	void **new_elements;
130 	unsigned long flags;
131 
132 	BUG_ON(new_min_nr <= 0);
133 
134 	spin_lock_irqsave(&pool->lock, flags);
135 	if (new_min_nr <= pool->min_nr) {
136 		while (new_min_nr < pool->curr_nr) {
137 			element = remove_element(pool);
138 			spin_unlock_irqrestore(&pool->lock, flags);
139 			pool->free(element, pool->pool_data);
140 			spin_lock_irqsave(&pool->lock, flags);
141 		}
142 		pool->min_nr = new_min_nr;
143 		goto out_unlock;
144 	}
145 	spin_unlock_irqrestore(&pool->lock, flags);
146 
147 	/* Grow the pool */
148 	new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask);
149 	if (!new_elements)
150 		return -ENOMEM;
151 
152 	spin_lock_irqsave(&pool->lock, flags);
153 	if (unlikely(new_min_nr <= pool->min_nr)) {
154 		/* Raced, other resize will do our work */
155 		spin_unlock_irqrestore(&pool->lock, flags);
156 		kfree(new_elements);
157 		goto out;
158 	}
159 	memcpy(new_elements, pool->elements,
160 			pool->curr_nr * sizeof(*new_elements));
161 	kfree(pool->elements);
162 	pool->elements = new_elements;
163 	pool->min_nr = new_min_nr;
164 
165 	while (pool->curr_nr < pool->min_nr) {
166 		spin_unlock_irqrestore(&pool->lock, flags);
167 		element = pool->alloc(gfp_mask, pool->pool_data);
168 		if (!element)
169 			goto out;
170 		spin_lock_irqsave(&pool->lock, flags);
171 		if (pool->curr_nr < pool->min_nr) {
172 			add_element(pool, element);
173 		} else {
174 			spin_unlock_irqrestore(&pool->lock, flags);
175 			pool->free(element, pool->pool_data);	/* Raced */
176 			goto out;
177 		}
178 	}
179 out_unlock:
180 	spin_unlock_irqrestore(&pool->lock, flags);
181 out:
182 	return 0;
183 }
184 EXPORT_SYMBOL(mempool_resize);
185 
186 /**
187  * mempool_alloc - allocate an element from a specific memory pool
188  * @pool:      pointer to the memory pool which was allocated via
189  *             mempool_create().
190  * @gfp_mask:  the usual allocation bitmask.
191  *
192  * this function only sleeps if the alloc_fn() function sleeps or
193  * returns NULL. Note that due to preallocation, this function
194  * *never* fails when called from process contexts. (it might
195  * fail if called from an IRQ context.)
196  * Note: using __GFP_ZERO is not supported.
197  */
198 void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
199 {
200 	void *element;
201 	unsigned long flags;
202 	wait_queue_t wait;
203 	gfp_t gfp_temp;
204 
205 	VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
206 	might_sleep_if(gfp_mask & __GFP_WAIT);
207 
208 	gfp_mask |= __GFP_NOMEMALLOC;	/* don't allocate emergency reserves */
209 	gfp_mask |= __GFP_NORETRY;	/* don't loop in __alloc_pages */
210 	gfp_mask |= __GFP_NOWARN;	/* failures are OK */
211 
212 	gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO);
213 
214 repeat_alloc:
215 
216 	element = pool->alloc(gfp_temp, pool->pool_data);
217 	if (likely(element != NULL))
218 		return element;
219 
220 	spin_lock_irqsave(&pool->lock, flags);
221 	if (likely(pool->curr_nr)) {
222 		element = remove_element(pool);
223 		spin_unlock_irqrestore(&pool->lock, flags);
224 		/* paired with rmb in mempool_free(), read comment there */
225 		smp_wmb();
226 		/*
227 		 * Update the allocation stack trace as this is more useful
228 		 * for debugging.
229 		 */
230 		kmemleak_update_trace(element);
231 		return element;
232 	}
233 
234 	/*
235 	 * We use gfp mask w/o __GFP_WAIT or IO for the first round.  If
236 	 * alloc failed with that and @pool was empty, retry immediately.
237 	 */
238 	if (gfp_temp != gfp_mask) {
239 		spin_unlock_irqrestore(&pool->lock, flags);
240 		gfp_temp = gfp_mask;
241 		goto repeat_alloc;
242 	}
243 
244 	/* We must not sleep if !__GFP_WAIT */
245 	if (!(gfp_mask & __GFP_WAIT)) {
246 		spin_unlock_irqrestore(&pool->lock, flags);
247 		return NULL;
248 	}
249 
250 	/* Let's wait for someone else to return an element to @pool */
251 	init_wait(&wait);
252 	prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
253 
254 	spin_unlock_irqrestore(&pool->lock, flags);
255 
256 	/*
257 	 * FIXME: this should be io_schedule().  The timeout is there as a
258 	 * workaround for some DM problems in 2.6.18.
259 	 */
260 	io_schedule_timeout(5*HZ);
261 
262 	finish_wait(&pool->wait, &wait);
263 	goto repeat_alloc;
264 }
265 EXPORT_SYMBOL(mempool_alloc);
266 
267 /**
268  * mempool_free - return an element to the pool.
269  * @element:   pool element pointer.
270  * @pool:      pointer to the memory pool which was allocated via
271  *             mempool_create().
272  *
273  * this function only sleeps if the free_fn() function sleeps.
274  */
275 void mempool_free(void *element, mempool_t *pool)
276 {
277 	unsigned long flags;
278 
279 	if (unlikely(element == NULL))
280 		return;
281 
282 	/*
283 	 * Paired with the wmb in mempool_alloc().  The preceding read is
284 	 * for @element and the following @pool->curr_nr.  This ensures
285 	 * that the visible value of @pool->curr_nr is from after the
286 	 * allocation of @element.  This is necessary for fringe cases
287 	 * where @element was passed to this task without going through
288 	 * barriers.
289 	 *
290 	 * For example, assume @p is %NULL at the beginning and one task
291 	 * performs "p = mempool_alloc(...);" while another task is doing
292 	 * "while (!p) cpu_relax(); mempool_free(p, ...);".  This function
293 	 * may end up using curr_nr value which is from before allocation
294 	 * of @p without the following rmb.
295 	 */
296 	smp_rmb();
297 
298 	/*
299 	 * For correctness, we need a test which is guaranteed to trigger
300 	 * if curr_nr + #allocated == min_nr.  Testing curr_nr < min_nr
301 	 * without locking achieves that and refilling as soon as possible
302 	 * is desirable.
303 	 *
304 	 * Because curr_nr visible here is always a value after the
305 	 * allocation of @element, any task which decremented curr_nr below
306 	 * min_nr is guaranteed to see curr_nr < min_nr unless curr_nr gets
307 	 * incremented to min_nr afterwards.  If curr_nr gets incremented
308 	 * to min_nr after the allocation of @element, the elements
309 	 * allocated after that are subject to the same guarantee.
310 	 *
311 	 * Waiters happen iff curr_nr is 0 and the above guarantee also
312 	 * ensures that there will be frees which return elements to the
313 	 * pool waking up the waiters.
314 	 */
315 	if (unlikely(pool->curr_nr < pool->min_nr)) {
316 		spin_lock_irqsave(&pool->lock, flags);
317 		if (likely(pool->curr_nr < pool->min_nr)) {
318 			add_element(pool, element);
319 			spin_unlock_irqrestore(&pool->lock, flags);
320 			wake_up(&pool->wait);
321 			return;
322 		}
323 		spin_unlock_irqrestore(&pool->lock, flags);
324 	}
325 	pool->free(element, pool->pool_data);
326 }
327 EXPORT_SYMBOL(mempool_free);
328 
329 /*
330  * A commonly used alloc and free fn.
331  */
332 void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data)
333 {
334 	struct kmem_cache *mem = pool_data;
335 	return kmem_cache_alloc(mem, gfp_mask);
336 }
337 EXPORT_SYMBOL(mempool_alloc_slab);
338 
/*
 * Commonly used free_fn for slab-backed pools: hands @element back to
 * the struct kmem_cache passed in @pool_data.
 */
void mempool_free_slab(void *element, void *pool_data)
{
	struct kmem_cache *cache = pool_data;

	kmem_cache_free(cache, element);
}
EXPORT_SYMBOL(mempool_free_slab);
345 
346 /*
347  * A commonly used alloc and free fn that kmalloc/kfrees the amount of memory
348  * specified by pool_data
349  */
350 void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data)
351 {
352 	size_t size = (size_t)pool_data;
353 	return kmalloc(size, gfp_mask);
354 }
355 EXPORT_SYMBOL(mempool_kmalloc);
356 
/*
 * Commonly used free_fn that kfree()s @element.  @pool_data is not
 * used.
 */
void mempool_kfree(void *element, void *pool_data)
{
	kfree(element);
}
EXPORT_SYMBOL(mempool_kfree);
362 
363 /*
364  * A simple mempool-backed page allocator that allocates pages
365  * of the order specified by pool_data.
366  */
367 void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data)
368 {
369 	int order = (int)(long)pool_data;
370 	return alloc_pages(gfp_mask, order);
371 }
372 EXPORT_SYMBOL(mempool_alloc_pages);
373 
/*
 * free_fn for the page allocator above: releases @element with
 * __free_pages(), using the order carried in the @pool_data pointer value.
 */
void mempool_free_pages(void *element, void *pool_data)
{
	__free_pages(element, (int)(long)pool_data);
}
EXPORT_SYMBOL(mempool_free_pages);
380