// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-prison-v1.h"
#include "dm-bio-prison-v2.h"

#include <linux/spinlock.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>

/*----------------------------------------------------------------*/

#define NR_LOCKS 64
#define LOCK_MASK (NR_LOCKS - 1)
#define MIN_CELLS 1024

struct prison_region {
	spinlock_t lock;
	struct rb_root cell;
} ____cacheline_aligned_in_smp;

struct dm_bio_prison {
	struct prison_region regions[NR_LOCKS];
	mempool_t cell_pool;
};

static struct kmem_cache *_cell_cache;

/*----------------------------------------------------------------*/

/*
 * The cell mempool keeps a reserve of MIN_CELLS cells.  That reserve is
 * sized for the number of cells in use _concurrently_; don't confuse it
 * with the number of distinct keys.
 */
struct dm_bio_prison *dm_bio_prison_create(void)
{
	int ret;
	unsigned i;
	struct dm_bio_prison *prison = kzalloc(sizeof(*prison), GFP_KERNEL);

	if (!prison)
		return NULL;

	for (i = 0; i < NR_LOCKS; i++) {
		spin_lock_init(&prison->regions[i].lock);
		prison->regions[i].cell = RB_ROOT;
	}

	ret = mempool_init_slab_pool(&prison->cell_pool, MIN_CELLS, _cell_cache);
	if (ret) {
		kfree(prison);
		return NULL;
	}

	return prison;
}
EXPORT_SYMBOL_GPL(dm_bio_prison_create);

void dm_bio_prison_destroy(struct dm_bio_prison *prison)
{
	mempool_exit(&prison->cell_pool);
	kfree(prison);
}
EXPORT_SYMBOL_GPL(dm_bio_prison_destroy);

struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison, gfp_t gfp)
{
	return mempool_alloc(&prison->cell_pool, gfp);
}
EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell);

void dm_bio_prison_free_cell(struct dm_bio_prison *prison,
			     struct dm_bio_prison_cell *cell)
{
	mempool_free(cell, &prison->cell_pool);
}
EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell);

static void __setup_new_cell(struct dm_cell_key *key,
			     struct bio *holder,
			     struct dm_bio_prison_cell *cell)
{
	memcpy(&cell->key, key, sizeof(cell->key));
	cell->holder = holder;
	bio_list_init(&cell->bios);
}

static int cmp_keys(struct dm_cell_key *lhs,
		    struct dm_cell_key *rhs)
{
	if (lhs->virtual < rhs->virtual)
		return -1;

	if (lhs->virtual > rhs->virtual)
		return 1;

	if (lhs->dev < rhs->dev)
		return -1;

	if (lhs->dev > rhs->dev)
		return 1;

	if (lhs->block_end <= rhs->block_begin)
		return -1;

	if (lhs->block_begin >= rhs->block_end)
		return 1;

	return 0;
}

static unsigned lock_nr(struct dm_cell_key *key)
{
	return (key->block_begin >> BIO_PRISON_MAX_RANGE_SHIFT) & LOCK_MASK;
}
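
/*
 * Lock striping: a key maps to one of NR_LOCKS regions according to the
 * BIO_PRISON_MAX_RANGE aligned window containing block_begin.  Since
 * cmp_keys() treats overlapping ranges on the same (virtual, dev) as
 * equal, and dm_cell_key_has_valid_range() below guarantees a key never
 * crosses a window boundary, any two keys that can conflict always hash
 * to the same region lock.
 */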

bool dm_cell_key_has_valid_range(struct dm_cell_key *key)
{
	if (WARN_ON_ONCE(key->block_end - key->block_begin > BIO_PRISON_MAX_RANGE))
		return false;
	if (WARN_ON_ONCE((key->block_begin >> BIO_PRISON_MAX_RANGE_SHIFT) !=
			 (key->block_end - 1) >> BIO_PRISON_MAX_RANGE_SHIFT))
		return false;

	return true;
}
EXPORT_SYMBOL(dm_cell_key_has_valid_range);
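
/*
 * In other words (a sketch of the constraint rather than a statement about
 * any particular caller): a key covering [block_begin, block_end) is only
 * valid if it spans at most BIO_PRISON_MAX_RANGE blocks and stays within a
 * single BIO_PRISON_MAX_RANGE aligned window, so ranged operations have to
 * be split on those boundaries before keys are built.
 */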

static int __bio_detain(struct rb_root *root,
			struct dm_cell_key *key,
			struct bio *inmate,
			struct dm_bio_prison_cell *cell_prealloc,
			struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct rb_node **new = &root->rb_node, *parent = NULL;

	while (*new) {
		struct dm_bio_prison_cell *cell =
			rb_entry(*new, struct dm_bio_prison_cell, node);

		r = cmp_keys(key, &cell->key);

		parent = *new;
		if (r < 0)
			new = &((*new)->rb_left);
		else if (r > 0)
			new = &((*new)->rb_right);
		else {
			if (inmate)
				bio_list_add(&cell->bios, inmate);
			*cell_result = cell;
			return 1;
		}
	}

	__setup_new_cell(key, inmate, cell_prealloc);
	*cell_result = cell_prealloc;

	rb_link_node(&cell_prealloc->node, parent, new);
	rb_insert_color(&cell_prealloc->node, root);

	return 0;
}

static int bio_detain(struct dm_bio_prison *prison,
		      struct dm_cell_key *key,
		      struct bio *inmate,
		      struct dm_bio_prison_cell *cell_prealloc,
		      struct dm_bio_prison_cell **cell_result)
{
	int r;
	unsigned l = lock_nr(key);

	spin_lock_irq(&prison->regions[l].lock);
	r = __bio_detain(&prison->regions[l].cell, key, inmate, cell_prealloc, cell_result);
	spin_unlock_irq(&prison->regions[l].lock);

	return r;
}

int dm_bio_detain(struct dm_bio_prison *prison,
		  struct dm_cell_key *key,
		  struct bio *inmate,
		  struct dm_bio_prison_cell *cell_prealloc,
		  struct dm_bio_prison_cell **cell_result)
{
	return bio_detain(prison, key, inmate, cell_prealloc, cell_result);
}
EXPORT_SYMBOL_GPL(dm_bio_detain);
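
/*
 * A minimal usage sketch (the helper below is illustrative, not part of
 * this API).  Callers typically pre-allocate a cell, try to detain, and
 * free the spare cell when the key turned out to be held already:
 *
 *	static int example_detain(struct dm_bio_prison *prison,
 *				  struct dm_cell_key *key, struct bio *bio,
 *				  struct dm_bio_prison_cell **cell)
 *	{
 *		int r;
 *		struct dm_bio_prison_cell *prealloc;
 *
 *		prealloc = dm_bio_prison_alloc_cell(prison, GFP_NOIO);
 *		r = dm_bio_detain(prison, key, bio, prealloc, cell);
 *		if (r)
 *			dm_bio_prison_free_cell(prison, prealloc);
 *
 *		return r;
 *	}
 *
 * A return of 0 means the caller is now the holder of *cell; 1 means the
 * bio was queued behind an existing holder and will be handed back when
 * that holder calls dm_cell_release().
 */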

int dm_get_cell(struct dm_bio_prison *prison,
		struct dm_cell_key *key,
		struct dm_bio_prison_cell *cell_prealloc,
		struct dm_bio_prison_cell **cell_result)
{
	return bio_detain(prison, key, NULL, cell_prealloc, cell_result);
}
EXPORT_SYMBOL_GPL(dm_get_cell);

/*
 * @inmates must have been initialised prior to this call
 */
static void __cell_release(struct rb_root *root,
			   struct dm_bio_prison_cell *cell,
			   struct bio_list *inmates)
{
	rb_erase(&cell->node, root);

	if (inmates) {
		if (cell->holder)
			bio_list_add(inmates, cell->holder);
		bio_list_merge(inmates, &cell->bios);
	}
}

void dm_cell_release(struct dm_bio_prison *prison,
		     struct dm_bio_prison_cell *cell,
		     struct bio_list *bios)
{
	unsigned l = lock_nr(&cell->key);

	spin_lock_irq(&prison->regions[l].lock);
	__cell_release(&prison->regions[l].cell, cell, bios);
	spin_unlock_irq(&prison->regions[l].lock);
}
EXPORT_SYMBOL_GPL(dm_cell_release);
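
/*
 * Releasing is typically paired with freeing the cell and resubmitting
 * whatever was queued behind the holder.  A sketch (the requeue helper is
 * illustrative):
 *
 *	struct bio_list bios;
 *	struct bio *bio;
 *
 *	bio_list_init(&bios);
 *	dm_cell_release(prison, cell, &bios);
 *	dm_bio_prison_free_cell(prison, cell);
 *
 *	while ((bio = bio_list_pop(&bios)))
 *		example_requeue_bio(bio);
 */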

/*
 * Sometimes we don't want the holder, just the additional bios.
 */
static void __cell_release_no_holder(struct rb_root *root,
				     struct dm_bio_prison_cell *cell,
				     struct bio_list *inmates)
{
	rb_erase(&cell->node, root);
	bio_list_merge(inmates, &cell->bios);
}

void dm_cell_release_no_holder(struct dm_bio_prison *prison,
			       struct dm_bio_prison_cell *cell,
			       struct bio_list *inmates)
{
	unsigned l = lock_nr(&cell->key);
	unsigned long flags;

	spin_lock_irqsave(&prison->regions[l].lock, flags);
	__cell_release_no_holder(&prison->regions[l].cell, cell, inmates);
	spin_unlock_irqrestore(&prison->regions[l].lock, flags);
}
EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);

void dm_cell_error(struct dm_bio_prison *prison,
		   struct dm_bio_prison_cell *cell, blk_status_t error)
{
	struct bio_list bios;
	struct bio *bio;

	bio_list_init(&bios);
	dm_cell_release(prison, cell, &bios);

	while ((bio = bio_list_pop(&bios))) {
		bio->bi_status = error;
		bio_endio(bio);
	}
}
EXPORT_SYMBOL_GPL(dm_cell_error);

void dm_cell_visit_release(struct dm_bio_prison *prison,
			   void (*visit_fn)(void *, struct dm_bio_prison_cell *),
			   void *context,
			   struct dm_bio_prison_cell *cell)
{
	unsigned l = lock_nr(&cell->key);

	spin_lock_irq(&prison->regions[l].lock);
	visit_fn(context, cell);
	rb_erase(&cell->node, &prison->regions[l].cell);
	spin_unlock_irq(&prison->regions[l].lock);
}
EXPORT_SYMBOL_GPL(dm_cell_visit_release);
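
/*
 * Note: visit_fn runs with the region spinlock held and interrupts
 * disabled, so it must not sleep and should be brief.  The cell is removed
 * from the prison but not freed; that remains the caller's responsibility.
 */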

static int __promote_or_release(struct rb_root *root,
				struct dm_bio_prison_cell *cell)
{
	if (bio_list_empty(&cell->bios)) {
		rb_erase(&cell->node, root);
		return 1;
	}

	cell->holder = bio_list_pop(&cell->bios);
	return 0;
}

int dm_cell_promote_or_release(struct dm_bio_prison *prison,
			       struct dm_bio_prison_cell *cell)
{
	int r;
	unsigned l = lock_nr(&cell->key);

	spin_lock_irq(&prison->regions[l].lock);
	r = __promote_or_release(&prison->regions[l].cell, cell);
	spin_unlock_irq(&prison->regions[l].lock);

	return r;
}
EXPORT_SYMBOL_GPL(dm_cell_promote_or_release);
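
/*
 * Returns 1 if the cell was empty and has been removed from the prison
 * (the caller should free it), or 0 if one of the queued bios has been
 * promoted to be the new holder and the cell remains detained.  A sketch
 * of draining a cell one holder at a time (example_process_holder() is
 * illustrative):
 *
 *	do {
 *		example_process_holder(cell->holder);
 *	} while (!dm_cell_promote_or_release(prison, cell));
 *
 *	dm_bio_prison_free_cell(prison, cell);
 */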

/*----------------------------------------------------------------*/

#define DEFERRED_SET_SIZE 64

struct dm_deferred_entry {
	struct dm_deferred_set *ds;
	unsigned int count;
	struct list_head work_items;
};

struct dm_deferred_set {
	spinlock_t lock;
	unsigned int current_entry;
	unsigned int sweeper;
	struct dm_deferred_entry entries[DEFERRED_SET_SIZE];
};

struct dm_deferred_set *dm_deferred_set_create(void)
{
	int i;
	struct dm_deferred_set *ds;

	ds = kmalloc(sizeof(*ds), GFP_KERNEL);
	if (!ds)
		return NULL;

	spin_lock_init(&ds->lock);
	ds->current_entry = 0;
	ds->sweeper = 0;
	for (i = 0; i < DEFERRED_SET_SIZE; i++) {
		ds->entries[i].ds = ds;
		ds->entries[i].count = 0;
		INIT_LIST_HEAD(&ds->entries[i].work_items);
	}

	return ds;
}
EXPORT_SYMBOL_GPL(dm_deferred_set_create);

void dm_deferred_set_destroy(struct dm_deferred_set *ds)
{
	kfree(ds);
}
EXPORT_SYMBOL_GPL(dm_deferred_set_destroy);

struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds)
{
	unsigned long flags;
	struct dm_deferred_entry *entry;

	spin_lock_irqsave(&ds->lock, flags);
	entry = ds->entries + ds->current_entry;
	entry->count++;
	spin_unlock_irqrestore(&ds->lock, flags);

	return entry;
}
EXPORT_SYMBOL_GPL(dm_deferred_entry_inc);

static unsigned int ds_next(unsigned int index)
{
	return (index + 1) % DEFERRED_SET_SIZE;
}

static void __sweep(struct dm_deferred_set *ds, struct list_head *head)
{
	while ((ds->sweeper != ds->current_entry) &&
	       !ds->entries[ds->sweeper].count) {
		list_splice_init(&ds->entries[ds->sweeper].work_items, head);
		ds->sweeper = ds_next(ds->sweeper);
	}

	if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count)
		list_splice_init(&ds->entries[ds->sweeper].work_items, head);
}
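
/*
 * The deferred set is a small ring of reference-counted entries.
 * __sweep() advances the sweeper towards current_entry, splicing the work
 * items of every entry whose count has dropped to zero onto @head, so
 * deferred work only becomes runnable once all the I/O that preceded it
 * has completed.
 */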

void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head)
{
	unsigned long flags;

	spin_lock_irqsave(&entry->ds->lock, flags);
	BUG_ON(!entry->count);
	--entry->count;
	__sweep(entry->ds, head);
	spin_unlock_irqrestore(&entry->ds->lock, flags);
}
EXPORT_SYMBOL_GPL(dm_deferred_entry_dec);

/*
 * Returns 1 if the work was deferred behind in-flight entries, or 0 if
 * nothing is pending and the caller may run the job immediately.
 */
int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work)
{
	int r = 1;
	unsigned int next_entry;

	spin_lock_irq(&ds->lock);
	if ((ds->sweeper == ds->current_entry) &&
	    !ds->entries[ds->current_entry].count)
		r = 0;
	else {
		list_add(work, &ds->entries[ds->current_entry].work_items);
		next_entry = ds_next(ds->current_entry);
		if (!ds->entries[next_entry].count)
			ds->current_entry = next_entry;
	}
	spin_unlock_irq(&ds->lock);

	return r;
}
EXPORT_SYMBOL_GPL(dm_deferred_set_add_work);
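
/*
 * A minimal usage sketch (the field and helper names are illustrative,
 * not part of this API).  The issue path takes an entry while a bio is in
 * flight, the completion path drops it, and work that must wait for
 * everything currently in flight is queued with dm_deferred_set_add_work():
 *
 *	// issue path
 *	pb->entry = dm_deferred_entry_inc(ds);
 *
 *	// completion path
 *	LIST_HEAD(work);
 *
 *	dm_deferred_entry_dec(pb->entry, &work);
 *	example_run_work(&work);
 *
 *	// deferring a job until in-flight I/O drains
 *	if (!dm_deferred_set_add_work(ds, &job->list))
 *		example_run_job(job);
 */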

/*----------------------------------------------------------------*/

static int __init dm_bio_prison_init_v1(void)
{
	_cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0);
	if (!_cell_cache)
		return -ENOMEM;

	return 0;
}

static void dm_bio_prison_exit_v1(void)
{
	kmem_cache_destroy(_cell_cache);
	_cell_cache = NULL;
}

static int (*_inits[])(void) __initdata = {
	dm_bio_prison_init_v1,
	dm_bio_prison_init_v2,
};

static void (*_exits[])(void) = {
	dm_bio_prison_exit_v1,
	dm_bio_prison_exit_v2,
};

static int __init dm_bio_prison_init(void)
{
	const int count = ARRAY_SIZE(_inits);
	int r, i;

	for (i = 0; i < count; i++) {
		r = _inits[i]();
		if (r)
			goto bad;
	}

	return 0;

bad:
	while (i--)
		_exits[i]();

	return r;
}

static void __exit dm_bio_prison_exit(void)
{
	int i = ARRAY_SIZE(_exits);

	while (i--)
		_exits[i]();
}

/*
 * module hooks
 */
module_init(dm_bio_prison_init);
module_exit(dm_bio_prison_exit);

MODULE_DESCRIPTION(DM_NAME " bio prison");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");