xref: /openbmc/linux/drivers/md/dm-delay.c (revision b3c9a04135bdbd3aabd5e9534bad0fe6df505f8a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2005-2007 Red Hat GmbH
4  *
5  * A target that delays reads and/or writes and can send
6  * them to different devices.
7  *
8  * This file is released under the GPL.
9  */
10 
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/blkdev.h>
14 #include <linux/bio.h>
15 #include <linux/slab.h>
16 
17 #include <linux/device-mapper.h>
18 
19 #define DM_MSG_PREFIX "delay"
20 
/*
 * Settings and bookkeeping for one class of I/O (read, write or flush):
 * where its bios are redirected to, how long they are held, and how many
 * are currently being held.
 */
struct delay_class {
	struct dm_dev *dev;	/* device the bios of this class are remapped to */
	sector_t start;		/* sector offset added when remapping */
	unsigned int delay;	/* hold time in milliseconds; 0 disables delaying */
	unsigned int ops;	/* bios of this class currently on the delayed list */
};
27 
/*
 * Per-target context, allocated in delay_ctr() and freed in delay_dtr().
 */
struct delay_c {
	struct timer_list delay_timer;		/* armed by queue_timeout(); callback is handle_delayed_timer() */
	struct mutex timer_lock;		/* serializes the check-and-rearm in queue_timeout() */
	struct workqueue_struct *kdelayd_wq;	/* workqueue that runs flush_expired_bios */
	struct work_struct flush_expired_bios;	/* work item queued by the timer callback */
	struct list_head delayed_bios;		/* held bios, linked through dm_delay_info.list */
	atomic_t may_delay;			/* cleared in presuspend, set again in resume */

	struct delay_class read;
	struct delay_class write;
	struct delay_class flush;

	int argc;	/* constructor argument count (3, 6 or 9); decides how many classes the table status prints */
};
42 
/*
 * Per-bio data (its size is announced through ti->per_io_data_size) that
 * tracks a bio while it sits on the delayed list.
 */
struct dm_delay_info {
	struct delay_c *context;	/* owning target context, set in delay_bio() */
	struct delay_class *class;	/* class picked in delay_map(); its ops count drops when the bio is released */
	struct list_head list;		/* entry on delay_c.delayed_bios */
	unsigned long expires;		/* jiffies value from which the bio may be submitted */
};
49 
/*
 * Guards the delayed_bios lists and the per-class ops counters.  Being a
 * single file-scope mutex, it is shared by every instance of the target.
 */
static DEFINE_MUTEX(delayed_bios_lock);
51 
52 static void handle_delayed_timer(struct timer_list *t)
53 {
54 	struct delay_c *dc = from_timer(dc, t, delay_timer);
55 
56 	queue_work(dc->kdelayd_wq, &dc->flush_expired_bios);
57 }
58 
59 static void queue_timeout(struct delay_c *dc, unsigned long expires)
60 {
61 	mutex_lock(&dc->timer_lock);
62 
63 	if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires)
64 		mod_timer(&dc->delay_timer, expires);
65 
66 	mutex_unlock(&dc->timer_lock);
67 }
68 
69 static void flush_bios(struct bio *bio)
70 {
71 	struct bio *n;
72 
73 	while (bio) {
74 		n = bio->bi_next;
75 		bio->bi_next = NULL;
76 		dm_submit_bio_remap(bio, NULL);
77 		bio = n;
78 	}
79 }
80 
81 static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
82 {
83 	struct dm_delay_info *delayed, *next;
84 	unsigned long next_expires = 0;
85 	unsigned long start_timer = 0;
86 	struct bio_list flush_bios = { };
87 
88 	mutex_lock(&delayed_bios_lock);
89 	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
90 		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
91 			struct bio *bio = dm_bio_from_per_bio_data(delayed,
92 						sizeof(struct dm_delay_info));
93 			list_del(&delayed->list);
94 			bio_list_add(&flush_bios, bio);
95 			delayed->class->ops--;
96 			continue;
97 		}
98 
99 		if (!start_timer) {
100 			start_timer = 1;
101 			next_expires = delayed->expires;
102 		} else
103 			next_expires = min(next_expires, delayed->expires);
104 	}
105 	mutex_unlock(&delayed_bios_lock);
106 
107 	if (start_timer)
108 		queue_timeout(dc, next_expires);
109 
110 	return bio_list_get(&flush_bios);
111 }
112 
113 static void flush_expired_bios(struct work_struct *work)
114 {
115 	struct delay_c *dc;
116 
117 	dc = container_of(work, struct delay_c, flush_expired_bios);
118 	flush_bios(flush_delayed_bios(dc, 0));
119 }
120 
121 static void delay_dtr(struct dm_target *ti)
122 {
123 	struct delay_c *dc = ti->private;
124 
125 	if (dc->kdelayd_wq)
126 		destroy_workqueue(dc->kdelayd_wq);
127 
128 	if (dc->read.dev)
129 		dm_put_device(ti, dc->read.dev);
130 	if (dc->write.dev)
131 		dm_put_device(ti, dc->write.dev);
132 	if (dc->flush.dev)
133 		dm_put_device(ti, dc->flush.dev);
134 
135 	mutex_destroy(&dc->timer_lock);
136 
137 	kfree(dc);
138 }
139 
140 static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **argv)
141 {
142 	int ret;
143 	unsigned long long tmpll;
144 	char dummy;
145 
146 	if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
147 		ti->error = "Invalid device sector";
148 		return -EINVAL;
149 	}
150 	c->start = tmpll;
151 
152 	if (sscanf(argv[2], "%u%c", &c->delay, &dummy) != 1) {
153 		ti->error = "Invalid delay";
154 		return -EINVAL;
155 	}
156 
157 	ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &c->dev);
158 	if (ret) {
159 		ti->error = "Device lookup failed";
160 		return ret;
161 	}
162 
163 	return 0;
164 }
165 
/*
 * Mapping parameters:
 *    <device> <offset> <delay> [<write_device> <write_offset> <write_delay>
 *                              [<flush_device> <flush_offset> <flush_delay>]]
 *
 * With separate write parameters, the first set is only used for reads.
 * Without separate flush parameters, flushes use the same set as writes.
 * Offsets are specified in sectors.
 * Delays are specified in milliseconds.
 */
174 static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
175 {
176 	struct delay_c *dc;
177 	int ret;
178 
179 	if (argc != 3 && argc != 6 && argc != 9) {
180 		ti->error = "Requires exactly 3, 6 or 9 arguments";
181 		return -EINVAL;
182 	}
183 
184 	dc = kzalloc(sizeof(*dc), GFP_KERNEL);
185 	if (!dc) {
186 		ti->error = "Cannot allocate context";
187 		return -ENOMEM;
188 	}
189 
190 	ti->private = dc;
191 	timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
192 	INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
193 	INIT_LIST_HEAD(&dc->delayed_bios);
194 	mutex_init(&dc->timer_lock);
195 	atomic_set(&dc->may_delay, 1);
196 	dc->argc = argc;
197 
198 	ret = delay_class_ctr(ti, &dc->read, argv);
199 	if (ret)
200 		goto bad;
201 
202 	if (argc == 3) {
203 		ret = delay_class_ctr(ti, &dc->write, argv);
204 		if (ret)
205 			goto bad;
206 		ret = delay_class_ctr(ti, &dc->flush, argv);
207 		if (ret)
208 			goto bad;
209 		goto out;
210 	}
211 
212 	ret = delay_class_ctr(ti, &dc->write, argv + 3);
213 	if (ret)
214 		goto bad;
215 	if (argc == 6) {
216 		ret = delay_class_ctr(ti, &dc->flush, argv + 3);
217 		if (ret)
218 			goto bad;
219 		goto out;
220 	}
221 
222 	ret = delay_class_ctr(ti, &dc->flush, argv + 6);
223 	if (ret)
224 		goto bad;
225 
226 out:
227 	dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
228 	if (!dc->kdelayd_wq) {
229 		ret = -EINVAL;
230 		DMERR("Couldn't start kdelayd");
231 		goto bad;
232 	}
233 
234 	ti->num_flush_bios = 1;
235 	ti->num_discard_bios = 1;
236 	ti->accounts_remapped_io = true;
237 	ti->per_io_data_size = sizeof(struct dm_delay_info);
238 	return 0;
239 
240 bad:
241 	delay_dtr(ti);
242 	return ret;
243 }
244 
245 static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
246 {
247 	struct dm_delay_info *delayed;
248 	unsigned long expires = 0;
249 
250 	if (!c->delay || !atomic_read(&dc->may_delay))
251 		return DM_MAPIO_REMAPPED;
252 
253 	delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
254 
255 	delayed->context = dc;
256 	delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
257 
258 	mutex_lock(&delayed_bios_lock);
259 	c->ops++;
260 	list_add_tail(&delayed->list, &dc->delayed_bios);
261 	mutex_unlock(&delayed_bios_lock);
262 
263 	queue_timeout(dc, expires);
264 
265 	return DM_MAPIO_SUBMITTED;
266 }
267 
268 static void delay_presuspend(struct dm_target *ti)
269 {
270 	struct delay_c *dc = ti->private;
271 
272 	atomic_set(&dc->may_delay, 0);
273 	del_timer_sync(&dc->delay_timer);
274 	flush_bios(flush_delayed_bios(dc, 1));
275 }
276 
277 static void delay_resume(struct dm_target *ti)
278 {
279 	struct delay_c *dc = ti->private;
280 
281 	atomic_set(&dc->may_delay, 1);
282 }
283 
284 static int delay_map(struct dm_target *ti, struct bio *bio)
285 {
286 	struct delay_c *dc = ti->private;
287 	struct delay_class *c;
288 	struct dm_delay_info *delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
289 
290 	if (bio_data_dir(bio) == WRITE) {
291 		if (unlikely(bio->bi_opf & REQ_PREFLUSH))
292 			c = &dc->flush;
293 		else
294 			c = &dc->write;
295 	} else {
296 		c = &dc->read;
297 	}
298 	delayed->class = c;
299 	bio_set_dev(bio, c->dev->bdev);
300 	bio->bi_iter.bi_sector = c->start + dm_target_offset(ti, bio->bi_iter.bi_sector);
301 
302 	return delay_bio(dc, c, bio);
303 }
304 
/*
 * Emit one class as "<device name> <start sector> <delay>" through DMEMIT.
 * Intended for use inside delay_status(), where DMEMIT is used.
 */
#define DMEMIT_DELAY_CLASS(c) \
	DMEMIT("%s %llu %u", (c)->dev->name, (unsigned long long)(c)->start, (c)->delay)
307 
308 static void delay_status(struct dm_target *ti, status_type_t type,
309 			 unsigned int status_flags, char *result, unsigned int maxlen)
310 {
311 	struct delay_c *dc = ti->private;
312 	int sz = 0;
313 
314 	switch (type) {
315 	case STATUSTYPE_INFO:
316 		DMEMIT("%u %u %u", dc->read.ops, dc->write.ops, dc->flush.ops);
317 		break;
318 
319 	case STATUSTYPE_TABLE:
320 		DMEMIT_DELAY_CLASS(&dc->read);
321 		if (dc->argc >= 6) {
322 			DMEMIT(" ");
323 			DMEMIT_DELAY_CLASS(&dc->write);
324 		}
325 		if (dc->argc >= 9) {
326 			DMEMIT(" ");
327 			DMEMIT_DELAY_CLASS(&dc->flush);
328 		}
329 		break;
330 
331 	case STATUSTYPE_IMA:
332 		*result = '\0';
333 		break;
334 	}
335 }
336 
337 static int delay_iterate_devices(struct dm_target *ti,
338 				 iterate_devices_callout_fn fn, void *data)
339 {
340 	struct delay_c *dc = ti->private;
341 	int ret = 0;
342 
343 	ret = fn(ti, dc->read.dev, dc->read.start, ti->len, data);
344 	if (ret)
345 		goto out;
346 	ret = fn(ti, dc->write.dev, dc->write.start, ti->len, data);
347 	if (ret)
348 		goto out;
349 	ret = fn(ti, dc->flush.dev, dc->flush.start, ti->len, data);
350 	if (ret)
351 		goto out;
352 
353 out:
354 	return ret;
355 }
356 
/*
 * Target type description for the "delay" target; registered in
 * dm_delay_init() and unregistered in dm_delay_exit().
 */
static struct target_type delay_target = {
	.name	     = "delay",
	.version     = {1, 3, 0},
	.features    = DM_TARGET_PASSES_INTEGRITY,
	.module      = THIS_MODULE,
	.ctr	     = delay_ctr,
	.dtr	     = delay_dtr,
	.map	     = delay_map,
	.presuspend  = delay_presuspend,
	.resume	     = delay_resume,
	.status	     = delay_status,
	.iterate_devices = delay_iterate_devices,
};
370 
371 static int __init dm_delay_init(void)
372 {
373 	int r;
374 
375 	r = dm_register_target(&delay_target);
376 	if (r < 0) {
377 		DMERR("register failed %d", r);
378 		goto bad_register;
379 	}
380 
381 	return 0;
382 
383 bad_register:
384 	return r;
385 }
386 
/*
 * Module exit: unregister the delay target.
 */
static void __exit dm_delay_exit(void)
{
	dm_unregister_target(&delay_target);
}
391 
/* Module hooks: dm_delay_init() runs on load, dm_delay_exit() on unload. */
module_init(dm_delay_init);
module_exit(dm_delay_exit);

MODULE_DESCRIPTION(DM_NAME " delay target");
MODULE_AUTHOR("Heinz Mauelshagen <mauelshagen@redhat.com>");
MODULE_LICENSE("GPL");
399