xref: /openbmc/linux/drivers/md/dm-delay.c (revision 9b1623f9609f3fa0a5d02ecd3a2f81164dc4d043)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2005-2007 Red Hat GmbH
4  *
5  * A target that delays reads and/or writes and can send
6  * them to different devices.
7  *
8  * This file is released under the GPL.
9  */
10 
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/blkdev.h>
14 #include <linux/bio.h>
15 #include <linux/slab.h>
16 
17 #include <linux/device-mapper.h>
18 
19 #define DM_MSG_PREFIX "delay"
20 
21 struct delay_class {
22 	struct dm_dev *dev;
23 	sector_t start;
24 	unsigned int delay;
25 	unsigned int ops;
26 };
27 
28 struct delay_c {
29 	struct timer_list delay_timer;
30 	struct mutex timer_lock;
31 	struct workqueue_struct *kdelayd_wq;
32 	struct work_struct flush_expired_bios;
33 	struct list_head delayed_bios;
34 	bool may_delay;
35 
36 	struct delay_class read;
37 	struct delay_class write;
38 	struct delay_class flush;
39 
40 	int argc;
41 };
42 
43 struct dm_delay_info {
44 	struct delay_c *context;
45 	struct delay_class *class;
46 	struct list_head list;
47 	unsigned long expires;
48 };
49 
/*
 * File-scope lock taken around every access to a target's delayed_bios
 * list and the per-class ops counters, and around the may_delay checks
 * in delay_bio() and delay_presuspend().
 */
static DEFINE_MUTEX(delayed_bios_lock);
51 
52 static void handle_delayed_timer(struct timer_list *t)
53 {
54 	struct delay_c *dc = from_timer(dc, t, delay_timer);
55 
56 	queue_work(dc->kdelayd_wq, &dc->flush_expired_bios);
57 }
58 
59 static void queue_timeout(struct delay_c *dc, unsigned long expires)
60 {
61 	mutex_lock(&dc->timer_lock);
62 
63 	if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires)
64 		mod_timer(&dc->delay_timer, expires);
65 
66 	mutex_unlock(&dc->timer_lock);
67 }
68 
69 static void flush_bios(struct bio *bio)
70 {
71 	struct bio *n;
72 
73 	while (bio) {
74 		n = bio->bi_next;
75 		bio->bi_next = NULL;
76 		dm_submit_bio_remap(bio, NULL);
77 		bio = n;
78 	}
79 }
80 
81 static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
82 {
83 	struct dm_delay_info *delayed, *next;
84 	unsigned long next_expires = 0;
85 	unsigned long start_timer = 0;
86 	struct bio_list flush_bios = { };
87 
88 	mutex_lock(&delayed_bios_lock);
89 	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
90 		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
91 			struct bio *bio = dm_bio_from_per_bio_data(delayed,
92 						sizeof(struct dm_delay_info));
93 			list_del(&delayed->list);
94 			bio_list_add(&flush_bios, bio);
95 			delayed->class->ops--;
96 			continue;
97 		}
98 
99 		if (!start_timer) {
100 			start_timer = 1;
101 			next_expires = delayed->expires;
102 		} else
103 			next_expires = min(next_expires, delayed->expires);
104 	}
105 	mutex_unlock(&delayed_bios_lock);
106 
107 	if (start_timer)
108 		queue_timeout(dc, next_expires);
109 
110 	return bio_list_get(&flush_bios);
111 }
112 
113 static void flush_expired_bios(struct work_struct *work)
114 {
115 	struct delay_c *dc;
116 
117 	dc = container_of(work, struct delay_c, flush_expired_bios);
118 	flush_bios(flush_delayed_bios(dc, 0));
119 }
120 
121 static void delay_dtr(struct dm_target *ti)
122 {
123 	struct delay_c *dc = ti->private;
124 
125 	if (dc->kdelayd_wq)
126 		destroy_workqueue(dc->kdelayd_wq);
127 
128 	if (dc->read.dev)
129 		dm_put_device(ti, dc->read.dev);
130 	if (dc->write.dev)
131 		dm_put_device(ti, dc->write.dev);
132 	if (dc->flush.dev)
133 		dm_put_device(ti, dc->flush.dev);
134 
135 	mutex_destroy(&dc->timer_lock);
136 
137 	kfree(dc);
138 }
139 
140 static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **argv)
141 {
142 	int ret;
143 	unsigned long long tmpll;
144 	char dummy;
145 
146 	if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
147 		ti->error = "Invalid device sector";
148 		return -EINVAL;
149 	}
150 	c->start = tmpll;
151 
152 	if (sscanf(argv[2], "%u%c", &c->delay, &dummy) != 1) {
153 		ti->error = "Invalid delay";
154 		return -EINVAL;
155 	}
156 
157 	ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &c->dev);
158 	if (ret) {
159 		ti->error = "Device lookup failed";
160 		return ret;
161 	}
162 
163 	return 0;
164 }
165 
166 /*
167  * Mapping parameters:
168  *    <device> <offset> <delay> [<write_device> <write_offset> <write_delay>]
169  *
170  * With separate write parameters, the first set is only used for reads.
171  * Offsets are specified in sectors.
172  * Delays are specified in milliseconds.
173  */
174 static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
175 {
176 	struct delay_c *dc;
177 	int ret;
178 
179 	if (argc != 3 && argc != 6 && argc != 9) {
180 		ti->error = "Requires exactly 3, 6 or 9 arguments";
181 		return -EINVAL;
182 	}
183 
184 	dc = kzalloc(sizeof(*dc), GFP_KERNEL);
185 	if (!dc) {
186 		ti->error = "Cannot allocate context";
187 		return -ENOMEM;
188 	}
189 
190 	ti->private = dc;
191 	timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
192 	INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
193 	INIT_LIST_HEAD(&dc->delayed_bios);
194 	mutex_init(&dc->timer_lock);
195 	dc->may_delay = true;
196 	dc->argc = argc;
197 
198 	ret = delay_class_ctr(ti, &dc->read, argv);
199 	if (ret)
200 		goto bad;
201 
202 	if (argc == 3) {
203 		ret = delay_class_ctr(ti, &dc->write, argv);
204 		if (ret)
205 			goto bad;
206 		ret = delay_class_ctr(ti, &dc->flush, argv);
207 		if (ret)
208 			goto bad;
209 		goto out;
210 	}
211 
212 	ret = delay_class_ctr(ti, &dc->write, argv + 3);
213 	if (ret)
214 		goto bad;
215 	if (argc == 6) {
216 		ret = delay_class_ctr(ti, &dc->flush, argv + 3);
217 		if (ret)
218 			goto bad;
219 		goto out;
220 	}
221 
222 	ret = delay_class_ctr(ti, &dc->flush, argv + 6);
223 	if (ret)
224 		goto bad;
225 
226 out:
227 	dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
228 	if (!dc->kdelayd_wq) {
229 		ret = -EINVAL;
230 		DMERR("Couldn't start kdelayd");
231 		goto bad;
232 	}
233 
234 	ti->num_flush_bios = 1;
235 	ti->num_discard_bios = 1;
236 	ti->accounts_remapped_io = true;
237 	ti->per_io_data_size = sizeof(struct dm_delay_info);
238 	return 0;
239 
240 bad:
241 	delay_dtr(ti);
242 	return ret;
243 }
244 
245 static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
246 {
247 	struct dm_delay_info *delayed;
248 	unsigned long expires = 0;
249 
250 	if (!c->delay)
251 		return DM_MAPIO_REMAPPED;
252 
253 	delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
254 
255 	delayed->context = dc;
256 	delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
257 
258 	mutex_lock(&delayed_bios_lock);
259 	if (unlikely(!dc->may_delay)) {
260 		mutex_unlock(&delayed_bios_lock);
261 		return DM_MAPIO_REMAPPED;
262 	}
263 	c->ops++;
264 	list_add_tail(&delayed->list, &dc->delayed_bios);
265 	mutex_unlock(&delayed_bios_lock);
266 
267 	queue_timeout(dc, expires);
268 
269 	return DM_MAPIO_SUBMITTED;
270 }
271 
272 static void delay_presuspend(struct dm_target *ti)
273 {
274 	struct delay_c *dc = ti->private;
275 
276 	mutex_lock(&delayed_bios_lock);
277 	dc->may_delay = false;
278 	mutex_unlock(&delayed_bios_lock);
279 
280 	del_timer_sync(&dc->delay_timer);
281 	flush_bios(flush_delayed_bios(dc, 1));
282 }
283 
284 static void delay_resume(struct dm_target *ti)
285 {
286 	struct delay_c *dc = ti->private;
287 
288 	dc->may_delay = true;
289 }
290 
291 static int delay_map(struct dm_target *ti, struct bio *bio)
292 {
293 	struct delay_c *dc = ti->private;
294 	struct delay_class *c;
295 	struct dm_delay_info *delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
296 
297 	if (bio_data_dir(bio) == WRITE) {
298 		if (unlikely(bio->bi_opf & REQ_PREFLUSH))
299 			c = &dc->flush;
300 		else
301 			c = &dc->write;
302 	} else {
303 		c = &dc->read;
304 	}
305 	delayed->class = c;
306 	bio_set_dev(bio, c->dev->bdev);
307 	bio->bi_iter.bi_sector = c->start + dm_target_offset(ti, bio->bi_iter.bi_sector);
308 
309 	return delay_bio(dc, c, bio);
310 }
311 
312 #define DMEMIT_DELAY_CLASS(c) \
313 	DMEMIT("%s %llu %u", (c)->dev->name, (unsigned long long)(c)->start, (c)->delay)
314 
315 static void delay_status(struct dm_target *ti, status_type_t type,
316 			 unsigned int status_flags, char *result, unsigned int maxlen)
317 {
318 	struct delay_c *dc = ti->private;
319 	int sz = 0;
320 
321 	switch (type) {
322 	case STATUSTYPE_INFO:
323 		DMEMIT("%u %u %u", dc->read.ops, dc->write.ops, dc->flush.ops);
324 		break;
325 
326 	case STATUSTYPE_TABLE:
327 		DMEMIT_DELAY_CLASS(&dc->read);
328 		if (dc->argc >= 6) {
329 			DMEMIT(" ");
330 			DMEMIT_DELAY_CLASS(&dc->write);
331 		}
332 		if (dc->argc >= 9) {
333 			DMEMIT(" ");
334 			DMEMIT_DELAY_CLASS(&dc->flush);
335 		}
336 		break;
337 
338 	case STATUSTYPE_IMA:
339 		*result = '\0';
340 		break;
341 	}
342 }
343 
344 static int delay_iterate_devices(struct dm_target *ti,
345 				 iterate_devices_callout_fn fn, void *data)
346 {
347 	struct delay_c *dc = ti->private;
348 	int ret = 0;
349 
350 	ret = fn(ti, dc->read.dev, dc->read.start, ti->len, data);
351 	if (ret)
352 		goto out;
353 	ret = fn(ti, dc->write.dev, dc->write.start, ti->len, data);
354 	if (ret)
355 		goto out;
356 	ret = fn(ti, dc->flush.dev, dc->flush.start, ti->len, data);
357 	if (ret)
358 		goto out;
359 
360 out:
361 	return ret;
362 }
363 
364 static struct target_type delay_target = {
365 	.name	     = "delay",
366 	.version     = {1, 3, 0},
367 	.features    = DM_TARGET_PASSES_INTEGRITY,
368 	.module      = THIS_MODULE,
369 	.ctr	     = delay_ctr,
370 	.dtr	     = delay_dtr,
371 	.map	     = delay_map,
372 	.presuspend  = delay_presuspend,
373 	.resume	     = delay_resume,
374 	.status	     = delay_status,
375 	.iterate_devices = delay_iterate_devices,
376 };
377 module_dm(delay);
378 
379 MODULE_DESCRIPTION(DM_NAME " delay target");
380 MODULE_AUTHOR("Heinz Mauelshagen <mauelshagen@redhat.com>");
381 MODULE_LICENSE("GPL");
382