xref: /openbmc/qemu/block/blkdebug.c (revision 0c4e9931)
1 /*
2  * Block protocol for I/O error injection
3  *
4  * Copyright (C) 2016-2017 Red Hat, Inc.
5  * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qapi/error.h"
28 #include "qemu/cutils.h"
29 #include "qemu/config-file.h"
30 #include "block/block_int.h"
31 #include "block/qdict.h"
32 #include "qemu/module.h"
33 #include "qemu/option.h"
34 #include "qapi/qapi-visit-block-core.h"
35 #include "qapi/qmp/qdict.h"
36 #include "qapi/qmp/qlist.h"
37 #include "qapi/qmp/qstring.h"
38 #include "qapi/qobject-input-visitor.h"
39 #include "sysemu/qtest.h"
40 
41 typedef struct BDRVBlkdebugState {
42     int state;
43     int new_state;
44     uint64_t align;
45     uint64_t max_transfer;
46     uint64_t opt_write_zero;
47     uint64_t max_write_zero;
48     uint64_t opt_discard;
49     uint64_t max_discard;
50 
51     uint64_t take_child_perms;
52     uint64_t unshare_child_perms;
53 
54     /* For blkdebug_refresh_filename() */
55     char *config_file;
56 
57     QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
58     QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
59     QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs;
60 } BDRVBlkdebugState;
61 
62 typedef struct BlkdebugAIOCB {
63     BlockAIOCB common;
64     int ret;
65 } BlkdebugAIOCB;
66 
67 typedef struct BlkdebugSuspendedReq {
68     Coroutine *co;
69     char *tag;
70     QLIST_ENTRY(BlkdebugSuspendedReq) next;
71 } BlkdebugSuspendedReq;
72 
/* Kinds of action a BlkdebugRule can perform (stored in rule->action). */
enum {
    ACTION_INJECT_ERROR,    /* queue the rule so rule_check() can fail I/O */
    ACTION_SET_STATE,       /* switch the state machine to a new state */
    ACTION_SUSPEND,         /* park the current request until resumed */
};
78 
79 typedef struct BlkdebugRule {
80     BlkdebugEvent event;
81     int action;
82     int state;
83     union {
84         struct {
85             uint64_t iotype_mask;
86             int error;
87             int immediately;
88             int once;
89             int64_t offset;
90         } inject;
91         struct {
92             int new_state;
93         } set_state;
94         struct {
95             char *tag;
96         } suspend;
97     } options;
98     QLIST_ENTRY(BlkdebugRule) next;
99     QSIMPLEQ_ENTRY(BlkdebugRule) active_next;
100 } BlkdebugRule;
101 
102 QEMU_BUILD_BUG_MSG(BLKDEBUG_IO_TYPE__MAX > 64,
103                    "BlkdebugIOType mask does not fit into an uint64_t");
104 
105 static QemuOptsList inject_error_opts = {
106     .name = "inject-error",
107     .head = QTAILQ_HEAD_INITIALIZER(inject_error_opts.head),
108     .desc = {
109         {
110             .name = "event",
111             .type = QEMU_OPT_STRING,
112         },
113         {
114             .name = "state",
115             .type = QEMU_OPT_NUMBER,
116         },
117         {
118             .name = "iotype",
119             .type = QEMU_OPT_STRING,
120         },
121         {
122             .name = "errno",
123             .type = QEMU_OPT_NUMBER,
124         },
125         {
126             .name = "sector",
127             .type = QEMU_OPT_NUMBER,
128         },
129         {
130             .name = "once",
131             .type = QEMU_OPT_BOOL,
132         },
133         {
134             .name = "immediately",
135             .type = QEMU_OPT_BOOL,
136         },
137         { /* end of list */ }
138     },
139 };
140 
141 static QemuOptsList set_state_opts = {
142     .name = "set-state",
143     .head = QTAILQ_HEAD_INITIALIZER(set_state_opts.head),
144     .desc = {
145         {
146             .name = "event",
147             .type = QEMU_OPT_STRING,
148         },
149         {
150             .name = "state",
151             .type = QEMU_OPT_NUMBER,
152         },
153         {
154             .name = "new_state",
155             .type = QEMU_OPT_NUMBER,
156         },
157         { /* end of list */ }
158     },
159 };
160 
161 static QemuOptsList *config_groups[] = {
162     &inject_error_opts,
163     &set_state_opts,
164     NULL
165 };
166 
167 struct add_rule_data {
168     BDRVBlkdebugState *s;
169     int action;
170 };
171 
172 static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
173 {
174     struct add_rule_data *d = opaque;
175     BDRVBlkdebugState *s = d->s;
176     const char* event_name;
177     int event;
178     struct BlkdebugRule *rule;
179     int64_t sector;
180     BlkdebugIOType iotype;
181     Error *local_error = NULL;
182 
183     /* Find the right event for the rule */
184     event_name = qemu_opt_get(opts, "event");
185     if (!event_name) {
186         error_setg(errp, "Missing event name for rule");
187         return -1;
188     }
189     event = qapi_enum_parse(&BlkdebugEvent_lookup, event_name, -1, errp);
190     if (event < 0) {
191         return -1;
192     }
193 
194     /* Set attributes common for all actions */
195     rule = g_malloc0(sizeof(*rule));
196     *rule = (struct BlkdebugRule) {
197         .event  = event,
198         .action = d->action,
199         .state  = qemu_opt_get_number(opts, "state", 0),
200     };
201 
202     /* Parse action-specific options */
203     switch (d->action) {
204     case ACTION_INJECT_ERROR:
205         rule->options.inject.error = qemu_opt_get_number(opts, "errno", EIO);
206         rule->options.inject.once  = qemu_opt_get_bool(opts, "once", 0);
207         rule->options.inject.immediately =
208             qemu_opt_get_bool(opts, "immediately", 0);
209         sector = qemu_opt_get_number(opts, "sector", -1);
210         rule->options.inject.offset =
211             sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE;
212 
213         iotype = qapi_enum_parse(&BlkdebugIOType_lookup,
214                                  qemu_opt_get(opts, "iotype"),
215                                  BLKDEBUG_IO_TYPE__MAX, &local_error);
216         if (local_error) {
217             error_propagate(errp, local_error);
218             return -1;
219         }
220         if (iotype != BLKDEBUG_IO_TYPE__MAX) {
221             rule->options.inject.iotype_mask = (1ull << iotype);
222         } else {
223             /* Apply the default */
224             rule->options.inject.iotype_mask =
225                 (1ull << BLKDEBUG_IO_TYPE_READ)
226                 | (1ull << BLKDEBUG_IO_TYPE_WRITE)
227                 | (1ull << BLKDEBUG_IO_TYPE_WRITE_ZEROES)
228                 | (1ull << BLKDEBUG_IO_TYPE_DISCARD)
229                 | (1ull << BLKDEBUG_IO_TYPE_FLUSH);
230         }
231 
232         break;
233 
234     case ACTION_SET_STATE:
235         rule->options.set_state.new_state =
236             qemu_opt_get_number(opts, "new_state", 0);
237         break;
238 
239     case ACTION_SUSPEND:
240         rule->options.suspend.tag =
241             g_strdup(qemu_opt_get(opts, "tag"));
242         break;
243     };
244 
245     /* Add the rule */
246     QLIST_INSERT_HEAD(&s->rules[event], rule, next);
247 
248     return 0;
249 }
250 
251 static void remove_rule(BlkdebugRule *rule)
252 {
253     switch (rule->action) {
254     case ACTION_INJECT_ERROR:
255     case ACTION_SET_STATE:
256         break;
257     case ACTION_SUSPEND:
258         g_free(rule->options.suspend.tag);
259         break;
260     }
261 
262     QLIST_REMOVE(rule, next);
263     g_free(rule);
264 }
265 
266 static int read_config(BDRVBlkdebugState *s, const char *filename,
267                        QDict *options, Error **errp)
268 {
269     FILE *f = NULL;
270     int ret;
271     struct add_rule_data d;
272     Error *local_err = NULL;
273 
274     if (filename) {
275         f = fopen(filename, "r");
276         if (f == NULL) {
277             error_setg_errno(errp, errno, "Could not read blkdebug config file");
278             return -errno;
279         }
280 
281         ret = qemu_config_parse(f, config_groups, filename);
282         if (ret < 0) {
283             error_setg(errp, "Could not parse blkdebug config file");
284             goto fail;
285         }
286     }
287 
288     qemu_config_parse_qdict(options, config_groups, &local_err);
289     if (local_err) {
290         error_propagate(errp, local_err);
291         ret = -EINVAL;
292         goto fail;
293     }
294 
295     d.s = s;
296     d.action = ACTION_INJECT_ERROR;
297     qemu_opts_foreach(&inject_error_opts, add_rule, &d, &local_err);
298     if (local_err) {
299         error_propagate(errp, local_err);
300         ret = -EINVAL;
301         goto fail;
302     }
303 
304     d.action = ACTION_SET_STATE;
305     qemu_opts_foreach(&set_state_opts, add_rule, &d, &local_err);
306     if (local_err) {
307         error_propagate(errp, local_err);
308         ret = -EINVAL;
309         goto fail;
310     }
311 
312     ret = 0;
313 fail:
314     qemu_opts_reset(&inject_error_opts);
315     qemu_opts_reset(&set_state_opts);
316     if (f) {
317         fclose(f);
318     }
319     return ret;
320 }
321 
322 /* Valid blkdebug filenames look like blkdebug:path/to/config:path/to/image */
323 static void blkdebug_parse_filename(const char *filename, QDict *options,
324                                     Error **errp)
325 {
326     const char *c;
327 
328     /* Parse the blkdebug: prefix */
329     if (!strstart(filename, "blkdebug:", &filename)) {
330         /* There was no prefix; therefore, all options have to be already
331            present in the QDict (except for the filename) */
332         qdict_put_str(options, "x-image", filename);
333         return;
334     }
335 
336     /* Parse config file path */
337     c = strchr(filename, ':');
338     if (c == NULL) {
339         error_setg(errp, "blkdebug requires both config file and image path");
340         return;
341     }
342 
343     if (c != filename) {
344         QString *config_path;
345         config_path = qstring_from_substr(filename, 0, c - filename);
346         qdict_put(options, "config", config_path);
347     }
348 
349     /* TODO Allow multi-level nesting and set file.filename here */
350     filename = c + 1;
351     qdict_put_str(options, "x-image", filename);
352 }
353 
354 static int blkdebug_parse_perm_list(uint64_t *dest, QDict *options,
355                                     const char *prefix, Error **errp)
356 {
357     int ret = 0;
358     QDict *subqdict = NULL;
359     QObject *crumpled_subqdict = NULL;
360     Visitor *v = NULL;
361     BlockPermissionList *perm_list = NULL, *element;
362     Error *local_err = NULL;
363 
364     *dest = 0;
365 
366     qdict_extract_subqdict(options, &subqdict, prefix);
367     if (!qdict_size(subqdict)) {
368         goto out;
369     }
370 
371     crumpled_subqdict = qdict_crumple(subqdict, errp);
372     if (!crumpled_subqdict) {
373         ret = -EINVAL;
374         goto out;
375     }
376 
377     v = qobject_input_visitor_new(crumpled_subqdict);
378     visit_type_BlockPermissionList(v, NULL, &perm_list, &local_err);
379     if (local_err) {
380         error_propagate(errp, local_err);
381         ret = -EINVAL;
382         goto out;
383     }
384 
385     for (element = perm_list; element; element = element->next) {
386         *dest |= bdrv_qapi_perm_to_blk_perm(element->value);
387     }
388 
389 out:
390     qapi_free_BlockPermissionList(perm_list);
391     visit_free(v);
392     qobject_unref(subqdict);
393     qobject_unref(crumpled_subqdict);
394     return ret;
395 }
396 
397 static int blkdebug_parse_perms(BDRVBlkdebugState *s, QDict *options,
398                                 Error **errp)
399 {
400     int ret;
401 
402     ret = blkdebug_parse_perm_list(&s->take_child_perms, options,
403                                    "take-child-perms.", errp);
404     if (ret < 0) {
405         return ret;
406     }
407 
408     ret = blkdebug_parse_perm_list(&s->unshare_child_perms, options,
409                                    "unshare-child-perms.", errp);
410     if (ret < 0) {
411         return ret;
412     }
413 
414     return 0;
415 }
416 
417 static QemuOptsList runtime_opts = {
418     .name = "blkdebug",
419     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
420     .desc = {
421         {
422             .name = "config",
423             .type = QEMU_OPT_STRING,
424             .help = "Path to the configuration file",
425         },
426         {
427             .name = "x-image",
428             .type = QEMU_OPT_STRING,
429             .help = "[internal use only, will be removed]",
430         },
431         {
432             .name = "align",
433             .type = QEMU_OPT_SIZE,
434             .help = "Required alignment in bytes",
435         },
436         {
437             .name = "max-transfer",
438             .type = QEMU_OPT_SIZE,
439             .help = "Maximum transfer size in bytes",
440         },
441         {
442             .name = "opt-write-zero",
443             .type = QEMU_OPT_SIZE,
444             .help = "Optimum write zero alignment in bytes",
445         },
446         {
447             .name = "max-write-zero",
448             .type = QEMU_OPT_SIZE,
449             .help = "Maximum write zero size in bytes",
450         },
451         {
452             .name = "opt-discard",
453             .type = QEMU_OPT_SIZE,
454             .help = "Optimum discard alignment in bytes",
455         },
456         {
457             .name = "max-discard",
458             .type = QEMU_OPT_SIZE,
459             .help = "Maximum discard size in bytes",
460         },
461         { /* end of list */ }
462     },
463 };
464 
465 static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
466                          Error **errp)
467 {
468     BDRVBlkdebugState *s = bs->opaque;
469     QemuOpts *opts;
470     Error *local_err = NULL;
471     int ret;
472     uint64_t align;
473 
474     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
475     qemu_opts_absorb_qdict(opts, options, &local_err);
476     if (local_err) {
477         error_propagate(errp, local_err);
478         ret = -EINVAL;
479         goto out;
480     }
481 
482     /* Read rules from config file or command line options */
483     s->config_file = g_strdup(qemu_opt_get(opts, "config"));
484     ret = read_config(s, s->config_file, options, errp);
485     if (ret) {
486         goto out;
487     }
488 
489     /* Set initial state */
490     s->state = 1;
491 
492     /* Parse permissions modifiers before opening the image file */
493     ret = blkdebug_parse_perms(s, options, errp);
494     if (ret < 0) {
495         goto out;
496     }
497 
498     /* Open the image file */
499     bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image",
500                                bs, &child_of_bds,
501                                BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
502                                false, &local_err);
503     if (local_err) {
504         ret = -EINVAL;
505         error_propagate(errp, local_err);
506         goto out;
507     }
508 
509     bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
510         (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
511     bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
512         ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
513             bs->file->bs->supported_zero_flags);
514     ret = -EINVAL;
515 
516     /* Set alignment overrides */
517     s->align = qemu_opt_get_size(opts, "align", 0);
518     if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) {
519         error_setg(errp, "Cannot meet constraints with align %" PRIu64,
520                    s->align);
521         goto out;
522     }
523     align = MAX(s->align, bs->file->bs->bl.request_alignment);
524 
525     s->max_transfer = qemu_opt_get_size(opts, "max-transfer", 0);
526     if (s->max_transfer &&
527         (s->max_transfer >= INT_MAX ||
528          !QEMU_IS_ALIGNED(s->max_transfer, align))) {
529         error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64,
530                    s->max_transfer);
531         goto out;
532     }
533 
534     s->opt_write_zero = qemu_opt_get_size(opts, "opt-write-zero", 0);
535     if (s->opt_write_zero &&
536         (s->opt_write_zero >= INT_MAX ||
537          !QEMU_IS_ALIGNED(s->opt_write_zero, align))) {
538         error_setg(errp, "Cannot meet constraints with opt-write-zero %" PRIu64,
539                    s->opt_write_zero);
540         goto out;
541     }
542 
543     s->max_write_zero = qemu_opt_get_size(opts, "max-write-zero", 0);
544     if (s->max_write_zero &&
545         (s->max_write_zero >= INT_MAX ||
546          !QEMU_IS_ALIGNED(s->max_write_zero,
547                           MAX(s->opt_write_zero, align)))) {
548         error_setg(errp, "Cannot meet constraints with max-write-zero %" PRIu64,
549                    s->max_write_zero);
550         goto out;
551     }
552 
553     s->opt_discard = qemu_opt_get_size(opts, "opt-discard", 0);
554     if (s->opt_discard &&
555         (s->opt_discard >= INT_MAX ||
556          !QEMU_IS_ALIGNED(s->opt_discard, align))) {
557         error_setg(errp, "Cannot meet constraints with opt-discard %" PRIu64,
558                    s->opt_discard);
559         goto out;
560     }
561 
562     s->max_discard = qemu_opt_get_size(opts, "max-discard", 0);
563     if (s->max_discard &&
564         (s->max_discard >= INT_MAX ||
565          !QEMU_IS_ALIGNED(s->max_discard,
566                           MAX(s->opt_discard, align)))) {
567         error_setg(errp, "Cannot meet constraints with max-discard %" PRIu64,
568                    s->max_discard);
569         goto out;
570     }
571 
572     bdrv_debug_event(bs, BLKDBG_NONE);
573 
574     ret = 0;
575 out:
576     if (ret < 0) {
577         g_free(s->config_file);
578     }
579     qemu_opts_del(opts);
580     return ret;
581 }
582 
583 static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
584                       BlkdebugIOType iotype)
585 {
586     BDRVBlkdebugState *s = bs->opaque;
587     BlkdebugRule *rule = NULL;
588     int error;
589     bool immediately;
590 
591     QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
592         uint64_t inject_offset = rule->options.inject.offset;
593 
594         if ((inject_offset == -1 ||
595              (bytes && inject_offset >= offset &&
596               inject_offset < offset + bytes)) &&
597             (rule->options.inject.iotype_mask & (1ull << iotype)))
598         {
599             break;
600         }
601     }
602 
603     if (!rule || !rule->options.inject.error) {
604         return 0;
605     }
606 
607     immediately = rule->options.inject.immediately;
608     error = rule->options.inject.error;
609 
610     if (rule->options.inject.once) {
611         QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
612         remove_rule(rule);
613     }
614 
615     if (!immediately) {
616         aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
617         qemu_coroutine_yield();
618     }
619 
620     return -error;
621 }
622 
623 static int coroutine_fn
624 blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
625                    QEMUIOVector *qiov, int flags)
626 {
627     int err;
628 
629     /* Sanity check block layer guarantees */
630     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
631     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
632     if (bs->bl.max_transfer) {
633         assert(bytes <= bs->bl.max_transfer);
634     }
635 
636     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_READ);
637     if (err) {
638         return err;
639     }
640 
641     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
642 }
643 
644 static int coroutine_fn
645 blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
646                     QEMUIOVector *qiov, int flags)
647 {
648     int err;
649 
650     /* Sanity check block layer guarantees */
651     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
652     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
653     if (bs->bl.max_transfer) {
654         assert(bytes <= bs->bl.max_transfer);
655     }
656 
657     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE);
658     if (err) {
659         return err;
660     }
661 
662     return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
663 }
664 
665 static int blkdebug_co_flush(BlockDriverState *bs)
666 {
667     int err = rule_check(bs, 0, 0, BLKDEBUG_IO_TYPE_FLUSH);
668 
669     if (err) {
670         return err;
671     }
672 
673     return bdrv_co_flush(bs->file->bs);
674 }
675 
676 static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
677                                                   int64_t offset, int bytes,
678                                                   BdrvRequestFlags flags)
679 {
680     uint32_t align = MAX(bs->bl.request_alignment,
681                          bs->bl.pwrite_zeroes_alignment);
682     int err;
683 
684     /* Only pass through requests that are larger than requested
685      * preferred alignment (so that we test the fallback to writes on
686      * unaligned portions), and check that the block layer never hands
687      * us anything unaligned that crosses an alignment boundary.  */
688     if (bytes < align) {
689         assert(QEMU_IS_ALIGNED(offset, align) ||
690                QEMU_IS_ALIGNED(offset + bytes, align) ||
691                DIV_ROUND_UP(offset, align) ==
692                DIV_ROUND_UP(offset + bytes, align));
693         return -ENOTSUP;
694     }
695     assert(QEMU_IS_ALIGNED(offset, align));
696     assert(QEMU_IS_ALIGNED(bytes, align));
697     if (bs->bl.max_pwrite_zeroes) {
698         assert(bytes <= bs->bl.max_pwrite_zeroes);
699     }
700 
701     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE_ZEROES);
702     if (err) {
703         return err;
704     }
705 
706     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
707 }
708 
709 static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
710                                              int64_t offset, int bytes)
711 {
712     uint32_t align = bs->bl.pdiscard_alignment;
713     int err;
714 
715     /* Only pass through requests that are larger than requested
716      * minimum alignment, and ensure that unaligned requests do not
717      * cross optimum discard boundaries. */
718     if (bytes < bs->bl.request_alignment) {
719         assert(QEMU_IS_ALIGNED(offset, align) ||
720                QEMU_IS_ALIGNED(offset + bytes, align) ||
721                DIV_ROUND_UP(offset, align) ==
722                DIV_ROUND_UP(offset + bytes, align));
723         return -ENOTSUP;
724     }
725     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
726     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
727     if (align && bytes >= align) {
728         assert(QEMU_IS_ALIGNED(offset, align));
729         assert(QEMU_IS_ALIGNED(bytes, align));
730     }
731     if (bs->bl.max_pdiscard) {
732         assert(bytes <= bs->bl.max_pdiscard);
733     }
734 
735     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_DISCARD);
736     if (err) {
737         return err;
738     }
739 
740     return bdrv_co_pdiscard(bs->file, offset, bytes);
741 }
742 
743 static int coroutine_fn blkdebug_co_block_status(BlockDriverState *bs,
744                                                  bool want_zero,
745                                                  int64_t offset,
746                                                  int64_t bytes,
747                                                  int64_t *pnum,
748                                                  int64_t *map,
749                                                  BlockDriverState **file)
750 {
751     int err;
752 
753     assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment));
754 
755     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_BLOCK_STATUS);
756     if (err) {
757         return err;
758     }
759 
760     return bdrv_co_block_status_from_file(bs, want_zero, offset, bytes,
761                                           pnum, map, file);
762 }
763 
764 static void blkdebug_close(BlockDriverState *bs)
765 {
766     BDRVBlkdebugState *s = bs->opaque;
767     BlkdebugRule *rule, *next;
768     int i;
769 
770     for (i = 0; i < BLKDBG__MAX; i++) {
771         QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
772             remove_rule(rule);
773         }
774     }
775 
776     g_free(s->config_file);
777 }
778 
779 static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
780 {
781     BDRVBlkdebugState *s = bs->opaque;
782     BlkdebugSuspendedReq r;
783 
784     r = (BlkdebugSuspendedReq) {
785         .co         = qemu_coroutine_self(),
786         .tag        = g_strdup(rule->options.suspend.tag),
787     };
788 
789     remove_rule(rule);
790     QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next);
791 
792     if (!qtest_enabled()) {
793         printf("blkdebug: Suspended request '%s'\n", r.tag);
794     }
795     qemu_coroutine_yield();
796     if (!qtest_enabled()) {
797         printf("blkdebug: Resuming request '%s'\n", r.tag);
798     }
799 
800     QLIST_REMOVE(&r, next);
801     g_free(r.tag);
802 }
803 
804 static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
805     bool injected)
806 {
807     BDRVBlkdebugState *s = bs->opaque;
808 
809     /* Only process rules for the current state */
810     if (rule->state && rule->state != s->state) {
811         return injected;
812     }
813 
814     /* Take the action */
815     switch (rule->action) {
816     case ACTION_INJECT_ERROR:
817         if (!injected) {
818             QSIMPLEQ_INIT(&s->active_rules);
819             injected = true;
820         }
821         QSIMPLEQ_INSERT_HEAD(&s->active_rules, rule, active_next);
822         break;
823 
824     case ACTION_SET_STATE:
825         s->new_state = rule->options.set_state.new_state;
826         break;
827 
828     case ACTION_SUSPEND:
829         suspend_request(bs, rule);
830         break;
831     }
832     return injected;
833 }
834 
835 static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event)
836 {
837     BDRVBlkdebugState *s = bs->opaque;
838     struct BlkdebugRule *rule, *next;
839     bool injected;
840 
841     assert((int)event >= 0 && event < BLKDBG__MAX);
842 
843     injected = false;
844     s->new_state = s->state;
845     QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) {
846         injected = process_rule(bs, rule, injected);
847     }
848     s->state = s->new_state;
849 }
850 
851 static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
852                                      const char *tag)
853 {
854     BDRVBlkdebugState *s = bs->opaque;
855     struct BlkdebugRule *rule;
856     int blkdebug_event;
857 
858     blkdebug_event = qapi_enum_parse(&BlkdebugEvent_lookup, event, -1, NULL);
859     if (blkdebug_event < 0) {
860         return -ENOENT;
861     }
862 
863     rule = g_malloc(sizeof(*rule));
864     *rule = (struct BlkdebugRule) {
865         .event  = blkdebug_event,
866         .action = ACTION_SUSPEND,
867         .state  = 0,
868         .options.suspend.tag = g_strdup(tag),
869     };
870 
871     QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next);
872 
873     return 0;
874 }
875 
876 static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
877 {
878     BDRVBlkdebugState *s = bs->opaque;
879     BlkdebugSuspendedReq *r, *next;
880 
881     QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, next) {
882         if (!strcmp(r->tag, tag)) {
883             qemu_coroutine_enter(r->co);
884             return 0;
885         }
886     }
887     return -ENOENT;
888 }
889 
890 static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
891                                             const char *tag)
892 {
893     BDRVBlkdebugState *s = bs->opaque;
894     BlkdebugSuspendedReq *r, *r_next;
895     BlkdebugRule *rule, *next;
896     int i, ret = -ENOENT;
897 
898     for (i = 0; i < BLKDBG__MAX; i++) {
899         QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
900             if (rule->action == ACTION_SUSPEND &&
901                 !strcmp(rule->options.suspend.tag, tag)) {
902                 remove_rule(rule);
903                 ret = 0;
904             }
905         }
906     }
907     QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, r_next) {
908         if (!strcmp(r->tag, tag)) {
909             qemu_coroutine_enter(r->co);
910             ret = 0;
911         }
912     }
913     return ret;
914 }
915 
916 static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
917 {
918     BDRVBlkdebugState *s = bs->opaque;
919     BlkdebugSuspendedReq *r;
920 
921     QLIST_FOREACH(r, &s->suspended_reqs, next) {
922         if (!strcmp(r->tag, tag)) {
923             return true;
924         }
925     }
926     return false;
927 }
928 
929 static int64_t blkdebug_getlength(BlockDriverState *bs)
930 {
931     return bdrv_getlength(bs->file->bs);
932 }
933 
934 static void blkdebug_refresh_filename(BlockDriverState *bs)
935 {
936     BDRVBlkdebugState *s = bs->opaque;
937     const QDictEntry *e;
938     int ret;
939 
940     if (!bs->file->bs->exact_filename[0]) {
941         return;
942     }
943 
944     for (e = qdict_first(bs->full_open_options); e;
945          e = qdict_next(bs->full_open_options, e))
946     {
947         /* Real child options are under "image", but "x-image" may
948          * contain a filename */
949         if (strcmp(qdict_entry_key(e), "config") &&
950             strcmp(qdict_entry_key(e), "image") &&
951             strcmp(qdict_entry_key(e), "x-image") &&
952             strcmp(qdict_entry_key(e), "driver"))
953         {
954             return;
955         }
956     }
957 
958     ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename),
959                    "blkdebug:%s:%s",
960                    s->config_file ?: "", bs->file->bs->exact_filename);
961     if (ret >= sizeof(bs->exact_filename)) {
962         /* An overflow makes the filename unusable, so do not report any */
963         bs->exact_filename[0] = 0;
964     }
965 }
966 
967 static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp)
968 {
969     BDRVBlkdebugState *s = bs->opaque;
970 
971     if (s->align) {
972         bs->bl.request_alignment = s->align;
973     }
974     if (s->max_transfer) {
975         bs->bl.max_transfer = s->max_transfer;
976     }
977     if (s->opt_write_zero) {
978         bs->bl.pwrite_zeroes_alignment = s->opt_write_zero;
979     }
980     if (s->max_write_zero) {
981         bs->bl.max_pwrite_zeroes = s->max_write_zero;
982     }
983     if (s->opt_discard) {
984         bs->bl.pdiscard_alignment = s->opt_discard;
985     }
986     if (s->max_discard) {
987         bs->bl.max_pdiscard = s->max_discard;
988     }
989 }
990 
991 static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
992                                    BlockReopenQueue *queue, Error **errp)
993 {
994     return 0;
995 }
996 
997 static void blkdebug_child_perm(BlockDriverState *bs, BdrvChild *c,
998                                 BdrvChildRole role,
999                                 BlockReopenQueue *reopen_queue,
1000                                 uint64_t perm, uint64_t shared,
1001                                 uint64_t *nperm, uint64_t *nshared)
1002 {
1003     BDRVBlkdebugState *s = bs->opaque;
1004 
1005     bdrv_default_perms(bs, c, role, reopen_queue,
1006                        perm, shared, nperm, nshared);
1007 
1008     *nperm |= s->take_child_perms;
1009     *nshared &= ~s->unshare_child_perms;
1010 }
1011 
/*
 * NULL-terminated list registered as .strong_runtime_opts of the
 * driver.  Entries ending in '.' name a group of options by prefix.
 */
static const char *const blkdebug_strong_runtime_opts[] = {
    /* rule sources */
    "config",
    "inject-error.",
    "set-state.",

    /* limit overrides */
    "align",
    "max-transfer",
    "opt-write-zero",
    "max-write-zero",
    "opt-discard",
    "max-discard",

    NULL
};
1025 
1026 static BlockDriver bdrv_blkdebug = {
1027     .format_name            = "blkdebug",
1028     .protocol_name          = "blkdebug",
1029     .instance_size          = sizeof(BDRVBlkdebugState),
1030     .is_filter              = true,
1031 
1032     .bdrv_parse_filename    = blkdebug_parse_filename,
1033     .bdrv_file_open         = blkdebug_open,
1034     .bdrv_close             = blkdebug_close,
1035     .bdrv_reopen_prepare    = blkdebug_reopen_prepare,
1036     .bdrv_child_perm        = blkdebug_child_perm,
1037 
1038     .bdrv_getlength         = blkdebug_getlength,
1039     .bdrv_refresh_filename  = blkdebug_refresh_filename,
1040     .bdrv_refresh_limits    = blkdebug_refresh_limits,
1041 
1042     .bdrv_co_preadv         = blkdebug_co_preadv,
1043     .bdrv_co_pwritev        = blkdebug_co_pwritev,
1044     .bdrv_co_flush_to_disk  = blkdebug_co_flush,
1045     .bdrv_co_pwrite_zeroes  = blkdebug_co_pwrite_zeroes,
1046     .bdrv_co_pdiscard       = blkdebug_co_pdiscard,
1047     .bdrv_co_block_status   = blkdebug_co_block_status,
1048 
1049     .bdrv_debug_event           = blkdebug_debug_event,
1050     .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
1051     .bdrv_debug_remove_breakpoint
1052                                 = blkdebug_debug_remove_breakpoint,
1053     .bdrv_debug_resume          = blkdebug_debug_resume,
1054     .bdrv_debug_is_suspended    = blkdebug_debug_is_suspended,
1055 
1056     .strong_runtime_opts        = blkdebug_strong_runtime_opts,
1057 };
1058 
1059 static void bdrv_blkdebug_init(void)
1060 {
1061     bdrv_register(&bdrv_blkdebug);
1062 }
1063 
1064 block_init(bdrv_blkdebug_init);
1065