xref: /openbmc/qemu/block/blkdebug.c (revision 8092b51849499be97c42c0f1a832ade969e38724)
1 /*
2  * Block protocol for I/O error injection
3  *
4  * Copyright (C) 2016-2017 Red Hat, Inc.
5  * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qapi/error.h"
28 #include "qemu/cutils.h"
29 #include "qemu/config-file.h"
30 #include "block/block_int.h"
31 #include "block/qdict.h"
32 #include "qemu/module.h"
33 #include "qemu/option.h"
34 #include "qapi/qapi-visit-block-core.h"
35 #include "qapi/qmp/qdict.h"
36 #include "qapi/qmp/qlist.h"
37 #include "qapi/qmp/qstring.h"
38 #include "qapi/qobject-input-visitor.h"
39 #include "sysemu/qtest.h"
40 
41 /* All APIs are thread-safe */
42 
/*
 * Per-node state of a blkdebug driver instance (stored in bs->opaque).
 */
typedef struct BDRVBlkdebugState {
    /* IN: initialized in blkdebug_open() and never changed */
    uint64_t align;             /* "align" option, 0 if not given */
    uint64_t max_transfer;      /* "max-transfer" option, 0 if not given */
    uint64_t opt_write_zero;    /* "opt-write-zero" option, 0 if not given */
    uint64_t max_write_zero;    /* "max-write-zero" option, 0 if not given */
    uint64_t opt_discard;       /* "opt-discard" option, 0 if not given */
    uint64_t max_discard;       /* "max-discard" option, 0 if not given */
    char *config_file; /* For blkdebug_refresh_filename() */
    /* initialized in blkdebug_parse_perms() */
    uint64_t take_child_perms;
    uint64_t unshare_child_perms;

    /* State. Protected by lock */
    int state;                  /* current state; blkdebug_open() starts at 1 */
    /* All configured rules, one list per event */
    QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
    /* Inject-error rules queued by process_rule(), read by rule_check() */
    QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
    /* Requests parked by suspend_request() until resumed by tag */
    QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs;
    QemuMutex lock;
} BDRVBlkdebugState;
63 
/*
 * AIO control block carrying a result code.
 * NOTE(review): not referenced anywhere in the visible part of this file;
 * confirm whether it is still needed.
 */
typedef struct BlkdebugAIOCB {
    BlockAIOCB common;
    int ret;
} BlkdebugAIOCB;
68 
/*
 * One request that hit a suspend rule and is waiting to be resumed by tag.
 */
typedef struct BlkdebugSuspendedReq {
    /* IN: initialized in suspend_request() */
    Coroutine *co;      /* coroutine to re-enter on resume */
    char *tag;          /* heap copy of the tag of the rule that suspended it */

    /* List entry protected by BDRVBlkdebugState's lock */
    QLIST_ENTRY(BlkdebugSuspendedReq) next;
} BlkdebugSuspendedReq;
77 
/*
 * Kinds of action a BlkdebugRule can carry. ACTION__MAX is only used to
 * size the per-action counter array in blkdebug_debug_event().
 */
enum {
    ACTION_INJECT_ERROR,    /* queue the rule so rule_check() can fail I/O */
    ACTION_SET_STATE,       /* switch the state machine to a new state */
    ACTION_SUSPEND,         /* park the current coroutine under a tag */
    ACTION__MAX,
};
84 
/*
 * A single rule attached to one event. Which member of @options is valid
 * depends on @action.
 */
typedef struct BlkdebugRule {
    /* IN: initialized in add_rule() or blkdebug_debug_breakpoint() */
    BlkdebugEvent event;
    int action;         /* one of the ACTION_* values */
    int state;          /* state the rule applies to; 0 matches any state */
    union {
        struct {
            uint64_t iotype_mask;   /* bit (1 << BlkdebugIOType) per I/O type */
            int error;              /* errno value; rule_check() returns -error */
            int immediately;        /* if zero, rule_check() yields once first */
            int once;               /* if nonzero, rule is removed after firing */
            int64_t offset;         /* byte offset to match, -1 matches any */
        } inject;
        struct {
            int new_state;
        } set_state;
        struct {
            char *tag;              /* heap string, freed in remove_rule() */
        } suspend;
    } options;

    /* List entries protected by BDRVBlkdebugState's lock */
    QLIST_ENTRY(BlkdebugRule) next;
    QSIMPLEQ_ENTRY(BlkdebugRule) active_next;
} BlkdebugRule;
110 
/* iotype_mask has one bit per BlkdebugIOType, so all types must fit in 64 bits */
QEMU_BUILD_BUG_MSG(BLKDEBUG_IO_TYPE__MAX > 64,
                   "BlkdebugIOType mask does not fit into an uint64_t");
113 
114 static QemuOptsList inject_error_opts = {
115     .name = "inject-error",
116     .head = QTAILQ_HEAD_INITIALIZER(inject_error_opts.head),
117     .desc = {
118         {
119             .name = "event",
120             .type = QEMU_OPT_STRING,
121         },
122         {
123             .name = "state",
124             .type = QEMU_OPT_NUMBER,
125         },
126         {
127             .name = "iotype",
128             .type = QEMU_OPT_STRING,
129         },
130         {
131             .name = "errno",
132             .type = QEMU_OPT_NUMBER,
133         },
134         {
135             .name = "sector",
136             .type = QEMU_OPT_NUMBER,
137         },
138         {
139             .name = "once",
140             .type = QEMU_OPT_BOOL,
141         },
142         {
143             .name = "immediately",
144             .type = QEMU_OPT_BOOL,
145         },
146         { /* end of list */ }
147     },
148 };
149 
150 static QemuOptsList set_state_opts = {
151     .name = "set-state",
152     .head = QTAILQ_HEAD_INITIALIZER(set_state_opts.head),
153     .desc = {
154         {
155             .name = "event",
156             .type = QEMU_OPT_STRING,
157         },
158         {
159             .name = "state",
160             .type = QEMU_OPT_NUMBER,
161         },
162         {
163             .name = "new_state",
164             .type = QEMU_OPT_NUMBER,
165         },
166         { /* end of list */ }
167     },
168 };
169 
/* NULL-terminated list of option groups that read_config() parses */
static QemuOptsList *config_groups[] = {
    &inject_error_opts,
    &set_state_opts,
    NULL
};
175 
/* Opaque argument handed to add_rule() through qemu_opts_foreach() */
struct add_rule_data {
    BDRVBlkdebugState *s;   /* state that receives the new rule */
    int action;             /* ACTION_* value to give the new rule */
};
180 
181 static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
182 {
183     struct add_rule_data *d = opaque;
184     BDRVBlkdebugState *s = d->s;
185     const char *event_name;
186     int event;
187     struct BlkdebugRule *rule;
188     int64_t sector;
189     BlkdebugIOType iotype;
190     Error *local_error = NULL;
191 
192     /* Find the right event for the rule */
193     event_name = qemu_opt_get(opts, "event");
194     if (!event_name) {
195         error_setg(errp, "Missing event name for rule");
196         return -1;
197     }
198     event = qapi_enum_parse(&BlkdebugEvent_lookup, event_name, -1, errp);
199     if (event < 0) {
200         return -1;
201     }
202 
203     /* Set attributes common for all actions */
204     rule = g_malloc0(sizeof(*rule));
205     *rule = (struct BlkdebugRule) {
206         .event  = event,
207         .action = d->action,
208         .state  = qemu_opt_get_number(opts, "state", 0),
209     };
210 
211     /* Parse action-specific options */
212     switch (d->action) {
213     case ACTION_INJECT_ERROR:
214         rule->options.inject.error = qemu_opt_get_number(opts, "errno", EIO);
215         rule->options.inject.once  = qemu_opt_get_bool(opts, "once", 0);
216         rule->options.inject.immediately =
217             qemu_opt_get_bool(opts, "immediately", 0);
218         sector = qemu_opt_get_number(opts, "sector", -1);
219         rule->options.inject.offset =
220             sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE;
221 
222         iotype = qapi_enum_parse(&BlkdebugIOType_lookup,
223                                  qemu_opt_get(opts, "iotype"),
224                                  BLKDEBUG_IO_TYPE__MAX, &local_error);
225         if (local_error) {
226             error_propagate(errp, local_error);
227             g_free(rule);
228             return -1;
229         }
230         if (iotype != BLKDEBUG_IO_TYPE__MAX) {
231             rule->options.inject.iotype_mask = (1ull << iotype);
232         } else {
233             /* Apply the default */
234             rule->options.inject.iotype_mask =
235                 (1ull << BLKDEBUG_IO_TYPE_READ)
236                 | (1ull << BLKDEBUG_IO_TYPE_WRITE)
237                 | (1ull << BLKDEBUG_IO_TYPE_WRITE_ZEROES)
238                 | (1ull << BLKDEBUG_IO_TYPE_DISCARD)
239                 | (1ull << BLKDEBUG_IO_TYPE_FLUSH);
240         }
241 
242         break;
243 
244     case ACTION_SET_STATE:
245         rule->options.set_state.new_state =
246             qemu_opt_get_number(opts, "new_state", 0);
247         break;
248 
249     case ACTION_SUSPEND:
250         rule->options.suspend.tag =
251             g_strdup(qemu_opt_get(opts, "tag"));
252         break;
253     };
254 
255     /* Add the rule */
256     qemu_mutex_lock(&s->lock);
257     QLIST_INSERT_HEAD(&s->rules[event], rule, next);
258     qemu_mutex_unlock(&s->lock);
259 
260     return 0;
261 }
262 
263 /* Called with lock held or from .bdrv_close */
264 static void remove_rule(BlkdebugRule *rule)
265 {
266     switch (rule->action) {
267     case ACTION_INJECT_ERROR:
268     case ACTION_SET_STATE:
269         break;
270     case ACTION_SUSPEND:
271         g_free(rule->options.suspend.tag);
272         break;
273     }
274 
275     QLIST_REMOVE(rule, next);
276     g_free(rule);
277 }
278 
279 static int read_config(BDRVBlkdebugState *s, const char *filename,
280                        QDict *options, Error **errp)
281 {
282     FILE *f = NULL;
283     int ret;
284     struct add_rule_data d;
285     Error *local_err = NULL;
286 
287     if (filename) {
288         f = fopen(filename, "r");
289         if (f == NULL) {
290             error_setg_errno(errp, errno, "Could not read blkdebug config file");
291             return -errno;
292         }
293 
294         ret = qemu_config_parse(f, config_groups, filename, errp);
295         if (ret < 0) {
296             goto fail;
297         }
298     }
299 
300     qemu_config_parse_qdict(options, config_groups, &local_err);
301     if (local_err) {
302         error_propagate(errp, local_err);
303         ret = -EINVAL;
304         goto fail;
305     }
306 
307     d.s = s;
308     d.action = ACTION_INJECT_ERROR;
309     qemu_opts_foreach(&inject_error_opts, add_rule, &d, &local_err);
310     if (local_err) {
311         error_propagate(errp, local_err);
312         ret = -EINVAL;
313         goto fail;
314     }
315 
316     d.action = ACTION_SET_STATE;
317     qemu_opts_foreach(&set_state_opts, add_rule, &d, &local_err);
318     if (local_err) {
319         error_propagate(errp, local_err);
320         ret = -EINVAL;
321         goto fail;
322     }
323 
324     ret = 0;
325 fail:
326     qemu_opts_reset(&inject_error_opts);
327     qemu_opts_reset(&set_state_opts);
328     if (f) {
329         fclose(f);
330     }
331     return ret;
332 }
333 
334 /* Valid blkdebug filenames look like blkdebug:path/to/config:path/to/image */
335 static void blkdebug_parse_filename(const char *filename, QDict *options,
336                                     Error **errp)
337 {
338     const char *c;
339 
340     /* Parse the blkdebug: prefix */
341     if (!strstart(filename, "blkdebug:", &filename)) {
342         /* There was no prefix; therefore, all options have to be already
343            present in the QDict (except for the filename) */
344         qdict_put_str(options, "x-image", filename);
345         return;
346     }
347 
348     /* Parse config file path */
349     c = strchr(filename, ':');
350     if (c == NULL) {
351         error_setg(errp, "blkdebug requires both config file and image path");
352         return;
353     }
354 
355     if (c != filename) {
356         QString *config_path;
357         config_path = qstring_from_substr(filename, 0, c - filename);
358         qdict_put(options, "config", config_path);
359     }
360 
361     /* TODO Allow multi-level nesting and set file.filename here */
362     filename = c + 1;
363     qdict_put_str(options, "x-image", filename);
364 }
365 
366 static int blkdebug_parse_perm_list(uint64_t *dest, QDict *options,
367                                     const char *prefix, Error **errp)
368 {
369     int ret = 0;
370     QDict *subqdict = NULL;
371     QObject *crumpled_subqdict = NULL;
372     Visitor *v = NULL;
373     BlockPermissionList *perm_list = NULL, *element;
374 
375     *dest = 0;
376 
377     qdict_extract_subqdict(options, &subqdict, prefix);
378     if (!qdict_size(subqdict)) {
379         goto out;
380     }
381 
382     crumpled_subqdict = qdict_crumple(subqdict, errp);
383     if (!crumpled_subqdict) {
384         ret = -EINVAL;
385         goto out;
386     }
387 
388     v = qobject_input_visitor_new(crumpled_subqdict);
389     if (!visit_type_BlockPermissionList(v, NULL, &perm_list, errp)) {
390         ret = -EINVAL;
391         goto out;
392     }
393 
394     for (element = perm_list; element; element = element->next) {
395         *dest |= bdrv_qapi_perm_to_blk_perm(element->value);
396     }
397 
398 out:
399     qapi_free_BlockPermissionList(perm_list);
400     visit_free(v);
401     qobject_unref(subqdict);
402     qobject_unref(crumpled_subqdict);
403     return ret;
404 }
405 
406 static int blkdebug_parse_perms(BDRVBlkdebugState *s, QDict *options,
407                                 Error **errp)
408 {
409     int ret;
410 
411     ret = blkdebug_parse_perm_list(&s->take_child_perms, options,
412                                    "take-child-perms.", errp);
413     if (ret < 0) {
414         return ret;
415     }
416 
417     ret = blkdebug_parse_perm_list(&s->unshare_child_perms, options,
418                                    "unshare-child-perms.", errp);
419     if (ret < 0) {
420         return ret;
421     }
422 
423     return 0;
424 }
425 
426 static QemuOptsList runtime_opts = {
427     .name = "blkdebug",
428     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
429     .desc = {
430         {
431             .name = "config",
432             .type = QEMU_OPT_STRING,
433             .help = "Path to the configuration file",
434         },
435         {
436             .name = "x-image",
437             .type = QEMU_OPT_STRING,
438             .help = "[internal use only, will be removed]",
439         },
440         {
441             .name = "align",
442             .type = QEMU_OPT_SIZE,
443             .help = "Required alignment in bytes",
444         },
445         {
446             .name = "max-transfer",
447             .type = QEMU_OPT_SIZE,
448             .help = "Maximum transfer size in bytes",
449         },
450         {
451             .name = "opt-write-zero",
452             .type = QEMU_OPT_SIZE,
453             .help = "Optimum write zero alignment in bytes",
454         },
455         {
456             .name = "max-write-zero",
457             .type = QEMU_OPT_SIZE,
458             .help = "Maximum write zero size in bytes",
459         },
460         {
461             .name = "opt-discard",
462             .type = QEMU_OPT_SIZE,
463             .help = "Optimum discard alignment in bytes",
464         },
465         {
466             .name = "max-discard",
467             .type = QEMU_OPT_SIZE,
468             .help = "Maximum discard size in bytes",
469         },
470         { /* end of list */ }
471     },
472 };
473 
474 static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
475                          Error **errp)
476 {
477     BDRVBlkdebugState *s = bs->opaque;
478     QemuOpts *opts;
479     int ret;
480     uint64_t align;
481 
482     qemu_mutex_init(&s->lock);
483     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
484     if (!qemu_opts_absorb_qdict(opts, options, errp)) {
485         ret = -EINVAL;
486         goto out;
487     }
488 
489     /* Read rules from config file or command line options */
490     s->config_file = g_strdup(qemu_opt_get(opts, "config"));
491     ret = read_config(s, s->config_file, options, errp);
492     if (ret) {
493         goto out;
494     }
495 
496     /* Set initial state */
497     s->state = 1;
498 
499     /* Parse permissions modifiers before opening the image file */
500     ret = blkdebug_parse_perms(s, options, errp);
501     if (ret < 0) {
502         goto out;
503     }
504 
505     /* Open the image file */
506     bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image",
507                                bs, &child_of_bds,
508                                BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
509                                false, errp);
510     if (!bs->file) {
511         ret = -EINVAL;
512         goto out;
513     }
514 
515     bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
516         (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
517     bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
518         ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
519             bs->file->bs->supported_zero_flags);
520     ret = -EINVAL;
521 
522     /* Set alignment overrides */
523     s->align = qemu_opt_get_size(opts, "align", 0);
524     if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) {
525         error_setg(errp, "Cannot meet constraints with align %" PRIu64,
526                    s->align);
527         goto out;
528     }
529     align = MAX(s->align, bs->file->bs->bl.request_alignment);
530 
531     s->max_transfer = qemu_opt_get_size(opts, "max-transfer", 0);
532     if (s->max_transfer &&
533         (s->max_transfer >= INT_MAX ||
534          !QEMU_IS_ALIGNED(s->max_transfer, align))) {
535         error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64,
536                    s->max_transfer);
537         goto out;
538     }
539 
540     s->opt_write_zero = qemu_opt_get_size(opts, "opt-write-zero", 0);
541     if (s->opt_write_zero &&
542         (s->opt_write_zero >= INT_MAX ||
543          !QEMU_IS_ALIGNED(s->opt_write_zero, align))) {
544         error_setg(errp, "Cannot meet constraints with opt-write-zero %" PRIu64,
545                    s->opt_write_zero);
546         goto out;
547     }
548 
549     s->max_write_zero = qemu_opt_get_size(opts, "max-write-zero", 0);
550     if (s->max_write_zero &&
551         (s->max_write_zero >= INT_MAX ||
552          !QEMU_IS_ALIGNED(s->max_write_zero,
553                           MAX(s->opt_write_zero, align)))) {
554         error_setg(errp, "Cannot meet constraints with max-write-zero %" PRIu64,
555                    s->max_write_zero);
556         goto out;
557     }
558 
559     s->opt_discard = qemu_opt_get_size(opts, "opt-discard", 0);
560     if (s->opt_discard &&
561         (s->opt_discard >= INT_MAX ||
562          !QEMU_IS_ALIGNED(s->opt_discard, align))) {
563         error_setg(errp, "Cannot meet constraints with opt-discard %" PRIu64,
564                    s->opt_discard);
565         goto out;
566     }
567 
568     s->max_discard = qemu_opt_get_size(opts, "max-discard", 0);
569     if (s->max_discard &&
570         (s->max_discard >= INT_MAX ||
571          !QEMU_IS_ALIGNED(s->max_discard,
572                           MAX(s->opt_discard, align)))) {
573         error_setg(errp, "Cannot meet constraints with max-discard %" PRIu64,
574                    s->max_discard);
575         goto out;
576     }
577 
578     bdrv_debug_event(bs, BLKDBG_NONE);
579 
580     ret = 0;
581 out:
582     if (ret < 0) {
583         qemu_mutex_destroy(&s->lock);
584         g_free(s->config_file);
585     }
586     qemu_opts_del(opts);
587     return ret;
588 }
589 
590 static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
591                       BlkdebugIOType iotype)
592 {
593     BDRVBlkdebugState *s = bs->opaque;
594     BlkdebugRule *rule = NULL;
595     int error;
596     bool immediately;
597 
598     qemu_mutex_lock(&s->lock);
599     QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
600         uint64_t inject_offset = rule->options.inject.offset;
601 
602         if ((inject_offset == -1 ||
603              (bytes && inject_offset >= offset &&
604               inject_offset < offset + bytes)) &&
605             (rule->options.inject.iotype_mask & (1ull << iotype)))
606         {
607             break;
608         }
609     }
610 
611     if (!rule || !rule->options.inject.error) {
612         qemu_mutex_unlock(&s->lock);
613         return 0;
614     }
615 
616     immediately = rule->options.inject.immediately;
617     error = rule->options.inject.error;
618 
619     if (rule->options.inject.once) {
620         QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
621         remove_rule(rule);
622     }
623 
624     qemu_mutex_unlock(&s->lock);
625     if (!immediately) {
626         aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
627         qemu_coroutine_yield();
628     }
629 
630     return -error;
631 }
632 
633 static int coroutine_fn
634 blkdebug_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
635                    QEMUIOVector *qiov, BdrvRequestFlags flags)
636 {
637     int err;
638 
639     /* Sanity check block layer guarantees */
640     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
641     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
642     if (bs->bl.max_transfer) {
643         assert(bytes <= bs->bl.max_transfer);
644     }
645 
646     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_READ);
647     if (err) {
648         return err;
649     }
650 
651     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
652 }
653 
654 static int coroutine_fn
655 blkdebug_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
656                     QEMUIOVector *qiov, BdrvRequestFlags flags)
657 {
658     int err;
659 
660     /* Sanity check block layer guarantees */
661     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
662     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
663     if (bs->bl.max_transfer) {
664         assert(bytes <= bs->bl.max_transfer);
665     }
666 
667     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE);
668     if (err) {
669         return err;
670     }
671 
672     return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
673 }
674 
675 static int blkdebug_co_flush(BlockDriverState *bs)
676 {
677     int err = rule_check(bs, 0, 0, BLKDEBUG_IO_TYPE_FLUSH);
678 
679     if (err) {
680         return err;
681     }
682 
683     return bdrv_co_flush(bs->file->bs);
684 }
685 
686 static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
687                                                   int64_t offset, int64_t bytes,
688                                                   BdrvRequestFlags flags)
689 {
690     uint32_t align = MAX(bs->bl.request_alignment,
691                          bs->bl.pwrite_zeroes_alignment);
692     int err;
693 
694     /* Only pass through requests that are larger than requested
695      * preferred alignment (so that we test the fallback to writes on
696      * unaligned portions), and check that the block layer never hands
697      * us anything unaligned that crosses an alignment boundary.  */
698     if (bytes < align) {
699         assert(QEMU_IS_ALIGNED(offset, align) ||
700                QEMU_IS_ALIGNED(offset + bytes, align) ||
701                DIV_ROUND_UP(offset, align) ==
702                DIV_ROUND_UP(offset + bytes, align));
703         return -ENOTSUP;
704     }
705     assert(QEMU_IS_ALIGNED(offset, align));
706     assert(QEMU_IS_ALIGNED(bytes, align));
707     if (bs->bl.max_pwrite_zeroes) {
708         assert(bytes <= bs->bl.max_pwrite_zeroes);
709     }
710 
711     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE_ZEROES);
712     if (err) {
713         return err;
714     }
715 
716     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
717 }
718 
719 static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
720                                              int64_t offset, int64_t bytes)
721 {
722     uint32_t align = bs->bl.pdiscard_alignment;
723     int err;
724 
725     /* Only pass through requests that are larger than requested
726      * minimum alignment, and ensure that unaligned requests do not
727      * cross optimum discard boundaries. */
728     if (bytes < bs->bl.request_alignment) {
729         assert(QEMU_IS_ALIGNED(offset, align) ||
730                QEMU_IS_ALIGNED(offset + bytes, align) ||
731                DIV_ROUND_UP(offset, align) ==
732                DIV_ROUND_UP(offset + bytes, align));
733         return -ENOTSUP;
734     }
735     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
736     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
737     if (align && bytes >= align) {
738         assert(QEMU_IS_ALIGNED(offset, align));
739         assert(QEMU_IS_ALIGNED(bytes, align));
740     }
741     if (bs->bl.max_pdiscard) {
742         assert(bytes <= bs->bl.max_pdiscard);
743     }
744 
745     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_DISCARD);
746     if (err) {
747         return err;
748     }
749 
750     return bdrv_co_pdiscard(bs->file, offset, bytes);
751 }
752 
753 static int coroutine_fn blkdebug_co_block_status(BlockDriverState *bs,
754                                                  bool want_zero,
755                                                  int64_t offset,
756                                                  int64_t bytes,
757                                                  int64_t *pnum,
758                                                  int64_t *map,
759                                                  BlockDriverState **file)
760 {
761     int err;
762 
763     assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment));
764 
765     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_BLOCK_STATUS);
766     if (err) {
767         return err;
768     }
769 
770     assert(bs->file && bs->file->bs);
771     *pnum = bytes;
772     *map = offset;
773     *file = bs->file->bs;
774     return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
775 }
776 
777 static void blkdebug_close(BlockDriverState *bs)
778 {
779     BDRVBlkdebugState *s = bs->opaque;
780     BlkdebugRule *rule, *next;
781     int i;
782 
783     for (i = 0; i < BLKDBG__MAX; i++) {
784         QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
785             remove_rule(rule);
786         }
787     }
788 
789     g_free(s->config_file);
790     qemu_mutex_destroy(&s->lock);
791 }
792 
793 /* Called with lock held.  */
794 static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
795 {
796     BDRVBlkdebugState *s = bs->opaque;
797     BlkdebugSuspendedReq *r;
798 
799     r = g_new(BlkdebugSuspendedReq, 1);
800 
801     r->co         = qemu_coroutine_self();
802     r->tag        = g_strdup(rule->options.suspend.tag);
803 
804     remove_rule(rule);
805     QLIST_INSERT_HEAD(&s->suspended_reqs, r, next);
806 
807     if (!qtest_enabled()) {
808         printf("blkdebug: Suspended request '%s'\n", r->tag);
809     }
810 }
811 
812 /* Called with lock held.  */
813 static void process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
814                          int *action_count, int *new_state)
815 {
816     BDRVBlkdebugState *s = bs->opaque;
817 
818     /* Only process rules for the current state */
819     if (rule->state && rule->state != s->state) {
820         return;
821     }
822 
823     /* Take the action */
824     action_count[rule->action]++;
825     switch (rule->action) {
826     case ACTION_INJECT_ERROR:
827         if (action_count[ACTION_INJECT_ERROR] == 1) {
828             QSIMPLEQ_INIT(&s->active_rules);
829         }
830         QSIMPLEQ_INSERT_HEAD(&s->active_rules, rule, active_next);
831         break;
832 
833     case ACTION_SET_STATE:
834         *new_state = rule->options.set_state.new_state;
835         break;
836 
837     case ACTION_SUSPEND:
838         suspend_request(bs, rule);
839         break;
840     }
841 }
842 
843 static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event)
844 {
845     BDRVBlkdebugState *s = bs->opaque;
846     struct BlkdebugRule *rule, *next;
847     int new_state;
848     int actions_count[ACTION__MAX] = { 0 };
849 
850     assert((int)event >= 0 && event < BLKDBG__MAX);
851 
852     WITH_QEMU_LOCK_GUARD(&s->lock) {
853         new_state = s->state;
854         QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) {
855             process_rule(bs, rule, actions_count, &new_state);
856         }
857         s->state = new_state;
858     }
859 
860     while (actions_count[ACTION_SUSPEND] > 0) {
861         qemu_coroutine_yield();
862         actions_count[ACTION_SUSPEND]--;
863     }
864 }
865 
866 static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
867                                      const char *tag)
868 {
869     BDRVBlkdebugState *s = bs->opaque;
870     struct BlkdebugRule *rule;
871     int blkdebug_event;
872 
873     blkdebug_event = qapi_enum_parse(&BlkdebugEvent_lookup, event, -1, NULL);
874     if (blkdebug_event < 0) {
875         return -ENOENT;
876     }
877 
878     rule = g_malloc(sizeof(*rule));
879     *rule = (struct BlkdebugRule) {
880         .event  = blkdebug_event,
881         .action = ACTION_SUSPEND,
882         .state  = 0,
883         .options.suspend.tag = g_strdup(tag),
884     };
885 
886     qemu_mutex_lock(&s->lock);
887     QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next);
888     qemu_mutex_unlock(&s->lock);
889 
890     return 0;
891 }
892 
893 /* Called with lock held. May temporarily release lock. */
894 static int resume_req_by_tag(BDRVBlkdebugState *s, const char *tag, bool all)
895 {
896     BlkdebugSuspendedReq *r;
897 
898 retry:
899     /*
900      * No need for _SAFE, since a different coroutine can remove another node
901      * (not the current one) in this list, and when the current one is removed
902      * the iteration starts back from beginning anyways.
903      */
904     QLIST_FOREACH(r, &s->suspended_reqs, next) {
905         if (!strcmp(r->tag, tag)) {
906             Coroutine *co = r->co;
907 
908             if (!qtest_enabled()) {
909                 printf("blkdebug: Resuming request '%s'\n", r->tag);
910             }
911 
912             QLIST_REMOVE(r, next);
913             g_free(r->tag);
914             g_free(r);
915 
916             qemu_mutex_unlock(&s->lock);
917             qemu_coroutine_enter(co);
918             qemu_mutex_lock(&s->lock);
919 
920             if (all) {
921                 goto retry;
922             }
923             return 0;
924         }
925     }
926     return -ENOENT;
927 }
928 
929 static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
930 {
931     BDRVBlkdebugState *s = bs->opaque;
932     QEMU_LOCK_GUARD(&s->lock);
933     return resume_req_by_tag(s, tag, false);
934 }
935 
936 static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
937                                             const char *tag)
938 {
939     BDRVBlkdebugState *s = bs->opaque;
940     BlkdebugRule *rule, *next;
941     int i, ret = -ENOENT;
942 
943     QEMU_LOCK_GUARD(&s->lock);
944     for (i = 0; i < BLKDBG__MAX; i++) {
945         QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
946             if (rule->action == ACTION_SUSPEND &&
947                 !strcmp(rule->options.suspend.tag, tag)) {
948                 remove_rule(rule);
949                 ret = 0;
950             }
951         }
952     }
953     if (resume_req_by_tag(s, tag, true) == 0) {
954         ret = 0;
955     }
956     return ret;
957 }
958 
959 static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
960 {
961     BDRVBlkdebugState *s = bs->opaque;
962     BlkdebugSuspendedReq *r;
963 
964     QEMU_LOCK_GUARD(&s->lock);
965     QLIST_FOREACH(r, &s->suspended_reqs, next) {
966         if (!strcmp(r->tag, tag)) {
967             return true;
968         }
969     }
970     return false;
971 }
972 
973 static int64_t blkdebug_getlength(BlockDriverState *bs)
974 {
975     return bdrv_getlength(bs->file->bs);
976 }
977 
978 static void blkdebug_refresh_filename(BlockDriverState *bs)
979 {
980     BDRVBlkdebugState *s = bs->opaque;
981     const QDictEntry *e;
982     int ret;
983 
984     if (!bs->file->bs->exact_filename[0]) {
985         return;
986     }
987 
988     for (e = qdict_first(bs->full_open_options); e;
989          e = qdict_next(bs->full_open_options, e))
990     {
991         /* Real child options are under "image", but "x-image" may
992          * contain a filename */
993         if (strcmp(qdict_entry_key(e), "config") &&
994             strcmp(qdict_entry_key(e), "image") &&
995             strcmp(qdict_entry_key(e), "x-image") &&
996             strcmp(qdict_entry_key(e), "driver"))
997         {
998             return;
999         }
1000     }
1001 
1002     ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename),
1003                    "blkdebug:%s:%s",
1004                    s->config_file ?: "", bs->file->bs->exact_filename);
1005     if (ret >= sizeof(bs->exact_filename)) {
1006         /* An overflow makes the filename unusable, so do not report any */
1007         bs->exact_filename[0] = 0;
1008     }
1009 }
1010 
1011 static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp)
1012 {
1013     BDRVBlkdebugState *s = bs->opaque;
1014 
1015     if (s->align) {
1016         bs->bl.request_alignment = s->align;
1017     }
1018     if (s->max_transfer) {
1019         bs->bl.max_transfer = s->max_transfer;
1020     }
1021     if (s->opt_write_zero) {
1022         bs->bl.pwrite_zeroes_alignment = s->opt_write_zero;
1023     }
1024     if (s->max_write_zero) {
1025         bs->bl.max_pwrite_zeroes = s->max_write_zero;
1026     }
1027     if (s->opt_discard) {
1028         bs->bl.pdiscard_alignment = s->opt_discard;
1029     }
1030     if (s->max_discard) {
1031         bs->bl.max_pdiscard = s->max_discard;
1032     }
1033 }
1034 
1035 static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
1036                                    BlockReopenQueue *queue, Error **errp)
1037 {
1038     return 0;
1039 }
1040 
1041 static void blkdebug_child_perm(BlockDriverState *bs, BdrvChild *c,
1042                                 BdrvChildRole role,
1043                                 BlockReopenQueue *reopen_queue,
1044                                 uint64_t perm, uint64_t shared,
1045                                 uint64_t *nperm, uint64_t *nshared)
1046 {
1047     BDRVBlkdebugState *s = bs->opaque;
1048 
1049     bdrv_default_perms(bs, c, role, reopen_queue,
1050                        perm, shared, nperm, nshared);
1051 
1052     *nperm |= s->take_child_perms;
1053     *nshared &= ~s->unshare_child_perms;
1054 }
1055 
/*
 * NULL-terminated list of option names, published through the
 * .strong_runtime_opts field of the blkdebug BlockDriver.
 * NOTE(review): entries ending in '.' presumably act as prefixes for
 * structured options -- confirm against the block layer.
 */
static const char *const blkdebug_strong_runtime_opts[] = {
    /* Rule configuration */
    "config",
    "inject-error.",
    "set-state.",

    /* Limit overrides */
    "align",
    "max-transfer",
    "opt-write-zero",
    "max-write-zero",
    "opt-discard",
    "max-discard",

    /* Terminator */
    NULL
};
1069 
1070 static BlockDriver bdrv_blkdebug = {
1071     .format_name            = "blkdebug",
1072     .protocol_name          = "blkdebug",
1073     .instance_size          = sizeof(BDRVBlkdebugState),
1074     .is_filter              = true,
1075 
1076     .bdrv_parse_filename    = blkdebug_parse_filename,
1077     .bdrv_file_open         = blkdebug_open,
1078     .bdrv_close             = blkdebug_close,
1079     .bdrv_reopen_prepare    = blkdebug_reopen_prepare,
1080     .bdrv_child_perm        = blkdebug_child_perm,
1081 
1082     .bdrv_getlength         = blkdebug_getlength,
1083     .bdrv_refresh_filename  = blkdebug_refresh_filename,
1084     .bdrv_refresh_limits    = blkdebug_refresh_limits,
1085 
1086     .bdrv_co_preadv         = blkdebug_co_preadv,
1087     .bdrv_co_pwritev        = blkdebug_co_pwritev,
1088     .bdrv_co_flush_to_disk  = blkdebug_co_flush,
1089     .bdrv_co_pwrite_zeroes  = blkdebug_co_pwrite_zeroes,
1090     .bdrv_co_pdiscard       = blkdebug_co_pdiscard,
1091     .bdrv_co_block_status   = blkdebug_co_block_status,
1092 
1093     .bdrv_debug_event           = blkdebug_debug_event,
1094     .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
1095     .bdrv_debug_remove_breakpoint
1096                                 = blkdebug_debug_remove_breakpoint,
1097     .bdrv_debug_resume          = blkdebug_debug_resume,
1098     .bdrv_debug_is_suspended    = blkdebug_debug_is_suspended,
1099 
1100     .strong_runtime_opts        = blkdebug_strong_runtime_opts,
1101 };
1102 
1103 static void bdrv_blkdebug_init(void)
1104 {
1105     bdrv_register(&bdrv_blkdebug);
1106 }
1107 
1108 block_init(bdrv_blkdebug_init);
1109