xref: /openbmc/qemu/block/blkdebug.c (revision b86c6ba689662256ea32f3e27927524ccb13f81d)
1 /*
2  * Block protocol for I/O error injection
3  *
4  * Copyright (C) 2016-2017 Red Hat, Inc.
5  * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qapi/error.h"
28 #include "qemu/cutils.h"
29 #include "qemu/config-file.h"
30 #include "block/block-io.h"
31 #include "block/block_int.h"
32 #include "block/qdict.h"
33 #include "qemu/module.h"
34 #include "qemu/option.h"
35 #include "qapi/qapi-visit-block-core.h"
36 #include "qapi/qmp/qdict.h"
37 #include "qapi/qmp/qlist.h"
38 #include "qapi/qmp/qstring.h"
39 #include "qapi/qobject-input-visitor.h"
40 #include "sysemu/qtest.h"
41 
42 /* All APIs are thread-safe */
43 
44 typedef struct BDRVBlkdebugState {
45     /* IN: initialized in blkdebug_open() and never changed */
46     uint64_t align;
47     uint64_t max_transfer;
48     uint64_t opt_write_zero;
49     uint64_t max_write_zero;
50     uint64_t opt_discard;
51     uint64_t max_discard;
52     char *config_file; /* For blkdebug_refresh_filename() */
53     /* initialized in blkdebug_parse_perms() */
54     uint64_t take_child_perms;
55     uint64_t unshare_child_perms;
56 
57     /* State. Protected by lock */
58     int state;
59     QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
60     QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
61     QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs;
62     QemuMutex lock;
63 } BDRVBlkdebugState;
64 
65 typedef struct BlkdebugAIOCB {
66     BlockAIOCB common;
67     int ret;
68 } BlkdebugAIOCB;
69 
70 typedef struct BlkdebugSuspendedReq {
71     /* IN: initialized in suspend_request() */
72     Coroutine *co;
73     char *tag;
74 
75     /* List entry protected BDRVBlkdebugState's lock */
76     QLIST_ENTRY(BlkdebugSuspendedReq) next;
77 } BlkdebugSuspendedReq;
78 
/* What a rule does when its event fires; also indexes per-action counters */
enum {
    ACTION_INJECT_ERROR,    /* Arm an error for matching I/O requests */
    ACTION_SET_STATE,       /* Switch the node to a new rule state */
    ACTION_SUSPEND,         /* Park the current request (breakpoint) */
    ACTION__MAX,            /* Number of actions; not an action itself */
};
85 
86 typedef struct BlkdebugRule {
87     /* IN: initialized in add_rule() or blkdebug_debug_breakpoint() */
88     BlkdebugEvent event;
89     int action;
90     int state;
91     union {
92         struct {
93             uint64_t iotype_mask;
94             int error;
95             int immediately;
96             int once;
97             int64_t offset;
98         } inject;
99         struct {
100             int new_state;
101         } set_state;
102         struct {
103             char *tag;
104         } suspend;
105     } options;
106 
107     /* List entries protected BDRVBlkdebugState's lock */
108     QLIST_ENTRY(BlkdebugRule) next;
109     QSIMPLEQ_ENTRY(BlkdebugRule) active_next;
110 } BlkdebugRule;
111 
112 QEMU_BUILD_BUG_MSG(BLKDEBUG_IO_TYPE__MAX > 64,
113                    "BlkdebugIOType mask does not fit into an uint64_t");
114 
115 static QemuOptsList inject_error_opts = {
116     .name = "inject-error",
117     .head = QTAILQ_HEAD_INITIALIZER(inject_error_opts.head),
118     .desc = {
119         {
120             .name = "event",
121             .type = QEMU_OPT_STRING,
122         },
123         {
124             .name = "state",
125             .type = QEMU_OPT_NUMBER,
126         },
127         {
128             .name = "iotype",
129             .type = QEMU_OPT_STRING,
130         },
131         {
132             .name = "errno",
133             .type = QEMU_OPT_NUMBER,
134         },
135         {
136             .name = "sector",
137             .type = QEMU_OPT_NUMBER,
138         },
139         {
140             .name = "once",
141             .type = QEMU_OPT_BOOL,
142         },
143         {
144             .name = "immediately",
145             .type = QEMU_OPT_BOOL,
146         },
147         { /* end of list */ }
148     },
149 };
150 
151 static QemuOptsList set_state_opts = {
152     .name = "set-state",
153     .head = QTAILQ_HEAD_INITIALIZER(set_state_opts.head),
154     .desc = {
155         {
156             .name = "event",
157             .type = QEMU_OPT_STRING,
158         },
159         {
160             .name = "state",
161             .type = QEMU_OPT_NUMBER,
162         },
163         {
164             .name = "new_state",
165             .type = QEMU_OPT_NUMBER,
166         },
167         { /* end of list */ }
168     },
169 };
170 
171 static QemuOptsList *config_groups[] = {
172     &inject_error_opts,
173     &set_state_opts,
174     NULL
175 };
176 
177 struct add_rule_data {
178     BDRVBlkdebugState *s;
179     int action;
180 };
181 
182 static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
183 {
184     struct add_rule_data *d = opaque;
185     BDRVBlkdebugState *s = d->s;
186     const char *event_name;
187     int event;
188     struct BlkdebugRule *rule;
189     int64_t sector;
190     BlkdebugIOType iotype;
191     Error *local_error = NULL;
192 
193     /* Find the right event for the rule */
194     event_name = qemu_opt_get(opts, "event");
195     if (!event_name) {
196         error_setg(errp, "Missing event name for rule");
197         return -1;
198     }
199     event = qapi_enum_parse(&BlkdebugEvent_lookup, event_name, -1, errp);
200     if (event < 0) {
201         return -1;
202     }
203 
204     /* Set attributes common for all actions */
205     rule = g_malloc0(sizeof(*rule));
206     *rule = (struct BlkdebugRule) {
207         .event  = event,
208         .action = d->action,
209         .state  = qemu_opt_get_number(opts, "state", 0),
210     };
211 
212     /* Parse action-specific options */
213     switch (d->action) {
214     case ACTION_INJECT_ERROR:
215         rule->options.inject.error = qemu_opt_get_number(opts, "errno", EIO);
216         rule->options.inject.once  = qemu_opt_get_bool(opts, "once", 0);
217         rule->options.inject.immediately =
218             qemu_opt_get_bool(opts, "immediately", 0);
219         sector = qemu_opt_get_number(opts, "sector", -1);
220         rule->options.inject.offset =
221             sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE;
222 
223         iotype = qapi_enum_parse(&BlkdebugIOType_lookup,
224                                  qemu_opt_get(opts, "iotype"),
225                                  BLKDEBUG_IO_TYPE__MAX, &local_error);
226         if (local_error) {
227             error_propagate(errp, local_error);
228             g_free(rule);
229             return -1;
230         }
231         if (iotype != BLKDEBUG_IO_TYPE__MAX) {
232             rule->options.inject.iotype_mask = (1ull << iotype);
233         } else {
234             /* Apply the default */
235             rule->options.inject.iotype_mask =
236                 (1ull << BLKDEBUG_IO_TYPE_READ)
237                 | (1ull << BLKDEBUG_IO_TYPE_WRITE)
238                 | (1ull << BLKDEBUG_IO_TYPE_WRITE_ZEROES)
239                 | (1ull << BLKDEBUG_IO_TYPE_DISCARD)
240                 | (1ull << BLKDEBUG_IO_TYPE_FLUSH);
241         }
242 
243         break;
244 
245     case ACTION_SET_STATE:
246         rule->options.set_state.new_state =
247             qemu_opt_get_number(opts, "new_state", 0);
248         break;
249 
250     case ACTION_SUSPEND:
251         rule->options.suspend.tag =
252             g_strdup(qemu_opt_get(opts, "tag"));
253         break;
254     };
255 
256     /* Add the rule */
257     qemu_mutex_lock(&s->lock);
258     QLIST_INSERT_HEAD(&s->rules[event], rule, next);
259     qemu_mutex_unlock(&s->lock);
260 
261     return 0;
262 }
263 
264 /* Called with lock held or from .bdrv_close */
265 static void remove_rule(BlkdebugRule *rule)
266 {
267     switch (rule->action) {
268     case ACTION_INJECT_ERROR:
269     case ACTION_SET_STATE:
270         break;
271     case ACTION_SUSPEND:
272         g_free(rule->options.suspend.tag);
273         break;
274     }
275 
276     QLIST_REMOVE(rule, next);
277     g_free(rule);
278 }
279 
280 static int read_config(BDRVBlkdebugState *s, const char *filename,
281                        QDict *options, Error **errp)
282 {
283     FILE *f = NULL;
284     int ret;
285     struct add_rule_data d;
286     Error *local_err = NULL;
287 
288     if (filename) {
289         f = fopen(filename, "r");
290         if (f == NULL) {
291             error_setg_errno(errp, errno, "Could not read blkdebug config file");
292             return -errno;
293         }
294 
295         ret = qemu_config_parse(f, config_groups, filename, errp);
296         if (ret < 0) {
297             goto fail;
298         }
299     }
300 
301     if (!qemu_config_parse_qdict(options, config_groups, errp)) {
302         ret = -EINVAL;
303         goto fail;
304     }
305 
306     d.s = s;
307     d.action = ACTION_INJECT_ERROR;
308     qemu_opts_foreach(&inject_error_opts, add_rule, &d, &local_err);
309     if (local_err) {
310         error_propagate(errp, local_err);
311         ret = -EINVAL;
312         goto fail;
313     }
314 
315     d.action = ACTION_SET_STATE;
316     qemu_opts_foreach(&set_state_opts, add_rule, &d, &local_err);
317     if (local_err) {
318         error_propagate(errp, local_err);
319         ret = -EINVAL;
320         goto fail;
321     }
322 
323     ret = 0;
324 fail:
325     qemu_opts_reset(&inject_error_opts);
326     qemu_opts_reset(&set_state_opts);
327     if (f) {
328         fclose(f);
329     }
330     return ret;
331 }
332 
333 /* Valid blkdebug filenames look like blkdebug:path/to/config:path/to/image */
334 static void blkdebug_parse_filename(const char *filename, QDict *options,
335                                     Error **errp)
336 {
337     const char *c;
338 
339     /* Parse the blkdebug: prefix */
340     if (!strstart(filename, "blkdebug:", &filename)) {
341         /* There was no prefix; therefore, all options have to be already
342            present in the QDict (except for the filename) */
343         qdict_put_str(options, "x-image", filename);
344         return;
345     }
346 
347     /* Parse config file path */
348     c = strchr(filename, ':');
349     if (c == NULL) {
350         error_setg(errp, "blkdebug requires both config file and image path");
351         return;
352     }
353 
354     if (c != filename) {
355         QString *config_path;
356         config_path = qstring_from_substr(filename, 0, c - filename);
357         qdict_put(options, "config", config_path);
358     }
359 
360     /* TODO Allow multi-level nesting and set file.filename here */
361     filename = c + 1;
362     qdict_put_str(options, "x-image", filename);
363 }
364 
365 static int blkdebug_parse_perm_list(uint64_t *dest, QDict *options,
366                                     const char *prefix, Error **errp)
367 {
368     int ret = 0;
369     QDict *subqdict = NULL;
370     QObject *crumpled_subqdict = NULL;
371     Visitor *v = NULL;
372     BlockPermissionList *perm_list = NULL, *element;
373 
374     *dest = 0;
375 
376     qdict_extract_subqdict(options, &subqdict, prefix);
377     if (!qdict_size(subqdict)) {
378         goto out;
379     }
380 
381     crumpled_subqdict = qdict_crumple(subqdict, errp);
382     if (!crumpled_subqdict) {
383         ret = -EINVAL;
384         goto out;
385     }
386 
387     v = qobject_input_visitor_new(crumpled_subqdict);
388     if (!visit_type_BlockPermissionList(v, NULL, &perm_list, errp)) {
389         ret = -EINVAL;
390         goto out;
391     }
392 
393     for (element = perm_list; element; element = element->next) {
394         *dest |= bdrv_qapi_perm_to_blk_perm(element->value);
395     }
396 
397 out:
398     qapi_free_BlockPermissionList(perm_list);
399     visit_free(v);
400     qobject_unref(subqdict);
401     qobject_unref(crumpled_subqdict);
402     return ret;
403 }
404 
405 static int blkdebug_parse_perms(BDRVBlkdebugState *s, QDict *options,
406                                 Error **errp)
407 {
408     int ret;
409 
410     ret = blkdebug_parse_perm_list(&s->take_child_perms, options,
411                                    "take-child-perms.", errp);
412     if (ret < 0) {
413         return ret;
414     }
415 
416     ret = blkdebug_parse_perm_list(&s->unshare_child_perms, options,
417                                    "unshare-child-perms.", errp);
418     if (ret < 0) {
419         return ret;
420     }
421 
422     return 0;
423 }
424 
425 static QemuOptsList runtime_opts = {
426     .name = "blkdebug",
427     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
428     .desc = {
429         {
430             .name = "config",
431             .type = QEMU_OPT_STRING,
432             .help = "Path to the configuration file",
433         },
434         {
435             .name = "x-image",
436             .type = QEMU_OPT_STRING,
437             .help = "[internal use only, will be removed]",
438         },
439         {
440             .name = "align",
441             .type = QEMU_OPT_SIZE,
442             .help = "Required alignment in bytes",
443         },
444         {
445             .name = "max-transfer",
446             .type = QEMU_OPT_SIZE,
447             .help = "Maximum transfer size in bytes",
448         },
449         {
450             .name = "opt-write-zero",
451             .type = QEMU_OPT_SIZE,
452             .help = "Optimum write zero alignment in bytes",
453         },
454         {
455             .name = "max-write-zero",
456             .type = QEMU_OPT_SIZE,
457             .help = "Maximum write zero size in bytes",
458         },
459         {
460             .name = "opt-discard",
461             .type = QEMU_OPT_SIZE,
462             .help = "Optimum discard alignment in bytes",
463         },
464         {
465             .name = "max-discard",
466             .type = QEMU_OPT_SIZE,
467             .help = "Maximum discard size in bytes",
468         },
469         { /* end of list */ }
470     },
471 };
472 
473 static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
474                          Error **errp)
475 {
476     BDRVBlkdebugState *s = bs->opaque;
477     QemuOpts *opts;
478     int ret;
479     uint64_t align;
480 
481     qemu_mutex_init(&s->lock);
482     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
483     if (!qemu_opts_absorb_qdict(opts, options, errp)) {
484         ret = -EINVAL;
485         goto out;
486     }
487 
488     /* Read rules from config file or command line options */
489     s->config_file = g_strdup(qemu_opt_get(opts, "config"));
490     ret = read_config(s, s->config_file, options, errp);
491     if (ret) {
492         goto out;
493     }
494 
495     /* Set initial state */
496     s->state = 1;
497 
498     /* Parse permissions modifiers before opening the image file */
499     ret = blkdebug_parse_perms(s, options, errp);
500     if (ret < 0) {
501         goto out;
502     }
503 
504     /* Open the image file */
505     ret = bdrv_open_file_child(qemu_opt_get(opts, "x-image"), options, "image",
506                                bs, errp);
507     if (ret < 0) {
508         goto out;
509     }
510 
511     bdrv_graph_rdlock_main_loop();
512 
513     bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
514         (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
515     bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
516         ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
517             bs->file->bs->supported_zero_flags);
518     ret = -EINVAL;
519 
520     /* Set alignment overrides */
521     s->align = qemu_opt_get_size(opts, "align", 0);
522     if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) {
523         error_setg(errp, "Cannot meet constraints with align %" PRIu64,
524                    s->align);
525         goto out_rdlock;
526     }
527     align = MAX(s->align, bs->file->bs->bl.request_alignment);
528 
529     s->max_transfer = qemu_opt_get_size(opts, "max-transfer", 0);
530     if (s->max_transfer &&
531         (s->max_transfer >= INT_MAX ||
532          !QEMU_IS_ALIGNED(s->max_transfer, align))) {
533         error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64,
534                    s->max_transfer);
535         goto out_rdlock;
536     }
537 
538     s->opt_write_zero = qemu_opt_get_size(opts, "opt-write-zero", 0);
539     if (s->opt_write_zero &&
540         (s->opt_write_zero >= INT_MAX ||
541          !QEMU_IS_ALIGNED(s->opt_write_zero, align))) {
542         error_setg(errp, "Cannot meet constraints with opt-write-zero %" PRIu64,
543                    s->opt_write_zero);
544         goto out_rdlock;
545     }
546 
547     s->max_write_zero = qemu_opt_get_size(opts, "max-write-zero", 0);
548     if (s->max_write_zero &&
549         (s->max_write_zero >= INT_MAX ||
550          !QEMU_IS_ALIGNED(s->max_write_zero,
551                           MAX(s->opt_write_zero, align)))) {
552         error_setg(errp, "Cannot meet constraints with max-write-zero %" PRIu64,
553                    s->max_write_zero);
554         goto out_rdlock;
555     }
556 
557     s->opt_discard = qemu_opt_get_size(opts, "opt-discard", 0);
558     if (s->opt_discard &&
559         (s->opt_discard >= INT_MAX ||
560          !QEMU_IS_ALIGNED(s->opt_discard, align))) {
561         error_setg(errp, "Cannot meet constraints with opt-discard %" PRIu64,
562                    s->opt_discard);
563         goto out_rdlock;
564     }
565 
566     s->max_discard = qemu_opt_get_size(opts, "max-discard", 0);
567     if (s->max_discard &&
568         (s->max_discard >= INT_MAX ||
569          !QEMU_IS_ALIGNED(s->max_discard,
570                           MAX(s->opt_discard, align)))) {
571         error_setg(errp, "Cannot meet constraints with max-discard %" PRIu64,
572                    s->max_discard);
573         goto out_rdlock;
574     }
575 
576     bdrv_debug_event(bs, BLKDBG_NONE);
577 
578     ret = 0;
579 out_rdlock:
580     bdrv_graph_rdunlock_main_loop();
581 out:
582     if (ret < 0) {
583         qemu_mutex_destroy(&s->lock);
584         g_free(s->config_file);
585     }
586     qemu_opts_del(opts);
587     return ret;
588 }
589 
590 static int coroutine_fn rule_check(BlockDriverState *bs, uint64_t offset,
591                                    uint64_t bytes, BlkdebugIOType iotype)
592 {
593     BDRVBlkdebugState *s = bs->opaque;
594     BlkdebugRule *rule = NULL;
595     int error;
596     bool immediately;
597 
598     qemu_mutex_lock(&s->lock);
599     QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
600         uint64_t inject_offset = rule->options.inject.offset;
601 
602         if ((inject_offset == -1 ||
603              (bytes && inject_offset >= offset &&
604               inject_offset < offset + bytes)) &&
605             (rule->options.inject.iotype_mask & (1ull << iotype)))
606         {
607             break;
608         }
609     }
610 
611     if (!rule || !rule->options.inject.error) {
612         qemu_mutex_unlock(&s->lock);
613         return 0;
614     }
615 
616     immediately = rule->options.inject.immediately;
617     error = rule->options.inject.error;
618 
619     if (rule->options.inject.once) {
620         QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
621         remove_rule(rule);
622     }
623 
624     qemu_mutex_unlock(&s->lock);
625     if (!immediately) {
626         aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
627         qemu_coroutine_yield();
628     }
629 
630     return -error;
631 }
632 
633 static int coroutine_fn GRAPH_RDLOCK
634 blkdebug_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
635                    QEMUIOVector *qiov, BdrvRequestFlags flags)
636 {
637     int err;
638 
639     /* Sanity check block layer guarantees */
640     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
641     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
642     if (bs->bl.max_transfer) {
643         assert(bytes <= bs->bl.max_transfer);
644     }
645 
646     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_READ);
647     if (err) {
648         return err;
649     }
650 
651     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
652 }
653 
654 static int coroutine_fn GRAPH_RDLOCK
655 blkdebug_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
656                     QEMUIOVector *qiov, BdrvRequestFlags flags)
657 {
658     int err;
659 
660     /* Sanity check block layer guarantees */
661     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
662     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
663     if (bs->bl.max_transfer) {
664         assert(bytes <= bs->bl.max_transfer);
665     }
666 
667     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE);
668     if (err) {
669         return err;
670     }
671 
672     return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
673 }
674 
675 static int GRAPH_RDLOCK coroutine_fn blkdebug_co_flush(BlockDriverState *bs)
676 {
677     int err = rule_check(bs, 0, 0, BLKDEBUG_IO_TYPE_FLUSH);
678 
679     if (err) {
680         return err;
681     }
682 
683     return bdrv_co_flush(bs->file->bs);
684 }
685 
686 static int coroutine_fn GRAPH_RDLOCK
687 blkdebug_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
688                           BdrvRequestFlags flags)
689 {
690     uint32_t align = MAX(bs->bl.request_alignment,
691                          bs->bl.pwrite_zeroes_alignment);
692     int err;
693 
694     /* Only pass through requests that are larger than requested
695      * preferred alignment (so that we test the fallback to writes on
696      * unaligned portions), and check that the block layer never hands
697      * us anything unaligned that crosses an alignment boundary.  */
698     if (bytes < align) {
699         assert(QEMU_IS_ALIGNED(offset, align) ||
700                QEMU_IS_ALIGNED(offset + bytes, align) ||
701                DIV_ROUND_UP(offset, align) ==
702                DIV_ROUND_UP(offset + bytes, align));
703         return -ENOTSUP;
704     }
705     assert(QEMU_IS_ALIGNED(offset, align));
706     assert(QEMU_IS_ALIGNED(bytes, align));
707     if (bs->bl.max_pwrite_zeroes) {
708         assert(bytes <= bs->bl.max_pwrite_zeroes);
709     }
710 
711     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE_ZEROES);
712     if (err) {
713         return err;
714     }
715 
716     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
717 }
718 
719 static int coroutine_fn GRAPH_RDLOCK
720 blkdebug_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
721 {
722     uint32_t align = bs->bl.pdiscard_alignment;
723     int err;
724 
725     /* Only pass through requests that are larger than requested
726      * minimum alignment, and ensure that unaligned requests do not
727      * cross optimum discard boundaries. */
728     if (bytes < bs->bl.request_alignment) {
729         assert(QEMU_IS_ALIGNED(offset, align) ||
730                QEMU_IS_ALIGNED(offset + bytes, align) ||
731                DIV_ROUND_UP(offset, align) ==
732                DIV_ROUND_UP(offset + bytes, align));
733         return -ENOTSUP;
734     }
735     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
736     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
737     if (align && bytes >= align) {
738         assert(QEMU_IS_ALIGNED(offset, align));
739         assert(QEMU_IS_ALIGNED(bytes, align));
740     }
741     if (bs->bl.max_pdiscard) {
742         assert(bytes <= bs->bl.max_pdiscard);
743     }
744 
745     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_DISCARD);
746     if (err) {
747         return err;
748     }
749 
750     return bdrv_co_pdiscard(bs->file, offset, bytes);
751 }
752 
753 static int coroutine_fn GRAPH_RDLOCK
754 blkdebug_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
755                          int64_t bytes, int64_t *pnum, int64_t *map,
756                          BlockDriverState **file)
757 {
758     int err;
759 
760     assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment));
761 
762     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_BLOCK_STATUS);
763     if (err) {
764         return err;
765     }
766 
767     assert(bs->file && bs->file->bs);
768     *pnum = bytes;
769     *map = offset;
770     *file = bs->file->bs;
771     return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
772 }
773 
774 static void blkdebug_close(BlockDriverState *bs)
775 {
776     BDRVBlkdebugState *s = bs->opaque;
777     BlkdebugRule *rule, *next;
778     int i;
779 
780     for (i = 0; i < BLKDBG__MAX; i++) {
781         QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
782             remove_rule(rule);
783         }
784     }
785 
786     g_free(s->config_file);
787     qemu_mutex_destroy(&s->lock);
788 }
789 
790 /* Called with lock held.  */
791 static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
792 {
793     BDRVBlkdebugState *s = bs->opaque;
794     BlkdebugSuspendedReq *r;
795 
796     r = g_new(BlkdebugSuspendedReq, 1);
797 
798     r->co         = qemu_coroutine_self();
799     r->tag        = g_strdup(rule->options.suspend.tag);
800 
801     remove_rule(rule);
802     QLIST_INSERT_HEAD(&s->suspended_reqs, r, next);
803 
804     if (!qtest_enabled()) {
805         printf("blkdebug: Suspended request '%s'\n", r->tag);
806     }
807 }
808 
809 /* Called with lock held.  */
810 static void process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
811                          int *action_count, int *new_state)
812 {
813     BDRVBlkdebugState *s = bs->opaque;
814 
815     /* Only process rules for the current state */
816     if (rule->state && rule->state != s->state) {
817         return;
818     }
819 
820     /* Take the action */
821     action_count[rule->action]++;
822     switch (rule->action) {
823     case ACTION_INJECT_ERROR:
824         if (action_count[ACTION_INJECT_ERROR] == 1) {
825             QSIMPLEQ_INIT(&s->active_rules);
826         }
827         QSIMPLEQ_INSERT_HEAD(&s->active_rules, rule, active_next);
828         break;
829 
830     case ACTION_SET_STATE:
831         *new_state = rule->options.set_state.new_state;
832         break;
833 
834     case ACTION_SUSPEND:
835         suspend_request(bs, rule);
836         break;
837     }
838 }
839 
840 static void coroutine_fn
841 blkdebug_co_debug_event(BlockDriverState *bs, BlkdebugEvent event)
842 {
843     BDRVBlkdebugState *s = bs->opaque;
844     struct BlkdebugRule *rule, *next;
845     int new_state;
846     int actions_count[ACTION__MAX] = { 0 };
847 
848     assert((int)event >= 0 && event < BLKDBG__MAX);
849 
850     WITH_QEMU_LOCK_GUARD(&s->lock) {
851         new_state = s->state;
852         QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) {
853             process_rule(bs, rule, actions_count, &new_state);
854         }
855         s->state = new_state;
856     }
857 
858     while (actions_count[ACTION_SUSPEND] > 0) {
859         qemu_coroutine_yield();
860         actions_count[ACTION_SUSPEND]--;
861     }
862 }
863 
864 static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
865                                      const char *tag)
866 {
867     BDRVBlkdebugState *s = bs->opaque;
868     struct BlkdebugRule *rule;
869     int blkdebug_event;
870 
871     blkdebug_event = qapi_enum_parse(&BlkdebugEvent_lookup, event, -1, NULL);
872     if (blkdebug_event < 0) {
873         return -ENOENT;
874     }
875 
876     rule = g_malloc(sizeof(*rule));
877     *rule = (struct BlkdebugRule) {
878         .event  = blkdebug_event,
879         .action = ACTION_SUSPEND,
880         .state  = 0,
881         .options.suspend.tag = g_strdup(tag),
882     };
883 
884     qemu_mutex_lock(&s->lock);
885     QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next);
886     qemu_mutex_unlock(&s->lock);
887 
888     return 0;
889 }
890 
891 /* Called with lock held. May temporarily release lock. */
892 static int resume_req_by_tag(BDRVBlkdebugState *s, const char *tag, bool all)
893 {
894     BlkdebugSuspendedReq *r;
895 
896 retry:
897     /*
898      * No need for _SAFE, since a different coroutine can remove another node
899      * (not the current one) in this list, and when the current one is removed
900      * the iteration starts back from beginning anyways.
901      */
902     QLIST_FOREACH(r, &s->suspended_reqs, next) {
903         if (!strcmp(r->tag, tag)) {
904             Coroutine *co = r->co;
905 
906             if (!qtest_enabled()) {
907                 printf("blkdebug: Resuming request '%s'\n", r->tag);
908             }
909 
910             QLIST_REMOVE(r, next);
911             g_free(r->tag);
912             g_free(r);
913 
914             qemu_mutex_unlock(&s->lock);
915             qemu_coroutine_enter(co);
916             qemu_mutex_lock(&s->lock);
917 
918             if (all) {
919                 goto retry;
920             }
921             return 0;
922         }
923     }
924     return -ENOENT;
925 }
926 
927 static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
928 {
929     BDRVBlkdebugState *s = bs->opaque;
930     QEMU_LOCK_GUARD(&s->lock);
931     return resume_req_by_tag(s, tag, false);
932 }
933 
934 static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
935                                             const char *tag)
936 {
937     BDRVBlkdebugState *s = bs->opaque;
938     BlkdebugRule *rule, *next;
939     int i, ret = -ENOENT;
940 
941     QEMU_LOCK_GUARD(&s->lock);
942     for (i = 0; i < BLKDBG__MAX; i++) {
943         QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
944             if (rule->action == ACTION_SUSPEND &&
945                 !strcmp(rule->options.suspend.tag, tag)) {
946                 remove_rule(rule);
947                 ret = 0;
948             }
949         }
950     }
951     if (resume_req_by_tag(s, tag, true) == 0) {
952         ret = 0;
953     }
954     return ret;
955 }
956 
957 static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
958 {
959     BDRVBlkdebugState *s = bs->opaque;
960     BlkdebugSuspendedReq *r;
961 
962     QEMU_LOCK_GUARD(&s->lock);
963     QLIST_FOREACH(r, &s->suspended_reqs, next) {
964         if (!strcmp(r->tag, tag)) {
965             return true;
966         }
967     }
968     return false;
969 }
970 
971 static int64_t coroutine_fn GRAPH_RDLOCK
972 blkdebug_co_getlength(BlockDriverState *bs)
973 {
974     return bdrv_co_getlength(bs->file->bs);
975 }
976 
977 static void GRAPH_RDLOCK blkdebug_refresh_filename(BlockDriverState *bs)
978 {
979     BDRVBlkdebugState *s = bs->opaque;
980     const QDictEntry *e;
981     int ret;
982 
983     if (!bs->file->bs->exact_filename[0]) {
984         return;
985     }
986 
987     for (e = qdict_first(bs->full_open_options); e;
988          e = qdict_next(bs->full_open_options, e))
989     {
990         /* Real child options are under "image", but "x-image" may
991          * contain a filename */
992         if (strcmp(qdict_entry_key(e), "config") &&
993             strcmp(qdict_entry_key(e), "image") &&
994             strcmp(qdict_entry_key(e), "x-image") &&
995             strcmp(qdict_entry_key(e), "driver"))
996         {
997             return;
998         }
999     }
1000 
1001     ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename),
1002                    "blkdebug:%s:%s",
1003                    s->config_file ?: "", bs->file->bs->exact_filename);
1004     if (ret >= sizeof(bs->exact_filename)) {
1005         /* An overflow makes the filename unusable, so do not report any */
1006         bs->exact_filename[0] = 0;
1007     }
1008 }
1009 
1010 static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp)
1011 {
1012     BDRVBlkdebugState *s = bs->opaque;
1013 
1014     if (s->align) {
1015         bs->bl.request_alignment = s->align;
1016     }
1017     if (s->max_transfer) {
1018         bs->bl.max_transfer = s->max_transfer;
1019     }
1020     if (s->opt_write_zero) {
1021         bs->bl.pwrite_zeroes_alignment = s->opt_write_zero;
1022     }
1023     if (s->max_write_zero) {
1024         bs->bl.max_pwrite_zeroes = s->max_write_zero;
1025     }
1026     if (s->opt_discard) {
1027         bs->bl.pdiscard_alignment = s->opt_discard;
1028     }
1029     if (s->max_discard) {
1030         bs->bl.max_pdiscard = s->max_discard;
1031     }
1032 }
1033 
1034 static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
1035                                    BlockReopenQueue *queue, Error **errp)
1036 {
1037     return 0;
1038 }
1039 
1040 static void blkdebug_child_perm(BlockDriverState *bs, BdrvChild *c,
1041                                 BdrvChildRole role,
1042                                 BlockReopenQueue *reopen_queue,
1043                                 uint64_t perm, uint64_t shared,
1044                                 uint64_t *nperm, uint64_t *nshared)
1045 {
1046     BDRVBlkdebugState *s = bs->opaque;
1047 
1048     bdrv_default_perms(bs, c, role, reopen_queue,
1049                        perm, shared, nperm, nshared);
1050 
1051     *nperm |= s->take_child_perms;
1052     *nshared &= ~s->unshare_child_perms;
1053 }
1054 
/*
 * NULL-terminated list of option names, referenced from the driver
 * definition through .strong_runtime_opts.
 * NOTE(review): entries ending in '.' look like key prefixes rather than
 * exact names -- confirm against the code that consumes this list.
 */
static const char *const blkdebug_strong_runtime_opts[] = {
    /* Rule configuration */
    "config",
    "inject-error.",
    "set-state.",

    /* Limit overrides */
    "align",
    "max-transfer",
    "opt-write-zero",
    "max-write-zero",
    "opt-discard",
    "max-discard",

    /* Terminator */
    NULL
};
1068 
1069 static BlockDriver bdrv_blkdebug = {
1070     .format_name            = "blkdebug",
1071     .protocol_name          = "blkdebug",
1072     .instance_size          = sizeof(BDRVBlkdebugState),
1073     .is_filter              = true,
1074 
1075     .bdrv_parse_filename    = blkdebug_parse_filename,
1076     .bdrv_open              = blkdebug_open,
1077     .bdrv_close             = blkdebug_close,
1078     .bdrv_reopen_prepare    = blkdebug_reopen_prepare,
1079     .bdrv_child_perm        = blkdebug_child_perm,
1080 
1081     .bdrv_co_getlength      = blkdebug_co_getlength,
1082     .bdrv_refresh_filename  = blkdebug_refresh_filename,
1083     .bdrv_refresh_limits    = blkdebug_refresh_limits,
1084 
1085     .bdrv_co_preadv         = blkdebug_co_preadv,
1086     .bdrv_co_pwritev        = blkdebug_co_pwritev,
1087     .bdrv_co_flush_to_disk  = blkdebug_co_flush,
1088     .bdrv_co_pwrite_zeroes  = blkdebug_co_pwrite_zeroes,
1089     .bdrv_co_pdiscard       = blkdebug_co_pdiscard,
1090     .bdrv_co_block_status   = blkdebug_co_block_status,
1091 
1092     .bdrv_co_debug_event        = blkdebug_co_debug_event,
1093     .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
1094     .bdrv_debug_remove_breakpoint
1095                                 = blkdebug_debug_remove_breakpoint,
1096     .bdrv_debug_resume          = blkdebug_debug_resume,
1097     .bdrv_debug_is_suspended    = blkdebug_debug_is_suspended,
1098 
1099     .strong_runtime_opts        = blkdebug_strong_runtime_opts,
1100 };
1101 
1102 static void bdrv_blkdebug_init(void)
1103 {
1104     bdrv_register(&bdrv_blkdebug);
1105 }
1106 
1107 block_init(bdrv_blkdebug_init);
1108