xref: /openbmc/qemu/block/blkdebug.c (revision 88c41e4082c01b0b06fb6d781e154deb1a4a2c83)
1 /*
2  * Block protocol for I/O error injection
3  *
4  * Copyright (C) 2016-2017 Red Hat, Inc.
5  * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qapi/error.h"
28 #include "qemu/cutils.h"
29 #include "qemu/config-file.h"
30 #include "block/block_int.h"
31 #include "block/qdict.h"
32 #include "qemu/module.h"
33 #include "qemu/option.h"
34 #include "qapi/qapi-visit-block-core.h"
35 #include "qapi/qmp/qdict.h"
36 #include "qapi/qmp/qlist.h"
37 #include "qapi/qmp/qstring.h"
38 #include "qapi/qobject-input-visitor.h"
39 #include "sysemu/qtest.h"
40 
41 /* All APIs are thread-safe */
42 
/*
 * Driver-private state of one blkdebug node (stored in bs->opaque).
 */
typedef struct BDRVBlkdebugState {
    /* IN: initialized in blkdebug_open() and never changed */
    uint64_t align;             /* "align" option, 0 if unset */
    uint64_t max_transfer;      /* "max-transfer" option, 0 if unset */
    uint64_t opt_write_zero;    /* "opt-write-zero" option, 0 if unset */
    uint64_t max_write_zero;    /* "max-write-zero" option, 0 if unset */
    uint64_t opt_discard;       /* "opt-discard" option, 0 if unset */
    uint64_t max_discard;       /* "max-discard" option, 0 if unset */
    char *config_file; /* For blkdebug_refresh_filename() */
    /* initialized in blkdebug_parse_perms() */
    uint64_t take_child_perms;
    uint64_t unshare_child_perms;

    /* State. Protected by lock */
    int state;                  /* current state; blkdebug_open() sets it to 1 */
    /* Configured rules, one list per event that triggers them */
    QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
    /* Inject-error rules queued by process_rule(), scanned by rule_check() */
    QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
    /* Requests parked by suspend_request() until they are resumed by tag */
    QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs;
    QemuMutex lock;
} BDRVBlkdebugState;
63 
/*
 * AIO control block carrying a result code.
 * NOTE(review): not referenced anywhere in the visible part of this file --
 * confirm whether it is still needed.
 */
typedef struct BlkdebugAIOCB {
    BlockAIOCB common;
    int ret;
} BlkdebugAIOCB;
68 
/*
 * A request that hit a suspend rule and is waiting to be resumed.
 */
typedef struct BlkdebugSuspendedReq {
    /* IN: initialized in suspend_request() */
    Coroutine *co;      /* coroutine to re-enter on resume */
    char *tag;          /* copy of the rule's tag, owned by this entry */

    /* List entry protected by BDRVBlkdebugState's lock */
    QLIST_ENTRY(BlkdebugSuspendedReq) next;
} BlkdebugSuspendedReq;
77 
/* Kinds of action a BlkdebugRule can take when its event fires */
enum {
    ACTION_INJECT_ERROR,    /* queue the rule for rule_check() */
    ACTION_SET_STATE,       /* switch BDRVBlkdebugState.state */
    ACTION_SUSPEND,         /* park the request, see suspend_request() */
    ACTION__MAX,            /* number of actions; sizes per-action counters */
};
84 
/*
 * One configured rule: what to do (action) when a given event fires while
 * the driver is in a given state.
 */
typedef struct BlkdebugRule {
    /* IN: initialized in add_rule() or blkdebug_debug_breakpoint() */
    BlkdebugEvent event;
    int action;         /* one of the ACTION_* values */
    int state;          /* state the rule applies to; 0 matches every state */
    union {
        struct {
            uint64_t iotype_mask;   /* one bit per BlkdebugIOType */
            int error;              /* errno value; returned negated */
            int immediately;        /* if unset, rule_check() yields first */
            int once;               /* drop the rule after it fired once */
            int64_t offset;         /* byte offset to match, -1 for any */
        } inject;
        struct {
            int new_state;
        } set_state;
        struct {
            char *tag;              /* owned by the rule, see remove_rule() */
        } suspend;
    } options;

    /* List entries protected by BDRVBlkdebugState's lock */
    QLIST_ENTRY(BlkdebugRule) next;
    QSIMPLEQ_ENTRY(BlkdebugRule) active_next;
} BlkdebugRule;
110 
/*
 * inject.iotype_mask stores one bit per BlkdebugIOType value (1ull << type),
 * so the enum must not grow beyond 64 entries.
 */
QEMU_BUILD_BUG_MSG(BLKDEBUG_IO_TYPE__MAX > 64,
                   "BlkdebugIOType mask does not fit into an uint64_t");
113 
/*
 * Options of an "inject-error" section.  read_config() feeds every parsed
 * section to add_rule() with ACTION_INJECT_ERROR.
 */
static QemuOptsList inject_error_opts = {
    .name = "inject-error",
    .head = QTAILQ_HEAD_INITIALIZER(inject_error_opts.head),
    .desc = {
        {
            /* Name of the BlkdebugEvent that activates the rule (mandatory) */
            .name = "event",
            .type = QEMU_OPT_STRING,
        },
        {
            /* State in which the rule applies; add_rule() defaults to 0 */
            .name = "state",
            .type = QEMU_OPT_NUMBER,
        },
        {
            /* BlkdebugIOType name; add_rule() applies a default mask if unset */
            .name = "iotype",
            .type = QEMU_OPT_STRING,
        },
        {
            /* Error number to inject; add_rule() defaults to EIO */
            .name = "errno",
            .type = QEMU_OPT_NUMBER,
        },
        {
            /* Sector to match; converted to a byte offset in add_rule() */
            .name = "sector",
            .type = QEMU_OPT_NUMBER,
        },
        {
            .name = "once",
            .type = QEMU_OPT_BOOL,
        },
        {
            .name = "immediately",
            .type = QEMU_OPT_BOOL,
        },
        { /* end of list */ }
    },
};
149 
/*
 * Options of a "set-state" section.  read_config() feeds every parsed
 * section to add_rule() with ACTION_SET_STATE.
 */
static QemuOptsList set_state_opts = {
    .name = "set-state",
    .head = QTAILQ_HEAD_INITIALIZER(set_state_opts.head),
    .desc = {
        {
            /* Name of the BlkdebugEvent that triggers the rule (mandatory) */
            .name = "event",
            .type = QEMU_OPT_STRING,
        },
        {
            /* State in which the rule applies; add_rule() defaults to 0 */
            .name = "state",
            .type = QEMU_OPT_NUMBER,
        },
        {
            /* State to switch to; add_rule() defaults to 0 */
            .name = "new_state",
            .type = QEMU_OPT_NUMBER,
        },
        { /* end of list */ }
    },
};
169 
/*
 * NULL-terminated list of option groups that read_config() passes to
 * qemu_config_parse() and qemu_config_parse_qdict().
 */
static QemuOptsList *config_groups[] = {
    &inject_error_opts,
    &set_state_opts,
    NULL
};
175 
/* Context handed to add_rule() through the opaque pointer */
struct add_rule_data {
    BDRVBlkdebugState *s;   /* state whose rule lists receive the new rule */
    int action;             /* ACTION_* value to give the created rule */
};
180 
181 static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
182 {
183     struct add_rule_data *d = opaque;
184     BDRVBlkdebugState *s = d->s;
185     const char *event_name;
186     int event;
187     struct BlkdebugRule *rule;
188     int64_t sector;
189     BlkdebugIOType iotype;
190     Error *local_error = NULL;
191 
192     /* Find the right event for the rule */
193     event_name = qemu_opt_get(opts, "event");
194     if (!event_name) {
195         error_setg(errp, "Missing event name for rule");
196         return -1;
197     }
198     event = qapi_enum_parse(&BlkdebugEvent_lookup, event_name, -1, errp);
199     if (event < 0) {
200         return -1;
201     }
202 
203     /* Set attributes common for all actions */
204     rule = g_malloc0(sizeof(*rule));
205     *rule = (struct BlkdebugRule) {
206         .event  = event,
207         .action = d->action,
208         .state  = qemu_opt_get_number(opts, "state", 0),
209     };
210 
211     /* Parse action-specific options */
212     switch (d->action) {
213     case ACTION_INJECT_ERROR:
214         rule->options.inject.error = qemu_opt_get_number(opts, "errno", EIO);
215         rule->options.inject.once  = qemu_opt_get_bool(opts, "once", 0);
216         rule->options.inject.immediately =
217             qemu_opt_get_bool(opts, "immediately", 0);
218         sector = qemu_opt_get_number(opts, "sector", -1);
219         rule->options.inject.offset =
220             sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE;
221 
222         iotype = qapi_enum_parse(&BlkdebugIOType_lookup,
223                                  qemu_opt_get(opts, "iotype"),
224                                  BLKDEBUG_IO_TYPE__MAX, &local_error);
225         if (local_error) {
226             error_propagate(errp, local_error);
227             g_free(rule);
228             return -1;
229         }
230         if (iotype != BLKDEBUG_IO_TYPE__MAX) {
231             rule->options.inject.iotype_mask = (1ull << iotype);
232         } else {
233             /* Apply the default */
234             rule->options.inject.iotype_mask =
235                 (1ull << BLKDEBUG_IO_TYPE_READ)
236                 | (1ull << BLKDEBUG_IO_TYPE_WRITE)
237                 | (1ull << BLKDEBUG_IO_TYPE_WRITE_ZEROES)
238                 | (1ull << BLKDEBUG_IO_TYPE_DISCARD)
239                 | (1ull << BLKDEBUG_IO_TYPE_FLUSH);
240         }
241 
242         break;
243 
244     case ACTION_SET_STATE:
245         rule->options.set_state.new_state =
246             qemu_opt_get_number(opts, "new_state", 0);
247         break;
248 
249     case ACTION_SUSPEND:
250         rule->options.suspend.tag =
251             g_strdup(qemu_opt_get(opts, "tag"));
252         break;
253     };
254 
255     /* Add the rule */
256     qemu_mutex_lock(&s->lock);
257     QLIST_INSERT_HEAD(&s->rules[event], rule, next);
258     qemu_mutex_unlock(&s->lock);
259 
260     return 0;
261 }
262 
263 /* Called with lock held or from .bdrv_close */
264 static void remove_rule(BlkdebugRule *rule)
265 {
266     switch (rule->action) {
267     case ACTION_INJECT_ERROR:
268     case ACTION_SET_STATE:
269         break;
270     case ACTION_SUSPEND:
271         g_free(rule->options.suspend.tag);
272         break;
273     }
274 
275     QLIST_REMOVE(rule, next);
276     g_free(rule);
277 }
278 
279 static int read_config(BDRVBlkdebugState *s, const char *filename,
280                        QDict *options, Error **errp)
281 {
282     FILE *f = NULL;
283     int ret;
284     struct add_rule_data d;
285     Error *local_err = NULL;
286 
287     if (filename) {
288         f = fopen(filename, "r");
289         if (f == NULL) {
290             error_setg_errno(errp, errno, "Could not read blkdebug config file");
291             return -errno;
292         }
293 
294         ret = qemu_config_parse(f, config_groups, filename, errp);
295         if (ret < 0) {
296             goto fail;
297         }
298     }
299 
300     if (!qemu_config_parse_qdict(options, config_groups, errp)) {
301         ret = -EINVAL;
302         goto fail;
303     }
304 
305     d.s = s;
306     d.action = ACTION_INJECT_ERROR;
307     qemu_opts_foreach(&inject_error_opts, add_rule, &d, &local_err);
308     if (local_err) {
309         error_propagate(errp, local_err);
310         ret = -EINVAL;
311         goto fail;
312     }
313 
314     d.action = ACTION_SET_STATE;
315     qemu_opts_foreach(&set_state_opts, add_rule, &d, &local_err);
316     if (local_err) {
317         error_propagate(errp, local_err);
318         ret = -EINVAL;
319         goto fail;
320     }
321 
322     ret = 0;
323 fail:
324     qemu_opts_reset(&inject_error_opts);
325     qemu_opts_reset(&set_state_opts);
326     if (f) {
327         fclose(f);
328     }
329     return ret;
330 }
331 
332 /* Valid blkdebug filenames look like blkdebug:path/to/config:path/to/image */
333 static void blkdebug_parse_filename(const char *filename, QDict *options,
334                                     Error **errp)
335 {
336     const char *c;
337 
338     /* Parse the blkdebug: prefix */
339     if (!strstart(filename, "blkdebug:", &filename)) {
340         /* There was no prefix; therefore, all options have to be already
341            present in the QDict (except for the filename) */
342         qdict_put_str(options, "x-image", filename);
343         return;
344     }
345 
346     /* Parse config file path */
347     c = strchr(filename, ':');
348     if (c == NULL) {
349         error_setg(errp, "blkdebug requires both config file and image path");
350         return;
351     }
352 
353     if (c != filename) {
354         QString *config_path;
355         config_path = qstring_from_substr(filename, 0, c - filename);
356         qdict_put(options, "config", config_path);
357     }
358 
359     /* TODO Allow multi-level nesting and set file.filename here */
360     filename = c + 1;
361     qdict_put_str(options, "x-image", filename);
362 }
363 
364 static int blkdebug_parse_perm_list(uint64_t *dest, QDict *options,
365                                     const char *prefix, Error **errp)
366 {
367     int ret = 0;
368     QDict *subqdict = NULL;
369     QObject *crumpled_subqdict = NULL;
370     Visitor *v = NULL;
371     BlockPermissionList *perm_list = NULL, *element;
372 
373     *dest = 0;
374 
375     qdict_extract_subqdict(options, &subqdict, prefix);
376     if (!qdict_size(subqdict)) {
377         goto out;
378     }
379 
380     crumpled_subqdict = qdict_crumple(subqdict, errp);
381     if (!crumpled_subqdict) {
382         ret = -EINVAL;
383         goto out;
384     }
385 
386     v = qobject_input_visitor_new(crumpled_subqdict);
387     if (!visit_type_BlockPermissionList(v, NULL, &perm_list, errp)) {
388         ret = -EINVAL;
389         goto out;
390     }
391 
392     for (element = perm_list; element; element = element->next) {
393         *dest |= bdrv_qapi_perm_to_blk_perm(element->value);
394     }
395 
396 out:
397     qapi_free_BlockPermissionList(perm_list);
398     visit_free(v);
399     qobject_unref(subqdict);
400     qobject_unref(crumpled_subqdict);
401     return ret;
402 }
403 
404 static int blkdebug_parse_perms(BDRVBlkdebugState *s, QDict *options,
405                                 Error **errp)
406 {
407     int ret;
408 
409     ret = blkdebug_parse_perm_list(&s->take_child_perms, options,
410                                    "take-child-perms.", errp);
411     if (ret < 0) {
412         return ret;
413     }
414 
415     ret = blkdebug_parse_perm_list(&s->unshare_child_perms, options,
416                                    "unshare-child-perms.", errp);
417     if (ret < 0) {
418         return ret;
419     }
420 
421     return 0;
422 }
423 
/*
 * Driver-level options absorbed from the options QDict in blkdebug_open().
 * The size options are validated there against the effective alignment.
 */
static QemuOptsList runtime_opts = {
    .name = "blkdebug",
    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
    .desc = {
        {
            .name = "config",
            .type = QEMU_OPT_STRING,
            .help = "Path to the configuration file",
        },
        {
            /* Set by blkdebug_parse_filename() */
            .name = "x-image",
            .type = QEMU_OPT_STRING,
            .help = "[internal use only, will be removed]",
        },
        {
            .name = "align",
            .type = QEMU_OPT_SIZE,
            .help = "Required alignment in bytes",
        },
        {
            .name = "max-transfer",
            .type = QEMU_OPT_SIZE,
            .help = "Maximum transfer size in bytes",
        },
        {
            .name = "opt-write-zero",
            .type = QEMU_OPT_SIZE,
            .help = "Optimum write zero alignment in bytes",
        },
        {
            .name = "max-write-zero",
            .type = QEMU_OPT_SIZE,
            .help = "Maximum write zero size in bytes",
        },
        {
            .name = "opt-discard",
            .type = QEMU_OPT_SIZE,
            .help = "Optimum discard alignment in bytes",
        },
        {
            .name = "max-discard",
            .type = QEMU_OPT_SIZE,
            .help = "Maximum discard size in bytes",
        },
        { /* end of list */ }
    },
};
471 
472 static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
473                          Error **errp)
474 {
475     BDRVBlkdebugState *s = bs->opaque;
476     QemuOpts *opts;
477     int ret;
478     uint64_t align;
479 
480     qemu_mutex_init(&s->lock);
481     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
482     if (!qemu_opts_absorb_qdict(opts, options, errp)) {
483         ret = -EINVAL;
484         goto out;
485     }
486 
487     /* Read rules from config file or command line options */
488     s->config_file = g_strdup(qemu_opt_get(opts, "config"));
489     ret = read_config(s, s->config_file, options, errp);
490     if (ret) {
491         goto out;
492     }
493 
494     /* Set initial state */
495     s->state = 1;
496 
497     /* Parse permissions modifiers before opening the image file */
498     ret = blkdebug_parse_perms(s, options, errp);
499     if (ret < 0) {
500         goto out;
501     }
502 
503     /* Open the image file */
504     ret = bdrv_open_file_child(qemu_opt_get(opts, "x-image"), options, "image",
505                                bs, errp);
506     if (ret < 0) {
507         goto out;
508     }
509 
510     bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
511         (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
512     bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
513         ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
514             bs->file->bs->supported_zero_flags);
515     ret = -EINVAL;
516 
517     /* Set alignment overrides */
518     s->align = qemu_opt_get_size(opts, "align", 0);
519     if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) {
520         error_setg(errp, "Cannot meet constraints with align %" PRIu64,
521                    s->align);
522         goto out;
523     }
524     align = MAX(s->align, bs->file->bs->bl.request_alignment);
525 
526     s->max_transfer = qemu_opt_get_size(opts, "max-transfer", 0);
527     if (s->max_transfer &&
528         (s->max_transfer >= INT_MAX ||
529          !QEMU_IS_ALIGNED(s->max_transfer, align))) {
530         error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64,
531                    s->max_transfer);
532         goto out;
533     }
534 
535     s->opt_write_zero = qemu_opt_get_size(opts, "opt-write-zero", 0);
536     if (s->opt_write_zero &&
537         (s->opt_write_zero >= INT_MAX ||
538          !QEMU_IS_ALIGNED(s->opt_write_zero, align))) {
539         error_setg(errp, "Cannot meet constraints with opt-write-zero %" PRIu64,
540                    s->opt_write_zero);
541         goto out;
542     }
543 
544     s->max_write_zero = qemu_opt_get_size(opts, "max-write-zero", 0);
545     if (s->max_write_zero &&
546         (s->max_write_zero >= INT_MAX ||
547          !QEMU_IS_ALIGNED(s->max_write_zero,
548                           MAX(s->opt_write_zero, align)))) {
549         error_setg(errp, "Cannot meet constraints with max-write-zero %" PRIu64,
550                    s->max_write_zero);
551         goto out;
552     }
553 
554     s->opt_discard = qemu_opt_get_size(opts, "opt-discard", 0);
555     if (s->opt_discard &&
556         (s->opt_discard >= INT_MAX ||
557          !QEMU_IS_ALIGNED(s->opt_discard, align))) {
558         error_setg(errp, "Cannot meet constraints with opt-discard %" PRIu64,
559                    s->opt_discard);
560         goto out;
561     }
562 
563     s->max_discard = qemu_opt_get_size(opts, "max-discard", 0);
564     if (s->max_discard &&
565         (s->max_discard >= INT_MAX ||
566          !QEMU_IS_ALIGNED(s->max_discard,
567                           MAX(s->opt_discard, align)))) {
568         error_setg(errp, "Cannot meet constraints with max-discard %" PRIu64,
569                    s->max_discard);
570         goto out;
571     }
572 
573     bdrv_debug_event(bs, BLKDBG_NONE);
574 
575     ret = 0;
576 out:
577     if (ret < 0) {
578         qemu_mutex_destroy(&s->lock);
579         g_free(s->config_file);
580     }
581     qemu_opts_del(opts);
582     return ret;
583 }
584 
585 static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
586                       BlkdebugIOType iotype)
587 {
588     BDRVBlkdebugState *s = bs->opaque;
589     BlkdebugRule *rule = NULL;
590     int error;
591     bool immediately;
592 
593     qemu_mutex_lock(&s->lock);
594     QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
595         uint64_t inject_offset = rule->options.inject.offset;
596 
597         if ((inject_offset == -1 ||
598              (bytes && inject_offset >= offset &&
599               inject_offset < offset + bytes)) &&
600             (rule->options.inject.iotype_mask & (1ull << iotype)))
601         {
602             break;
603         }
604     }
605 
606     if (!rule || !rule->options.inject.error) {
607         qemu_mutex_unlock(&s->lock);
608         return 0;
609     }
610 
611     immediately = rule->options.inject.immediately;
612     error = rule->options.inject.error;
613 
614     if (rule->options.inject.once) {
615         QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
616         remove_rule(rule);
617     }
618 
619     qemu_mutex_unlock(&s->lock);
620     if (!immediately) {
621         aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
622         qemu_coroutine_yield();
623     }
624 
625     return -error;
626 }
627 
628 static int coroutine_fn
629 blkdebug_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
630                    QEMUIOVector *qiov, BdrvRequestFlags flags)
631 {
632     int err;
633 
634     /* Sanity check block layer guarantees */
635     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
636     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
637     if (bs->bl.max_transfer) {
638         assert(bytes <= bs->bl.max_transfer);
639     }
640 
641     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_READ);
642     if (err) {
643         return err;
644     }
645 
646     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
647 }
648 
649 static int coroutine_fn
650 blkdebug_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
651                     QEMUIOVector *qiov, BdrvRequestFlags flags)
652 {
653     int err;
654 
655     /* Sanity check block layer guarantees */
656     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
657     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
658     if (bs->bl.max_transfer) {
659         assert(bytes <= bs->bl.max_transfer);
660     }
661 
662     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE);
663     if (err) {
664         return err;
665     }
666 
667     return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
668 }
669 
670 static int coroutine_fn blkdebug_co_flush(BlockDriverState *bs)
671 {
672     int err = rule_check(bs, 0, 0, BLKDEBUG_IO_TYPE_FLUSH);
673 
674     if (err) {
675         return err;
676     }
677 
678     return bdrv_co_flush(bs->file->bs);
679 }
680 
681 static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
682                                                   int64_t offset, int64_t bytes,
683                                                   BdrvRequestFlags flags)
684 {
685     uint32_t align = MAX(bs->bl.request_alignment,
686                          bs->bl.pwrite_zeroes_alignment);
687     int err;
688 
689     /* Only pass through requests that are larger than requested
690      * preferred alignment (so that we test the fallback to writes on
691      * unaligned portions), and check that the block layer never hands
692      * us anything unaligned that crosses an alignment boundary.  */
693     if (bytes < align) {
694         assert(QEMU_IS_ALIGNED(offset, align) ||
695                QEMU_IS_ALIGNED(offset + bytes, align) ||
696                DIV_ROUND_UP(offset, align) ==
697                DIV_ROUND_UP(offset + bytes, align));
698         return -ENOTSUP;
699     }
700     assert(QEMU_IS_ALIGNED(offset, align));
701     assert(QEMU_IS_ALIGNED(bytes, align));
702     if (bs->bl.max_pwrite_zeroes) {
703         assert(bytes <= bs->bl.max_pwrite_zeroes);
704     }
705 
706     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE_ZEROES);
707     if (err) {
708         return err;
709     }
710 
711     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
712 }
713 
714 static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
715                                              int64_t offset, int64_t bytes)
716 {
717     uint32_t align = bs->bl.pdiscard_alignment;
718     int err;
719 
720     /* Only pass through requests that are larger than requested
721      * minimum alignment, and ensure that unaligned requests do not
722      * cross optimum discard boundaries. */
723     if (bytes < bs->bl.request_alignment) {
724         assert(QEMU_IS_ALIGNED(offset, align) ||
725                QEMU_IS_ALIGNED(offset + bytes, align) ||
726                DIV_ROUND_UP(offset, align) ==
727                DIV_ROUND_UP(offset + bytes, align));
728         return -ENOTSUP;
729     }
730     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
731     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
732     if (align && bytes >= align) {
733         assert(QEMU_IS_ALIGNED(offset, align));
734         assert(QEMU_IS_ALIGNED(bytes, align));
735     }
736     if (bs->bl.max_pdiscard) {
737         assert(bytes <= bs->bl.max_pdiscard);
738     }
739 
740     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_DISCARD);
741     if (err) {
742         return err;
743     }
744 
745     return bdrv_co_pdiscard(bs->file, offset, bytes);
746 }
747 
748 static int coroutine_fn blkdebug_co_block_status(BlockDriverState *bs,
749                                                  bool want_zero,
750                                                  int64_t offset,
751                                                  int64_t bytes,
752                                                  int64_t *pnum,
753                                                  int64_t *map,
754                                                  BlockDriverState **file)
755 {
756     int err;
757 
758     assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment));
759 
760     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_BLOCK_STATUS);
761     if (err) {
762         return err;
763     }
764 
765     assert(bs->file && bs->file->bs);
766     *pnum = bytes;
767     *map = offset;
768     *file = bs->file->bs;
769     return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
770 }
771 
772 static void blkdebug_close(BlockDriverState *bs)
773 {
774     BDRVBlkdebugState *s = bs->opaque;
775     BlkdebugRule *rule, *next;
776     int i;
777 
778     for (i = 0; i < BLKDBG__MAX; i++) {
779         QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
780             remove_rule(rule);
781         }
782     }
783 
784     g_free(s->config_file);
785     qemu_mutex_destroy(&s->lock);
786 }
787 
788 /* Called with lock held.  */
789 static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
790 {
791     BDRVBlkdebugState *s = bs->opaque;
792     BlkdebugSuspendedReq *r;
793 
794     r = g_new(BlkdebugSuspendedReq, 1);
795 
796     r->co         = qemu_coroutine_self();
797     r->tag        = g_strdup(rule->options.suspend.tag);
798 
799     remove_rule(rule);
800     QLIST_INSERT_HEAD(&s->suspended_reqs, r, next);
801 
802     if (!qtest_enabled()) {
803         printf("blkdebug: Suspended request '%s'\n", r->tag);
804     }
805 }
806 
807 /* Called with lock held.  */
808 static void process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
809                          int *action_count, int *new_state)
810 {
811     BDRVBlkdebugState *s = bs->opaque;
812 
813     /* Only process rules for the current state */
814     if (rule->state && rule->state != s->state) {
815         return;
816     }
817 
818     /* Take the action */
819     action_count[rule->action]++;
820     switch (rule->action) {
821     case ACTION_INJECT_ERROR:
822         if (action_count[ACTION_INJECT_ERROR] == 1) {
823             QSIMPLEQ_INIT(&s->active_rules);
824         }
825         QSIMPLEQ_INSERT_HEAD(&s->active_rules, rule, active_next);
826         break;
827 
828     case ACTION_SET_STATE:
829         *new_state = rule->options.set_state.new_state;
830         break;
831 
832     case ACTION_SUSPEND:
833         suspend_request(bs, rule);
834         break;
835     }
836 }
837 
838 static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event)
839 {
840     BDRVBlkdebugState *s = bs->opaque;
841     struct BlkdebugRule *rule, *next;
842     int new_state;
843     int actions_count[ACTION__MAX] = { 0 };
844 
845     assert((int)event >= 0 && event < BLKDBG__MAX);
846 
847     WITH_QEMU_LOCK_GUARD(&s->lock) {
848         new_state = s->state;
849         QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) {
850             process_rule(bs, rule, actions_count, &new_state);
851         }
852         s->state = new_state;
853     }
854 
855     while (actions_count[ACTION_SUSPEND] > 0) {
856         qemu_coroutine_yield();
857         actions_count[ACTION_SUSPEND]--;
858     }
859 }
860 
861 static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
862                                      const char *tag)
863 {
864     BDRVBlkdebugState *s = bs->opaque;
865     struct BlkdebugRule *rule;
866     int blkdebug_event;
867 
868     blkdebug_event = qapi_enum_parse(&BlkdebugEvent_lookup, event, -1, NULL);
869     if (blkdebug_event < 0) {
870         return -ENOENT;
871     }
872 
873     rule = g_malloc(sizeof(*rule));
874     *rule = (struct BlkdebugRule) {
875         .event  = blkdebug_event,
876         .action = ACTION_SUSPEND,
877         .state  = 0,
878         .options.suspend.tag = g_strdup(tag),
879     };
880 
881     qemu_mutex_lock(&s->lock);
882     QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next);
883     qemu_mutex_unlock(&s->lock);
884 
885     return 0;
886 }
887 
888 /* Called with lock held. May temporarily release lock. */
889 static int resume_req_by_tag(BDRVBlkdebugState *s, const char *tag, bool all)
890 {
891     BlkdebugSuspendedReq *r;
892 
893 retry:
894     /*
895      * No need for _SAFE, since a different coroutine can remove another node
896      * (not the current one) in this list, and when the current one is removed
897      * the iteration starts back from beginning anyways.
898      */
899     QLIST_FOREACH(r, &s->suspended_reqs, next) {
900         if (!strcmp(r->tag, tag)) {
901             Coroutine *co = r->co;
902 
903             if (!qtest_enabled()) {
904                 printf("blkdebug: Resuming request '%s'\n", r->tag);
905             }
906 
907             QLIST_REMOVE(r, next);
908             g_free(r->tag);
909             g_free(r);
910 
911             qemu_mutex_unlock(&s->lock);
912             qemu_coroutine_enter(co);
913             qemu_mutex_lock(&s->lock);
914 
915             if (all) {
916                 goto retry;
917             }
918             return 0;
919         }
920     }
921     return -ENOENT;
922 }
923 
924 static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
925 {
926     BDRVBlkdebugState *s = bs->opaque;
927     QEMU_LOCK_GUARD(&s->lock);
928     return resume_req_by_tag(s, tag, false);
929 }
930 
931 static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
932                                             const char *tag)
933 {
934     BDRVBlkdebugState *s = bs->opaque;
935     BlkdebugRule *rule, *next;
936     int i, ret = -ENOENT;
937 
938     QEMU_LOCK_GUARD(&s->lock);
939     for (i = 0; i < BLKDBG__MAX; i++) {
940         QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
941             if (rule->action == ACTION_SUSPEND &&
942                 !strcmp(rule->options.suspend.tag, tag)) {
943                 remove_rule(rule);
944                 ret = 0;
945             }
946         }
947     }
948     if (resume_req_by_tag(s, tag, true) == 0) {
949         ret = 0;
950     }
951     return ret;
952 }
953 
954 static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
955 {
956     BDRVBlkdebugState *s = bs->opaque;
957     BlkdebugSuspendedReq *r;
958 
959     QEMU_LOCK_GUARD(&s->lock);
960     QLIST_FOREACH(r, &s->suspended_reqs, next) {
961         if (!strcmp(r->tag, tag)) {
962             return true;
963         }
964     }
965     return false;
966 }
967 
968 static int64_t blkdebug_getlength(BlockDriverState *bs)
969 {
970     return bdrv_getlength(bs->file->bs);
971 }
972 
973 static void blkdebug_refresh_filename(BlockDriverState *bs)
974 {
975     BDRVBlkdebugState *s = bs->opaque;
976     const QDictEntry *e;
977     int ret;
978 
979     if (!bs->file->bs->exact_filename[0]) {
980         return;
981     }
982 
983     for (e = qdict_first(bs->full_open_options); e;
984          e = qdict_next(bs->full_open_options, e))
985     {
986         /* Real child options are under "image", but "x-image" may
987          * contain a filename */
988         if (strcmp(qdict_entry_key(e), "config") &&
989             strcmp(qdict_entry_key(e), "image") &&
990             strcmp(qdict_entry_key(e), "x-image") &&
991             strcmp(qdict_entry_key(e), "driver"))
992         {
993             return;
994         }
995     }
996 
997     ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename),
998                    "blkdebug:%s:%s",
999                    s->config_file ?: "", bs->file->bs->exact_filename);
1000     if (ret >= sizeof(bs->exact_filename)) {
1001         /* An overflow makes the filename unusable, so do not report any */
1002         bs->exact_filename[0] = 0;
1003     }
1004 }
1005 
1006 static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp)
1007 {
1008     BDRVBlkdebugState *s = bs->opaque;
1009 
1010     if (s->align) {
1011         bs->bl.request_alignment = s->align;
1012     }
1013     if (s->max_transfer) {
1014         bs->bl.max_transfer = s->max_transfer;
1015     }
1016     if (s->opt_write_zero) {
1017         bs->bl.pwrite_zeroes_alignment = s->opt_write_zero;
1018     }
1019     if (s->max_write_zero) {
1020         bs->bl.max_pwrite_zeroes = s->max_write_zero;
1021     }
1022     if (s->opt_discard) {
1023         bs->bl.pdiscard_alignment = s->opt_discard;
1024     }
1025     if (s->max_discard) {
1026         bs->bl.max_pdiscard = s->max_discard;
1027     }
1028 }
1029 
1030 static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
1031                                    BlockReopenQueue *queue, Error **errp)
1032 {
1033     return 0;
1034 }
1035 
1036 static void blkdebug_child_perm(BlockDriverState *bs, BdrvChild *c,
1037                                 BdrvChildRole role,
1038                                 BlockReopenQueue *reopen_queue,
1039                                 uint64_t perm, uint64_t shared,
1040                                 uint64_t *nperm, uint64_t *nshared)
1041 {
1042     BDRVBlkdebugState *s = bs->opaque;
1043 
1044     bdrv_default_perms(bs, c, role, reopen_queue,
1045                        perm, shared, nperm, nshared);
1046 
1047     *nperm |= s->take_child_perms;
1048     *nshared &= ~s->unshare_child_perms;
1049 }
1050 
/*
 * NULL-terminated list of option names installed as the driver's
 * .strong_runtime_opts.  Entries ending in '.' presumably act as prefixes
 * covering every sub-option below them -- confirm against the consumer of
 * this table.
 */
static const char *const blkdebug_strong_runtime_opts[] = {
    /* Configuration file and rule options */
    "config",
    "inject-error.",
    "set-state.",

    /* Limit overrides */
    "align",
    "max-transfer",
    "opt-write-zero",
    "max-write-zero",
    "opt-discard",
    "max-discard",

    /* Terminator */
    NULL
};
1064 
1065 static BlockDriver bdrv_blkdebug = {
1066     .format_name            = "blkdebug",
1067     .protocol_name          = "blkdebug",
1068     .instance_size          = sizeof(BDRVBlkdebugState),
1069     .is_filter              = true,
1070 
1071     .bdrv_parse_filename    = blkdebug_parse_filename,
1072     .bdrv_file_open         = blkdebug_open,
1073     .bdrv_close             = blkdebug_close,
1074     .bdrv_reopen_prepare    = blkdebug_reopen_prepare,
1075     .bdrv_child_perm        = blkdebug_child_perm,
1076 
1077     .bdrv_getlength         = blkdebug_getlength,
1078     .bdrv_refresh_filename  = blkdebug_refresh_filename,
1079     .bdrv_refresh_limits    = blkdebug_refresh_limits,
1080 
1081     .bdrv_co_preadv         = blkdebug_co_preadv,
1082     .bdrv_co_pwritev        = blkdebug_co_pwritev,
1083     .bdrv_co_flush_to_disk  = blkdebug_co_flush,
1084     .bdrv_co_pwrite_zeroes  = blkdebug_co_pwrite_zeroes,
1085     .bdrv_co_pdiscard       = blkdebug_co_pdiscard,
1086     .bdrv_co_block_status   = blkdebug_co_block_status,
1087 
1088     .bdrv_debug_event           = blkdebug_debug_event,
1089     .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
1090     .bdrv_debug_remove_breakpoint
1091                                 = blkdebug_debug_remove_breakpoint,
1092     .bdrv_debug_resume          = blkdebug_debug_resume,
1093     .bdrv_debug_is_suspended    = blkdebug_debug_is_suspended,
1094 
1095     .strong_runtime_opts        = blkdebug_strong_runtime_opts,
1096 };
1097 
1098 static void bdrv_blkdebug_init(void)
1099 {
1100     bdrv_register(&bdrv_blkdebug);
1101 }
1102 
1103 block_init(bdrv_blkdebug_init);
1104