xref: /openbmc/qemu/block/blkdebug.c (revision dbc0e805)
1 /*
2  * Block protocol for I/O error injection
3  *
4  * Copyright (C) 2016-2017 Red Hat, Inc.
5  * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qapi/error.h"
28 #include "qemu/cutils.h"
29 #include "qemu/config-file.h"
30 #include "block/block_int.h"
31 #include "block/qdict.h"
32 #include "qemu/module.h"
33 #include "qemu/option.h"
34 #include "qapi/qapi-visit-block-core.h"
35 #include "qapi/qmp/qdict.h"
36 #include "qapi/qmp/qlist.h"
37 #include "qapi/qmp/qstring.h"
38 #include "qapi/qobject-input-visitor.h"
39 #include "sysemu/qtest.h"
40 
41 /* All APIs are thread-safe */
42 
/*
 * Per-node driver state of a blkdebug BlockDriverState (stored in
 * bs->opaque).
 */
typedef struct BDRVBlkdebugState {
    /* IN: initialized in blkdebug_open() and never changed */
    /*
     * Limit overrides taken from the runtime options of the same name
     * (with dashes).  A value of 0 means "no override"; see
     * blkdebug_refresh_limits().
     */
    uint64_t align;
    uint64_t max_transfer;
    uint64_t opt_write_zero;
    uint64_t max_write_zero;
    uint64_t opt_discard;
    uint64_t max_discard;
    char *config_file; /* For blkdebug_refresh_filename() */
    /* initialized in blkdebug_parse_perms() */
    uint64_t take_child_perms;
    uint64_t unshare_child_perms;

    /* State. Protected by lock */
    /* Current rule state; set to 1 on open, changed by set-state rules */
    int state;
    /* All configured rules, one list per debug event */
    QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
    /* Error injection rules consulted by rule_check() */
    QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
    /* Requests currently parked by a suspend rule */
    QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs;
    QemuMutex lock;
} BDRVBlkdebugState;
63 
/*
 * AIO control block that carries a result code alongside the common part.
 * NOTE(review): not referenced in the visible part of this file - confirm
 * whether it is still used anywhere.
 */
typedef struct BlkdebugAIOCB {
    BlockAIOCB common;
    int ret;
} BlkdebugAIOCB;
68 
/*
 * A request whose coroutine was parked by a suspend rule and is waiting to
 * be resumed by tag (see resume_req_by_tag()).
 */
typedef struct BlkdebugSuspendedReq {
    /* IN: initialized in suspend_request() */
    Coroutine *co;      /* coroutine to re-enter on resume */
    char *tag;          /* private copy of the rule's tag */

    /* List entry protected by BDRVBlkdebugState's lock */
    QLIST_ENTRY(BlkdebugSuspendedReq) next;
} BlkdebugSuspendedReq;
77 
/*
 * Kinds of action a rule can take when its event fires.  The values are
 * also used as indices into the per-event action counters, which is why
 * ACTION__MAX must stay last.
 */
enum {
    ACTION_INJECT_ERROR,
    ACTION_SET_STATE,
    ACTION_SUSPEND,
    ACTION__MAX,
};
84 
/*
 * One configured rule: when @event fires while the driver is in @state,
 * perform @action using the matching member of @options.
 */
typedef struct BlkdebugRule {
    /* IN: initialized in add_rule() or blkdebug_debug_breakpoint() */
    BlkdebugEvent event;
    int action;         /* one of the ACTION_* constants */
    int state;          /* required driver state; 0 matches any state */
    union {
        struct {
            uint64_t iotype_mask;   /* bit (1ull << BlkdebugIOType) per type */
            int error;              /* errno value; returned negated */
            int immediately;        /* if zero, rule_check() yields once first */
            int once;               /* remove the rule after it triggered */
            int64_t offset;         /* byte offset to match; -1 matches any */
        } inject;
        struct {
            int new_state;
        } set_state;
        struct {
            char *tag;              /* owned by the rule, see remove_rule() */
        } suspend;
    } options;

    /* List entries protected by BDRVBlkdebugState's lock */
    QLIST_ENTRY(BlkdebugRule) next;
    QSIMPLEQ_ENTRY(BlkdebugRule) active_next;
} BlkdebugRule;
110 
/*
 * inject.iotype_mask holds one bit per BlkdebugIOType (1ull << iotype), so
 * the enum must not have more than 64 values.
 */
QEMU_BUILD_BUG_MSG(BLKDEBUG_IO_TYPE__MAX > 64,
                   "BlkdebugIOType mask does not fit into an uint64_t");
113 
114 static QemuOptsList inject_error_opts = {
115     .name = "inject-error",
116     .head = QTAILQ_HEAD_INITIALIZER(inject_error_opts.head),
117     .desc = {
118         {
119             .name = "event",
120             .type = QEMU_OPT_STRING,
121         },
122         {
123             .name = "state",
124             .type = QEMU_OPT_NUMBER,
125         },
126         {
127             .name = "iotype",
128             .type = QEMU_OPT_STRING,
129         },
130         {
131             .name = "errno",
132             .type = QEMU_OPT_NUMBER,
133         },
134         {
135             .name = "sector",
136             .type = QEMU_OPT_NUMBER,
137         },
138         {
139             .name = "once",
140             .type = QEMU_OPT_BOOL,
141         },
142         {
143             .name = "immediately",
144             .type = QEMU_OPT_BOOL,
145         },
146         { /* end of list */ }
147     },
148 };
149 
150 static QemuOptsList set_state_opts = {
151     .name = "set-state",
152     .head = QTAILQ_HEAD_INITIALIZER(set_state_opts.head),
153     .desc = {
154         {
155             .name = "event",
156             .type = QEMU_OPT_STRING,
157         },
158         {
159             .name = "state",
160             .type = QEMU_OPT_NUMBER,
161         },
162         {
163             .name = "new_state",
164             .type = QEMU_OPT_NUMBER,
165         },
166         { /* end of list */ }
167     },
168 };
169 
/*
 * NULL-terminated list of the option groups that read_config() passes to
 * the config file and QDict parsers.
 */
static QemuOptsList *config_groups[] = {
    &inject_error_opts,
    &set_state_opts,
    NULL
};
175 
/* Context handed to add_rule() through the opaque callback argument. */
struct add_rule_data {
    BDRVBlkdebugState *s;   /* driver state that receives the new rule */
    int action;             /* ACTION_* to assign to every created rule */
};
180 
181 static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
182 {
183     struct add_rule_data *d = opaque;
184     BDRVBlkdebugState *s = d->s;
185     const char *event_name;
186     int event;
187     struct BlkdebugRule *rule;
188     int64_t sector;
189     BlkdebugIOType iotype;
190     Error *local_error = NULL;
191 
192     /* Find the right event for the rule */
193     event_name = qemu_opt_get(opts, "event");
194     if (!event_name) {
195         error_setg(errp, "Missing event name for rule");
196         return -1;
197     }
198     event = qapi_enum_parse(&BlkdebugEvent_lookup, event_name, -1, errp);
199     if (event < 0) {
200         return -1;
201     }
202 
203     /* Set attributes common for all actions */
204     rule = g_malloc0(sizeof(*rule));
205     *rule = (struct BlkdebugRule) {
206         .event  = event,
207         .action = d->action,
208         .state  = qemu_opt_get_number(opts, "state", 0),
209     };
210 
211     /* Parse action-specific options */
212     switch (d->action) {
213     case ACTION_INJECT_ERROR:
214         rule->options.inject.error = qemu_opt_get_number(opts, "errno", EIO);
215         rule->options.inject.once  = qemu_opt_get_bool(opts, "once", 0);
216         rule->options.inject.immediately =
217             qemu_opt_get_bool(opts, "immediately", 0);
218         sector = qemu_opt_get_number(opts, "sector", -1);
219         rule->options.inject.offset =
220             sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE;
221 
222         iotype = qapi_enum_parse(&BlkdebugIOType_lookup,
223                                  qemu_opt_get(opts, "iotype"),
224                                  BLKDEBUG_IO_TYPE__MAX, &local_error);
225         if (local_error) {
226             error_propagate(errp, local_error);
227             g_free(rule);
228             return -1;
229         }
230         if (iotype != BLKDEBUG_IO_TYPE__MAX) {
231             rule->options.inject.iotype_mask = (1ull << iotype);
232         } else {
233             /* Apply the default */
234             rule->options.inject.iotype_mask =
235                 (1ull << BLKDEBUG_IO_TYPE_READ)
236                 | (1ull << BLKDEBUG_IO_TYPE_WRITE)
237                 | (1ull << BLKDEBUG_IO_TYPE_WRITE_ZEROES)
238                 | (1ull << BLKDEBUG_IO_TYPE_DISCARD)
239                 | (1ull << BLKDEBUG_IO_TYPE_FLUSH);
240         }
241 
242         break;
243 
244     case ACTION_SET_STATE:
245         rule->options.set_state.new_state =
246             qemu_opt_get_number(opts, "new_state", 0);
247         break;
248 
249     case ACTION_SUSPEND:
250         rule->options.suspend.tag =
251             g_strdup(qemu_opt_get(opts, "tag"));
252         break;
253     };
254 
255     /* Add the rule */
256     qemu_mutex_lock(&s->lock);
257     QLIST_INSERT_HEAD(&s->rules[event], rule, next);
258     qemu_mutex_unlock(&s->lock);
259 
260     return 0;
261 }
262 
263 /* Called with lock held or from .bdrv_close */
264 static void remove_rule(BlkdebugRule *rule)
265 {
266     switch (rule->action) {
267     case ACTION_INJECT_ERROR:
268     case ACTION_SET_STATE:
269         break;
270     case ACTION_SUSPEND:
271         g_free(rule->options.suspend.tag);
272         break;
273     }
274 
275     QLIST_REMOVE(rule, next);
276     g_free(rule);
277 }
278 
279 static int read_config(BDRVBlkdebugState *s, const char *filename,
280                        QDict *options, Error **errp)
281 {
282     FILE *f = NULL;
283     int ret;
284     struct add_rule_data d;
285     Error *local_err = NULL;
286 
287     if (filename) {
288         f = fopen(filename, "r");
289         if (f == NULL) {
290             error_setg_errno(errp, errno, "Could not read blkdebug config file");
291             return -errno;
292         }
293 
294         ret = qemu_config_parse(f, config_groups, filename, errp);
295         if (ret < 0) {
296             goto fail;
297         }
298     }
299 
300     qemu_config_parse_qdict(options, config_groups, &local_err);
301     if (local_err) {
302         error_propagate(errp, local_err);
303         ret = -EINVAL;
304         goto fail;
305     }
306 
307     d.s = s;
308     d.action = ACTION_INJECT_ERROR;
309     qemu_opts_foreach(&inject_error_opts, add_rule, &d, &local_err);
310     if (local_err) {
311         error_propagate(errp, local_err);
312         ret = -EINVAL;
313         goto fail;
314     }
315 
316     d.action = ACTION_SET_STATE;
317     qemu_opts_foreach(&set_state_opts, add_rule, &d, &local_err);
318     if (local_err) {
319         error_propagate(errp, local_err);
320         ret = -EINVAL;
321         goto fail;
322     }
323 
324     ret = 0;
325 fail:
326     qemu_opts_reset(&inject_error_opts);
327     qemu_opts_reset(&set_state_opts);
328     if (f) {
329         fclose(f);
330     }
331     return ret;
332 }
333 
334 /* Valid blkdebug filenames look like blkdebug:path/to/config:path/to/image */
335 static void blkdebug_parse_filename(const char *filename, QDict *options,
336                                     Error **errp)
337 {
338     const char *c;
339 
340     /* Parse the blkdebug: prefix */
341     if (!strstart(filename, "blkdebug:", &filename)) {
342         /* There was no prefix; therefore, all options have to be already
343            present in the QDict (except for the filename) */
344         qdict_put_str(options, "x-image", filename);
345         return;
346     }
347 
348     /* Parse config file path */
349     c = strchr(filename, ':');
350     if (c == NULL) {
351         error_setg(errp, "blkdebug requires both config file and image path");
352         return;
353     }
354 
355     if (c != filename) {
356         QString *config_path;
357         config_path = qstring_from_substr(filename, 0, c - filename);
358         qdict_put(options, "config", config_path);
359     }
360 
361     /* TODO Allow multi-level nesting and set file.filename here */
362     filename = c + 1;
363     qdict_put_str(options, "x-image", filename);
364 }
365 
366 static int blkdebug_parse_perm_list(uint64_t *dest, QDict *options,
367                                     const char *prefix, Error **errp)
368 {
369     int ret = 0;
370     QDict *subqdict = NULL;
371     QObject *crumpled_subqdict = NULL;
372     Visitor *v = NULL;
373     BlockPermissionList *perm_list = NULL, *element;
374 
375     *dest = 0;
376 
377     qdict_extract_subqdict(options, &subqdict, prefix);
378     if (!qdict_size(subqdict)) {
379         goto out;
380     }
381 
382     crumpled_subqdict = qdict_crumple(subqdict, errp);
383     if (!crumpled_subqdict) {
384         ret = -EINVAL;
385         goto out;
386     }
387 
388     v = qobject_input_visitor_new(crumpled_subqdict);
389     if (!visit_type_BlockPermissionList(v, NULL, &perm_list, errp)) {
390         ret = -EINVAL;
391         goto out;
392     }
393 
394     for (element = perm_list; element; element = element->next) {
395         *dest |= bdrv_qapi_perm_to_blk_perm(element->value);
396     }
397 
398 out:
399     qapi_free_BlockPermissionList(perm_list);
400     visit_free(v);
401     qobject_unref(subqdict);
402     qobject_unref(crumpled_subqdict);
403     return ret;
404 }
405 
406 static int blkdebug_parse_perms(BDRVBlkdebugState *s, QDict *options,
407                                 Error **errp)
408 {
409     int ret;
410 
411     ret = blkdebug_parse_perm_list(&s->take_child_perms, options,
412                                    "take-child-perms.", errp);
413     if (ret < 0) {
414         return ret;
415     }
416 
417     ret = blkdebug_parse_perm_list(&s->unshare_child_perms, options,
418                                    "unshare-child-perms.", errp);
419     if (ret < 0) {
420         return ret;
421     }
422 
423     return 0;
424 }
425 
426 static QemuOptsList runtime_opts = {
427     .name = "blkdebug",
428     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
429     .desc = {
430         {
431             .name = "config",
432             .type = QEMU_OPT_STRING,
433             .help = "Path to the configuration file",
434         },
435         {
436             .name = "x-image",
437             .type = QEMU_OPT_STRING,
438             .help = "[internal use only, will be removed]",
439         },
440         {
441             .name = "align",
442             .type = QEMU_OPT_SIZE,
443             .help = "Required alignment in bytes",
444         },
445         {
446             .name = "max-transfer",
447             .type = QEMU_OPT_SIZE,
448             .help = "Maximum transfer size in bytes",
449         },
450         {
451             .name = "opt-write-zero",
452             .type = QEMU_OPT_SIZE,
453             .help = "Optimum write zero alignment in bytes",
454         },
455         {
456             .name = "max-write-zero",
457             .type = QEMU_OPT_SIZE,
458             .help = "Maximum write zero size in bytes",
459         },
460         {
461             .name = "opt-discard",
462             .type = QEMU_OPT_SIZE,
463             .help = "Optimum discard alignment in bytes",
464         },
465         {
466             .name = "max-discard",
467             .type = QEMU_OPT_SIZE,
468             .help = "Maximum discard size in bytes",
469         },
470         { /* end of list */ }
471     },
472 };
473 
474 static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
475                          Error **errp)
476 {
477     BDRVBlkdebugState *s = bs->opaque;
478     QemuOpts *opts;
479     int ret;
480     uint64_t align;
481 
482     qemu_mutex_init(&s->lock);
483     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
484     if (!qemu_opts_absorb_qdict(opts, options, errp)) {
485         ret = -EINVAL;
486         goto out;
487     }
488 
489     /* Read rules from config file or command line options */
490     s->config_file = g_strdup(qemu_opt_get(opts, "config"));
491     ret = read_config(s, s->config_file, options, errp);
492     if (ret) {
493         goto out;
494     }
495 
496     /* Set initial state */
497     s->state = 1;
498 
499     /* Parse permissions modifiers before opening the image file */
500     ret = blkdebug_parse_perms(s, options, errp);
501     if (ret < 0) {
502         goto out;
503     }
504 
505     /* Open the image file */
506     ret = bdrv_open_file_child(qemu_opt_get(opts, "x-image"), options, "image",
507                                bs, errp);
508     if (ret < 0) {
509         goto out;
510     }
511 
512     bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
513         (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
514     bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
515         ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
516             bs->file->bs->supported_zero_flags);
517     ret = -EINVAL;
518 
519     /* Set alignment overrides */
520     s->align = qemu_opt_get_size(opts, "align", 0);
521     if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) {
522         error_setg(errp, "Cannot meet constraints with align %" PRIu64,
523                    s->align);
524         goto out;
525     }
526     align = MAX(s->align, bs->file->bs->bl.request_alignment);
527 
528     s->max_transfer = qemu_opt_get_size(opts, "max-transfer", 0);
529     if (s->max_transfer &&
530         (s->max_transfer >= INT_MAX ||
531          !QEMU_IS_ALIGNED(s->max_transfer, align))) {
532         error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64,
533                    s->max_transfer);
534         goto out;
535     }
536 
537     s->opt_write_zero = qemu_opt_get_size(opts, "opt-write-zero", 0);
538     if (s->opt_write_zero &&
539         (s->opt_write_zero >= INT_MAX ||
540          !QEMU_IS_ALIGNED(s->opt_write_zero, align))) {
541         error_setg(errp, "Cannot meet constraints with opt-write-zero %" PRIu64,
542                    s->opt_write_zero);
543         goto out;
544     }
545 
546     s->max_write_zero = qemu_opt_get_size(opts, "max-write-zero", 0);
547     if (s->max_write_zero &&
548         (s->max_write_zero >= INT_MAX ||
549          !QEMU_IS_ALIGNED(s->max_write_zero,
550                           MAX(s->opt_write_zero, align)))) {
551         error_setg(errp, "Cannot meet constraints with max-write-zero %" PRIu64,
552                    s->max_write_zero);
553         goto out;
554     }
555 
556     s->opt_discard = qemu_opt_get_size(opts, "opt-discard", 0);
557     if (s->opt_discard &&
558         (s->opt_discard >= INT_MAX ||
559          !QEMU_IS_ALIGNED(s->opt_discard, align))) {
560         error_setg(errp, "Cannot meet constraints with opt-discard %" PRIu64,
561                    s->opt_discard);
562         goto out;
563     }
564 
565     s->max_discard = qemu_opt_get_size(opts, "max-discard", 0);
566     if (s->max_discard &&
567         (s->max_discard >= INT_MAX ||
568          !QEMU_IS_ALIGNED(s->max_discard,
569                           MAX(s->opt_discard, align)))) {
570         error_setg(errp, "Cannot meet constraints with max-discard %" PRIu64,
571                    s->max_discard);
572         goto out;
573     }
574 
575     bdrv_debug_event(bs, BLKDBG_NONE);
576 
577     ret = 0;
578 out:
579     if (ret < 0) {
580         qemu_mutex_destroy(&s->lock);
581         g_free(s->config_file);
582     }
583     qemu_opts_del(opts);
584     return ret;
585 }
586 
587 static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
588                       BlkdebugIOType iotype)
589 {
590     BDRVBlkdebugState *s = bs->opaque;
591     BlkdebugRule *rule = NULL;
592     int error;
593     bool immediately;
594 
595     qemu_mutex_lock(&s->lock);
596     QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
597         uint64_t inject_offset = rule->options.inject.offset;
598 
599         if ((inject_offset == -1 ||
600              (bytes && inject_offset >= offset &&
601               inject_offset < offset + bytes)) &&
602             (rule->options.inject.iotype_mask & (1ull << iotype)))
603         {
604             break;
605         }
606     }
607 
608     if (!rule || !rule->options.inject.error) {
609         qemu_mutex_unlock(&s->lock);
610         return 0;
611     }
612 
613     immediately = rule->options.inject.immediately;
614     error = rule->options.inject.error;
615 
616     if (rule->options.inject.once) {
617         QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
618         remove_rule(rule);
619     }
620 
621     qemu_mutex_unlock(&s->lock);
622     if (!immediately) {
623         aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
624         qemu_coroutine_yield();
625     }
626 
627     return -error;
628 }
629 
630 static int coroutine_fn
631 blkdebug_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
632                    QEMUIOVector *qiov, BdrvRequestFlags flags)
633 {
634     int err;
635 
636     /* Sanity check block layer guarantees */
637     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
638     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
639     if (bs->bl.max_transfer) {
640         assert(bytes <= bs->bl.max_transfer);
641     }
642 
643     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_READ);
644     if (err) {
645         return err;
646     }
647 
648     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
649 }
650 
651 static int coroutine_fn
652 blkdebug_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
653                     QEMUIOVector *qiov, BdrvRequestFlags flags)
654 {
655     int err;
656 
657     /* Sanity check block layer guarantees */
658     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
659     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
660     if (bs->bl.max_transfer) {
661         assert(bytes <= bs->bl.max_transfer);
662     }
663 
664     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE);
665     if (err) {
666         return err;
667     }
668 
669     return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
670 }
671 
672 static int coroutine_fn blkdebug_co_flush(BlockDriverState *bs)
673 {
674     int err = rule_check(bs, 0, 0, BLKDEBUG_IO_TYPE_FLUSH);
675 
676     if (err) {
677         return err;
678     }
679 
680     return bdrv_co_flush(bs->file->bs);
681 }
682 
683 static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
684                                                   int64_t offset, int64_t bytes,
685                                                   BdrvRequestFlags flags)
686 {
687     uint32_t align = MAX(bs->bl.request_alignment,
688                          bs->bl.pwrite_zeroes_alignment);
689     int err;
690 
691     /* Only pass through requests that are larger than requested
692      * preferred alignment (so that we test the fallback to writes on
693      * unaligned portions), and check that the block layer never hands
694      * us anything unaligned that crosses an alignment boundary.  */
695     if (bytes < align) {
696         assert(QEMU_IS_ALIGNED(offset, align) ||
697                QEMU_IS_ALIGNED(offset + bytes, align) ||
698                DIV_ROUND_UP(offset, align) ==
699                DIV_ROUND_UP(offset + bytes, align));
700         return -ENOTSUP;
701     }
702     assert(QEMU_IS_ALIGNED(offset, align));
703     assert(QEMU_IS_ALIGNED(bytes, align));
704     if (bs->bl.max_pwrite_zeroes) {
705         assert(bytes <= bs->bl.max_pwrite_zeroes);
706     }
707 
708     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE_ZEROES);
709     if (err) {
710         return err;
711     }
712 
713     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
714 }
715 
716 static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
717                                              int64_t offset, int64_t bytes)
718 {
719     uint32_t align = bs->bl.pdiscard_alignment;
720     int err;
721 
722     /* Only pass through requests that are larger than requested
723      * minimum alignment, and ensure that unaligned requests do not
724      * cross optimum discard boundaries. */
725     if (bytes < bs->bl.request_alignment) {
726         assert(QEMU_IS_ALIGNED(offset, align) ||
727                QEMU_IS_ALIGNED(offset + bytes, align) ||
728                DIV_ROUND_UP(offset, align) ==
729                DIV_ROUND_UP(offset + bytes, align));
730         return -ENOTSUP;
731     }
732     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
733     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
734     if (align && bytes >= align) {
735         assert(QEMU_IS_ALIGNED(offset, align));
736         assert(QEMU_IS_ALIGNED(bytes, align));
737     }
738     if (bs->bl.max_pdiscard) {
739         assert(bytes <= bs->bl.max_pdiscard);
740     }
741 
742     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_DISCARD);
743     if (err) {
744         return err;
745     }
746 
747     return bdrv_co_pdiscard(bs->file, offset, bytes);
748 }
749 
750 static int coroutine_fn blkdebug_co_block_status(BlockDriverState *bs,
751                                                  bool want_zero,
752                                                  int64_t offset,
753                                                  int64_t bytes,
754                                                  int64_t *pnum,
755                                                  int64_t *map,
756                                                  BlockDriverState **file)
757 {
758     int err;
759 
760     assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment));
761 
762     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_BLOCK_STATUS);
763     if (err) {
764         return err;
765     }
766 
767     assert(bs->file && bs->file->bs);
768     *pnum = bytes;
769     *map = offset;
770     *file = bs->file->bs;
771     return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
772 }
773 
774 static void blkdebug_close(BlockDriverState *bs)
775 {
776     BDRVBlkdebugState *s = bs->opaque;
777     BlkdebugRule *rule, *next;
778     int i;
779 
780     for (i = 0; i < BLKDBG__MAX; i++) {
781         QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
782             remove_rule(rule);
783         }
784     }
785 
786     g_free(s->config_file);
787     qemu_mutex_destroy(&s->lock);
788 }
789 
790 /* Called with lock held.  */
791 static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
792 {
793     BDRVBlkdebugState *s = bs->opaque;
794     BlkdebugSuspendedReq *r;
795 
796     r = g_new(BlkdebugSuspendedReq, 1);
797 
798     r->co         = qemu_coroutine_self();
799     r->tag        = g_strdup(rule->options.suspend.tag);
800 
801     remove_rule(rule);
802     QLIST_INSERT_HEAD(&s->suspended_reqs, r, next);
803 
804     if (!qtest_enabled()) {
805         printf("blkdebug: Suspended request '%s'\n", r->tag);
806     }
807 }
808 
809 /* Called with lock held.  */
810 static void process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
811                          int *action_count, int *new_state)
812 {
813     BDRVBlkdebugState *s = bs->opaque;
814 
815     /* Only process rules for the current state */
816     if (rule->state && rule->state != s->state) {
817         return;
818     }
819 
820     /* Take the action */
821     action_count[rule->action]++;
822     switch (rule->action) {
823     case ACTION_INJECT_ERROR:
824         if (action_count[ACTION_INJECT_ERROR] == 1) {
825             QSIMPLEQ_INIT(&s->active_rules);
826         }
827         QSIMPLEQ_INSERT_HEAD(&s->active_rules, rule, active_next);
828         break;
829 
830     case ACTION_SET_STATE:
831         *new_state = rule->options.set_state.new_state;
832         break;
833 
834     case ACTION_SUSPEND:
835         suspend_request(bs, rule);
836         break;
837     }
838 }
839 
840 static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event)
841 {
842     BDRVBlkdebugState *s = bs->opaque;
843     struct BlkdebugRule *rule, *next;
844     int new_state;
845     int actions_count[ACTION__MAX] = { 0 };
846 
847     assert((int)event >= 0 && event < BLKDBG__MAX);
848 
849     WITH_QEMU_LOCK_GUARD(&s->lock) {
850         new_state = s->state;
851         QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) {
852             process_rule(bs, rule, actions_count, &new_state);
853         }
854         s->state = new_state;
855     }
856 
857     while (actions_count[ACTION_SUSPEND] > 0) {
858         qemu_coroutine_yield();
859         actions_count[ACTION_SUSPEND]--;
860     }
861 }
862 
863 static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
864                                      const char *tag)
865 {
866     BDRVBlkdebugState *s = bs->opaque;
867     struct BlkdebugRule *rule;
868     int blkdebug_event;
869 
870     blkdebug_event = qapi_enum_parse(&BlkdebugEvent_lookup, event, -1, NULL);
871     if (blkdebug_event < 0) {
872         return -ENOENT;
873     }
874 
875     rule = g_malloc(sizeof(*rule));
876     *rule = (struct BlkdebugRule) {
877         .event  = blkdebug_event,
878         .action = ACTION_SUSPEND,
879         .state  = 0,
880         .options.suspend.tag = g_strdup(tag),
881     };
882 
883     qemu_mutex_lock(&s->lock);
884     QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next);
885     qemu_mutex_unlock(&s->lock);
886 
887     return 0;
888 }
889 
890 /* Called with lock held. May temporarily release lock. */
891 static int resume_req_by_tag(BDRVBlkdebugState *s, const char *tag, bool all)
892 {
893     BlkdebugSuspendedReq *r;
894 
895 retry:
896     /*
897      * No need for _SAFE, since a different coroutine can remove another node
898      * (not the current one) in this list, and when the current one is removed
899      * the iteration starts back from beginning anyways.
900      */
901     QLIST_FOREACH(r, &s->suspended_reqs, next) {
902         if (!strcmp(r->tag, tag)) {
903             Coroutine *co = r->co;
904 
905             if (!qtest_enabled()) {
906                 printf("blkdebug: Resuming request '%s'\n", r->tag);
907             }
908 
909             QLIST_REMOVE(r, next);
910             g_free(r->tag);
911             g_free(r);
912 
913             qemu_mutex_unlock(&s->lock);
914             qemu_coroutine_enter(co);
915             qemu_mutex_lock(&s->lock);
916 
917             if (all) {
918                 goto retry;
919             }
920             return 0;
921         }
922     }
923     return -ENOENT;
924 }
925 
926 static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
927 {
928     BDRVBlkdebugState *s = bs->opaque;
929     QEMU_LOCK_GUARD(&s->lock);
930     return resume_req_by_tag(s, tag, false);
931 }
932 
933 static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
934                                             const char *tag)
935 {
936     BDRVBlkdebugState *s = bs->opaque;
937     BlkdebugRule *rule, *next;
938     int i, ret = -ENOENT;
939 
940     QEMU_LOCK_GUARD(&s->lock);
941     for (i = 0; i < BLKDBG__MAX; i++) {
942         QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
943             if (rule->action == ACTION_SUSPEND &&
944                 !strcmp(rule->options.suspend.tag, tag)) {
945                 remove_rule(rule);
946                 ret = 0;
947             }
948         }
949     }
950     if (resume_req_by_tag(s, tag, true) == 0) {
951         ret = 0;
952     }
953     return ret;
954 }
955 
956 static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
957 {
958     BDRVBlkdebugState *s = bs->opaque;
959     BlkdebugSuspendedReq *r;
960 
961     QEMU_LOCK_GUARD(&s->lock);
962     QLIST_FOREACH(r, &s->suspended_reqs, next) {
963         if (!strcmp(r->tag, tag)) {
964             return true;
965         }
966     }
967     return false;
968 }
969 
970 static int64_t blkdebug_getlength(BlockDriverState *bs)
971 {
972     return bdrv_getlength(bs->file->bs);
973 }
974 
975 static void blkdebug_refresh_filename(BlockDriverState *bs)
976 {
977     BDRVBlkdebugState *s = bs->opaque;
978     const QDictEntry *e;
979     int ret;
980 
981     if (!bs->file->bs->exact_filename[0]) {
982         return;
983     }
984 
985     for (e = qdict_first(bs->full_open_options); e;
986          e = qdict_next(bs->full_open_options, e))
987     {
988         /* Real child options are under "image", but "x-image" may
989          * contain a filename */
990         if (strcmp(qdict_entry_key(e), "config") &&
991             strcmp(qdict_entry_key(e), "image") &&
992             strcmp(qdict_entry_key(e), "x-image") &&
993             strcmp(qdict_entry_key(e), "driver"))
994         {
995             return;
996         }
997     }
998 
999     ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename),
1000                    "blkdebug:%s:%s",
1001                    s->config_file ?: "", bs->file->bs->exact_filename);
1002     if (ret >= sizeof(bs->exact_filename)) {
1003         /* An overflow makes the filename unusable, so do not report any */
1004         bs->exact_filename[0] = 0;
1005     }
1006 }
1007 
1008 static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp)
1009 {
1010     BDRVBlkdebugState *s = bs->opaque;
1011 
1012     if (s->align) {
1013         bs->bl.request_alignment = s->align;
1014     }
1015     if (s->max_transfer) {
1016         bs->bl.max_transfer = s->max_transfer;
1017     }
1018     if (s->opt_write_zero) {
1019         bs->bl.pwrite_zeroes_alignment = s->opt_write_zero;
1020     }
1021     if (s->max_write_zero) {
1022         bs->bl.max_pwrite_zeroes = s->max_write_zero;
1023     }
1024     if (s->opt_discard) {
1025         bs->bl.pdiscard_alignment = s->opt_discard;
1026     }
1027     if (s->max_discard) {
1028         bs->bl.max_pdiscard = s->max_discard;
1029     }
1030 }
1031 
1032 static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
1033                                    BlockReopenQueue *queue, Error **errp)
1034 {
1035     return 0;
1036 }
1037 
1038 static void blkdebug_child_perm(BlockDriverState *bs, BdrvChild *c,
1039                                 BdrvChildRole role,
1040                                 BlockReopenQueue *reopen_queue,
1041                                 uint64_t perm, uint64_t shared,
1042                                 uint64_t *nperm, uint64_t *nshared)
1043 {
1044     BDRVBlkdebugState *s = bs->opaque;
1045 
1046     bdrv_default_perms(bs, c, role, reopen_queue,
1047                        perm, shared, nperm, nshared);
1048 
1049     *nperm |= s->take_child_perms;
1050     *nshared &= ~s->unshare_child_perms;
1051 }
1052 
/*
 * NULL-terminated option-name table referenced by the driver's
 * .strong_runtime_opts field.  Entries ending in '.' are written as
 * prefixes in this table.
 */
static const char *const blkdebug_strong_runtime_opts[] = {
    /* Rule configuration */
    "config",
    "inject-error.",
    "set-state.",

    /* Limit overrides */
    "align",
    "max-transfer",
    "opt-write-zero",
    "max-write-zero",
    "opt-discard",
    "max-discard",

    /* End-of-list marker */
    NULL
};
1066 
1067 static BlockDriver bdrv_blkdebug = {
1068     .format_name            = "blkdebug",
1069     .protocol_name          = "blkdebug",
1070     .instance_size          = sizeof(BDRVBlkdebugState),
1071     .is_filter              = true,
1072 
1073     .bdrv_parse_filename    = blkdebug_parse_filename,
1074     .bdrv_file_open         = blkdebug_open,
1075     .bdrv_close             = blkdebug_close,
1076     .bdrv_reopen_prepare    = blkdebug_reopen_prepare,
1077     .bdrv_child_perm        = blkdebug_child_perm,
1078 
1079     .bdrv_getlength         = blkdebug_getlength,
1080     .bdrv_refresh_filename  = blkdebug_refresh_filename,
1081     .bdrv_refresh_limits    = blkdebug_refresh_limits,
1082 
1083     .bdrv_co_preadv         = blkdebug_co_preadv,
1084     .bdrv_co_pwritev        = blkdebug_co_pwritev,
1085     .bdrv_co_flush_to_disk  = blkdebug_co_flush,
1086     .bdrv_co_pwrite_zeroes  = blkdebug_co_pwrite_zeroes,
1087     .bdrv_co_pdiscard       = blkdebug_co_pdiscard,
1088     .bdrv_co_block_status   = blkdebug_co_block_status,
1089 
1090     .bdrv_debug_event           = blkdebug_debug_event,
1091     .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
1092     .bdrv_debug_remove_breakpoint
1093                                 = blkdebug_debug_remove_breakpoint,
1094     .bdrv_debug_resume          = blkdebug_debug_resume,
1095     .bdrv_debug_is_suspended    = blkdebug_debug_is_suspended,
1096 
1097     .strong_runtime_opts        = blkdebug_strong_runtime_opts,
1098 };
1099 
1100 static void bdrv_blkdebug_init(void)
1101 {
1102     bdrv_register(&bdrv_blkdebug);
1103 }
1104 
1105 block_init(bdrv_blkdebug_init);
1106