xref: /openbmc/qemu/block/blkdebug.c (revision 978c2bf9)
1 /*
2  * Block protocol for I/O error injection
3  *
4  * Copyright (C) 2016-2017 Red Hat, Inc.
5  * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qapi/error.h"
28 #include "qemu/cutils.h"
29 #include "qemu/config-file.h"
30 #include "block/block-io.h"
31 #include "block/block_int.h"
32 #include "block/qdict.h"
33 #include "qemu/module.h"
34 #include "qemu/option.h"
35 #include "qapi/qapi-visit-block-core.h"
36 #include "qapi/qmp/qdict.h"
37 #include "qapi/qmp/qlist.h"
38 #include "qapi/qmp/qstring.h"
39 #include "qapi/qobject-input-visitor.h"
40 #include "sysemu/qtest.h"
41 
42 /* All APIs are thread-safe */
43 
/*
 * Per-node driver state of a blkdebug BlockDriverState (bs->opaque).
 */
typedef struct BDRVBlkdebugState {
    /* IN: initialized in blkdebug_open() and never changed */
    /*
     * Limit overrides taken from the runtime options of the same name
     * ("align", "max-transfer", ...).  A value of 0 means "no override";
     * blkdebug_refresh_limits() only applies the non-zero ones.
     */
    uint64_t align;
    uint64_t max_transfer;
    uint64_t opt_write_zero;
    uint64_t max_write_zero;
    uint64_t opt_discard;
    uint64_t max_discard;
    char *config_file; /* For blkdebug_refresh_filename() */
    /* initialized in blkdebug_parse_perms() */
    uint64_t take_child_perms;
    uint64_t unshare_child_perms;

    /* State. Protected by lock */
    /*
     * Current state number; starts at 1 in blkdebug_open().  Rules with a
     * non-zero state only fire while it equals this value.
     */
    int state;
    /* All configured rules, one list per BlkdebugEvent */
    QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
    /*
     * Inject-error rules armed by the most recent debug event that matched
     * at least one of them; consulted by rule_check() for every request.
     */
    QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
    /* Requests parked by suspend_request() and waiting to be resumed */
    QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs;
    QemuMutex lock;
} BDRVBlkdebugState;
64 
/*
 * AIO control block carrying a return code.
 * NOTE(review): nothing in the visible part of this file references this
 * type - possibly a leftover; confirm before removing.
 */
typedef struct BlkdebugAIOCB {
    BlockAIOCB common;
    int ret;
} BlkdebugAIOCB;
69 
/*
 * One request that hit a suspend rule and is waiting to be resumed by tag.
 */
typedef struct BlkdebugSuspendedReq {
    /* IN: initialized in suspend_request() */
    Coroutine *co;      /* coroutine to re-enter on resume */
    char *tag;          /* private copy of the rule's tag; freed on resume */

    /* List entry protected by BDRVBlkdebugState's lock */
    QLIST_ENTRY(BlkdebugSuspendedReq) next;
} BlkdebugSuspendedReq;
78 
/* Kinds of action a BlkdebugRule can carry (stored in BlkdebugRule.action) */
enum {
    ACTION_INJECT_ERROR,    /* arm an error for subsequent requests */
    ACTION_SET_STATE,       /* switch the driver to another state number */
    ACTION_SUSPEND,         /* park the coroutine that triggered the event */
    ACTION__MAX,            /* number of actions; sizes per-action counters */
};
85 
/*
 * A single rule: "when @event fires while the driver is in @state, perform
 * @action".  Which member of @options is valid depends on @action.
 */
typedef struct BlkdebugRule {
    /* IN: initialized in add_rule() or blkdebug_debug_breakpoint() */
    BlkdebugEvent event;
    int action;     /* one of the ACTION_* constants */
    int state;      /* 0 matches any state, otherwise the required state */
    union {
        /* ACTION_INJECT_ERROR */
        struct {
            uint64_t iotype_mask;   /* bit (1 << BlkdebugIOType) per type */
            int error;              /* positive errno; returned negated */
            int immediately;        /* non-zero: fail without rescheduling */
            int once;               /* non-zero: rule is dropped after use */
            int64_t offset;         /* byte offset to hit, -1 for any */
        } inject;
        /* ACTION_SET_STATE */
        struct {
            int new_state;
        } set_state;
        /* ACTION_SUSPEND */
        struct {
            char *tag;              /* owned by the rule */
        } suspend;
    } options;

    /* List entries protected by BDRVBlkdebugState's lock */
    QLIST_ENTRY(BlkdebugRule) next;             /* link in rules[event] */
    QSIMPLEQ_ENTRY(BlkdebugRule) active_next;   /* link in active_rules */
} BlkdebugRule;
111 
/*
 * inject.iotype_mask stores one bit per BlkdebugIOType (1ull << iotype), so
 * the enum must not grow beyond the width of the mask.
 */
QEMU_BUILD_BUG_MSG(BLKDEBUG_IO_TYPE__MAX > 64,
                   "BlkdebugIOType mask does not fit into an uint64_t");
114 
/*
 * Options accepted by an "inject-error" rule; consumed by add_rule() with
 * ACTION_INJECT_ERROR.
 */
static QemuOptsList inject_error_opts = {
    .name = "inject-error",
    .head = QTAILQ_HEAD_INITIALIZER(inject_error_opts.head),
    .desc = {
        {
            /* Mandatory: name of the BlkdebugEvent that arms the rule */
            .name = "event",
            .type = QEMU_OPT_STRING,
        },
        {
            /* State in which the rule applies; defaults to 0 (any state) */
            .name = "state",
            .type = QEMU_OPT_NUMBER,
        },
        {
            /*
             * BlkdebugIOType name; when absent the rule covers read, write,
             * write-zeroes, discard and flush.
             */
            .name = "iotype",
            .type = QEMU_OPT_STRING,
        },
        {
            /* Error number to inject; defaults to EIO */
            .name = "errno",
            .type = QEMU_OPT_NUMBER,
        },
        {
            /*
             * Sector the request must cover (converted to a byte offset with
             * BDRV_SECTOR_SIZE); when absent any request matches.
             */
            .name = "sector",
            .type = QEMU_OPT_NUMBER,
        },
        {
            /* Drop the rule after it has injected one error */
            .name = "once",
            .type = QEMU_OPT_BOOL,
        },
        {
            /* Return the error without rescheduling the coroutine first */
            .name = "immediately",
            .type = QEMU_OPT_BOOL,
        },
        { /* end of list */ }
    },
};
150 
/*
 * Options accepted by a "set-state" rule; consumed by add_rule() with
 * ACTION_SET_STATE.
 */
static QemuOptsList set_state_opts = {
    .name = "set-state",
    .head = QTAILQ_HEAD_INITIALIZER(set_state_opts.head),
    .desc = {
        {
            /* Mandatory: name of the BlkdebugEvent that triggers the rule */
            .name = "event",
            .type = QEMU_OPT_STRING,
        },
        {
            /* State in which the rule applies; defaults to 0 (any state) */
            .name = "state",
            .type = QEMU_OPT_NUMBER,
        },
        {
            /* State to switch to; defaults to 0 */
            .name = "new_state",
            .type = QEMU_OPT_NUMBER,
        },
        { /* end of list */ }
    },
};
170 
/*
 * NULL-terminated list of the option groups read_config() hands to the
 * config parsers (config file and options QDict).
 */
static QemuOptsList *config_groups[] = {
    &inject_error_opts,
    &set_state_opts,
    NULL
};
176 
/* Opaque argument handed to add_rule() through qemu_opts_foreach() */
struct add_rule_data {
    BDRVBlkdebugState *s;   /* driver state receiving the rules */
    int action;             /* ACTION_* that every visited group stands for */
};
181 
182 static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
183 {
184     struct add_rule_data *d = opaque;
185     BDRVBlkdebugState *s = d->s;
186     const char *event_name;
187     int event;
188     struct BlkdebugRule *rule;
189     int64_t sector;
190     BlkdebugIOType iotype;
191     Error *local_error = NULL;
192 
193     /* Find the right event for the rule */
194     event_name = qemu_opt_get(opts, "event");
195     if (!event_name) {
196         error_setg(errp, "Missing event name for rule");
197         return -1;
198     }
199     event = qapi_enum_parse(&BlkdebugEvent_lookup, event_name, -1, errp);
200     if (event < 0) {
201         return -1;
202     }
203 
204     /* Set attributes common for all actions */
205     rule = g_malloc0(sizeof(*rule));
206     *rule = (struct BlkdebugRule) {
207         .event  = event,
208         .action = d->action,
209         .state  = qemu_opt_get_number(opts, "state", 0),
210     };
211 
212     /* Parse action-specific options */
213     switch (d->action) {
214     case ACTION_INJECT_ERROR:
215         rule->options.inject.error = qemu_opt_get_number(opts, "errno", EIO);
216         rule->options.inject.once  = qemu_opt_get_bool(opts, "once", 0);
217         rule->options.inject.immediately =
218             qemu_opt_get_bool(opts, "immediately", 0);
219         sector = qemu_opt_get_number(opts, "sector", -1);
220         rule->options.inject.offset =
221             sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE;
222 
223         iotype = qapi_enum_parse(&BlkdebugIOType_lookup,
224                                  qemu_opt_get(opts, "iotype"),
225                                  BLKDEBUG_IO_TYPE__MAX, &local_error);
226         if (local_error) {
227             error_propagate(errp, local_error);
228             g_free(rule);
229             return -1;
230         }
231         if (iotype != BLKDEBUG_IO_TYPE__MAX) {
232             rule->options.inject.iotype_mask = (1ull << iotype);
233         } else {
234             /* Apply the default */
235             rule->options.inject.iotype_mask =
236                 (1ull << BLKDEBUG_IO_TYPE_READ)
237                 | (1ull << BLKDEBUG_IO_TYPE_WRITE)
238                 | (1ull << BLKDEBUG_IO_TYPE_WRITE_ZEROES)
239                 | (1ull << BLKDEBUG_IO_TYPE_DISCARD)
240                 | (1ull << BLKDEBUG_IO_TYPE_FLUSH);
241         }
242 
243         break;
244 
245     case ACTION_SET_STATE:
246         rule->options.set_state.new_state =
247             qemu_opt_get_number(opts, "new_state", 0);
248         break;
249 
250     case ACTION_SUSPEND:
251         rule->options.suspend.tag =
252             g_strdup(qemu_opt_get(opts, "tag"));
253         break;
254     };
255 
256     /* Add the rule */
257     qemu_mutex_lock(&s->lock);
258     QLIST_INSERT_HEAD(&s->rules[event], rule, next);
259     qemu_mutex_unlock(&s->lock);
260 
261     return 0;
262 }
263 
264 /* Called with lock held or from .bdrv_close */
265 static void remove_rule(BlkdebugRule *rule)
266 {
267     switch (rule->action) {
268     case ACTION_INJECT_ERROR:
269     case ACTION_SET_STATE:
270         break;
271     case ACTION_SUSPEND:
272         g_free(rule->options.suspend.tag);
273         break;
274     }
275 
276     QLIST_REMOVE(rule, next);
277     g_free(rule);
278 }
279 
280 static int read_config(BDRVBlkdebugState *s, const char *filename,
281                        QDict *options, Error **errp)
282 {
283     FILE *f = NULL;
284     int ret;
285     struct add_rule_data d;
286     Error *local_err = NULL;
287 
288     if (filename) {
289         f = fopen(filename, "r");
290         if (f == NULL) {
291             error_setg_errno(errp, errno, "Could not read blkdebug config file");
292             return -errno;
293         }
294 
295         ret = qemu_config_parse(f, config_groups, filename, errp);
296         if (ret < 0) {
297             goto fail;
298         }
299     }
300 
301     if (!qemu_config_parse_qdict(options, config_groups, errp)) {
302         ret = -EINVAL;
303         goto fail;
304     }
305 
306     d.s = s;
307     d.action = ACTION_INJECT_ERROR;
308     qemu_opts_foreach(&inject_error_opts, add_rule, &d, &local_err);
309     if (local_err) {
310         error_propagate(errp, local_err);
311         ret = -EINVAL;
312         goto fail;
313     }
314 
315     d.action = ACTION_SET_STATE;
316     qemu_opts_foreach(&set_state_opts, add_rule, &d, &local_err);
317     if (local_err) {
318         error_propagate(errp, local_err);
319         ret = -EINVAL;
320         goto fail;
321     }
322 
323     ret = 0;
324 fail:
325     qemu_opts_reset(&inject_error_opts);
326     qemu_opts_reset(&set_state_opts);
327     if (f) {
328         fclose(f);
329     }
330     return ret;
331 }
332 
333 /* Valid blkdebug filenames look like blkdebug:path/to/config:path/to/image */
334 static void blkdebug_parse_filename(const char *filename, QDict *options,
335                                     Error **errp)
336 {
337     const char *c;
338 
339     /* Parse the blkdebug: prefix */
340     if (!strstart(filename, "blkdebug:", &filename)) {
341         /* There was no prefix; therefore, all options have to be already
342            present in the QDict (except for the filename) */
343         qdict_put_str(options, "x-image", filename);
344         return;
345     }
346 
347     /* Parse config file path */
348     c = strchr(filename, ':');
349     if (c == NULL) {
350         error_setg(errp, "blkdebug requires both config file and image path");
351         return;
352     }
353 
354     if (c != filename) {
355         QString *config_path;
356         config_path = qstring_from_substr(filename, 0, c - filename);
357         qdict_put(options, "config", config_path);
358     }
359 
360     /* TODO Allow multi-level nesting and set file.filename here */
361     filename = c + 1;
362     qdict_put_str(options, "x-image", filename);
363 }
364 
365 static int blkdebug_parse_perm_list(uint64_t *dest, QDict *options,
366                                     const char *prefix, Error **errp)
367 {
368     int ret = 0;
369     QDict *subqdict = NULL;
370     QObject *crumpled_subqdict = NULL;
371     Visitor *v = NULL;
372     BlockPermissionList *perm_list = NULL, *element;
373 
374     *dest = 0;
375 
376     qdict_extract_subqdict(options, &subqdict, prefix);
377     if (!qdict_size(subqdict)) {
378         goto out;
379     }
380 
381     crumpled_subqdict = qdict_crumple(subqdict, errp);
382     if (!crumpled_subqdict) {
383         ret = -EINVAL;
384         goto out;
385     }
386 
387     v = qobject_input_visitor_new(crumpled_subqdict);
388     if (!visit_type_BlockPermissionList(v, NULL, &perm_list, errp)) {
389         ret = -EINVAL;
390         goto out;
391     }
392 
393     for (element = perm_list; element; element = element->next) {
394         *dest |= bdrv_qapi_perm_to_blk_perm(element->value);
395     }
396 
397 out:
398     qapi_free_BlockPermissionList(perm_list);
399     visit_free(v);
400     qobject_unref(subqdict);
401     qobject_unref(crumpled_subqdict);
402     return ret;
403 }
404 
405 static int blkdebug_parse_perms(BDRVBlkdebugState *s, QDict *options,
406                                 Error **errp)
407 {
408     int ret;
409 
410     ret = blkdebug_parse_perm_list(&s->take_child_perms, options,
411                                    "take-child-perms.", errp);
412     if (ret < 0) {
413         return ret;
414     }
415 
416     ret = blkdebug_parse_perm_list(&s->unshare_child_perms, options,
417                                    "unshare-child-perms.", errp);
418     if (ret < 0) {
419         return ret;
420     }
421 
422     return 0;
423 }
424 
/*
 * Runtime options absorbed from the options QDict by blkdebug_open().  The
 * size options default to 0 there, which leaves the corresponding limit
 * untouched.
 */
static QemuOptsList runtime_opts = {
    .name = "blkdebug",
    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
    .desc = {
        {
            .name = "config",
            .type = QEMU_OPT_STRING,
            .help = "Path to the configuration file",
        },
        {
            /* Set by blkdebug_parse_filename() from the legacy filename */
            .name = "x-image",
            .type = QEMU_OPT_STRING,
            .help = "[internal use only, will be removed]",
        },
        {
            .name = "align",
            .type = QEMU_OPT_SIZE,
            .help = "Required alignment in bytes",
        },
        {
            .name = "max-transfer",
            .type = QEMU_OPT_SIZE,
            .help = "Maximum transfer size in bytes",
        },
        {
            .name = "opt-write-zero",
            .type = QEMU_OPT_SIZE,
            .help = "Optimum write zero alignment in bytes",
        },
        {
            .name = "max-write-zero",
            .type = QEMU_OPT_SIZE,
            .help = "Maximum write zero size in bytes",
        },
        {
            .name = "opt-discard",
            .type = QEMU_OPT_SIZE,
            .help = "Optimum discard alignment in bytes",
        },
        {
            .name = "max-discard",
            .type = QEMU_OPT_SIZE,
            .help = "Maximum discard size in bytes",
        },
        { /* end of list */ }
    },
};
472 
473 static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
474                          Error **errp)
475 {
476     BDRVBlkdebugState *s = bs->opaque;
477     QemuOpts *opts;
478     int ret;
479     uint64_t align;
480 
481     qemu_mutex_init(&s->lock);
482     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
483     if (!qemu_opts_absorb_qdict(opts, options, errp)) {
484         ret = -EINVAL;
485         goto out;
486     }
487 
488     /* Read rules from config file or command line options */
489     s->config_file = g_strdup(qemu_opt_get(opts, "config"));
490     ret = read_config(s, s->config_file, options, errp);
491     if (ret) {
492         goto out;
493     }
494 
495     /* Set initial state */
496     s->state = 1;
497 
498     /* Parse permissions modifiers before opening the image file */
499     ret = blkdebug_parse_perms(s, options, errp);
500     if (ret < 0) {
501         goto out;
502     }
503 
504     /* Open the image file */
505     ret = bdrv_open_file_child(qemu_opt_get(opts, "x-image"), options, "image",
506                                bs, errp);
507     if (ret < 0) {
508         goto out;
509     }
510 
511     bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
512         (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
513     bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
514         ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
515             bs->file->bs->supported_zero_flags);
516     ret = -EINVAL;
517 
518     /* Set alignment overrides */
519     s->align = qemu_opt_get_size(opts, "align", 0);
520     if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) {
521         error_setg(errp, "Cannot meet constraints with align %" PRIu64,
522                    s->align);
523         goto out;
524     }
525     align = MAX(s->align, bs->file->bs->bl.request_alignment);
526 
527     s->max_transfer = qemu_opt_get_size(opts, "max-transfer", 0);
528     if (s->max_transfer &&
529         (s->max_transfer >= INT_MAX ||
530          !QEMU_IS_ALIGNED(s->max_transfer, align))) {
531         error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64,
532                    s->max_transfer);
533         goto out;
534     }
535 
536     s->opt_write_zero = qemu_opt_get_size(opts, "opt-write-zero", 0);
537     if (s->opt_write_zero &&
538         (s->opt_write_zero >= INT_MAX ||
539          !QEMU_IS_ALIGNED(s->opt_write_zero, align))) {
540         error_setg(errp, "Cannot meet constraints with opt-write-zero %" PRIu64,
541                    s->opt_write_zero);
542         goto out;
543     }
544 
545     s->max_write_zero = qemu_opt_get_size(opts, "max-write-zero", 0);
546     if (s->max_write_zero &&
547         (s->max_write_zero >= INT_MAX ||
548          !QEMU_IS_ALIGNED(s->max_write_zero,
549                           MAX(s->opt_write_zero, align)))) {
550         error_setg(errp, "Cannot meet constraints with max-write-zero %" PRIu64,
551                    s->max_write_zero);
552         goto out;
553     }
554 
555     s->opt_discard = qemu_opt_get_size(opts, "opt-discard", 0);
556     if (s->opt_discard &&
557         (s->opt_discard >= INT_MAX ||
558          !QEMU_IS_ALIGNED(s->opt_discard, align))) {
559         error_setg(errp, "Cannot meet constraints with opt-discard %" PRIu64,
560                    s->opt_discard);
561         goto out;
562     }
563 
564     s->max_discard = qemu_opt_get_size(opts, "max-discard", 0);
565     if (s->max_discard &&
566         (s->max_discard >= INT_MAX ||
567          !QEMU_IS_ALIGNED(s->max_discard,
568                           MAX(s->opt_discard, align)))) {
569         error_setg(errp, "Cannot meet constraints with max-discard %" PRIu64,
570                    s->max_discard);
571         goto out;
572     }
573 
574     bdrv_debug_event(bs, BLKDBG_NONE);
575 
576     ret = 0;
577 out:
578     if (ret < 0) {
579         qemu_mutex_destroy(&s->lock);
580         g_free(s->config_file);
581     }
582     qemu_opts_del(opts);
583     return ret;
584 }
585 
586 static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
587                       BlkdebugIOType iotype)
588 {
589     BDRVBlkdebugState *s = bs->opaque;
590     BlkdebugRule *rule = NULL;
591     int error;
592     bool immediately;
593 
594     qemu_mutex_lock(&s->lock);
595     QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
596         uint64_t inject_offset = rule->options.inject.offset;
597 
598         if ((inject_offset == -1 ||
599              (bytes && inject_offset >= offset &&
600               inject_offset < offset + bytes)) &&
601             (rule->options.inject.iotype_mask & (1ull << iotype)))
602         {
603             break;
604         }
605     }
606 
607     if (!rule || !rule->options.inject.error) {
608         qemu_mutex_unlock(&s->lock);
609         return 0;
610     }
611 
612     immediately = rule->options.inject.immediately;
613     error = rule->options.inject.error;
614 
615     if (rule->options.inject.once) {
616         QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
617         remove_rule(rule);
618     }
619 
620     qemu_mutex_unlock(&s->lock);
621     if (!immediately) {
622         aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
623         qemu_coroutine_yield();
624     }
625 
626     return -error;
627 }
628 
629 static int coroutine_fn
630 blkdebug_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
631                    QEMUIOVector *qiov, BdrvRequestFlags flags)
632 {
633     int err;
634 
635     /* Sanity check block layer guarantees */
636     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
637     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
638     if (bs->bl.max_transfer) {
639         assert(bytes <= bs->bl.max_transfer);
640     }
641 
642     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_READ);
643     if (err) {
644         return err;
645     }
646 
647     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
648 }
649 
650 static int coroutine_fn
651 blkdebug_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
652                     QEMUIOVector *qiov, BdrvRequestFlags flags)
653 {
654     int err;
655 
656     /* Sanity check block layer guarantees */
657     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
658     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
659     if (bs->bl.max_transfer) {
660         assert(bytes <= bs->bl.max_transfer);
661     }
662 
663     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE);
664     if (err) {
665         return err;
666     }
667 
668     return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
669 }
670 
671 static int coroutine_fn blkdebug_co_flush(BlockDriverState *bs)
672 {
673     int err = rule_check(bs, 0, 0, BLKDEBUG_IO_TYPE_FLUSH);
674 
675     if (err) {
676         return err;
677     }
678 
679     return bdrv_co_flush(bs->file->bs);
680 }
681 
682 static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
683                                                   int64_t offset, int64_t bytes,
684                                                   BdrvRequestFlags flags)
685 {
686     uint32_t align = MAX(bs->bl.request_alignment,
687                          bs->bl.pwrite_zeroes_alignment);
688     int err;
689 
690     /* Only pass through requests that are larger than requested
691      * preferred alignment (so that we test the fallback to writes on
692      * unaligned portions), and check that the block layer never hands
693      * us anything unaligned that crosses an alignment boundary.  */
694     if (bytes < align) {
695         assert(QEMU_IS_ALIGNED(offset, align) ||
696                QEMU_IS_ALIGNED(offset + bytes, align) ||
697                DIV_ROUND_UP(offset, align) ==
698                DIV_ROUND_UP(offset + bytes, align));
699         return -ENOTSUP;
700     }
701     assert(QEMU_IS_ALIGNED(offset, align));
702     assert(QEMU_IS_ALIGNED(bytes, align));
703     if (bs->bl.max_pwrite_zeroes) {
704         assert(bytes <= bs->bl.max_pwrite_zeroes);
705     }
706 
707     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE_ZEROES);
708     if (err) {
709         return err;
710     }
711 
712     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
713 }
714 
715 static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
716                                              int64_t offset, int64_t bytes)
717 {
718     uint32_t align = bs->bl.pdiscard_alignment;
719     int err;
720 
721     /* Only pass through requests that are larger than requested
722      * minimum alignment, and ensure that unaligned requests do not
723      * cross optimum discard boundaries. */
724     if (bytes < bs->bl.request_alignment) {
725         assert(QEMU_IS_ALIGNED(offset, align) ||
726                QEMU_IS_ALIGNED(offset + bytes, align) ||
727                DIV_ROUND_UP(offset, align) ==
728                DIV_ROUND_UP(offset + bytes, align));
729         return -ENOTSUP;
730     }
731     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
732     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
733     if (align && bytes >= align) {
734         assert(QEMU_IS_ALIGNED(offset, align));
735         assert(QEMU_IS_ALIGNED(bytes, align));
736     }
737     if (bs->bl.max_pdiscard) {
738         assert(bytes <= bs->bl.max_pdiscard);
739     }
740 
741     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_DISCARD);
742     if (err) {
743         return err;
744     }
745 
746     return bdrv_co_pdiscard(bs->file, offset, bytes);
747 }
748 
749 static int coroutine_fn blkdebug_co_block_status(BlockDriverState *bs,
750                                                  bool want_zero,
751                                                  int64_t offset,
752                                                  int64_t bytes,
753                                                  int64_t *pnum,
754                                                  int64_t *map,
755                                                  BlockDriverState **file)
756 {
757     int err;
758 
759     assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment));
760 
761     err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_BLOCK_STATUS);
762     if (err) {
763         return err;
764     }
765 
766     assert(bs->file && bs->file->bs);
767     *pnum = bytes;
768     *map = offset;
769     *file = bs->file->bs;
770     return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
771 }
772 
773 static void blkdebug_close(BlockDriverState *bs)
774 {
775     BDRVBlkdebugState *s = bs->opaque;
776     BlkdebugRule *rule, *next;
777     int i;
778 
779     for (i = 0; i < BLKDBG__MAX; i++) {
780         QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
781             remove_rule(rule);
782         }
783     }
784 
785     g_free(s->config_file);
786     qemu_mutex_destroy(&s->lock);
787 }
788 
789 /* Called with lock held.  */
790 static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
791 {
792     BDRVBlkdebugState *s = bs->opaque;
793     BlkdebugSuspendedReq *r;
794 
795     r = g_new(BlkdebugSuspendedReq, 1);
796 
797     r->co         = qemu_coroutine_self();
798     r->tag        = g_strdup(rule->options.suspend.tag);
799 
800     remove_rule(rule);
801     QLIST_INSERT_HEAD(&s->suspended_reqs, r, next);
802 
803     if (!qtest_enabled()) {
804         printf("blkdebug: Suspended request '%s'\n", r->tag);
805     }
806 }
807 
808 /* Called with lock held.  */
809 static void process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
810                          int *action_count, int *new_state)
811 {
812     BDRVBlkdebugState *s = bs->opaque;
813 
814     /* Only process rules for the current state */
815     if (rule->state && rule->state != s->state) {
816         return;
817     }
818 
819     /* Take the action */
820     action_count[rule->action]++;
821     switch (rule->action) {
822     case ACTION_INJECT_ERROR:
823         if (action_count[ACTION_INJECT_ERROR] == 1) {
824             QSIMPLEQ_INIT(&s->active_rules);
825         }
826         QSIMPLEQ_INSERT_HEAD(&s->active_rules, rule, active_next);
827         break;
828 
829     case ACTION_SET_STATE:
830         *new_state = rule->options.set_state.new_state;
831         break;
832 
833     case ACTION_SUSPEND:
834         suspend_request(bs, rule);
835         break;
836     }
837 }
838 
839 static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event)
840 {
841     BDRVBlkdebugState *s = bs->opaque;
842     struct BlkdebugRule *rule, *next;
843     int new_state;
844     int actions_count[ACTION__MAX] = { 0 };
845 
846     assert((int)event >= 0 && event < BLKDBG__MAX);
847 
848     WITH_QEMU_LOCK_GUARD(&s->lock) {
849         new_state = s->state;
850         QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) {
851             process_rule(bs, rule, actions_count, &new_state);
852         }
853         s->state = new_state;
854     }
855 
856     while (actions_count[ACTION_SUSPEND] > 0) {
857         qemu_coroutine_yield();
858         actions_count[ACTION_SUSPEND]--;
859     }
860 }
861 
862 static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
863                                      const char *tag)
864 {
865     BDRVBlkdebugState *s = bs->opaque;
866     struct BlkdebugRule *rule;
867     int blkdebug_event;
868 
869     blkdebug_event = qapi_enum_parse(&BlkdebugEvent_lookup, event, -1, NULL);
870     if (blkdebug_event < 0) {
871         return -ENOENT;
872     }
873 
874     rule = g_malloc(sizeof(*rule));
875     *rule = (struct BlkdebugRule) {
876         .event  = blkdebug_event,
877         .action = ACTION_SUSPEND,
878         .state  = 0,
879         .options.suspend.tag = g_strdup(tag),
880     };
881 
882     qemu_mutex_lock(&s->lock);
883     QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next);
884     qemu_mutex_unlock(&s->lock);
885 
886     return 0;
887 }
888 
889 /* Called with lock held. May temporarily release lock. */
890 static int resume_req_by_tag(BDRVBlkdebugState *s, const char *tag, bool all)
891 {
892     BlkdebugSuspendedReq *r;
893 
894 retry:
895     /*
896      * No need for _SAFE, since a different coroutine can remove another node
897      * (not the current one) in this list, and when the current one is removed
898      * the iteration starts back from beginning anyways.
899      */
900     QLIST_FOREACH(r, &s->suspended_reqs, next) {
901         if (!strcmp(r->tag, tag)) {
902             Coroutine *co = r->co;
903 
904             if (!qtest_enabled()) {
905                 printf("blkdebug: Resuming request '%s'\n", r->tag);
906             }
907 
908             QLIST_REMOVE(r, next);
909             g_free(r->tag);
910             g_free(r);
911 
912             qemu_mutex_unlock(&s->lock);
913             qemu_coroutine_enter(co);
914             qemu_mutex_lock(&s->lock);
915 
916             if (all) {
917                 goto retry;
918             }
919             return 0;
920         }
921     }
922     return -ENOENT;
923 }
924 
925 static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
926 {
927     BDRVBlkdebugState *s = bs->opaque;
928     QEMU_LOCK_GUARD(&s->lock);
929     return resume_req_by_tag(s, tag, false);
930 }
931 
932 static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
933                                             const char *tag)
934 {
935     BDRVBlkdebugState *s = bs->opaque;
936     BlkdebugRule *rule, *next;
937     int i, ret = -ENOENT;
938 
939     QEMU_LOCK_GUARD(&s->lock);
940     for (i = 0; i < BLKDBG__MAX; i++) {
941         QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
942             if (rule->action == ACTION_SUSPEND &&
943                 !strcmp(rule->options.suspend.tag, tag)) {
944                 remove_rule(rule);
945                 ret = 0;
946             }
947         }
948     }
949     if (resume_req_by_tag(s, tag, true) == 0) {
950         ret = 0;
951     }
952     return ret;
953 }
954 
955 static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
956 {
957     BDRVBlkdebugState *s = bs->opaque;
958     BlkdebugSuspendedReq *r;
959 
960     QEMU_LOCK_GUARD(&s->lock);
961     QLIST_FOREACH(r, &s->suspended_reqs, next) {
962         if (!strcmp(r->tag, tag)) {
963             return true;
964         }
965     }
966     return false;
967 }
968 
969 static int64_t blkdebug_getlength(BlockDriverState *bs)
970 {
971     return bdrv_getlength(bs->file->bs);
972 }
973 
974 static void blkdebug_refresh_filename(BlockDriverState *bs)
975 {
976     BDRVBlkdebugState *s = bs->opaque;
977     const QDictEntry *e;
978     int ret;
979 
980     if (!bs->file->bs->exact_filename[0]) {
981         return;
982     }
983 
984     for (e = qdict_first(bs->full_open_options); e;
985          e = qdict_next(bs->full_open_options, e))
986     {
987         /* Real child options are under "image", but "x-image" may
988          * contain a filename */
989         if (strcmp(qdict_entry_key(e), "config") &&
990             strcmp(qdict_entry_key(e), "image") &&
991             strcmp(qdict_entry_key(e), "x-image") &&
992             strcmp(qdict_entry_key(e), "driver"))
993         {
994             return;
995         }
996     }
997 
998     ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename),
999                    "blkdebug:%s:%s",
1000                    s->config_file ?: "", bs->file->bs->exact_filename);
1001     if (ret >= sizeof(bs->exact_filename)) {
1002         /* An overflow makes the filename unusable, so do not report any */
1003         bs->exact_filename[0] = 0;
1004     }
1005 }
1006 
1007 static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp)
1008 {
1009     BDRVBlkdebugState *s = bs->opaque;
1010 
1011     if (s->align) {
1012         bs->bl.request_alignment = s->align;
1013     }
1014     if (s->max_transfer) {
1015         bs->bl.max_transfer = s->max_transfer;
1016     }
1017     if (s->opt_write_zero) {
1018         bs->bl.pwrite_zeroes_alignment = s->opt_write_zero;
1019     }
1020     if (s->max_write_zero) {
1021         bs->bl.max_pwrite_zeroes = s->max_write_zero;
1022     }
1023     if (s->opt_discard) {
1024         bs->bl.pdiscard_alignment = s->opt_discard;
1025     }
1026     if (s->max_discard) {
1027         bs->bl.max_pdiscard = s->max_discard;
1028     }
1029 }
1030 
1031 static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
1032                                    BlockReopenQueue *queue, Error **errp)
1033 {
1034     return 0;
1035 }
1036 
1037 static void blkdebug_child_perm(BlockDriverState *bs, BdrvChild *c,
1038                                 BdrvChildRole role,
1039                                 BlockReopenQueue *reopen_queue,
1040                                 uint64_t perm, uint64_t shared,
1041                                 uint64_t *nperm, uint64_t *nshared)
1042 {
1043     BDRVBlkdebugState *s = bs->opaque;
1044 
1045     bdrv_default_perms(bs, c, role, reopen_queue,
1046                        perm, shared, nperm, nshared);
1047 
1048     *nperm |= s->take_child_perms;
1049     *nshared &= ~s->unshare_child_perms;
1050 }
1051 
/*
 * NULL-terminated list of option names installed as the driver's
 * .strong_runtime_opts.
 *
 * NOTE(review): the entries ending in '.' presumably match every option
 * sharing that prefix - confirm against the code consuming this list.
 */
static const char *const blkdebug_strong_runtime_opts[] = {
    /* Rule configuration */
    "config",
    "inject-error.",
    "set-state.",

    /* Limit overrides */
    "align",
    "max-transfer",
    "opt-write-zero",
    "max-write-zero",
    "opt-discard",
    "max-discard",

    NULL
};
1065 
1066 static BlockDriver bdrv_blkdebug = {
1067     .format_name            = "blkdebug",
1068     .protocol_name          = "blkdebug",
1069     .instance_size          = sizeof(BDRVBlkdebugState),
1070     .is_filter              = true,
1071 
1072     .bdrv_parse_filename    = blkdebug_parse_filename,
1073     .bdrv_file_open         = blkdebug_open,
1074     .bdrv_close             = blkdebug_close,
1075     .bdrv_reopen_prepare    = blkdebug_reopen_prepare,
1076     .bdrv_child_perm        = blkdebug_child_perm,
1077 
1078     .bdrv_getlength         = blkdebug_getlength,
1079     .bdrv_refresh_filename  = blkdebug_refresh_filename,
1080     .bdrv_refresh_limits    = blkdebug_refresh_limits,
1081 
1082     .bdrv_co_preadv         = blkdebug_co_preadv,
1083     .bdrv_co_pwritev        = blkdebug_co_pwritev,
1084     .bdrv_co_flush_to_disk  = blkdebug_co_flush,
1085     .bdrv_co_pwrite_zeroes  = blkdebug_co_pwrite_zeroes,
1086     .bdrv_co_pdiscard       = blkdebug_co_pdiscard,
1087     .bdrv_co_block_status   = blkdebug_co_block_status,
1088 
1089     .bdrv_debug_event           = blkdebug_debug_event,
1090     .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
1091     .bdrv_debug_remove_breakpoint
1092                                 = blkdebug_debug_remove_breakpoint,
1093     .bdrv_debug_resume          = blkdebug_debug_resume,
1094     .bdrv_debug_is_suspended    = blkdebug_debug_is_suspended,
1095 
1096     .strong_runtime_opts        = blkdebug_strong_runtime_opts,
1097 };
1098 
1099 static void bdrv_blkdebug_init(void)
1100 {
1101     bdrv_register(&bdrv_blkdebug);
1102 }
1103 
1104 block_init(bdrv_blkdebug_init);
1105