xref: /openbmc/qemu/hw/block/xen-block.c (revision 05caa062)
1 /*
2  * Copyright (c) 2018  Citrix Systems Inc.
3  *
4  * This work is licensed under the terms of the GNU GPL, version 2 or later.
5  * See the COPYING file in the top-level directory.
6  */
7 
8 #include "qemu/osdep.h"
9 #include "qemu/cutils.h"
10 #include "qemu/main-loop.h"
11 #include "qemu/module.h"
12 #include "qemu/option.h"
13 #include "qapi/error.h"
14 #include "qapi/qapi-commands-block-core.h"
15 #include "qapi/qapi-commands-qom.h"
16 #include "qapi/qapi-visit-block-core.h"
17 #include "qapi/qobject-input-visitor.h"
18 #include "qapi/visitor.h"
19 #include "qapi/qmp/qdict.h"
20 #include "qapi/qmp/qstring.h"
21 #include "qom/object_interfaces.h"
22 #include "hw/block/xen_blkif.h"
23 #include "hw/qdev-properties.h"
24 #include "hw/xen/xen-block.h"
25 #include "hw/xen/xen-backend.h"
26 #include "sysemu/blockdev.h"
27 #include "sysemu/block-backend.h"
28 #include "sysemu/iothread.h"
29 #include "dataplane/xen-block.h"
30 #include "hw/xen/interface/io/xs_wire.h"
31 #include "trace.h"
32 
/*
 * "Major" numbers found in bits 8 and up of a Xen virtual device number.
 * XVDQ_MAJOR..XVDBGQCV_MAJOR is the range used by the extended xvd
 * encoding, where the disk index occupies the low 20 bits of the major.
 */
#define XVDA_MAJOR 202
#define XVDQ_MAJOR (1 << 20)
#define XVDBGQCV_MAJOR ((1 << 21) - 1)
#define HDA_MAJOR 3
#define HDC_MAJOR 22
#define SDA_MAJOR 8


/*
 * Translate a virtual device number into the index of the disk it refers
 * to, discarding the partition bits.
 *
 * The layouts decoded here mirror the encodings produced by
 * xen_block_set_vdev():
 *   xvd (legacy) / sd : major << 8 | disk << 4 | partition   (disk 0-15)
 *   hd (hda, hdb)     : major << 8 | disk << 6 | partition   (disk 0-1)
 *   hd (hdc, hdd)     : major << 8 | (disk - 2) << 6 | partition
 *   xvd (extended)    : 1 << 28 | disk << 8 | partition
 *
 * Returns the disk index, or -1 if the major number is not recognised.
 */
static int vdev_to_diskno(unsigned int vdev_nr)
{
    switch (vdev_nr >> 8) {
    case XVDA_MAJOR:
    case SDA_MAJOR:
        /* Four partition bits, followed by a four-bit disk index. */
        return (vdev_nr >> 4) & 0xf;

    case HDA_MAJOR:
        return (vdev_nr >> 6) & 1;

    case HDC_MAJOR:
        /* The second IDE major carries disks 2 and 3. */
        return ((vdev_nr >> 6) & 1) + 2;

    case XVDQ_MAJOR ... XVDBGQCV_MAJOR:
        return (vdev_nr >> 8) & 0xfffff;

    default:
        return -1;
    }
}
61 
62 #define MAX_AUTO_VDEV 4096
63 
64 /*
65  * Find a free device name in the xvda → xvdfan range and set it in
66  * blockdev->props.vdev. Our definition of "free" is that there must
67  * be no other disk or partition with the same disk number.
68  *
69  * You are technically permitted to have all of hda, hda1, sda, sda1,
70  * xvda and xvda1 as *separate* PV block devices with separate backing
71  * stores. That doesn't make it a good idea. This code will skip xvda
72  * if *any* of those "conflicting" devices already exists.
73  *
74  * The limit of xvdfan (disk 4095) is fairly arbitrary just to avoid a
75  * stupidly sized bitmap, but Linux as of v6.6 doesn't support anything
76  * higher than that anyway.
77  */
78 static bool xen_block_find_free_vdev(XenBlockDevice *blockdev, Error **errp)
79 {
80     XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(blockdev)));
81     unsigned long used_devs[BITS_TO_LONGS(MAX_AUTO_VDEV)];
82     XenBlockVdev *vdev = &blockdev->props.vdev;
83     char fe_path[XENSTORE_ABS_PATH_MAX + 1];
84     char **existing_frontends;
85     unsigned int nr_existing = 0;
86     unsigned int vdev_nr;
87     int i, disk = 0;
88 
89     snprintf(fe_path, sizeof(fe_path), "/local/domain/%u/device/vbd",
90              blockdev->xendev.frontend_id);
91 
92     existing_frontends = qemu_xen_xs_directory(xenbus->xsh, XBT_NULL, fe_path,
93                                                &nr_existing);
94     if (!existing_frontends) {
95         if (errno == ENOENT) {
96             /*
97              * If the frontend directory doesn't exist because there are
98              * no existing vbd devices, that's fine. Just ensure that we
99              * don't dereference the NULL existing_frontends pointer, by
100              * checking that nr_existing is zero so the loop below is not
101              * entered.
102              *
103              * In fact this is redundant since nr_existing is initialized
104              * to zero, but setting it again here makes it abundantly clear
105              * to Coverity, and to the human reader who doesn't know the
106              * semantics of qemu_xen_xs_directory() off the top of their
107              * head.
108              */
109             nr_existing = 0;
110         } else {
111             /* All other errors accessing the frontend directory are fatal. */
112             error_setg_errno(errp, errno, "cannot read %s", fe_path);
113             return false;
114         }
115     }
116 
117     memset(used_devs, 0, sizeof(used_devs));
118     for (i = 0; i < nr_existing; i++) {
119         if (qemu_strtoui(existing_frontends[i], NULL, 10, &vdev_nr)) {
120             free(existing_frontends[i]);
121             continue;
122         }
123 
124         free(existing_frontends[i]);
125 
126         disk = vdev_to_diskno(vdev_nr);
127         if (disk < 0 || disk >= MAX_AUTO_VDEV) {
128             continue;
129         }
130 
131         set_bit(disk, used_devs);
132     }
133     free(existing_frontends);
134 
135     disk = find_first_zero_bit(used_devs, MAX_AUTO_VDEV);
136     if (disk == MAX_AUTO_VDEV) {
137         error_setg(errp, "cannot find device vdev for block device");
138         return false;
139     }
140 
141     vdev->type = XEN_BLOCK_VDEV_TYPE_XVD;
142     vdev->partition = 0;
143     vdev->disk = disk;
144     if (disk < (1 << 4)) {
145         vdev->number = (XVDA_MAJOR << 8) | (disk << 4);
146     } else {
147         vdev->number = (XVDQ_MAJOR << 8) | (disk << 8);
148     }
149     return true;
150 }
151 
152 static char *xen_block_get_name(XenDevice *xendev, Error **errp)
153 {
154     XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
155     XenBlockVdev *vdev = &blockdev->props.vdev;
156 
157     if (vdev->type == XEN_BLOCK_VDEV_TYPE_INVALID &&
158         !xen_block_find_free_vdev(blockdev, errp)) {
159         return NULL;
160     }
161     return g_strdup_printf("%lu", vdev->number);
162 }
163 
164 static void xen_block_disconnect(XenDevice *xendev, Error **errp)
165 {
166     XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
167     const char *type = object_get_typename(OBJECT(blockdev));
168     XenBlockVdev *vdev = &blockdev->props.vdev;
169 
170     trace_xen_block_disconnect(type, vdev->disk, vdev->partition);
171 
172     xen_block_dataplane_stop(blockdev->dataplane);
173 }
174 
175 static void xen_block_connect(XenDevice *xendev, Error **errp)
176 {
177     XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
178     const char *type = object_get_typename(OBJECT(blockdev));
179     XenBlockVdev *vdev = &blockdev->props.vdev;
180     BlockConf *conf = &blockdev->props.conf;
181     unsigned int feature_large_sector_size;
182     unsigned int order, nr_ring_ref, *ring_ref, event_channel, protocol;
183     char *str;
184 
185     trace_xen_block_connect(type, vdev->disk, vdev->partition);
186 
187     if (xen_device_frontend_scanf(xendev, "feature-large-sector-size", "%u",
188                                   &feature_large_sector_size) != 1) {
189         feature_large_sector_size = 0;
190     }
191 
192     if (feature_large_sector_size != 1 &&
193         conf->logical_block_size != XEN_BLKIF_SECTOR_SIZE) {
194         error_setg(errp, "logical_block_size != %u not supported by frontend",
195                    XEN_BLKIF_SECTOR_SIZE);
196         return;
197     }
198 
199     if (xen_device_frontend_scanf(xendev, "ring-page-order", "%u",
200                                   &order) != 1) {
201         nr_ring_ref = 1;
202         ring_ref = g_new(unsigned int, nr_ring_ref);
203 
204         if (xen_device_frontend_scanf(xendev, "ring-ref", "%u",
205                                       &ring_ref[0]) != 1) {
206             error_setg(errp, "failed to read ring-ref");
207             g_free(ring_ref);
208             return;
209         }
210     } else if (qemu_xen_gnttab_can_map_multi() &&
211                order <= blockdev->props.max_ring_page_order) {
212         unsigned int i;
213 
214         nr_ring_ref = 1 << order;
215         ring_ref = g_new(unsigned int, nr_ring_ref);
216 
217         for (i = 0; i < nr_ring_ref; i++) {
218             const char *key = g_strdup_printf("ring-ref%u", i);
219 
220             if (xen_device_frontend_scanf(xendev, key, "%u",
221                                           &ring_ref[i]) != 1) {
222                 error_setg(errp, "failed to read %s", key);
223                 g_free((gpointer)key);
224                 g_free(ring_ref);
225                 return;
226             }
227 
228             g_free((gpointer)key);
229         }
230     } else {
231         error_setg(errp, "invalid ring-page-order (%d)", order);
232         return;
233     }
234 
235     if (xen_device_frontend_scanf(xendev, "event-channel", "%u",
236                                   &event_channel) != 1) {
237         error_setg(errp, "failed to read event-channel");
238         g_free(ring_ref);
239         return;
240     }
241 
242     if (xen_device_frontend_scanf(xendev, "protocol", "%ms", &str) != 1) {
243         /* x86 defaults to the 32-bit protocol even for 64-bit guests. */
244         if (object_dynamic_cast(OBJECT(qdev_get_machine()), "x86-machine")) {
245             protocol = BLKIF_PROTOCOL_X86_32;
246         } else {
247             protocol = BLKIF_PROTOCOL_NATIVE;
248         }
249     } else {
250         if (strcmp(str, XEN_IO_PROTO_ABI_X86_32) == 0) {
251             protocol = BLKIF_PROTOCOL_X86_32;
252         } else if (strcmp(str, XEN_IO_PROTO_ABI_X86_64) == 0) {
253             protocol = BLKIF_PROTOCOL_X86_64;
254         } else {
255             protocol = BLKIF_PROTOCOL_NATIVE;
256         }
257 
258         free(str);
259     }
260 
261     xen_block_dataplane_start(blockdev->dataplane, ring_ref, nr_ring_ref,
262                               event_channel, protocol, errp);
263 
264     g_free(ring_ref);
265 }
266 
267 static void xen_block_unrealize(XenDevice *xendev)
268 {
269     XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
270     XenBlockDeviceClass *blockdev_class =
271         XEN_BLOCK_DEVICE_GET_CLASS(xendev);
272     const char *type = object_get_typename(OBJECT(blockdev));
273     XenBlockVdev *vdev = &blockdev->props.vdev;
274 
275     if (vdev->type == XEN_BLOCK_VDEV_TYPE_INVALID) {
276         return;
277     }
278 
279     trace_xen_block_unrealize(type, vdev->disk, vdev->partition);
280 
281     /* Disconnect from the frontend in case this has not already happened */
282     xen_block_disconnect(xendev, NULL);
283 
284     xen_block_dataplane_destroy(blockdev->dataplane);
285     blockdev->dataplane = NULL;
286 
287     if (blockdev_class->unrealize) {
288         blockdev_class->unrealize(blockdev);
289     }
290 }
291 
292 static void xen_block_set_size(XenBlockDevice *blockdev)
293 {
294     const char *type = object_get_typename(OBJECT(blockdev));
295     XenBlockVdev *vdev = &blockdev->props.vdev;
296     BlockConf *conf = &blockdev->props.conf;
297     int64_t sectors = blk_getlength(conf->blk) / conf->logical_block_size;
298     XenDevice *xendev = XEN_DEVICE(blockdev);
299 
300     trace_xen_block_size(type, vdev->disk, vdev->partition, sectors);
301 
302     xen_device_backend_printf(xendev, "sectors", "%"PRIi64, sectors);
303 }
304 
305 static void xen_block_resize_cb(void *opaque)
306 {
307     XenBlockDevice *blockdev = opaque;
308     XenDevice *xendev = XEN_DEVICE(blockdev);
309     enum xenbus_state state = xen_device_backend_get_state(xendev);
310 
311     xen_block_set_size(blockdev);
312 
313     /*
314      * Mimic the behaviour of Linux xen-blkback and re-write the state
315      * to trigger the frontend watch.
316      */
317     xen_device_backend_printf(xendev, "state", "%u", state);
318 }
319 
320 /* Suspend request handling */
321 static void xen_block_drained_begin(void *opaque)
322 {
323     XenBlockDevice *blockdev = opaque;
324 
325     xen_block_dataplane_detach(blockdev->dataplane);
326 }
327 
328 /* Resume request handling */
329 static void xen_block_drained_end(void *opaque)
330 {
331     XenBlockDevice *blockdev = opaque;
332 
333     xen_block_dataplane_attach(blockdev->dataplane);
334 }
335 
/*
 * Block backend callbacks installed by xen_block_realize() via
 * blk_set_dev_ops(); the opaque pointer is the XenBlockDevice.
 */
static const BlockDevOps xen_block_dev_ops = {
    .resize_cb     = xen_block_resize_cb,
    .drained_begin = xen_block_drained_begin,
    .drained_end   = xen_block_drained_end,
};
341 
342 static void xen_block_realize(XenDevice *xendev, Error **errp)
343 {
344     ERRP_GUARD();
345     XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
346     XenBlockDeviceClass *blockdev_class =
347         XEN_BLOCK_DEVICE_GET_CLASS(xendev);
348     const char *type = object_get_typename(OBJECT(blockdev));
349     XenBlockVdev *vdev = &blockdev->props.vdev;
350     BlockConf *conf = &blockdev->props.conf;
351     BlockBackend *blk = conf->blk;
352 
353     if (vdev->type == XEN_BLOCK_VDEV_TYPE_INVALID) {
354         error_setg(errp, "vdev property not set");
355         return;
356     }
357 
358     trace_xen_block_realize(type, vdev->disk, vdev->partition);
359 
360     if (blockdev_class->realize) {
361         blockdev_class->realize(blockdev, errp);
362         if (*errp) {
363             return;
364         }
365     }
366 
367     /*
368      * The blkif protocol does not deal with removable media, so it must
369      * always be present, even for CDRom devices.
370      */
371     assert(blk);
372     if (!blk_is_inserted(blk)) {
373         error_setg(errp, "device needs media, but drive is empty");
374         return;
375     }
376 
377     if (!blkconf_apply_backend_options(conf, blockdev->info & VDISK_READONLY,
378                                        true, errp)) {
379         return;
380     }
381 
382     if (!(blockdev->info & VDISK_CDROM) &&
383         !blkconf_geometry(conf, NULL, 65535, 255, 255, errp)) {
384         return;
385     }
386 
387     if (!blkconf_blocksizes(conf, errp)) {
388         return;
389     }
390 
391     if (conf->discard_granularity == -1) {
392         conf->discard_granularity = conf->physical_block_size;
393     }
394 
395     if (blk_get_flags(blk) & BDRV_O_UNMAP) {
396         xen_device_backend_printf(xendev, "feature-discard", "%u", 1);
397         xen_device_backend_printf(xendev, "discard-granularity", "%u",
398                                   conf->discard_granularity);
399         xen_device_backend_printf(xendev, "discard-alignment", "%u", 0);
400     }
401 
402     xen_device_backend_printf(xendev, "feature-flush-cache", "%u", 1);
403 
404     if (qemu_xen_gnttab_can_map_multi()) {
405         xen_device_backend_printf(xendev, "max-ring-page-order", "%u",
406                                   blockdev->props.max_ring_page_order);
407     }
408 
409     xen_device_backend_printf(xendev, "info", "%u", blockdev->info);
410 
411     xen_device_frontend_printf(xendev, "virtual-device", "%lu",
412                                vdev->number);
413     xen_device_frontend_printf(xendev, "device-type", "%s",
414                                blockdev->device_type);
415 
416     xen_device_backend_printf(xendev, "sector-size", "%u",
417                               conf->logical_block_size);
418 
419     xen_block_set_size(blockdev);
420 
421     blockdev->dataplane =
422         xen_block_dataplane_create(xendev, blk, conf->logical_block_size,
423                                    blockdev->props.iothread);
424 
425     blk_set_dev_ops(blk, &xen_block_dev_ops, blockdev);
426 }
427 
428 static void xen_block_frontend_changed(XenDevice *xendev,
429                                        enum xenbus_state frontend_state,
430                                        Error **errp)
431 {
432     ERRP_GUARD();
433     enum xenbus_state backend_state = xen_device_backend_get_state(xendev);
434 
435     switch (frontend_state) {
436     case XenbusStateInitialised:
437     case XenbusStateConnected:
438         if (backend_state == XenbusStateConnected) {
439             break;
440         }
441 
442         xen_block_disconnect(xendev, errp);
443         if (*errp) {
444             break;
445         }
446 
447         xen_block_connect(xendev, errp);
448         if (*errp) {
449             break;
450         }
451 
452         xen_device_backend_set_state(xendev, XenbusStateConnected);
453         break;
454 
455     case XenbusStateClosing:
456         xen_device_backend_set_state(xendev, XenbusStateClosing);
457         break;
458 
459     case XenbusStateClosed:
460     case XenbusStateUnknown:
461         xen_block_disconnect(xendev, errp);
462         if (*errp) {
463             break;
464         }
465 
466         xen_device_backend_set_state(xendev, XenbusStateClosed);
467         break;
468 
469     default:
470         break;
471     }
472 }
473 
/*
 * Convert a zero-based disk index into its letter suffix ("a", "b", ...,
 * "z", "aa", "ab", ...). The result is newly allocated and must be
 * released with g_free().
 */
static char *disk_to_vbd_name(unsigned int disk)
{
    /* 14 letters cover every 64-bit value, so 16 bytes is ample. */
    char letters[16];
    char *pos = letters + sizeof(letters) - 1;

    *pos = '\0';

    /* Emit the least significant letter first, filling the buffer backwards. */
    for (;;) {
        *--pos = 'a' + disk % 26;
        if (disk < 26) {
            break;
        }
        disk = (disk / 26) - 1;
    }

    return g_strdup(pos);
}
484 
485 static void xen_block_get_vdev(Object *obj, Visitor *v, const char *name,
486                                void *opaque, Error **errp)
487 {
488     Property *prop = opaque;
489     XenBlockVdev *vdev = object_field_prop_ptr(obj, prop);
490     char *str;
491 
492     switch (vdev->type) {
493     case XEN_BLOCK_VDEV_TYPE_DP:
494         str = g_strdup_printf("d%lup%lu", vdev->disk, vdev->partition);
495         break;
496 
497     case XEN_BLOCK_VDEV_TYPE_XVD:
498     case XEN_BLOCK_VDEV_TYPE_HD:
499     case XEN_BLOCK_VDEV_TYPE_SD: {
500         char *vbd_name = disk_to_vbd_name(vdev->disk);
501 
502         str = g_strdup_printf("%s%s%lu",
503                               (vdev->type == XEN_BLOCK_VDEV_TYPE_XVD) ?
504                               "xvd" :
505                               (vdev->type == XEN_BLOCK_VDEV_TYPE_HD) ?
506                               "hd" :
507                               "sd",
508                               vbd_name, vdev->partition);
509         g_free(vbd_name);
510         break;
511     }
512     default:
513         error_setg(errp, "invalid vdev type");
514         return;
515     }
516 
517     visit_type_str(v, name, &str, errp);
518     g_free(str);
519 }
520 
/*
 * Parse the run of lowercase ASCII letters at the start of @name as a
 * disk suffix ("a" -> 0, "z" -> 25, "aa" -> 26, ...).
 *
 * @endp always receives a pointer to the first character after the
 * letters. On success the zero-based index is stored in @disk and 0 is
 * returned. -1 is returned (and @disk left untouched) when there are no
 * letters at all, or when the name is so long that the index does not
 * fit in an unsigned int; without that check the accumulator would wrap
 * and an absurd name could alias a valid disk.
 */
static int vbd_name_to_disk(const char *name, const char **endp,
                            unsigned long *disk)
{
    unsigned int n = 0;
    bool overflow = false;

    while (*name >= 'a' && *name <= 'z') {
        unsigned int digit = *name++ - 'a' + 1;

        /* Keep consuming letters after an overflow so *endp stays exact. */
        if (!overflow && n <= (UINT_MAX - digit) / 26) {
            n = n * 26 + digit;
        } else {
            overflow = true;
        }
    }
    *endp = name;

    if (!n || overflow) {
        return -1;
    }

    *disk = n - 1;

    return 0;
}
544 
545 static void xen_block_set_vdev(Object *obj, Visitor *v, const char *name,
546                                void *opaque, Error **errp)
547 {
548     Property *prop = opaque;
549     XenBlockVdev *vdev = object_field_prop_ptr(obj, prop);
550     char *str, *p;
551     const char *end;
552 
553     if (!visit_type_str(v, name, &str, errp)) {
554         return;
555     }
556 
557     p = strchr(str, 'd');
558     if (!p) {
559         goto invalid;
560     }
561 
562     *p++ = '\0';
563     if (*str == '\0') {
564         vdev->type = XEN_BLOCK_VDEV_TYPE_DP;
565     } else if (strcmp(str, "xv") == 0) {
566         vdev->type = XEN_BLOCK_VDEV_TYPE_XVD;
567     } else if (strcmp(str, "h") == 0) {
568         vdev->type = XEN_BLOCK_VDEV_TYPE_HD;
569     } else if (strcmp(str, "s") == 0) {
570         vdev->type = XEN_BLOCK_VDEV_TYPE_SD;
571     } else {
572         goto invalid;
573     }
574 
575     if (vdev->type == XEN_BLOCK_VDEV_TYPE_DP) {
576         if (qemu_strtoul(p, &end, 10, &vdev->disk)) {
577             goto invalid;
578         }
579 
580         if (*end == 'p') {
581             if (*(++end) == '\0') {
582                 goto invalid;
583             }
584         }
585     } else {
586         if (vbd_name_to_disk(p, &end, &vdev->disk)) {
587             goto invalid;
588         }
589     }
590 
591     if (*end != '\0') {
592         p = (char *)end;
593 
594         if (qemu_strtoul(p, &end, 10, &vdev->partition)) {
595             goto invalid;
596         }
597 
598         if (*end != '\0') {
599             goto invalid;
600         }
601     } else {
602         vdev->partition = 0;
603     }
604 
605     switch (vdev->type) {
606     case XEN_BLOCK_VDEV_TYPE_DP:
607     case XEN_BLOCK_VDEV_TYPE_XVD:
608         if (vdev->disk < (1 << 4) && vdev->partition < (1 << 4)) {
609             vdev->number = (XVDA_MAJOR << 8) | (vdev->disk << 4) |
610                 vdev->partition;
611         } else if (vdev->disk < (1 << 20) && vdev->partition < (1 << 8)) {
612             vdev->number = (XVDQ_MAJOR << 8) | (vdev->disk << 8) |
613                 vdev->partition;
614         } else {
615             goto invalid;
616         }
617         break;
618 
619     case XEN_BLOCK_VDEV_TYPE_HD:
620         if ((vdev->disk == 0 || vdev->disk == 1) &&
621             vdev->partition < (1 << 6)) {
622             vdev->number = (HDA_MAJOR << 8) | (vdev->disk << 6) |
623                 vdev->partition;
624         } else if ((vdev->disk == 2 || vdev->disk == 3) &&
625                    vdev->partition < (1 << 6)) {
626             vdev->number = (HDC_MAJOR << 8) | ((vdev->disk - 2) << 6) |
627                 vdev->partition;
628         } else {
629             goto invalid;
630         }
631         break;
632 
633     case XEN_BLOCK_VDEV_TYPE_SD:
634         if (vdev->disk < (1 << 4) && vdev->partition < (1 << 4)) {
635             vdev->number = (SDA_MAJOR << 8) | (vdev->disk << 4) |
636                 vdev->partition;
637         } else {
638             goto invalid;
639         }
640         break;
641 
642     default:
643         goto invalid;
644     }
645 
646     g_free(str);
647     return;
648 
649 invalid:
650     error_setg(errp, "invalid virtual disk specifier");
651 
652     vdev->type = XEN_BLOCK_VDEV_TYPE_INVALID;
653     g_free(str);
654 }
655 
656 /*
657  * This property deals with 'vdev' names adhering to the Xen VBD naming
658  * scheme described in:
659  *
660  * https://xenbits.xen.org/docs/unstable/man/xen-vbd-interface.7.html
661  */
662 const PropertyInfo xen_block_prop_vdev = {
663     .name  = "str",
664     .description = "Virtual Disk specifier: d*p*/xvd*/hd*/sd*",
665     .get = xen_block_get_vdev,
666     .set = xen_block_set_vdev,
667 };
668 
/* qdev properties shared by all Xen block device types. */
static Property xen_block_props[] = {
    /* Virtual disk specifier, parsed by xen_block_set_vdev() */
    DEFINE_PROP("vdev", XenBlockDevice, props.vdev,
                xen_block_prop_vdev, XenBlockVdev),
    DEFINE_BLOCK_PROPERTIES(XenBlockDevice, props.conf),
    /* Largest ring-page-order accepted from the frontend; defaults to 4 */
    DEFINE_PROP_UINT32("max-ring-page-order", XenBlockDevice,
                       props.max_ring_page_order, 4),
    /* IOThread handed to xen_block_dataplane_create() */
    DEFINE_PROP_LINK("iothread", XenBlockDevice, props.iothread,
                     TYPE_IOTHREAD, IOThread *),
    DEFINE_PROP_END_OF_LIST()
};
679 
680 static void xen_block_class_init(ObjectClass *class, void *data)
681 {
682     DeviceClass *dev_class = DEVICE_CLASS(class);
683     XenDeviceClass *xendev_class = XEN_DEVICE_CLASS(class);
684 
685     xendev_class->backend = "qdisk";
686     xendev_class->device = "vbd";
687     xendev_class->get_name = xen_block_get_name;
688     xendev_class->realize = xen_block_realize;
689     xendev_class->frontend_changed = xen_block_frontend_changed;
690     xendev_class->unrealize = xen_block_unrealize;
691 
692     device_class_set_props(dev_class, xen_block_props);
693 }
694 
695 static const TypeInfo xen_block_type_info = {
696     .name = TYPE_XEN_BLOCK_DEVICE,
697     .parent = TYPE_XEN_DEVICE,
698     .instance_size = sizeof(XenBlockDevice),
699     .abstract = true,
700     .class_size = sizeof(XenBlockDeviceClass),
701     .class_init = xen_block_class_init,
702 };
703 
/*
 * XenBlockDeviceClass.unrealize hook for disks: only emits a trace event.
 */
static void xen_disk_unrealize(XenBlockDevice *blockdev)
{
    trace_xen_disk_unrealize();
}
708 
709 static void xen_disk_realize(XenBlockDevice *blockdev, Error **errp)
710 {
711     BlockConf *conf = &blockdev->props.conf;
712 
713     trace_xen_disk_realize();
714 
715     blockdev->device_type = "disk";
716 
717     if (!conf->blk) {
718         error_setg(errp, "drive property not set");
719         return;
720     }
721 
722     blockdev->info = blk_supports_write_perm(conf->blk) ? 0 : VDISK_READONLY;
723 }
724 
725 static void xen_disk_class_init(ObjectClass *class, void *data)
726 {
727     DeviceClass *dev_class = DEVICE_CLASS(class);
728     XenBlockDeviceClass *blockdev_class = XEN_BLOCK_DEVICE_CLASS(class);
729 
730     blockdev_class->realize = xen_disk_realize;
731     blockdev_class->unrealize = xen_disk_unrealize;
732 
733     dev_class->desc = "Xen Disk Device";
734 }
735 
736 static const TypeInfo xen_disk_type_info = {
737     .name = TYPE_XEN_DISK_DEVICE,
738     .parent = TYPE_XEN_BLOCK_DEVICE,
739     .instance_size = sizeof(XenDiskDevice),
740     .class_init = xen_disk_class_init,
741 };
742 
/*
 * XenBlockDeviceClass.unrealize hook for CD-ROMs: only emits a trace event.
 */
static void xen_cdrom_unrealize(XenBlockDevice *blockdev)
{
    trace_xen_cdrom_unrealize();
}
747 
748 static void xen_cdrom_realize(XenBlockDevice *blockdev, Error **errp)
749 {
750     BlockConf *conf = &blockdev->props.conf;
751 
752     trace_xen_cdrom_realize();
753 
754     blockdev->device_type = "cdrom";
755 
756     if (!conf->blk) {
757         int rc;
758 
759         /* Set up an empty drive */
760         conf->blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
761 
762         rc = blk_attach_dev(conf->blk, DEVICE(blockdev));
763         if (!rc) {
764             error_setg_errno(errp, -rc, "failed to create drive");
765             return;
766         }
767     }
768 
769     blockdev->info = VDISK_READONLY | VDISK_CDROM;
770 }
771 
772 static void xen_cdrom_class_init(ObjectClass *class, void *data)
773 {
774     DeviceClass *dev_class = DEVICE_CLASS(class);
775     XenBlockDeviceClass *blockdev_class = XEN_BLOCK_DEVICE_CLASS(class);
776 
777     blockdev_class->realize = xen_cdrom_realize;
778     blockdev_class->unrealize = xen_cdrom_unrealize;
779 
780     dev_class->desc = "Xen CD-ROM Device";
781 }
782 
783 static const TypeInfo xen_cdrom_type_info = {
784     .name = TYPE_XEN_CDROM_DEVICE,
785     .parent = TYPE_XEN_BLOCK_DEVICE,
786     .instance_size = sizeof(XenCDRomDevice),
787     .class_init = xen_cdrom_class_init,
788 };
789 
/*
 * Register the abstract block device type and its two concrete subtypes
 * (disk and CD-ROM).
 */
static void xen_block_register_types(void)
{
    type_register_static(&xen_block_type_info);
    type_register_static(&xen_disk_type_info);
    type_register_static(&xen_cdrom_type_info);
}

type_init(xen_block_register_types)
798 
/*
 * Traced wrapper around qmp_blockdev_del() for the block node called
 * @node_name; any failure is reported through @errp.
 */
static void xen_block_blockdev_del(const char *node_name, Error **errp)
{
    trace_xen_block_blockdev_del(node_name);

    qmp_blockdev_del(node_name, errp);
}
805 
806 static char *xen_block_blockdev_add(const char *id, QDict *qdict,
807                                     Error **errp)
808 {
809     ERRP_GUARD();
810     const char *driver = qdict_get_try_str(qdict, "driver");
811     BlockdevOptions *options = NULL;
812     char *node_name;
813     Visitor *v;
814 
815     if (!driver) {
816         error_setg(errp, "no 'driver' parameter");
817         return NULL;
818     }
819 
820     node_name = g_strdup_printf("%s-%s", id, driver);
821     qdict_put_str(qdict, "node-name", node_name);
822 
823     trace_xen_block_blockdev_add(node_name);
824 
825     v = qobject_input_visitor_new(QOBJECT(qdict));
826     visit_type_BlockdevOptions(v, NULL, &options, errp);
827     visit_free(v);
828     if (!options) {
829         goto fail;
830     }
831 
832     qmp_blockdev_add(options, errp);
833 
834     if (*errp) {
835         goto fail;
836     }
837 
838     qapi_free_BlockdevOptions(options);
839 
840     return node_name;
841 
842 fail:
843     if (options) {
844         qapi_free_BlockdevOptions(options);
845     }
846     g_free(node_name);
847 
848     return NULL;
849 }
850 
851 static void xen_block_drive_destroy(XenBlockDrive *drive, Error **errp)
852 {
853     ERRP_GUARD();
854     char *node_name = drive->node_name;
855 
856     if (node_name) {
857         xen_block_blockdev_del(node_name, errp);
858         if (*errp) {
859             return;
860         }
861         g_free(node_name);
862         drive->node_name = NULL;
863     }
864     g_free(drive->id);
865     g_free(drive);
866 }
867 
868 static XenBlockDrive *xen_block_drive_create(const char *id,
869                                              const char *device_type,
870                                              QDict *opts, Error **errp)
871 {
872     ERRP_GUARD();
873     const char *params = qdict_get_try_str(opts, "params");
874     const char *mode = qdict_get_try_str(opts, "mode");
875     const char *direct_io_safe = qdict_get_try_str(opts, "direct-io-safe");
876     const char *discard_enable = qdict_get_try_str(opts, "discard-enable");
877     char *driver = NULL;
878     char *filename = NULL;
879     XenBlockDrive *drive = NULL;
880     QDict *file_layer;
881     QDict *driver_layer;
882     struct stat st;
883     int rc;
884 
885     if (params) {
886         char **v = g_strsplit(params, ":", 2);
887 
888         if (v[1] == NULL) {
889             filename = g_strdup(v[0]);
890             driver = g_strdup("raw");
891         } else {
892             if (strcmp(v[0], "aio") == 0) {
893                 driver = g_strdup("raw");
894             } else if (strcmp(v[0], "vhd") == 0) {
895                 driver = g_strdup("vpc");
896             } else {
897                 driver = g_strdup(v[0]);
898             }
899             filename = g_strdup(v[1]);
900         }
901 
902         g_strfreev(v);
903     } else {
904         error_setg(errp, "no params");
905         goto done;
906     }
907 
908     assert(filename);
909     assert(driver);
910 
911     drive = g_new0(XenBlockDrive, 1);
912     drive->id = g_strdup(id);
913 
914     rc = stat(filename, &st);
915     if (rc) {
916         error_setg_errno(errp, errno, "Could not stat file '%s'", filename);
917         goto done;
918     }
919 
920     file_layer = qdict_new();
921     driver_layer = qdict_new();
922 
923     if (S_ISBLK(st.st_mode)) {
924         qdict_put_str(file_layer, "driver", "host_device");
925     } else {
926         qdict_put_str(file_layer, "driver", "file");
927     }
928 
929     qdict_put_str(file_layer, "filename", filename);
930 
931     if (mode && *mode != 'w') {
932         qdict_put_bool(file_layer, "read-only", true);
933     }
934 
935     if (direct_io_safe) {
936         unsigned long value;
937 
938         if (!qemu_strtoul(direct_io_safe, NULL, 2, &value) && !!value) {
939             QDict *cache_qdict = qdict_new();
940 
941             qdict_put_bool(cache_qdict, "direct", true);
942             qdict_put(file_layer, "cache", cache_qdict);
943 
944             qdict_put_str(file_layer, "aio", "native");
945         }
946     }
947 
948     if (discard_enable) {
949         unsigned long value;
950 
951         if (!qemu_strtoul(discard_enable, NULL, 2, &value) && !!value) {
952             qdict_put_str(file_layer, "discard", "unmap");
953             qdict_put_str(driver_layer, "discard", "unmap");
954         }
955     }
956 
957     /*
958      * It is necessary to turn file locking off as an emulated device
959      * may have already opened the same image file.
960      */
961     qdict_put_str(file_layer, "locking", "off");
962 
963     qdict_put_str(driver_layer, "driver", driver);
964 
965     qdict_put(driver_layer, "file", file_layer);
966 
967     g_assert(!drive->node_name);
968     drive->node_name = xen_block_blockdev_add(drive->id, driver_layer,
969                                               errp);
970 
971     qobject_unref(driver_layer);
972 
973 done:
974     g_free(filename);
975     g_free(driver);
976     if (*errp) {
977         xen_block_drive_destroy(drive, NULL);
978         return NULL;
979     }
980 
981     return drive;
982 }
983 
984 static const char *xen_block_drive_get_node_name(XenBlockDrive *drive)
985 {
986     return drive->node_name ? drive->node_name : "";
987 }
988 
989 static void xen_block_iothread_destroy(XenBlockIOThread *iothread,
990                                        Error **errp)
991 {
992     qmp_object_del(iothread->id, errp);
993 
994     g_free(iothread->id);
995     g_free(iothread);
996 }
997 
998 static XenBlockIOThread *xen_block_iothread_create(const char *id,
999                                                    Error **errp)
1000 {
1001     ERRP_GUARD();
1002     XenBlockIOThread *iothread = g_new(XenBlockIOThread, 1);
1003     ObjectOptions *opts;
1004 
1005     iothread->id = g_strdup(id);
1006 
1007     opts = g_new(ObjectOptions, 1);
1008     *opts = (ObjectOptions) {
1009         .qom_type = OBJECT_TYPE_IOTHREAD,
1010         .id = g_strdup(id),
1011     };
1012     qmp_object_add(opts, errp);
1013     qapi_free_ObjectOptions(opts);
1014 
1015     if (*errp) {
1016         g_free(iothread->id);
1017         g_free(iothread);
1018         return NULL;
1019     }
1020 
1021     return iothread;
1022 }
1023 
1024 static void xen_block_device_create(XenBackendInstance *backend,
1025                                     QDict *opts, Error **errp)
1026 {
1027     ERRP_GUARD();
1028     XenBus *xenbus = xen_backend_get_bus(backend);
1029     const char *name = xen_backend_get_name(backend);
1030     unsigned long number;
1031     const char *vdev, *device_type;
1032     XenBlockDrive *drive = NULL;
1033     XenBlockIOThread *iothread = NULL;
1034     XenDevice *xendev = NULL;
1035     const char *type;
1036     XenBlockDevice *blockdev;
1037 
1038     if (qemu_strtoul(name, NULL, 10, &number)) {
1039         error_setg(errp, "failed to parse name '%s'", name);
1040         goto fail;
1041     }
1042 
1043     trace_xen_block_device_create(number);
1044 
1045     vdev = qdict_get_try_str(opts, "dev");
1046     if (!vdev) {
1047         error_setg(errp, "no dev parameter");
1048         goto fail;
1049     }
1050 
1051     device_type = qdict_get_try_str(opts, "device-type");
1052     if (!device_type) {
1053         error_setg(errp, "no device-type parameter");
1054         goto fail;
1055     }
1056 
1057     if (!strcmp(device_type, "disk")) {
1058         type = TYPE_XEN_DISK_DEVICE;
1059     } else if (!strcmp(device_type, "cdrom")) {
1060         type = TYPE_XEN_CDROM_DEVICE;
1061     } else {
1062         error_setg(errp, "invalid device-type parameter '%s'", device_type);
1063         goto fail;
1064     }
1065 
1066     drive = xen_block_drive_create(vdev, device_type, opts, errp);
1067     if (!drive) {
1068         error_prepend(errp, "failed to create drive: ");
1069         goto fail;
1070     }
1071 
1072     iothread = xen_block_iothread_create(vdev, errp);
1073     if (*errp) {
1074         error_prepend(errp, "failed to create iothread: ");
1075         goto fail;
1076     }
1077 
1078     xendev = XEN_DEVICE(qdev_new(type));
1079     blockdev = XEN_BLOCK_DEVICE(xendev);
1080 
1081     if (!object_property_set_str(OBJECT(xendev), "vdev", vdev,
1082                                  errp)) {
1083         error_prepend(errp, "failed to set 'vdev': ");
1084         goto fail;
1085     }
1086 
1087     if (!object_property_set_str(OBJECT(xendev), "drive",
1088                                  xen_block_drive_get_node_name(drive),
1089                                  errp)) {
1090         error_prepend(errp, "failed to set 'drive': ");
1091         goto fail;
1092     }
1093 
1094     if (!object_property_set_str(OBJECT(xendev), "iothread", iothread->id,
1095                                  errp)) {
1096         error_prepend(errp, "failed to set 'iothread': ");
1097         goto fail;
1098     }
1099 
1100     blockdev->iothread = iothread;
1101     blockdev->drive = drive;
1102 
1103     if (!qdev_realize_and_unref(DEVICE(xendev), BUS(xenbus), errp)) {
1104         error_prepend(errp, "realization of device %s failed: ", type);
1105         goto fail;
1106     }
1107 
1108     xen_backend_set_device(backend, xendev);
1109     return;
1110 
1111 fail:
1112     if (xendev) {
1113         object_unparent(OBJECT(xendev));
1114     }
1115 
1116     if (iothread) {
1117         xen_block_iothread_destroy(iothread, NULL);
1118     }
1119 
1120     if (drive) {
1121         xen_block_drive_destroy(drive, NULL);
1122     }
1123 }
1124 
1125 static void xen_block_device_destroy(XenBackendInstance *backend,
1126                                      Error **errp)
1127 {
1128     ERRP_GUARD();
1129     XenDevice *xendev = xen_backend_get_device(backend);
1130     XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
1131     XenBlockVdev *vdev = &blockdev->props.vdev;
1132     XenBlockDrive *drive = blockdev->drive;
1133     XenBlockIOThread *iothread = blockdev->iothread;
1134 
1135     trace_xen_block_device_destroy(vdev->number);
1136 
1137     object_unparent(OBJECT(xendev));
1138 
1139     /*
1140      * Drain all pending RCU callbacks as object_unparent() frees `xendev'
1141      * in a RCU callback.
1142      * And due to the property "drive" still existing in `xendev', we
1143      * can't destroy the XenBlockDrive associated with `xendev' with
1144      * xen_block_drive_destroy() below.
1145      */
1146     drain_call_rcu();
1147 
1148     if (iothread) {
1149         xen_block_iothread_destroy(iothread, errp);
1150         if (*errp) {
1151             error_prepend(errp, "failed to destroy iothread: ");
1152             return;
1153         }
1154     }
1155 
1156     if (drive) {
1157         xen_block_drive_destroy(drive, errp);
1158         if (*errp) {
1159             error_prepend(errp, "failed to destroy drive: ");
1160             return;
1161         }
1162     }
1163 }
1164 
/*
 * Backend descriptor for the "qdisk" backend type: ties the type name
 * to the create/destroy hooks defined above. It is handed to
 * xen_backend_register() by xen_block_register_backend().
 */
static const XenBackendInfo xen_block_backend_info = {
    .type = "qdisk",
    .create = xen_block_device_create,
    .destroy = xen_block_device_destroy,
};
1170 
/* Register the "qdisk" backend descriptor with the Xen backend core. */
static void xen_block_register_backend(void)
{
    xen_backend_register(&xen_block_backend_info);
}

/* Hook the registration function into xen_backend_init(). */
xen_backend_init(xen_block_register_backend);
1177