xref: /openbmc/qemu/hw/i386/xen/xen_platform.c (revision 51e47cf8)
1 /*
2  * XEN platform pci device, formerly known as the event channel device
3  *
4  * Copyright (c) 2003-2004 Intel Corp.
5  * Copyright (c) 2006 XenSource
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qapi/error.h"
28 #include "hw/ide/pci.h"
29 #include "hw/pci/pci.h"
30 #include "migration/vmstate.h"
31 #include "net/net.h"
32 #include "trace.h"
33 #include "sysemu/xen.h"
34 #include "sysemu/block-backend.h"
35 #include "qemu/error-report.h"
36 #include "qemu/module.h"
37 #include "qom/object.h"
38 
39 #ifdef CONFIG_XEN
40 #include "hw/xen/xen_native.h"
41 #endif
42 
43 /* The rule is that xen_native.h must come first */
44 #include "hw/xen/xen.h"
45 
46 //#define DEBUG_PLATFORM
47 
/*
 * Debug tracing helper.
 *
 * With DEBUG_PLATFORM defined, DPRINTF() writes the formatted message to
 * stderr prefixed with "xen_platform: ". Without it, DPRINTF() expands to an
 * empty statement, so its arguments are neither evaluated nor type-checked.
 */
#ifdef DEBUG_PLATFORM
#define DPRINTF(fmt, ...) do { \
    fprintf(stderr, "xen_platform: " fmt, ## __VA_ARGS__); \
} while (0)
#else
#define DPRINTF(fmt, ...) do { } while (0)
#endif
55 
56 #define PFFLAG_ROM_LOCK 1 /* Sets whether ROM memory area is RW or RO */
57 
/*
 * Per-instance state of the Xen platform PCI device.
 *
 * Holds the three memory regions the device exposes (the fixed legacy I/O
 * port window, the PCI I/O BAR and the PCI MMIO BAR), the guest-visible
 * platform flags, and a line buffer used to collect log bytes written by
 * guest drivers.
 */
struct PCIXenPlatformState {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    /* Fixed I/O port window mapped at XEN_PLATFORM_IOPORT */
    MemoryRegion fixed_io;
    /* I/O space region registered as PCI BAR 0 */
    MemoryRegion bar;
    /* Memory space region registered as PCI BAR 1 */
    MemoryRegion mmio_bar;
    /*
     * Platform flags; only the PFFLAG_ROM_LOCK bit is ever stored here.
     * The original note reads "used only for version_id == 2".
     * NOTE(review): the vmstate description in this file is version 4 and
     * still saves this field, so that note may be stale - confirm.
     */
    uint8_t flags; /* used only for version_id == 2 */
    /* Last PV product number the guest wrote to fixed port offset 2 */
    uint16_t driver_product_version;

    /* Log from guest drivers */
    char log_buffer[4096];
    /* Index of the next free byte in log_buffer */
    int log_buffer_off;
};
73 
74 #define TYPE_XEN_PLATFORM "xen-platform"
75 OBJECT_DECLARE_SIMPLE_TYPE(PCIXenPlatformState, XEN_PLATFORM)
76 
77 #define XEN_PLATFORM_IOPORT 0x10
78 
79 /* Send bytes to syslog */
80 static void log_writeb(PCIXenPlatformState *s, char val)
81 {
82     if (val == '\n' || s->log_buffer_off == sizeof(s->log_buffer) - 1) {
83         /* Flush buffer */
84         s->log_buffer[s->log_buffer_off] = 0;
85         trace_xen_platform_log(s->log_buffer);
86         s->log_buffer_off = 0;
87     } else {
88         s->log_buffer[s->log_buffer_off++] = val;
89     }
90 }
91 
92 /*
93  * Unplug device flags.
94  *
95  * The logic got a little confused at some point in the past but this is
96  * what they do now.
97  *
98  * bit 0: Unplug all IDE and SCSI disks.
99  * bit 1: Unplug all NICs.
100  * bit 2: Unplug IDE disks except primary master. This is overridden if
101  *        bit 0 is also present in the mask.
102  * bit 3: Unplug all NVMe disks.
103  *
104  */
/*
 * Each flag is defined as a bit position (leading underscore) and the
 * corresponding single-bit mask.
 * NOTE(review): identifiers starting with an underscore followed by an
 * uppercase letter are reserved in C; renaming would be a separate cleanup.
 */

/* bit 0: all IDE and SCSI disks */
#define _UNPLUG_IDE_SCSI_DISKS 0
#define UNPLUG_IDE_SCSI_DISKS (1u << _UNPLUG_IDE_SCSI_DISKS)

/* bit 1: all NICs */
#define _UNPLUG_ALL_NICS 1
#define UNPLUG_ALL_NICS (1u << _UNPLUG_ALL_NICS)

/* bit 2: IDE disks other than the primary master */
#define _UNPLUG_AUX_IDE_DISKS 2
#define UNPLUG_AUX_IDE_DISKS (1u << _UNPLUG_AUX_IDE_DISKS)

/* bit 3: all NVMe disks */
#define _UNPLUG_NVME_DISKS 3
#define UNPLUG_NVME_DISKS (1u << _UNPLUG_NVME_DISKS)
116 
117 static bool pci_device_is_passthrough(PCIDevice *d)
118 {
119     if (!strcmp(d->name, "xen-pci-passthrough")) {
120         return true;
121     }
122 
123     if (xen_mode == XEN_EMULATE && !strcmp(d->name, "vfio-pci")) {
124         return true;
125     }
126 
127     return false;
128 }
129 
130 static void unplug_nic(PCIBus *b, PCIDevice *d, void *o)
131 {
132     /* We have to ignore passthrough devices */
133     if (pci_get_word(d->config + PCI_CLASS_DEVICE) ==
134             PCI_CLASS_NETWORK_ETHERNET
135             && !pci_device_is_passthrough(d)) {
136         object_unparent(OBJECT(d));
137     }
138 }
139 
140 /* Remove the peer of the NIC device. Normally, this would be a tap device. */
141 static void del_nic_peer(NICState *nic, void *opaque)
142 {
143     NetClientState *nc;
144 
145     nc = qemu_get_queue(nic);
146     if (nc->peer)
147         qemu_del_net_client(nc->peer);
148 }
149 
/*
 * Unplug the emulated NICs on @bus.
 *
 * First deletes the peer net client of every NIC known to the net layer,
 * then walks the devices on @bus and removes each one accepted by
 * unplug_nic().
 */
static void pci_unplug_nics(PCIBus *bus)
{
    qemu_foreach_nic(del_nic_peer, NULL);
    pci_for_each_device(bus, 0, unplug_nic, NULL);
}
155 
156 /*
157  * The Xen HVM unplug protocol [1] specifies a mechanism to allow guests to
158  * request unplug of 'aux' disks (which is stated to mean all IDE disks,
159  * except the primary master).
160  *
161  * NOTE: The semantics of what happens if unplug of all disks and 'aux' disks
162  *       is simultaneously requested is not clear. The implementation assumes
163  *       that an 'all' request overrides an 'aux' request.
164  *
165  * [1] https://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=docs/misc/hvm-emulated-unplug.pandoc
166  */
167 static void pci_xen_ide_unplug(DeviceState *dev, bool aux)
168 {
169     PCIIDEState *pci_ide;
170     int i;
171     IDEDevice *idedev;
172     IDEBus *idebus;
173     BlockBackend *blk;
174 
175     pci_ide = PCI_IDE(dev);
176 
177     for (i = aux ? 1 : 0; i < 4; i++) {
178         idebus = &pci_ide->bus[i / 2];
179         blk = idebus->ifs[i % 2].blk;
180 
181         if (blk && idebus->ifs[i % 2].drive_kind != IDE_CD) {
182             if (!(i % 2)) {
183                 idedev = idebus->master;
184             } else {
185                 idedev = idebus->slave;
186             }
187 
188             blk_drain(blk);
189             blk_flush(blk);
190 
191             blk_detach_dev(blk, DEVICE(idedev));
192             idebus->ifs[i % 2].blk = NULL;
193             idedev->conf.blk = NULL;
194             monitor_remove_blk(blk);
195             blk_unref(blk);
196         }
197     }
198     device_cold_reset(dev);
199 }
200 
201 static void unplug_disks(PCIBus *b, PCIDevice *d, void *opaque)
202 {
203     uint32_t flags = *(uint32_t *)opaque;
204     bool aux = (flags & UNPLUG_AUX_IDE_DISKS) &&
205         !(flags & UNPLUG_IDE_SCSI_DISKS);
206 
207     /* We have to ignore passthrough devices */
208     if (pci_device_is_passthrough(d))
209         return;
210 
211     switch (pci_get_word(d->config + PCI_CLASS_DEVICE)) {
212     case PCI_CLASS_STORAGE_IDE:
213         pci_xen_ide_unplug(DEVICE(d), aux);
214         break;
215 
216     case PCI_CLASS_STORAGE_SCSI:
217         if (!aux) {
218             object_unparent(OBJECT(d));
219         }
220         break;
221 
222     case PCI_CLASS_STORAGE_EXPRESS:
223         if (flags & UNPLUG_NVME_DISKS) {
224             object_unparent(OBJECT(d));
225         }
226 
227     default:
228         break;
229     }
230 }
231 
/*
 * Walk the devices on @bus and hand each one to unplug_disks() together
 * with the UNPLUG_* request mask in @flags.
 */
static void pci_unplug_disks(PCIBus *bus, uint32_t flags)
{
    /* The callback reads the mask through the opaque pointer. */
    pci_for_each_device(bus, 0, unplug_disks, &flags);
}
236 
237 static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t val)
238 {
239     PCIXenPlatformState *s = opaque;
240 
241     switch (addr) {
242     case 0: {
243         PCIDevice *pci_dev = PCI_DEVICE(s);
244         /* Unplug devices. See comment above flag definitions */
245         if (val & (UNPLUG_IDE_SCSI_DISKS | UNPLUG_AUX_IDE_DISKS |
246                    UNPLUG_NVME_DISKS)) {
247             DPRINTF("unplug disks\n");
248             pci_unplug_disks(pci_get_bus(pci_dev), val);
249         }
250         if (val & UNPLUG_ALL_NICS) {
251             DPRINTF("unplug nics\n");
252             pci_unplug_nics(pci_get_bus(pci_dev));
253         }
254         break;
255     }
256     case 2:
257         switch (val) {
258         case 1:
259             DPRINTF("Citrix Windows PV drivers loaded in guest\n");
260             break;
261         case 0:
262             DPRINTF("Guest claimed to be running PV product 0?\n");
263             break;
264         default:
265             DPRINTF("Unknown PV product %d loaded in guest\n", val);
266             break;
267         }
268         s->driver_product_version = val;
269         break;
270     }
271 }
272 
/*
 * 32-bit write to the fixed I/O port window.
 *
 * Offset 0 is where the guest reports its PV driver version; the value is
 * accepted and discarded. Writes to other offsets are ignored as well, so
 * this handler currently has no effect.
 */
static void platform_fixed_ioport_writel(void *opaque, uint32_t addr,
                                         uint32_t val)
{
    switch (addr) {
    case 0:
        /* PV driver version */
        break;
    }
}
282 
/*
 * Byte write to the fixed I/O port window.
 *
 * Offset 0 (platform flags): only the PFFLAG_ROM_LOCK bit of @val is used.
 *   - When xen_mode is XEN_EMULATE the bit is simply stored in s->flags.
 *   - Otherwise, and only when built with CONFIG_XEN, xen_set_mem_type() is
 *     asked to switch the ROM area to read-only (bit set) or read-write (bit
 *     clear); s->flags is updated only if that call reports success (0).
 *   - Without CONFIG_XEN and outside XEN_EMULATE mode the write does nothing.
 * Offset 2: the byte is appended to the guest log buffer.
 * Writes to other offsets are ignored.
 */
static void platform_fixed_ioport_writeb(void *opaque, uint32_t addr, uint32_t val)
{
    PCIXenPlatformState *s = opaque;

    switch (addr) {
    case 0: /* Platform flags */
        if (xen_mode == XEN_EMULATE) {
            /* XX: Use i440gx/q35 PAM setup to do this? */
            s->flags = val & PFFLAG_ROM_LOCK;
#ifdef CONFIG_XEN
        /* The else branch only exists in builds with CONFIG_XEN. */
        } else {
            hvmmem_type_t mem_type = (val & PFFLAG_ROM_LOCK) ?
                HVMMEM_ram_ro : HVMMEM_ram_rw;

            /*
             * NOTE(review): 0xc0 / 0x40 are presumably the first page frame
             * number and page count of the ROM area (0xC0000-0xFFFFF with
             * 4 KiB pages) - confirm against xen_set_mem_type().
             */
            if (xen_set_mem_type(xen_domid, mem_type, 0xc0, 0x40)) {
                DPRINTF("unable to change ro/rw state of ROM memory area!\n");
            } else {
                s->flags = val & PFFLAG_ROM_LOCK;
                DPRINTF("changed ro/rw state of ROM memory area. now is %s state.\n",
                        (mem_type == HVMMEM_ram_ro ? "ro" : "rw"));
            }
#endif
        }
        break;

    case 2:
        /* Guest driver log output, one byte at a time */
        log_writeb(s, val);
        break;
    }
}
313 
/*
 * 16-bit read from the fixed I/O port window.
 *
 * Offset 0 yields the magic value 0x49d2 that identifies the interface;
 * every other offset reads as 0xffff. @opaque is unused.
 */
static uint32_t platform_fixed_ioport_readw(void *opaque, uint32_t addr)
{
    if (addr == 0) {
        /* Magic value so that you can identify the interface. */
        return 0x49d2;
    }

    return 0xffff;
}
324 
325 static uint32_t platform_fixed_ioport_readb(void *opaque, uint32_t addr)
326 {
327     PCIXenPlatformState *s = opaque;
328 
329     switch (addr) {
330     case 0:
331         /* Platform flags */
332         return s->flags;
333     case 2:
334         /* Version number */
335         return 1;
336     default:
337         return 0xff;
338     }
339 }
340 
/*
 * Reset the fixed I/O port state by writing 0 to the platform flags byte
 * (offset 0), exactly as a guest write would.
 */
static void platform_fixed_ioport_reset(void *opaque)
{
    platform_fixed_ioport_writeb(opaque, 0, 0);
}
347 
348 static uint64_t platform_fixed_ioport_read(void *opaque,
349                                            hwaddr addr,
350                                            unsigned size)
351 {
352     switch (size) {
353     case 1:
354         return platform_fixed_ioport_readb(opaque, addr);
355     case 2:
356         return platform_fixed_ioport_readw(opaque, addr);
357     default:
358         return -1;
359     }
360 }
361 
362 static void platform_fixed_ioport_write(void *opaque, hwaddr addr,
363 
364                                         uint64_t val, unsigned size)
365 {
366     switch (size) {
367     case 1:
368         platform_fixed_ioport_writeb(opaque, addr, val);
369         break;
370     case 2:
371         platform_fixed_ioport_writew(opaque, addr, val);
372         break;
373     case 4:
374         platform_fixed_ioport_writel(opaque, addr, val);
375         break;
376     }
377 }
378 
379 
/*
 * Access callbacks for the fixed I/O port window.
 *
 * Unaligned accesses are declared valid, the handlers implement access
 * sizes from 1 to 4 bytes, and the region is little-endian.
 */
static const MemoryRegionOps platform_fixed_io_ops = {
    .read = platform_fixed_ioport_read,
    .write = platform_fixed_ioport_write,
    .valid = {
        .unaligned = true,
    },
    .impl = {
        .min_access_size = 1,
        .max_access_size = 4,
        .unaligned = true,
    },
    .endianness = DEVICE_LITTLE_ENDIAN,
};
393 
394 static void platform_fixed_ioport_init(PCIXenPlatformState* s)
395 {
396     memory_region_init_io(&s->fixed_io, OBJECT(s), &platform_fixed_io_ops, s,
397                           "xen-fixed", 16);
398     memory_region_add_subregion(get_system_io(), XEN_PLATFORM_IOPORT,
399                                 &s->fixed_io);
400 }
401 
402 /* Xen Platform PCI Device */
403 
404 static uint64_t xen_platform_ioport_readb(void *opaque, hwaddr addr,
405                                           unsigned int size)
406 {
407     if (addr == 0) {
408         return platform_fixed_ioport_readb(opaque, 0);
409     } else {
410         return ~0u;
411     }
412 }
413 
414 static void xen_platform_ioport_writeb(void *opaque, hwaddr addr,
415                                        uint64_t val, unsigned int size)
416 {
417     PCIXenPlatformState *s = opaque;
418     PCIDevice *pci_dev = PCI_DEVICE(s);
419 
420     switch (addr) {
421     case 0: /* Platform flags */
422         platform_fixed_ioport_writeb(opaque, 0, (uint32_t)val);
423         break;
424     case 4:
425         if (val == 1) {
426             /*
427              * SUSE unplug for Xenlinux
428              * xen-kmp used this since xen-3.0.4, instead the official protocol
429              * from xen-3.3+ It did an unconditional "outl(1, (ioaddr + 4));"
430              * Pre VMDP 1.7 used 4 and 8 depending on how VMDP was configured.
431              * If VMDP was to control both disk and LAN it would use 4.
432              * If it controlled just disk or just LAN, it would use 8 below.
433              */
434             pci_unplug_disks(pci_get_bus(pci_dev), UNPLUG_IDE_SCSI_DISKS);
435             pci_unplug_nics(pci_get_bus(pci_dev));
436         }
437         break;
438     case 8:
439         switch (val) {
440         case 1:
441             pci_unplug_disks(pci_get_bus(pci_dev), UNPLUG_IDE_SCSI_DISKS);
442             break;
443         case 2:
444             pci_unplug_nics(pci_get_bus(pci_dev));
445             break;
446         default:
447             log_writeb(s, (uint32_t)val);
448             break;
449         }
450         break;
451     default:
452         break;
453     }
454 }
455 
/*
 * Access callbacks for the PCI I/O BAR. The handlers are implemented for
 * single-byte accesses only (impl min and max access size are both 1).
 */
static const MemoryRegionOps xen_pci_io_ops = {
    .read  = xen_platform_ioport_readb,
    .write = xen_platform_ioport_writeb,
    .impl.min_access_size = 1,
    .impl.max_access_size = 1,
};
462 
/*
 * Initialise the 0x100-byte "xen-pci" I/O region backed by xen_pci_io_ops.
 * The region is only created here; registering it as a BAR is left to the
 * caller.
 */
static void platform_ioport_bar_setup(PCIXenPlatformState *d)
{
    memory_region_init_io(&d->bar, OBJECT(d), &xen_pci_io_ops, d,
                          "xen-pci", 0x100);
}
468 
/*
 * Read handler for the MMIO BAR. No registers are implemented: every read
 * returns 0 and, in DEBUG_PLATFORM builds, logs a warning with the address.
 */
static uint64_t platform_mmio_read(void *opaque, hwaddr addr,
                                   unsigned size)
{
    DPRINTF("Warning: attempted read from physical address "
            "0x" HWADDR_FMT_plx " in xen platform mmio space\n", addr);

    return 0;
}
477 
/*
 * Write handler for the MMIO BAR. Writes are discarded; in DEBUG_PLATFORM
 * builds a warning with the value and address is logged.
 */
static void platform_mmio_write(void *opaque, hwaddr addr,
                                uint64_t val, unsigned size)
{
    DPRINTF("Warning: attempted write of 0x%"PRIx64" to physical "
            "address 0x" HWADDR_FMT_plx " in xen platform mmio space\n",
            val, addr);
}
485 
/* Access callbacks for the MMIO BAR; the region uses native endianness. */
static const MemoryRegionOps platform_mmio_handler = {
    .read = &platform_mmio_read,
    .write = &platform_mmio_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
491 
/*
 * Initialise the "xen-mmio" region (0x1000000 bytes, i.e. 16 MiB) backed by
 * platform_mmio_handler. Registering it as a BAR is left to the caller.
 */
static void platform_mmio_setup(PCIXenPlatformState *d)
{
    memory_region_init_io(&d->mmio_bar, OBJECT(d), &platform_mmio_handler, d,
                          "xen-mmio", 0x1000000);
}
497 
498 static int xen_platform_post_load(void *opaque, int version_id)
499 {
500     PCIXenPlatformState *s = opaque;
501 
502     platform_fixed_ioport_writeb(s, 0, s->flags);
503 
504     return 0;
505 }
506 
/*
 * Migration description for the device: the generic PCI device state plus
 * the platform flags byte. Version 4 is both the current and the minimum
 * accepted version; xen_platform_post_load() runs after the fields are
 * loaded.
 */
static const VMStateDescription vmstate_xen_platform = {
    .name = "platform",
    .version_id = 4,
    .minimum_version_id = 4,
    .post_load = xen_platform_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, PCIXenPlatformState),
        VMSTATE_UINT8(flags, PCIXenPlatformState),
        VMSTATE_END_OF_LIST()
    }
};
518 
519 static void xen_platform_realize(PCIDevice *dev, Error **errp)
520 {
521     PCIXenPlatformState *d = XEN_PLATFORM(dev);
522     uint8_t *pci_conf;
523 
524     /* Device will crash on reset if xen is not initialized */
525     if (xen_mode == XEN_DISABLED) {
526         error_setg(errp, "xen-platform device requires a Xen guest");
527         return;
528     }
529 
530     pci_conf = dev->config;
531 
532     pci_set_word(pci_conf + PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
533 
534     pci_config_set_prog_interface(pci_conf, 0);
535 
536     pci_conf[PCI_INTERRUPT_PIN] = 1;
537 
538     platform_ioport_bar_setup(d);
539     pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &d->bar);
540 
541     /* reserve 16MB mmio address for share memory*/
542     platform_mmio_setup(d);
543     pci_register_bar(dev, 1, PCI_BASE_ADDRESS_MEM_PREFETCH,
544                      &d->mmio_bar);
545 
546     platform_fixed_ioport_init(d);
547 }
548 
549 static void platform_reset(DeviceState *dev)
550 {
551     PCIXenPlatformState *s = XEN_PLATFORM(dev);
552 
553     platform_fixed_ioport_reset(s);
554 }
555 
/*
 * Class initialiser: installs the realize and reset callbacks, the
 * migration description, and the PCI identification of the device.
 * @data is unused.
 */
static void xen_platform_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->realize = xen_platform_realize;
    /* Vendor/device and subsystem IDs all identify the Xen platform device */
    k->vendor_id = PCI_VENDOR_ID_XEN;
    k->device_id = PCI_DEVICE_ID_XEN_PLATFORM;
    /* Base class PCI_CLASS_OTHERS in the high byte, 0x80 in the low byte */
    k->class_id = PCI_CLASS_OTHERS << 8 | 0x80;
    k->subsystem_vendor_id = PCI_VENDOR_ID_XEN;
    k->subsystem_id = PCI_DEVICE_ID_XEN_PLATFORM;
    k->revision = 1;
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    dc->desc = "XEN platform pci device";
    dc->reset = platform_reset;
    dc->vmsd = &vmstate_xen_platform;
}
573 
/*
 * QOM type description: "xen-platform" derives from TYPE_PCI_DEVICE and
 * implements the conventional PCI device interface.
 */
static const TypeInfo xen_platform_info = {
    .name          = TYPE_XEN_PLATFORM,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(PCIXenPlatformState),
    .class_init    = xen_platform_class_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};
584 
/* Register the xen-platform type with the QOM type system. */
static void xen_platform_register_types(void)
{
    type_register_static(&xen_platform_info);
}

/* Hook the registration function into QEMU's type initialisation. */
type_init(xen_platform_register_types)
591