xref: /openbmc/qemu/hw/scsi/vmw_pvscsi.c (revision 0221d73c)
1 /*
2  * QEMU VMWARE PVSCSI paravirtual SCSI bus
3  *
4  * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
5  *
6  * Developed by Daynix Computing LTD (http://www.daynix.com)
7  *
8  * Based on implementation by Paolo Bonzini
9  * http://lists.gnu.org/archive/html/qemu-devel/2011-08/msg00729.html
10  *
11  * Authors:
12  * Paolo Bonzini <pbonzini@redhat.com>
13  * Dmitry Fleytman <dmitry@daynix.com>
14  * Yan Vugenfirer <yan@daynix.com>
15  *
16  * This work is licensed under the terms of the GNU GPL, version 2.
17  * See the COPYING file in the top-level directory.
18  *
19  * NOTE about MSI-X:
 * MSI-X support has been removed for the moment because it causes Windows
 * to crash on startup. The crash happens because the Windows driver requires
 * the MSI-X shared memory to be part of the same BAR that is used for the
 * rings state, registers, etc. This is not supported by the QEMU
 * infrastructure, so a separate BAR is created for MSI-X purposes. The
 * Windows driver fails to deal with 2 BARs.
25  *
26  */
27 
28 #include "qemu/osdep.h"
29 #include "qapi/error.h"
30 #include "qemu/main-loop.h"
31 #include "qemu/module.h"
32 #include "hw/scsi/scsi.h"
33 #include "migration/vmstate.h"
34 #include "scsi/constants.h"
35 #include "hw/pci/msi.h"
36 #include "hw/qdev-properties.h"
37 #include "vmw_pvscsi.h"
38 #include "trace.h"
39 
40 
/*
 * NOTE(review): not referenced in the visible part of this file; presumably
 * MSI capability options consumed by the device setup code - confirm.
 */
#define PVSCSI_USE_64BIT         (true)
#define PVSCSI_PER_VECTOR_MASK   (false)

/* Upper bound the target id is checked against in pvscsi_device_find() */
#define PVSCSI_MAX_DEVS                   (64)
#define PVSCSI_MSIX_NUM_VECTORS           (1)

/* Maximum number of scatter-gather elements fetched for a single request */
#define PVSCSI_MAX_SG_ELEM                2048

/*
 * Number of 32-bit words in a PVSCSICmdDescSetupRings descriptor; used to
 * size the curr_cmd_data[] collector in PVSCSIState.
 */
#define PVSCSI_MAX_CMD_DATA_WORDS \
    (sizeof(PVSCSICmdDescSetupRings)/sizeof(uint32_t))
51 
/*
 * Accessors for 32-bit little-endian fields of the guest-resident
 * struct PVSCSIRingsState located at (m)->rs_pa.
 *
 * @m must point at the 'rings' member embedded in a PVSCSIState: the owning
 * PCI device used for the DMA access is recovered with container_of().
 */
#define RS_GET_FIELD(m, field) \
    (ldl_le_pci_dma(&container_of(m, PVSCSIState, rings)->parent_obj, \
                 (m)->rs_pa + offsetof(struct PVSCSIRingsState, field)))
#define RS_SET_FIELD(m, field, val) \
    (stl_le_pci_dma(&container_of(m, PVSCSIState, rings)->parent_obj, \
                 (m)->rs_pa + offsetof(struct PVSCSIRingsState, field), val))
58 
/* Class structure of the "pvscsi" device type. */
typedef struct PVSCSIClass {
    PCIDeviceClass parent_class;
    /*
     * NOTE(review): presumably holds the parent class's realize handler so
     * it can be chained; its use is outside the visible part of the file.
     */
    DeviceRealize parent_dc_realize;
} PVSCSIClass;
63 
/* QOM type name and checked-cast helpers for the device instance and class */
#define TYPE_PVSCSI "pvscsi"
#define PVSCSI(obj) OBJECT_CHECK(PVSCSIState, (obj), TYPE_PVSCSI)

#define PVSCSI_DEVICE_CLASS(klass) \
    OBJECT_CLASS_CHECK(PVSCSIClass, (klass), TYPE_PVSCSI)
#define PVSCSI_DEVICE_GET_CLASS(obj) \
    OBJECT_GET_CLASS(PVSCSIClass, (obj), TYPE_PVSCSI)
71 
/*
 * Compatibility flags for migration.
 * Each flag is a bit number plus the matching mask; the masks are tested
 * against PVSCSIState.compat_flags.
 */
#define PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT 0
#define PVSCSI_COMPAT_OLD_PCI_CONFIGURATION \
    (1 << PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT)
#define PVSCSI_COMPAT_DISABLE_PCIE_BIT 1
#define PVSCSI_COMPAT_DISABLE_PCIE \
    (1 << PVSCSI_COMPAT_DISABLE_PCIE_BIT)

/* Non-zero when the old PCI configuration compat flag is set on @s */
#define PVSCSI_USE_OLD_PCI_CONFIGURATION(s) \
    ((s)->compat_flags & PVSCSI_COMPAT_OLD_PCI_CONFIGURATION)
/* Offset selected by the compat flag: 0x50 for the old layout, else 0x7c */
#define PVSCSI_MSI_OFFSET(s) \
    (PVSCSI_USE_OLD_PCI_CONFIGURATION(s) ? 0x50 : 0x7c)
#define PVSCSI_EXP_EP_OFFSET (0x40)
85 
/*
 * Device-side bookkeeping for the request, completion and message rings
 * that live in guest memory.
 */
typedef struct PVSCSIRingInfo {
    /* Guest-physical address of the rings state page */
    uint64_t            rs_pa;
    /* Index masks applied to the running pointers below */
    uint32_t            txr_len_mask;   /* request ring    */
    uint32_t            rxr_len_mask;   /* completion ring */
    uint32_t            msg_len_mask;   /* message ring    */
    /* Guest-physical address of every page of each ring */
    uint64_t            req_ring_pages_pa[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
    uint64_t            cmp_ring_pages_pa[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
    uint64_t            msg_ring_pages_pa[PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES];
    /* Running count of request descriptors taken from the request ring */
    uint64_t            consumed_ptr;
    /* Running count of descriptors written to the completion ring */
    uint64_t            filled_cmp_ptr;
    /* Running count of descriptors written to the message ring */
    uint64_t            filled_msg_ptr;
} PVSCSIRingInfo;
98 
/* Cursor used while walking a guest scatter-gather element array */
typedef struct PVSCSISGState {
    hwaddr elemAddr;    /* Address of the next SG element to fetch         */
    hwaddr dataAddr;    /* Current data address within the fetched element */
    uint32_t resid;     /* Bytes of the fetched element not yet consumed   */
} PVSCSISGState;
104 
/* Tail queue of PVSCSIRequest entries, linked through their 'next' field */
typedef QTAILQ_HEAD(, PVSCSIRequest) PVSCSIRequestList;
106 
/* Per-device state of the PVSCSI adapter */
typedef struct {
    PCIDevice parent_obj;
    MemoryRegion io_space;
    SCSIBus bus;
    QEMUBH *completion_worker;            /* Bottom half scheduled to drain completion_queue */
    PVSCSIRequestList pending_queue;      /* Requests not yet completed      */
    PVSCSIRequestList completion_queue;   /* Completed, not yet on the ring  */

    uint64_t reg_interrupt_status;        /* Interrupt status register value */
    uint64_t reg_interrupt_enabled;       /* Interrupt mask register value   */
    uint64_t reg_command_status;          /* Command status register value   */

    /* Command data collection mechanism */
    uint64_t curr_cmd;                   /* Last command arrived             */
    uint32_t curr_cmd_data_cntr;         /* Number of 32-bit data words received for it */

    /* Collector for current command data */
    uint32_t curr_cmd_data[PVSCSI_MAX_CMD_DATA_WORDS];

    uint8_t rings_info_valid;            /* Whether data rings initialized   */
    uint8_t msg_ring_info_valid;         /* Whether message ring initialized */
    uint8_t use_msg;                     /* Whether to use message ring      */

    uint8_t msi_used;                    /* For migration compatibility      */
    PVSCSIRingInfo rings;                /* Data transfer rings manager      */
    uint32_t resetting;                  /* Reset in progress (nesting count) */

    uint32_t compat_flags;               /* PVSCSI_COMPAT_* bits             */
} PVSCSIState;
136 
/* One guest request, tracked from ring descriptor to completion descriptor */
typedef struct PVSCSIRequest {
    SCSIRequest *sreq;              /* SCSI layer request; NULL if none or already released */
    PVSCSIState *dev;               /* Owning adapter                               */
    uint8_t sense_key;              /* Sense key extracted by pvscsi_write_sense()  */
    uint8_t completed;              /* Set once moved to the completion queue       */
    int lun;                        /* LUN resolved by pvscsi_device_find()         */
    QEMUSGList sgl;                 /* DMA scatter-gather list of the data buffer   */
    PVSCSISGState sg;               /* Guest SG element array cursor                */
    struct PVSCSIRingReqDesc req;   /* Copy of the request descriptor               */
    struct PVSCSIRingCmpDesc cmp;   /* Completion descriptor being built            */
    QTAILQ_ENTRY(PVSCSIRequest) next;
} PVSCSIRequest;
149 
/*
 * Number of significant bits in @input, i.e. floor(log2(input)) + 1.
 *
 * Callers pass "ring size - 1", so for a power-of-two ring size the result
 * equals log2(ring size).  @input must be non-zero.
 */
static int
pvscsi_log2(uint32_t input)
{
    int log = 0;

    assert(input > 0);

    /*
     * Shift the value itself one bit at a time instead of shifting by a
     * growing count: the count would reach 32 for inputs with bit 31 set,
     * and shifting a 32-bit value by 32 is undefined behaviour.
     */
    while (input) {
        input >>= 1;
        log++;
    }
    return log;
}
160 
161 static void
162 pvscsi_ring_init_data(PVSCSIRingInfo *m, PVSCSICmdDescSetupRings *ri)
163 {
164     int i;
165     uint32_t txr_len_log2, rxr_len_log2;
166     uint32_t req_ring_size, cmp_ring_size;
167     m->rs_pa = ri->ringsStatePPN << VMW_PAGE_SHIFT;
168 
169     req_ring_size = ri->reqRingNumPages * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
170     cmp_ring_size = ri->cmpRingNumPages * PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE;
171     txr_len_log2 = pvscsi_log2(req_ring_size - 1);
172     rxr_len_log2 = pvscsi_log2(cmp_ring_size - 1);
173 
174     m->txr_len_mask = MASK(txr_len_log2);
175     m->rxr_len_mask = MASK(rxr_len_log2);
176 
177     m->consumed_ptr = 0;
178     m->filled_cmp_ptr = 0;
179 
180     for (i = 0; i < ri->reqRingNumPages; i++) {
181         m->req_ring_pages_pa[i] = ri->reqRingPPNs[i] << VMW_PAGE_SHIFT;
182     }
183 
184     for (i = 0; i < ri->cmpRingNumPages; i++) {
185         m->cmp_ring_pages_pa[i] = ri->cmpRingPPNs[i] << VMW_PAGE_SHIFT;
186     }
187 
188     RS_SET_FIELD(m, reqProdIdx, 0);
189     RS_SET_FIELD(m, reqConsIdx, 0);
190     RS_SET_FIELD(m, reqNumEntriesLog2, txr_len_log2);
191 
192     RS_SET_FIELD(m, cmpProdIdx, 0);
193     RS_SET_FIELD(m, cmpConsIdx, 0);
194     RS_SET_FIELD(m, cmpNumEntriesLog2, rxr_len_log2);
195 
196     trace_pvscsi_ring_init_data(txr_len_log2, rxr_len_log2);
197 
198     /* Flush ring state page changes */
199     smp_wmb();
200 }
201 
202 static int
203 pvscsi_ring_init_msg(PVSCSIRingInfo *m, PVSCSICmdDescSetupMsgRing *ri)
204 {
205     int i;
206     uint32_t len_log2;
207     uint32_t ring_size;
208 
209     if (!ri->numPages || ri->numPages > PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES) {
210         return -1;
211     }
212     ring_size = ri->numPages * PVSCSI_MAX_NUM_MSG_ENTRIES_PER_PAGE;
213     len_log2 = pvscsi_log2(ring_size - 1);
214 
215     m->msg_len_mask = MASK(len_log2);
216 
217     m->filled_msg_ptr = 0;
218 
219     for (i = 0; i < ri->numPages; i++) {
220         m->msg_ring_pages_pa[i] = ri->ringPPNs[i] << VMW_PAGE_SHIFT;
221     }
222 
223     RS_SET_FIELD(m, msgProdIdx, 0);
224     RS_SET_FIELD(m, msgConsIdx, 0);
225     RS_SET_FIELD(m, msgNumEntriesLog2, len_log2);
226 
227     trace_pvscsi_ring_init_msg(len_log2);
228 
229     /* Flush ring state page changes */
230     smp_wmb();
231 
232     return 0;
233 }
234 
235 static void
236 pvscsi_ring_cleanup(PVSCSIRingInfo *mgr)
237 {
238     mgr->rs_pa = 0;
239     mgr->txr_len_mask = 0;
240     mgr->rxr_len_mask = 0;
241     mgr->msg_len_mask = 0;
242     mgr->consumed_ptr = 0;
243     mgr->filled_cmp_ptr = 0;
244     mgr->filled_msg_ptr = 0;
245     memset(mgr->req_ring_pages_pa, 0, sizeof(mgr->req_ring_pages_pa));
246     memset(mgr->cmp_ring_pages_pa, 0, sizeof(mgr->cmp_ring_pages_pa));
247     memset(mgr->msg_ring_pages_pa, 0, sizeof(mgr->msg_ring_pages_pa));
248 }
249 
250 static hwaddr
251 pvscsi_ring_pop_req_descr(PVSCSIRingInfo *mgr)
252 {
253     uint32_t ready_ptr = RS_GET_FIELD(mgr, reqProdIdx);
254     uint32_t ring_size = PVSCSI_MAX_NUM_PAGES_REQ_RING
255                             * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
256 
257     if (ready_ptr != mgr->consumed_ptr
258         && ready_ptr - mgr->consumed_ptr < ring_size) {
259         uint32_t next_ready_ptr =
260             mgr->consumed_ptr++ & mgr->txr_len_mask;
261         uint32_t next_ready_page =
262             next_ready_ptr / PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
263         uint32_t inpage_idx =
264             next_ready_ptr % PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
265 
266         return mgr->req_ring_pages_pa[next_ready_page] +
267                inpage_idx * sizeof(PVSCSIRingReqDesc);
268     } else {
269         return 0;
270     }
271 }
272 
273 static void
274 pvscsi_ring_flush_req(PVSCSIRingInfo *mgr)
275 {
276     RS_SET_FIELD(mgr, reqConsIdx, mgr->consumed_ptr);
277 }
278 
279 static hwaddr
280 pvscsi_ring_pop_cmp_descr(PVSCSIRingInfo *mgr)
281 {
282     /*
283      * According to Linux driver code it explicitly verifies that number
284      * of requests being processed by device is less then the size of
285      * completion queue, so device may omit completion queue overflow
286      * conditions check. We assume that this is true for other (Windows)
287      * drivers as well.
288      */
289 
290     uint32_t free_cmp_ptr =
291         mgr->filled_cmp_ptr++ & mgr->rxr_len_mask;
292     uint32_t free_cmp_page =
293         free_cmp_ptr / PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE;
294     uint32_t inpage_idx =
295         free_cmp_ptr % PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE;
296     return mgr->cmp_ring_pages_pa[free_cmp_page] +
297            inpage_idx * sizeof(PVSCSIRingCmpDesc);
298 }
299 
300 static hwaddr
301 pvscsi_ring_pop_msg_descr(PVSCSIRingInfo *mgr)
302 {
303     uint32_t free_msg_ptr =
304         mgr->filled_msg_ptr++ & mgr->msg_len_mask;
305     uint32_t free_msg_page =
306         free_msg_ptr / PVSCSI_MAX_NUM_MSG_ENTRIES_PER_PAGE;
307     uint32_t inpage_idx =
308         free_msg_ptr % PVSCSI_MAX_NUM_MSG_ENTRIES_PER_PAGE;
309     return mgr->msg_ring_pages_pa[free_msg_page] +
310            inpage_idx * sizeof(PVSCSIRingMsgDesc);
311 }
312 
313 static void
314 pvscsi_ring_flush_cmp(PVSCSIRingInfo *mgr)
315 {
316     /* Flush descriptor changes */
317     smp_wmb();
318 
319     trace_pvscsi_ring_flush_cmp(mgr->filled_cmp_ptr);
320 
321     RS_SET_FIELD(mgr, cmpProdIdx, mgr->filled_cmp_ptr);
322 }
323 
324 static bool
325 pvscsi_ring_msg_has_room(PVSCSIRingInfo *mgr)
326 {
327     uint32_t prodIdx = RS_GET_FIELD(mgr, msgProdIdx);
328     uint32_t consIdx = RS_GET_FIELD(mgr, msgConsIdx);
329 
330     return (prodIdx - consIdx) < (mgr->msg_len_mask + 1);
331 }
332 
333 static void
334 pvscsi_ring_flush_msg(PVSCSIRingInfo *mgr)
335 {
336     /* Flush descriptor changes */
337     smp_wmb();
338 
339     trace_pvscsi_ring_flush_msg(mgr->filled_msg_ptr);
340 
341     RS_SET_FIELD(mgr, msgProdIdx, mgr->filled_msg_ptr);
342 }
343 
344 static void
345 pvscsi_reset_state(PVSCSIState *s)
346 {
347     s->curr_cmd = PVSCSI_CMD_FIRST;
348     s->curr_cmd_data_cntr = 0;
349     s->reg_command_status = PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
350     s->reg_interrupt_status = 0;
351     pvscsi_ring_cleanup(&s->rings);
352     s->rings_info_valid = FALSE;
353     s->msg_ring_info_valid = FALSE;
354     QTAILQ_INIT(&s->pending_queue);
355     QTAILQ_INIT(&s->completion_queue);
356 }
357 
358 static void
359 pvscsi_update_irq_status(PVSCSIState *s)
360 {
361     PCIDevice *d = PCI_DEVICE(s);
362     bool should_raise = s->reg_interrupt_enabled & s->reg_interrupt_status;
363 
364     trace_pvscsi_update_irq_level(should_raise, s->reg_interrupt_enabled,
365                                   s->reg_interrupt_status);
366 
367     if (msi_enabled(d)) {
368         if (should_raise) {
369             trace_pvscsi_update_irq_msi();
370             msi_notify(d, PVSCSI_VECTOR_COMPLETION);
371         }
372         return;
373     }
374 
375     pci_set_irq(d, !!should_raise);
376 }
377 
378 static void
379 pvscsi_raise_completion_interrupt(PVSCSIState *s)
380 {
381     s->reg_interrupt_status |= PVSCSI_INTR_CMPL_0;
382 
383     /* Memory barrier to flush interrupt status register changes*/
384     smp_wmb();
385 
386     pvscsi_update_irq_status(s);
387 }
388 
389 static void
390 pvscsi_raise_message_interrupt(PVSCSIState *s)
391 {
392     s->reg_interrupt_status |= PVSCSI_INTR_MSG_0;
393 
394     /* Memory barrier to flush interrupt status register changes*/
395     smp_wmb();
396 
397     pvscsi_update_irq_status(s);
398 }
399 
400 static void
401 pvscsi_cmp_ring_put(PVSCSIState *s, struct PVSCSIRingCmpDesc *cmp_desc)
402 {
403     hwaddr cmp_descr_pa;
404 
405     cmp_descr_pa = pvscsi_ring_pop_cmp_descr(&s->rings);
406     trace_pvscsi_cmp_ring_put(cmp_descr_pa);
407     cpu_physical_memory_write(cmp_descr_pa, (void *)cmp_desc,
408                               sizeof(*cmp_desc));
409 }
410 
411 static void
412 pvscsi_msg_ring_put(PVSCSIState *s, struct PVSCSIRingMsgDesc *msg_desc)
413 {
414     hwaddr msg_descr_pa;
415 
416     msg_descr_pa = pvscsi_ring_pop_msg_descr(&s->rings);
417     trace_pvscsi_msg_ring_put(msg_descr_pa);
418     cpu_physical_memory_write(msg_descr_pa, (void *)msg_desc,
419                               sizeof(*msg_desc));
420 }
421 
422 static void
423 pvscsi_process_completion_queue(void *opaque)
424 {
425     PVSCSIState *s = opaque;
426     PVSCSIRequest *pvscsi_req;
427     bool has_completed = false;
428 
429     while (!QTAILQ_EMPTY(&s->completion_queue)) {
430         pvscsi_req = QTAILQ_FIRST(&s->completion_queue);
431         QTAILQ_REMOVE(&s->completion_queue, pvscsi_req, next);
432         pvscsi_cmp_ring_put(s, &pvscsi_req->cmp);
433         g_free(pvscsi_req);
434         has_completed = true;
435     }
436 
437     if (has_completed) {
438         pvscsi_ring_flush_cmp(&s->rings);
439         pvscsi_raise_completion_interrupt(s);
440     }
441 }
442 
443 static void
444 pvscsi_reset_adapter(PVSCSIState *s)
445 {
446     s->resetting++;
447     qbus_reset_all(BUS(&s->bus));
448     s->resetting--;
449     pvscsi_process_completion_queue(s);
450     assert(QTAILQ_EMPTY(&s->pending_queue));
451     pvscsi_reset_state(s);
452 }
453 
454 static void
455 pvscsi_schedule_completion_processing(PVSCSIState *s)
456 {
457     /* Try putting more complete requests on the ring. */
458     if (!QTAILQ_EMPTY(&s->completion_queue)) {
459         qemu_bh_schedule(s->completion_worker);
460     }
461 }
462 
463 static void
464 pvscsi_complete_request(PVSCSIState *s, PVSCSIRequest *r)
465 {
466     assert(!r->completed);
467 
468     trace_pvscsi_complete_request(r->cmp.context, r->cmp.dataLen,
469                                   r->sense_key);
470     if (r->sreq != NULL) {
471         scsi_req_unref(r->sreq);
472         r->sreq = NULL;
473     }
474     r->completed = 1;
475     QTAILQ_REMOVE(&s->pending_queue, r, next);
476     QTAILQ_INSERT_TAIL(&s->completion_queue, r, next);
477     pvscsi_schedule_completion_processing(s);
478 }
479 
480 static QEMUSGList *pvscsi_get_sg_list(SCSIRequest *r)
481 {
482     PVSCSIRequest *req = r->hba_private;
483 
484     trace_pvscsi_get_sg_list(req->sgl.nsg, req->sgl.size);
485 
486     return &req->sgl;
487 }
488 
489 static void
490 pvscsi_get_next_sg_elem(PVSCSISGState *sg)
491 {
492     struct PVSCSISGElement elem;
493 
494     cpu_physical_memory_read(sg->elemAddr, (void *)&elem, sizeof(elem));
495     if ((elem.flags & ~PVSCSI_KNOWN_FLAGS) != 0) {
496         /*
497             * There is PVSCSI_SGE_FLAG_CHAIN_ELEMENT flag described in
498             * header file but its value is unknown. This flag requires
499             * additional processing, so we put warning here to catch it
500             * some day and make proper implementation
501             */
502         trace_pvscsi_get_next_sg_elem(elem.flags);
503     }
504 
505     sg->elemAddr += sizeof(elem);
506     sg->dataAddr = elem.addr;
507     sg->resid = elem.length;
508 }
509 
510 static void
511 pvscsi_write_sense(PVSCSIRequest *r, uint8_t *sense, int len)
512 {
513     r->cmp.senseLen = MIN(r->req.senseLen, len);
514     r->sense_key = sense[(sense[0] & 2) ? 1 : 2];
515     cpu_physical_memory_write(r->req.senseAddr, sense, r->cmp.senseLen);
516 }
517 
518 static void
519 pvscsi_command_complete(SCSIRequest *req, uint32_t status, size_t resid)
520 {
521     PVSCSIRequest *pvscsi_req = req->hba_private;
522     PVSCSIState *s;
523 
524     if (!pvscsi_req) {
525         trace_pvscsi_command_complete_not_found(req->tag);
526         return;
527     }
528     s = pvscsi_req->dev;
529 
530     if (resid) {
531         /* Short transfer.  */
532         trace_pvscsi_command_complete_data_run();
533         pvscsi_req->cmp.hostStatus = BTSTAT_DATARUN;
534     }
535 
536     pvscsi_req->cmp.scsiStatus = status;
537     if (pvscsi_req->cmp.scsiStatus == CHECK_CONDITION) {
538         uint8_t sense[SCSI_SENSE_BUF_SIZE];
539         int sense_len =
540             scsi_req_get_sense(pvscsi_req->sreq, sense, sizeof(sense));
541 
542         trace_pvscsi_command_complete_sense_len(sense_len);
543         pvscsi_write_sense(pvscsi_req, sense, sense_len);
544     }
545     qemu_sglist_destroy(&pvscsi_req->sgl);
546     pvscsi_complete_request(s, pvscsi_req);
547 }
548 
549 static void
550 pvscsi_send_msg(PVSCSIState *s, SCSIDevice *dev, uint32_t msg_type)
551 {
552     if (s->msg_ring_info_valid && pvscsi_ring_msg_has_room(&s->rings)) {
553         PVSCSIMsgDescDevStatusChanged msg = {0};
554 
555         msg.type = msg_type;
556         msg.bus = dev->channel;
557         msg.target = dev->id;
558         msg.lun[1] = dev->lun;
559 
560         pvscsi_msg_ring_put(s, (PVSCSIRingMsgDesc *)&msg);
561         pvscsi_ring_flush_msg(&s->rings);
562         pvscsi_raise_message_interrupt(s);
563     }
564 }
565 
566 static void
567 pvscsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp)
568 {
569     PVSCSIState *s = PVSCSI(hotplug_dev);
570 
571     pvscsi_send_msg(s, SCSI_DEVICE(dev), PVSCSI_MSG_DEV_ADDED);
572 }
573 
574 static void
575 pvscsi_hot_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp)
576 {
577     PVSCSIState *s = PVSCSI(hotplug_dev);
578 
579     pvscsi_send_msg(s, SCSI_DEVICE(dev), PVSCSI_MSG_DEV_REMOVED);
580     qdev_simple_device_unplug_cb(hotplug_dev, dev, errp);
581 }
582 
583 static void
584 pvscsi_request_cancelled(SCSIRequest *req)
585 {
586     PVSCSIRequest *pvscsi_req = req->hba_private;
587     PVSCSIState *s = pvscsi_req->dev;
588 
589     if (pvscsi_req->completed) {
590         return;
591     }
592 
593    if (pvscsi_req->dev->resetting) {
594        pvscsi_req->cmp.hostStatus = BTSTAT_BUSRESET;
595     } else {
596        pvscsi_req->cmp.hostStatus = BTSTAT_ABORTQUEUE;
597     }
598 
599     pvscsi_complete_request(s, pvscsi_req);
600 }
601 
602 static SCSIDevice*
603 pvscsi_device_find(PVSCSIState *s, int channel, int target,
604                    uint8_t *requested_lun, uint8_t *target_lun)
605 {
606     if (requested_lun[0] || requested_lun[2] || requested_lun[3] ||
607         requested_lun[4] || requested_lun[5] || requested_lun[6] ||
608         requested_lun[7] || (target > PVSCSI_MAX_DEVS)) {
609         return NULL;
610     } else {
611         *target_lun = requested_lun[1];
612         return scsi_device_find(&s->bus, channel, target, *target_lun);
613     }
614 }
615 
616 static PVSCSIRequest *
617 pvscsi_queue_pending_descriptor(PVSCSIState *s, SCSIDevice **d,
618                                 struct PVSCSIRingReqDesc *descr)
619 {
620     PVSCSIRequest *pvscsi_req;
621     uint8_t lun;
622 
623     pvscsi_req = g_malloc0(sizeof(*pvscsi_req));
624     pvscsi_req->dev = s;
625     pvscsi_req->req = *descr;
626     pvscsi_req->cmp.context = pvscsi_req->req.context;
627     QTAILQ_INSERT_TAIL(&s->pending_queue, pvscsi_req, next);
628 
629     *d = pvscsi_device_find(s, descr->bus, descr->target, descr->lun, &lun);
630     if (*d) {
631         pvscsi_req->lun = lun;
632     }
633 
634     return pvscsi_req;
635 }
636 
637 static void
638 pvscsi_convert_sglist(PVSCSIRequest *r)
639 {
640     uint32_t chunk_size, elmcnt = 0;
641     uint64_t data_length = r->req.dataLen;
642     PVSCSISGState sg = r->sg;
643     while (data_length && elmcnt < PVSCSI_MAX_SG_ELEM) {
644         while (!sg.resid && elmcnt++ < PVSCSI_MAX_SG_ELEM) {
645             pvscsi_get_next_sg_elem(&sg);
646             trace_pvscsi_convert_sglist(r->req.context, r->sg.dataAddr,
647                                         r->sg.resid);
648         }
649         chunk_size = MIN(data_length, sg.resid);
650         if (chunk_size) {
651             qemu_sglist_add(&r->sgl, sg.dataAddr, chunk_size);
652         }
653 
654         sg.dataAddr += chunk_size;
655         data_length -= chunk_size;
656         sg.resid -= chunk_size;
657     }
658 }
659 
660 static void
661 pvscsi_build_sglist(PVSCSIState *s, PVSCSIRequest *r)
662 {
663     PCIDevice *d = PCI_DEVICE(s);
664 
665     pci_dma_sglist_init(&r->sgl, d, 1);
666     if (r->req.flags & PVSCSI_FLAG_CMD_WITH_SG_LIST) {
667         pvscsi_convert_sglist(r);
668     } else {
669         qemu_sglist_add(&r->sgl, r->req.dataAddr, r->req.dataLen);
670     }
671 }
672 
673 static void
674 pvscsi_process_request_descriptor(PVSCSIState *s,
675                                   struct PVSCSIRingReqDesc *descr)
676 {
677     SCSIDevice *d;
678     PVSCSIRequest *r = pvscsi_queue_pending_descriptor(s, &d, descr);
679     int64_t n;
680 
681     trace_pvscsi_process_req_descr(descr->cdb[0], descr->context);
682 
683     if (!d) {
684         r->cmp.hostStatus = BTSTAT_SELTIMEO;
685         trace_pvscsi_process_req_descr_unknown_device();
686         pvscsi_complete_request(s, r);
687         return;
688     }
689 
690     if (descr->flags & PVSCSI_FLAG_CMD_WITH_SG_LIST) {
691         r->sg.elemAddr = descr->dataAddr;
692     }
693 
694     r->sreq = scsi_req_new(d, descr->context, r->lun, descr->cdb, r);
695     if (r->sreq->cmd.mode == SCSI_XFER_FROM_DEV &&
696         (descr->flags & PVSCSI_FLAG_CMD_DIR_TODEVICE)) {
697         r->cmp.hostStatus = BTSTAT_BADMSG;
698         trace_pvscsi_process_req_descr_invalid_dir();
699         scsi_req_cancel(r->sreq);
700         return;
701     }
702     if (r->sreq->cmd.mode == SCSI_XFER_TO_DEV &&
703         (descr->flags & PVSCSI_FLAG_CMD_DIR_TOHOST)) {
704         r->cmp.hostStatus = BTSTAT_BADMSG;
705         trace_pvscsi_process_req_descr_invalid_dir();
706         scsi_req_cancel(r->sreq);
707         return;
708     }
709 
710     pvscsi_build_sglist(s, r);
711     n = scsi_req_enqueue(r->sreq);
712 
713     if (n) {
714         scsi_req_continue(r->sreq);
715     }
716 }
717 
718 static void
719 pvscsi_process_io(PVSCSIState *s)
720 {
721     PVSCSIRingReqDesc descr;
722     hwaddr next_descr_pa;
723 
724     assert(s->rings_info_valid);
725     while ((next_descr_pa = pvscsi_ring_pop_req_descr(&s->rings)) != 0) {
726 
727         /* Only read after production index verification */
728         smp_rmb();
729 
730         trace_pvscsi_process_io(next_descr_pa);
731         cpu_physical_memory_read(next_descr_pa, &descr, sizeof(descr));
732         pvscsi_process_request_descriptor(s, &descr);
733     }
734 
735     pvscsi_ring_flush_req(&s->rings);
736 }
737 
738 static void
739 pvscsi_dbg_dump_tx_rings_config(PVSCSICmdDescSetupRings *rc)
740 {
741     int i;
742     trace_pvscsi_tx_rings_ppn("Rings State", rc->ringsStatePPN);
743 
744     trace_pvscsi_tx_rings_num_pages("Request Ring", rc->reqRingNumPages);
745     for (i = 0; i < rc->reqRingNumPages; i++) {
746         trace_pvscsi_tx_rings_ppn("Request Ring", rc->reqRingPPNs[i]);
747     }
748 
749     trace_pvscsi_tx_rings_num_pages("Confirm Ring", rc->cmpRingNumPages);
750     for (i = 0; i < rc->cmpRingNumPages; i++) {
751         trace_pvscsi_tx_rings_ppn("Confirm Ring", rc->cmpRingPPNs[i]);
752     }
753 }
754 
755 static uint64_t
756 pvscsi_on_cmd_config(PVSCSIState *s)
757 {
758     trace_pvscsi_on_cmd_noimpl("PVSCSI_CMD_CONFIG");
759     return PVSCSI_COMMAND_PROCESSING_FAILED;
760 }
761 
762 static uint64_t
763 pvscsi_on_cmd_unplug(PVSCSIState *s)
764 {
765     trace_pvscsi_on_cmd_noimpl("PVSCSI_CMD_DEVICE_UNPLUG");
766     return PVSCSI_COMMAND_PROCESSING_FAILED;
767 }
768 
769 static uint64_t
770 pvscsi_on_issue_scsi(PVSCSIState *s)
771 {
772     trace_pvscsi_on_cmd_noimpl("PVSCSI_CMD_ISSUE_SCSI");
773     return PVSCSI_COMMAND_PROCESSING_FAILED;
774 }
775 
776 static uint64_t
777 pvscsi_on_cmd_setup_rings(PVSCSIState *s)
778 {
779     PVSCSICmdDescSetupRings *rc =
780         (PVSCSICmdDescSetupRings *) s->curr_cmd_data;
781 
782     trace_pvscsi_on_cmd_arrived("PVSCSI_CMD_SETUP_RINGS");
783 
784     if (!rc->reqRingNumPages
785         || rc->reqRingNumPages > PVSCSI_SETUP_RINGS_MAX_NUM_PAGES
786         || !rc->cmpRingNumPages
787         || rc->cmpRingNumPages > PVSCSI_SETUP_RINGS_MAX_NUM_PAGES) {
788         return PVSCSI_COMMAND_PROCESSING_FAILED;
789     }
790 
791     pvscsi_dbg_dump_tx_rings_config(rc);
792     pvscsi_ring_init_data(&s->rings, rc);
793 
794     s->rings_info_valid = TRUE;
795     return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
796 }
797 
798 static uint64_t
799 pvscsi_on_cmd_abort(PVSCSIState *s)
800 {
801     PVSCSICmdDescAbortCmd *cmd = (PVSCSICmdDescAbortCmd *) s->curr_cmd_data;
802     PVSCSIRequest *r, *next;
803 
804     trace_pvscsi_on_cmd_abort(cmd->context, cmd->target);
805 
806     QTAILQ_FOREACH_SAFE(r, &s->pending_queue, next, next) {
807         if (r->req.context == cmd->context) {
808             break;
809         }
810     }
811     if (r) {
812         assert(!r->completed);
813         r->cmp.hostStatus = BTSTAT_ABORTQUEUE;
814         scsi_req_cancel(r->sreq);
815     }
816 
817     return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
818 }
819 
820 static uint64_t
821 pvscsi_on_cmd_unknown(PVSCSIState *s)
822 {
823     trace_pvscsi_on_cmd_unknown_data(s->curr_cmd_data[0]);
824     return PVSCSI_COMMAND_PROCESSING_FAILED;
825 }
826 
827 static uint64_t
828 pvscsi_on_cmd_reset_device(PVSCSIState *s)
829 {
830     uint8_t target_lun = 0;
831     struct PVSCSICmdDescResetDevice *cmd =
832         (struct PVSCSICmdDescResetDevice *) s->curr_cmd_data;
833     SCSIDevice *sdev;
834 
835     sdev = pvscsi_device_find(s, 0, cmd->target, cmd->lun, &target_lun);
836 
837     trace_pvscsi_on_cmd_reset_dev(cmd->target, (int) target_lun, sdev);
838 
839     if (sdev != NULL) {
840         s->resetting++;
841         device_reset(&sdev->qdev);
842         s->resetting--;
843         return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
844     }
845 
846     return PVSCSI_COMMAND_PROCESSING_FAILED;
847 }
848 
849 static uint64_t
850 pvscsi_on_cmd_reset_bus(PVSCSIState *s)
851 {
852     trace_pvscsi_on_cmd_arrived("PVSCSI_CMD_RESET_BUS");
853 
854     s->resetting++;
855     qbus_reset_all(BUS(&s->bus));
856     s->resetting--;
857     return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
858 }
859 
860 static uint64_t
861 pvscsi_on_cmd_setup_msg_ring(PVSCSIState *s)
862 {
863     PVSCSICmdDescSetupMsgRing *rc =
864         (PVSCSICmdDescSetupMsgRing *) s->curr_cmd_data;
865 
866     trace_pvscsi_on_cmd_arrived("PVSCSI_CMD_SETUP_MSG_RING");
867 
868     if (!s->use_msg) {
869         return PVSCSI_COMMAND_PROCESSING_FAILED;
870     }
871 
872     if (s->rings_info_valid) {
873         if (pvscsi_ring_init_msg(&s->rings, rc) < 0) {
874             return PVSCSI_COMMAND_PROCESSING_FAILED;
875         }
876         s->msg_ring_info_valid = TRUE;
877     }
878     return sizeof(PVSCSICmdDescSetupMsgRing) / sizeof(uint32_t);
879 }
880 
881 static uint64_t
882 pvscsi_on_cmd_adapter_reset(PVSCSIState *s)
883 {
884     trace_pvscsi_on_cmd_arrived("PVSCSI_CMD_ADAPTER_RESET");
885 
886     pvscsi_reset_adapter(s);
887     return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
888 }
889 
890 static const struct {
891     int       data_size;
892     uint64_t  (*handler_fn)(PVSCSIState *s);
893 } pvscsi_commands[] = {
894     [PVSCSI_CMD_FIRST] = {
895         .data_size = 0,
896         .handler_fn = pvscsi_on_cmd_unknown,
897     },
898 
899     /* Not implemented, data size defined based on what arrives on windows */
900     [PVSCSI_CMD_CONFIG] = {
901         .data_size = 6 * sizeof(uint32_t),
902         .handler_fn = pvscsi_on_cmd_config,
903     },
904 
905     /* Command not implemented, data size is unknown */
906     [PVSCSI_CMD_ISSUE_SCSI] = {
907         .data_size = 0,
908         .handler_fn = pvscsi_on_issue_scsi,
909     },
910 
911     /* Command not implemented, data size is unknown */
912     [PVSCSI_CMD_DEVICE_UNPLUG] = {
913         .data_size = 0,
914         .handler_fn = pvscsi_on_cmd_unplug,
915     },
916 
917     [PVSCSI_CMD_SETUP_RINGS] = {
918         .data_size = sizeof(PVSCSICmdDescSetupRings),
919         .handler_fn = pvscsi_on_cmd_setup_rings,
920     },
921 
922     [PVSCSI_CMD_RESET_DEVICE] = {
923         .data_size = sizeof(struct PVSCSICmdDescResetDevice),
924         .handler_fn = pvscsi_on_cmd_reset_device,
925     },
926 
927     [PVSCSI_CMD_RESET_BUS] = {
928         .data_size = 0,
929         .handler_fn = pvscsi_on_cmd_reset_bus,
930     },
931 
932     [PVSCSI_CMD_SETUP_MSG_RING] = {
933         .data_size = sizeof(PVSCSICmdDescSetupMsgRing),
934         .handler_fn = pvscsi_on_cmd_setup_msg_ring,
935     },
936 
937     [PVSCSI_CMD_ADAPTER_RESET] = {
938         .data_size = 0,
939         .handler_fn = pvscsi_on_cmd_adapter_reset,
940     },
941 
942     [PVSCSI_CMD_ABORT_CMD] = {
943         .data_size = sizeof(struct PVSCSICmdDescAbortCmd),
944         .handler_fn = pvscsi_on_cmd_abort,
945     },
946 };
947 
948 static void
949 pvscsi_do_command_processing(PVSCSIState *s)
950 {
951     size_t bytes_arrived = s->curr_cmd_data_cntr * sizeof(uint32_t);
952 
953     assert(s->curr_cmd < PVSCSI_CMD_LAST);
954     if (bytes_arrived >= pvscsi_commands[s->curr_cmd].data_size) {
955         s->reg_command_status = pvscsi_commands[s->curr_cmd].handler_fn(s);
956         s->curr_cmd = PVSCSI_CMD_FIRST;
957         s->curr_cmd_data_cntr   = 0;
958     }
959 }
960 
961 static void
962 pvscsi_on_command_data(PVSCSIState *s, uint32_t value)
963 {
964     size_t bytes_arrived = s->curr_cmd_data_cntr * sizeof(uint32_t);
965 
966     assert(bytes_arrived < sizeof(s->curr_cmd_data));
967     s->curr_cmd_data[s->curr_cmd_data_cntr++] = value;
968 
969     pvscsi_do_command_processing(s);
970 }
971 
972 static void
973 pvscsi_on_command(PVSCSIState *s, uint64_t cmd_id)
974 {
975     if ((cmd_id > PVSCSI_CMD_FIRST) && (cmd_id < PVSCSI_CMD_LAST)) {
976         s->curr_cmd = cmd_id;
977     } else {
978         s->curr_cmd = PVSCSI_CMD_FIRST;
979         trace_pvscsi_on_cmd_unknown(cmd_id);
980     }
981 
982     s->curr_cmd_data_cntr = 0;
983     s->reg_command_status = PVSCSI_COMMAND_NOT_ENOUGH_DATA;
984 
985     pvscsi_do_command_processing(s);
986 }
987 
988 static void
989 pvscsi_io_write(void *opaque, hwaddr addr,
990                 uint64_t val, unsigned size)
991 {
992     PVSCSIState *s = opaque;
993 
994     switch (addr) {
995     case PVSCSI_REG_OFFSET_COMMAND:
996         pvscsi_on_command(s, val);
997         break;
998 
999     case PVSCSI_REG_OFFSET_COMMAND_DATA:
1000         pvscsi_on_command_data(s, (uint32_t) val);
1001         break;
1002 
1003     case PVSCSI_REG_OFFSET_INTR_STATUS:
1004         trace_pvscsi_io_write("PVSCSI_REG_OFFSET_INTR_STATUS", val);
1005         s->reg_interrupt_status &= ~val;
1006         pvscsi_update_irq_status(s);
1007         pvscsi_schedule_completion_processing(s);
1008         break;
1009 
1010     case PVSCSI_REG_OFFSET_INTR_MASK:
1011         trace_pvscsi_io_write("PVSCSI_REG_OFFSET_INTR_MASK", val);
1012         s->reg_interrupt_enabled = val;
1013         pvscsi_update_irq_status(s);
1014         break;
1015 
1016     case PVSCSI_REG_OFFSET_KICK_NON_RW_IO:
1017         trace_pvscsi_io_write("PVSCSI_REG_OFFSET_KICK_NON_RW_IO", val);
1018         pvscsi_process_io(s);
1019         break;
1020 
1021     case PVSCSI_REG_OFFSET_KICK_RW_IO:
1022         trace_pvscsi_io_write("PVSCSI_REG_OFFSET_KICK_RW_IO", val);
1023         pvscsi_process_io(s);
1024         break;
1025 
1026     case PVSCSI_REG_OFFSET_DEBUG:
1027         trace_pvscsi_io_write("PVSCSI_REG_OFFSET_DEBUG", val);
1028         break;
1029 
1030     default:
1031         trace_pvscsi_io_write_unknown(addr, size, val);
1032         break;
1033     }
1034 
1035 }
1036 
1037 static uint64_t
1038 pvscsi_io_read(void *opaque, hwaddr addr, unsigned size)
1039 {
1040     PVSCSIState *s = opaque;
1041 
1042     switch (addr) {
1043     case PVSCSI_REG_OFFSET_INTR_STATUS:
1044         trace_pvscsi_io_read("PVSCSI_REG_OFFSET_INTR_STATUS",
1045                              s->reg_interrupt_status);
1046         return s->reg_interrupt_status;
1047 
1048     case PVSCSI_REG_OFFSET_INTR_MASK:
1049         trace_pvscsi_io_read("PVSCSI_REG_OFFSET_INTR_MASK",
1050                              s->reg_interrupt_status);
1051         return s->reg_interrupt_enabled;
1052 
1053     case PVSCSI_REG_OFFSET_COMMAND_STATUS:
1054         trace_pvscsi_io_read("PVSCSI_REG_OFFSET_COMMAND_STATUS",
1055                              s->reg_interrupt_status);
1056         return s->reg_command_status;
1057 
1058     default:
1059         trace_pvscsi_io_read_unknown(addr, size);
1060         return 0;
1061     }
1062 }
1063 
1064 
1065 static void
1066 pvscsi_init_msi(PVSCSIState *s)
1067 {
1068     int res;
1069     PCIDevice *d = PCI_DEVICE(s);
1070 
1071     res = msi_init(d, PVSCSI_MSI_OFFSET(s), PVSCSI_MSIX_NUM_VECTORS,
1072                    PVSCSI_USE_64BIT, PVSCSI_PER_VECTOR_MASK, NULL);
1073     if (res < 0) {
1074         trace_pvscsi_init_msi_fail(res);
1075         s->msi_used = false;
1076     } else {
1077         s->msi_used = true;
1078     }
1079 }
1080 
1081 static void
1082 pvscsi_cleanup_msi(PVSCSIState *s)
1083 {
1084     PCIDevice *d = PCI_DEVICE(s);
1085 
1086     msi_uninit(d);
1087 }
1088 
1089 static const MemoryRegionOps pvscsi_ops = {
1090         .read = pvscsi_io_read,
1091         .write = pvscsi_io_write,
1092         .endianness = DEVICE_LITTLE_ENDIAN,
1093         .impl = {
1094                 .min_access_size = 4,
1095                 .max_access_size = 4,
1096         },
1097 };
1098 
1099 static const struct SCSIBusInfo pvscsi_scsi_info = {
1100         .tcq = true,
1101         .max_target = PVSCSI_MAX_DEVS,
1102         .max_channel = 0,
1103         .max_lun = 0,
1104 
1105         .get_sg_list = pvscsi_get_sg_list,
1106         .complete = pvscsi_command_complete,
1107         .cancel = pvscsi_request_cancelled,
1108 };
1109 
1110 static void
1111 pvscsi_realizefn(PCIDevice *pci_dev, Error **errp)
1112 {
1113     PVSCSIState *s = PVSCSI(pci_dev);
1114 
1115     trace_pvscsi_state("init");
1116 
1117     /* PCI subsystem ID, subsystem vendor ID, revision */
1118     if (PVSCSI_USE_OLD_PCI_CONFIGURATION(s)) {
1119         pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, 0x1000);
1120     } else {
1121         pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID,
1122                      PCI_VENDOR_ID_VMWARE);
1123         pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID,
1124                      PCI_DEVICE_ID_VMWARE_PVSCSI);
1125         pci_config_set_revision(pci_dev->config, 0x2);
1126     }
1127 
1128     /* PCI latency timer = 255 */
1129     pci_dev->config[PCI_LATENCY_TIMER] = 0xff;
1130 
1131     /* Interrupt pin A */
1132     pci_config_set_interrupt_pin(pci_dev->config, 1);
1133 
1134     memory_region_init_io(&s->io_space, OBJECT(s), &pvscsi_ops, s,
1135                           "pvscsi-io", PVSCSI_MEM_SPACE_SIZE);
1136     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->io_space);
1137 
1138     pvscsi_init_msi(s);
1139 
1140     if (pci_is_express(pci_dev) && pci_bus_is_express(pci_get_bus(pci_dev))) {
1141         pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET);
1142     }
1143 
1144     s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s);
1145 
1146     scsi_bus_new(&s->bus, sizeof(s->bus), DEVICE(pci_dev),
1147                  &pvscsi_scsi_info, NULL);
1148     /* override default SCSI bus hotplug-handler, with pvscsi's one */
1149     qbus_set_hotplug_handler(BUS(&s->bus), OBJECT(s), &error_abort);
1150     pvscsi_reset_state(s);
1151 }
1152 
1153 static void
1154 pvscsi_uninit(PCIDevice *pci_dev)
1155 {
1156     PVSCSIState *s = PVSCSI(pci_dev);
1157 
1158     trace_pvscsi_state("uninit");
1159     qemu_bh_delete(s->completion_worker);
1160 
1161     pvscsi_cleanup_msi(s);
1162 }
1163 
1164 static void
1165 pvscsi_reset(DeviceState *dev)
1166 {
1167     PCIDevice *d = PCI_DEVICE(dev);
1168     PVSCSIState *s = PVSCSI(d);
1169 
1170     trace_pvscsi_state("reset");
1171     pvscsi_reset_adapter(s);
1172 }
1173 
1174 static int
1175 pvscsi_pre_save(void *opaque)
1176 {
1177     PVSCSIState *s = (PVSCSIState *) opaque;
1178 
1179     trace_pvscsi_state("presave");
1180 
1181     assert(QTAILQ_EMPTY(&s->pending_queue));
1182     assert(QTAILQ_EMPTY(&s->completion_queue));
1183 
1184     return 0;
1185 }
1186 
/*
 * VMState post_load hook: only emits a trace event.  Neither @opaque nor
 * @version_id is used.  Always returns 0.
 */
static int
pvscsi_post_load(void *opaque, int version_id)
{
    trace_pvscsi_state("postload");

    return 0;
}
1193 
1194 static bool pvscsi_vmstate_need_pcie_device(void *opaque)
1195 {
1196     PVSCSIState *s = PVSCSI(opaque);
1197 
1198     return !(s->compat_flags & PVSCSI_COMPAT_DISABLE_PCIE);
1199 }
1200 
/*
 * Field test: the exact opposite of pvscsi_vmstate_need_pcie_device().
 * @version_id is not used.
 */
static bool pvscsi_vmstate_test_pci_device(void *opaque, int version_id)
{
    bool need_pcie = pvscsi_vmstate_need_pcie_device(opaque);

    return !need_pcie;
}
1205 
1206 static const VMStateDescription vmstate_pvscsi_pcie_device = {
1207     .name = "pvscsi/pcie",
1208     .needed = pvscsi_vmstate_need_pcie_device,
1209     .fields = (VMStateField[]) {
1210         VMSTATE_PCI_DEVICE(parent_obj, PVSCSIState),
1211         VMSTATE_END_OF_LIST()
1212     }
1213 };
1214 
1215 static const VMStateDescription vmstate_pvscsi = {
1216     .name = "pvscsi",
1217     .version_id = 0,
1218     .minimum_version_id = 0,
1219     .pre_save = pvscsi_pre_save,
1220     .post_load = pvscsi_post_load,
1221     .fields = (VMStateField[]) {
1222         VMSTATE_STRUCT_TEST(parent_obj, PVSCSIState,
1223                             pvscsi_vmstate_test_pci_device, 0,
1224                             vmstate_pci_device, PCIDevice),
1225         VMSTATE_UINT8(msi_used, PVSCSIState),
1226         VMSTATE_UINT32(resetting, PVSCSIState),
1227         VMSTATE_UINT64(reg_interrupt_status, PVSCSIState),
1228         VMSTATE_UINT64(reg_interrupt_enabled, PVSCSIState),
1229         VMSTATE_UINT64(reg_command_status, PVSCSIState),
1230         VMSTATE_UINT64(curr_cmd, PVSCSIState),
1231         VMSTATE_UINT32(curr_cmd_data_cntr, PVSCSIState),
1232         VMSTATE_UINT32_ARRAY(curr_cmd_data, PVSCSIState,
1233                              ARRAY_SIZE(((PVSCSIState *)NULL)->curr_cmd_data)),
1234         VMSTATE_UINT8(rings_info_valid, PVSCSIState),
1235         VMSTATE_UINT8(msg_ring_info_valid, PVSCSIState),
1236         VMSTATE_UINT8(use_msg, PVSCSIState),
1237 
1238         VMSTATE_UINT64(rings.rs_pa, PVSCSIState),
1239         VMSTATE_UINT32(rings.txr_len_mask, PVSCSIState),
1240         VMSTATE_UINT32(rings.rxr_len_mask, PVSCSIState),
1241         VMSTATE_UINT64_ARRAY(rings.req_ring_pages_pa, PVSCSIState,
1242                              PVSCSI_SETUP_RINGS_MAX_NUM_PAGES),
1243         VMSTATE_UINT64_ARRAY(rings.cmp_ring_pages_pa, PVSCSIState,
1244                              PVSCSI_SETUP_RINGS_MAX_NUM_PAGES),
1245         VMSTATE_UINT64(rings.consumed_ptr, PVSCSIState),
1246         VMSTATE_UINT64(rings.filled_cmp_ptr, PVSCSIState),
1247 
1248         VMSTATE_END_OF_LIST()
1249     },
1250     .subsections = (const VMStateDescription*[]) {
1251         &vmstate_pvscsi_pcie_device,
1252         NULL
1253     }
1254 };
1255 
1256 static Property pvscsi_properties[] = {
1257     DEFINE_PROP_UINT8("use_msg", PVSCSIState, use_msg, 1),
1258     DEFINE_PROP_BIT("x-old-pci-configuration", PVSCSIState, compat_flags,
1259                     PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT, false),
1260     DEFINE_PROP_BIT("x-disable-pcie", PVSCSIState, compat_flags,
1261                     PVSCSI_COMPAT_DISABLE_PCIE_BIT, false),
1262     DEFINE_PROP_END_OF_LIST(),
1263 };
1264 
1265 static void pvscsi_realize(DeviceState *qdev, Error **errp)
1266 {
1267     PVSCSIClass *pvs_c = PVSCSI_DEVICE_GET_CLASS(qdev);
1268     PCIDevice *pci_dev = PCI_DEVICE(qdev);
1269     PVSCSIState *s = PVSCSI(qdev);
1270 
1271     if (!(s->compat_flags & PVSCSI_COMPAT_DISABLE_PCIE)) {
1272         pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
1273     }
1274 
1275     pvs_c->parent_dc_realize(qdev, errp);
1276 }
1277 
1278 static void pvscsi_class_init(ObjectClass *klass, void *data)
1279 {
1280     DeviceClass *dc = DEVICE_CLASS(klass);
1281     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1282     PVSCSIClass *pvs_k = PVSCSI_DEVICE_CLASS(klass);
1283     HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
1284 
1285     k->realize = pvscsi_realizefn;
1286     k->exit = pvscsi_uninit;
1287     k->vendor_id = PCI_VENDOR_ID_VMWARE;
1288     k->device_id = PCI_DEVICE_ID_VMWARE_PVSCSI;
1289     k->class_id = PCI_CLASS_STORAGE_SCSI;
1290     k->subsystem_id = 0x1000;
1291     device_class_set_parent_realize(dc, pvscsi_realize,
1292                                     &pvs_k->parent_dc_realize);
1293     dc->reset = pvscsi_reset;
1294     dc->vmsd = &vmstate_pvscsi;
1295     dc->props = pvscsi_properties;
1296     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
1297     hc->unplug = pvscsi_hot_unplug;
1298     hc->plug = pvscsi_hotplug;
1299 }
1300 
1301 static const TypeInfo pvscsi_info = {
1302     .name          = TYPE_PVSCSI,
1303     .parent        = TYPE_PCI_DEVICE,
1304     .class_size    = sizeof(PVSCSIClass),
1305     .instance_size = sizeof(PVSCSIState),
1306     .class_init    = pvscsi_class_init,
1307     .interfaces = (InterfaceInfo[]) {
1308         { TYPE_HOTPLUG_HANDLER },
1309         { INTERFACE_PCIE_DEVICE },
1310         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1311         { }
1312     }
1313 };
1314 
1315 static void
1316 pvscsi_register_types(void)
1317 {
1318     type_register_static(&pvscsi_info);
1319 }
1320 
1321 type_init(pvscsi_register_types);
1322