xref: /openbmc/qemu/hw/net/vmxnet3.c (revision 35a6ed4f)
1 /*
2  * QEMU VMWARE VMXNET3 paravirtual NIC
3  *
4  * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
5  *
6  * Developed by Daynix Computing LTD (http://www.daynix.com)
7  *
8  * Authors:
9  * Dmitry Fleytman <dmitry@daynix.com>
10  * Tamir Shomer <tamirs@daynix.com>
11  * Yan Vugenfirer <yan@daynix.com>
12  *
13  * This work is licensed under the terms of the GNU GPL, version 2.
14  * See the COPYING file in the top-level directory.
15  *
16  */
17 
18 #include "qemu/osdep.h"
19 #include "hw/hw.h"
20 #include "hw/pci/pci.h"
21 #include "net/net.h"
22 #include "net/tap.h"
23 #include "net/checksum.h"
24 #include "sysemu/sysemu.h"
25 #include "qemu-common.h"
26 #include "qemu/bswap.h"
27 #include "hw/pci/msix.h"
28 #include "hw/pci/msi.h"
29 
30 #include "vmxnet3.h"
31 #include "vmxnet_debug.h"
32 #include "vmware_utils.h"
33 #include "net_tx_pkt.h"
34 #include "net_rx_pkt.h"
35 
/*
 * Basic device constants.
 * NOTE(review): none of these macros is referenced in the part of the file
 * reviewed here; the notes below follow the macro names only and should be
 * confirmed at the use sites.
 */
/* Presumably the PCI revision ID reported by the device */
#define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
/* Presumably the size in bytes of the BAR that holds the MSI-X structures */
#define VMXNET3_MSIX_BAR_SIZE 0x2000
/* Presumably the minimal frame buffer length in bytes */
#define MIN_BUF_SIZE 60
39 
/*
 * Compatibility flags for migration.
 *
 * Every flag is described by a pair of macros: <NAME>_BIT is the bit
 * position and <NAME> is the corresponding single-bit mask.  The masks are
 * meant to be tested against the compat_flags field of the device state
 * (the offset macros that follow do so for OLD_MSI_OFFSETS).
 */
#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0
#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS \
    (1 << VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT)
#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT 1
#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE \
    (1 << VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT)
47 
/*
 * Offsets of the device capabilities.
 *
 * The macros taking the device state "s" yield one of two values depending
 * on whether VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS is set in s->compat_flags:
 * the first value of each "?:" is used when the flag is set, the second one
 * otherwise.
 * NOTE(review): presumably all of these are PCI configuration space offsets
 * - the places they are consumed are outside this excerpt.
 */
#define VMXNET3_EXP_EP_OFFSET (0x48)
#define VMXNET3_MSI_OFFSET(s) \
    ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x50 : 0x84)
#define VMXNET3_MSIX_OFFSET(s) \
    ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0 : 0x9c)
#define VMXNET3_DSN_OFFSET     (0x100)

/* Indexes of the device BARs */
#define VMXNET3_BAR0_IDX      (0)
#define VMXNET3_BAR1_IDX      (1)
#define VMXNET3_MSIX_BAR_IDX  (2)

/*
 * Offsets of the MSI-X table and of the PBA; the PBA offset also depends
 * on VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS.
 * NOTE(review): presumably relative to the start of the MSI-X BAR - confirm.
 */
#define VMXNET3_OFF_MSIX_TABLE (0x000)
#define VMXNET3_OFF_MSIX_PBA(s) \
    ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x800 : 0x1000)
62 
/*
 * Link speed in Mbps (1000 here), shifted left by 16 so that the low
 * 16 bits stay free for the link status value defined next.
 */
#define VMXNET3_LINK_SPEED      (1000 << 16)

/* Link status: 1 - up, 0 - down. */
#define VMXNET3_LINK_STATUS_UP  0x1

/* Least significant bit should be set for revision and version */
#define VMXNET3_UPT_REVISION      0x1
#define VMXNET3_DEVICE_REVISION   0x1

/* Number of interrupt vectors for non-MSIx modes */
#define VMXNET3_MAX_NMSIX_INTRS   (1)
75 
/*
 * Macros for rings descriptors access.
 *
 * Each macro reads (READ) or writes (WRITE) one member of a TX or RX queue
 * descriptor that lives in shared memory:
 *   _d     - device handle passed through to the vmw_shmem_* accessor
 *   dpa    - address of the queue descriptor
 *   field  - member of struct Vmxnet3_TxQueueDesc / Vmxnet3_RxQueueDesc
 *   value  - value to store (WRITE variants only)
 * The numeric suffix is the access width in bits.
 */
#define VMXNET3_READ_TX_QUEUE_DESCR8(_d, dpa, field) \
    (vmw_shmem_ld8(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))

/* "value" is the argument of the store, not of offsetof() */
#define VMXNET3_WRITE_TX_QUEUE_DESCR8(_d, dpa, field, value) \
    (vmw_shmem_st8(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))

#define VMXNET3_READ_TX_QUEUE_DESCR32(_d, dpa, field) \
    (vmw_shmem_ld32(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))

#define VMXNET3_WRITE_TX_QUEUE_DESCR32(_d, dpa, field, value) \
    (vmw_shmem_st32(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))

#define VMXNET3_READ_TX_QUEUE_DESCR64(_d, dpa, field) \
    (vmw_shmem_ld64(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))

#define VMXNET3_WRITE_TX_QUEUE_DESCR64(_d, dpa, field, value) \
    (vmw_shmem_st64(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))

#define VMXNET3_READ_RX_QUEUE_DESCR64(_d, dpa, field) \
    (vmw_shmem_ld64(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))

#define VMXNET3_READ_RX_QUEUE_DESCR32(_d, dpa, field) \
    (vmw_shmem_ld32(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))

#define VMXNET3_WRITE_RX_QUEUE_DESCR64(_d, dpa, field, value) \
    (vmw_shmem_st64(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value))

#define VMXNET3_WRITE_RX_QUEUE_DESCR8(_d, dpa, field, value) \
    (vmw_shmem_st8(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value))
106 
/*
 * Macros for guest driver shared area access.
 *
 *   _d     - device handle passed through to the vmw_shmem_* accessor
 *   shpa   - address of the struct Vmxnet3_DriverShared area
 *   field  - member of struct Vmxnet3_DriverShared to access
 * The numeric suffix is the access width in bits; the suffix-less READ
 * variant copies "l" bytes starting at the member into buffer "b".
 */
#define VMXNET3_READ_DRV_SHARED64(_d, shpa, field) \
    (vmw_shmem_ld64(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_READ_DRV_SHARED32(_d, shpa, field) \
    (vmw_shmem_ld32(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_WRITE_DRV_SHARED32(_d, shpa, field, val) \
    (vmw_shmem_st32(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field), val))

#define VMXNET3_READ_DRV_SHARED16(_d, shpa, field) \
    (vmw_shmem_ld16(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_READ_DRV_SHARED8(_d, shpa, field) \
    (vmw_shmem_ld8(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_READ_DRV_SHARED(_d, shpa, field, b, l) \
    (vmw_shmem_read(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field), b, l))
125 
/* True when every bit of "flag" is set in "field" (flag may be multi-bit) */
#define VMXNET_FLAG_IS_SET(field, flag) (((field) & (flag)) == (flag))
127 
/*
 * Class structure of the device type: the PCI device class extended with
 * a slot for a DeviceRealize callback.
 * NOTE(review): presumably parent_dc_realize keeps the parent's realize
 * handler so it can be chained - the code using it is outside this excerpt.
 */
typedef struct VMXNET3Class {
    PCIDeviceClass parent_class;
    DeviceRealize parent_dc_realize;
} VMXNET3Class;

/* Type name and checked casts for instances and classes of the device */
#define TYPE_VMXNET3 "vmxnet3"
#define VMXNET3(obj) OBJECT_CHECK(VMXNET3State, (obj), TYPE_VMXNET3)

#define VMXNET3_DEVICE_CLASS(klass) \
    OBJECT_CLASS_CHECK(VMXNET3Class, (klass), TYPE_VMXNET3)
#define VMXNET3_DEVICE_GET_CLASS(obj) \
    OBJECT_GET_CLASS(VMXNET3Class, (obj), TYPE_VMXNET3)
140 
/*
 * Cyclic ring abstraction.
 *
 * A ring is an array of "size" cells of "cell_size" bytes each that starts
 * at address "pa".  "next" is the index of the current cell and "gen" is
 * the generation value, which is flipped every time "next" wraps around
 * (see vmxnet3_ring_inc() and vmxnet3_ring_dec()).
 */
typedef struct {
    hwaddr pa;          /* base address of the ring */
    size_t size;        /* number of cells in the ring */
    size_t cell_size;   /* size of one cell in bytes */
    size_t next;        /* index of the current cell */
    uint8_t gen;        /* current generation value */
} Vmxnet3Ring;
149 
150 static inline void vmxnet3_ring_init(PCIDevice *d,
151 				     Vmxnet3Ring *ring,
152                                      hwaddr pa,
153                                      size_t size,
154                                      size_t cell_size,
155                                      bool zero_region)
156 {
157     ring->pa = pa;
158     ring->size = size;
159     ring->cell_size = cell_size;
160     ring->gen = VMXNET3_INIT_GEN;
161     ring->next = 0;
162 
163     if (zero_region) {
164         vmw_shmem_set(d, pa, 0, size * cell_size);
165     }
166 }
167 
/*
 * Print the state of ring "r" through the printf-like macro "macro".
 * "ring_name" and "ridx" are a label string and an index that identify
 * the ring in the output.
 */
#define VMXNET3_RING_DUMP(macro, ring_name, ridx, r)                         \
    macro("%s#%d: base %" PRIx64 " size %zu cell_size %zu gen %d next %zu",  \
          (ring_name), (ridx),                                               \
          (r)->pa, (r)->size, (r)->cell_size, (r)->gen, (r)->next)
172 
173 static inline void vmxnet3_ring_inc(Vmxnet3Ring *ring)
174 {
175     if (++ring->next >= ring->size) {
176         ring->next = 0;
177         ring->gen ^= 1;
178     }
179 }
180 
181 static inline void vmxnet3_ring_dec(Vmxnet3Ring *ring)
182 {
183     if (ring->next-- == 0) {
184         ring->next = ring->size - 1;
185         ring->gen ^= 1;
186     }
187 }
188 
189 static inline hwaddr vmxnet3_ring_curr_cell_pa(Vmxnet3Ring *ring)
190 {
191     return ring->pa + ring->next * ring->cell_size;
192 }
193 
194 static inline void vmxnet3_ring_read_curr_cell(PCIDevice *d, Vmxnet3Ring *ring,
195 					       void *buff)
196 {
197     vmw_shmem_read(d, vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
198 }
199 
200 static inline void vmxnet3_ring_write_curr_cell(PCIDevice *d, Vmxnet3Ring *ring,
201 						void *buff)
202 {
203     vmw_shmem_write(d, vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
204 }
205 
206 static inline size_t vmxnet3_ring_curr_cell_idx(Vmxnet3Ring *ring)
207 {
208     return ring->next;
209 }
210 
211 static inline uint8_t vmxnet3_ring_curr_gen(Vmxnet3Ring *ring)
212 {
213     return ring->gen;
214 }
215 
216 /* Debug trace-related functions */
217 static inline void
218 vmxnet3_dump_tx_descr(struct Vmxnet3_TxDesc *descr)
219 {
220     VMW_PKPRN("TX DESCR: "
221               "addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
222               "dtype: %d, ext1: %d, msscof: %d, hlen: %d, om: %d, "
223               "eop: %d, cq: %d, ext2: %d, ti: %d, tci: %d",
224               le64_to_cpu(descr->addr), descr->len, descr->gen, descr->rsvd,
225               descr->dtype, descr->ext1, descr->msscof, descr->hlen, descr->om,
226               descr->eop, descr->cq, descr->ext2, descr->ti, descr->tci);
227 }
228 
229 static inline void
230 vmxnet3_dump_virt_hdr(struct virtio_net_hdr *vhdr)
231 {
232     VMW_PKPRN("VHDR: flags 0x%x, gso_type: 0x%x, hdr_len: %d, gso_size: %d, "
233               "csum_start: %d, csum_offset: %d",
234               vhdr->flags, vhdr->gso_type, vhdr->hdr_len, vhdr->gso_size,
235               vhdr->csum_start, vhdr->csum_offset);
236 }
237 
238 static inline void
239 vmxnet3_dump_rx_descr(struct Vmxnet3_RxDesc *descr)
240 {
241     VMW_PKPRN("RX DESCR: addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
242               "dtype: %d, ext1: %d, btype: %d",
243               le64_to_cpu(descr->addr), descr->len, descr->gen,
244               descr->rsvd, descr->dtype, descr->ext1, descr->btype);
245 }
246 
247 /* Device state and helper functions */
/* Number of RX rings each RX queue owns (size of Vmxnet3RxqDescr.rx_ring) */
#define VMXNET3_RX_RINGS_PER_QUEUE (2)
249 
/* State of one TX queue */
typedef struct {
    Vmxnet3Ring tx_ring;    /* ring of TX descriptors */
    Vmxnet3Ring comp_ring;  /* ring of TX completion descriptors */

    uint8_t intr_idx;       /* interrupt triggered on packet completion */
    hwaddr tx_stats_pa;     /* presumably where txq_stats is published - confirm */
    struct UPT1_TxStats txq_stats;  /* TX statistics counters */
} Vmxnet3TxqDescr;
258 
/* State of one RX queue */
typedef struct {
    /* RX descriptor rings, indexed by ring number */
    Vmxnet3Ring rx_ring[VMXNET3_RX_RINGS_PER_QUEUE];
    Vmxnet3Ring comp_ring;  /* ring of RX completion descriptors */
    uint8_t intr_idx;       /* presumably the interrupt of this queue - confirm */
    hwaddr rx_stats_pa;     /* presumably where rxq_stats is published - confirm */
    struct UPT1_RxStats rxq_stats;  /* RX statistics counters */
} Vmxnet3RxqDescr;
266 
/*
 * State of one interrupt; see vmxnet3_update_interrupt_line_state() for
 * the way the three flags interact.
 */
typedef struct {
    bool is_masked;     /* a masked interrupt is not delivered while pending */
    bool is_pending;    /* interrupt was triggered but not delivered yet */
    bool is_asserted;   /* the interrupt line is currently asserted */
} Vmxnet3IntState;
272 
/* Device state */
typedef struct {
        /* Parent PCI device object, reached through PCI_DEVICE(s) */
        PCIDevice parent_obj;
        NICState *nic;
        /* NIC configuration; conf.macaddr holds the current MAC address */
        NICConf conf;
        MemoryRegion bar0;
        MemoryRegion bar1;
        MemoryRegion msix_bar;

        /* Per-queue RX and TX state */
        Vmxnet3RxqDescr rxq_descr[VMXNET3_DEVICE_MAX_RX_QUEUES];
        Vmxnet3TxqDescr txq_descr[VMXNET3_DEVICE_MAX_TX_QUEUES];

        /* Whether MSI-X support was installed successfully */
        bool msix_used;
        hwaddr drv_shmem;
        hwaddr temp_shared_guest_driver_memory;

        uint8_t txq_num;

        /* This boolean tells whether RX packet being indicated has to */
        /* be split into head and body chunks from different RX rings  */
        bool rx_packets_compound;

        bool rx_vlan_stripping;
        bool lro_supported;

        uint8_t rxq_num;

        /* Network MTU */
        uint32_t mtu;

        /* Maximum number of fragments for indicated TX packets */
        uint32_t max_tx_frags;

        /* Maximum number of fragments for indicated RX packets */
        uint16_t max_rx_frags;

        /* Index for events interrupt */
        uint8_t event_int_idx;

        /* Whether automatic interrupts masking enabled */
        bool auto_int_masking;

        bool peer_has_vhdr;

        /* TX packets to QEMU interface */
        struct NetTxPkt *tx_pkt;
        /*
         * The next four fields are taken from the first descriptor of the
         * TX packet being assembled (see vmxnet3_tx_retrieve_metadata())
         */
        uint32_t offload_mode;
        uint32_t cso_or_gso_size;
        uint16_t tci;
        bool needs_vlan;

        struct NetRxPkt *rx_pkt;

        /* True when the next TX descriptor starts a new packet */
        bool tx_sop;
        /* Set when a fragment of the current TX packet could not be added */
        bool skip_current_tx_pkt;

        uint32_t device_active;
        uint32_t last_command;

        uint32_t link_status_and_speed;

        /* State of every interrupt, indexed by interrupt number */
        Vmxnet3IntState interrupt_states[VMXNET3_MAX_INTRS];

        uint32_t temp_mac;   /* To store the low part first */

        MACAddr perm_mac;
        uint32_t vlan_table[VMXNET3_VFT_SIZE];
        uint32_t rx_mode;
        MACAddr *mcast_list;
        uint32_t mcast_list_len;
        uint32_t mcast_list_buff_size; /* needed for live migration. */

        /* Compatibility flags for migration */
        uint32_t compat_flags;
} VMXNET3State;
348 
349 /* Interrupt management */
350 
351 /*
352  * This function returns sign whether interrupt line is in asserted state
353  * This depends on the type of interrupt used. For INTX interrupt line will
354  * be asserted until explicit deassertion, for MSI(X) interrupt line will
355  * be deasserted automatically due to notification semantics of the MSI(X)
356  * interrupts
357  */
358 static bool _vmxnet3_assert_interrupt_line(VMXNET3State *s, uint32_t int_idx)
359 {
360     PCIDevice *d = PCI_DEVICE(s);
361 
362     if (s->msix_used && msix_enabled(d)) {
363         VMW_IRPRN("Sending MSI-X notification for vector %u", int_idx);
364         msix_notify(d, int_idx);
365         return false;
366     }
367     if (msi_enabled(d)) {
368         VMW_IRPRN("Sending MSI notification for vector %u", int_idx);
369         msi_notify(d, int_idx);
370         return false;
371     }
372 
373     VMW_IRPRN("Asserting line for interrupt %u", int_idx);
374     pci_irq_assert(d);
375     return true;
376 }
377 
/*
 * Deassert the INTx interrupt line of the device.  "lidx" is only used
 * for the trace message.
 */
static void _vmxnet3_deassert_interrupt_line(VMXNET3State *s, int lidx)
{
    PCIDevice *d = PCI_DEVICE(s);

    /*
     * This function should never be called while MSI-X is in use
     * because deassertion is never required for message interrupts
     */
    assert(!s->msix_used || !msix_enabled(d));
    /*
     * Likewise, this function should never be called while MSI is
     * enabled
     */
    assert(!msi_enabled(d));

    VMW_IRPRN("Deasserting line for interrupt %u", lidx);
    pci_irq_deassert(d);
}
396 
397 static void vmxnet3_update_interrupt_line_state(VMXNET3State *s, int lidx)
398 {
399     if (!s->interrupt_states[lidx].is_pending &&
400        s->interrupt_states[lidx].is_asserted) {
401         VMW_IRPRN("New interrupt line state for index %d is DOWN", lidx);
402         _vmxnet3_deassert_interrupt_line(s, lidx);
403         s->interrupt_states[lidx].is_asserted = false;
404         return;
405     }
406 
407     if (s->interrupt_states[lidx].is_pending &&
408        !s->interrupt_states[lidx].is_masked &&
409        !s->interrupt_states[lidx].is_asserted) {
410         VMW_IRPRN("New interrupt line state for index %d is UP", lidx);
411         s->interrupt_states[lidx].is_asserted =
412             _vmxnet3_assert_interrupt_line(s, lidx);
413         s->interrupt_states[lidx].is_pending = false;
414         return;
415     }
416 }
417 
418 static void vmxnet3_trigger_interrupt(VMXNET3State *s, int lidx)
419 {
420     PCIDevice *d = PCI_DEVICE(s);
421     s->interrupt_states[lidx].is_pending = true;
422     vmxnet3_update_interrupt_line_state(s, lidx);
423 
424     if (s->msix_used && msix_enabled(d) && s->auto_int_masking) {
425         goto do_automask;
426     }
427 
428     if (msi_enabled(d) && s->auto_int_masking) {
429         goto do_automask;
430     }
431 
432     return;
433 
434 do_automask:
435     s->interrupt_states[lidx].is_masked = true;
436     vmxnet3_update_interrupt_line_state(s, lidx);
437 }
438 
439 static bool vmxnet3_interrupt_asserted(VMXNET3State *s, int lidx)
440 {
441     return s->interrupt_states[lidx].is_asserted;
442 }
443 
444 static void vmxnet3_clear_interrupt(VMXNET3State *s, int int_idx)
445 {
446     s->interrupt_states[int_idx].is_pending = false;
447     if (s->auto_int_masking) {
448         s->interrupt_states[int_idx].is_masked = true;
449     }
450     vmxnet3_update_interrupt_line_state(s, int_idx);
451 }
452 
453 static void
454 vmxnet3_on_interrupt_mask_changed(VMXNET3State *s, int lidx, bool is_masked)
455 {
456     s->interrupt_states[lidx].is_masked = is_masked;
457     vmxnet3_update_interrupt_line_state(s, lidx);
458 }
459 
460 static bool vmxnet3_verify_driver_magic(PCIDevice *d, hwaddr dshmem)
461 {
462     return (VMXNET3_READ_DRV_SHARED32(d, dshmem, magic) == VMXNET3_REV1_MAGIC);
463 }
464 
/*
 * Byte extraction and placement helpers; byte 0 is the least significant.
 *
 * VMXNET3_GET_BYTE(x, n)    - value of byte n of x
 * VMXNET3_MAKE_BYTE(n, val) - low 8 bits of val moved to byte n of a
 *                             32-bit value
 *
 * The byte number is parenthesized so that an expression such as "i + 1"
 * is multiplied by 8 as a whole.
 */
#define VMXNET3_GET_BYTE(x, byte_num) (((x) >> ((byte_num) * 8)) & 0xFF)
#define VMXNET3_MAKE_BYTE(byte_num, val) \
    (((uint32_t)((val) & 0xFF)) << ((byte_num) * 8))
468 
469 static void vmxnet3_set_variable_mac(VMXNET3State *s, uint32_t h, uint32_t l)
470 {
471     s->conf.macaddr.a[0] = VMXNET3_GET_BYTE(l,  0);
472     s->conf.macaddr.a[1] = VMXNET3_GET_BYTE(l,  1);
473     s->conf.macaddr.a[2] = VMXNET3_GET_BYTE(l,  2);
474     s->conf.macaddr.a[3] = VMXNET3_GET_BYTE(l,  3);
475     s->conf.macaddr.a[4] = VMXNET3_GET_BYTE(h, 0);
476     s->conf.macaddr.a[5] = VMXNET3_GET_BYTE(h, 1);
477 
478     VMW_CFPRN("Variable MAC: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));
479 
480     qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
481 }
482 
483 static uint64_t vmxnet3_get_mac_low(MACAddr *addr)
484 {
485     return VMXNET3_MAKE_BYTE(0, addr->a[0]) |
486            VMXNET3_MAKE_BYTE(1, addr->a[1]) |
487            VMXNET3_MAKE_BYTE(2, addr->a[2]) |
488            VMXNET3_MAKE_BYTE(3, addr->a[3]);
489 }
490 
491 static uint64_t vmxnet3_get_mac_high(MACAddr *addr)
492 {
493     return VMXNET3_MAKE_BYTE(0, addr->a[4]) |
494            VMXNET3_MAKE_BYTE(1, addr->a[5]);
495 }
496 
497 static void
498 vmxnet3_inc_tx_consumption_counter(VMXNET3State *s, int qidx)
499 {
500     vmxnet3_ring_inc(&s->txq_descr[qidx].tx_ring);
501 }
502 
503 static inline void
504 vmxnet3_inc_rx_consumption_counter(VMXNET3State *s, int qidx, int ridx)
505 {
506     vmxnet3_ring_inc(&s->rxq_descr[qidx].rx_ring[ridx]);
507 }
508 
509 static inline void
510 vmxnet3_inc_tx_completion_counter(VMXNET3State *s, int qidx)
511 {
512     vmxnet3_ring_inc(&s->txq_descr[qidx].comp_ring);
513 }
514 
515 static void
516 vmxnet3_inc_rx_completion_counter(VMXNET3State *s, int qidx)
517 {
518     vmxnet3_ring_inc(&s->rxq_descr[qidx].comp_ring);
519 }
520 
521 static void
522 vmxnet3_dec_rx_completion_counter(VMXNET3State *s, int qidx)
523 {
524     vmxnet3_ring_dec(&s->rxq_descr[qidx].comp_ring);
525 }
526 
527 static void vmxnet3_complete_packet(VMXNET3State *s, int qidx, uint32_t tx_ridx)
528 {
529     struct Vmxnet3_TxCompDesc txcq_descr;
530     PCIDevice *d = PCI_DEVICE(s);
531 
532     VMXNET3_RING_DUMP(VMW_RIPRN, "TXC", qidx, &s->txq_descr[qidx].comp_ring);
533 
534     memset(&txcq_descr, 0, sizeof(txcq_descr));
535     txcq_descr.txdIdx = tx_ridx;
536     txcq_descr.gen = vmxnet3_ring_curr_gen(&s->txq_descr[qidx].comp_ring);
537 
538     vmxnet3_ring_write_curr_cell(d, &s->txq_descr[qidx].comp_ring, &txcq_descr);
539 
540     /* Flush changes in TX descriptor before changing the counter value */
541     smp_wmb();
542 
543     vmxnet3_inc_tx_completion_counter(s, qidx);
544     vmxnet3_trigger_interrupt(s, s->txq_descr[qidx].intr_idx);
545 }
546 
547 static bool
548 vmxnet3_setup_tx_offloads(VMXNET3State *s)
549 {
550     switch (s->offload_mode) {
551     case VMXNET3_OM_NONE:
552         net_tx_pkt_build_vheader(s->tx_pkt, false, false, 0);
553         break;
554 
555     case VMXNET3_OM_CSUM:
556         net_tx_pkt_build_vheader(s->tx_pkt, false, true, 0);
557         VMW_PKPRN("L4 CSO requested\n");
558         break;
559 
560     case VMXNET3_OM_TSO:
561         net_tx_pkt_build_vheader(s->tx_pkt, true, true,
562             s->cso_or_gso_size);
563         net_tx_pkt_update_ip_checksums(s->tx_pkt);
564         VMW_PKPRN("GSO offload requested.");
565         break;
566 
567     default:
568         g_assert_not_reached();
569         return false;
570     }
571 
572     return true;
573 }
574 
575 static void
576 vmxnet3_tx_retrieve_metadata(VMXNET3State *s,
577                              const struct Vmxnet3_TxDesc *txd)
578 {
579     s->offload_mode = txd->om;
580     s->cso_or_gso_size = txd->msscof;
581     s->tci = txd->tci;
582     s->needs_vlan = txd->ti;
583 }
584 
/* Outcome of processing one packet, used to pick the statistics to update */
typedef enum {
    VMXNET3_PKT_STATUS_OK,      /* packet was processed successfully */
    VMXNET3_PKT_STATUS_ERROR,   /* packet processing failed */
    VMXNET3_PKT_STATUS_DISCARD,/* only for tx */
    VMXNET3_PKT_STATUS_OUT_OF_BUF /* only for rx */
} Vmxnet3PktStatus;
591 
592 static void
593 vmxnet3_on_tx_done_update_stats(VMXNET3State *s, int qidx,
594     Vmxnet3PktStatus status)
595 {
596     size_t tot_len = net_tx_pkt_get_total_len(s->tx_pkt);
597     struct UPT1_TxStats *stats = &s->txq_descr[qidx].txq_stats;
598 
599     switch (status) {
600     case VMXNET3_PKT_STATUS_OK:
601         switch (net_tx_pkt_get_packet_type(s->tx_pkt)) {
602         case ETH_PKT_BCAST:
603             stats->bcastPktsTxOK++;
604             stats->bcastBytesTxOK += tot_len;
605             break;
606         case ETH_PKT_MCAST:
607             stats->mcastPktsTxOK++;
608             stats->mcastBytesTxOK += tot_len;
609             break;
610         case ETH_PKT_UCAST:
611             stats->ucastPktsTxOK++;
612             stats->ucastBytesTxOK += tot_len;
613             break;
614         default:
615             g_assert_not_reached();
616         }
617 
618         if (s->offload_mode == VMXNET3_OM_TSO) {
619             /*
620              * According to VMWARE headers this statistic is a number
621              * of packets after segmentation but since we don't have
622              * this information in QEMU model, the best we can do is to
623              * provide number of non-segmented packets
624              */
625             stats->TSOPktsTxOK++;
626             stats->TSOBytesTxOK += tot_len;
627         }
628         break;
629 
630     case VMXNET3_PKT_STATUS_DISCARD:
631         stats->pktsTxDiscard++;
632         break;
633 
634     case VMXNET3_PKT_STATUS_ERROR:
635         stats->pktsTxError++;
636         break;
637 
638     default:
639         g_assert_not_reached();
640     }
641 }
642 
643 static void
644 vmxnet3_on_rx_done_update_stats(VMXNET3State *s,
645                                 int qidx,
646                                 Vmxnet3PktStatus status)
647 {
648     struct UPT1_RxStats *stats = &s->rxq_descr[qidx].rxq_stats;
649     size_t tot_len = net_rx_pkt_get_total_len(s->rx_pkt);
650 
651     switch (status) {
652     case VMXNET3_PKT_STATUS_OUT_OF_BUF:
653         stats->pktsRxOutOfBuf++;
654         break;
655 
656     case VMXNET3_PKT_STATUS_ERROR:
657         stats->pktsRxError++;
658         break;
659     case VMXNET3_PKT_STATUS_OK:
660         switch (net_rx_pkt_get_packet_type(s->rx_pkt)) {
661         case ETH_PKT_BCAST:
662             stats->bcastPktsRxOK++;
663             stats->bcastBytesRxOK += tot_len;
664             break;
665         case ETH_PKT_MCAST:
666             stats->mcastPktsRxOK++;
667             stats->mcastBytesRxOK += tot_len;
668             break;
669         case ETH_PKT_UCAST:
670             stats->ucastPktsRxOK++;
671             stats->ucastBytesRxOK += tot_len;
672             break;
673         default:
674             g_assert_not_reached();
675         }
676 
677         if (tot_len > s->mtu) {
678             stats->LROPktsRxOK++;
679             stats->LROBytesRxOK += tot_len;
680         }
681         break;
682     default:
683         g_assert_not_reached();
684     }
685 }
686 
687 static inline bool
688 vmxnet3_pop_next_tx_descr(VMXNET3State *s,
689                           int qidx,
690                           struct Vmxnet3_TxDesc *txd,
691                           uint32_t *descr_idx)
692 {
693     Vmxnet3Ring *ring = &s->txq_descr[qidx].tx_ring;
694     PCIDevice *d = PCI_DEVICE(s);
695 
696     vmxnet3_ring_read_curr_cell(d, ring, txd);
697     if (txd->gen == vmxnet3_ring_curr_gen(ring)) {
698         /* Only read after generation field verification */
699         smp_rmb();
700         /* Re-read to be sure we got the latest version */
701         vmxnet3_ring_read_curr_cell(d, ring, txd);
702         VMXNET3_RING_DUMP(VMW_RIPRN, "TX", qidx, ring);
703         *descr_idx = vmxnet3_ring_curr_cell_idx(ring);
704         vmxnet3_inc_tx_consumption_counter(s, qidx);
705         return true;
706     }
707 
708     return false;
709 }
710 
711 static bool
712 vmxnet3_send_packet(VMXNET3State *s, uint32_t qidx)
713 {
714     Vmxnet3PktStatus status = VMXNET3_PKT_STATUS_OK;
715 
716     if (!vmxnet3_setup_tx_offloads(s)) {
717         status = VMXNET3_PKT_STATUS_ERROR;
718         goto func_exit;
719     }
720 
721     /* debug prints */
722     vmxnet3_dump_virt_hdr(net_tx_pkt_get_vhdr(s->tx_pkt));
723     net_tx_pkt_dump(s->tx_pkt);
724 
725     if (!net_tx_pkt_send(s->tx_pkt, qemu_get_queue(s->nic))) {
726         status = VMXNET3_PKT_STATUS_DISCARD;
727         goto func_exit;
728     }
729 
730 func_exit:
731     vmxnet3_on_tx_done_update_stats(s, qidx, status);
732     return (status == VMXNET3_PKT_STATUS_OK);
733 }
734 
735 static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
736 {
737     struct Vmxnet3_TxDesc txd;
738     uint32_t txd_idx;
739     uint32_t data_len;
740     hwaddr data_pa;
741 
742     for (;;) {
743         if (!vmxnet3_pop_next_tx_descr(s, qidx, &txd, &txd_idx)) {
744             break;
745         }
746 
747         vmxnet3_dump_tx_descr(&txd);
748 
749         if (!s->skip_current_tx_pkt) {
750             data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE;
751             data_pa = le64_to_cpu(txd.addr);
752 
753             if (!net_tx_pkt_add_raw_fragment(s->tx_pkt,
754                                                 data_pa,
755                                                 data_len)) {
756                 s->skip_current_tx_pkt = true;
757             }
758         }
759 
760         if (s->tx_sop) {
761             vmxnet3_tx_retrieve_metadata(s, &txd);
762             s->tx_sop = false;
763         }
764 
765         if (txd.eop) {
766             if (!s->skip_current_tx_pkt && net_tx_pkt_parse(s->tx_pkt)) {
767                 if (s->needs_vlan) {
768                     net_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci);
769                 }
770 
771                 vmxnet3_send_packet(s, qidx);
772             } else {
773                 vmxnet3_on_tx_done_update_stats(s, qidx,
774                                                 VMXNET3_PKT_STATUS_ERROR);
775             }
776 
777             vmxnet3_complete_packet(s, qidx, txd_idx);
778             s->tx_sop = true;
779             s->skip_current_tx_pkt = false;
780             net_tx_pkt_reset(s->tx_pkt);
781         }
782     }
783 }
784 
785 static inline void
786 vmxnet3_read_next_rx_descr(VMXNET3State *s, int qidx, int ridx,
787                            struct Vmxnet3_RxDesc *dbuf, uint32_t *didx)
788 {
789     PCIDevice *d = PCI_DEVICE(s);
790 
791     Vmxnet3Ring *ring = &s->rxq_descr[qidx].rx_ring[ridx];
792     *didx = vmxnet3_ring_curr_cell_idx(ring);
793     vmxnet3_ring_read_curr_cell(d, ring, dbuf);
794 }
795 
796 static inline uint8_t
797 vmxnet3_get_rx_ring_gen(VMXNET3State *s, int qidx, int ridx)
798 {
799     return s->rxq_descr[qidx].rx_ring[ridx].gen;
800 }
801 
802 static inline hwaddr
803 vmxnet3_pop_rxc_descr(VMXNET3State *s, int qidx, uint32_t *descr_gen)
804 {
805     uint8_t ring_gen;
806     struct Vmxnet3_RxCompDesc rxcd;
807 
808     hwaddr daddr =
809         vmxnet3_ring_curr_cell_pa(&s->rxq_descr[qidx].comp_ring);
810 
811     pci_dma_read(PCI_DEVICE(s),
812                  daddr, &rxcd, sizeof(struct Vmxnet3_RxCompDesc));
813     ring_gen = vmxnet3_ring_curr_gen(&s->rxq_descr[qidx].comp_ring);
814 
815     if (rxcd.gen != ring_gen) {
816         *descr_gen = ring_gen;
817         vmxnet3_inc_rx_completion_counter(s, qidx);
818         return daddr;
819     }
820 
821     return 0;
822 }
823 
824 static inline void
825 vmxnet3_revert_rxc_descr(VMXNET3State *s, int qidx)
826 {
827     vmxnet3_dec_rx_completion_counter(s, qidx);
828 }
829 
/* Index of the RX queue used by the RX descriptor lookup functions */
#define RXQ_IDX      (0)
/* Index of the RX ring that provides both head and body descriptors */
#define RX_HEAD_BODY_RING (0)
/* Index of the RX ring that provides body descriptors only */
#define RX_BODY_ONLY_RING (1)
833 
834 static bool
835 vmxnet3_get_next_head_rx_descr(VMXNET3State *s,
836                                struct Vmxnet3_RxDesc *descr_buf,
837                                uint32_t *descr_idx,
838                                uint32_t *ridx)
839 {
840     for (;;) {
841         uint32_t ring_gen;
842         vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
843                                    descr_buf, descr_idx);
844 
845         /* If no more free descriptors - return */
846         ring_gen = vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING);
847         if (descr_buf->gen != ring_gen) {
848             return false;
849         }
850 
851         /* Only read after generation field verification */
852         smp_rmb();
853         /* Re-read to be sure we got the latest version */
854         vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
855                                    descr_buf, descr_idx);
856 
857         /* Mark current descriptor as used/skipped */
858         vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
859 
860         /* If this is what we are looking for - return */
861         if (descr_buf->btype == VMXNET3_RXD_BTYPE_HEAD) {
862             *ridx = RX_HEAD_BODY_RING;
863             return true;
864         }
865     }
866 }
867 
868 static bool
869 vmxnet3_get_next_body_rx_descr(VMXNET3State *s,
870                                struct Vmxnet3_RxDesc *d,
871                                uint32_t *didx,
872                                uint32_t *ridx)
873 {
874     vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);
875 
876     /* Try to find corresponding descriptor in head/body ring */
877     if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING)) {
878         /* Only read after generation field verification */
879         smp_rmb();
880         /* Re-read to be sure we got the latest version */
881         vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);
882         if (d->btype == VMXNET3_RXD_BTYPE_BODY) {
883             vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
884             *ridx = RX_HEAD_BODY_RING;
885             return true;
886         }
887     }
888 
889     /*
890      * If there is no free descriptors on head/body ring or next free
891      * descriptor is a head descriptor switch to body only ring
892      */
893     vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);
894 
895     /* If no more free descriptors - return */
896     if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_BODY_ONLY_RING)) {
897         /* Only read after generation field verification */
898         smp_rmb();
899         /* Re-read to be sure we got the latest version */
900         vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);
901         assert(d->btype == VMXNET3_RXD_BTYPE_BODY);
902         *ridx = RX_BODY_ONLY_RING;
903         vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_BODY_ONLY_RING);
904         return true;
905     }
906 
907     return false;
908 }
909 
910 static inline bool
911 vmxnet3_get_next_rx_descr(VMXNET3State *s, bool is_head,
912                           struct Vmxnet3_RxDesc *descr_buf,
913                           uint32_t *descr_idx,
914                           uint32_t *ridx)
915 {
916     if (is_head || !s->rx_packets_compound) {
917         return vmxnet3_get_next_head_rx_descr(s, descr_buf, descr_idx, ridx);
918     } else {
919         return vmxnet3_get_next_body_rx_descr(s, descr_buf, descr_idx, ridx);
920     }
921 }
922 
923 /* In case packet was csum offloaded (either NEEDS_CSUM or DATA_VALID),
924  * the implementation always passes an RxCompDesc with a "Checksum
925  * calculated and found correct" to the OS (cnc=0 and tuc=1, see
926  * vmxnet3_rx_update_descr). This emulates the observed ESXi behavior.
927  *
928  * Therefore, if packet has the NEEDS_CSUM set, we must calculate
929  * and place a fully computed checksum into the tcp/udp header.
930  * Otherwise, the OS driver will receive a checksum-correct indication
931  * (CHECKSUM_UNNECESSARY), but with the actual tcp/udp checksum field
932  * having just the pseudo header csum value.
933  *
934  * While this is not a problem if packet is destined for local delivery,
935  * in the case the host OS performs forwarding, it will forward an
936  * incorrectly checksummed packet.
937  */
938 static void vmxnet3_rx_need_csum_calculate(struct NetRxPkt *pkt,
939                                            const void *pkt_data,
940                                            size_t pkt_len)
941 {
942     struct virtio_net_hdr *vhdr;
943     bool isip4, isip6, istcp, isudp;
944     uint8_t *data;
945     int len;
946 
947     if (!net_rx_pkt_has_virt_hdr(pkt)) {
948         return;
949     }
950 
951     vhdr = net_rx_pkt_get_vhdr(pkt);
952     if (!VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
953         return;
954     }
955 
956     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
957     if (!(isip4 || isip6) || !(istcp || isudp)) {
958         return;
959     }
960 
961     vmxnet3_dump_virt_hdr(vhdr);
962 
963     /* Validate packet len: csum_start + scum_offset + length of csum field */
964     if (pkt_len < (vhdr->csum_start + vhdr->csum_offset + 2)) {
965         VMW_PKPRN("packet len:%zu < csum_start(%d) + csum_offset(%d) + 2, "
966                   "cannot calculate checksum",
967                   pkt_len, vhdr->csum_start, vhdr->csum_offset);
968         return;
969     }
970 
971     data = (uint8_t *)pkt_data + vhdr->csum_start;
972     len = pkt_len - vhdr->csum_start;
973     /* Put the checksum obtained into the packet */
974     stw_be_p(data + vhdr->csum_offset, net_raw_checksum(data, len));
975 
976     vhdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
977     vhdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
978 }
979 
980 static void vmxnet3_rx_update_descr(struct NetRxPkt *pkt,
981     struct Vmxnet3_RxCompDesc *rxcd)
982 {
983     int csum_ok, is_gso;
984     bool isip4, isip6, istcp, isudp;
985     struct virtio_net_hdr *vhdr;
986     uint8_t offload_type;
987 
988     if (net_rx_pkt_is_vlan_stripped(pkt)) {
989         rxcd->ts = 1;
990         rxcd->tci = net_rx_pkt_get_vlan_tag(pkt);
991     }
992 
993     if (!net_rx_pkt_has_virt_hdr(pkt)) {
994         goto nocsum;
995     }
996 
997     vhdr = net_rx_pkt_get_vhdr(pkt);
998     /*
999      * Checksum is valid when lower level tell so or when lower level
1000      * requires checksum offload telling that packet produced/bridged
1001      * locally and did travel over network after last checksum calculation
1002      * or production
1003      */
1004     csum_ok = VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_DATA_VALID) ||
1005               VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM);
1006 
1007     offload_type = vhdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
1008     is_gso = (offload_type != VIRTIO_NET_HDR_GSO_NONE) ? 1 : 0;
1009 
1010     if (!csum_ok && !is_gso) {
1011         goto nocsum;
1012     }
1013 
1014     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1015     if ((!istcp && !isudp) || (!isip4 && !isip6)) {
1016         goto nocsum;
1017     }
1018 
1019     rxcd->cnc = 0;
1020     rxcd->v4 = isip4 ? 1 : 0;
1021     rxcd->v6 = isip6 ? 1 : 0;
1022     rxcd->tcp = istcp ? 1 : 0;
1023     rxcd->udp = isudp ? 1 : 0;
1024     rxcd->fcs = rxcd->tuc = rxcd->ipc = 1;
1025     return;
1026 
1027 nocsum:
1028     rxcd->cnc = 1;
1029     return;
1030 }
1031 
1032 static void
1033 vmxnet3_pci_dma_writev(PCIDevice *pci_dev,
1034                        const struct iovec *iov,
1035                        size_t start_iov_off,
1036                        hwaddr target_addr,
1037                        size_t bytes_to_copy)
1038 {
1039     size_t curr_off = 0;
1040     size_t copied = 0;
1041 
1042     while (bytes_to_copy) {
1043         if (start_iov_off < (curr_off + iov->iov_len)) {
1044             size_t chunk_len =
1045                 MIN((curr_off + iov->iov_len) - start_iov_off, bytes_to_copy);
1046 
1047             pci_dma_write(pci_dev, target_addr + copied,
1048                           iov->iov_base + start_iov_off - curr_off,
1049                           chunk_len);
1050 
1051             copied += chunk_len;
1052             start_iov_off += chunk_len;
1053             curr_off = start_iov_off;
1054             bytes_to_copy -= chunk_len;
1055         } else {
1056             curr_off += iov->iov_len;
1057         }
1058         iov++;
1059     }
1060 }
1061 
/*
 * Deliver the packet held in s->rx_pkt to the guest through RX queue
 * RXQ_IDX.
 *
 * The packet is copied fragment by fragment: each iteration takes one RX
 * completion descriptor and one RX descriptor, DMA-writes up to rxd.len
 * bytes into the guest buffer and prepares the matching completion
 * descriptor.  A completion descriptor is only written to guest memory
 * once the next fragment has been set up, or after the loop, so that the
 * last one written can carry the eop and err flags.
 *
 * The loop ends when the whole packet has been copied, when max_rx_frags
 * fragments have been used, or when no completion or RX descriptor could
 * be obtained.  The queue interrupt is triggered in every case.
 *
 * Returns true if the whole packet was copied, false otherwise.  The RX
 * statistics are updated with OK, ERROR (fragment limit reached) or
 * OUT_OF_BUF accordingly.
 */
static bool
vmxnet3_indicate_packet(VMXNET3State *s)
{
    struct Vmxnet3_RxDesc rxd;
    PCIDevice *d = PCI_DEVICE(s);
    bool is_head = true;
    uint32_t rxd_idx;
    uint32_t rx_ridx = 0;

    struct Vmxnet3_RxCompDesc rxcd;
    uint32_t new_rxcd_gen = VMXNET3_INIT_GEN;
    /* Completion descriptor popped in the current iteration (0 if none) */
    hwaddr new_rxcd_pa = 0;
    /* Completion descriptor filled in rxcd but not yet written to the guest */
    hwaddr ready_rxcd_pa = 0;
    struct iovec *data = net_rx_pkt_get_iovec(s->rx_pkt);
    size_t bytes_copied = 0;
    size_t bytes_left = net_rx_pkt_get_total_len(s->rx_pkt);
    uint16_t num_frags = 0;
    size_t chunk_size;

    net_rx_pkt_dump(s->rx_pkt);

    while (bytes_left > 0) {

        /* cannot add more frags to packet */
        if (num_frags == s->max_rx_frags) {
            break;
        }

        new_rxcd_pa = vmxnet3_pop_rxc_descr(s, RXQ_IDX, &new_rxcd_gen);
        if (!new_rxcd_pa) {
            break;
        }

        /*
         * On failure new_rxcd_pa stays non-zero, so the completion
         * descriptor popped above is reverted after the loop.
         */
        if (!vmxnet3_get_next_rx_descr(s, is_head, &rxd, &rxd_idx, &rx_ridx)) {
            break;
        }

        chunk_size = MIN(bytes_left, rxd.len);
        vmxnet3_pci_dma_writev(d, data, bytes_copied,
                               le64_to_cpu(rxd.addr), chunk_size);
        bytes_copied += chunk_size;
        bytes_left -= chunk_size;

        vmxnet3_dump_rx_descr(&rxd);

        /* Flush the completion descriptor of the previous fragment */
        if (ready_rxcd_pa != 0) {
            pci_dma_write(d, ready_rxcd_pa, &rxcd, sizeof(rxcd));
        }

        memset(&rxcd, 0, sizeof(struct Vmxnet3_RxCompDesc));
        rxcd.rxdIdx = rxd_idx;
        rxcd.len = chunk_size;
        rxcd.sop = is_head;
        rxcd.gen = new_rxcd_gen;
        rxcd.rqID = RXQ_IDX + rx_ridx * s->rxq_num;

        /* VLAN and checksum info goes only into the last fragment's descriptor */
        if (bytes_left == 0) {
            vmxnet3_rx_update_descr(s->rx_pkt, &rxcd);
        }

        VMW_RIPRN("RX Completion descriptor: rxRing: %lu rxIdx %lu len %lu "
                  "sop %d csum_correct %lu",
                  (unsigned long) rx_ridx,
                  (unsigned long) rxcd.rxdIdx,
                  (unsigned long) rxcd.len,
                  (int) rxcd.sop,
                  (unsigned long) rxcd.tuc);

        is_head = false;
        ready_rxcd_pa = new_rxcd_pa;
        new_rxcd_pa = 0;
        num_frags++;
    }

    /* Write the last prepared completion descriptor, marked end-of-packet */
    if (ready_rxcd_pa != 0) {
        rxcd.eop = 1;
        /* Flag an error when the packet could not be copied completely */
        rxcd.err = (bytes_left != 0);

        pci_dma_write(d, ready_rxcd_pa, &rxcd, sizeof(rxcd));

        /* Flush RX descriptor changes */
        smp_wmb();
    }

    /* A completion descriptor was popped but never used: give it back */
    if (new_rxcd_pa != 0) {
        vmxnet3_revert_rxc_descr(s, RXQ_IDX);
    }

    vmxnet3_trigger_interrupt(s, s->rxq_descr[RXQ_IDX].intr_idx);

    if (bytes_left == 0) {
        vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_OK);
        return true;
    } else if (num_frags == s->max_rx_frags) {
        vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_ERROR);
        return false;
    } else {
        vmxnet3_on_rx_done_update_stats(s, RXQ_IDX,
                                        VMXNET3_PKT_STATUS_OUT_OF_BUF);
        return false;
    }
}
1164 
1165 static void
1166 vmxnet3_io_bar0_write(void *opaque, hwaddr addr,
1167                       uint64_t val, unsigned size)
1168 {
1169     VMXNET3State *s = opaque;
1170 
1171     if (!s->device_active) {
1172         return;
1173     }
1174 
1175     if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_TXPROD,
1176                         VMXNET3_DEVICE_MAX_TX_QUEUES, VMXNET3_REG_ALIGN)) {
1177         int tx_queue_idx =
1178             VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_TXPROD,
1179                                      VMXNET3_REG_ALIGN);
1180         assert(tx_queue_idx <= s->txq_num);
1181         vmxnet3_process_tx_queue(s, tx_queue_idx);
1182         return;
1183     }
1184 
1185     if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
1186                         VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
1187         int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
1188                                          VMXNET3_REG_ALIGN);
1189 
1190         VMW_CBPRN("Interrupt mask for line %d written: 0x%" PRIx64, l, val);
1191 
1192         vmxnet3_on_interrupt_mask_changed(s, l, val);
1193         return;
1194     }
1195 
1196     if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD,
1197                         VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN) ||
1198        VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD2,
1199                         VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN)) {
1200         return;
1201     }
1202 
1203     VMW_WRPRN("BAR0 unknown write [%" PRIx64 "] = %" PRIx64 ", size %d",
1204               (uint64_t) addr, val, size);
1205 }
1206 
1207 static uint64_t
1208 vmxnet3_io_bar0_read(void *opaque, hwaddr addr, unsigned size)
1209 {
1210     VMXNET3State *s = opaque;
1211 
1212     if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
1213                         VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
1214         int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
1215                                          VMXNET3_REG_ALIGN);
1216         return s->interrupt_states[l].is_masked;
1217     }
1218 
1219     VMW_CBPRN("BAR0 unknown read [%" PRIx64 "], size %d", addr, size);
1220     return 0;
1221 }
1222 
1223 static void vmxnet3_reset_interrupt_states(VMXNET3State *s)
1224 {
1225     int i;
1226     for (i = 0; i < ARRAY_SIZE(s->interrupt_states); i++) {
1227         s->interrupt_states[i].is_asserted = false;
1228         s->interrupt_states[i].is_pending = false;
1229         s->interrupt_states[i].is_masked = true;
1230     }
1231 }
1232 
1233 static void vmxnet3_reset_mac(VMXNET3State *s)
1234 {
1235     memcpy(&s->conf.macaddr.a, &s->perm_mac.a, sizeof(s->perm_mac.a));
1236     VMW_CFPRN("MAC address set to: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));
1237 }
1238 
1239 static void vmxnet3_deactivate_device(VMXNET3State *s)
1240 {
1241     if (s->device_active) {
1242         VMW_CBPRN("Deactivating vmxnet3...");
1243         net_tx_pkt_reset(s->tx_pkt);
1244         net_tx_pkt_uninit(s->tx_pkt);
1245         net_rx_pkt_uninit(s->rx_pkt);
1246         s->device_active = false;
1247     }
1248 }
1249 
1250 static void vmxnet3_reset(VMXNET3State *s)
1251 {
1252     VMW_CBPRN("Resetting vmxnet3...");
1253 
1254     vmxnet3_deactivate_device(s);
1255     vmxnet3_reset_interrupt_states(s);
1256     s->drv_shmem = 0;
1257     s->tx_sop = true;
1258     s->skip_current_tx_pkt = false;
1259 }
1260 
1261 static void vmxnet3_update_rx_mode(VMXNET3State *s)
1262 {
1263     PCIDevice *d = PCI_DEVICE(s);
1264 
1265     s->rx_mode = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem,
1266                                            devRead.rxFilterConf.rxMode);
1267     VMW_CFPRN("RX mode: 0x%08X", s->rx_mode);
1268 }
1269 
1270 static void vmxnet3_update_vlan_filters(VMXNET3State *s)
1271 {
1272     int i;
1273     PCIDevice *d = PCI_DEVICE(s);
1274 
1275     /* Copy configuration from shared memory */
1276     VMXNET3_READ_DRV_SHARED(d, s->drv_shmem,
1277                             devRead.rxFilterConf.vfTable,
1278                             s->vlan_table,
1279                             sizeof(s->vlan_table));
1280 
1281     /* Invert byte order when needed */
1282     for (i = 0; i < ARRAY_SIZE(s->vlan_table); i++) {
1283         s->vlan_table[i] = le32_to_cpu(s->vlan_table[i]);
1284     }
1285 
1286     /* Dump configuration for debugging purposes */
1287     VMW_CFPRN("Configured VLANs:");
1288     for (i = 0; i < sizeof(s->vlan_table) * 8; i++) {
1289         if (VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, i)) {
1290             VMW_CFPRN("\tVLAN %d is present", i);
1291         }
1292     }
1293 }
1294 
1295 static void vmxnet3_update_mcast_filters(VMXNET3State *s)
1296 {
1297     PCIDevice *d = PCI_DEVICE(s);
1298 
1299     uint16_t list_bytes =
1300         VMXNET3_READ_DRV_SHARED16(d, s->drv_shmem,
1301                                   devRead.rxFilterConf.mfTableLen);
1302 
1303     s->mcast_list_len = list_bytes / sizeof(s->mcast_list[0]);
1304 
1305     s->mcast_list = g_realloc(s->mcast_list, list_bytes);
1306     if (!s->mcast_list) {
1307         if (s->mcast_list_len == 0) {
1308             VMW_CFPRN("Current multicast list is empty");
1309         } else {
1310             VMW_ERPRN("Failed to allocate multicast list of %d elements",
1311                       s->mcast_list_len);
1312         }
1313         s->mcast_list_len = 0;
1314     } else {
1315         int i;
1316         hwaddr mcast_list_pa =
1317             VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem,
1318                                       devRead.rxFilterConf.mfTablePA);
1319 
1320         pci_dma_read(d, mcast_list_pa, s->mcast_list, list_bytes);
1321 
1322         VMW_CFPRN("Current multicast list len is %d:", s->mcast_list_len);
1323         for (i = 0; i < s->mcast_list_len; i++) {
1324             VMW_CFPRN("\t" MAC_FMT, MAC_ARG(s->mcast_list[i].a));
1325         }
1326     }
1327 }
1328 
1329 static void vmxnet3_setup_rx_filtering(VMXNET3State *s)
1330 {
1331     vmxnet3_update_rx_mode(s);
1332     vmxnet3_update_vlan_filters(s);
1333     vmxnet3_update_mcast_filters(s);
1334 }
1335 
1336 static uint32_t vmxnet3_get_interrupt_config(VMXNET3State *s)
1337 {
1338     uint32_t interrupt_mode = VMXNET3_IT_AUTO | (VMXNET3_IMM_AUTO << 2);
1339     VMW_CFPRN("Interrupt config is 0x%X", interrupt_mode);
1340     return interrupt_mode;
1341 }
1342 
1343 static void vmxnet3_fill_stats(VMXNET3State *s)
1344 {
1345     int i;
1346     PCIDevice *d = PCI_DEVICE(s);
1347 
1348     if (!s->device_active)
1349         return;
1350 
1351     for (i = 0; i < s->txq_num; i++) {
1352         pci_dma_write(d,
1353                       s->txq_descr[i].tx_stats_pa,
1354                       &s->txq_descr[i].txq_stats,
1355                       sizeof(s->txq_descr[i].txq_stats));
1356     }
1357 
1358     for (i = 0; i < s->rxq_num; i++) {
1359         pci_dma_write(d,
1360                       s->rxq_descr[i].rx_stats_pa,
1361                       &s->rxq_descr[i].rxq_stats,
1362                       sizeof(s->rxq_descr[i].rxq_stats));
1363     }
1364 }
1365 
1366 static void vmxnet3_adjust_by_guest_type(VMXNET3State *s)
1367 {
1368     struct Vmxnet3_GOSInfo gos;
1369     PCIDevice *d = PCI_DEVICE(s);
1370 
1371     VMXNET3_READ_DRV_SHARED(d, s->drv_shmem, devRead.misc.driverInfo.gos,
1372                             &gos, sizeof(gos));
1373     s->rx_packets_compound =
1374         (gos.gosType == VMXNET3_GOS_TYPE_WIN) ? false : true;
1375 
1376     VMW_CFPRN("Guest type specifics: RXCOMPOUND: %d", s->rx_packets_compound);
1377 }
1378 
1379 static void
1380 vmxnet3_dump_conf_descr(const char *name,
1381                         struct Vmxnet3_VariableLenConfDesc *pm_descr)
1382 {
1383     VMW_CFPRN("%s descriptor dump: Version %u, Length %u",
1384               name, pm_descr->confVer, pm_descr->confLen);
1385 
1386 };
1387 
1388 static void vmxnet3_update_pm_state(VMXNET3State *s)
1389 {
1390     struct Vmxnet3_VariableLenConfDesc pm_descr;
1391     PCIDevice *d = PCI_DEVICE(s);
1392 
1393     pm_descr.confLen =
1394         VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.pmConfDesc.confLen);
1395     pm_descr.confVer =
1396         VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.pmConfDesc.confVer);
1397     pm_descr.confPA =
1398         VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.pmConfDesc.confPA);
1399 
1400     vmxnet3_dump_conf_descr("PM State", &pm_descr);
1401 }
1402 
1403 static void vmxnet3_update_features(VMXNET3State *s)
1404 {
1405     uint32_t guest_features;
1406     int rxcso_supported;
1407     PCIDevice *d = PCI_DEVICE(s);
1408 
1409     guest_features = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem,
1410                                                devRead.misc.uptFeatures);
1411 
1412     rxcso_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXCSUM);
1413     s->rx_vlan_stripping = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXVLAN);
1414     s->lro_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_LRO);
1415 
1416     VMW_CFPRN("Features configuration: LRO: %d, RXCSUM: %d, VLANSTRIP: %d",
1417               s->lro_supported, rxcso_supported,
1418               s->rx_vlan_stripping);
1419     if (s->peer_has_vhdr) {
1420         qemu_set_offload(qemu_get_queue(s->nic)->peer,
1421                          rxcso_supported,
1422                          s->lro_supported,
1423                          s->lro_supported,
1424                          0,
1425                          0);
1426     }
1427 }
1428 
1429 static bool vmxnet3_verify_intx(VMXNET3State *s, int intx)
1430 {
1431     return s->msix_used || msi_enabled(PCI_DEVICE(s))
1432         || intx == pci_get_byte(s->parent_obj.config + PCI_INTERRUPT_PIN) - 1;
1433 }
1434 
1435 static void vmxnet3_validate_interrupt_idx(bool is_msix, int idx)
1436 {
1437     int max_ints = is_msix ? VMXNET3_MAX_INTRS : VMXNET3_MAX_NMSIX_INTRS;
1438     if (idx >= max_ints) {
1439         hw_error("Bad interrupt index: %d\n", idx);
1440     }
1441 }
1442 
1443 static void vmxnet3_validate_interrupts(VMXNET3State *s)
1444 {
1445     int i;
1446 
1447     VMW_CFPRN("Verifying event interrupt index (%d)", s->event_int_idx);
1448     vmxnet3_validate_interrupt_idx(s->msix_used, s->event_int_idx);
1449 
1450     for (i = 0; i < s->txq_num; i++) {
1451         int idx = s->txq_descr[i].intr_idx;
1452         VMW_CFPRN("Verifying TX queue %d interrupt index (%d)", i, idx);
1453         vmxnet3_validate_interrupt_idx(s->msix_used, idx);
1454     }
1455 
1456     for (i = 0; i < s->rxq_num; i++) {
1457         int idx = s->rxq_descr[i].intr_idx;
1458         VMW_CFPRN("Verifying RX queue %d interrupt index (%d)", i, idx);
1459         vmxnet3_validate_interrupt_idx(s->msix_used, idx);
1460     }
1461 }
1462 
1463 static void vmxnet3_validate_queues(VMXNET3State *s)
1464 {
1465     /*
1466     * txq_num and rxq_num are total number of queues
1467     * configured by guest. These numbers must not
1468     * exceed corresponding maximal values.
1469     */
1470 
1471     if (s->txq_num > VMXNET3_DEVICE_MAX_TX_QUEUES) {
1472         hw_error("Bad TX queues number: %d\n", s->txq_num);
1473     }
1474 
1475     if (s->rxq_num > VMXNET3_DEVICE_MAX_RX_QUEUES) {
1476         hw_error("Bad RX queues number: %d\n", s->rxq_num);
1477     }
1478 }
1479 
/*
 * Activate the device using the configuration the guest driver placed in
 * the shared memory area at s->drv_shmem.
 *
 * The function returns without activating when the driver magic check
 * fails or when the device is already active.  Otherwise it caches the
 * guest configuration (features, RX filters, MTU, fragment limits,
 * interrupt indexes, queue counts), initialises the TX and RX rings and
 * their completion rings from the queue descriptor table, allocates the
 * TX/RX packet wrappers, validates the interrupt indexes, restores the
 * MAC address and finally marks the device active.
 */
static void vmxnet3_activate_device(VMXNET3State *s)
{
    int i;
    static const uint32_t VMXNET3_DEF_TX_THRESHOLD = 1;
    PCIDevice *d = PCI_DEVICE(s);
    hwaddr qdescr_table_pa;
    uint64_t pa;
    uint32_t size;

    /* Verify configuration consistency */
    if (!vmxnet3_verify_driver_magic(d, s->drv_shmem)) {
        VMW_ERPRN("Device configuration received from driver is invalid");
        return;
    }

    /* Verify if device is active */
    if (s->device_active) {
        VMW_CFPRN("Vmxnet3 device is active");
        return;
    }

    vmxnet3_adjust_by_guest_type(s);
    vmxnet3_update_features(s);
    vmxnet3_update_pm_state(s);
    vmxnet3_setup_rx_filtering(s);
    /* Cache fields from shared memory */
    s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu);
    VMW_CFPRN("MTU is %u", s->mtu);

    s->max_rx_frags =
        VMXNET3_READ_DRV_SHARED16(d, s->drv_shmem, devRead.misc.maxNumRxSG);

    /* A value of 0 from the driver is treated as one fragment per packet */
    if (s->max_rx_frags == 0) {
        s->max_rx_frags = 1;
    }

    VMW_CFPRN("Max RX fragments is %u", s->max_rx_frags);

    s->event_int_idx =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.eventIntrIdx);
    assert(vmxnet3_verify_intx(s, s->event_int_idx));
    VMW_CFPRN("Events interrupt line is %u", s->event_int_idx);

    s->auto_int_masking =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.autoMask);
    VMW_CFPRN("Automatic interrupt masking is %d", (int)s->auto_int_masking);

    s->txq_num =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numTxQueues);
    s->rxq_num =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numRxQueues);

    VMW_CFPRN("Number of TX/RX queues %u/%u", s->txq_num, s->rxq_num);
    vmxnet3_validate_queues(s);

    /*
     * The queue descriptor table holds the TX queue descriptors first,
     * followed by the RX queue descriptors (see the address computation
     * in the RX loop below).
     */
    qdescr_table_pa =
        VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.misc.queueDescPA);
    VMW_CFPRN("TX queues descriptors table is at 0x%" PRIx64, qdescr_table_pa);

    /*
     * Worst-case scenario is a packet that holds all TX rings space so
     * we calculate total size of all TX rings for max TX fragments number
     */
    s->max_tx_frags = 0;

    /* TX queues */
    for (i = 0; i < s->txq_num; i++) {
        hwaddr qdescr_pa =
            qdescr_table_pa + i * sizeof(struct Vmxnet3_TxQueueDesc);

        /* Read interrupt number for this TX queue */
        s->txq_descr[i].intr_idx =
            VMXNET3_READ_TX_QUEUE_DESCR8(d, qdescr_pa, conf.intrIdx);
        assert(vmxnet3_verify_intx(s, s->txq_descr[i].intr_idx));

        VMW_CFPRN("TX Queue %d interrupt: %d", i, s->txq_descr[i].intr_idx);

        /* Read rings memory locations for TX queues */
        pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.txRingBasePA);
        size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.txRingSize);

        vmxnet3_ring_init(d, &s->txq_descr[i].tx_ring, pa, size,
                          sizeof(struct Vmxnet3_TxDesc), false);
        VMXNET3_RING_DUMP(VMW_CFPRN, "TX", i, &s->txq_descr[i].tx_ring);

        /* Accumulate the TX ring sizes; the sum bounds the TX fragment count */
        s->max_tx_frags += size;

        /* TXC ring */
        pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.compRingBasePA);
        size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.compRingSize);
        vmxnet3_ring_init(d, &s->txq_descr[i].comp_ring, pa, size,
                          sizeof(struct Vmxnet3_TxCompDesc), true);
        VMXNET3_RING_DUMP(VMW_CFPRN, "TXC", i, &s->txq_descr[i].comp_ring);

        /* Remember where the guest expects this queue's statistics */
        s->txq_descr[i].tx_stats_pa =
            qdescr_pa + offsetof(struct Vmxnet3_TxQueueDesc, stats);

        memset(&s->txq_descr[i].txq_stats, 0,
               sizeof(s->txq_descr[i].txq_stats));

        /* Fill device-managed parameters for queues */
        VMXNET3_WRITE_TX_QUEUE_DESCR32(d, qdescr_pa,
                                       ctrl.txThreshold,
                                       VMXNET3_DEF_TX_THRESHOLD);
    }

    /* Preallocate TX packet wrapper */
    VMW_CFPRN("Max TX fragments is %u", s->max_tx_frags);
    net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s),
                    s->max_tx_frags, s->peer_has_vhdr);
    net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);

    /* Read rings memory locations for RX queues */
    for (i = 0; i < s->rxq_num; i++) {
        int j;
        /* RX queue descriptors start right after all TX queue descriptors */
        hwaddr qd_pa =
            qdescr_table_pa + s->txq_num * sizeof(struct Vmxnet3_TxQueueDesc) +
            i * sizeof(struct Vmxnet3_RxQueueDesc);

        /*
         * Read interrupt number for this RX queue.
         * NOTE(review): this uses the TX queue descriptor accessor on an
         * RX queue descriptor; presumably conf.intrIdx sits at the same
         * offset in both layouts - confirm against the struct definitions.
         */
        s->rxq_descr[i].intr_idx =
            VMXNET3_READ_TX_QUEUE_DESCR8(d, qd_pa, conf.intrIdx);
        assert(vmxnet3_verify_intx(s, s->rxq_descr[i].intr_idx));

        VMW_CFPRN("RX Queue %d interrupt: %d", i, s->rxq_descr[i].intr_idx);

        /* Read rings memory locations */
        for (j = 0; j < VMXNET3_RX_RINGS_PER_QUEUE; j++) {
            /* RX rings */
            pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.rxRingBasePA[j]);
            size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.rxRingSize[j]);
            vmxnet3_ring_init(d, &s->rxq_descr[i].rx_ring[j], pa, size,
                              sizeof(struct Vmxnet3_RxDesc), false);
            VMW_CFPRN("RX queue %d:%d: Base: %" PRIx64 ", Size: %d",
                      i, j, pa, size);
        }

        /* RXC ring */
        pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.compRingBasePA);
        size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.compRingSize);
        vmxnet3_ring_init(d, &s->rxq_descr[i].comp_ring, pa, size,
                          sizeof(struct Vmxnet3_RxCompDesc), true);
        VMW_CFPRN("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size);

        /* Remember where the guest expects this queue's statistics */
        s->rxq_descr[i].rx_stats_pa =
            qd_pa + offsetof(struct Vmxnet3_RxQueueDesc, stats);
        memset(&s->rxq_descr[i].rxq_stats, 0,
               sizeof(s->rxq_descr[i].rxq_stats));
    }

    vmxnet3_validate_interrupts(s);

    /* Make sure everything is in place before device activation */
    smp_wmb();

    vmxnet3_reset_mac(s);

    s->device_active = true;
}
1639 
1640 static void vmxnet3_handle_command(VMXNET3State *s, uint64_t cmd)
1641 {
1642     s->last_command = cmd;
1643 
1644     switch (cmd) {
1645     case VMXNET3_CMD_GET_PERM_MAC_HI:
1646         VMW_CBPRN("Set: Get upper part of permanent MAC");
1647         break;
1648 
1649     case VMXNET3_CMD_GET_PERM_MAC_LO:
1650         VMW_CBPRN("Set: Get lower part of permanent MAC");
1651         break;
1652 
1653     case VMXNET3_CMD_GET_STATS:
1654         VMW_CBPRN("Set: Get device statistics");
1655         vmxnet3_fill_stats(s);
1656         break;
1657 
1658     case VMXNET3_CMD_ACTIVATE_DEV:
1659         VMW_CBPRN("Set: Activating vmxnet3 device");
1660         vmxnet3_activate_device(s);
1661         break;
1662 
1663     case VMXNET3_CMD_UPDATE_RX_MODE:
1664         VMW_CBPRN("Set: Update rx mode");
1665         vmxnet3_update_rx_mode(s);
1666         break;
1667 
1668     case VMXNET3_CMD_UPDATE_VLAN_FILTERS:
1669         VMW_CBPRN("Set: Update VLAN filters");
1670         vmxnet3_update_vlan_filters(s);
1671         break;
1672 
1673     case VMXNET3_CMD_UPDATE_MAC_FILTERS:
1674         VMW_CBPRN("Set: Update MAC filters");
1675         vmxnet3_update_mcast_filters(s);
1676         break;
1677 
1678     case VMXNET3_CMD_UPDATE_FEATURE:
1679         VMW_CBPRN("Set: Update features");
1680         vmxnet3_update_features(s);
1681         break;
1682 
1683     case VMXNET3_CMD_UPDATE_PMCFG:
1684         VMW_CBPRN("Set: Update power management config");
1685         vmxnet3_update_pm_state(s);
1686         break;
1687 
1688     case VMXNET3_CMD_GET_LINK:
1689         VMW_CBPRN("Set: Get link");
1690         break;
1691 
1692     case VMXNET3_CMD_RESET_DEV:
1693         VMW_CBPRN("Set: Reset device");
1694         vmxnet3_reset(s);
1695         break;
1696 
1697     case VMXNET3_CMD_QUIESCE_DEV:
1698         VMW_CBPRN("Set: VMXNET3_CMD_QUIESCE_DEV - deactivate the device");
1699         vmxnet3_deactivate_device(s);
1700         break;
1701 
1702     case VMXNET3_CMD_GET_CONF_INTR:
1703         VMW_CBPRN("Set: VMXNET3_CMD_GET_CONF_INTR - interrupt configuration");
1704         break;
1705 
1706     case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
1707         VMW_CBPRN("Set: VMXNET3_CMD_GET_ADAPTIVE_RING_INFO - "
1708                   "adaptive ring info flags");
1709         break;
1710 
1711     case VMXNET3_CMD_GET_DID_LO:
1712         VMW_CBPRN("Set: Get lower part of device ID");
1713         break;
1714 
1715     case VMXNET3_CMD_GET_DID_HI:
1716         VMW_CBPRN("Set: Get upper part of device ID");
1717         break;
1718 
1719     case VMXNET3_CMD_GET_DEV_EXTRA_INFO:
1720         VMW_CBPRN("Set: Get device extra info");
1721         break;
1722 
1723     default:
1724         VMW_CBPRN("Received unknown command: %" PRIx64, cmd);
1725         break;
1726     }
1727 }
1728 
1729 static uint64_t vmxnet3_get_command_status(VMXNET3State *s)
1730 {
1731     uint64_t ret;
1732 
1733     switch (s->last_command) {
1734     case VMXNET3_CMD_ACTIVATE_DEV:
1735         ret = (s->device_active) ? 0 : 1;
1736         VMW_CFPRN("Device active: %" PRIx64, ret);
1737         break;
1738 
1739     case VMXNET3_CMD_RESET_DEV:
1740     case VMXNET3_CMD_QUIESCE_DEV:
1741     case VMXNET3_CMD_GET_QUEUE_STATUS:
1742     case VMXNET3_CMD_GET_DEV_EXTRA_INFO:
1743         ret = 0;
1744         break;
1745 
1746     case VMXNET3_CMD_GET_LINK:
1747         ret = s->link_status_and_speed;
1748         VMW_CFPRN("Link and speed: %" PRIx64, ret);
1749         break;
1750 
1751     case VMXNET3_CMD_GET_PERM_MAC_LO:
1752         ret = vmxnet3_get_mac_low(&s->perm_mac);
1753         break;
1754 
1755     case VMXNET3_CMD_GET_PERM_MAC_HI:
1756         ret = vmxnet3_get_mac_high(&s->perm_mac);
1757         break;
1758 
1759     case VMXNET3_CMD_GET_CONF_INTR:
1760         ret = vmxnet3_get_interrupt_config(s);
1761         break;
1762 
1763     case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
1764         ret = VMXNET3_DISABLE_ADAPTIVE_RING;
1765         break;
1766 
1767     case VMXNET3_CMD_GET_DID_LO:
1768         ret = PCI_DEVICE_ID_VMWARE_VMXNET3;
1769         break;
1770 
1771     case VMXNET3_CMD_GET_DID_HI:
1772         ret = VMXNET3_DEVICE_REVISION;
1773         break;
1774 
1775     default:
1776         VMW_WRPRN("Received request for unknown command: %x", s->last_command);
1777         ret = 0;
1778         break;
1779     }
1780 
1781     return ret;
1782 }
1783 
1784 static void vmxnet3_set_events(VMXNET3State *s, uint32_t val)
1785 {
1786     uint32_t events;
1787     PCIDevice *d = PCI_DEVICE(s);
1788 
1789     VMW_CBPRN("Setting events: 0x%x", val);
1790     events = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, ecr) | val;
1791     VMXNET3_WRITE_DRV_SHARED32(d, s->drv_shmem, ecr, events);
1792 }
1793 
1794 static void vmxnet3_ack_events(VMXNET3State *s, uint32_t val)
1795 {
1796     PCIDevice *d = PCI_DEVICE(s);
1797     uint32_t events;
1798 
1799     VMW_CBPRN("Clearing events: 0x%x", val);
1800     events = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, ecr) & ~val;
1801     VMXNET3_WRITE_DRV_SHARED32(d, s->drv_shmem, ecr, events);
1802 }
1803 
1804 static void
1805 vmxnet3_io_bar1_write(void *opaque,
1806                       hwaddr addr,
1807                       uint64_t val,
1808                       unsigned size)
1809 {
1810     VMXNET3State *s = opaque;
1811 
1812     switch (addr) {
1813     /* Vmxnet3 Revision Report Selection */
1814     case VMXNET3_REG_VRRS:
1815         VMW_CBPRN("Write BAR1 [VMXNET3_REG_VRRS] = %" PRIx64 ", size %d",
1816                   val, size);
1817         break;
1818 
1819     /* UPT Version Report Selection */
1820     case VMXNET3_REG_UVRS:
1821         VMW_CBPRN("Write BAR1 [VMXNET3_REG_UVRS] = %" PRIx64 ", size %d",
1822                   val, size);
1823         break;
1824 
1825     /* Driver Shared Address Low */
1826     case VMXNET3_REG_DSAL:
1827         VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAL] = %" PRIx64 ", size %d",
1828                   val, size);
1829         /*
1830          * Guest driver will first write the low part of the shared
1831          * memory address. We save it to temp variable and set the
1832          * shared address only after we get the high part
1833          */
1834         if (val == 0) {
1835             vmxnet3_deactivate_device(s);
1836         }
1837         s->temp_shared_guest_driver_memory = val;
1838         s->drv_shmem = 0;
1839         break;
1840 
1841     /* Driver Shared Address High */
1842     case VMXNET3_REG_DSAH:
1843         VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAH] = %" PRIx64 ", size %d",
1844                   val, size);
1845         /*
1846          * Set the shared memory between guest driver and device.
1847          * We already should have low address part.
1848          */
1849         s->drv_shmem = s->temp_shared_guest_driver_memory | (val << 32);
1850         break;
1851 
1852     /* Command */
1853     case VMXNET3_REG_CMD:
1854         VMW_CBPRN("Write BAR1 [VMXNET3_REG_CMD] = %" PRIx64 ", size %d",
1855                   val, size);
1856         vmxnet3_handle_command(s, val);
1857         break;
1858 
1859     /* MAC Address Low */
1860     case VMXNET3_REG_MACL:
1861         VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACL] = %" PRIx64 ", size %d",
1862                   val, size);
1863         s->temp_mac = val;
1864         break;
1865 
1866     /* MAC Address High */
1867     case VMXNET3_REG_MACH:
1868         VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACH] = %" PRIx64 ", size %d",
1869                   val, size);
1870         vmxnet3_set_variable_mac(s, val, s->temp_mac);
1871         break;
1872 
1873     /* Interrupt Cause Register */
1874     case VMXNET3_REG_ICR:
1875         VMW_CBPRN("Write BAR1 [VMXNET3_REG_ICR] = %" PRIx64 ", size %d",
1876                   val, size);
1877         g_assert_not_reached();
1878         break;
1879 
1880     /* Event Cause Register */
1881     case VMXNET3_REG_ECR:
1882         VMW_CBPRN("Write BAR1 [VMXNET3_REG_ECR] = %" PRIx64 ", size %d",
1883                   val, size);
1884         vmxnet3_ack_events(s, val);
1885         break;
1886 
1887     default:
1888         VMW_CBPRN("Unknown Write to BAR1 [%" PRIx64 "] = %" PRIx64 ", size %d",
1889                   addr, val, size);
1890         break;
1891     }
1892 }
1893 
1894 static uint64_t
1895 vmxnet3_io_bar1_read(void *opaque, hwaddr addr, unsigned size)
1896 {
1897         VMXNET3State *s = opaque;
1898         uint64_t ret = 0;
1899 
1900         switch (addr) {
1901         /* Vmxnet3 Revision Report Selection */
1902         case VMXNET3_REG_VRRS:
1903             VMW_CBPRN("Read BAR1 [VMXNET3_REG_VRRS], size %d", size);
1904             ret = VMXNET3_DEVICE_REVISION;
1905             break;
1906 
1907         /* UPT Version Report Selection */
1908         case VMXNET3_REG_UVRS:
1909             VMW_CBPRN("Read BAR1 [VMXNET3_REG_UVRS], size %d", size);
1910             ret = VMXNET3_UPT_REVISION;
1911             break;
1912 
1913         /* Command */
1914         case VMXNET3_REG_CMD:
1915             VMW_CBPRN("Read BAR1 [VMXNET3_REG_CMD], size %d", size);
1916             ret = vmxnet3_get_command_status(s);
1917             break;
1918 
1919         /* MAC Address Low */
1920         case VMXNET3_REG_MACL:
1921             VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACL], size %d", size);
1922             ret = vmxnet3_get_mac_low(&s->conf.macaddr);
1923             break;
1924 
1925         /* MAC Address High */
1926         case VMXNET3_REG_MACH:
1927             VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACH], size %d", size);
1928             ret = vmxnet3_get_mac_high(&s->conf.macaddr);
1929             break;
1930 
1931         /*
1932          * Interrupt Cause Register
1933          * Used for legacy interrupts only so interrupt index always 0
1934          */
1935         case VMXNET3_REG_ICR:
1936             VMW_CBPRN("Read BAR1 [VMXNET3_REG_ICR], size %d", size);
1937             if (vmxnet3_interrupt_asserted(s, 0)) {
1938                 vmxnet3_clear_interrupt(s, 0);
1939                 ret = true;
1940             } else {
1941                 ret = false;
1942             }
1943             break;
1944 
1945         default:
1946             VMW_CBPRN("Unknow read BAR1[%" PRIx64 "], %d bytes", addr, size);
1947             break;
1948         }
1949 
1950         return ret;
1951 }
1952 
1953 static int
1954 vmxnet3_can_receive(NetClientState *nc)
1955 {
1956     VMXNET3State *s = qemu_get_nic_opaque(nc);
1957     return s->device_active &&
1958            VMXNET_FLAG_IS_SET(s->link_status_and_speed, VMXNET3_LINK_STATUS_UP);
1959 }
1960 
1961 static inline bool
1962 vmxnet3_is_registered_vlan(VMXNET3State *s, const void *data)
1963 {
1964     uint16_t vlan_tag = eth_get_pkt_tci(data) & VLAN_VID_MASK;
1965     if (IS_SPECIAL_VLAN_ID(vlan_tag)) {
1966         return true;
1967     }
1968 
1969     return VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, vlan_tag);
1970 }
1971 
1972 static bool
1973 vmxnet3_is_allowed_mcast_group(VMXNET3State *s, const uint8_t *group_mac)
1974 {
1975     int i;
1976     for (i = 0; i < s->mcast_list_len; i++) {
1977         if (!memcmp(group_mac, s->mcast_list[i].a, sizeof(s->mcast_list[i]))) {
1978             return true;
1979         }
1980     }
1981     return false;
1982 }
1983 
1984 static bool
1985 vmxnet3_rx_filter_may_indicate(VMXNET3State *s, const void *data,
1986     size_t size)
1987 {
1988     struct eth_header *ehdr = PKT_GET_ETH_HDR(data);
1989 
1990     if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_PROMISC)) {
1991         return true;
1992     }
1993 
1994     if (!vmxnet3_is_registered_vlan(s, data)) {
1995         return false;
1996     }
1997 
1998     switch (net_rx_pkt_get_packet_type(s->rx_pkt)) {
1999     case ETH_PKT_UCAST:
2000         if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_UCAST)) {
2001             return false;
2002         }
2003         if (memcmp(s->conf.macaddr.a, ehdr->h_dest, ETH_ALEN)) {
2004             return false;
2005         }
2006         break;
2007 
2008     case ETH_PKT_BCAST:
2009         if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_BCAST)) {
2010             return false;
2011         }
2012         break;
2013 
2014     case ETH_PKT_MCAST:
2015         if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_ALL_MULTI)) {
2016             return true;
2017         }
2018         if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_MCAST)) {
2019             return false;
2020         }
2021         if (!vmxnet3_is_allowed_mcast_group(s, ehdr->h_dest)) {
2022             return false;
2023         }
2024         break;
2025 
2026     default:
2027         g_assert_not_reached();
2028     }
2029 
2030     return true;
2031 }
2032 
2033 static ssize_t
2034 vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size)
2035 {
2036     VMXNET3State *s = qemu_get_nic_opaque(nc);
2037     size_t bytes_indicated;
2038     uint8_t min_buf[MIN_BUF_SIZE];
2039 
2040     if (!vmxnet3_can_receive(nc)) {
2041         VMW_PKPRN("Cannot receive now");
2042         return -1;
2043     }
2044 
2045     if (s->peer_has_vhdr) {
2046         net_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf);
2047         buf += sizeof(struct virtio_net_hdr);
2048         size -= sizeof(struct virtio_net_hdr);
2049     }
2050 
2051     /* Pad to minimum Ethernet frame length */
2052     if (size < sizeof(min_buf)) {
2053         memcpy(min_buf, buf, size);
2054         memset(&min_buf[size], 0, sizeof(min_buf) - size);
2055         buf = min_buf;
2056         size = sizeof(min_buf);
2057     }
2058 
2059     net_rx_pkt_set_packet_type(s->rx_pkt,
2060         get_eth_packet_type(PKT_GET_ETH_HDR(buf)));
2061 
2062     if (vmxnet3_rx_filter_may_indicate(s, buf, size)) {
2063         net_rx_pkt_set_protocols(s->rx_pkt, buf, size);
2064         vmxnet3_rx_need_csum_calculate(s->rx_pkt, buf, size);
2065         net_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping);
2066         bytes_indicated = vmxnet3_indicate_packet(s) ? size : -1;
2067         if (bytes_indicated < size) {
2068             VMW_PKPRN("RX: %zu of %zu bytes indicated", bytes_indicated, size);
2069         }
2070     } else {
2071         VMW_PKPRN("Packet dropped by RX filter");
2072         bytes_indicated = size;
2073     }
2074 
2075     assert(size > 0);
2076     assert(bytes_indicated != 0);
2077     return bytes_indicated;
2078 }
2079 
2080 static void vmxnet3_set_link_status(NetClientState *nc)
2081 {
2082     VMXNET3State *s = qemu_get_nic_opaque(nc);
2083 
2084     if (nc->link_down) {
2085         s->link_status_and_speed &= ~VMXNET3_LINK_STATUS_UP;
2086     } else {
2087         s->link_status_and_speed |= VMXNET3_LINK_STATUS_UP;
2088     }
2089 
2090     vmxnet3_set_events(s, VMXNET3_ECR_LINK);
2091     vmxnet3_trigger_interrupt(s, s->event_int_idx);
2092 }
2093 
2094 static NetClientInfo net_vmxnet3_info = {
2095         .type = NET_CLIENT_DRIVER_NIC,
2096         .size = sizeof(NICState),
2097         .receive = vmxnet3_receive,
2098         .link_status_changed = vmxnet3_set_link_status,
2099 };
2100 
2101 static bool vmxnet3_peer_has_vnet_hdr(VMXNET3State *s)
2102 {
2103     NetClientState *nc = qemu_get_queue(s->nic);
2104 
2105     if (qemu_has_vnet_hdr(nc->peer)) {
2106         return true;
2107     }
2108 
2109     return false;
2110 }
2111 
2112 static void vmxnet3_net_uninit(VMXNET3State *s)
2113 {
2114     g_free(s->mcast_list);
2115     vmxnet3_deactivate_device(s);
2116     qemu_del_nic(s->nic);
2117 }
2118 
2119 static void vmxnet3_net_init(VMXNET3State *s)
2120 {
2121     DeviceState *d = DEVICE(s);
2122 
2123     VMW_CBPRN("vmxnet3_net_init called...");
2124 
2125     qemu_macaddr_default_if_unset(&s->conf.macaddr);
2126 
2127     /* Windows guest will query the address that was set on init */
2128     memcpy(&s->perm_mac.a, &s->conf.macaddr.a, sizeof(s->perm_mac.a));
2129 
2130     s->mcast_list = NULL;
2131     s->mcast_list_len = 0;
2132 
2133     s->link_status_and_speed = VMXNET3_LINK_SPEED | VMXNET3_LINK_STATUS_UP;
2134 
2135     VMW_CFPRN("Permanent MAC: " MAC_FMT, MAC_ARG(s->perm_mac.a));
2136 
2137     s->nic = qemu_new_nic(&net_vmxnet3_info, &s->conf,
2138                           object_get_typename(OBJECT(s)),
2139                           d->id, s);
2140 
2141     s->peer_has_vhdr = vmxnet3_peer_has_vnet_hdr(s);
2142     s->tx_sop = true;
2143     s->skip_current_tx_pkt = false;
2144     s->tx_pkt = NULL;
2145     s->rx_pkt = NULL;
2146     s->rx_vlan_stripping = false;
2147     s->lro_supported = false;
2148 
2149     if (s->peer_has_vhdr) {
2150         qemu_set_vnet_hdr_len(qemu_get_queue(s->nic)->peer,
2151             sizeof(struct virtio_net_hdr));
2152 
2153         qemu_using_vnet_hdr(qemu_get_queue(s->nic)->peer, 1);
2154     }
2155 
2156     qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
2157 }
2158 
2159 static void
2160 vmxnet3_unuse_msix_vectors(VMXNET3State *s, int num_vectors)
2161 {
2162     PCIDevice *d = PCI_DEVICE(s);
2163     int i;
2164     for (i = 0; i < num_vectors; i++) {
2165         msix_vector_unuse(d, i);
2166     }
2167 }
2168 
2169 static bool
2170 vmxnet3_use_msix_vectors(VMXNET3State *s, int num_vectors)
2171 {
2172     PCIDevice *d = PCI_DEVICE(s);
2173     int i;
2174     for (i = 0; i < num_vectors; i++) {
2175         int res = msix_vector_use(d, i);
2176         if (0 > res) {
2177             VMW_WRPRN("Failed to use MSI-X vector %d, error %d", i, res);
2178             vmxnet3_unuse_msix_vectors(s, i);
2179             return false;
2180         }
2181     }
2182     return true;
2183 }
2184 
2185 static bool
2186 vmxnet3_init_msix(VMXNET3State *s)
2187 {
2188     PCIDevice *d = PCI_DEVICE(s);
2189     int res = msix_init(d, VMXNET3_MAX_INTRS,
2190                         &s->msix_bar,
2191                         VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_TABLE,
2192                         &s->msix_bar,
2193                         VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA(s),
2194                         VMXNET3_MSIX_OFFSET(s));
2195 
2196     if (0 > res) {
2197         VMW_WRPRN("Failed to initialize MSI-X, error %d", res);
2198         s->msix_used = false;
2199     } else {
2200         if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
2201             VMW_WRPRN("Failed to use MSI-X vectors, error %d", res);
2202             msix_uninit(d, &s->msix_bar, &s->msix_bar);
2203             s->msix_used = false;
2204         } else {
2205             s->msix_used = true;
2206         }
2207     }
2208     return s->msix_used;
2209 }
2210 
2211 static void
2212 vmxnet3_cleanup_msix(VMXNET3State *s)
2213 {
2214     PCIDevice *d = PCI_DEVICE(s);
2215 
2216     if (s->msix_used) {
2217         vmxnet3_unuse_msix_vectors(s, VMXNET3_MAX_INTRS);
2218         msix_uninit(d, &s->msix_bar, &s->msix_bar);
2219     }
2220 }
2221 
2222 static void
2223 vmxnet3_cleanup_msi(VMXNET3State *s)
2224 {
2225     PCIDevice *d = PCI_DEVICE(s);
2226 
2227     msi_uninit(d);
2228 }
2229 
2230 static void
2231 vmxnet3_msix_save(QEMUFile *f, void *opaque)
2232 {
2233     PCIDevice *d = PCI_DEVICE(opaque);
2234     msix_save(d, f);
2235 }
2236 
2237 static int
2238 vmxnet3_msix_load(QEMUFile *f, void *opaque, int version_id)
2239 {
2240     PCIDevice *d = PCI_DEVICE(opaque);
2241     msix_load(d, f);
2242     return 0;
2243 }
2244 
2245 static const MemoryRegionOps b0_ops = {
2246     .read = vmxnet3_io_bar0_read,
2247     .write = vmxnet3_io_bar0_write,
2248     .endianness = DEVICE_LITTLE_ENDIAN,
2249     .impl = {
2250             .min_access_size = 4,
2251             .max_access_size = 4,
2252     },
2253 };
2254 
2255 static const MemoryRegionOps b1_ops = {
2256     .read = vmxnet3_io_bar1_read,
2257     .write = vmxnet3_io_bar1_write,
2258     .endianness = DEVICE_LITTLE_ENDIAN,
2259     .impl = {
2260             .min_access_size = 4,
2261             .max_access_size = 4,
2262     },
2263 };
2264 
2265 static uint64_t vmxnet3_device_serial_num(VMXNET3State *s)
2266 {
2267     uint64_t dsn_payload;
2268     uint8_t *dsnp = (uint8_t *)&dsn_payload;
2269 
2270     dsnp[0] = 0xfe;
2271     dsnp[1] = s->conf.macaddr.a[3];
2272     dsnp[2] = s->conf.macaddr.a[4];
2273     dsnp[3] = s->conf.macaddr.a[5];
2274     dsnp[4] = s->conf.macaddr.a[0];
2275     dsnp[5] = s->conf.macaddr.a[1];
2276     dsnp[6] = s->conf.macaddr.a[2];
2277     dsnp[7] = 0xff;
2278     return dsn_payload;
2279 }
2280 
2281 
2282 #define VMXNET3_USE_64BIT         (true)
2283 #define VMXNET3_PER_VECTOR_MASK   (false)
2284 
2285 static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
2286 {
2287     DeviceState *dev = DEVICE(pci_dev);
2288     VMXNET3State *s = VMXNET3(pci_dev);
2289     int ret;
2290 
2291     VMW_CBPRN("Starting init...");
2292 
2293     memory_region_init_io(&s->bar0, OBJECT(s), &b0_ops, s,
2294                           "vmxnet3-b0", VMXNET3_PT_REG_SIZE);
2295     pci_register_bar(pci_dev, VMXNET3_BAR0_IDX,
2296                      PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar0);
2297 
2298     memory_region_init_io(&s->bar1, OBJECT(s), &b1_ops, s,
2299                           "vmxnet3-b1", VMXNET3_VD_REG_SIZE);
2300     pci_register_bar(pci_dev, VMXNET3_BAR1_IDX,
2301                      PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar1);
2302 
2303     memory_region_init(&s->msix_bar, OBJECT(s), "vmxnet3-msix-bar",
2304                        VMXNET3_MSIX_BAR_SIZE);
2305     pci_register_bar(pci_dev, VMXNET3_MSIX_BAR_IDX,
2306                      PCI_BASE_ADDRESS_SPACE_MEMORY, &s->msix_bar);
2307 
2308     vmxnet3_reset_interrupt_states(s);
2309 
2310     /* Interrupt pin A */
2311     pci_dev->config[PCI_INTERRUPT_PIN] = 0x01;
2312 
2313     ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS,
2314                    VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK, NULL);
2315     /* Any error other than -ENOTSUP(board's MSI support is broken)
2316      * is a programming error. Fall back to INTx silently on -ENOTSUP */
2317     assert(!ret || ret == -ENOTSUP);
2318 
2319     if (!vmxnet3_init_msix(s)) {
2320         VMW_WRPRN("Failed to initialize MSI-X, configuration is inconsistent.");
2321     }
2322 
2323     vmxnet3_net_init(s);
2324 
2325     if (pci_is_express(pci_dev)) {
2326         if (pci_bus_is_express(pci_dev->bus)) {
2327             pcie_endpoint_cap_init(pci_dev, VMXNET3_EXP_EP_OFFSET);
2328         }
2329 
2330         pcie_dev_ser_num_init(pci_dev, VMXNET3_DSN_OFFSET,
2331                               vmxnet3_device_serial_num(s));
2332     }
2333 
2334     register_savevm(dev, "vmxnet3-msix", -1, 1,
2335                     vmxnet3_msix_save, vmxnet3_msix_load, s);
2336 }
2337 
2338 static void vmxnet3_instance_init(Object *obj)
2339 {
2340     VMXNET3State *s = VMXNET3(obj);
2341     device_add_bootindex_property(obj, &s->conf.bootindex,
2342                                   "bootindex", "/ethernet-phy@0",
2343                                   DEVICE(obj), NULL);
2344 }
2345 
2346 static void vmxnet3_pci_uninit(PCIDevice *pci_dev)
2347 {
2348     DeviceState *dev = DEVICE(pci_dev);
2349     VMXNET3State *s = VMXNET3(pci_dev);
2350 
2351     VMW_CBPRN("Starting uninit...");
2352 
2353     unregister_savevm(dev, "vmxnet3-msix", s);
2354 
2355     vmxnet3_net_uninit(s);
2356 
2357     vmxnet3_cleanup_msix(s);
2358 
2359     vmxnet3_cleanup_msi(s);
2360 }
2361 
2362 static void vmxnet3_qdev_reset(DeviceState *dev)
2363 {
2364     PCIDevice *d = PCI_DEVICE(dev);
2365     VMXNET3State *s = VMXNET3(d);
2366 
2367     VMW_CBPRN("Starting QDEV reset...");
2368     vmxnet3_reset(s);
2369 }
2370 
/*
 * .needed callback of the multicast list subsection.
 * The subsection is always required, so @opaque is not looked at.
 */
static bool vmxnet3_mc_list_needed(void *opaque)
{
    (void)opaque;

    return true;
}
2375 
2376 static int vmxnet3_mcast_list_pre_load(void *opaque)
2377 {
2378     VMXNET3State *s = opaque;
2379 
2380     s->mcast_list = g_malloc(s->mcast_list_buff_size);
2381 
2382     return 0;
2383 }
2384 
2385 
2386 static void vmxnet3_pre_save(void *opaque)
2387 {
2388     VMXNET3State *s = opaque;
2389 
2390     s->mcast_list_buff_size = s->mcast_list_len * sizeof(MACAddr);
2391 }
2392 
2393 static const VMStateDescription vmxstate_vmxnet3_mcast_list = {
2394     .name = "vmxnet3/mcast_list",
2395     .version_id = 1,
2396     .minimum_version_id = 1,
2397     .pre_load = vmxnet3_mcast_list_pre_load,
2398     .needed = vmxnet3_mc_list_needed,
2399     .fields = (VMStateField[]) {
2400         VMSTATE_VBUFFER_UINT32(mcast_list, VMXNET3State, 0, NULL, 0,
2401             mcast_list_buff_size),
2402         VMSTATE_END_OF_LIST()
2403     }
2404 };
2405 
2406 static void vmxnet3_get_ring_from_file(QEMUFile *f, Vmxnet3Ring *r)
2407 {
2408     r->pa = qemu_get_be64(f);
2409     r->size = qemu_get_be32(f);
2410     r->cell_size = qemu_get_be32(f);
2411     r->next = qemu_get_be32(f);
2412     r->gen = qemu_get_byte(f);
2413 }
2414 
2415 static void vmxnet3_put_ring_to_file(QEMUFile *f, Vmxnet3Ring *r)
2416 {
2417     qemu_put_be64(f, r->pa);
2418     qemu_put_be32(f, r->size);
2419     qemu_put_be32(f, r->cell_size);
2420     qemu_put_be32(f, r->next);
2421     qemu_put_byte(f, r->gen);
2422 }
2423 
2424 static void vmxnet3_get_tx_stats_from_file(QEMUFile *f,
2425     struct UPT1_TxStats *tx_stat)
2426 {
2427     tx_stat->TSOPktsTxOK = qemu_get_be64(f);
2428     tx_stat->TSOBytesTxOK = qemu_get_be64(f);
2429     tx_stat->ucastPktsTxOK = qemu_get_be64(f);
2430     tx_stat->ucastBytesTxOK = qemu_get_be64(f);
2431     tx_stat->mcastPktsTxOK = qemu_get_be64(f);
2432     tx_stat->mcastBytesTxOK = qemu_get_be64(f);
2433     tx_stat->bcastPktsTxOK = qemu_get_be64(f);
2434     tx_stat->bcastBytesTxOK = qemu_get_be64(f);
2435     tx_stat->pktsTxError = qemu_get_be64(f);
2436     tx_stat->pktsTxDiscard = qemu_get_be64(f);
2437 }
2438 
2439 static void vmxnet3_put_tx_stats_to_file(QEMUFile *f,
2440     struct UPT1_TxStats *tx_stat)
2441 {
2442     qemu_put_be64(f, tx_stat->TSOPktsTxOK);
2443     qemu_put_be64(f, tx_stat->TSOBytesTxOK);
2444     qemu_put_be64(f, tx_stat->ucastPktsTxOK);
2445     qemu_put_be64(f, tx_stat->ucastBytesTxOK);
2446     qemu_put_be64(f, tx_stat->mcastPktsTxOK);
2447     qemu_put_be64(f, tx_stat->mcastBytesTxOK);
2448     qemu_put_be64(f, tx_stat->bcastPktsTxOK);
2449     qemu_put_be64(f, tx_stat->bcastBytesTxOK);
2450     qemu_put_be64(f, tx_stat->pktsTxError);
2451     qemu_put_be64(f, tx_stat->pktsTxDiscard);
2452 }
2453 
2454 static int vmxnet3_get_txq_descr(QEMUFile *f, void *pv, size_t size)
2455 {
2456     Vmxnet3TxqDescr *r = pv;
2457 
2458     vmxnet3_get_ring_from_file(f, &r->tx_ring);
2459     vmxnet3_get_ring_from_file(f, &r->comp_ring);
2460     r->intr_idx = qemu_get_byte(f);
2461     r->tx_stats_pa = qemu_get_be64(f);
2462 
2463     vmxnet3_get_tx_stats_from_file(f, &r->txq_stats);
2464 
2465     return 0;
2466 }
2467 
2468 static void vmxnet3_put_txq_descr(QEMUFile *f, void *pv, size_t size)
2469 {
2470     Vmxnet3TxqDescr *r = pv;
2471 
2472     vmxnet3_put_ring_to_file(f, &r->tx_ring);
2473     vmxnet3_put_ring_to_file(f, &r->comp_ring);
2474     qemu_put_byte(f, r->intr_idx);
2475     qemu_put_be64(f, r->tx_stats_pa);
2476     vmxnet3_put_tx_stats_to_file(f, &r->txq_stats);
2477 }
2478 
2479 static const VMStateInfo txq_descr_info = {
2480     .name = "txq_descr",
2481     .get = vmxnet3_get_txq_descr,
2482     .put = vmxnet3_put_txq_descr
2483 };
2484 
2485 static void vmxnet3_get_rx_stats_from_file(QEMUFile *f,
2486     struct UPT1_RxStats *rx_stat)
2487 {
2488     rx_stat->LROPktsRxOK = qemu_get_be64(f);
2489     rx_stat->LROBytesRxOK = qemu_get_be64(f);
2490     rx_stat->ucastPktsRxOK = qemu_get_be64(f);
2491     rx_stat->ucastBytesRxOK = qemu_get_be64(f);
2492     rx_stat->mcastPktsRxOK = qemu_get_be64(f);
2493     rx_stat->mcastBytesRxOK = qemu_get_be64(f);
2494     rx_stat->bcastPktsRxOK = qemu_get_be64(f);
2495     rx_stat->bcastBytesRxOK = qemu_get_be64(f);
2496     rx_stat->pktsRxOutOfBuf = qemu_get_be64(f);
2497     rx_stat->pktsRxError = qemu_get_be64(f);
2498 }
2499 
2500 static void vmxnet3_put_rx_stats_to_file(QEMUFile *f,
2501     struct UPT1_RxStats *rx_stat)
2502 {
2503     qemu_put_be64(f, rx_stat->LROPktsRxOK);
2504     qemu_put_be64(f, rx_stat->LROBytesRxOK);
2505     qemu_put_be64(f, rx_stat->ucastPktsRxOK);
2506     qemu_put_be64(f, rx_stat->ucastBytesRxOK);
2507     qemu_put_be64(f, rx_stat->mcastPktsRxOK);
2508     qemu_put_be64(f, rx_stat->mcastBytesRxOK);
2509     qemu_put_be64(f, rx_stat->bcastPktsRxOK);
2510     qemu_put_be64(f, rx_stat->bcastBytesRxOK);
2511     qemu_put_be64(f, rx_stat->pktsRxOutOfBuf);
2512     qemu_put_be64(f, rx_stat->pktsRxError);
2513 }
2514 
2515 static int vmxnet3_get_rxq_descr(QEMUFile *f, void *pv, size_t size)
2516 {
2517     Vmxnet3RxqDescr *r = pv;
2518     int i;
2519 
2520     for (i = 0; i < VMXNET3_RX_RINGS_PER_QUEUE; i++) {
2521         vmxnet3_get_ring_from_file(f, &r->rx_ring[i]);
2522     }
2523 
2524     vmxnet3_get_ring_from_file(f, &r->comp_ring);
2525     r->intr_idx = qemu_get_byte(f);
2526     r->rx_stats_pa = qemu_get_be64(f);
2527 
2528     vmxnet3_get_rx_stats_from_file(f, &r->rxq_stats);
2529 
2530     return 0;
2531 }
2532 
2533 static void vmxnet3_put_rxq_descr(QEMUFile *f, void *pv, size_t size)
2534 {
2535     Vmxnet3RxqDescr *r = pv;
2536     int i;
2537 
2538     for (i = 0; i < VMXNET3_RX_RINGS_PER_QUEUE; i++) {
2539         vmxnet3_put_ring_to_file(f, &r->rx_ring[i]);
2540     }
2541 
2542     vmxnet3_put_ring_to_file(f, &r->comp_ring);
2543     qemu_put_byte(f, r->intr_idx);
2544     qemu_put_be64(f, r->rx_stats_pa);
2545     vmxnet3_put_rx_stats_to_file(f, &r->rxq_stats);
2546 }
2547 
2548 static int vmxnet3_post_load(void *opaque, int version_id)
2549 {
2550     VMXNET3State *s = opaque;
2551     PCIDevice *d = PCI_DEVICE(s);
2552 
2553     net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s),
2554                     s->max_tx_frags, s->peer_has_vhdr);
2555     net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);
2556 
2557     if (s->msix_used) {
2558         if  (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
2559             VMW_WRPRN("Failed to re-use MSI-X vectors");
2560             msix_uninit(d, &s->msix_bar, &s->msix_bar);
2561             s->msix_used = false;
2562             return -1;
2563         }
2564     }
2565 
2566     vmxnet3_validate_queues(s);
2567     vmxnet3_validate_interrupts(s);
2568 
2569     return 0;
2570 }
2571 
2572 static const VMStateInfo rxq_descr_info = {
2573     .name = "rxq_descr",
2574     .get = vmxnet3_get_rxq_descr,
2575     .put = vmxnet3_put_rxq_descr
2576 };
2577 
2578 static int vmxnet3_get_int_state(QEMUFile *f, void *pv, size_t size)
2579 {
2580     Vmxnet3IntState *r = pv;
2581 
2582     r->is_masked = qemu_get_byte(f);
2583     r->is_pending = qemu_get_byte(f);
2584     r->is_asserted = qemu_get_byte(f);
2585 
2586     return 0;
2587 }
2588 
2589 static void vmxnet3_put_int_state(QEMUFile *f, void *pv, size_t size)
2590 {
2591     Vmxnet3IntState *r = pv;
2592 
2593     qemu_put_byte(f, r->is_masked);
2594     qemu_put_byte(f, r->is_pending);
2595     qemu_put_byte(f, r->is_asserted);
2596 }
2597 
2598 static const VMStateInfo int_state_info = {
2599     .name = "int_state",
2600     .get = vmxnet3_get_int_state,
2601     .put = vmxnet3_put_int_state
2602 };
2603 
2604 static bool vmxnet3_vmstate_need_pcie_device(void *opaque)
2605 {
2606     VMXNET3State *s = VMXNET3(opaque);
2607 
2608     return !(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE);
2609 }
2610 
/*
 * Field test for the plain PCI device state: true exactly when the
 * PCIe subsection is not needed. @version_id is not used.
 */
static bool vmxnet3_vmstate_test_pci_device(void *opaque, int version_id)
{
    if (vmxnet3_vmstate_need_pcie_device(opaque)) {
        return false;
    }

    return true;
}
2615 
2616 static const VMStateDescription vmstate_vmxnet3_pcie_device = {
2617     .name = "vmxnet3/pcie",
2618     .version_id = 1,
2619     .minimum_version_id = 1,
2620     .needed = vmxnet3_vmstate_need_pcie_device,
2621     .fields = (VMStateField[]) {
2622         VMSTATE_PCIE_DEVICE(parent_obj, VMXNET3State),
2623         VMSTATE_END_OF_LIST()
2624     }
2625 };
2626 
2627 static const VMStateDescription vmstate_vmxnet3 = {
2628     .name = "vmxnet3",
2629     .version_id = 1,
2630     .minimum_version_id = 1,
2631     .pre_save = vmxnet3_pre_save,
2632     .post_load = vmxnet3_post_load,
2633     .fields = (VMStateField[]) {
2634             VMSTATE_STRUCT_TEST(parent_obj, VMXNET3State,
2635                                 vmxnet3_vmstate_test_pci_device, 0,
2636                                 vmstate_pci_device, PCIDevice),
2637             VMSTATE_BOOL(rx_packets_compound, VMXNET3State),
2638             VMSTATE_BOOL(rx_vlan_stripping, VMXNET3State),
2639             VMSTATE_BOOL(lro_supported, VMXNET3State),
2640             VMSTATE_UINT32(rx_mode, VMXNET3State),
2641             VMSTATE_UINT32(mcast_list_len, VMXNET3State),
2642             VMSTATE_UINT32(mcast_list_buff_size, VMXNET3State),
2643             VMSTATE_UINT32_ARRAY(vlan_table, VMXNET3State, VMXNET3_VFT_SIZE),
2644             VMSTATE_UINT32(mtu, VMXNET3State),
2645             VMSTATE_UINT16(max_rx_frags, VMXNET3State),
2646             VMSTATE_UINT32(max_tx_frags, VMXNET3State),
2647             VMSTATE_UINT8(event_int_idx, VMXNET3State),
2648             VMSTATE_BOOL(auto_int_masking, VMXNET3State),
2649             VMSTATE_UINT8(txq_num, VMXNET3State),
2650             VMSTATE_UINT8(rxq_num, VMXNET3State),
2651             VMSTATE_UINT32(device_active, VMXNET3State),
2652             VMSTATE_UINT32(last_command, VMXNET3State),
2653             VMSTATE_UINT32(link_status_and_speed, VMXNET3State),
2654             VMSTATE_UINT32(temp_mac, VMXNET3State),
2655             VMSTATE_UINT64(drv_shmem, VMXNET3State),
2656             VMSTATE_UINT64(temp_shared_guest_driver_memory, VMXNET3State),
2657 
2658             VMSTATE_ARRAY(txq_descr, VMXNET3State,
2659                 VMXNET3_DEVICE_MAX_TX_QUEUES, 0, txq_descr_info,
2660                 Vmxnet3TxqDescr),
2661             VMSTATE_ARRAY(rxq_descr, VMXNET3State,
2662                 VMXNET3_DEVICE_MAX_RX_QUEUES, 0, rxq_descr_info,
2663                 Vmxnet3RxqDescr),
2664             VMSTATE_ARRAY(interrupt_states, VMXNET3State, VMXNET3_MAX_INTRS,
2665                 0, int_state_info, Vmxnet3IntState),
2666 
2667             VMSTATE_END_OF_LIST()
2668     },
2669     .subsections = (const VMStateDescription*[]) {
2670         &vmxstate_vmxnet3_mcast_list,
2671         &vmstate_vmxnet3_pcie_device,
2672         NULL
2673     }
2674 };
2675 
2676 static Property vmxnet3_properties[] = {
2677     DEFINE_NIC_PROPERTIES(VMXNET3State, conf),
2678     DEFINE_PROP_BIT("x-old-msi-offsets", VMXNET3State, compat_flags,
2679                     VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT, false),
2680     DEFINE_PROP_BIT("x-disable-pcie", VMXNET3State, compat_flags,
2681                     VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT, false),
2682     DEFINE_PROP_END_OF_LIST(),
2683 };
2684 
2685 static void vmxnet3_realize(DeviceState *qdev, Error **errp)
2686 {
2687     VMXNET3Class *vc = VMXNET3_DEVICE_GET_CLASS(qdev);
2688     PCIDevice *pci_dev = PCI_DEVICE(qdev);
2689     VMXNET3State *s = VMXNET3(qdev);
2690 
2691     if (!(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE)) {
2692         pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
2693     }
2694 
2695     vc->parent_dc_realize(qdev, errp);
2696 }
2697 
2698 static void vmxnet3_class_init(ObjectClass *class, void *data)
2699 {
2700     DeviceClass *dc = DEVICE_CLASS(class);
2701     PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
2702     VMXNET3Class *vc = VMXNET3_DEVICE_CLASS(class);
2703 
2704     c->realize = vmxnet3_pci_realize;
2705     c->exit = vmxnet3_pci_uninit;
2706     c->vendor_id = PCI_VENDOR_ID_VMWARE;
2707     c->device_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
2708     c->revision = PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION;
2709     c->romfile = "efi-vmxnet3.rom";
2710     c->class_id = PCI_CLASS_NETWORK_ETHERNET;
2711     c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE;
2712     c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
2713     vc->parent_dc_realize = dc->realize;
2714     dc->realize = vmxnet3_realize;
2715     dc->desc = "VMWare Paravirtualized Ethernet v3";
2716     dc->reset = vmxnet3_qdev_reset;
2717     dc->vmsd = &vmstate_vmxnet3;
2718     dc->props = vmxnet3_properties;
2719     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
2720 }
2721 
2722 static const TypeInfo vmxnet3_info = {
2723     .name          = TYPE_VMXNET3,
2724     .parent        = TYPE_PCI_DEVICE,
2725     .class_size    = sizeof(VMXNET3Class),
2726     .instance_size = sizeof(VMXNET3State),
2727     .class_init    = vmxnet3_class_init,
2728     .instance_init = vmxnet3_instance_init,
2729 };
2730 
2731 static void vmxnet3_register_types(void)
2732 {
2733     VMW_CBPRN("vmxnet3_register_types called...");
2734     type_register_static(&vmxnet3_info);
2735 }
2736 
2737 type_init(vmxnet3_register_types)
2738