xref: /openbmc/qemu/hw/net/vmxnet3.c (revision a4d50b1d)
1 /*
2  * QEMU VMWARE VMXNET3 paravirtual NIC
3  *
4  * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
5  *
6  * Developed by Daynix Computing LTD (http://www.daynix.com)
7  *
8  * Authors:
9  * Dmitry Fleytman <dmitry@daynix.com>
10  * Tamir Shomer <tamirs@daynix.com>
11  * Yan Vugenfirer <yan@daynix.com>
12  *
13  * This work is licensed under the terms of the GNU GPL, version 2.
14  * See the COPYING file in the top-level directory.
15  *
16  */
17 
18 #include "qemu/osdep.h"
19 #include "hw/hw.h"
20 #include "hw/pci/pci.h"
21 #include "net/net.h"
22 #include "net/tap.h"
23 #include "net/checksum.h"
24 #include "sysemu/sysemu.h"
25 #include "qemu-common.h"
26 #include "qemu/bswap.h"
27 #include "hw/pci/msix.h"
28 #include "hw/pci/msi.h"
29 
30 #include "vmxnet3.h"
31 #include "vmxnet_debug.h"
32 #include "vmware_utils.h"
33 #include "net_tx_pkt.h"
34 #include "net_rx_pkt.h"
35 
36 #define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
37 #define VMXNET3_MSIX_BAR_SIZE 0x2000
38 #define MIN_BUF_SIZE 60
39 
40 /* Compatibility flags for migration */
41 #define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0
42 #define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS \
43     (1 << VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT)
44 #define VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT 1
45 #define VMXNET3_COMPAT_FLAG_DISABLE_PCIE \
46     (1 << VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT)
47 
48 #define VMXNET3_EXP_EP_OFFSET (0x48)
49 #define VMXNET3_MSI_OFFSET(s) \
50     ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x50 : 0x84)
51 #define VMXNET3_MSIX_OFFSET(s) \
52     ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0 : 0x9c)
53 #define VMXNET3_DSN_OFFSET     (0x100)
54 
55 #define VMXNET3_BAR0_IDX      (0)
56 #define VMXNET3_BAR1_IDX      (1)
57 #define VMXNET3_MSIX_BAR_IDX  (2)
58 
59 #define VMXNET3_OFF_MSIX_TABLE (0x000)
60 #define VMXNET3_OFF_MSIX_PBA(s) \
61     ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x800 : 0x1000)
62 
63 /* Link speed in Mbps should be shifted by 16 */
64 #define VMXNET3_LINK_SPEED      (1000 << 16)
65 
66 /* Link status: 1 - up, 0 - down. */
67 #define VMXNET3_LINK_STATUS_UP  0x1
68 
69 /* Least significant bit should be set for revision and version */
70 #define VMXNET3_UPT_REVISION      0x1
71 #define VMXNET3_DEVICE_REVISION   0x1
72 
73 /* Number of interrupt vectors for non-MSIx modes */
74 #define VMXNET3_MAX_NMSIX_INTRS   (1)
75 
/*
 * Macros for rings descriptors access.
 *
 * Each macro adds the offset of @field inside the queue descriptor structure
 * to the descriptor address @dpa and forwards the access to the vmw_shmem_*
 * helper of the matching width. @dpa is parenthesized so that any expression
 * can safely be passed as the address.
 */
#define VMXNET3_READ_TX_QUEUE_DESCR8(_d, dpa, field) \
    (vmw_shmem_ld8(_d, (dpa) + offsetof(struct Vmxnet3_TxQueueDesc, field)))

/* The value is an argument of vmw_shmem_st8(), not of offsetof() */
#define VMXNET3_WRITE_TX_QUEUE_DESCR8(_d, dpa, field, value) \
    (vmw_shmem_st8(_d, (dpa) + offsetof(struct Vmxnet3_TxQueueDesc, field), \
                   value))

#define VMXNET3_READ_TX_QUEUE_DESCR32(_d, dpa, field) \
    (vmw_shmem_ld32(_d, (dpa) + offsetof(struct Vmxnet3_TxQueueDesc, field)))

#define VMXNET3_WRITE_TX_QUEUE_DESCR32(_d, dpa, field, value) \
    (vmw_shmem_st32(_d, (dpa) + offsetof(struct Vmxnet3_TxQueueDesc, field), \
                    value))

#define VMXNET3_READ_TX_QUEUE_DESCR64(_d, dpa, field) \
    (vmw_shmem_ld64(_d, (dpa) + offsetof(struct Vmxnet3_TxQueueDesc, field)))

#define VMXNET3_WRITE_TX_QUEUE_DESCR64(_d, dpa, field, value) \
    (vmw_shmem_st64(_d, (dpa) + offsetof(struct Vmxnet3_TxQueueDesc, field), \
                    value))

#define VMXNET3_READ_RX_QUEUE_DESCR64(_d, dpa, field) \
    (vmw_shmem_ld64(_d, (dpa) + offsetof(struct Vmxnet3_RxQueueDesc, field)))

#define VMXNET3_READ_RX_QUEUE_DESCR32(_d, dpa, field) \
    (vmw_shmem_ld32(_d, (dpa) + offsetof(struct Vmxnet3_RxQueueDesc, field)))

#define VMXNET3_WRITE_RX_QUEUE_DESCR64(_d, dpa, field, value) \
    (vmw_shmem_st64(_d, (dpa) + offsetof(struct Vmxnet3_RxQueueDesc, field), \
                    value))

#define VMXNET3_WRITE_RX_QUEUE_DESCR8(_d, dpa, field, value) \
    (vmw_shmem_st8(_d, (dpa) + offsetof(struct Vmxnet3_RxQueueDesc, field), \
                   value))
106 
/*
 * Macros for guest driver shared area access.
 *
 * Each macro adds the offset of @field inside struct Vmxnet3_DriverShared to
 * the shared area address @shpa and forwards the access to the vmw_shmem_*
 * helper of the matching width. @shpa is parenthesized so that any
 * expression (e.g. a conditional) can safely be passed as the address.
 */
#define VMXNET3_READ_DRV_SHARED64(_d, shpa, field) \
    (vmw_shmem_ld64(_d, (shpa) + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_READ_DRV_SHARED32(_d, shpa, field) \
    (vmw_shmem_ld32(_d, (shpa) + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_WRITE_DRV_SHARED32(_d, shpa, field, val) \
    (vmw_shmem_st32(_d, (shpa) + offsetof(struct Vmxnet3_DriverShared, field), \
                    val))

#define VMXNET3_READ_DRV_SHARED16(_d, shpa, field) \
    (vmw_shmem_ld16(_d, (shpa) + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_READ_DRV_SHARED8(_d, shpa, field) \
    (vmw_shmem_ld8(_d, (shpa) + offsetof(struct Vmxnet3_DriverShared, field)))

/* Bulk read of @l bytes starting at @field into buffer @b */
#define VMXNET3_READ_DRV_SHARED(_d, shpa, field, b, l) \
    (vmw_shmem_read(_d, (shpa) + offsetof(struct Vmxnet3_DriverShared, field), \
                    b, l))
125 
126 #define VMXNET_FLAG_IS_SET(field, flag) (((field) & (flag)) == (flag))
127 
128 typedef struct VMXNET3Class {
129     PCIDeviceClass parent_class;
130     DeviceRealize parent_dc_realize;
131 } VMXNET3Class;
132 
133 #define TYPE_VMXNET3 "vmxnet3"
134 #define VMXNET3(obj) OBJECT_CHECK(VMXNET3State, (obj), TYPE_VMXNET3)
135 
136 #define VMXNET3_DEVICE_CLASS(klass) \
137     OBJECT_CLASS_CHECK(VMXNET3Class, (klass), TYPE_VMXNET3)
138 #define VMXNET3_DEVICE_GET_CLASS(obj) \
139     OBJECT_GET_CLASS(VMXNET3Class, (obj), TYPE_VMXNET3)
140 
141 /* Cyclic ring abstraction */
142 typedef struct {
143     hwaddr pa;
144     size_t size;
145     size_t cell_size;
146     size_t next;
147     uint8_t gen;
148 } Vmxnet3Ring;
149 
150 static inline void vmxnet3_ring_init(PCIDevice *d,
151 				     Vmxnet3Ring *ring,
152                                      hwaddr pa,
153                                      size_t size,
154                                      size_t cell_size,
155                                      bool zero_region)
156 {
157     ring->pa = pa;
158     ring->size = size;
159     ring->cell_size = cell_size;
160     ring->gen = VMXNET3_INIT_GEN;
161     ring->next = 0;
162 
163     if (zero_region) {
164         vmw_shmem_set(d, pa, 0, size * cell_size);
165     }
166 }
167 
168 #define VMXNET3_RING_DUMP(macro, ring_name, ridx, r)                         \
169     macro("%s#%d: base %" PRIx64 " size %zu cell_size %zu gen %d next %zu",  \
170           (ring_name), (ridx),                                               \
171           (r)->pa, (r)->size, (r)->cell_size, (r)->gen, (r)->next)
172 
173 static inline void vmxnet3_ring_inc(Vmxnet3Ring *ring)
174 {
175     if (++ring->next >= ring->size) {
176         ring->next = 0;
177         ring->gen ^= 1;
178     }
179 }
180 
181 static inline void vmxnet3_ring_dec(Vmxnet3Ring *ring)
182 {
183     if (ring->next-- == 0) {
184         ring->next = ring->size - 1;
185         ring->gen ^= 1;
186     }
187 }
188 
189 static inline hwaddr vmxnet3_ring_curr_cell_pa(Vmxnet3Ring *ring)
190 {
191     return ring->pa + ring->next * ring->cell_size;
192 }
193 
194 static inline void vmxnet3_ring_read_curr_cell(PCIDevice *d, Vmxnet3Ring *ring,
195 					       void *buff)
196 {
197     vmw_shmem_read(d, vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
198 }
199 
200 static inline void vmxnet3_ring_write_curr_cell(PCIDevice *d, Vmxnet3Ring *ring,
201 						void *buff)
202 {
203     vmw_shmem_write(d, vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
204 }
205 
206 static inline size_t vmxnet3_ring_curr_cell_idx(Vmxnet3Ring *ring)
207 {
208     return ring->next;
209 }
210 
211 static inline uint8_t vmxnet3_ring_curr_gen(Vmxnet3Ring *ring)
212 {
213     return ring->gen;
214 }
215 
216 /* Debug trace-related functions */
217 static inline void
218 vmxnet3_dump_tx_descr(struct Vmxnet3_TxDesc *descr)
219 {
220     VMW_PKPRN("TX DESCR: "
221               "addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
222               "dtype: %d, ext1: %d, msscof: %d, hlen: %d, om: %d, "
223               "eop: %d, cq: %d, ext2: %d, ti: %d, tci: %d",
224               le64_to_cpu(descr->addr), descr->len, descr->gen, descr->rsvd,
225               descr->dtype, descr->ext1, descr->msscof, descr->hlen, descr->om,
226               descr->eop, descr->cq, descr->ext2, descr->ti, descr->tci);
227 }
228 
229 static inline void
230 vmxnet3_dump_virt_hdr(struct virtio_net_hdr *vhdr)
231 {
232     VMW_PKPRN("VHDR: flags 0x%x, gso_type: 0x%x, hdr_len: %d, gso_size: %d, "
233               "csum_start: %d, csum_offset: %d",
234               vhdr->flags, vhdr->gso_type, vhdr->hdr_len, vhdr->gso_size,
235               vhdr->csum_start, vhdr->csum_offset);
236 }
237 
238 static inline void
239 vmxnet3_dump_rx_descr(struct Vmxnet3_RxDesc *descr)
240 {
241     VMW_PKPRN("RX DESCR: addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
242               "dtype: %d, ext1: %d, btype: %d",
243               le64_to_cpu(descr->addr), descr->len, descr->gen,
244               descr->rsvd, descr->dtype, descr->ext1, descr->btype);
245 }
246 
247 /* Device state and helper functions */
248 #define VMXNET3_RX_RINGS_PER_QUEUE (2)
249 
250 typedef struct {
251     Vmxnet3Ring tx_ring;
252     Vmxnet3Ring comp_ring;
253 
254     uint8_t intr_idx;
255     hwaddr tx_stats_pa;
256     struct UPT1_TxStats txq_stats;
257 } Vmxnet3TxqDescr;
258 
259 typedef struct {
260     Vmxnet3Ring rx_ring[VMXNET3_RX_RINGS_PER_QUEUE];
261     Vmxnet3Ring comp_ring;
262     uint8_t intr_idx;
263     hwaddr rx_stats_pa;
264     struct UPT1_RxStats rxq_stats;
265 } Vmxnet3RxqDescr;
266 
/* State of a single device interrupt */
typedef struct Vmxnet3IntState {
    bool is_masked;     /* interrupt is masked and must not be raised */
    bool is_pending;    /* interrupt was triggered but not delivered yet */
    bool is_asserted;   /* interrupt line is currently asserted */
} Vmxnet3IntState;
272 
273 typedef struct {
274         PCIDevice parent_obj;
275         NICState *nic;
276         NICConf conf;
277         MemoryRegion bar0;
278         MemoryRegion bar1;
279         MemoryRegion msix_bar;
280 
281         Vmxnet3RxqDescr rxq_descr[VMXNET3_DEVICE_MAX_RX_QUEUES];
282         Vmxnet3TxqDescr txq_descr[VMXNET3_DEVICE_MAX_TX_QUEUES];
283 
284         /* Whether MSI-X support was installed successfully */
285         bool msix_used;
286         hwaddr drv_shmem;
287         hwaddr temp_shared_guest_driver_memory;
288 
289         uint8_t txq_num;
290 
291         /* This boolean tells whether RX packet being indicated has to */
292         /* be split into head and body chunks from different RX rings  */
293         bool rx_packets_compound;
294 
295         bool rx_vlan_stripping;
296         bool lro_supported;
297 
298         uint8_t rxq_num;
299 
300         /* Network MTU */
301         uint32_t mtu;
302 
303         /* Maximum number of fragments for indicated TX packets */
304         uint32_t max_tx_frags;
305 
306         /* Maximum number of fragments for indicated RX packets */
307         uint16_t max_rx_frags;
308 
309         /* Index for events interrupt */
310         uint8_t event_int_idx;
311 
312         /* Whether automatic interrupts masking enabled */
313         bool auto_int_masking;
314 
315         bool peer_has_vhdr;
316 
317         /* TX packets to QEMU interface */
318         struct NetTxPkt *tx_pkt;
319         uint32_t offload_mode;
320         uint32_t cso_or_gso_size;
321         uint16_t tci;
322         bool needs_vlan;
323 
324         struct NetRxPkt *rx_pkt;
325 
326         bool tx_sop;
327         bool skip_current_tx_pkt;
328 
329         uint32_t device_active;
330         uint32_t last_command;
331 
332         uint32_t link_status_and_speed;
333 
334         Vmxnet3IntState interrupt_states[VMXNET3_MAX_INTRS];
335 
336         uint32_t temp_mac;   /* To store the low part first */
337 
338         MACAddr perm_mac;
339         uint32_t vlan_table[VMXNET3_VFT_SIZE];
340         uint32_t rx_mode;
341         MACAddr *mcast_list;
342         uint32_t mcast_list_len;
343         uint32_t mcast_list_buff_size; /* needed for live migration. */
344 
345         /* Compatibility flags for migration */
346         uint32_t compat_flags;
347 } VMXNET3State;
348 
349 /* Interrupt management */
350 
351 /*
352  * This function returns sign whether interrupt line is in asserted state
353  * This depends on the type of interrupt used. For INTX interrupt line will
354  * be asserted until explicit deassertion, for MSI(X) interrupt line will
355  * be deasserted automatically due to notification semantics of the MSI(X)
356  * interrupts
357  */
358 static bool _vmxnet3_assert_interrupt_line(VMXNET3State *s, uint32_t int_idx)
359 {
360     PCIDevice *d = PCI_DEVICE(s);
361 
362     if (s->msix_used && msix_enabled(d)) {
363         VMW_IRPRN("Sending MSI-X notification for vector %u", int_idx);
364         msix_notify(d, int_idx);
365         return false;
366     }
367     if (msi_enabled(d)) {
368         VMW_IRPRN("Sending MSI notification for vector %u", int_idx);
369         msi_notify(d, int_idx);
370         return false;
371     }
372 
373     VMW_IRPRN("Asserting line for interrupt %u", int_idx);
374     pci_irq_assert(d);
375     return true;
376 }
377 
378 static void _vmxnet3_deassert_interrupt_line(VMXNET3State *s, int lidx)
379 {
380     PCIDevice *d = PCI_DEVICE(s);
381 
382     /*
383      * This function should never be called for MSI(X) interrupts
384      * because deassertion never required for message interrupts
385      */
386     assert(!s->msix_used || !msix_enabled(d));
387     /*
388      * This function should never be called for MSI(X) interrupts
389      * because deassertion never required for message interrupts
390      */
391     assert(!msi_enabled(d));
392 
393     VMW_IRPRN("Deasserting line for interrupt %u", lidx);
394     pci_irq_deassert(d);
395 }
396 
397 static void vmxnet3_update_interrupt_line_state(VMXNET3State *s, int lidx)
398 {
399     if (!s->interrupt_states[lidx].is_pending &&
400        s->interrupt_states[lidx].is_asserted) {
401         VMW_IRPRN("New interrupt line state for index %d is DOWN", lidx);
402         _vmxnet3_deassert_interrupt_line(s, lidx);
403         s->interrupt_states[lidx].is_asserted = false;
404         return;
405     }
406 
407     if (s->interrupt_states[lidx].is_pending &&
408        !s->interrupt_states[lidx].is_masked &&
409        !s->interrupt_states[lidx].is_asserted) {
410         VMW_IRPRN("New interrupt line state for index %d is UP", lidx);
411         s->interrupt_states[lidx].is_asserted =
412             _vmxnet3_assert_interrupt_line(s, lidx);
413         s->interrupt_states[lidx].is_pending = false;
414         return;
415     }
416 }
417 
418 static void vmxnet3_trigger_interrupt(VMXNET3State *s, int lidx)
419 {
420     PCIDevice *d = PCI_DEVICE(s);
421     s->interrupt_states[lidx].is_pending = true;
422     vmxnet3_update_interrupt_line_state(s, lidx);
423 
424     if (s->msix_used && msix_enabled(d) && s->auto_int_masking) {
425         goto do_automask;
426     }
427 
428     if (msi_enabled(d) && s->auto_int_masking) {
429         goto do_automask;
430     }
431 
432     return;
433 
434 do_automask:
435     s->interrupt_states[lidx].is_masked = true;
436     vmxnet3_update_interrupt_line_state(s, lidx);
437 }
438 
439 static bool vmxnet3_interrupt_asserted(VMXNET3State *s, int lidx)
440 {
441     return s->interrupt_states[lidx].is_asserted;
442 }
443 
444 static void vmxnet3_clear_interrupt(VMXNET3State *s, int int_idx)
445 {
446     s->interrupt_states[int_idx].is_pending = false;
447     if (s->auto_int_masking) {
448         s->interrupt_states[int_idx].is_masked = true;
449     }
450     vmxnet3_update_interrupt_line_state(s, int_idx);
451 }
452 
453 static void
454 vmxnet3_on_interrupt_mask_changed(VMXNET3State *s, int lidx, bool is_masked)
455 {
456     s->interrupt_states[lidx].is_masked = is_masked;
457     vmxnet3_update_interrupt_line_state(s, lidx);
458 }
459 
460 static bool vmxnet3_verify_driver_magic(PCIDevice *d, hwaddr dshmem)
461 {
462     return (VMXNET3_READ_DRV_SHARED32(d, dshmem, magic) == VMXNET3_REV1_MAGIC);
463 }
464 
/* Extract byte number 'byte_num' (0 is the least significant) from 'x' */
#define VMXNET3_GET_BYTE(x, byte_num) \
    (((x) >> ((byte_num) * 8)) & 0xFF)

/* Place the low 8 bits of 'val' at byte position 'byte_num' of a uint32_t */
#define VMXNET3_MAKE_BYTE(byte_num, val) \
    (((uint32_t)((val) & 0xFF)) << ((byte_num) * 8))
468 
469 static void vmxnet3_set_variable_mac(VMXNET3State *s, uint32_t h, uint32_t l)
470 {
471     s->conf.macaddr.a[0] = VMXNET3_GET_BYTE(l,  0);
472     s->conf.macaddr.a[1] = VMXNET3_GET_BYTE(l,  1);
473     s->conf.macaddr.a[2] = VMXNET3_GET_BYTE(l,  2);
474     s->conf.macaddr.a[3] = VMXNET3_GET_BYTE(l,  3);
475     s->conf.macaddr.a[4] = VMXNET3_GET_BYTE(h, 0);
476     s->conf.macaddr.a[5] = VMXNET3_GET_BYTE(h, 1);
477 
478     VMW_CFPRN("Variable MAC: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));
479 
480     qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
481 }
482 
483 static uint64_t vmxnet3_get_mac_low(MACAddr *addr)
484 {
485     return VMXNET3_MAKE_BYTE(0, addr->a[0]) |
486            VMXNET3_MAKE_BYTE(1, addr->a[1]) |
487            VMXNET3_MAKE_BYTE(2, addr->a[2]) |
488            VMXNET3_MAKE_BYTE(3, addr->a[3]);
489 }
490 
491 static uint64_t vmxnet3_get_mac_high(MACAddr *addr)
492 {
493     return VMXNET3_MAKE_BYTE(0, addr->a[4]) |
494            VMXNET3_MAKE_BYTE(1, addr->a[5]);
495 }
496 
497 static void
498 vmxnet3_inc_tx_consumption_counter(VMXNET3State *s, int qidx)
499 {
500     vmxnet3_ring_inc(&s->txq_descr[qidx].tx_ring);
501 }
502 
503 static inline void
504 vmxnet3_inc_rx_consumption_counter(VMXNET3State *s, int qidx, int ridx)
505 {
506     vmxnet3_ring_inc(&s->rxq_descr[qidx].rx_ring[ridx]);
507 }
508 
509 static inline void
510 vmxnet3_inc_tx_completion_counter(VMXNET3State *s, int qidx)
511 {
512     vmxnet3_ring_inc(&s->txq_descr[qidx].comp_ring);
513 }
514 
515 static void
516 vmxnet3_inc_rx_completion_counter(VMXNET3State *s, int qidx)
517 {
518     vmxnet3_ring_inc(&s->rxq_descr[qidx].comp_ring);
519 }
520 
521 static void
522 vmxnet3_dec_rx_completion_counter(VMXNET3State *s, int qidx)
523 {
524     vmxnet3_ring_dec(&s->rxq_descr[qidx].comp_ring);
525 }
526 
527 static void vmxnet3_complete_packet(VMXNET3State *s, int qidx, uint32_t tx_ridx)
528 {
529     struct Vmxnet3_TxCompDesc txcq_descr;
530     PCIDevice *d = PCI_DEVICE(s);
531 
532     VMXNET3_RING_DUMP(VMW_RIPRN, "TXC", qidx, &s->txq_descr[qidx].comp_ring);
533 
534     txcq_descr.txdIdx = tx_ridx;
535     txcq_descr.gen = vmxnet3_ring_curr_gen(&s->txq_descr[qidx].comp_ring);
536 
537     vmxnet3_ring_write_curr_cell(d, &s->txq_descr[qidx].comp_ring, &txcq_descr);
538 
539     /* Flush changes in TX descriptor before changing the counter value */
540     smp_wmb();
541 
542     vmxnet3_inc_tx_completion_counter(s, qidx);
543     vmxnet3_trigger_interrupt(s, s->txq_descr[qidx].intr_idx);
544 }
545 
546 static bool
547 vmxnet3_setup_tx_offloads(VMXNET3State *s)
548 {
549     switch (s->offload_mode) {
550     case VMXNET3_OM_NONE:
551         net_tx_pkt_build_vheader(s->tx_pkt, false, false, 0);
552         break;
553 
554     case VMXNET3_OM_CSUM:
555         net_tx_pkt_build_vheader(s->tx_pkt, false, true, 0);
556         VMW_PKPRN("L4 CSO requested\n");
557         break;
558 
559     case VMXNET3_OM_TSO:
560         net_tx_pkt_build_vheader(s->tx_pkt, true, true,
561             s->cso_or_gso_size);
562         net_tx_pkt_update_ip_checksums(s->tx_pkt);
563         VMW_PKPRN("GSO offload requested.");
564         break;
565 
566     default:
567         g_assert_not_reached();
568         return false;
569     }
570 
571     return true;
572 }
573 
574 static void
575 vmxnet3_tx_retrieve_metadata(VMXNET3State *s,
576                              const struct Vmxnet3_TxDesc *txd)
577 {
578     s->offload_mode = txd->om;
579     s->cso_or_gso_size = txd->msscof;
580     s->tci = txd->tci;
581     s->needs_vlan = txd->ti;
582 }
583 
/* Outcome of processing a single TX or RX packet, used for statistics */
typedef enum Vmxnet3PktStatus {
    /* packet processed successfully */
    VMXNET3_PKT_STATUS_OK,
    /* packet processing failed */
    VMXNET3_PKT_STATUS_ERROR,
    /* packet was discarded; only for tx */
    VMXNET3_PKT_STATUS_DISCARD,
    /* no buffers available for the packet; only for rx */
    VMXNET3_PKT_STATUS_OUT_OF_BUF
} Vmxnet3PktStatus;
590 
591 static void
592 vmxnet3_on_tx_done_update_stats(VMXNET3State *s, int qidx,
593     Vmxnet3PktStatus status)
594 {
595     size_t tot_len = net_tx_pkt_get_total_len(s->tx_pkt);
596     struct UPT1_TxStats *stats = &s->txq_descr[qidx].txq_stats;
597 
598     switch (status) {
599     case VMXNET3_PKT_STATUS_OK:
600         switch (net_tx_pkt_get_packet_type(s->tx_pkt)) {
601         case ETH_PKT_BCAST:
602             stats->bcastPktsTxOK++;
603             stats->bcastBytesTxOK += tot_len;
604             break;
605         case ETH_PKT_MCAST:
606             stats->mcastPktsTxOK++;
607             stats->mcastBytesTxOK += tot_len;
608             break;
609         case ETH_PKT_UCAST:
610             stats->ucastPktsTxOK++;
611             stats->ucastBytesTxOK += tot_len;
612             break;
613         default:
614             g_assert_not_reached();
615         }
616 
617         if (s->offload_mode == VMXNET3_OM_TSO) {
618             /*
619              * According to VMWARE headers this statistic is a number
620              * of packets after segmentation but since we don't have
621              * this information in QEMU model, the best we can do is to
622              * provide number of non-segmented packets
623              */
624             stats->TSOPktsTxOK++;
625             stats->TSOBytesTxOK += tot_len;
626         }
627         break;
628 
629     case VMXNET3_PKT_STATUS_DISCARD:
630         stats->pktsTxDiscard++;
631         break;
632 
633     case VMXNET3_PKT_STATUS_ERROR:
634         stats->pktsTxError++;
635         break;
636 
637     default:
638         g_assert_not_reached();
639     }
640 }
641 
642 static void
643 vmxnet3_on_rx_done_update_stats(VMXNET3State *s,
644                                 int qidx,
645                                 Vmxnet3PktStatus status)
646 {
647     struct UPT1_RxStats *stats = &s->rxq_descr[qidx].rxq_stats;
648     size_t tot_len = net_rx_pkt_get_total_len(s->rx_pkt);
649 
650     switch (status) {
651     case VMXNET3_PKT_STATUS_OUT_OF_BUF:
652         stats->pktsRxOutOfBuf++;
653         break;
654 
655     case VMXNET3_PKT_STATUS_ERROR:
656         stats->pktsRxError++;
657         break;
658     case VMXNET3_PKT_STATUS_OK:
659         switch (net_rx_pkt_get_packet_type(s->rx_pkt)) {
660         case ETH_PKT_BCAST:
661             stats->bcastPktsRxOK++;
662             stats->bcastBytesRxOK += tot_len;
663             break;
664         case ETH_PKT_MCAST:
665             stats->mcastPktsRxOK++;
666             stats->mcastBytesRxOK += tot_len;
667             break;
668         case ETH_PKT_UCAST:
669             stats->ucastPktsRxOK++;
670             stats->ucastBytesRxOK += tot_len;
671             break;
672         default:
673             g_assert_not_reached();
674         }
675 
676         if (tot_len > s->mtu) {
677             stats->LROPktsRxOK++;
678             stats->LROBytesRxOK += tot_len;
679         }
680         break;
681     default:
682         g_assert_not_reached();
683     }
684 }
685 
686 static inline bool
687 vmxnet3_pop_next_tx_descr(VMXNET3State *s,
688                           int qidx,
689                           struct Vmxnet3_TxDesc *txd,
690                           uint32_t *descr_idx)
691 {
692     Vmxnet3Ring *ring = &s->txq_descr[qidx].tx_ring;
693     PCIDevice *d = PCI_DEVICE(s);
694 
695     vmxnet3_ring_read_curr_cell(d, ring, txd);
696     if (txd->gen == vmxnet3_ring_curr_gen(ring)) {
697         /* Only read after generation field verification */
698         smp_rmb();
699         /* Re-read to be sure we got the latest version */
700         vmxnet3_ring_read_curr_cell(d, ring, txd);
701         VMXNET3_RING_DUMP(VMW_RIPRN, "TX", qidx, ring);
702         *descr_idx = vmxnet3_ring_curr_cell_idx(ring);
703         vmxnet3_inc_tx_consumption_counter(s, qidx);
704         return true;
705     }
706 
707     return false;
708 }
709 
710 static bool
711 vmxnet3_send_packet(VMXNET3State *s, uint32_t qidx)
712 {
713     Vmxnet3PktStatus status = VMXNET3_PKT_STATUS_OK;
714 
715     if (!vmxnet3_setup_tx_offloads(s)) {
716         status = VMXNET3_PKT_STATUS_ERROR;
717         goto func_exit;
718     }
719 
720     /* debug prints */
721     vmxnet3_dump_virt_hdr(net_tx_pkt_get_vhdr(s->tx_pkt));
722     net_tx_pkt_dump(s->tx_pkt);
723 
724     if (!net_tx_pkt_send(s->tx_pkt, qemu_get_queue(s->nic))) {
725         status = VMXNET3_PKT_STATUS_DISCARD;
726         goto func_exit;
727     }
728 
729 func_exit:
730     vmxnet3_on_tx_done_update_stats(s, qidx, status);
731     return (status == VMXNET3_PKT_STATUS_OK);
732 }
733 
734 static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
735 {
736     struct Vmxnet3_TxDesc txd;
737     uint32_t txd_idx;
738     uint32_t data_len;
739     hwaddr data_pa;
740 
741     for (;;) {
742         if (!vmxnet3_pop_next_tx_descr(s, qidx, &txd, &txd_idx)) {
743             break;
744         }
745 
746         vmxnet3_dump_tx_descr(&txd);
747 
748         if (!s->skip_current_tx_pkt) {
749             data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE;
750             data_pa = le64_to_cpu(txd.addr);
751 
752             if (!net_tx_pkt_add_raw_fragment(s->tx_pkt,
753                                                 data_pa,
754                                                 data_len)) {
755                 s->skip_current_tx_pkt = true;
756             }
757         }
758 
759         if (s->tx_sop) {
760             vmxnet3_tx_retrieve_metadata(s, &txd);
761             s->tx_sop = false;
762         }
763 
764         if (txd.eop) {
765             if (!s->skip_current_tx_pkt && net_tx_pkt_parse(s->tx_pkt)) {
766                 if (s->needs_vlan) {
767                     net_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci);
768                 }
769 
770                 vmxnet3_send_packet(s, qidx);
771             } else {
772                 vmxnet3_on_tx_done_update_stats(s, qidx,
773                                                 VMXNET3_PKT_STATUS_ERROR);
774             }
775 
776             vmxnet3_complete_packet(s, qidx, txd_idx);
777             s->tx_sop = true;
778             s->skip_current_tx_pkt = false;
779             net_tx_pkt_reset(s->tx_pkt);
780         }
781     }
782 }
783 
784 static inline void
785 vmxnet3_read_next_rx_descr(VMXNET3State *s, int qidx, int ridx,
786                            struct Vmxnet3_RxDesc *dbuf, uint32_t *didx)
787 {
788     PCIDevice *d = PCI_DEVICE(s);
789 
790     Vmxnet3Ring *ring = &s->rxq_descr[qidx].rx_ring[ridx];
791     *didx = vmxnet3_ring_curr_cell_idx(ring);
792     vmxnet3_ring_read_curr_cell(d, ring, dbuf);
793 }
794 
795 static inline uint8_t
796 vmxnet3_get_rx_ring_gen(VMXNET3State *s, int qidx, int ridx)
797 {
798     return s->rxq_descr[qidx].rx_ring[ridx].gen;
799 }
800 
801 static inline hwaddr
802 vmxnet3_pop_rxc_descr(VMXNET3State *s, int qidx, uint32_t *descr_gen)
803 {
804     uint8_t ring_gen;
805     struct Vmxnet3_RxCompDesc rxcd;
806 
807     hwaddr daddr =
808         vmxnet3_ring_curr_cell_pa(&s->rxq_descr[qidx].comp_ring);
809 
810     pci_dma_read(PCI_DEVICE(s),
811                  daddr, &rxcd, sizeof(struct Vmxnet3_RxCompDesc));
812     ring_gen = vmxnet3_ring_curr_gen(&s->rxq_descr[qidx].comp_ring);
813 
814     if (rxcd.gen != ring_gen) {
815         *descr_gen = ring_gen;
816         vmxnet3_inc_rx_completion_counter(s, qidx);
817         return daddr;
818     }
819 
820     return 0;
821 }
822 
823 static inline void
824 vmxnet3_revert_rxc_descr(VMXNET3State *s, int qidx)
825 {
826     vmxnet3_dec_rx_completion_counter(s, qidx);
827 }
828 
829 #define RXQ_IDX      (0)
830 #define RX_HEAD_BODY_RING (0)
831 #define RX_BODY_ONLY_RING (1)
832 
833 static bool
834 vmxnet3_get_next_head_rx_descr(VMXNET3State *s,
835                                struct Vmxnet3_RxDesc *descr_buf,
836                                uint32_t *descr_idx,
837                                uint32_t *ridx)
838 {
839     for (;;) {
840         uint32_t ring_gen;
841         vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
842                                    descr_buf, descr_idx);
843 
844         /* If no more free descriptors - return */
845         ring_gen = vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING);
846         if (descr_buf->gen != ring_gen) {
847             return false;
848         }
849 
850         /* Only read after generation field verification */
851         smp_rmb();
852         /* Re-read to be sure we got the latest version */
853         vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
854                                    descr_buf, descr_idx);
855 
856         /* Mark current descriptor as used/skipped */
857         vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
858 
859         /* If this is what we are looking for - return */
860         if (descr_buf->btype == VMXNET3_RXD_BTYPE_HEAD) {
861             *ridx = RX_HEAD_BODY_RING;
862             return true;
863         }
864     }
865 }
866 
867 static bool
868 vmxnet3_get_next_body_rx_descr(VMXNET3State *s,
869                                struct Vmxnet3_RxDesc *d,
870                                uint32_t *didx,
871                                uint32_t *ridx)
872 {
873     vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);
874 
875     /* Try to find corresponding descriptor in head/body ring */
876     if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING)) {
877         /* Only read after generation field verification */
878         smp_rmb();
879         /* Re-read to be sure we got the latest version */
880         vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);
881         if (d->btype == VMXNET3_RXD_BTYPE_BODY) {
882             vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
883             *ridx = RX_HEAD_BODY_RING;
884             return true;
885         }
886     }
887 
888     /*
889      * If there is no free descriptors on head/body ring or next free
890      * descriptor is a head descriptor switch to body only ring
891      */
892     vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);
893 
894     /* If no more free descriptors - return */
895     if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_BODY_ONLY_RING)) {
896         /* Only read after generation field verification */
897         smp_rmb();
898         /* Re-read to be sure we got the latest version */
899         vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);
900         assert(d->btype == VMXNET3_RXD_BTYPE_BODY);
901         *ridx = RX_BODY_ONLY_RING;
902         vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_BODY_ONLY_RING);
903         return true;
904     }
905 
906     return false;
907 }
908 
909 static inline bool
910 vmxnet3_get_next_rx_descr(VMXNET3State *s, bool is_head,
911                           struct Vmxnet3_RxDesc *descr_buf,
912                           uint32_t *descr_idx,
913                           uint32_t *ridx)
914 {
915     if (is_head || !s->rx_packets_compound) {
916         return vmxnet3_get_next_head_rx_descr(s, descr_buf, descr_idx, ridx);
917     } else {
918         return vmxnet3_get_next_body_rx_descr(s, descr_buf, descr_idx, ridx);
919     }
920 }
921 
922 /* In case packet was csum offloaded (either NEEDS_CSUM or DATA_VALID),
923  * the implementation always passes an RxCompDesc with a "Checksum
924  * calculated and found correct" to the OS (cnc=0 and tuc=1, see
925  * vmxnet3_rx_update_descr). This emulates the observed ESXi behavior.
926  *
927  * Therefore, if packet has the NEEDS_CSUM set, we must calculate
928  * and place a fully computed checksum into the tcp/udp header.
929  * Otherwise, the OS driver will receive a checksum-correct indication
930  * (CHECKSUM_UNNECESSARY), but with the actual tcp/udp checksum field
931  * having just the pseudo header csum value.
932  *
933  * While this is not a problem if packet is destined for local delivery,
934  * in the case the host OS performs forwarding, it will forward an
935  * incorrectly checksummed packet.
936  */
937 static void vmxnet3_rx_need_csum_calculate(struct NetRxPkt *pkt,
938                                            const void *pkt_data,
939                                            size_t pkt_len)
940 {
941     struct virtio_net_hdr *vhdr;
942     bool isip4, isip6, istcp, isudp;
943     uint8_t *data;
944     int len;
945 
946     if (!net_rx_pkt_has_virt_hdr(pkt)) {
947         return;
948     }
949 
950     vhdr = net_rx_pkt_get_vhdr(pkt);
951     if (!VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
952         return;
953     }
954 
955     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
956     if (!(isip4 || isip6) || !(istcp || isudp)) {
957         return;
958     }
959 
960     vmxnet3_dump_virt_hdr(vhdr);
961 
962     /* Validate packet len: csum_start + scum_offset + length of csum field */
963     if (pkt_len < (vhdr->csum_start + vhdr->csum_offset + 2)) {
964         VMW_PKPRN("packet len:%zu < csum_start(%d) + csum_offset(%d) + 2, "
965                   "cannot calculate checksum",
966                   pkt_len, vhdr->csum_start, vhdr->csum_offset);
967         return;
968     }
969 
970     data = (uint8_t *)pkt_data + vhdr->csum_start;
971     len = pkt_len - vhdr->csum_start;
972     /* Put the checksum obtained into the packet */
973     stw_be_p(data + vhdr->csum_offset, net_raw_checksum(data, len));
974 
975     vhdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
976     vhdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
977 }
978 
979 static void vmxnet3_rx_update_descr(struct NetRxPkt *pkt,
980     struct Vmxnet3_RxCompDesc *rxcd)
981 {
982     int csum_ok, is_gso;
983     bool isip4, isip6, istcp, isudp;
984     struct virtio_net_hdr *vhdr;
985     uint8_t offload_type;
986 
987     if (net_rx_pkt_is_vlan_stripped(pkt)) {
988         rxcd->ts = 1;
989         rxcd->tci = net_rx_pkt_get_vlan_tag(pkt);
990     }
991 
992     if (!net_rx_pkt_has_virt_hdr(pkt)) {
993         goto nocsum;
994     }
995 
996     vhdr = net_rx_pkt_get_vhdr(pkt);
997     /*
998      * Checksum is valid when lower level tell so or when lower level
999      * requires checksum offload telling that packet produced/bridged
1000      * locally and did travel over network after last checksum calculation
1001      * or production
1002      */
1003     csum_ok = VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_DATA_VALID) ||
1004               VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM);
1005 
1006     offload_type = vhdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
1007     is_gso = (offload_type != VIRTIO_NET_HDR_GSO_NONE) ? 1 : 0;
1008 
1009     if (!csum_ok && !is_gso) {
1010         goto nocsum;
1011     }
1012 
1013     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1014     if ((!istcp && !isudp) || (!isip4 && !isip6)) {
1015         goto nocsum;
1016     }
1017 
1018     rxcd->cnc = 0;
1019     rxcd->v4 = isip4 ? 1 : 0;
1020     rxcd->v6 = isip6 ? 1 : 0;
1021     rxcd->tcp = istcp ? 1 : 0;
1022     rxcd->udp = isudp ? 1 : 0;
1023     rxcd->fcs = rxcd->tuc = rxcd->ipc = 1;
1024     return;
1025 
1026 nocsum:
1027     rxcd->cnc = 1;
1028     return;
1029 }
1030 
1031 static void
1032 vmxnet3_pci_dma_writev(PCIDevice *pci_dev,
1033                        const struct iovec *iov,
1034                        size_t start_iov_off,
1035                        hwaddr target_addr,
1036                        size_t bytes_to_copy)
1037 {
1038     size_t curr_off = 0;
1039     size_t copied = 0;
1040 
1041     while (bytes_to_copy) {
1042         if (start_iov_off < (curr_off + iov->iov_len)) {
1043             size_t chunk_len =
1044                 MIN((curr_off + iov->iov_len) - start_iov_off, bytes_to_copy);
1045 
1046             pci_dma_write(pci_dev, target_addr + copied,
1047                           iov->iov_base + start_iov_off - curr_off,
1048                           chunk_len);
1049 
1050             copied += chunk_len;
1051             start_iov_off += chunk_len;
1052             curr_off = start_iov_off;
1053             bytes_to_copy -= chunk_len;
1054         } else {
1055             curr_off += iov->iov_len;
1056         }
1057         iov++;
1058     }
1059 }
1060 
/*
 * Deliver the packet held in s->rx_pkt to the guest through RX queue
 * RXQ_IDX.
 *
 * The packet is copied fragment by fragment: each iteration pops one RX
 * completion descriptor slot, fetches one RX descriptor, DMA-writes up to
 * rxd.len bytes into the buffer it points to and prepares a completion
 * descriptor for that fragment. A prepared completion descriptor is
 * written to guest memory only when the next fragment is prepared or
 * after the loop, so the last one can still receive eop/err.
 *
 * Returns true when the whole packet was copied. Returns false when the
 * fragment limit (s->max_rx_frags) was reached or descriptors ran out;
 * the RX statistics are updated accordingly in every case.
 */
static bool
vmxnet3_indicate_packet(VMXNET3State *s)
{
    struct Vmxnet3_RxDesc rxd;
    PCIDevice *d = PCI_DEVICE(s);
    bool is_head = true;
    uint32_t rxd_idx;
    uint32_t rx_ridx = 0;

    struct Vmxnet3_RxCompDesc rxcd;
    uint32_t new_rxcd_gen = VMXNET3_INIT_GEN;
    /* Completion slot popped in the current iteration, not yet filled */
    hwaddr new_rxcd_pa = 0;
    /* Completion slot whose contents are in rxcd but not yet written out */
    hwaddr ready_rxcd_pa = 0;
    struct iovec *data = net_rx_pkt_get_iovec(s->rx_pkt);
    size_t bytes_copied = 0;
    size_t bytes_left = net_rx_pkt_get_total_len(s->rx_pkt);
    uint16_t num_frags = 0;
    size_t chunk_size;

    net_rx_pkt_dump(s->rx_pkt);

    while (bytes_left > 0) {

        /* cannot add more frags to packet */
        if (num_frags == s->max_rx_frags) {
            break;
        }

        /* A zero address means no completion slot could be obtained */
        new_rxcd_pa = vmxnet3_pop_rxc_descr(s, RXQ_IDX, &new_rxcd_gen);
        if (!new_rxcd_pa) {
            break;
        }

        /*
         * On failure new_rxcd_pa stays non-zero, which makes the code
         * after the loop revert the completion slot popped above.
         */
        if (!vmxnet3_get_next_rx_descr(s, is_head, &rxd, &rxd_idx, &rx_ridx)) {
            break;
        }

        /* Copy as much as fits into this descriptor's buffer */
        chunk_size = MIN(bytes_left, rxd.len);
        vmxnet3_pci_dma_writev(d, data, bytes_copied,
                               le64_to_cpu(rxd.addr), chunk_size);
        bytes_copied += chunk_size;
        bytes_left -= chunk_size;

        vmxnet3_dump_rx_descr(&rxd);

        /* Flush the previous fragment's completion before reusing rxcd */
        if (ready_rxcd_pa != 0) {
            pci_dma_write(d, ready_rxcd_pa, &rxcd, sizeof(rxcd));
        }

        memset(&rxcd, 0, sizeof(struct Vmxnet3_RxCompDesc));
        rxcd.rxdIdx = rxd_idx;
        rxcd.len = chunk_size;
        rxcd.sop = is_head;
        rxcd.gen = new_rxcd_gen;
        rxcd.rqID = RXQ_IDX + rx_ridx * s->rxq_num;

        /* VLAN/checksum info is filled in only on the final fragment */
        if (bytes_left == 0) {
            vmxnet3_rx_update_descr(s->rx_pkt, &rxcd);
        }

        VMW_RIPRN("RX Completion descriptor: rxRing: %lu rxIdx %lu len %lu "
                  "sop %d csum_correct %lu",
                  (unsigned long) rx_ridx,
                  (unsigned long) rxcd.rxdIdx,
                  (unsigned long) rxcd.len,
                  (int) rxcd.sop,
                  (unsigned long) rxcd.tuc);

        is_head = false;
        ready_rxcd_pa = new_rxcd_pa;
        new_rxcd_pa = 0;
        num_frags++;
    }

    /* Write out the last prepared completion, marked as end of packet */
    if (ready_rxcd_pa != 0) {
        rxcd.eop = 1;
        /* Flag an error if part of the packet could not be delivered */
        rxcd.err = (bytes_left != 0);

        pci_dma_write(d, ready_rxcd_pa, &rxcd, sizeof(rxcd));

        /* Flush RX descriptor changes */
        smp_wmb();
    }

    /* A completion slot was popped but never used: give it back */
    if (new_rxcd_pa != 0) {
        vmxnet3_revert_rxc_descr(s, RXQ_IDX);
    }

    /* The queue interrupt is raised on every path, success or not */
    vmxnet3_trigger_interrupt(s, s->rxq_descr[RXQ_IDX].intr_idx);

    if (bytes_left == 0) {
        vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_OK);
        return true;
    } else if (num_frags == s->max_rx_frags) {
        vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_ERROR);
        return false;
    } else {
        vmxnet3_on_rx_done_update_stats(s, RXQ_IDX,
                                        VMXNET3_PKT_STATUS_OUT_OF_BUF);
        return false;
    }
}
1163 
1164 static void
1165 vmxnet3_io_bar0_write(void *opaque, hwaddr addr,
1166                       uint64_t val, unsigned size)
1167 {
1168     VMXNET3State *s = opaque;
1169 
1170     if (!s->device_active) {
1171         return;
1172     }
1173 
1174     if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_TXPROD,
1175                         VMXNET3_DEVICE_MAX_TX_QUEUES, VMXNET3_REG_ALIGN)) {
1176         int tx_queue_idx =
1177             VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_TXPROD,
1178                                      VMXNET3_REG_ALIGN);
1179         assert(tx_queue_idx <= s->txq_num);
1180         vmxnet3_process_tx_queue(s, tx_queue_idx);
1181         return;
1182     }
1183 
1184     if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
1185                         VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
1186         int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
1187                                          VMXNET3_REG_ALIGN);
1188 
1189         VMW_CBPRN("Interrupt mask for line %d written: 0x%" PRIx64, l, val);
1190 
1191         vmxnet3_on_interrupt_mask_changed(s, l, val);
1192         return;
1193     }
1194 
1195     if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD,
1196                         VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN) ||
1197        VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD2,
1198                         VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN)) {
1199         return;
1200     }
1201 
1202     VMW_WRPRN("BAR0 unknown write [%" PRIx64 "] = %" PRIx64 ", size %d",
1203               (uint64_t) addr, val, size);
1204 }
1205 
1206 static uint64_t
1207 vmxnet3_io_bar0_read(void *opaque, hwaddr addr, unsigned size)
1208 {
1209     VMXNET3State *s = opaque;
1210 
1211     if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
1212                         VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
1213         int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
1214                                          VMXNET3_REG_ALIGN);
1215         return s->interrupt_states[l].is_masked;
1216     }
1217 
1218     VMW_CBPRN("BAR0 unknown read [%" PRIx64 "], size %d", addr, size);
1219     return 0;
1220 }
1221 
1222 static void vmxnet3_reset_interrupt_states(VMXNET3State *s)
1223 {
1224     int i;
1225     for (i = 0; i < ARRAY_SIZE(s->interrupt_states); i++) {
1226         s->interrupt_states[i].is_asserted = false;
1227         s->interrupt_states[i].is_pending = false;
1228         s->interrupt_states[i].is_masked = true;
1229     }
1230 }
1231 
1232 static void vmxnet3_reset_mac(VMXNET3State *s)
1233 {
1234     memcpy(&s->conf.macaddr.a, &s->perm_mac.a, sizeof(s->perm_mac.a));
1235     VMW_CFPRN("MAC address set to: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));
1236 }
1237 
1238 static void vmxnet3_deactivate_device(VMXNET3State *s)
1239 {
1240     if (s->device_active) {
1241         VMW_CBPRN("Deactivating vmxnet3...");
1242         net_tx_pkt_reset(s->tx_pkt);
1243         net_tx_pkt_uninit(s->tx_pkt);
1244         net_rx_pkt_uninit(s->rx_pkt);
1245         s->device_active = false;
1246     }
1247 }
1248 
1249 static void vmxnet3_reset(VMXNET3State *s)
1250 {
1251     VMW_CBPRN("Resetting vmxnet3...");
1252 
1253     vmxnet3_deactivate_device(s);
1254     vmxnet3_reset_interrupt_states(s);
1255     s->drv_shmem = 0;
1256     s->tx_sop = true;
1257     s->skip_current_tx_pkt = false;
1258 }
1259 
1260 static void vmxnet3_update_rx_mode(VMXNET3State *s)
1261 {
1262     PCIDevice *d = PCI_DEVICE(s);
1263 
1264     s->rx_mode = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem,
1265                                            devRead.rxFilterConf.rxMode);
1266     VMW_CFPRN("RX mode: 0x%08X", s->rx_mode);
1267 }
1268 
1269 static void vmxnet3_update_vlan_filters(VMXNET3State *s)
1270 {
1271     int i;
1272     PCIDevice *d = PCI_DEVICE(s);
1273 
1274     /* Copy configuration from shared memory */
1275     VMXNET3_READ_DRV_SHARED(d, s->drv_shmem,
1276                             devRead.rxFilterConf.vfTable,
1277                             s->vlan_table,
1278                             sizeof(s->vlan_table));
1279 
1280     /* Invert byte order when needed */
1281     for (i = 0; i < ARRAY_SIZE(s->vlan_table); i++) {
1282         s->vlan_table[i] = le32_to_cpu(s->vlan_table[i]);
1283     }
1284 
1285     /* Dump configuration for debugging purposes */
1286     VMW_CFPRN("Configured VLANs:");
1287     for (i = 0; i < sizeof(s->vlan_table) * 8; i++) {
1288         if (VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, i)) {
1289             VMW_CFPRN("\tVLAN %d is present", i);
1290         }
1291     }
1292 }
1293 
1294 static void vmxnet3_update_mcast_filters(VMXNET3State *s)
1295 {
1296     PCIDevice *d = PCI_DEVICE(s);
1297 
1298     uint16_t list_bytes =
1299         VMXNET3_READ_DRV_SHARED16(d, s->drv_shmem,
1300                                   devRead.rxFilterConf.mfTableLen);
1301 
1302     s->mcast_list_len = list_bytes / sizeof(s->mcast_list[0]);
1303 
1304     s->mcast_list = g_realloc(s->mcast_list, list_bytes);
1305     if (!s->mcast_list) {
1306         if (s->mcast_list_len == 0) {
1307             VMW_CFPRN("Current multicast list is empty");
1308         } else {
1309             VMW_ERPRN("Failed to allocate multicast list of %d elements",
1310                       s->mcast_list_len);
1311         }
1312         s->mcast_list_len = 0;
1313     } else {
1314         int i;
1315         hwaddr mcast_list_pa =
1316             VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem,
1317                                       devRead.rxFilterConf.mfTablePA);
1318 
1319         pci_dma_read(d, mcast_list_pa, s->mcast_list, list_bytes);
1320 
1321         VMW_CFPRN("Current multicast list len is %d:", s->mcast_list_len);
1322         for (i = 0; i < s->mcast_list_len; i++) {
1323             VMW_CFPRN("\t" MAC_FMT, MAC_ARG(s->mcast_list[i].a));
1324         }
1325     }
1326 }
1327 
/*
 * Refresh all RX filtering state from the driver shared area:
 * RX mode, VLAN filter table and multicast filter list, in that order.
 */
static void vmxnet3_setup_rx_filtering(VMXNET3State *s)
{
    vmxnet3_update_rx_mode(s);
    vmxnet3_update_vlan_filters(s);
    vmxnet3_update_mcast_filters(s);
}
1334 
1335 static uint32_t vmxnet3_get_interrupt_config(VMXNET3State *s)
1336 {
1337     uint32_t interrupt_mode = VMXNET3_IT_AUTO | (VMXNET3_IMM_AUTO << 2);
1338     VMW_CFPRN("Interrupt config is 0x%X", interrupt_mode);
1339     return interrupt_mode;
1340 }
1341 
1342 static void vmxnet3_fill_stats(VMXNET3State *s)
1343 {
1344     int i;
1345     PCIDevice *d = PCI_DEVICE(s);
1346 
1347     if (!s->device_active)
1348         return;
1349 
1350     for (i = 0; i < s->txq_num; i++) {
1351         pci_dma_write(d,
1352                       s->txq_descr[i].tx_stats_pa,
1353                       &s->txq_descr[i].txq_stats,
1354                       sizeof(s->txq_descr[i].txq_stats));
1355     }
1356 
1357     for (i = 0; i < s->rxq_num; i++) {
1358         pci_dma_write(d,
1359                       s->rxq_descr[i].rx_stats_pa,
1360                       &s->rxq_descr[i].rxq_stats,
1361                       sizeof(s->rxq_descr[i].rxq_stats));
1362     }
1363 }
1364 
1365 static void vmxnet3_adjust_by_guest_type(VMXNET3State *s)
1366 {
1367     struct Vmxnet3_GOSInfo gos;
1368     PCIDevice *d = PCI_DEVICE(s);
1369 
1370     VMXNET3_READ_DRV_SHARED(d, s->drv_shmem, devRead.misc.driverInfo.gos,
1371                             &gos, sizeof(gos));
1372     s->rx_packets_compound =
1373         (gos.gosType == VMXNET3_GOS_TYPE_WIN) ? false : true;
1374 
1375     VMW_CFPRN("Guest type specifics: RXCOMPOUND: %d", s->rx_packets_compound);
1376 }
1377 
1378 static void
1379 vmxnet3_dump_conf_descr(const char *name,
1380                         struct Vmxnet3_VariableLenConfDesc *pm_descr)
1381 {
1382     VMW_CFPRN("%s descriptor dump: Version %u, Length %u",
1383               name, pm_descr->confVer, pm_descr->confLen);
1384 
1385 };
1386 
1387 static void vmxnet3_update_pm_state(VMXNET3State *s)
1388 {
1389     struct Vmxnet3_VariableLenConfDesc pm_descr;
1390     PCIDevice *d = PCI_DEVICE(s);
1391 
1392     pm_descr.confLen =
1393         VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.pmConfDesc.confLen);
1394     pm_descr.confVer =
1395         VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.pmConfDesc.confVer);
1396     pm_descr.confPA =
1397         VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.pmConfDesc.confPA);
1398 
1399     vmxnet3_dump_conf_descr("PM State", &pm_descr);
1400 }
1401 
1402 static void vmxnet3_update_features(VMXNET3State *s)
1403 {
1404     uint32_t guest_features;
1405     int rxcso_supported;
1406     PCIDevice *d = PCI_DEVICE(s);
1407 
1408     guest_features = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem,
1409                                                devRead.misc.uptFeatures);
1410 
1411     rxcso_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXCSUM);
1412     s->rx_vlan_stripping = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXVLAN);
1413     s->lro_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_LRO);
1414 
1415     VMW_CFPRN("Features configuration: LRO: %d, RXCSUM: %d, VLANSTRIP: %d",
1416               s->lro_supported, rxcso_supported,
1417               s->rx_vlan_stripping);
1418     if (s->peer_has_vhdr) {
1419         qemu_set_offload(qemu_get_queue(s->nic)->peer,
1420                          rxcso_supported,
1421                          s->lro_supported,
1422                          s->lro_supported,
1423                          0,
1424                          0);
1425     }
1426 }
1427 
1428 static bool vmxnet3_verify_intx(VMXNET3State *s, int intx)
1429 {
1430     return s->msix_used || msi_enabled(PCI_DEVICE(s))
1431         || intx == pci_get_byte(s->parent_obj.config + PCI_INTERRUPT_PIN) - 1;
1432 }
1433 
1434 static void vmxnet3_validate_interrupt_idx(bool is_msix, int idx)
1435 {
1436     int max_ints = is_msix ? VMXNET3_MAX_INTRS : VMXNET3_MAX_NMSIX_INTRS;
1437     if (idx >= max_ints) {
1438         hw_error("Bad interrupt index: %d\n", idx);
1439     }
1440 }
1441 
1442 static void vmxnet3_validate_interrupts(VMXNET3State *s)
1443 {
1444     int i;
1445 
1446     VMW_CFPRN("Verifying event interrupt index (%d)", s->event_int_idx);
1447     vmxnet3_validate_interrupt_idx(s->msix_used, s->event_int_idx);
1448 
1449     for (i = 0; i < s->txq_num; i++) {
1450         int idx = s->txq_descr[i].intr_idx;
1451         VMW_CFPRN("Verifying TX queue %d interrupt index (%d)", i, idx);
1452         vmxnet3_validate_interrupt_idx(s->msix_used, idx);
1453     }
1454 
1455     for (i = 0; i < s->rxq_num; i++) {
1456         int idx = s->rxq_descr[i].intr_idx;
1457         VMW_CFPRN("Verifying RX queue %d interrupt index (%d)", i, idx);
1458         vmxnet3_validate_interrupt_idx(s->msix_used, idx);
1459     }
1460 }
1461 
1462 static void vmxnet3_validate_queues(VMXNET3State *s)
1463 {
1464     /*
1465     * txq_num and rxq_num are total number of queues
1466     * configured by guest. These numbers must not
1467     * exceed corresponding maximal values.
1468     */
1469 
1470     if (s->txq_num > VMXNET3_DEVICE_MAX_TX_QUEUES) {
1471         hw_error("Bad TX queues number: %d\n", s->txq_num);
1472     }
1473 
1474     if (s->rxq_num > VMXNET3_DEVICE_MAX_RX_QUEUES) {
1475         hw_error("Bad RX queues number: %d\n", s->rxq_num);
1476     }
1477 }
1478 
/*
 * Activate the device using the configuration the driver placed in the
 * shared memory area at s->drv_shmem.
 *
 * The function returns without activating when the driver magic check
 * fails or when the device is already active. Otherwise it caches the
 * configuration (features, RX filters, MTU, fragment limit, interrupt
 * indices, queue counts), initialises the TX/RX rings and their
 * completion rings from the queue descriptor table, allocates the TX and
 * RX packet wrappers, validates the interrupt indices, resets the MAC
 * address and finally sets device_active.
 *
 * Note that interrupt indices read from guest memory are checked with
 * assert(), and queue counts / interrupt indices with helpers that call
 * hw_error().
 */
static void vmxnet3_activate_device(VMXNET3State *s)
{
    int i;
    static const uint32_t VMXNET3_DEF_TX_THRESHOLD = 1;
    PCIDevice *d = PCI_DEVICE(s);
    hwaddr qdescr_table_pa;
    uint64_t pa;
    uint32_t size;

    /* Verify configuration consistency */
    if (!vmxnet3_verify_driver_magic(d, s->drv_shmem)) {
        VMW_ERPRN("Device configuration received from driver is invalid");
        return;
    }

    /* Verify if device is active */
    if (s->device_active) {
        VMW_CFPRN("Vmxnet3 device is active");
        return;
    }

    /* Pick up guest type, features, PM state and RX filters */
    vmxnet3_adjust_by_guest_type(s);
    vmxnet3_update_features(s);
    vmxnet3_update_pm_state(s);
    vmxnet3_setup_rx_filtering(s);
    /* Cache fields from shared memory */
    s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu);
    VMW_CFPRN("MTU is %u", s->mtu);

    s->max_rx_frags =
        VMXNET3_READ_DRV_SHARED16(d, s->drv_shmem, devRead.misc.maxNumRxSG);

    /* A limit of zero is replaced by one fragment per packet */
    if (s->max_rx_frags == 0) {
        s->max_rx_frags = 1;
    }

    VMW_CFPRN("Max RX fragments is %u", s->max_rx_frags);

    s->event_int_idx =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.eventIntrIdx);
    assert(vmxnet3_verify_intx(s, s->event_int_idx));
    VMW_CFPRN("Events interrupt line is %u", s->event_int_idx);

    s->auto_int_masking =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.autoMask);
    VMW_CFPRN("Automatic interrupt masking is %d", (int)s->auto_int_masking);

    s->txq_num =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numTxQueues);
    s->rxq_num =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numRxQueues);

    VMW_CFPRN("Number of TX/RX queues %u/%u", s->txq_num, s->rxq_num);
    vmxnet3_validate_queues(s);

    /*
     * Base of the queue descriptor table: TX queue descriptors come
     * first, the RX queue descriptors follow them (see qd_pa below).
     */
    qdescr_table_pa =
        VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.misc.queueDescPA);
    VMW_CFPRN("TX queues descriptors table is at 0x%" PRIx64, qdescr_table_pa);

    /*
     * Worst-case scenario is a packet that holds all TX rings space so
     * we calculate total size of all TX rings for max TX fragments number
     */
    s->max_tx_frags = 0;

    /* TX queues */
    for (i = 0; i < s->txq_num; i++) {
        hwaddr qdescr_pa =
            qdescr_table_pa + i * sizeof(struct Vmxnet3_TxQueueDesc);

        /* Read interrupt number for this TX queue */
        s->txq_descr[i].intr_idx =
            VMXNET3_READ_TX_QUEUE_DESCR8(d, qdescr_pa, conf.intrIdx);
        assert(vmxnet3_verify_intx(s, s->txq_descr[i].intr_idx));

        VMW_CFPRN("TX Queue %d interrupt: %d", i, s->txq_descr[i].intr_idx);

        /* Read rings memory locations for TX queues */
        pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.txRingBasePA);
        size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.txRingSize);

        vmxnet3_ring_init(d, &s->txq_descr[i].tx_ring, pa, size,
                          sizeof(struct Vmxnet3_TxDesc), false);
        VMXNET3_RING_DUMP(VMW_CFPRN, "TX", i, &s->txq_descr[i].tx_ring);

        /* Accumulate the TX ring sizes (see the worst-case note above) */
        s->max_tx_frags += size;

        /* TXC ring */
        pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.compRingBasePA);
        size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.compRingSize);
        vmxnet3_ring_init(d, &s->txq_descr[i].comp_ring, pa, size,
                          sizeof(struct Vmxnet3_TxCompDesc), true);
        VMXNET3_RING_DUMP(VMW_CFPRN, "TXC", i, &s->txq_descr[i].comp_ring);

        /* Remember where this queue's statistics live in guest memory */
        s->txq_descr[i].tx_stats_pa =
            qdescr_pa + offsetof(struct Vmxnet3_TxQueueDesc, stats);

        memset(&s->txq_descr[i].txq_stats, 0,
               sizeof(s->txq_descr[i].txq_stats));

        /* Fill device-managed parameters for queues */
        VMXNET3_WRITE_TX_QUEUE_DESCR32(d, qdescr_pa,
                                       ctrl.txThreshold,
                                       VMXNET3_DEF_TX_THRESHOLD);
    }

    /* Preallocate TX packet wrapper */
    VMW_CFPRN("Max TX fragments is %u", s->max_tx_frags);
    net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s),
                    s->max_tx_frags, s->peer_has_vhdr);
    net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);

    /* Read rings memory locations for RX queues */
    for (i = 0; i < s->rxq_num; i++) {
        int j;
        /* RX queue descriptors are laid out after all TX descriptors */
        hwaddr qd_pa =
            qdescr_table_pa + s->txq_num * sizeof(struct Vmxnet3_TxQueueDesc) +
            i * sizeof(struct Vmxnet3_RxQueueDesc);

        /*
         * Read interrupt number for this RX queue.
         * NOTE(review): this uses the TX queue descriptor accessor on an
         * RX queue descriptor; it is only correct if conf.intrIdx sits at
         * the same offset in both structures - confirm against vmxnet3.h.
         */
        s->rxq_descr[i].intr_idx =
            VMXNET3_READ_TX_QUEUE_DESCR8(d, qd_pa, conf.intrIdx);
        assert(vmxnet3_verify_intx(s, s->rxq_descr[i].intr_idx));

        VMW_CFPRN("RX Queue %d interrupt: %d", i, s->rxq_descr[i].intr_idx);

        /* Read rings memory locations */
        for (j = 0; j < VMXNET3_RX_RINGS_PER_QUEUE; j++) {
            /* RX rings */
            pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.rxRingBasePA[j]);
            size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.rxRingSize[j]);
            vmxnet3_ring_init(d, &s->rxq_descr[i].rx_ring[j], pa, size,
                              sizeof(struct Vmxnet3_RxDesc), false);
            VMW_CFPRN("RX queue %d:%d: Base: %" PRIx64 ", Size: %d",
                      i, j, pa, size);
        }

        /* RXC ring */
        pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.compRingBasePA);
        size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.compRingSize);
        vmxnet3_ring_init(d, &s->rxq_descr[i].comp_ring, pa, size,
                          sizeof(struct Vmxnet3_RxCompDesc), true);
        VMW_CFPRN("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size);

        /* Remember where this queue's statistics live in guest memory */
        s->rxq_descr[i].rx_stats_pa =
            qd_pa + offsetof(struct Vmxnet3_RxQueueDesc, stats);
        memset(&s->rxq_descr[i].rxq_stats, 0,
               sizeof(s->rxq_descr[i].rxq_stats));
    }

    /* Runs after the per-queue interrupt indices have been cached above */
    vmxnet3_validate_interrupts(s);

    /* Make sure everything is in place before device activation */
    smp_wmb();

    vmxnet3_reset_mac(s);

    s->device_active = true;
}
1638 
1639 static void vmxnet3_handle_command(VMXNET3State *s, uint64_t cmd)
1640 {
1641     s->last_command = cmd;
1642 
1643     switch (cmd) {
1644     case VMXNET3_CMD_GET_PERM_MAC_HI:
1645         VMW_CBPRN("Set: Get upper part of permanent MAC");
1646         break;
1647 
1648     case VMXNET3_CMD_GET_PERM_MAC_LO:
1649         VMW_CBPRN("Set: Get lower part of permanent MAC");
1650         break;
1651 
1652     case VMXNET3_CMD_GET_STATS:
1653         VMW_CBPRN("Set: Get device statistics");
1654         vmxnet3_fill_stats(s);
1655         break;
1656 
1657     case VMXNET3_CMD_ACTIVATE_DEV:
1658         VMW_CBPRN("Set: Activating vmxnet3 device");
1659         vmxnet3_activate_device(s);
1660         break;
1661 
1662     case VMXNET3_CMD_UPDATE_RX_MODE:
1663         VMW_CBPRN("Set: Update rx mode");
1664         vmxnet3_update_rx_mode(s);
1665         break;
1666 
1667     case VMXNET3_CMD_UPDATE_VLAN_FILTERS:
1668         VMW_CBPRN("Set: Update VLAN filters");
1669         vmxnet3_update_vlan_filters(s);
1670         break;
1671 
1672     case VMXNET3_CMD_UPDATE_MAC_FILTERS:
1673         VMW_CBPRN("Set: Update MAC filters");
1674         vmxnet3_update_mcast_filters(s);
1675         break;
1676 
1677     case VMXNET3_CMD_UPDATE_FEATURE:
1678         VMW_CBPRN("Set: Update features");
1679         vmxnet3_update_features(s);
1680         break;
1681 
1682     case VMXNET3_CMD_UPDATE_PMCFG:
1683         VMW_CBPRN("Set: Update power management config");
1684         vmxnet3_update_pm_state(s);
1685         break;
1686 
1687     case VMXNET3_CMD_GET_LINK:
1688         VMW_CBPRN("Set: Get link");
1689         break;
1690 
1691     case VMXNET3_CMD_RESET_DEV:
1692         VMW_CBPRN("Set: Reset device");
1693         vmxnet3_reset(s);
1694         break;
1695 
1696     case VMXNET3_CMD_QUIESCE_DEV:
1697         VMW_CBPRN("Set: VMXNET3_CMD_QUIESCE_DEV - deactivate the device");
1698         vmxnet3_deactivate_device(s);
1699         break;
1700 
1701     case VMXNET3_CMD_GET_CONF_INTR:
1702         VMW_CBPRN("Set: VMXNET3_CMD_GET_CONF_INTR - interrupt configuration");
1703         break;
1704 
1705     case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
1706         VMW_CBPRN("Set: VMXNET3_CMD_GET_ADAPTIVE_RING_INFO - "
1707                   "adaptive ring info flags");
1708         break;
1709 
1710     case VMXNET3_CMD_GET_DID_LO:
1711         VMW_CBPRN("Set: Get lower part of device ID");
1712         break;
1713 
1714     case VMXNET3_CMD_GET_DID_HI:
1715         VMW_CBPRN("Set: Get upper part of device ID");
1716         break;
1717 
1718     case VMXNET3_CMD_GET_DEV_EXTRA_INFO:
1719         VMW_CBPRN("Set: Get device extra info");
1720         break;
1721 
1722     default:
1723         VMW_CBPRN("Received unknown command: %" PRIx64, cmd);
1724         break;
1725     }
1726 }
1727 
1728 static uint64_t vmxnet3_get_command_status(VMXNET3State *s)
1729 {
1730     uint64_t ret;
1731 
1732     switch (s->last_command) {
1733     case VMXNET3_CMD_ACTIVATE_DEV:
1734         ret = (s->device_active) ? 0 : 1;
1735         VMW_CFPRN("Device active: %" PRIx64, ret);
1736         break;
1737 
1738     case VMXNET3_CMD_RESET_DEV:
1739     case VMXNET3_CMD_QUIESCE_DEV:
1740     case VMXNET3_CMD_GET_QUEUE_STATUS:
1741     case VMXNET3_CMD_GET_DEV_EXTRA_INFO:
1742         ret = 0;
1743         break;
1744 
1745     case VMXNET3_CMD_GET_LINK:
1746         ret = s->link_status_and_speed;
1747         VMW_CFPRN("Link and speed: %" PRIx64, ret);
1748         break;
1749 
1750     case VMXNET3_CMD_GET_PERM_MAC_LO:
1751         ret = vmxnet3_get_mac_low(&s->perm_mac);
1752         break;
1753 
1754     case VMXNET3_CMD_GET_PERM_MAC_HI:
1755         ret = vmxnet3_get_mac_high(&s->perm_mac);
1756         break;
1757 
1758     case VMXNET3_CMD_GET_CONF_INTR:
1759         ret = vmxnet3_get_interrupt_config(s);
1760         break;
1761 
1762     case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
1763         ret = VMXNET3_DISABLE_ADAPTIVE_RING;
1764         break;
1765 
1766     case VMXNET3_CMD_GET_DID_LO:
1767         ret = PCI_DEVICE_ID_VMWARE_VMXNET3;
1768         break;
1769 
1770     case VMXNET3_CMD_GET_DID_HI:
1771         ret = VMXNET3_DEVICE_REVISION;
1772         break;
1773 
1774     default:
1775         VMW_WRPRN("Received request for unknown command: %x", s->last_command);
1776         ret = 0;
1777         break;
1778     }
1779 
1780     return ret;
1781 }
1782 
1783 static void vmxnet3_set_events(VMXNET3State *s, uint32_t val)
1784 {
1785     uint32_t events;
1786     PCIDevice *d = PCI_DEVICE(s);
1787 
1788     VMW_CBPRN("Setting events: 0x%x", val);
1789     events = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, ecr) | val;
1790     VMXNET3_WRITE_DRV_SHARED32(d, s->drv_shmem, ecr, events);
1791 }
1792 
1793 static void vmxnet3_ack_events(VMXNET3State *s, uint32_t val)
1794 {
1795     PCIDevice *d = PCI_DEVICE(s);
1796     uint32_t events;
1797 
1798     VMW_CBPRN("Clearing events: 0x%x", val);
1799     events = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, ecr) & ~val;
1800     VMXNET3_WRITE_DRV_SHARED32(d, s->drv_shmem, ecr, events);
1801 }
1802 
1803 static void
1804 vmxnet3_io_bar1_write(void *opaque,
1805                       hwaddr addr,
1806                       uint64_t val,
1807                       unsigned size)
1808 {
1809     VMXNET3State *s = opaque;
1810 
1811     switch (addr) {
1812     /* Vmxnet3 Revision Report Selection */
1813     case VMXNET3_REG_VRRS:
1814         VMW_CBPRN("Write BAR1 [VMXNET3_REG_VRRS] = %" PRIx64 ", size %d",
1815                   val, size);
1816         break;
1817 
1818     /* UPT Version Report Selection */
1819     case VMXNET3_REG_UVRS:
1820         VMW_CBPRN("Write BAR1 [VMXNET3_REG_UVRS] = %" PRIx64 ", size %d",
1821                   val, size);
1822         break;
1823 
1824     /* Driver Shared Address Low */
1825     case VMXNET3_REG_DSAL:
1826         VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAL] = %" PRIx64 ", size %d",
1827                   val, size);
1828         /*
1829          * Guest driver will first write the low part of the shared
1830          * memory address. We save it to temp variable and set the
1831          * shared address only after we get the high part
1832          */
1833         if (val == 0) {
1834             vmxnet3_deactivate_device(s);
1835         }
1836         s->temp_shared_guest_driver_memory = val;
1837         s->drv_shmem = 0;
1838         break;
1839 
1840     /* Driver Shared Address High */
1841     case VMXNET3_REG_DSAH:
1842         VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAH] = %" PRIx64 ", size %d",
1843                   val, size);
1844         /*
1845          * Set the shared memory between guest driver and device.
1846          * We already should have low address part.
1847          */
1848         s->drv_shmem = s->temp_shared_guest_driver_memory | (val << 32);
1849         break;
1850 
1851     /* Command */
1852     case VMXNET3_REG_CMD:
1853         VMW_CBPRN("Write BAR1 [VMXNET3_REG_CMD] = %" PRIx64 ", size %d",
1854                   val, size);
1855         vmxnet3_handle_command(s, val);
1856         break;
1857 
1858     /* MAC Address Low */
1859     case VMXNET3_REG_MACL:
1860         VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACL] = %" PRIx64 ", size %d",
1861                   val, size);
1862         s->temp_mac = val;
1863         break;
1864 
1865     /* MAC Address High */
1866     case VMXNET3_REG_MACH:
1867         VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACH] = %" PRIx64 ", size %d",
1868                   val, size);
1869         vmxnet3_set_variable_mac(s, val, s->temp_mac);
1870         break;
1871 
1872     /* Interrupt Cause Register */
1873     case VMXNET3_REG_ICR:
1874         VMW_CBPRN("Write BAR1 [VMXNET3_REG_ICR] = %" PRIx64 ", size %d",
1875                   val, size);
1876         g_assert_not_reached();
1877         break;
1878 
1879     /* Event Cause Register */
1880     case VMXNET3_REG_ECR:
1881         VMW_CBPRN("Write BAR1 [VMXNET3_REG_ECR] = %" PRIx64 ", size %d",
1882                   val, size);
1883         vmxnet3_ack_events(s, val);
1884         break;
1885 
1886     default:
1887         VMW_CBPRN("Unknown Write to BAR1 [%" PRIx64 "] = %" PRIx64 ", size %d",
1888                   addr, val, size);
1889         break;
1890     }
1891 }
1892 
1893 static uint64_t
1894 vmxnet3_io_bar1_read(void *opaque, hwaddr addr, unsigned size)
1895 {
1896         VMXNET3State *s = opaque;
1897         uint64_t ret = 0;
1898 
1899         switch (addr) {
1900         /* Vmxnet3 Revision Report Selection */
1901         case VMXNET3_REG_VRRS:
1902             VMW_CBPRN("Read BAR1 [VMXNET3_REG_VRRS], size %d", size);
1903             ret = VMXNET3_DEVICE_REVISION;
1904             break;
1905 
1906         /* UPT Version Report Selection */
1907         case VMXNET3_REG_UVRS:
1908             VMW_CBPRN("Read BAR1 [VMXNET3_REG_UVRS], size %d", size);
1909             ret = VMXNET3_UPT_REVISION;
1910             break;
1911 
1912         /* Command */
1913         case VMXNET3_REG_CMD:
1914             VMW_CBPRN("Read BAR1 [VMXNET3_REG_CMD], size %d", size);
1915             ret = vmxnet3_get_command_status(s);
1916             break;
1917 
1918         /* MAC Address Low */
1919         case VMXNET3_REG_MACL:
1920             VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACL], size %d", size);
1921             ret = vmxnet3_get_mac_low(&s->conf.macaddr);
1922             break;
1923 
1924         /* MAC Address High */
1925         case VMXNET3_REG_MACH:
1926             VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACH], size %d", size);
1927             ret = vmxnet3_get_mac_high(&s->conf.macaddr);
1928             break;
1929 
1930         /*
1931          * Interrupt Cause Register
1932          * Used for legacy interrupts only so interrupt index always 0
1933          */
1934         case VMXNET3_REG_ICR:
1935             VMW_CBPRN("Read BAR1 [VMXNET3_REG_ICR], size %d", size);
1936             if (vmxnet3_interrupt_asserted(s, 0)) {
1937                 vmxnet3_clear_interrupt(s, 0);
1938                 ret = true;
1939             } else {
1940                 ret = false;
1941             }
1942             break;
1943 
1944         default:
1945             VMW_CBPRN("Unknow read BAR1[%" PRIx64 "], %d bytes", addr, size);
1946             break;
1947         }
1948 
1949         return ret;
1950 }
1951 
1952 static int
1953 vmxnet3_can_receive(NetClientState *nc)
1954 {
1955     VMXNET3State *s = qemu_get_nic_opaque(nc);
1956     return s->device_active &&
1957            VMXNET_FLAG_IS_SET(s->link_status_and_speed, VMXNET3_LINK_STATUS_UP);
1958 }
1959 
1960 static inline bool
1961 vmxnet3_is_registered_vlan(VMXNET3State *s, const void *data)
1962 {
1963     uint16_t vlan_tag = eth_get_pkt_tci(data) & VLAN_VID_MASK;
1964     if (IS_SPECIAL_VLAN_ID(vlan_tag)) {
1965         return true;
1966     }
1967 
1968     return VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, vlan_tag);
1969 }
1970 
1971 static bool
1972 vmxnet3_is_allowed_mcast_group(VMXNET3State *s, const uint8_t *group_mac)
1973 {
1974     int i;
1975     for (i = 0; i < s->mcast_list_len; i++) {
1976         if (!memcmp(group_mac, s->mcast_list[i].a, sizeof(s->mcast_list[i]))) {
1977             return true;
1978         }
1979     }
1980     return false;
1981 }
1982 
1983 static bool
1984 vmxnet3_rx_filter_may_indicate(VMXNET3State *s, const void *data,
1985     size_t size)
1986 {
1987     struct eth_header *ehdr = PKT_GET_ETH_HDR(data);
1988 
1989     if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_PROMISC)) {
1990         return true;
1991     }
1992 
1993     if (!vmxnet3_is_registered_vlan(s, data)) {
1994         return false;
1995     }
1996 
1997     switch (net_rx_pkt_get_packet_type(s->rx_pkt)) {
1998     case ETH_PKT_UCAST:
1999         if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_UCAST)) {
2000             return false;
2001         }
2002         if (memcmp(s->conf.macaddr.a, ehdr->h_dest, ETH_ALEN)) {
2003             return false;
2004         }
2005         break;
2006 
2007     case ETH_PKT_BCAST:
2008         if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_BCAST)) {
2009             return false;
2010         }
2011         break;
2012 
2013     case ETH_PKT_MCAST:
2014         if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_ALL_MULTI)) {
2015             return true;
2016         }
2017         if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_MCAST)) {
2018             return false;
2019         }
2020         if (!vmxnet3_is_allowed_mcast_group(s, ehdr->h_dest)) {
2021             return false;
2022         }
2023         break;
2024 
2025     default:
2026         g_assert_not_reached();
2027     }
2028 
2029     return true;
2030 }
2031 
2032 static ssize_t
2033 vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size)
2034 {
2035     VMXNET3State *s = qemu_get_nic_opaque(nc);
2036     size_t bytes_indicated;
2037     uint8_t min_buf[MIN_BUF_SIZE];
2038 
2039     if (!vmxnet3_can_receive(nc)) {
2040         VMW_PKPRN("Cannot receive now");
2041         return -1;
2042     }
2043 
2044     if (s->peer_has_vhdr) {
2045         net_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf);
2046         buf += sizeof(struct virtio_net_hdr);
2047         size -= sizeof(struct virtio_net_hdr);
2048     }
2049 
2050     /* Pad to minimum Ethernet frame length */
2051     if (size < sizeof(min_buf)) {
2052         memcpy(min_buf, buf, size);
2053         memset(&min_buf[size], 0, sizeof(min_buf) - size);
2054         buf = min_buf;
2055         size = sizeof(min_buf);
2056     }
2057 
2058     net_rx_pkt_set_packet_type(s->rx_pkt,
2059         get_eth_packet_type(PKT_GET_ETH_HDR(buf)));
2060 
2061     if (vmxnet3_rx_filter_may_indicate(s, buf, size)) {
2062         net_rx_pkt_set_protocols(s->rx_pkt, buf, size);
2063         vmxnet3_rx_need_csum_calculate(s->rx_pkt, buf, size);
2064         net_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping);
2065         bytes_indicated = vmxnet3_indicate_packet(s) ? size : -1;
2066         if (bytes_indicated < size) {
2067             VMW_PKPRN("RX: %zu of %zu bytes indicated", bytes_indicated, size);
2068         }
2069     } else {
2070         VMW_PKPRN("Packet dropped by RX filter");
2071         bytes_indicated = size;
2072     }
2073 
2074     assert(size > 0);
2075     assert(bytes_indicated != 0);
2076     return bytes_indicated;
2077 }
2078 
2079 static void vmxnet3_set_link_status(NetClientState *nc)
2080 {
2081     VMXNET3State *s = qemu_get_nic_opaque(nc);
2082 
2083     if (nc->link_down) {
2084         s->link_status_and_speed &= ~VMXNET3_LINK_STATUS_UP;
2085     } else {
2086         s->link_status_and_speed |= VMXNET3_LINK_STATUS_UP;
2087     }
2088 
2089     vmxnet3_set_events(s, VMXNET3_ECR_LINK);
2090     vmxnet3_trigger_interrupt(s, s->event_int_idx);
2091 }
2092 
2093 static NetClientInfo net_vmxnet3_info = {
2094         .type = NET_CLIENT_DRIVER_NIC,
2095         .size = sizeof(NICState),
2096         .receive = vmxnet3_receive,
2097         .link_status_changed = vmxnet3_set_link_status,
2098 };
2099 
2100 static bool vmxnet3_peer_has_vnet_hdr(VMXNET3State *s)
2101 {
2102     NetClientState *nc = qemu_get_queue(s->nic);
2103 
2104     if (qemu_has_vnet_hdr(nc->peer)) {
2105         return true;
2106     }
2107 
2108     return false;
2109 }
2110 
2111 static void vmxnet3_net_uninit(VMXNET3State *s)
2112 {
2113     g_free(s->mcast_list);
2114     vmxnet3_deactivate_device(s);
2115     qemu_del_nic(s->nic);
2116 }
2117 
2118 static void vmxnet3_net_init(VMXNET3State *s)
2119 {
2120     DeviceState *d = DEVICE(s);
2121 
2122     VMW_CBPRN("vmxnet3_net_init called...");
2123 
2124     qemu_macaddr_default_if_unset(&s->conf.macaddr);
2125 
2126     /* Windows guest will query the address that was set on init */
2127     memcpy(&s->perm_mac.a, &s->conf.macaddr.a, sizeof(s->perm_mac.a));
2128 
2129     s->mcast_list = NULL;
2130     s->mcast_list_len = 0;
2131 
2132     s->link_status_and_speed = VMXNET3_LINK_SPEED | VMXNET3_LINK_STATUS_UP;
2133 
2134     VMW_CFPRN("Permanent MAC: " MAC_FMT, MAC_ARG(s->perm_mac.a));
2135 
2136     s->nic = qemu_new_nic(&net_vmxnet3_info, &s->conf,
2137                           object_get_typename(OBJECT(s)),
2138                           d->id, s);
2139 
2140     s->peer_has_vhdr = vmxnet3_peer_has_vnet_hdr(s);
2141     s->tx_sop = true;
2142     s->skip_current_tx_pkt = false;
2143     s->tx_pkt = NULL;
2144     s->rx_pkt = NULL;
2145     s->rx_vlan_stripping = false;
2146     s->lro_supported = false;
2147 
2148     if (s->peer_has_vhdr) {
2149         qemu_set_vnet_hdr_len(qemu_get_queue(s->nic)->peer,
2150             sizeof(struct virtio_net_hdr));
2151 
2152         qemu_using_vnet_hdr(qemu_get_queue(s->nic)->peer, 1);
2153     }
2154 
2155     qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
2156 }
2157 
2158 static void
2159 vmxnet3_unuse_msix_vectors(VMXNET3State *s, int num_vectors)
2160 {
2161     PCIDevice *d = PCI_DEVICE(s);
2162     int i;
2163     for (i = 0; i < num_vectors; i++) {
2164         msix_vector_unuse(d, i);
2165     }
2166 }
2167 
2168 static bool
2169 vmxnet3_use_msix_vectors(VMXNET3State *s, int num_vectors)
2170 {
2171     PCIDevice *d = PCI_DEVICE(s);
2172     int i;
2173     for (i = 0; i < num_vectors; i++) {
2174         int res = msix_vector_use(d, i);
2175         if (0 > res) {
2176             VMW_WRPRN("Failed to use MSI-X vector %d, error %d", i, res);
2177             vmxnet3_unuse_msix_vectors(s, i);
2178             return false;
2179         }
2180     }
2181     return true;
2182 }
2183 
2184 static bool
2185 vmxnet3_init_msix(VMXNET3State *s)
2186 {
2187     PCIDevice *d = PCI_DEVICE(s);
2188     int res = msix_init(d, VMXNET3_MAX_INTRS,
2189                         &s->msix_bar,
2190                         VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_TABLE,
2191                         &s->msix_bar,
2192                         VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA(s),
2193                         VMXNET3_MSIX_OFFSET(s));
2194 
2195     if (0 > res) {
2196         VMW_WRPRN("Failed to initialize MSI-X, error %d", res);
2197         s->msix_used = false;
2198     } else {
2199         if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
2200             VMW_WRPRN("Failed to use MSI-X vectors, error %d", res);
2201             msix_uninit(d, &s->msix_bar, &s->msix_bar);
2202             s->msix_used = false;
2203         } else {
2204             s->msix_used = true;
2205         }
2206     }
2207     return s->msix_used;
2208 }
2209 
2210 static void
2211 vmxnet3_cleanup_msix(VMXNET3State *s)
2212 {
2213     PCIDevice *d = PCI_DEVICE(s);
2214 
2215     if (s->msix_used) {
2216         vmxnet3_unuse_msix_vectors(s, VMXNET3_MAX_INTRS);
2217         msix_uninit(d, &s->msix_bar, &s->msix_bar);
2218     }
2219 }
2220 
2221 static void
2222 vmxnet3_cleanup_msi(VMXNET3State *s)
2223 {
2224     PCIDevice *d = PCI_DEVICE(s);
2225 
2226     msi_uninit(d);
2227 }
2228 
2229 static void
2230 vmxnet3_msix_save(QEMUFile *f, void *opaque)
2231 {
2232     PCIDevice *d = PCI_DEVICE(opaque);
2233     msix_save(d, f);
2234 }
2235 
2236 static int
2237 vmxnet3_msix_load(QEMUFile *f, void *opaque, int version_id)
2238 {
2239     PCIDevice *d = PCI_DEVICE(opaque);
2240     msix_load(d, f);
2241     return 0;
2242 }
2243 
2244 static const MemoryRegionOps b0_ops = {
2245     .read = vmxnet3_io_bar0_read,
2246     .write = vmxnet3_io_bar0_write,
2247     .endianness = DEVICE_LITTLE_ENDIAN,
2248     .impl = {
2249             .min_access_size = 4,
2250             .max_access_size = 4,
2251     },
2252 };
2253 
2254 static const MemoryRegionOps b1_ops = {
2255     .read = vmxnet3_io_bar1_read,
2256     .write = vmxnet3_io_bar1_write,
2257     .endianness = DEVICE_LITTLE_ENDIAN,
2258     .impl = {
2259             .min_access_size = 4,
2260             .max_access_size = 4,
2261     },
2262 };
2263 
2264 static uint64_t vmxnet3_device_serial_num(VMXNET3State *s)
2265 {
2266     uint64_t dsn_payload;
2267     uint8_t *dsnp = (uint8_t *)&dsn_payload;
2268 
2269     dsnp[0] = 0xfe;
2270     dsnp[1] = s->conf.macaddr.a[3];
2271     dsnp[2] = s->conf.macaddr.a[4];
2272     dsnp[3] = s->conf.macaddr.a[5];
2273     dsnp[4] = s->conf.macaddr.a[0];
2274     dsnp[5] = s->conf.macaddr.a[1];
2275     dsnp[6] = s->conf.macaddr.a[2];
2276     dsnp[7] = 0xff;
2277     return dsn_payload;
2278 }
2279 
2280 
/*
 * MSI capability options handed to msi_init() in vmxnet3_pci_realize():
 * the 64-bit flag and the per-vector masking flag.
 */
#define VMXNET3_USE_64BIT         (true)
#define VMXNET3_PER_VECTOR_MASK   (false)
2283 
2284 static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
2285 {
2286     DeviceState *dev = DEVICE(pci_dev);
2287     VMXNET3State *s = VMXNET3(pci_dev);
2288     int ret;
2289 
2290     VMW_CBPRN("Starting init...");
2291 
2292     memory_region_init_io(&s->bar0, OBJECT(s), &b0_ops, s,
2293                           "vmxnet3-b0", VMXNET3_PT_REG_SIZE);
2294     pci_register_bar(pci_dev, VMXNET3_BAR0_IDX,
2295                      PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar0);
2296 
2297     memory_region_init_io(&s->bar1, OBJECT(s), &b1_ops, s,
2298                           "vmxnet3-b1", VMXNET3_VD_REG_SIZE);
2299     pci_register_bar(pci_dev, VMXNET3_BAR1_IDX,
2300                      PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar1);
2301 
2302     memory_region_init(&s->msix_bar, OBJECT(s), "vmxnet3-msix-bar",
2303                        VMXNET3_MSIX_BAR_SIZE);
2304     pci_register_bar(pci_dev, VMXNET3_MSIX_BAR_IDX,
2305                      PCI_BASE_ADDRESS_SPACE_MEMORY, &s->msix_bar);
2306 
2307     vmxnet3_reset_interrupt_states(s);
2308 
2309     /* Interrupt pin A */
2310     pci_dev->config[PCI_INTERRUPT_PIN] = 0x01;
2311 
2312     ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS,
2313                    VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK, NULL);
2314     /* Any error other than -ENOTSUP(board's MSI support is broken)
2315      * is a programming error. Fall back to INTx silently on -ENOTSUP */
2316     assert(!ret || ret == -ENOTSUP);
2317 
2318     if (!vmxnet3_init_msix(s)) {
2319         VMW_WRPRN("Failed to initialize MSI-X, configuration is inconsistent.");
2320     }
2321 
2322     vmxnet3_net_init(s);
2323 
2324     if (pci_is_express(pci_dev)) {
2325         if (pci_bus_is_express(pci_dev->bus)) {
2326             pcie_endpoint_cap_init(pci_dev, VMXNET3_EXP_EP_OFFSET);
2327         }
2328 
2329         pcie_dev_ser_num_init(pci_dev, VMXNET3_DSN_OFFSET,
2330                               vmxnet3_device_serial_num(s));
2331     }
2332 
2333     register_savevm(dev, "vmxnet3-msix", -1, 1,
2334                     vmxnet3_msix_save, vmxnet3_msix_load, s);
2335 }
2336 
2337 static void vmxnet3_instance_init(Object *obj)
2338 {
2339     VMXNET3State *s = VMXNET3(obj);
2340     device_add_bootindex_property(obj, &s->conf.bootindex,
2341                                   "bootindex", "/ethernet-phy@0",
2342                                   DEVICE(obj), NULL);
2343 }
2344 
2345 static void vmxnet3_pci_uninit(PCIDevice *pci_dev)
2346 {
2347     DeviceState *dev = DEVICE(pci_dev);
2348     VMXNET3State *s = VMXNET3(pci_dev);
2349 
2350     VMW_CBPRN("Starting uninit...");
2351 
2352     unregister_savevm(dev, "vmxnet3-msix", s);
2353 
2354     vmxnet3_net_uninit(s);
2355 
2356     vmxnet3_cleanup_msix(s);
2357 
2358     vmxnet3_cleanup_msi(s);
2359 }
2360 
2361 static void vmxnet3_qdev_reset(DeviceState *dev)
2362 {
2363     PCIDevice *d = PCI_DEVICE(dev);
2364     VMXNET3State *s = VMXNET3(d);
2365 
2366     VMW_CBPRN("Starting QDEV reset...");
2367     vmxnet3_reset(s);
2368 }
2369 
/*
 * .needed callback of the multicast list subsection.
 * The subsection is always sent, so @opaque is not looked at.
 */
static bool vmxnet3_mc_list_needed(void *opaque)
{
    (void)opaque;

    return true;
}
2374 
2375 static int vmxnet3_mcast_list_pre_load(void *opaque)
2376 {
2377     VMXNET3State *s = opaque;
2378 
2379     s->mcast_list = g_malloc(s->mcast_list_buff_size);
2380 
2381     return 0;
2382 }
2383 
2384 
2385 static void vmxnet3_pre_save(void *opaque)
2386 {
2387     VMXNET3State *s = opaque;
2388 
2389     s->mcast_list_buff_size = s->mcast_list_len * sizeof(MACAddr);
2390 }
2391 
/*
 * Migration subsection carrying the multicast list.
 *
 * The list is a variable-size buffer whose length is taken from
 * mcast_list_buff_size; vmxnet3_mcast_list_pre_load() allocates it before
 * the field is read. vmxnet3_mc_list_needed() makes the subsection
 * unconditional.
 */
static const VMStateDescription vmxstate_vmxnet3_mcast_list = {
    .name = "vmxnet3/mcast_list",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = vmxnet3_mcast_list_pre_load,
    .needed = vmxnet3_mc_list_needed,
    .fields = (VMStateField[]) {
        VMSTATE_VBUFFER_UINT32(mcast_list, VMXNET3State, 0, NULL, 0,
            mcast_list_buff_size),
        VMSTATE_END_OF_LIST()
    }
};
2404 
/*
 * Read one ring descriptor from the migration stream @f into @r.
 *
 * The read order (pa, size, cell_size, next, gen) mirrors
 * vmxnet3_put_ring_to_file() and must not be changed.
 */
static void vmxnet3_get_ring_from_file(QEMUFile *f, Vmxnet3Ring *r)
{
    r->pa = qemu_get_be64(f);
    r->size = qemu_get_be32(f);
    r->cell_size = qemu_get_be32(f);
    r->next = qemu_get_be32(f);
    r->gen = qemu_get_byte(f);
}
2413 
/*
 * Write one ring descriptor @r to the migration stream @f.
 *
 * The write order (pa, size, cell_size, next, gen) mirrors
 * vmxnet3_get_ring_from_file() and must not be changed.
 */
static void vmxnet3_put_ring_to_file(QEMUFile *f, Vmxnet3Ring *r)
{
    qemu_put_be64(f, r->pa);
    qemu_put_be32(f, r->size);
    qemu_put_be32(f, r->cell_size);
    qemu_put_be32(f, r->next);
    qemu_put_byte(f, r->gen);
}
2422 
/*
 * Read the TX statistics counters from the migration stream @f into
 * @tx_stat. Ten 64-bit values are read, in the same order
 * vmxnet3_put_tx_stats_to_file() writes them.
 */
static void vmxnet3_get_tx_stats_from_file(QEMUFile *f,
    struct UPT1_TxStats *tx_stat)
{
    tx_stat->TSOPktsTxOK = qemu_get_be64(f);
    tx_stat->TSOBytesTxOK = qemu_get_be64(f);
    tx_stat->ucastPktsTxOK = qemu_get_be64(f);
    tx_stat->ucastBytesTxOK = qemu_get_be64(f);
    tx_stat->mcastPktsTxOK = qemu_get_be64(f);
    tx_stat->mcastBytesTxOK = qemu_get_be64(f);
    tx_stat->bcastPktsTxOK = qemu_get_be64(f);
    tx_stat->bcastBytesTxOK = qemu_get_be64(f);
    tx_stat->pktsTxError = qemu_get_be64(f);
    tx_stat->pktsTxDiscard = qemu_get_be64(f);
}
2437 
/*
 * Write the TX statistics counters of @tx_stat to the migration stream @f.
 * Ten 64-bit values are written, in the same order
 * vmxnet3_get_tx_stats_from_file() reads them.
 */
static void vmxnet3_put_tx_stats_to_file(QEMUFile *f,
    struct UPT1_TxStats *tx_stat)
{
    qemu_put_be64(f, tx_stat->TSOPktsTxOK);
    qemu_put_be64(f, tx_stat->TSOBytesTxOK);
    qemu_put_be64(f, tx_stat->ucastPktsTxOK);
    qemu_put_be64(f, tx_stat->ucastBytesTxOK);
    qemu_put_be64(f, tx_stat->mcastPktsTxOK);
    qemu_put_be64(f, tx_stat->mcastBytesTxOK);
    qemu_put_be64(f, tx_stat->bcastPktsTxOK);
    qemu_put_be64(f, tx_stat->bcastBytesTxOK);
    qemu_put_be64(f, tx_stat->pktsTxError);
    qemu_put_be64(f, tx_stat->pktsTxDiscard);
}
2452 
/*
 * VMStateInfo .get callback for a TX queue descriptor.
 *
 * Reads, in stream order: the TX ring, the completion ring, the interrupt
 * index (one byte), the statistics address (64 bit) and the TX statistics.
 * @pv points to the Vmxnet3TxqDescr to fill; @size is not used.
 * Always returns 0.
 */
static int vmxnet3_get_txq_descr(QEMUFile *f, void *pv, size_t size)
{
    Vmxnet3TxqDescr *r = pv;

    vmxnet3_get_ring_from_file(f, &r->tx_ring);
    vmxnet3_get_ring_from_file(f, &r->comp_ring);
    r->intr_idx = qemu_get_byte(f);
    r->tx_stats_pa = qemu_get_be64(f);

    vmxnet3_get_tx_stats_from_file(f, &r->txq_stats);

    return 0;
}
2466 
/*
 * VMStateInfo .put callback for a TX queue descriptor.
 *
 * Writes, in stream order: the TX ring, the completion ring, the interrupt
 * index (one byte), the statistics address (64 bit) and the TX statistics.
 * @pv points to the Vmxnet3TxqDescr to save; @size is not used.
 */
static void vmxnet3_put_txq_descr(QEMUFile *f, void *pv, size_t size)
{
    Vmxnet3TxqDescr *r = pv;

    vmxnet3_put_ring_to_file(f, &r->tx_ring);
    vmxnet3_put_ring_to_file(f, &r->comp_ring);
    qemu_put_byte(f, r->intr_idx);
    qemu_put_be64(f, r->tx_stats_pa);
    vmxnet3_put_tx_stats_to_file(f, &r->txq_stats);
}
2477 
2478 static const VMStateInfo txq_descr_info = {
2479     .name = "txq_descr",
2480     .get = vmxnet3_get_txq_descr,
2481     .put = vmxnet3_put_txq_descr
2482 };
2483 
/*
 * Read the RX statistics counters from the migration stream @f into
 * @rx_stat. Ten 64-bit values are read, in the same order
 * vmxnet3_put_rx_stats_to_file() writes them.
 */
static void vmxnet3_get_rx_stats_from_file(QEMUFile *f,
    struct UPT1_RxStats *rx_stat)
{
    rx_stat->LROPktsRxOK = qemu_get_be64(f);
    rx_stat->LROBytesRxOK = qemu_get_be64(f);
    rx_stat->ucastPktsRxOK = qemu_get_be64(f);
    rx_stat->ucastBytesRxOK = qemu_get_be64(f);
    rx_stat->mcastPktsRxOK = qemu_get_be64(f);
    rx_stat->mcastBytesRxOK = qemu_get_be64(f);
    rx_stat->bcastPktsRxOK = qemu_get_be64(f);
    rx_stat->bcastBytesRxOK = qemu_get_be64(f);
    rx_stat->pktsRxOutOfBuf = qemu_get_be64(f);
    rx_stat->pktsRxError = qemu_get_be64(f);
}
2498 
/*
 * Write the RX statistics counters of @rx_stat to the migration stream @f.
 * Ten 64-bit values are written, in the same order
 * vmxnet3_get_rx_stats_from_file() reads them.
 */
static void vmxnet3_put_rx_stats_to_file(QEMUFile *f,
    struct UPT1_RxStats *rx_stat)
{
    qemu_put_be64(f, rx_stat->LROPktsRxOK);
    qemu_put_be64(f, rx_stat->LROBytesRxOK);
    qemu_put_be64(f, rx_stat->ucastPktsRxOK);
    qemu_put_be64(f, rx_stat->ucastBytesRxOK);
    qemu_put_be64(f, rx_stat->mcastPktsRxOK);
    qemu_put_be64(f, rx_stat->mcastBytesRxOK);
    qemu_put_be64(f, rx_stat->bcastPktsRxOK);
    qemu_put_be64(f, rx_stat->bcastBytesRxOK);
    qemu_put_be64(f, rx_stat->pktsRxOutOfBuf);
    qemu_put_be64(f, rx_stat->pktsRxError);
}
2513 
/*
 * VMStateInfo .get callback for an RX queue descriptor.
 *
 * Reads, in stream order: VMXNET3_RX_RINGS_PER_QUEUE RX rings, the
 * completion ring, the interrupt index (one byte), the statistics address
 * (64 bit) and the RX statistics.
 * @pv points to the Vmxnet3RxqDescr to fill; @size is not used.
 * Always returns 0.
 */
static int vmxnet3_get_rxq_descr(QEMUFile *f, void *pv, size_t size)
{
    Vmxnet3RxqDescr *r = pv;
    int i;

    for (i = 0; i < VMXNET3_RX_RINGS_PER_QUEUE; i++) {
        vmxnet3_get_ring_from_file(f, &r->rx_ring[i]);
    }

    vmxnet3_get_ring_from_file(f, &r->comp_ring);
    r->intr_idx = qemu_get_byte(f);
    r->rx_stats_pa = qemu_get_be64(f);

    vmxnet3_get_rx_stats_from_file(f, &r->rxq_stats);

    return 0;
}
2531 
/*
 * VMStateInfo .put callback for an RX queue descriptor.
 *
 * Writes, in stream order: VMXNET3_RX_RINGS_PER_QUEUE RX rings, the
 * completion ring, the interrupt index (one byte), the statistics address
 * (64 bit) and the RX statistics.
 * @pv points to the Vmxnet3RxqDescr to save; @size is not used.
 */
static void vmxnet3_put_rxq_descr(QEMUFile *f, void *pv, size_t size)
{
    Vmxnet3RxqDescr *r = pv;
    int i;

    for (i = 0; i < VMXNET3_RX_RINGS_PER_QUEUE; i++) {
        vmxnet3_put_ring_to_file(f, &r->rx_ring[i]);
    }

    vmxnet3_put_ring_to_file(f, &r->comp_ring);
    qemu_put_byte(f, r->intr_idx);
    qemu_put_be64(f, r->rx_stats_pa);
    vmxnet3_put_rx_stats_to_file(f, &r->rxq_stats);
}
2546 
2547 static int vmxnet3_post_load(void *opaque, int version_id)
2548 {
2549     VMXNET3State *s = opaque;
2550     PCIDevice *d = PCI_DEVICE(s);
2551 
2552     net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s),
2553                     s->max_tx_frags, s->peer_has_vhdr);
2554     net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);
2555 
2556     if (s->msix_used) {
2557         if  (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
2558             VMW_WRPRN("Failed to re-use MSI-X vectors");
2559             msix_uninit(d, &s->msix_bar, &s->msix_bar);
2560             s->msix_used = false;
2561             return -1;
2562         }
2563     }
2564 
2565     vmxnet3_validate_queues(s);
2566     vmxnet3_validate_interrupts(s);
2567 
2568     return 0;
2569 }
2570 
2571 static const VMStateInfo rxq_descr_info = {
2572     .name = "rxq_descr",
2573     .get = vmxnet3_get_rxq_descr,
2574     .put = vmxnet3_put_rxq_descr
2575 };
2576 
/*
 * VMStateInfo .get callback for one interrupt state.
 *
 * Reads three bytes, in stream order: is_masked, is_pending, is_asserted.
 * @pv points to the Vmxnet3IntState to fill; @size is not used.
 * Always returns 0.
 */
static int vmxnet3_get_int_state(QEMUFile *f, void *pv, size_t size)
{
    Vmxnet3IntState *r = pv;

    r->is_masked = qemu_get_byte(f);
    r->is_pending = qemu_get_byte(f);
    r->is_asserted = qemu_get_byte(f);

    return 0;
}
2587 
/*
 * VMStateInfo .put callback for one interrupt state.
 *
 * Writes three bytes, in stream order: is_masked, is_pending, is_asserted.
 * @pv points to the Vmxnet3IntState to save; @size is not used.
 */
static void vmxnet3_put_int_state(QEMUFile *f, void *pv, size_t size)
{
    Vmxnet3IntState *r = pv;

    qemu_put_byte(f, r->is_masked);
    qemu_put_byte(f, r->is_pending);
    qemu_put_byte(f, r->is_asserted);
}
2596 
2597 static const VMStateInfo int_state_info = {
2598     .name = "int_state",
2599     .get = vmxnet3_get_int_state,
2600     .put = vmxnet3_put_int_state
2601 };
2602 
2603 static bool vmxnet3_vmstate_need_pcie_device(void *opaque)
2604 {
2605     VMXNET3State *s = VMXNET3(opaque);
2606 
2607     return !(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE);
2608 }
2609 
/*
 * Field test for the plain PCI device state: true exactly when the PCIe
 * subsection is not needed. @version_id is not used.
 */
static bool vmxnet3_vmstate_test_pci_device(void *opaque, int version_id)
{
    if (vmxnet3_vmstate_need_pcie_device(opaque)) {
        return false;
    }

    return true;
}
2614 
/*
 * Migration subsection carrying the parent object's state as a PCIe
 * device. It is only sent when vmxnet3_vmstate_need_pcie_device() returns
 * true.
 */
static const VMStateDescription vmstate_vmxnet3_pcie_device = {
    .name = "vmxnet3/pcie",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmxnet3_vmstate_need_pcie_device,
    .fields = (VMStateField[]) {
        VMSTATE_PCIE_DEVICE(parent_obj, VMXNET3State),
        VMSTATE_END_OF_LIST()
    }
};
2625 
/*
 * Migration description of the device state.
 *
 * The order of the entries in .fields defines the layout of the section
 * in the migration stream, so entries must not be reordered.
 *
 * vmxnet3_pre_save() computes mcast_list_buff_size before saving and
 * vmxnet3_post_load() runs after loading.
 */
static const VMStateDescription vmstate_vmxnet3 = {
    .name = "vmxnet3",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_save = vmxnet3_pre_save,
    .post_load = vmxnet3_post_load,
    .fields = (VMStateField[]) {
            /*
             * Plain PCI state of the parent object; only present when
             * vmxnet3_vmstate_test_pci_device() returns true, otherwise
             * the "vmxnet3/pcie" subsection is used.
             */
            VMSTATE_STRUCT_TEST(parent_obj, VMXNET3State,
                                vmxnet3_vmstate_test_pci_device, 0,
                                vmstate_pci_device, PCIDevice),
            VMSTATE_BOOL(rx_packets_compound, VMXNET3State),
            VMSTATE_BOOL(rx_vlan_stripping, VMXNET3State),
            VMSTATE_BOOL(lro_supported, VMXNET3State),
            VMSTATE_UINT32(rx_mode, VMXNET3State),
            VMSTATE_UINT32(mcast_list_len, VMXNET3State),
            VMSTATE_UINT32(mcast_list_buff_size, VMXNET3State),
            VMSTATE_UINT32_ARRAY(vlan_table, VMXNET3State, VMXNET3_VFT_SIZE),
            VMSTATE_UINT32(mtu, VMXNET3State),
            VMSTATE_UINT16(max_rx_frags, VMXNET3State),
            VMSTATE_UINT32(max_tx_frags, VMXNET3State),
            VMSTATE_UINT8(event_int_idx, VMXNET3State),
            VMSTATE_BOOL(auto_int_masking, VMXNET3State),
            VMSTATE_UINT8(txq_num, VMXNET3State),
            VMSTATE_UINT8(rxq_num, VMXNET3State),
            VMSTATE_UINT32(device_active, VMXNET3State),
            VMSTATE_UINT32(last_command, VMXNET3State),
            VMSTATE_UINT32(link_status_and_speed, VMXNET3State),
            VMSTATE_UINT32(temp_mac, VMXNET3State),
            VMSTATE_UINT64(drv_shmem, VMXNET3State),
            VMSTATE_UINT64(temp_shared_guest_driver_memory, VMXNET3State),

            /* Arrays serialized through the custom VMStateInfo handlers */
            VMSTATE_ARRAY(txq_descr, VMXNET3State,
                VMXNET3_DEVICE_MAX_TX_QUEUES, 0, txq_descr_info,
                Vmxnet3TxqDescr),
            VMSTATE_ARRAY(rxq_descr, VMXNET3State,
                VMXNET3_DEVICE_MAX_RX_QUEUES, 0, rxq_descr_info,
                Vmxnet3RxqDescr),
            VMSTATE_ARRAY(interrupt_states, VMXNET3State, VMXNET3_MAX_INTRS,
                0, int_state_info, Vmxnet3IntState),

            VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmxstate_vmxnet3_mcast_list,
        &vmstate_vmxnet3_pcie_device,
        NULL
    }
};
2674 
/*
 * qdev properties of the device.
 *
 * Besides the common NIC properties, two bit properties (both off by
 * default) set compatibility flags in compat_flags:
 *   "x-old-msi-offsets" - VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS, which
 *                         selects the alternative MSI/MSI-X/PBA offsets
 *   "x-disable-pcie"    - VMXNET3_COMPAT_FLAG_DISABLE_PCIE, which keeps
 *                         the device from being marked as PCI Express
 */
static Property vmxnet3_properties[] = {
    DEFINE_NIC_PROPERTIES(VMXNET3State, conf),
    DEFINE_PROP_BIT("x-old-msi-offsets", VMXNET3State, compat_flags,
                    VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT, false),
    DEFINE_PROP_BIT("x-disable-pcie", VMXNET3State, compat_flags,
                    VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT, false),
    DEFINE_PROP_END_OF_LIST(),
};
2683 
2684 static void vmxnet3_realize(DeviceState *qdev, Error **errp)
2685 {
2686     VMXNET3Class *vc = VMXNET3_DEVICE_GET_CLASS(qdev);
2687     PCIDevice *pci_dev = PCI_DEVICE(qdev);
2688     VMXNET3State *s = VMXNET3(qdev);
2689 
2690     if (!(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE)) {
2691         pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
2692     }
2693 
2694     vc->parent_dc_realize(qdev, errp);
2695 }
2696 
2697 static void vmxnet3_class_init(ObjectClass *class, void *data)
2698 {
2699     DeviceClass *dc = DEVICE_CLASS(class);
2700     PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
2701     VMXNET3Class *vc = VMXNET3_DEVICE_CLASS(class);
2702 
2703     c->realize = vmxnet3_pci_realize;
2704     c->exit = vmxnet3_pci_uninit;
2705     c->vendor_id = PCI_VENDOR_ID_VMWARE;
2706     c->device_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
2707     c->revision = PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION;
2708     c->romfile = "efi-vmxnet3.rom";
2709     c->class_id = PCI_CLASS_NETWORK_ETHERNET;
2710     c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE;
2711     c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
2712     vc->parent_dc_realize = dc->realize;
2713     dc->realize = vmxnet3_realize;
2714     dc->desc = "VMWare Paravirtualized Ethernet v3";
2715     dc->reset = vmxnet3_qdev_reset;
2716     dc->vmsd = &vmstate_vmxnet3;
2717     dc->props = vmxnet3_properties;
2718     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
2719 }
2720 
2721 static const TypeInfo vmxnet3_info = {
2722     .name          = TYPE_VMXNET3,
2723     .parent        = TYPE_PCI_DEVICE,
2724     .class_size    = sizeof(VMXNET3Class),
2725     .instance_size = sizeof(VMXNET3State),
2726     .class_init    = vmxnet3_class_init,
2727     .instance_init = vmxnet3_instance_init,
2728 };
2729 
2730 static void vmxnet3_register_types(void)
2731 {
2732     VMW_CBPRN("vmxnet3_register_types called...");
2733     type_register_static(&vmxnet3_info);
2734 }
2735 
2736 type_init(vmxnet3_register_types)
2737