xref: /openbmc/qemu/hw/net/vmxnet3.c (revision e452053097371880910c744a5d42ae2df058a4a7)
/*
 * QEMU VMWARE VMXNET3 paravirtual NIC
 *
 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
 *
 * Developed by Daynix Computing LTD (http://www.daynix.com)
 *
 * Authors:
 * Dmitry Fleytman <dmitry@daynix.com>
 * Tamir Shomer <tamirs@daynix.com>
 * Yan Vugenfirer <yan@daynix.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "hw/hw.h"
#include "hw/pci/pci.h"
#include "hw/qdev-properties.h"
#include "net/tap.h"
#include "net/checksum.h"
#include "system/system.h"
#include "qemu/log.h"
#include "qemu/module.h"
#include "hw/pci/msix.h"
#include "hw/pci/msi.h"
#include "migration/register.h"
#include "migration/vmstate.h"

#include "vmxnet3.h"
#include "vmxnet3_defs.h"
#include "vmxnet_debug.h"
#include "vmware_utils.h"
#include "net_tx_pkt.h"
#include "net_rx_pkt.h"
#include "qom/object.h"

#define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
#define VMXNET3_MSIX_BAR_SIZE 0x2000

#define VMXNET3_EXP_EP_OFFSET (0x48)
#define VMXNET3_MSI_OFFSET    (0x84)
#define VMXNET3_MSIX_OFFSET   (0x9c)
#define VMXNET3_DSN_OFFSET    (0x100)

#define VMXNET3_BAR0_IDX      (0)
#define VMXNET3_BAR1_IDX      (1)
#define VMXNET3_MSIX_BAR_IDX  (2)

#define VMXNET3_OFF_MSIX_TABLE (0x000)
#define VMXNET3_OFF_MSIX_PBA   (0x1000)

/* Link speed in Mbps should be shifted by 16 */
#define VMXNET3_LINK_SPEED      (1000 << 16)
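/*
 * Example: the guest reads (1000 << 16) = 0x03E80000 and recovers the
 * link speed from bits 31:16, i.e. 0x03E8 = 1000 Mbps.
 */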

/* Link status: 1 - up, 0 - down. */
#define VMXNET3_LINK_STATUS_UP  0x1

/* Least significant bit should be set for revision and version */
#define VMXNET3_UPT_REVISION      0x1
#define VMXNET3_DEVICE_REVISION   0x1

/* Number of interrupt vectors for non-MSIx modes */
#define VMXNET3_MAX_NMSIX_INTRS   (1)

/* Macros for ring descriptor access */
#define VMXNET3_READ_TX_QUEUE_DESCR8(_d, dpa, field) \
    (vmw_shmem_ld8(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))

#define VMXNET3_WRITE_TX_QUEUE_DESCR8(_d, dpa, field, value) \
    (vmw_shmem_st8(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))

#define VMXNET3_READ_TX_QUEUE_DESCR32(_d, dpa, field) \
    (vmw_shmem_ld32(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))

#define VMXNET3_WRITE_TX_QUEUE_DESCR32(_d, dpa, field, value) \
    (vmw_shmem_st32(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))

#define VMXNET3_READ_TX_QUEUE_DESCR64(_d, dpa, field) \
    (vmw_shmem_ld64(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))

#define VMXNET3_WRITE_TX_QUEUE_DESCR64(_d, dpa, field, value) \
    (vmw_shmem_st64(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))

#define VMXNET3_READ_RX_QUEUE_DESCR64(_d, dpa, field) \
    (vmw_shmem_ld64(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))

#define VMXNET3_READ_RX_QUEUE_DESCR32(_d, dpa, field) \
    (vmw_shmem_ld32(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))

#define VMXNET3_WRITE_RX_QUEUE_DESCR64(_d, dpa, field, value) \
    (vmw_shmem_st64(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value))

#define VMXNET3_WRITE_RX_QUEUE_DESCR8(_d, dpa, field, value) \
    (vmw_shmem_st8(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value))

/* Macros for guest driver shared area access */
#define VMXNET3_READ_DRV_SHARED64(_d, shpa, field) \
    (vmw_shmem_ld64(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_READ_DRV_SHARED32(_d, shpa, field) \
    (vmw_shmem_ld32(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_WRITE_DRV_SHARED32(_d, shpa, field, val) \
    (vmw_shmem_st32(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field), val))

#define VMXNET3_READ_DRV_SHARED16(_d, shpa, field) \
    (vmw_shmem_ld16(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_READ_DRV_SHARED8(_d, shpa, field) \
    (vmw_shmem_ld8(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_READ_DRV_SHARED(_d, shpa, field, b, l) \
    (vmw_shmem_read(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field), b, l))

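/*
 * True only if ALL bits of 'flag' are set in 'field', e.g.
 * VMXNET_FLAG_IS_SET(0x6, 0x2) is true, VMXNET_FLAG_IS_SET(0x4, 0x6)
 * is false.
 */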
#define VMXNET_FLAG_IS_SET(field, flag) (((field) & (flag)) == (flag))

struct VMXNET3Class {
    PCIDeviceClass parent_class;
    DeviceRealize parent_dc_realize;
};
typedef struct VMXNET3Class VMXNET3Class;

DECLARE_CLASS_CHECKERS(VMXNET3Class, VMXNET3_DEVICE,
                       TYPE_VMXNET3)

static inline void vmxnet3_ring_init(PCIDevice *d,
                                     Vmxnet3Ring *ring,
                                     hwaddr pa,
                                     uint32_t size,
                                     uint32_t cell_size,
                                     bool zero_region)
{
    ring->pa = pa;
    ring->size = size;
    ring->cell_size = cell_size;
    ring->gen = VMXNET3_INIT_GEN;
    ring->next = 0;

    if (zero_region) {
        vmw_shmem_set(d, pa, 0, size * cell_size);
    }
}

#define VMXNET3_RING_DUMP(macro, ring_name, ridx, r)                         \
    macro("%s#%d: base %" PRIx64 " size %u cell_size %u gen %d next %u",     \
          (ring_name), (ridx),                                               \
          (r)->pa, (r)->size, (r)->cell_size, (r)->gen, (r)->next)

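/*
 * Rings use the vmxnet3 "generation bit" scheme: the gen flag flips on
 * every wrap-around, so a descriptor whose gen matches the ring's
 * current gen is known to have been (re)written for the current pass.
 */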
static inline void vmxnet3_ring_inc(Vmxnet3Ring *ring)
{
    if (++ring->next >= ring->size) {
        ring->next = 0;
        ring->gen ^= 1;
    }
}

static inline void vmxnet3_ring_dec(Vmxnet3Ring *ring)
{
    if (ring->next-- == 0) {
        ring->next = ring->size - 1;
        ring->gen ^= 1;
    }
}

static inline hwaddr vmxnet3_ring_curr_cell_pa(Vmxnet3Ring *ring)
{
    return ring->pa + ring->next * ring->cell_size;
}

static inline void vmxnet3_ring_read_curr_cell(PCIDevice *d, Vmxnet3Ring *ring,
                                               void *buff)
{
    vmw_shmem_read(d, vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
}

static inline void vmxnet3_ring_write_curr_cell(PCIDevice *d, Vmxnet3Ring *ring,
                                                void *buff)
{
    vmw_shmem_write(d, vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
}

static inline size_t vmxnet3_ring_curr_cell_idx(Vmxnet3Ring *ring)
{
    return ring->next;
}

static inline uint8_t vmxnet3_ring_curr_gen(Vmxnet3Ring *ring)
{
    return ring->gen;
}

/* Debug trace-related functions */
static inline void
vmxnet3_dump_tx_descr(struct Vmxnet3_TxDesc *descr)
{
    VMW_PKPRN("TX DESCR: "
              "addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
              "dtype: %d, ext1: %d, msscof: %d, hlen: %d, om: %d, "
              "eop: %d, cq: %d, ext2: %d, ti: %d, tci: %d",
              descr->addr, descr->len, descr->gen, descr->rsvd,
              descr->dtype, descr->ext1, descr->msscof, descr->hlen, descr->om,
              descr->eop, descr->cq, descr->ext2, descr->ti, descr->tci);
}

static inline void
vmxnet3_dump_virt_hdr(struct virtio_net_hdr *vhdr)
{
    VMW_PKPRN("VHDR: flags 0x%x, gso_type: 0x%x, hdr_len: %d, gso_size: %d, "
              "csum_start: %d, csum_offset: %d",
              vhdr->flags, vhdr->gso_type, vhdr->hdr_len, vhdr->gso_size,
              vhdr->csum_start, vhdr->csum_offset);
}

static inline void
vmxnet3_dump_rx_descr(struct Vmxnet3_RxDesc *descr)
{
    VMW_PKPRN("RX DESCR: addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
              "dtype: %d, ext1: %d, btype: %d",
              descr->addr, descr->len, descr->gen,
              descr->rsvd, descr->dtype, descr->ext1, descr->btype);
}

/* Interrupt management */

/*
 * This function reports whether the interrupt line is left in the
 * asserted state. This depends on the type of interrupt used: for INTx
 * the line stays asserted until explicit deassertion, while for MSI(X)
 * the line is deasserted automatically due to the notification
 * semantics of MSI(X) interrupts.
 */
static bool _vmxnet3_assert_interrupt_line(VMXNET3State *s, uint32_t int_idx)
{
    PCIDevice *d = PCI_DEVICE(s);

    if (s->msix_used && msix_enabled(d)) {
        VMW_IRPRN("Sending MSI-X notification for vector %u", int_idx);
        msix_notify(d, int_idx);
        return false;
    }
    if (msi_enabled(d)) {
        VMW_IRPRN("Sending MSI notification for vector %u", int_idx);
        msi_notify(d, int_idx);
        return false;
    }

    VMW_IRPRN("Asserting line for interrupt %u", int_idx);
    pci_irq_assert(d);
    return true;
}

static void _vmxnet3_deassert_interrupt_line(VMXNET3State *s, int lidx)
{
    PCIDevice *d = PCI_DEVICE(s);

    /*
     * This function should never be called for MSI(X) interrupts
     * because deassertion is never required for message interrupts
     */
    assert(!s->msix_used || !msix_enabled(d));
    assert(!msi_enabled(d));

    VMW_IRPRN("Deasserting line for interrupt %u", lidx);
    pci_irq_deassert(d);
}

static void vmxnet3_update_interrupt_line_state(VMXNET3State *s, int lidx)
{
    if (!s->interrupt_states[lidx].is_pending &&
       s->interrupt_states[lidx].is_asserted) {
        VMW_IRPRN("New interrupt line state for index %d is DOWN", lidx);
        _vmxnet3_deassert_interrupt_line(s, lidx);
        s->interrupt_states[lidx].is_asserted = false;
        return;
    }

    if (s->interrupt_states[lidx].is_pending &&
       !s->interrupt_states[lidx].is_masked &&
       !s->interrupt_states[lidx].is_asserted) {
        VMW_IRPRN("New interrupt line state for index %d is UP", lidx);
        s->interrupt_states[lidx].is_asserted =
            _vmxnet3_assert_interrupt_line(s, lidx);
        s->interrupt_states[lidx].is_pending = false;
        return;
    }
}

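/*
 * Mark interrupt 'lidx' pending and propagate it to the line. When the
 * driver requested automatic interrupt masking (MSI/MSI-X only), the
 * vector is masked again right after delivery and must be unmasked by
 * the guest through the IMR registers.
 */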
static void vmxnet3_trigger_interrupt(VMXNET3State *s, int lidx)
{
    PCIDevice *d = PCI_DEVICE(s);
    s->interrupt_states[lidx].is_pending = true;
    vmxnet3_update_interrupt_line_state(s, lidx);

    if (s->msix_used && msix_enabled(d) && s->auto_int_masking) {
        goto do_automask;
    }

    if (msi_enabled(d) && s->auto_int_masking) {
        goto do_automask;
    }

    return;

do_automask:
    s->interrupt_states[lidx].is_masked = true;
    vmxnet3_update_interrupt_line_state(s, lidx);
}

static bool vmxnet3_interrupt_asserted(VMXNET3State *s, int lidx)
{
    return s->interrupt_states[lidx].is_asserted;
}

static void vmxnet3_clear_interrupt(VMXNET3State *s, int int_idx)
{
    s->interrupt_states[int_idx].is_pending = false;
    if (s->auto_int_masking) {
        s->interrupt_states[int_idx].is_masked = true;
    }
    vmxnet3_update_interrupt_line_state(s, int_idx);
}

static void
vmxnet3_on_interrupt_mask_changed(VMXNET3State *s, int lidx, bool is_masked)
{
    s->interrupt_states[lidx].is_masked = is_masked;
    vmxnet3_update_interrupt_line_state(s, lidx);
}

static bool vmxnet3_verify_driver_magic(PCIDevice *d, hwaddr dshmem)
{
    return (VMXNET3_READ_DRV_SHARED32(d, dshmem, magic) == VMXNET3_REV1_MAGIC);
}

#define VMXNET3_GET_BYTE(x, byte_num) (((x) >> (byte_num)*8) & 0xFF)
#define VMXNET3_MAKE_BYTE(byte_num, val) \
    (((uint32_t)((val) & 0xFF)) << (byte_num)*8)
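/*
 * Worked example: for MAC 00:11:22:33:44:55 the guest-visible halves
 * are low = 0x33221100 and high = 0x00005544 (low-order bytes first).
 */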

static void vmxnet3_set_variable_mac(VMXNET3State *s, uint32_t h, uint32_t l)
{
    s->conf.macaddr.a[0] = VMXNET3_GET_BYTE(l, 0);
    s->conf.macaddr.a[1] = VMXNET3_GET_BYTE(l, 1);
    s->conf.macaddr.a[2] = VMXNET3_GET_BYTE(l, 2);
    s->conf.macaddr.a[3] = VMXNET3_GET_BYTE(l, 3);
    s->conf.macaddr.a[4] = VMXNET3_GET_BYTE(h, 0);
    s->conf.macaddr.a[5] = VMXNET3_GET_BYTE(h, 1);

    VMW_CFPRN("Variable MAC: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));

    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
}

static uint64_t vmxnet3_get_mac_low(MACAddr *addr)
{
    return VMXNET3_MAKE_BYTE(0, addr->a[0]) |
           VMXNET3_MAKE_BYTE(1, addr->a[1]) |
           VMXNET3_MAKE_BYTE(2, addr->a[2]) |
           VMXNET3_MAKE_BYTE(3, addr->a[3]);
}

static uint64_t vmxnet3_get_mac_high(MACAddr *addr)
{
    return VMXNET3_MAKE_BYTE(0, addr->a[4]) |
           VMXNET3_MAKE_BYTE(1, addr->a[5]);
}

static void
vmxnet3_inc_tx_consumption_counter(VMXNET3State *s, int qidx)
{
    vmxnet3_ring_inc(&s->txq_descr[qidx].tx_ring);
}

static inline void
vmxnet3_inc_rx_consumption_counter(VMXNET3State *s, int qidx, int ridx)
{
    vmxnet3_ring_inc(&s->rxq_descr[qidx].rx_ring[ridx]);
}

static inline void
vmxnet3_inc_tx_completion_counter(VMXNET3State *s, int qidx)
{
    vmxnet3_ring_inc(&s->txq_descr[qidx].comp_ring);
}

static void
vmxnet3_inc_rx_completion_counter(VMXNET3State *s, int qidx)
{
    vmxnet3_ring_inc(&s->rxq_descr[qidx].comp_ring);
}

static void
vmxnet3_dec_rx_completion_counter(VMXNET3State *s, int qidx)
{
    vmxnet3_ring_dec(&s->rxq_descr[qidx].comp_ring);
}

static void vmxnet3_complete_packet(VMXNET3State *s, int qidx, uint32_t tx_ridx)
{
    struct Vmxnet3_TxCompDesc txcq_descr;
    PCIDevice *d = PCI_DEVICE(s);

    VMXNET3_RING_DUMP(VMW_RIPRN, "TXC", qidx, &s->txq_descr[qidx].comp_ring);

    memset(&txcq_descr, 0, sizeof(txcq_descr));
    txcq_descr.txdIdx = tx_ridx;
    txcq_descr.gen = vmxnet3_ring_curr_gen(&s->txq_descr[qidx].comp_ring);
    txcq_descr.val1 = cpu_to_le32(txcq_descr.val1);
    txcq_descr.val2 = cpu_to_le32(txcq_descr.val2);
    vmxnet3_ring_write_curr_cell(d, &s->txq_descr[qidx].comp_ring, &txcq_descr);

    /* Flush changes in TX descriptor before changing the counter value */
    smp_wmb();

    vmxnet3_inc_tx_completion_counter(s, qidx);
    vmxnet3_trigger_interrupt(s, s->txq_descr[qidx].intr_idx);
}

static bool
vmxnet3_setup_tx_offloads(VMXNET3State *s)
{
    switch (s->offload_mode) {
    case VMXNET3_OM_NONE:
        return net_tx_pkt_build_vheader(s->tx_pkt, false, false, 0);

    case VMXNET3_OM_CSUM:
        VMW_PKPRN("L4 CSO requested");
        return net_tx_pkt_build_vheader(s->tx_pkt, false, true, 0);

    case VMXNET3_OM_TSO:
        VMW_PKPRN("GSO offload requested.");
        if (!net_tx_pkt_build_vheader(s->tx_pkt, true, true,
            s->cso_or_gso_size)) {
            return false;
        }
        net_tx_pkt_update_ip_checksums(s->tx_pkt);
        break;

    default:
        g_assert_not_reached();
    }

    return true;
}

static void
vmxnet3_tx_retrieve_metadata(VMXNET3State *s,
                             const struct Vmxnet3_TxDesc *txd)
{
    s->offload_mode = txd->om;
    s->cso_or_gso_size = txd->msscof;
    s->tci = txd->tci;
    s->needs_vlan = txd->ti;
}

typedef enum {
    VMXNET3_PKT_STATUS_OK,
    VMXNET3_PKT_STATUS_ERROR,
    VMXNET3_PKT_STATUS_DISCARD,    /* only for tx */
    VMXNET3_PKT_STATUS_OUT_OF_BUF  /* only for rx */
} Vmxnet3PktStatus;

static void
vmxnet3_on_tx_done_update_stats(VMXNET3State *s, int qidx,
    Vmxnet3PktStatus status)
{
    size_t tot_len = net_tx_pkt_get_total_len(s->tx_pkt);
    struct UPT1_TxStats *stats = &s->txq_descr[qidx].txq_stats;

    switch (status) {
    case VMXNET3_PKT_STATUS_OK:
        switch (net_tx_pkt_get_packet_type(s->tx_pkt)) {
        case ETH_PKT_BCAST:
            stats->bcastPktsTxOK++;
            stats->bcastBytesTxOK += tot_len;
            break;
        case ETH_PKT_MCAST:
            stats->mcastPktsTxOK++;
            stats->mcastBytesTxOK += tot_len;
            break;
        case ETH_PKT_UCAST:
            stats->ucastPktsTxOK++;
            stats->ucastBytesTxOK += tot_len;
            break;
        default:
            g_assert_not_reached();
        }

        if (s->offload_mode == VMXNET3_OM_TSO) {
            /*
             * According to the VMware headers this statistic is the
             * number of packets after segmentation, but since the QEMU
             * model does not have this information, the best we can do
             * is to report the number of non-segmented packets
             */
            stats->TSOPktsTxOK++;
            stats->TSOBytesTxOK += tot_len;
        }
        break;

    case VMXNET3_PKT_STATUS_DISCARD:
        stats->pktsTxDiscard++;
        break;

    case VMXNET3_PKT_STATUS_ERROR:
        stats->pktsTxError++;
        break;

    default:
        g_assert_not_reached();
    }
}

static void
vmxnet3_on_rx_done_update_stats(VMXNET3State *s,
                                int qidx,
                                Vmxnet3PktStatus status)
{
    struct UPT1_RxStats *stats = &s->rxq_descr[qidx].rxq_stats;
    size_t tot_len = net_rx_pkt_get_total_len(s->rx_pkt);

    switch (status) {
    case VMXNET3_PKT_STATUS_OUT_OF_BUF:
        stats->pktsRxOutOfBuf++;
        break;

    case VMXNET3_PKT_STATUS_ERROR:
        stats->pktsRxError++;
        break;
    case VMXNET3_PKT_STATUS_OK:
        switch (net_rx_pkt_get_packet_type(s->rx_pkt)) {
        case ETH_PKT_BCAST:
            stats->bcastPktsRxOK++;
            stats->bcastBytesRxOK += tot_len;
            break;
        case ETH_PKT_MCAST:
            stats->mcastPktsRxOK++;
            stats->mcastBytesRxOK += tot_len;
            break;
        case ETH_PKT_UCAST:
            stats->ucastPktsRxOK++;
            stats->ucastBytesRxOK += tot_len;
            break;
        default:
            g_assert_not_reached();
        }

        if (tot_len > s->mtu) {
            stats->LROPktsRxOK++;
            stats->LROBytesRxOK += tot_len;
        }
        break;
    default:
        g_assert_not_reached();
    }
}

static inline void
vmxnet3_ring_read_curr_txdesc(PCIDevice *pcidev, Vmxnet3Ring *ring,
                              struct Vmxnet3_TxDesc *txd)
{
    vmxnet3_ring_read_curr_cell(pcidev, ring, txd);
    txd->addr = le64_to_cpu(txd->addr);
    txd->val1 = le32_to_cpu(txd->val1);
    txd->val2 = le32_to_cpu(txd->val2);
}

static inline bool
vmxnet3_pop_next_tx_descr(VMXNET3State *s,
                          int qidx,
                          struct Vmxnet3_TxDesc *txd,
                          uint32_t *descr_idx)
{
    Vmxnet3Ring *ring = &s->txq_descr[qidx].tx_ring;
    PCIDevice *d = PCI_DEVICE(s);

    vmxnet3_ring_read_curr_txdesc(d, ring, txd);
    if (txd->gen == vmxnet3_ring_curr_gen(ring)) {
        /* Only read after generation field verification */
        smp_rmb();
        /* Re-read to be sure we got the latest version */
        vmxnet3_ring_read_curr_txdesc(d, ring, txd);
        VMXNET3_RING_DUMP(VMW_RIPRN, "TX", qidx, ring);
        *descr_idx = vmxnet3_ring_curr_cell_idx(ring);
        vmxnet3_inc_tx_consumption_counter(s, qidx);
        return true;
    }

    return false;
}

static bool
vmxnet3_send_packet(VMXNET3State *s, uint32_t qidx)
{
    Vmxnet3PktStatus status = VMXNET3_PKT_STATUS_OK;

    if (!vmxnet3_setup_tx_offloads(s)) {
        status = VMXNET3_PKT_STATUS_ERROR;
        goto func_exit;
    }

    /* debug prints */
    vmxnet3_dump_virt_hdr(net_tx_pkt_get_vhdr(s->tx_pkt));
    net_tx_pkt_dump(s->tx_pkt);

    if (!net_tx_pkt_send(s->tx_pkt, qemu_get_queue(s->nic))) {
        status = VMXNET3_PKT_STATUS_DISCARD;
        goto func_exit;
    }

func_exit:
    vmxnet3_on_tx_done_update_stats(s, qidx, status);
    return (status == VMXNET3_PKT_STATUS_OK);
}

static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
{
    struct Vmxnet3_TxDesc txd;
    uint32_t txd_idx;
    uint32_t data_len;
    hwaddr data_pa;

    for (;;) {
        if (!vmxnet3_pop_next_tx_descr(s, qidx, &txd, &txd_idx)) {
            break;
        }

        vmxnet3_dump_tx_descr(&txd);

        if (!s->skip_current_tx_pkt) {
            data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE;
            data_pa = txd.addr;

            if (!net_tx_pkt_add_raw_fragment_pci(s->tx_pkt, PCI_DEVICE(s),
                                                 data_pa, data_len)) {
                s->skip_current_tx_pkt = true;
            }
        }

        if (s->tx_sop) {
            vmxnet3_tx_retrieve_metadata(s, &txd);
            s->tx_sop = false;
        }

        if (txd.eop) {
            if (!s->skip_current_tx_pkt && net_tx_pkt_parse(s->tx_pkt)) {
                if (s->needs_vlan) {
                    net_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci);
                }

                vmxnet3_send_packet(s, qidx);
            } else {
                vmxnet3_on_tx_done_update_stats(s, qidx,
                                                VMXNET3_PKT_STATUS_ERROR);
            }

            vmxnet3_complete_packet(s, qidx, txd_idx);
            s->tx_sop = true;
            s->skip_current_tx_pkt = false;
            net_tx_pkt_reset(s->tx_pkt,
                             net_tx_pkt_unmap_frag_pci, PCI_DEVICE(s));
        }
    }

    net_tx_pkt_reset(s->tx_pkt, net_tx_pkt_unmap_frag_pci, PCI_DEVICE(s));
}

static inline void
vmxnet3_read_next_rx_descr(VMXNET3State *s, int qidx, int ridx,
                           struct Vmxnet3_RxDesc *dbuf, uint32_t *didx)
{
    PCIDevice *d = PCI_DEVICE(s);

    Vmxnet3Ring *ring = &s->rxq_descr[qidx].rx_ring[ridx];
    *didx = vmxnet3_ring_curr_cell_idx(ring);
    vmxnet3_ring_read_curr_cell(d, ring, dbuf);
    dbuf->addr = le64_to_cpu(dbuf->addr);
    dbuf->val1 = le32_to_cpu(dbuf->val1);
    dbuf->ext1 = le32_to_cpu(dbuf->ext1);
}

static inline uint8_t
vmxnet3_get_rx_ring_gen(VMXNET3State *s, int qidx, int ridx)
{
    return s->rxq_descr[qidx].rx_ring[ridx].gen;
}

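/*
 * Pop the next free RX completion descriptor: a completion slot is free
 * when its gen bit differs from the ring's current gen, i.e. it has not
 * yet been written during the current pass. Returns the slot's PA or 0.
 */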
static inline hwaddr
vmxnet3_pop_rxc_descr(VMXNET3State *s, int qidx, uint32_t *descr_gen)
{
    uint8_t ring_gen;
    struct Vmxnet3_RxCompDesc rxcd;

    hwaddr daddr =
        vmxnet3_ring_curr_cell_pa(&s->rxq_descr[qidx].comp_ring);

    pci_dma_read(PCI_DEVICE(s),
                 daddr, &rxcd, sizeof(struct Vmxnet3_RxCompDesc));
    rxcd.val1 = le32_to_cpu(rxcd.val1);
    rxcd.val2 = le32_to_cpu(rxcd.val2);
    rxcd.val3 = le32_to_cpu(rxcd.val3);
    ring_gen = vmxnet3_ring_curr_gen(&s->rxq_descr[qidx].comp_ring);

    if (rxcd.gen != ring_gen) {
        *descr_gen = ring_gen;
        vmxnet3_inc_rx_completion_counter(s, qidx);
        return daddr;
    }

    return 0;
}

static inline void
vmxnet3_revert_rxc_descr(VMXNET3State *s, int qidx)
{
    vmxnet3_dec_rx_completion_counter(s, qidx);
}

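/*
 * This implementation indicates received packets on RX queue 0 only;
 * ring 0 of the queue supplies head (and body) descriptors, ring 1
 * supplies body-only descriptors for multi-fragment packets.
 */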
#define RXQ_IDX           (0)
#define RX_HEAD_BODY_RING (0)
#define RX_BODY_ONLY_RING (1)

static bool
vmxnet3_get_next_head_rx_descr(VMXNET3State *s,
                               struct Vmxnet3_RxDesc *descr_buf,
                               uint32_t *descr_idx,
                               uint32_t *ridx)
{
    for (;;) {
        uint32_t ring_gen;
        vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
                                   descr_buf, descr_idx);

        /* If no more free descriptors - return */
        ring_gen = vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING);
        if (descr_buf->gen != ring_gen) {
            return false;
        }

        /* Only read after generation field verification */
        smp_rmb();
        /* Re-read to be sure we got the latest version */
        vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
                                   descr_buf, descr_idx);

        /* Mark current descriptor as used/skipped */
        vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);

        /* If this is what we are looking for - return */
        if (descr_buf->btype == VMXNET3_RXD_BTYPE_HEAD) {
            *ridx = RX_HEAD_BODY_RING;
            return true;
        }
    }
}

static bool
vmxnet3_get_next_body_rx_descr(VMXNET3State *s,
                               struct Vmxnet3_RxDesc *d,
                               uint32_t *didx,
                               uint32_t *ridx)
{
    vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);

    /* Try to find corresponding descriptor in head/body ring */
    if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING)) {
        /* Only read after generation field verification */
        smp_rmb();
        /* Re-read to be sure we got the latest version */
        vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);
        if (d->btype == VMXNET3_RXD_BTYPE_BODY) {
            vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
            *ridx = RX_HEAD_BODY_RING;
            return true;
        }
    }

    /*
     * If there are no free descriptors on the head/body ring, or the
     * next free descriptor is a head descriptor, switch to the
     * body-only ring
     */
    vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);

    /* If no more free descriptors - return */
    if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_BODY_ONLY_RING)) {
        /* Only read after generation field verification */
        smp_rmb();
        /* Re-read to be sure we got the latest version */
        vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);
        assert(d->btype == VMXNET3_RXD_BTYPE_BODY);
        *ridx = RX_BODY_ONLY_RING;
        vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_BODY_ONLY_RING);
        return true;
    }

    return false;
}

static inline bool
vmxnet3_get_next_rx_descr(VMXNET3State *s, bool is_head,
                          struct Vmxnet3_RxDesc *descr_buf,
                          uint32_t *descr_idx,
                          uint32_t *ridx)
{
    if (is_head || !s->rx_packets_compound) {
        return vmxnet3_get_next_head_rx_descr(s, descr_buf, descr_idx, ridx);
    } else {
        return vmxnet3_get_next_body_rx_descr(s, descr_buf, descr_idx, ridx);
    }
}

/* In case the packet was csum offloaded (either NEEDS_CSUM or DATA_VALID),
 * the implementation always passes an RxCompDesc with a "Checksum
 * calculated and found correct" indication to the OS (cnc=0 and tuc=1, see
 * vmxnet3_rx_update_descr). This emulates the observed ESXi behavior.
 *
 * Therefore, if the packet has NEEDS_CSUM set, we must calculate
 * and place a fully computed checksum into the tcp/udp header.
 * Otherwise, the OS driver will receive a checksum-correct indication
 * (CHECKSUM_UNNECESSARY), but with the actual tcp/udp checksum field
 * having just the pseudo header csum value.
 *
 * While this is not a problem if the packet is destined for local
 * delivery, in case the guest OS performs forwarding, it will forward
 * an incorrectly checksummed packet.
 */
static void vmxnet3_rx_need_csum_calculate(struct NetRxPkt *pkt,
                                           const void *pkt_data,
                                           size_t pkt_len)
{
    struct virtio_net_hdr *vhdr;
    bool hasip4, hasip6;
    EthL4HdrProto l4hdr_proto;
    uint8_t *data;
    int len;

    vhdr = net_rx_pkt_get_vhdr(pkt);
    if (!VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
        return;
    }

    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
    if (!(hasip4 || hasip6) ||
        (l4hdr_proto != ETH_L4_HDR_PROTO_TCP &&
         l4hdr_proto != ETH_L4_HDR_PROTO_UDP)) {
        return;
    }

    vmxnet3_dump_virt_hdr(vhdr);

    /* Validate packet len: csum_start + csum_offset + length of csum field */
    if (pkt_len < (vhdr->csum_start + vhdr->csum_offset + 2)) {
        VMW_PKPRN("packet len:%zu < csum_start(%d) + csum_offset(%d) + 2, "
                  "cannot calculate checksum",
                  pkt_len, vhdr->csum_start, vhdr->csum_offset);
        return;
    }

    data = (uint8_t *)pkt_data + vhdr->csum_start;
    len = pkt_len - vhdr->csum_start;
    /* Put the checksum obtained into the packet */
    stw_be_p(data + vhdr->csum_offset,
             net_checksum_finish_nozero(net_checksum_add(len, data)));

    vhdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    vhdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
}

static void vmxnet3_rx_update_descr(struct NetRxPkt *pkt,
    struct Vmxnet3_RxCompDesc *rxcd)
{
    int csum_ok, is_gso;
    bool hasip4, hasip6;
    EthL4HdrProto l4hdr_proto;
    struct virtio_net_hdr *vhdr;
    uint8_t offload_type;

    if (net_rx_pkt_is_vlan_stripped(pkt)) {
        rxcd->ts = 1;
        rxcd->tci = net_rx_pkt_get_vlan_tag(pkt);
    }

    vhdr = net_rx_pkt_get_vhdr(pkt);
    /*
     * Checksum is valid when the lower level says so, or when the lower
     * level requested checksum offload, which indicates the packet was
     * produced/bridged locally and did not travel over the network
     * after the last checksum calculation or production
     */
    csum_ok = VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_DATA_VALID) ||
              VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM);

    offload_type = vhdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
    is_gso = (offload_type != VIRTIO_NET_HDR_GSO_NONE) ? 1 : 0;

    if (!csum_ok && !is_gso) {
        goto nocsum;
    }

    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
    if ((l4hdr_proto != ETH_L4_HDR_PROTO_TCP &&
         l4hdr_proto != ETH_L4_HDR_PROTO_UDP) ||
        (!hasip4 && !hasip6)) {
        goto nocsum;
    }

    rxcd->cnc = 0;
    rxcd->v4 = hasip4 ? 1 : 0;
    rxcd->v6 = hasip6 ? 1 : 0;
    rxcd->tcp = l4hdr_proto == ETH_L4_HDR_PROTO_TCP;
    rxcd->udp = l4hdr_proto == ETH_L4_HDR_PROTO_UDP;
    rxcd->fcs = rxcd->tuc = rxcd->ipc = 1;
    return;

nocsum:
    rxcd->cnc = 1;
}

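/*
 * Scatter-gather helper: DMA 'bytes_to_copy' bytes of the iovec,
 * starting 'start_iov_off' bytes into it, to guest memory at
 * 'target_addr'. Fragments wholly before the start offset are skipped.
 */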
static void
vmxnet3_pci_dma_writev(PCIDevice *pci_dev,
                       const struct iovec *iov,
                       size_t start_iov_off,
                       hwaddr target_addr,
                       size_t bytes_to_copy)
{
    size_t curr_off = 0;
    size_t copied = 0;

    while (bytes_to_copy) {
        if (start_iov_off < (curr_off + iov->iov_len)) {
            size_t chunk_len =
                MIN((curr_off + iov->iov_len) - start_iov_off, bytes_to_copy);

            pci_dma_write(pci_dev, target_addr + copied,
                          iov->iov_base + start_iov_off - curr_off,
                          chunk_len);

            copied += chunk_len;
            start_iov_off += chunk_len;
            curr_off = start_iov_off;
            bytes_to_copy -= chunk_len;
        } else {
            curr_off += iov->iov_len;
        }
        iov++;
    }
}

static void
vmxnet3_pci_dma_write_rxcd(PCIDevice *pcidev, dma_addr_t pa,
                           struct Vmxnet3_RxCompDesc *rxcd)
{
    rxcd->val1 = cpu_to_le32(rxcd->val1);
    rxcd->val2 = cpu_to_le32(rxcd->val2);
    rxcd->val3 = cpu_to_le32(rxcd->val3);
    pci_dma_write(pcidev, pa, rxcd, sizeof(*rxcd));
}

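/*
 * Deliver the current RX packet to the guest: for each fragment, pop a
 * free completion descriptor and an RX buffer descriptor, DMA the data
 * chunk, then publish the completion. The final completion carries eop
 * and an err flag if the packet did not fit into the available buffers.
 */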
static bool
vmxnet3_indicate_packet(VMXNET3State *s)
{
    struct Vmxnet3_RxDesc rxd;
    PCIDevice *d = PCI_DEVICE(s);
    bool is_head = true;
    uint32_t rxd_idx;
    uint32_t rx_ridx = 0;

    struct Vmxnet3_RxCompDesc rxcd;
    uint32_t new_rxcd_gen = VMXNET3_INIT_GEN;
    hwaddr new_rxcd_pa = 0;
    hwaddr ready_rxcd_pa = 0;
    struct iovec *data = net_rx_pkt_get_iovec(s->rx_pkt);
    size_t bytes_copied = 0;
    size_t bytes_left = net_rx_pkt_get_total_len(s->rx_pkt);
    uint16_t num_frags = 0;
    size_t chunk_size;

    net_rx_pkt_dump(s->rx_pkt);

    while (bytes_left > 0) {

        /* cannot add more frags to packet */
        if (num_frags == s->max_rx_frags) {
            break;
        }

        new_rxcd_pa = vmxnet3_pop_rxc_descr(s, RXQ_IDX, &new_rxcd_gen);
        if (!new_rxcd_pa) {
            break;
        }

        if (!vmxnet3_get_next_rx_descr(s, is_head, &rxd, &rxd_idx, &rx_ridx)) {
            break;
        }

        chunk_size = MIN(bytes_left, rxd.len);
        vmxnet3_pci_dma_writev(d, data, bytes_copied, rxd.addr, chunk_size);
        bytes_copied += chunk_size;
        bytes_left -= chunk_size;

        vmxnet3_dump_rx_descr(&rxd);

        if (ready_rxcd_pa != 0) {
            vmxnet3_pci_dma_write_rxcd(d, ready_rxcd_pa, &rxcd);
        }

        memset(&rxcd, 0, sizeof(struct Vmxnet3_RxCompDesc));
        rxcd.rxdIdx = rxd_idx;
        rxcd.len = chunk_size;
        rxcd.sop = is_head;
        rxcd.gen = new_rxcd_gen;
        rxcd.rqID = RXQ_IDX + rx_ridx * s->rxq_num;

        if (bytes_left == 0) {
            vmxnet3_rx_update_descr(s->rx_pkt, &rxcd);
        }

        VMW_RIPRN("RX Completion descriptor: rxRing: %lu rxIdx %lu len %lu "
                  "sop %d csum_correct %lu",
                  (unsigned long) rx_ridx,
                  (unsigned long) rxcd.rxdIdx,
                  (unsigned long) rxcd.len,
                  (int) rxcd.sop,
                  (unsigned long) rxcd.tuc);

        is_head = false;
        ready_rxcd_pa = new_rxcd_pa;
        new_rxcd_pa = 0;
        num_frags++;
    }

    if (ready_rxcd_pa != 0) {
        rxcd.eop = 1;
        rxcd.err = (bytes_left != 0);

        vmxnet3_pci_dma_write_rxcd(d, ready_rxcd_pa, &rxcd);

        /* Flush RX descriptor changes */
        smp_wmb();
    }

    if (new_rxcd_pa != 0) {
        vmxnet3_revert_rxc_descr(s, RXQ_IDX);
    }

    vmxnet3_trigger_interrupt(s, s->rxq_descr[RXQ_IDX].intr_idx);

    if (bytes_left == 0) {
        vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_OK);
        return true;
    } else if (num_frags == s->max_rx_frags) {
        vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_ERROR);
        return false;
    } else {
        vmxnet3_on_rx_done_update_stats(s, RXQ_IDX,
                                        VMXNET3_PKT_STATUS_OUT_OF_BUF);
        return false;
    }
}

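/*
 * BAR0 holds the fast-path registers: the per-queue TX/RX producer
 * doorbells (TXPROD, RXPROD, RXPROD2) and the interrupt mask registers
 * (IMR), each replicated at VMXNET3_REG_ALIGN strides.
 */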
static void
vmxnet3_io_bar0_write(void *opaque, hwaddr addr,
                      uint64_t val, unsigned size)
{
    VMXNET3State *s = opaque;

    if (!s->device_active) {
        return;
    }

    if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_TXPROD,
                        VMXNET3_DEVICE_MAX_TX_QUEUES, VMXNET3_REG_ALIGN)) {
        int tx_queue_idx =
            VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_TXPROD,
                                     VMXNET3_REG_ALIGN);
        if (tx_queue_idx < s->txq_num) {
            vmxnet3_process_tx_queue(s, tx_queue_idx);
        } else {
            qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Illegal TX queue %d/%d\n",
                          tx_queue_idx, s->txq_num);
        }
        return;
    }

    if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
                        VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
        int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
                                         VMXNET3_REG_ALIGN);

        VMW_CBPRN("Interrupt mask for line %d written: 0x%" PRIx64, l, val);

        vmxnet3_on_interrupt_mask_changed(s, l, val);
        return;
    }

    if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD,
                        VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN) ||
       VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD2,
                        VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN)) {
        return;
    }

    VMW_WRPRN("BAR0 unknown write [%" PRIx64 "] = %" PRIx64 ", size %d",
              (uint64_t) addr, val, size);
}

static uint64_t
vmxnet3_io_bar0_read(void *opaque, hwaddr addr, unsigned size)
{
    VMXNET3State *s = opaque;

    if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
                        VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
        int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
                                         VMXNET3_REG_ALIGN);
        return s->interrupt_states[l].is_masked;
    }

    VMW_CBPRN("BAR0 unknown read [%" PRIx64 "], size %d", addr, size);
    return 0;
}

static void vmxnet3_reset_interrupt_states(VMXNET3State *s)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(s->interrupt_states); i++) {
        s->interrupt_states[i].is_asserted = false;
        s->interrupt_states[i].is_pending = false;
        s->interrupt_states[i].is_masked = true;
    }
}

static void vmxnet3_reset_mac(VMXNET3State *s)
{
    memcpy(&s->conf.macaddr.a, &s->perm_mac.a, sizeof(s->perm_mac.a));
    VMW_CFPRN("MAC address set to: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));
}

static void vmxnet3_deactivate_device(VMXNET3State *s)
{
    if (s->device_active) {
        VMW_CBPRN("Deactivating vmxnet3...");
        net_tx_pkt_uninit(s->tx_pkt);
        net_rx_pkt_uninit(s->rx_pkt);
        s->device_active = false;
    }
}

static void vmxnet3_reset(VMXNET3State *s)
{
    VMW_CBPRN("Resetting vmxnet3...");

    vmxnet3_deactivate_device(s);
    vmxnet3_reset_interrupt_states(s);
    s->drv_shmem = 0;
    s->tx_sop = true;
    s->skip_current_tx_pkt = false;
}

static void vmxnet3_update_rx_mode(VMXNET3State *s)
{
    PCIDevice *d = PCI_DEVICE(s);

    s->rx_mode = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem,
                                           devRead.rxFilterConf.rxMode);
    VMW_CFPRN("RX mode: 0x%08X", s->rx_mode);
}

static void vmxnet3_update_vlan_filters(VMXNET3State *s)
{
    int i;
    PCIDevice *d = PCI_DEVICE(s);

    /* Copy configuration from shared memory */
    VMXNET3_READ_DRV_SHARED(d, s->drv_shmem,
                            devRead.rxFilterConf.vfTable,
                            s->vlan_table,
                            sizeof(s->vlan_table));

    /* Invert byte order when needed */
    for (i = 0; i < ARRAY_SIZE(s->vlan_table); i++) {
        s->vlan_table[i] = le32_to_cpu(s->vlan_table[i]);
    }

    /* Dump configuration for debugging purposes */
    VMW_CFPRN("Configured VLANs:");
    for (i = 0; i < sizeof(s->vlan_table) * 8; i++) {
        if (VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, i)) {
            VMW_CFPRN("\tVLAN %d is present", i);
        }
    }
}

static void vmxnet3_update_mcast_filters(VMXNET3State *s)
{
    PCIDevice *d = PCI_DEVICE(s);

    uint16_t list_bytes =
        VMXNET3_READ_DRV_SHARED16(d, s->drv_shmem,
                                  devRead.rxFilterConf.mfTableLen);

    s->mcast_list_len = list_bytes / sizeof(s->mcast_list[0]);

    s->mcast_list = g_realloc(s->mcast_list, list_bytes);
    if (!s->mcast_list) {
        if (s->mcast_list_len == 0) {
            VMW_CFPRN("Current multicast list is empty");
        } else {
            VMW_ERPRN("Failed to allocate multicast list of %d elements",
                      s->mcast_list_len);
        }
        s->mcast_list_len = 0;
    } else {
        int i;
        hwaddr mcast_list_pa =
            VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem,
                                      devRead.rxFilterConf.mfTablePA);

        pci_dma_read(d, mcast_list_pa, s->mcast_list, list_bytes);

        VMW_CFPRN("Current multicast list len is %d:", s->mcast_list_len);
        for (i = 0; i < s->mcast_list_len; i++) {
            VMW_CFPRN("\t" MAC_FMT, MAC_ARG(s->mcast_list[i].a));
        }
    }
}

static void vmxnet3_setup_rx_filtering(VMXNET3State *s)
{
    vmxnet3_update_rx_mode(s);
    vmxnet3_update_vlan_filters(s);
    vmxnet3_update_mcast_filters(s);
}

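/*
 * Report the interrupt configuration to the driver. Presumably the low
 * two bits carry the interrupt type and the next two the masking mode;
 * "auto" is advertised for both, letting the driver pick whatever the
 * bus supports.
 */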
static uint32_t vmxnet3_get_interrupt_config(VMXNET3State *s)
{
    uint32_t interrupt_mode = VMXNET3_IT_AUTO | (VMXNET3_IMM_AUTO << 2);
    VMW_CFPRN("Interrupt config is 0x%X", interrupt_mode);
    return interrupt_mode;
}

static void vmxnet3_fill_stats(VMXNET3State *s)
{
    int i;
    PCIDevice *d = PCI_DEVICE(s);

    if (!s->device_active) {
        return;
    }

    for (i = 0; i < s->txq_num; i++) {
        pci_dma_write(d,
                      s->txq_descr[i].tx_stats_pa,
                      &s->txq_descr[i].txq_stats,
                      sizeof(s->txq_descr[i].txq_stats));
    }

    for (i = 0; i < s->rxq_num; i++) {
        pci_dma_write(d,
                      s->rxq_descr[i].rx_stats_pa,
                      &s->rxq_descr[i].rxq_stats,
                      sizeof(s->rxq_descr[i].rxq_stats));
    }
}

static void vmxnet3_adjust_by_guest_type(VMXNET3State *s)
{
    struct Vmxnet3_GOSInfo gos;
    PCIDevice *d = PCI_DEVICE(s);

    VMXNET3_READ_DRV_SHARED(d, s->drv_shmem, devRead.misc.driverInfo.gos,
                            &gos, sizeof(gos));
    s->rx_packets_compound = (gos.gosType != VMXNET3_GOS_TYPE_WIN);

    VMW_CFPRN("Guest type specifics: RXCOMPOUND: %d", s->rx_packets_compound);
}

static void
vmxnet3_dump_conf_descr(const char *name,
                        struct Vmxnet3_VariableLenConfDesc *pm_descr)
{
    VMW_CFPRN("%s descriptor dump: Version %u, Length %u",
              name, pm_descr->confVer, pm_descr->confLen);
}

static void vmxnet3_update_pm_state(VMXNET3State *s)
{
    struct Vmxnet3_VariableLenConfDesc pm_descr;
    PCIDevice *d = PCI_DEVICE(s);

    pm_descr.confLen =
        VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.pmConfDesc.confLen);
    pm_descr.confVer =
        VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.pmConfDesc.confVer);
    pm_descr.confPA =
        VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.pmConfDesc.confPA);

    vmxnet3_dump_conf_descr("PM State", &pm_descr);
}

static void vmxnet3_update_features(VMXNET3State *s)
{
    uint32_t guest_features;
    int rxcso_supported;
    PCIDevice *d = PCI_DEVICE(s);

    guest_features = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem,
                                               devRead.misc.uptFeatures);

    rxcso_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXCSUM);
    s->rx_vlan_stripping = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXVLAN);
    s->lro_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_LRO);

    VMW_CFPRN("Features configuration: LRO: %d, RXCSUM: %d, VLANSTRIP: %d",
              s->lro_supported, rxcso_supported,
              s->rx_vlan_stripping);
    if (s->peer_has_vhdr) {
        qemu_set_offload(qemu_get_queue(s->nic)->peer,
                         rxcso_supported,
                         s->lro_supported,
                         s->lro_supported,
                         0,
                         0,
                         0,
                         0);
    }
}

static bool vmxnet3_verify_intx(VMXNET3State *s, int intx)
{
    return s->msix_used || msi_enabled(PCI_DEVICE(s))
        || intx == pci_get_byte(s->parent_obj.config + PCI_INTERRUPT_PIN) - 1;
}

static void vmxnet3_validate_interrupt_idx(bool is_msix, int idx)
{
    int max_ints = is_msix ? VMXNET3_MAX_INTRS : VMXNET3_MAX_NMSIX_INTRS;
    if (idx >= max_ints) {
        hw_error("Bad interrupt index: %d\n", idx);
    }
}

static void vmxnet3_validate_interrupts(VMXNET3State *s)
{
    int i;

    VMW_CFPRN("Verifying event interrupt index (%d)", s->event_int_idx);
    vmxnet3_validate_interrupt_idx(s->msix_used, s->event_int_idx);

    for (i = 0; i < s->txq_num; i++) {
        int idx = s->txq_descr[i].intr_idx;
        VMW_CFPRN("Verifying TX queue %d interrupt index (%d)", i, idx);
        vmxnet3_validate_interrupt_idx(s->msix_used, idx);
    }

    for (i = 0; i < s->rxq_num; i++) {
        int idx = s->rxq_descr[i].intr_idx;
        VMW_CFPRN("Verifying RX queue %d interrupt index (%d)", i, idx);
        vmxnet3_validate_interrupt_idx(s->msix_used, idx);
    }
}

static bool vmxnet3_validate_queues(VMXNET3State *s)
{
    /*
     * txq_num and rxq_num are the total numbers of queues configured by
     * the guest. These numbers must not exceed the corresponding
     * maximum values.
     */
1377 
1378     if (s->txq_num > VMXNET3_DEVICE_MAX_TX_QUEUES) {
1379         qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Bad TX queues number: %d\n",
1380                       s->txq_num);
1381         return false;
1382     }
1383 
1384     if (s->rxq_num > VMXNET3_DEVICE_MAX_RX_QUEUES) {
1385         qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Bad RX queues number: %d\n",
1386                       s->rxq_num);
1387         return false;
1388     }
1389 
1390     return true;
1391 }
1392 
vmxnet3_activate_device(VMXNET3State * s)1393 static void vmxnet3_activate_device(VMXNET3State *s)
1394 {
1395     int i;
1396     static const uint32_t VMXNET3_DEF_TX_THRESHOLD = 1;
1397     PCIDevice *d = PCI_DEVICE(s);
1398     hwaddr qdescr_table_pa;
1399     uint64_t pa;
1400     uint32_t size;
1401 
1402     /* Verify configuration consistency */
1403     if (!vmxnet3_verify_driver_magic(d, s->drv_shmem)) {
1404         VMW_ERPRN("Device configuration received from driver is invalid");
1405         return;
1406     }
1407 
1408     /* Verify if device is active */
1409     if (s->device_active) {
1410         VMW_CFPRN("Vmxnet3 device is active");
1411         return;
1412     }
1413 
1414     s->txq_num =
1415         VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numTxQueues);
1416     s->rxq_num =
1417         VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numRxQueues);
1418 
1419     VMW_CFPRN("Number of TX/RX queues %u/%u", s->txq_num, s->rxq_num);
1420     if (!vmxnet3_validate_queues(s)) {
1421         return;
1422     }
1423 
1424     vmxnet3_adjust_by_guest_type(s);
1425     vmxnet3_update_features(s);
1426     vmxnet3_update_pm_state(s);
1427     vmxnet3_setup_rx_filtering(s);
1428     /* Cache fields from shared memory */
1429     s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu);
1430     if (s->mtu < VMXNET3_MIN_MTU || s->mtu > VMXNET3_MAX_MTU) {
1431         qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Bad MTU size: %u\n", s->mtu);
1432         return;
1433     }
1434     VMW_CFPRN("MTU is %u", s->mtu);
1435 
1436     s->max_rx_frags =
1437         VMXNET3_READ_DRV_SHARED16(d, s->drv_shmem, devRead.misc.maxNumRxSG);
1438 
1439     if (s->max_rx_frags == 0) {
1440         s->max_rx_frags = 1;
1441     }
1442 
1443     VMW_CFPRN("Max RX fragments is %u", s->max_rx_frags);
1444 
1445     s->event_int_idx =
1446         VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.eventIntrIdx);
1447     assert(vmxnet3_verify_intx(s, s->event_int_idx));
1448     VMW_CFPRN("Events interrupt line is %u", s->event_int_idx);
1449 
1450     s->auto_int_masking =
1451         VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.autoMask);
1452     VMW_CFPRN("Automatic interrupt masking is %d", (int)s->auto_int_masking);
1453 
1454     qdescr_table_pa =
1455         VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.misc.queueDescPA);
1456     VMW_CFPRN("TX queues descriptors table is at 0x%" PRIx64, qdescr_table_pa);
1457 
1458     /*
1459      * Worst-case scenario is a packet that holds all TX rings space so
1460      * we calculate total size of all TX rings for max TX fragments number
1461      */
1462     s->max_tx_frags = 0;
1463 
1464     /* TX queues */
1465     for (i = 0; i < s->txq_num; i++) {
1466         hwaddr qdescr_pa =
1467             qdescr_table_pa + i * sizeof(struct Vmxnet3_TxQueueDesc);
1468 
1469         /* Read interrupt number for this TX queue */
1470         s->txq_descr[i].intr_idx =
1471             VMXNET3_READ_TX_QUEUE_DESCR8(d, qdescr_pa, conf.intrIdx);
1472         assert(vmxnet3_verify_intx(s, s->txq_descr[i].intr_idx));
1473 
1474         VMW_CFPRN("TX Queue %d interrupt: %d", i, s->txq_descr[i].intr_idx);
1475 
1476         /* Read rings memory locations for TX queues */
1477         pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.txRingBasePA);
1478         size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.txRingSize);
1479         if (size > VMXNET3_TX_RING_MAX_SIZE) {
1480             size = VMXNET3_TX_RING_MAX_SIZE;
1481         }
1482 
1483         vmxnet3_ring_init(d, &s->txq_descr[i].tx_ring, pa, size,
1484                           sizeof(struct Vmxnet3_TxDesc), false);
1485         VMXNET3_RING_DUMP(VMW_CFPRN, "TX", i, &s->txq_descr[i].tx_ring);
1486 
1487         s->max_tx_frags += size;
1488 
1489         /* TXC ring */
1490         pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.compRingBasePA);
1491         size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.compRingSize);
1492         if (size > VMXNET3_TC_RING_MAX_SIZE) {
1493             size = VMXNET3_TC_RING_MAX_SIZE;
1494         }
1495         vmxnet3_ring_init(d, &s->txq_descr[i].comp_ring, pa, size,
1496                           sizeof(struct Vmxnet3_TxCompDesc), true);
1497         VMXNET3_RING_DUMP(VMW_CFPRN, "TXC", i, &s->txq_descr[i].comp_ring);
1498 
1499         s->txq_descr[i].tx_stats_pa =
1500             qdescr_pa + offsetof(struct Vmxnet3_TxQueueDesc, stats);
1501 
1502         memset(&s->txq_descr[i].txq_stats, 0,
1503                sizeof(s->txq_descr[i].txq_stats));
1504 
1505         /* Fill device-managed parameters for queues */
1506         VMXNET3_WRITE_TX_QUEUE_DESCR32(d, qdescr_pa,
1507                                        ctrl.txThreshold,
1508                                        VMXNET3_DEF_TX_THRESHOLD);
1509     }
1510 
1511     /* Preallocate the TX and RX packet wrappers */
1512     VMW_CFPRN("Max TX fragments is %u", s->max_tx_frags);
1513     net_tx_pkt_init(&s->tx_pkt, s->max_tx_frags);
1514     net_rx_pkt_init(&s->rx_pkt);
1515 
1516     /* Read rings memory locations for RX queues */
1517     for (i = 0; i < s->rxq_num; i++) {
1518         int j;
1519         hwaddr qd_pa =
1520             qdescr_table_pa + s->txq_num * sizeof(struct Vmxnet3_TxQueueDesc) +
1521             i * sizeof(struct Vmxnet3_RxQueueDesc);
1522 
1523         /* Read interrupt number for this RX queue */
1524         s->rxq_descr[i].intr_idx =
1525             VMXNET3_READ_TX_QUEUE_DESCR8(d, qd_pa, conf.intrIdx);
1526         assert(vmxnet3_verify_intx(s, s->rxq_descr[i].intr_idx));
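        /*
         * Editor's note: the TX-queue accessor above is reused for an RX
         * queue descriptor; this appears to rely on conf.intrIdx sitting at
         * the same offset in Vmxnet3_TxQueueDesc and Vmxnet3_RxQueueDesc.
         */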
1527 
1528         VMW_CFPRN("RX Queue %d interrupt: %d", i, s->rxq_descr[i].intr_idx);
1529 
1530         /* Read rings memory locations */
1531         for (j = 0; j < VMXNET3_RX_RINGS_PER_QUEUE; j++) {
1532             /* RX rings */
1533             pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.rxRingBasePA[j]);
1534             size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.rxRingSize[j]);
1535             if (size > VMXNET3_RX_RING_MAX_SIZE) {
1536                 size = VMXNET3_RX_RING_MAX_SIZE;
1537             }
1538             vmxnet3_ring_init(d, &s->rxq_descr[i].rx_ring[j], pa, size,
1539                               sizeof(struct Vmxnet3_RxDesc), false);
1540             VMW_CFPRN("RX queue %d:%d: Base: %" PRIx64 ", Size: %d",
1541                       i, j, pa, size);
1542         }
1543 
1544         /* RXC ring */
1545         pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.compRingBasePA);
1546         size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.compRingSize);
1547         if (size > VMXNET3_RC_RING_MAX_SIZE) {
1548             size = VMXNET3_RC_RING_MAX_SIZE;
1549         }
1550         vmxnet3_ring_init(d, &s->rxq_descr[i].comp_ring, pa, size,
1551                           sizeof(struct Vmxnet3_RxCompDesc), true);
1552         VMW_CFPRN("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size);
1553 
1554         s->rxq_descr[i].rx_stats_pa =
1555             qd_pa + offsetof(struct Vmxnet3_RxQueueDesc, stats);
1556         memset(&s->rxq_descr[i].rxq_stats, 0,
1557                sizeof(s->rxq_descr[i].rxq_stats));
1558     }
1559 
1560     vmxnet3_validate_interrupts(s);
1561 
1562     /* Make sure everything is in place before device activation */
1563     smp_wmb();
1564 
1565     vmxnet3_reset_mac(s);
1566 
1567     s->device_active = true;
1568 }
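/*
 * Editor's sketch (not part of the original source): the queue descriptor
 * table parsed above is laid out as txq_num Vmxnet3_TxQueueDesc entries
 * followed by rxq_num Vmxnet3_RxQueueDesc entries, so the per-queue
 * descriptor addresses used in the two loops could be derived with helpers
 * like these (hypothetical names):
 *
 *     static hwaddr txq_descr_pa(hwaddr table_pa, int i)
 *     {
 *         return table_pa + i * sizeof(struct Vmxnet3_TxQueueDesc);
 *     }
 *
 *     static hwaddr rxq_descr_pa(hwaddr table_pa, uint8_t txq_num, int i)
 *     {
 *         return table_pa + txq_num * sizeof(struct Vmxnet3_TxQueueDesc) +
 *                i * sizeof(struct Vmxnet3_RxQueueDesc);
 *     }
 */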
1569 
1570 static void vmxnet3_handle_command(VMXNET3State *s, uint64_t cmd)
1571 {
1572     s->last_command = cmd;
1573 
1574     switch (cmd) {
1575     case VMXNET3_CMD_GET_PERM_MAC_HI:
1576         VMW_CBPRN("Set: Get upper part of permanent MAC");
1577         break;
1578 
1579     case VMXNET3_CMD_GET_PERM_MAC_LO:
1580         VMW_CBPRN("Set: Get lower part of permanent MAC");
1581         break;
1582 
1583     case VMXNET3_CMD_GET_STATS:
1584         VMW_CBPRN("Set: Get device statistics");
1585         vmxnet3_fill_stats(s);
1586         break;
1587 
1588     case VMXNET3_CMD_ACTIVATE_DEV:
1589         VMW_CBPRN("Set: Activating vmxnet3 device");
1590         vmxnet3_activate_device(s);
1591         break;
1592 
1593     case VMXNET3_CMD_UPDATE_RX_MODE:
1594         VMW_CBPRN("Set: Update rx mode");
1595         vmxnet3_update_rx_mode(s);
1596         break;
1597 
1598     case VMXNET3_CMD_UPDATE_VLAN_FILTERS:
1599         VMW_CBPRN("Set: Update VLAN filters");
1600         vmxnet3_update_vlan_filters(s);
1601         break;
1602 
1603     case VMXNET3_CMD_UPDATE_MAC_FILTERS:
1604         VMW_CBPRN("Set: Update MAC filters");
1605         vmxnet3_update_mcast_filters(s);
1606         break;
1607 
1608     case VMXNET3_CMD_UPDATE_FEATURE:
1609         VMW_CBPRN("Set: Update features");
1610         vmxnet3_update_features(s);
1611         break;
1612 
1613     case VMXNET3_CMD_UPDATE_PMCFG:
1614         VMW_CBPRN("Set: Update power management config");
1615         vmxnet3_update_pm_state(s);
1616         break;
1617 
1618     case VMXNET3_CMD_GET_LINK:
1619         VMW_CBPRN("Set: Get link");
1620         break;
1621 
1622     case VMXNET3_CMD_RESET_DEV:
1623         VMW_CBPRN("Set: Reset device");
1624         vmxnet3_reset(s);
1625         break;
1626 
1627     case VMXNET3_CMD_QUIESCE_DEV:
1628         VMW_CBPRN("Set: VMXNET3_CMD_QUIESCE_DEV - deactivate the device");
1629         vmxnet3_deactivate_device(s);
1630         break;
1631 
1632     case VMXNET3_CMD_GET_CONF_INTR:
1633         VMW_CBPRN("Set: VMXNET3_CMD_GET_CONF_INTR - interrupt configuration");
1634         break;
1635 
1636     case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
1637         VMW_CBPRN("Set: VMXNET3_CMD_GET_ADAPTIVE_RING_INFO - "
1638                   "adaptive ring info flags");
1639         break;
1640 
1641     case VMXNET3_CMD_GET_DID_LO:
1642         VMW_CBPRN("Set: Get lower part of device ID");
1643         break;
1644 
1645     case VMXNET3_CMD_GET_DID_HI:
1646         VMW_CBPRN("Set: Get upper part of device ID");
1647         break;
1648 
1649     case VMXNET3_CMD_GET_DEV_EXTRA_INFO:
1650         VMW_CBPRN("Set: Get device extra info");
1651         break;
1652 
1653     default:
1654         VMW_CBPRN("Received unknown command: %" PRIx64, cmd);
1655         break;
1656     }
1657 }
1658 
1659 static uint64_t vmxnet3_get_command_status(VMXNET3State *s)
1660 {
1661     uint64_t ret;
1662 
1663     switch (s->last_command) {
1664     case VMXNET3_CMD_ACTIVATE_DEV:
1665         ret = (s->device_active) ? 0 : 1;
1666         VMW_CFPRN("Device active: %" PRIx64, ret);
1667         break;
1668 
1669     case VMXNET3_CMD_RESET_DEV:
1670     case VMXNET3_CMD_QUIESCE_DEV:
1671     case VMXNET3_CMD_GET_QUEUE_STATUS:
1672     case VMXNET3_CMD_GET_DEV_EXTRA_INFO:
1673         ret = 0;
1674         break;
1675 
1676     case VMXNET3_CMD_GET_LINK:
1677         ret = s->link_status_and_speed;
1678         VMW_CFPRN("Link and speed: %" PRIx64, ret);
1679         break;
1680 
1681     case VMXNET3_CMD_GET_PERM_MAC_LO:
1682         ret = vmxnet3_get_mac_low(&s->perm_mac);
1683         break;
1684 
1685     case VMXNET3_CMD_GET_PERM_MAC_HI:
1686         ret = vmxnet3_get_mac_high(&s->perm_mac);
1687         break;
1688 
1689     case VMXNET3_CMD_GET_CONF_INTR:
1690         ret = vmxnet3_get_interrupt_config(s);
1691         break;
1692 
1693     case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
1694         ret = VMXNET3_DISABLE_ADAPTIVE_RING;
1695         break;
1696 
1697     case VMXNET3_CMD_GET_DID_LO:
1698         ret = PCI_DEVICE_ID_VMWARE_VMXNET3;
1699         break;
1700 
1701     case VMXNET3_CMD_GET_DID_HI:
1702         ret = VMXNET3_DEVICE_REVISION;
1703         break;
1704 
1705     default:
1706         VMW_WRPRN("Received request for unknown command: %x", s->last_command);
1707         ret = 0;
1708         break;
1709     }
1710 
1711     return ret;
1712 }
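/*
 * Editor's sketch of the guest-visible command protocol implemented by
 * vmxnet3_handle_command() and vmxnet3_get_command_status(): the driver
 * writes a command to VMXNET3_REG_CMD in BAR1 and then reads the same
 * register back for the result. bar1_write()/bar1_read() are hypothetical
 * guest-side MMIO helpers, not functions from this file:
 *
 *     bar1_write(VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
 *     link = bar1_read(VMXNET3_REG_CMD);   // -> s->link_status_and_speed
 */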
1713 
1714 static void vmxnet3_set_events(VMXNET3State *s, uint32_t val)
1715 {
1716     uint32_t events;
1717     PCIDevice *d = PCI_DEVICE(s);
1718 
1719     VMW_CBPRN("Setting events: 0x%x", val);
1720     events = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, ecr) | val;
1721     VMXNET3_WRITE_DRV_SHARED32(d, s->drv_shmem, ecr, events);
1722 }
1723 
1724 static void vmxnet3_ack_events(VMXNET3State *s, uint32_t val)
1725 {
1726     PCIDevice *d = PCI_DEVICE(s);
1727     uint32_t events;
1728 
1729     VMW_CBPRN("Clearing events: 0x%x", val);
1730     events = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, ecr) & ~val;
1731     VMXNET3_WRITE_DRV_SHARED32(d, s->drv_shmem, ecr, events);
1732 }
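/*
 * Editor's note: device events travel through the 'ecr' field of shared
 * memory. vmxnet3_set_events() ORs new event bits in (e.g. VMXNET3_ECR_LINK;
 * callers such as vmxnet3_set_link_status() below then trigger the event
 * interrupt), and the guest acknowledges by writing the handled bits to
 * VMXNET3_REG_ECR, which lands in vmxnet3_ack_events():
 *
 *     ecr |= val;    // device side, vmxnet3_set_events()
 *     ecr &= ~val;   // guest ack,   vmxnet3_ack_events()
 */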
1733 
1734 static void
1735 vmxnet3_io_bar1_write(void *opaque,
1736                       hwaddr addr,
1737                       uint64_t val,
1738                       unsigned size)
1739 {
1740     VMXNET3State *s = opaque;
1741 
1742     switch (addr) {
1743     /* Vmxnet3 Revision Report Selection */
1744     case VMXNET3_REG_VRRS:
1745         VMW_CBPRN("Write BAR1 [VMXNET3_REG_VRRS] = %" PRIx64 ", size %d",
1746                   val, size);
1747         break;
1748 
1749     /* UPT Version Report Selection */
1750     case VMXNET3_REG_UVRS:
1751         VMW_CBPRN("Write BAR1 [VMXNET3_REG_UVRS] = %" PRIx64 ", size %d",
1752                   val, size);
1753         break;
1754 
1755     /* Driver Shared Address Low */
1756     case VMXNET3_REG_DSAL:
1757         VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAL] = %" PRIx64 ", size %d",
1758                   val, size);
1759         /*
1760          * The guest driver first writes the low part of the shared
1761          * memory address. We save it in a temporary variable and set
1762          * the shared address only once the high part arrives
1763          */
1764         if (val == 0) {
1765             vmxnet3_deactivate_device(s);
1766         }
1767         s->temp_shared_guest_driver_memory = val;
1768         s->drv_shmem = 0;
1769         break;
1770 
1771     /* Driver Shared Address High */
1772     case VMXNET3_REG_DSAH:
1773         VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAH] = %" PRIx64 ", size %d",
1774                   val, size);
1775         /*
1776          * Set the shared memory between guest driver and device.
1777          * The low address part should already have been written.
1778          */
1779         s->drv_shmem = s->temp_shared_guest_driver_memory | (val << 32);
1780         break;
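        /*
         * Editor's sketch (guest side; mmio_write32() is a hypothetical
         * helper): programming the 64-bit shared memory address takes two
         * 32-bit writes, low half first, matching the two cases above:
         *
         *     mmio_write32(bar1 + VMXNET3_REG_DSAL, (uint32_t)shmem_pa);
         *     mmio_write32(bar1 + VMXNET3_REG_DSAH, shmem_pa >> 32);
         */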
1781 
1782     /* Command */
1783     case VMXNET3_REG_CMD:
1784         VMW_CBPRN("Write BAR1 [VMXNET3_REG_CMD] = %" PRIx64 ", size %d",
1785                   val, size);
1786         vmxnet3_handle_command(s, val);
1787         break;
1788 
1789     /* MAC Address Low */
1790     case VMXNET3_REG_MACL:
1791         VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACL] = %" PRIx64 ", size %d",
1792                   val, size);
1793         s->temp_mac = val;
1794         break;
1795 
1796     /* MAC Address High */
1797     case VMXNET3_REG_MACH:
1798         VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACH] = %" PRIx64 ", size %d",
1799                   val, size);
1800         vmxnet3_set_variable_mac(s, val, s->temp_mac);
1801         break;
1802 
1803     /* Interrupt Cause Register */
1804     case VMXNET3_REG_ICR:
1805         VMW_CBPRN("Write BAR1 [VMXNET3_REG_ICR] = %" PRIx64 ", size %d",
1806                   val, size);
1807         qemu_log_mask(LOG_GUEST_ERROR,
1808                       "%s: write to read-only register VMXNET3_REG_ICR\n",
1809                       TYPE_VMXNET3);
1810         break;
1811 
1812     /* Event Cause Register */
1813     case VMXNET3_REG_ECR:
1814         VMW_CBPRN("Write BAR1 [VMXNET3_REG_ECR] = %" PRIx64 ", size %d",
1815                   val, size);
1816         vmxnet3_ack_events(s, val);
1817         break;
1818 
1819     default:
1820         VMW_CBPRN("Unknown Write to BAR1 [%" PRIx64 "] = %" PRIx64 ", size %d",
1821                   addr, val, size);
1822         break;
1823     }
1824 }
1825 
1826 static uint64_t
1827 vmxnet3_io_bar1_read(void *opaque, hwaddr addr, unsigned size)
1828 {
1829     VMXNET3State *s = opaque;
1830     uint64_t ret = 0;
1831 
1832     switch (addr) {
1833     /* Vmxnet3 Revision Report Selection */
1834     case VMXNET3_REG_VRRS:
1835         VMW_CBPRN("Read BAR1 [VMXNET3_REG_VRRS], size %d", size);
1836         ret = VMXNET3_DEVICE_REVISION;
1837         break;
1838 
1839     /* UPT Version Report Selection */
1840     case VMXNET3_REG_UVRS:
1841         VMW_CBPRN("Read BAR1 [VMXNET3_REG_UVRS], size %d", size);
1842         ret = VMXNET3_UPT_REVISION;
1843         break;
1844 
1845     /* Command */
1846     case VMXNET3_REG_CMD:
1847         VMW_CBPRN("Read BAR1 [VMXNET3_REG_CMD], size %d", size);
1848         ret = vmxnet3_get_command_status(s);
1849         break;
1850 
1851     /* MAC Address Low */
1852     case VMXNET3_REG_MACL:
1853         VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACL], size %d", size);
1854         ret = vmxnet3_get_mac_low(&s->conf.macaddr);
1855         break;
1856 
1857     /* MAC Address High */
1858     case VMXNET3_REG_MACH:
1859         VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACH], size %d", size);
1860         ret = vmxnet3_get_mac_high(&s->conf.macaddr);
1861         break;
1862 
1863     /*
1864      * Interrupt Cause Register
1865      * Used for legacy interrupts only, so the interrupt index is always 0
1866      */
1867     case VMXNET3_REG_ICR:
1868         VMW_CBPRN("Read BAR1 [VMXNET3_REG_ICR], size %d", size);
1869         if (vmxnet3_interrupt_asserted(s, 0)) {
1870             vmxnet3_clear_interrupt(s, 0);
1871             ret = true;
1872         } else {
1873             ret = false;
1874         }
1875         break;
1876 
1877     default:
1878         VMW_CBPRN("Unknown read BAR1[%" PRIx64 "], %d bytes", addr, size);
1879         break;
1880     }
1881 
1882     return ret;
1883 }
1884 
1885 static int
1886 vmxnet3_can_receive(NetClientState *nc)
1887 {
1888     VMXNET3State *s = qemu_get_nic_opaque(nc);
1889     return s->device_active &&
1890            VMXNET_FLAG_IS_SET(s->link_status_and_speed, VMXNET3_LINK_STATUS_UP);
1891 }
1892 
1893 static inline bool
1894 vmxnet3_is_registered_vlan(VMXNET3State *s, const void *data)
1895 {
1896     uint16_t vlan_tag = eth_get_pkt_tci(data) & VLAN_VID_MASK;
1897     if (IS_SPECIAL_VLAN_ID(vlan_tag)) {
1898         return true;
1899     }
1900 
1901     return VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, vlan_tag);
1902 }
1903 
1904 static bool
1905 vmxnet3_is_allowed_mcast_group(VMXNET3State *s, const uint8_t *group_mac)
1906 {
1907     int i;
1908     for (i = 0; i < s->mcast_list_len; i++) {
1909         if (!memcmp(group_mac, s->mcast_list[i].a, sizeof(s->mcast_list[i]))) {
1910             return true;
1911         }
1912     }
1913     return false;
1914 }
1915 
1916 static bool
1917 vmxnet3_rx_filter_may_indicate(VMXNET3State *s, const void *data,
1918     size_t size)
1919 {
1920     struct eth_header *ehdr = PKT_GET_ETH_HDR(data);
1921 
1922     if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_PROMISC)) {
1923         return true;
1924     }
1925 
1926     if (!vmxnet3_is_registered_vlan(s, data)) {
1927         return false;
1928     }
1929 
1930     switch (net_rx_pkt_get_packet_type(s->rx_pkt)) {
1931     case ETH_PKT_UCAST:
1932         if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_UCAST)) {
1933             return false;
1934         }
1935         if (memcmp(s->conf.macaddr.a, ehdr->h_dest, ETH_ALEN)) {
1936             return false;
1937         }
1938         break;
1939 
1940     case ETH_PKT_BCAST:
1941         if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_BCAST)) {
1942             return false;
1943         }
1944         break;
1945 
1946     case ETH_PKT_MCAST:
1947         if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_ALL_MULTI)) {
1948             return true;
1949         }
1950         if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_MCAST)) {
1951             return false;
1952         }
1953         if (!vmxnet3_is_allowed_mcast_group(s, ehdr->h_dest)) {
1954             return false;
1955         }
1956         break;
1957 
1958     default:
1959         g_assert_not_reached();
1960     }
1961 
1962     return true;
1963 }
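/*
 * Editor's worked example: with rx_mode = VMXNET3_RXM_UCAST |
 * VMXNET3_RXM_BCAST, a multicast frame is rejected (neither ALL_MULTI nor
 * MCAST is set), while a unicast frame is indicated only if it carries a
 * registered VLAN tag and its destination matches s->conf.macaddr.
 */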
1964 
1965 static ssize_t
1966 vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1967 {
1968     VMXNET3State *s = qemu_get_nic_opaque(nc);
1969     size_t bytes_indicated;
1970 
1971     if (!vmxnet3_can_receive(nc)) {
1972         VMW_PKPRN("Cannot receive now");
1973         return -1;
1974     }
1975 
1976     if (s->peer_has_vhdr) {
1977         net_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf);
1978         buf += sizeof(struct virtio_net_hdr);
1979         size -= sizeof(struct virtio_net_hdr);
1980     }
1981 
1982     net_rx_pkt_set_packet_type(s->rx_pkt,
1983         get_eth_packet_type(PKT_GET_ETH_HDR(buf)));
1984 
1985     if (vmxnet3_rx_filter_may_indicate(s, buf, size)) {
1986         struct iovec iov = {
1987             .iov_base = (void *)buf,
1988             .iov_len = size
1989         };
1990 
1991         net_rx_pkt_set_protocols(s->rx_pkt, &iov, 1, 0);
1992         vmxnet3_rx_need_csum_calculate(s->rx_pkt, buf, size);
1993         net_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping);
1994         bytes_indicated = vmxnet3_indicate_packet(s) ? size : -1;
1995         if (bytes_indicated < size) {
1996             VMW_PKPRN("RX: %zu of %zu bytes indicated", bytes_indicated, size);
1997         }
1998     } else {
1999         VMW_PKPRN("Packet dropped by RX filter");
2000         bytes_indicated = size;
2001     }
2002 
2003     assert(size > 0);
2004     assert(bytes_indicated != 0);
2005     return bytes_indicated;
2006 }
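/*
 * Editor's note: when the peer supplies a virtio-net header
 * (s->peer_has_vhdr), the buffer received above is laid out as
 *
 *     [ struct virtio_net_hdr | Ethernet frame ... ]
 *
 * which is why the header is recorded with net_rx_pkt_set_vhdr() and
 * buf/size are then advanced past sizeof(struct virtio_net_hdr).
 */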
2007 
2008 static void vmxnet3_set_link_status(NetClientState *nc)
2009 {
2010     VMXNET3State *s = qemu_get_nic_opaque(nc);
2011 
2012     if (nc->link_down) {
2013         s->link_status_and_speed &= ~VMXNET3_LINK_STATUS_UP;
2014     } else {
2015         s->link_status_and_speed |= VMXNET3_LINK_STATUS_UP;
2016     }
2017 
2018     vmxnet3_set_events(s, VMXNET3_ECR_LINK);
2019     vmxnet3_trigger_interrupt(s, s->event_int_idx);
2020 }
2021 
2022 static NetClientInfo net_vmxnet3_info = {
2023         .type = NET_CLIENT_DRIVER_NIC,
2024         .size = sizeof(NICState),
2025         .receive = vmxnet3_receive,
2026         .link_status_changed = vmxnet3_set_link_status,
2027 };
2028 
2029 static bool vmxnet3_peer_has_vnet_hdr(VMXNET3State *s)
2030 {
2031     NetClientState *nc = qemu_get_queue(s->nic);
2032 
2033     if (qemu_has_vnet_hdr(nc->peer)) {
2034         return true;
2035     }
2036 
2037     return false;
2038 }
2039 
2040 static void vmxnet3_net_uninit(VMXNET3State *s)
2041 {
2042     g_free(s->mcast_list);
2043     vmxnet3_deactivate_device(s);
2044     qemu_del_nic(s->nic);
2045 }
2046 
2047 static void vmxnet3_net_init(VMXNET3State *s)
2048 {
2049     DeviceState *d = DEVICE(s);
2050 
2051     VMW_CBPRN("vmxnet3_net_init called...");
2052 
2053     qemu_macaddr_default_if_unset(&s->conf.macaddr);
2054 
2055     /* Windows guests query the address that was set on init */
2056     memcpy(&s->perm_mac.a, &s->conf.macaddr.a, sizeof(s->perm_mac.a));
2057 
2058     s->mcast_list = NULL;
2059     s->mcast_list_len = 0;
2060 
2061     s->link_status_and_speed = VMXNET3_LINK_SPEED | VMXNET3_LINK_STATUS_UP;
2062 
2063     VMW_CFPRN("Permanent MAC: " MAC_FMT, MAC_ARG(s->perm_mac.a));
2064 
2065     s->nic = qemu_new_nic(&net_vmxnet3_info, &s->conf,
2066                           object_get_typename(OBJECT(s)),
2067                           d->id, &d->mem_reentrancy_guard, s);
2068 
2069     s->peer_has_vhdr = vmxnet3_peer_has_vnet_hdr(s);
2070     s->tx_sop = true;
2071     s->skip_current_tx_pkt = false;
2072     s->tx_pkt = NULL;
2073     s->rx_pkt = NULL;
2074     s->rx_vlan_stripping = false;
2075     s->lro_supported = false;
2076 
2077     if (s->peer_has_vhdr) {
2078         qemu_set_vnet_hdr_len(qemu_get_queue(s->nic)->peer,
2079             sizeof(struct virtio_net_hdr));
2080     }
2081 
2082     qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
2083 }
2084 
2085 static void
2086 vmxnet3_unuse_msix_vectors(VMXNET3State *s, int num_vectors)
2087 {
2088     PCIDevice *d = PCI_DEVICE(s);
2089     int i;
2090     for (i = 0; i < num_vectors; i++) {
2091         msix_vector_unuse(d, i);
2092     }
2093 }
2094 
2095 static void
2096 vmxnet3_use_msix_vectors(VMXNET3State *s, int num_vectors)
2097 {
2098     PCIDevice *d = PCI_DEVICE(s);
2099     int i;
2100     for (i = 0; i < num_vectors; i++) {
2101         msix_vector_use(d, i);
2102     }
2103 }
2104 
2105 static bool
2106 vmxnet3_init_msix(VMXNET3State *s)
2107 {
2108     PCIDevice *d = PCI_DEVICE(s);
2109     int res = msix_init(d, VMXNET3_MAX_INTRS,
2110                         &s->msix_bar,
2111                         VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_TABLE,
2112                         &s->msix_bar,
2113                         VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA,
2114                         VMXNET3_MSIX_OFFSET, NULL);
2115 
2116     if (res < 0) {
2117         VMW_WRPRN("Failed to initialize MSI-X, error %d", res);
2118         s->msix_used = false;
2119     } else {
2120         vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS);
2121         s->msix_used = true;
2122     }
2123     return s->msix_used;
2124 }
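/*
 * Editor's note: the MSI-X table and PBA share the dedicated MSI-X BAR
 * (registered in vmxnet3_pci_realize() below) at VMXNET3_OFF_MSIX_TABLE
 * and VMXNET3_OFF_MSIX_PBA respectively, which is why msix_init() above
 * receives &s->msix_bar for both regions.
 */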
2125 
2126 static void
2127 vmxnet3_cleanup_msix(VMXNET3State *s)
2128 {
2129     PCIDevice *d = PCI_DEVICE(s);
2130 
2131     if (s->msix_used) {
2132         vmxnet3_unuse_msix_vectors(s, VMXNET3_MAX_INTRS);
2133         msix_uninit(d, &s->msix_bar, &s->msix_bar);
2134     }
2135 }
2136 
2137 static void
2138 vmxnet3_cleanup_msi(VMXNET3State *s)
2139 {
2140     PCIDevice *d = PCI_DEVICE(s);
2141 
2142     msi_uninit(d);
2143 }
2144 
2145 static const MemoryRegionOps b0_ops = {
2146     .read = vmxnet3_io_bar0_read,
2147     .write = vmxnet3_io_bar0_write,
2148     .endianness = DEVICE_LITTLE_ENDIAN,
2149     .impl = {
2150             .min_access_size = 4,
2151             .max_access_size = 4,
2152     },
2153 };
2154 
2155 static const MemoryRegionOps b1_ops = {
2156     .read = vmxnet3_io_bar1_read,
2157     .write = vmxnet3_io_bar1_write,
2158     .endianness = DEVICE_LITTLE_ENDIAN,
2159     .impl = {
2160             .min_access_size = 4,
2161             .max_access_size = 4,
2162     },
2163 };
2164 
2165 static uint64_t vmxnet3_device_serial_num(VMXNET3State *s)
2166 {
2167     uint64_t dsn_payload;
2168     uint8_t *dsnp = (uint8_t *)&dsn_payload;
2169 
2170     dsnp[0] = 0xfe;
2171     dsnp[1] = s->conf.macaddr.a[3];
2172     dsnp[2] = s->conf.macaddr.a[4];
2173     dsnp[3] = s->conf.macaddr.a[5];
2174     dsnp[4] = s->conf.macaddr.a[0];
2175     dsnp[5] = s->conf.macaddr.a[1];
2176     dsnp[6] = s->conf.macaddr.a[2];
2177     dsnp[7] = 0xff;
2178     return dsn_payload;
2179 }
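/*
 * Editor's worked example with a hypothetical MAC of 00:0c:29:ab:cd:ef:
 * the byte assignments above produce the DSN payload
 *
 *     fe ab cd ef 00 0c 29 ff
 *
 * i.e. 0xfe, the low three MAC octets, the OUI octets, then 0xff.
 */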
2180 
2181 
2182 #define VMXNET3_USE_64BIT         (true)
2183 #define VMXNET3_PER_VECTOR_MASK   (false)
2184 
2185 static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
2186 {
2187     VMXNET3State *s = VMXNET3(pci_dev);
2188     int ret;
2189 
2190     VMW_CBPRN("Starting init...");
2191 
2192     memory_region_init_io(&s->bar0, OBJECT(s), &b0_ops, s,
2193                           "vmxnet3-b0", VMXNET3_PT_REG_SIZE);
2194     pci_register_bar(pci_dev, VMXNET3_BAR0_IDX,
2195                      PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar0);
2196 
2197     memory_region_init_io(&s->bar1, OBJECT(s), &b1_ops, s,
2198                           "vmxnet3-b1", VMXNET3_VD_REG_SIZE);
2199     pci_register_bar(pci_dev, VMXNET3_BAR1_IDX,
2200                      PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar1);
2201 
2202     memory_region_init(&s->msix_bar, OBJECT(s), "vmxnet3-msix-bar",
2203                        VMXNET3_MSIX_BAR_SIZE);
2204     pci_register_bar(pci_dev, VMXNET3_MSIX_BAR_IDX,
2205                      PCI_BASE_ADDRESS_SPACE_MEMORY, &s->msix_bar);
2206 
2207     vmxnet3_reset_interrupt_states(s);
2208 
2209     /* Interrupt pin A */
2210     pci_dev->config[PCI_INTERRUPT_PIN] = 0x01;
2211 
2212     ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET, VMXNET3_MAX_NMSIX_INTRS,
2213                    VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK, NULL);
2214     /* Any error other than -ENOTSUP (board's MSI support is broken)
2215      * is a programming error. Fall back to INTx silently on -ENOTSUP. */
2216     assert(!ret || ret == -ENOTSUP);
2217 
2218     if (!vmxnet3_init_msix(s)) {
2219         VMW_WRPRN("Failed to initialize MSI-X, configuration is inconsistent.");
2220     }
2221 
2222     vmxnet3_net_init(s);
2223 
2224     if (pci_is_express(pci_dev)) {
2225         if (pci_bus_is_express(pci_get_bus(pci_dev))) {
2226             pcie_endpoint_cap_init(pci_dev, VMXNET3_EXP_EP_OFFSET);
2227         }
2228 
2229         pcie_dev_ser_num_init(pci_dev, VMXNET3_DSN_OFFSET,
2230                               vmxnet3_device_serial_num(s));
2231     }
2232 }
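/*
 * Editor's note: after realize, the device exposes three memory BARs:
 *
 *     BAR0 "vmxnet3-b0"       - b0_ops
 *     BAR1 "vmxnet3-b1"       - b1_ops (the registers handled above)
 *     BAR2 "vmxnet3-msix-bar" - MSI-X table and PBA
 */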
2233 
2234 static void vmxnet3_instance_init(Object *obj)
2235 {
2236     VMXNET3State *s = VMXNET3(obj);
2237     device_add_bootindex_property(obj, &s->conf.bootindex,
2238                                   "bootindex", "/ethernet-phy@0",
2239                                   DEVICE(obj));
2240     PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS;
2241 }
2242 
2243 static void vmxnet3_pci_uninit(PCIDevice *pci_dev)
2244 {
2245     VMXNET3State *s = VMXNET3(pci_dev);
2246 
2247     VMW_CBPRN("Starting uninit...");
2248 
2249     vmxnet3_net_uninit(s);
2250 
2251     vmxnet3_cleanup_msix(s);
2252 
2253     vmxnet3_cleanup_msi(s);
2254 }
2255 
2256 static void vmxnet3_qdev_reset(DeviceState *dev)
2257 {
2258     PCIDevice *d = PCI_DEVICE(dev);
2259     VMXNET3State *s = VMXNET3(d);
2260 
2261     VMW_CBPRN("Starting QDEV reset...");
2262     vmxnet3_reset(s);
2263 }
2264 
2265 static bool vmxnet3_mc_list_needed(void *opaque)
2266 {
2267     return true;
2268 }
2269 
2270 static int vmxnet3_mcast_list_pre_load(void *opaque)
2271 {
2272     VMXNET3State *s = opaque;
2273 
2274     s->mcast_list = g_malloc(s->mcast_list_buff_size);
2275 
2276     return 0;
2277 }
2278 
2279 
2280 static int vmxnet3_pre_save(void *opaque)
2281 {
2282     VMXNET3State *s = opaque;
2283 
2284     s->mcast_list_buff_size = s->mcast_list_len * sizeof(MACAddr);
2285 
2286     return 0;
2287 }
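/*
 * Editor's note: vmxnet3_pre_save() above sizes the multicast list in
 * bytes so that, on the destination, vmxnet3_mcast_list_pre_load() can
 * allocate a matching buffer for VMSTATE_VBUFFER_UINT32 in the
 * vmstate_vmxnet3_mcast_list subsection below to fill.
 */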
2288 
2289 static const VMStateDescription vmstate_vmxnet3_mcast_list = {
2290     .name = "vmxnet3/mcast_list",
2291     .version_id = 1,
2292     .minimum_version_id = 1,
2293     .pre_load = vmxnet3_mcast_list_pre_load,
2294     .needed = vmxnet3_mc_list_needed,
2295     .fields = (const VMStateField[]) {
2296         VMSTATE_VBUFFER_UINT32(mcast_list, VMXNET3State, 0, NULL,
2297             mcast_list_buff_size),
2298         VMSTATE_END_OF_LIST()
2299     }
2300 };
2301 
2302 static const VMStateDescription vmstate_vmxnet3_ring = {
2303     .name = "vmxnet3-ring",
2304     .version_id = 0,
2305     .fields = (const VMStateField[]) {
2306         VMSTATE_UINT64(pa, Vmxnet3Ring),
2307         VMSTATE_UINT32(size, Vmxnet3Ring),
2308         VMSTATE_UINT32(cell_size, Vmxnet3Ring),
2309         VMSTATE_UINT32(next, Vmxnet3Ring),
2310         VMSTATE_UINT8(gen, Vmxnet3Ring),
2311         VMSTATE_END_OF_LIST()
2312     }
2313 };
2314 
2315 static const VMStateDescription vmstate_vmxnet3_tx_stats = {
2316     .name = "vmxnet3-tx-stats",
2317     .version_id = 0,
2318     .fields = (const VMStateField[]) {
2319         VMSTATE_UINT64(TSOPktsTxOK, struct UPT1_TxStats),
2320         VMSTATE_UINT64(TSOBytesTxOK, struct UPT1_TxStats),
2321         VMSTATE_UINT64(ucastPktsTxOK, struct UPT1_TxStats),
2322         VMSTATE_UINT64(ucastBytesTxOK, struct UPT1_TxStats),
2323         VMSTATE_UINT64(mcastPktsTxOK, struct UPT1_TxStats),
2324         VMSTATE_UINT64(mcastBytesTxOK, struct UPT1_TxStats),
2325         VMSTATE_UINT64(bcastPktsTxOK, struct UPT1_TxStats),
2326         VMSTATE_UINT64(bcastBytesTxOK, struct UPT1_TxStats),
2327         VMSTATE_UINT64(pktsTxError, struct UPT1_TxStats),
2328         VMSTATE_UINT64(pktsTxDiscard, struct UPT1_TxStats),
2329         VMSTATE_END_OF_LIST()
2330     }
2331 };
2332 
2333 static const VMStateDescription vmstate_vmxnet3_txq_descr = {
2334     .name = "vmxnet3-txq-descr",
2335     .version_id = 0,
2336     .fields = (const VMStateField[]) {
2337         VMSTATE_STRUCT(tx_ring, Vmxnet3TxqDescr, 0, vmstate_vmxnet3_ring,
2338                        Vmxnet3Ring),
2339         VMSTATE_STRUCT(comp_ring, Vmxnet3TxqDescr, 0, vmstate_vmxnet3_ring,
2340                        Vmxnet3Ring),
2341         VMSTATE_UINT8(intr_idx, Vmxnet3TxqDescr),
2342         VMSTATE_UINT64(tx_stats_pa, Vmxnet3TxqDescr),
2343         VMSTATE_STRUCT(txq_stats, Vmxnet3TxqDescr, 0, vmstate_vmxnet3_tx_stats,
2344                        struct UPT1_TxStats),
2345         VMSTATE_END_OF_LIST()
2346     }
2347 };
2348 
2349 static const VMStateDescription vmstate_vmxnet3_rx_stats = {
2350     .name = "vmxnet3-rx-stats",
2351     .version_id = 0,
2352     .fields = (const VMStateField[]) {
2353         VMSTATE_UINT64(LROPktsRxOK, struct UPT1_RxStats),
2354         VMSTATE_UINT64(LROBytesRxOK, struct UPT1_RxStats),
2355         VMSTATE_UINT64(ucastPktsRxOK, struct UPT1_RxStats),
2356         VMSTATE_UINT64(ucastBytesRxOK, struct UPT1_RxStats),
2357         VMSTATE_UINT64(mcastPktsRxOK, struct UPT1_RxStats),
2358         VMSTATE_UINT64(mcastBytesRxOK, struct UPT1_RxStats),
2359         VMSTATE_UINT64(bcastPktsRxOK, struct UPT1_RxStats),
2360         VMSTATE_UINT64(bcastBytesRxOK, struct UPT1_RxStats),
2361         VMSTATE_UINT64(pktsRxOutOfBuf, struct UPT1_RxStats),
2362         VMSTATE_UINT64(pktsRxError, struct UPT1_RxStats),
2363         VMSTATE_END_OF_LIST()
2364     }
2365 };
2366 
2367 static const VMStateDescription vmstate_vmxnet3_rxq_descr = {
2368     .name = "vmxnet3-rxq-descr",
2369     .version_id = 0,
2370     .fields = (const VMStateField[]) {
2371         VMSTATE_STRUCT_ARRAY(rx_ring, Vmxnet3RxqDescr,
2372                              VMXNET3_RX_RINGS_PER_QUEUE, 0,
2373                              vmstate_vmxnet3_ring, Vmxnet3Ring),
2374         VMSTATE_STRUCT(comp_ring, Vmxnet3RxqDescr, 0, vmstate_vmxnet3_ring,
2375                        Vmxnet3Ring),
2376         VMSTATE_UINT8(intr_idx, Vmxnet3RxqDescr),
2377         VMSTATE_UINT64(rx_stats_pa, Vmxnet3RxqDescr),
2378         VMSTATE_STRUCT(rxq_stats, Vmxnet3RxqDescr, 0, vmstate_vmxnet3_rx_stats,
2379                        struct UPT1_RxStats),
2380         VMSTATE_END_OF_LIST()
2381     }
2382 };
2383 
2384 static int vmxnet3_post_load(void *opaque, int version_id)
2385 {
2386     VMXNET3State *s = opaque;
2387 
2388     net_tx_pkt_init(&s->tx_pkt, s->max_tx_frags);
2389     net_rx_pkt_init(&s->rx_pkt);
2390 
2391     if (s->msix_used) {
2392         vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS);
2393     }
2394 
2395     if (!vmxnet3_validate_queues(s)) {
2396         return -1;
2397     }
2398     vmxnet3_validate_interrupts(s);
2399 
2400     return 0;
2401 }
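/*
 * Editor's note: tx_pkt/rx_pkt are runtime-only wrappers (see
 * vmxnet3_activate_device()), so they are rebuilt here from the migrated
 * max_tx_frags instead of being transferred in vmstate_vmxnet3 below.
 */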
2402 
2403 static const VMStateDescription vmstate_vmxnet3_int_state = {
2404     .name = "vmxnet3-int-state",
2405     .version_id = 0,
2406     .fields = (const VMStateField[]) {
2407         VMSTATE_BOOL(is_masked, Vmxnet3IntState),
2408         VMSTATE_BOOL(is_pending, Vmxnet3IntState),
2409         VMSTATE_BOOL(is_asserted, Vmxnet3IntState),
2410         VMSTATE_END_OF_LIST()
2411     }
2412 };
2413 
2414 static const VMStateDescription vmstate_vmxnet3 = {
2415     .name = "vmxnet3",
2416     .version_id = 1,
2417     .minimum_version_id = 1,
2418     .pre_save = vmxnet3_pre_save,
2419     .post_load = vmxnet3_post_load,
2420     .fields = (const VMStateField[]) {
2421             VMSTATE_PCI_DEVICE(parent_obj, VMXNET3State),
2422             VMSTATE_MSIX(parent_obj, VMXNET3State),
2423             VMSTATE_BOOL(rx_packets_compound, VMXNET3State),
2424             VMSTATE_BOOL(rx_vlan_stripping, VMXNET3State),
2425             VMSTATE_BOOL(lro_supported, VMXNET3State),
2426             VMSTATE_UINT32(rx_mode, VMXNET3State),
2427             VMSTATE_UINT32(mcast_list_len, VMXNET3State),
2428             VMSTATE_UINT32(mcast_list_buff_size, VMXNET3State),
2429             VMSTATE_UINT32_ARRAY(vlan_table, VMXNET3State, VMXNET3_VFT_SIZE),
2430             VMSTATE_UINT32(mtu, VMXNET3State),
2431             VMSTATE_UINT16(max_rx_frags, VMXNET3State),
2432             VMSTATE_UINT32(max_tx_frags, VMXNET3State),
2433             VMSTATE_UINT8(event_int_idx, VMXNET3State),
2434             VMSTATE_BOOL(auto_int_masking, VMXNET3State),
2435             VMSTATE_UINT8(txq_num, VMXNET3State),
2436             VMSTATE_UINT8(rxq_num, VMXNET3State),
2437             VMSTATE_UINT32(device_active, VMXNET3State),
2438             VMSTATE_UINT32(last_command, VMXNET3State),
2439             VMSTATE_UINT32(link_status_and_speed, VMXNET3State),
2440             VMSTATE_UINT32(temp_mac, VMXNET3State),
2441             VMSTATE_UINT64(drv_shmem, VMXNET3State),
2442             VMSTATE_UINT64(temp_shared_guest_driver_memory, VMXNET3State),
2443 
2444             VMSTATE_STRUCT_ARRAY(txq_descr, VMXNET3State,
2445                 VMXNET3_DEVICE_MAX_TX_QUEUES, 0, vmstate_vmxnet3_txq_descr,
2446                 Vmxnet3TxqDescr),
2447             VMSTATE_STRUCT_ARRAY(rxq_descr, VMXNET3State,
2448                 VMXNET3_DEVICE_MAX_RX_QUEUES, 0, vmstate_vmxnet3_rxq_descr,
2449                 Vmxnet3RxqDescr),
2450             VMSTATE_STRUCT_ARRAY(interrupt_states, VMXNET3State,
2451                 VMXNET3_MAX_INTRS, 0, vmstate_vmxnet3_int_state,
2452                 Vmxnet3IntState),
2453 
2454             VMSTATE_END_OF_LIST()
2455     },
2456     .subsections = (const VMStateDescription * const []) {
2457         &vmstate_vmxnet3_mcast_list,
2458         NULL
2459     }
2460 };
2461 
2462 static const Property vmxnet3_properties[] = {
2463     DEFINE_NIC_PROPERTIES(VMXNET3State, conf),
2464 };
2465 
2466 static void vmxnet3_class_init(ObjectClass *class, const void *data)
2467 {
2468     DeviceClass *dc = DEVICE_CLASS(class);
2469     PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
2470 
2471     c->realize = vmxnet3_pci_realize;
2472     c->exit = vmxnet3_pci_uninit;
2473     c->vendor_id = PCI_VENDOR_ID_VMWARE;
2474     c->device_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
2475     c->revision = PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION;
2476     c->romfile = "efi-vmxnet3.rom";
2477     c->class_id = PCI_CLASS_NETWORK_ETHERNET;
2478     c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE;
2479     c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
2480     dc->desc = "VMWare Paravirtualized Ethernet v3";
2481     device_class_set_legacy_reset(dc, vmxnet3_qdev_reset);
2482     dc->vmsd = &vmstate_vmxnet3;
2483     device_class_set_props(dc, vmxnet3_properties);
2484     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
2485 }
2486 
2487 static const TypeInfo vmxnet3_info = {
2488     .name          = TYPE_VMXNET3,
2489     .parent        = TYPE_PCI_DEVICE,
2490     .class_size    = sizeof(VMXNET3Class),
2491     .instance_size = sizeof(VMXNET3State),
2492     .class_init    = vmxnet3_class_init,
2493     .instance_init = vmxnet3_instance_init,
2494     .interfaces = (const InterfaceInfo[]) {
2495         { INTERFACE_PCIE_DEVICE },
2496         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
2497         { }
2498     },
2499 };
2500 
2501 static void vmxnet3_register_types(void)
2502 {
2503     VMW_CBPRN("vmxnet3_register_types called...");
2504     type_register_static(&vmxnet3_info);
2505 }
2506 
2507 type_init(vmxnet3_register_types)
2508