xref: /openbmc/qemu/hw/net/vmxnet3.c (revision 59c2ddedcbe70a9ead3378b4946edc66b8757cb6)
1  /*
2   * QEMU VMWARE VMXNET3 paravirtual NIC
3   *
4   * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
5   *
6   * Developed by Daynix Computing LTD (http://www.daynix.com)
7   *
8   * Authors:
9   * Dmitry Fleytman <dmitry@daynix.com>
10   * Tamir Shomer <tamirs@daynix.com>
11   * Yan Vugenfirer <yan@daynix.com>
12   *
13   * This work is licensed under the terms of the GNU GPL, version 2.
14   * See the COPYING file in the top-level directory.
15   *
16   */
17  
18  #include "qemu/osdep.h"
19  #include "hw/hw.h"
20  #include "hw/pci/pci.h"
21  #include "hw/qdev-properties.h"
22  #include "net/tap.h"
23  #include "net/checksum.h"
24  #include "sysemu/sysemu.h"
25  #include "qemu/bswap.h"
26  #include "qemu/log.h"
27  #include "qemu/module.h"
28  #include "hw/pci/msix.h"
29  #include "hw/pci/msi.h"
30  #include "migration/register.h"
31  #include "migration/vmstate.h"
32  
33  #include "vmxnet3.h"
34  #include "vmxnet3_defs.h"
35  #include "vmxnet_debug.h"
36  #include "vmware_utils.h"
37  #include "net_tx_pkt.h"
38  #include "net_rx_pkt.h"
39  #include "qom/object.h"
40  
#define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
/* 0x2000 = 8 KiB */
#define VMXNET3_MSIX_BAR_SIZE 0x2000

/*
 * Compatibility flags for migration.
 * Each flag is a one-bit mask derived from its *_BIT position.
 */
#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0
#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS \
    (1 << VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT)
#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT 1
#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE \
    (1 << VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT)

/*
 * Offsets used for the device capabilities.
 * The (s)-taking macros pick the first value when
 * VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS is set in (s)->compat_flags and the
 * second value otherwise.
 */
#define VMXNET3_EXP_EP_OFFSET (0x48)
#define VMXNET3_MSI_OFFSET(s) \
    ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x50 : 0x84)
#define VMXNET3_MSIX_OFFSET(s) \
    ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0 : 0x9c)
#define VMXNET3_DSN_OFFSET     (0x100)

/* BAR indexes */
#define VMXNET3_BAR0_IDX      (0)
#define VMXNET3_BAR1_IDX      (1)
#define VMXNET3_MSIX_BAR_IDX  (2)

/*
 * MSI-X table and PBA offsets; the PBA offset is 0x800 with the
 * OLD_MSI_OFFSETS compat flag and 0x1000 without it.
 */
#define VMXNET3_OFF_MSIX_TABLE (0x000)
#define VMXNET3_OFF_MSIX_PBA(s) \
    ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x800 : 0x1000)

/* Link speed in Mbps, shifted left by 16 bits (here: 1000 Mbps) */
#define VMXNET3_LINK_SPEED      (1000 << 16)

/* Link status: 1 - up, 0 - down. */
#define VMXNET3_LINK_STATUS_UP  0x1

/* The least significant bit must be set for both revision and version */
#define VMXNET3_UPT_REVISION      0x1
#define VMXNET3_DEVICE_REVISION   0x1

/* Number of interrupt vectors for non-MSI-X modes */
#define VMXNET3_MAX_NMSIX_INTRS   (1)
79  
/*
 * Macros for ring descriptor access.
 *
 * Every macro loads or stores a single member of a queue descriptor through
 * the vmw_shmem_* accessors:
 *   _d    - passed unchanged as the first accessor argument
 *   dpa   - base address of the descriptor; the offset of 'field' inside the
 *           descriptor structure is added to it
 *   field - member name of struct Vmxnet3_TxQueueDesc / Vmxnet3_RxQueueDesc
 *   value - value to store (WRITE variants only)
 *
 * 'dpa' is parenthesized so that any expression can be passed safely.
 */
#define VMXNET3_READ_TX_QUEUE_DESCR8(_d, dpa, field) \
    (vmw_shmem_ld8(_d, (dpa) + offsetof(struct Vmxnet3_TxQueueDesc, field)))

/* The value is an argument of vmw_shmem_st8(), not of offsetof() */
#define VMXNET3_WRITE_TX_QUEUE_DESCR8(_d, dpa, field, value) \
    (vmw_shmem_st8(_d, (dpa) + offsetof(struct Vmxnet3_TxQueueDesc, field), value))

#define VMXNET3_READ_TX_QUEUE_DESCR32(_d, dpa, field) \
    (vmw_shmem_ld32(_d, (dpa) + offsetof(struct Vmxnet3_TxQueueDesc, field)))

#define VMXNET3_WRITE_TX_QUEUE_DESCR32(_d, dpa, field, value) \
    (vmw_shmem_st32(_d, (dpa) + offsetof(struct Vmxnet3_TxQueueDesc, field), value))

#define VMXNET3_READ_TX_QUEUE_DESCR64(_d, dpa, field) \
    (vmw_shmem_ld64(_d, (dpa) + offsetof(struct Vmxnet3_TxQueueDesc, field)))

#define VMXNET3_WRITE_TX_QUEUE_DESCR64(_d, dpa, field, value) \
    (vmw_shmem_st64(_d, (dpa) + offsetof(struct Vmxnet3_TxQueueDesc, field), value))

#define VMXNET3_READ_RX_QUEUE_DESCR64(_d, dpa, field) \
    (vmw_shmem_ld64(_d, (dpa) + offsetof(struct Vmxnet3_RxQueueDesc, field)))

#define VMXNET3_READ_RX_QUEUE_DESCR32(_d, dpa, field) \
    (vmw_shmem_ld32(_d, (dpa) + offsetof(struct Vmxnet3_RxQueueDesc, field)))

#define VMXNET3_WRITE_RX_QUEUE_DESCR64(_d, dpa, field, value) \
    (vmw_shmem_st64(_d, (dpa) + offsetof(struct Vmxnet3_RxQueueDesc, field), value))

#define VMXNET3_WRITE_RX_QUEUE_DESCR8(_d, dpa, field, value) \
    (vmw_shmem_st8(_d, (dpa) + offsetof(struct Vmxnet3_RxQueueDesc, field), value))
110  
/*
 * Macros for guest driver shared area access.
 *
 *   _d    - passed unchanged as the first vmw_shmem_* argument
 *   shpa  - base address of the shared area; the offset of 'field' inside
 *           struct Vmxnet3_DriverShared is added to it
 *   field - member name of struct Vmxnet3_DriverShared
 *
 * 'shpa' is parenthesized so that any expression (for example a conditional
 * expression) can be passed without changing how the offset is added.
 */
#define VMXNET3_READ_DRV_SHARED64(_d, shpa, field) \
    (vmw_shmem_ld64(_d, (shpa) + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_READ_DRV_SHARED32(_d, shpa, field) \
    (vmw_shmem_ld32(_d, (shpa) + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_WRITE_DRV_SHARED32(_d, shpa, field, val) \
    (vmw_shmem_st32(_d, (shpa) + offsetof(struct Vmxnet3_DriverShared, field), val))

#define VMXNET3_READ_DRV_SHARED16(_d, shpa, field) \
    (vmw_shmem_ld16(_d, (shpa) + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_READ_DRV_SHARED8(_d, shpa, field) \
    (vmw_shmem_ld8(_d, (shpa) + offsetof(struct Vmxnet3_DriverShared, field)))

/* Bulk read of 'l' bytes starting at 'field' into buffer 'b' */
#define VMXNET3_READ_DRV_SHARED(_d, shpa, field, b, l) \
    (vmw_shmem_read(_d, (shpa) + offsetof(struct Vmxnet3_DriverShared, field), b, l))

/* True only when every bit of 'flag' is set in 'field' */
#define VMXNET_FLAG_IS_SET(field, flag) (((field) & (flag)) == (flag))
131  
132  struct VMXNET3Class {
133      PCIDeviceClass parent_class;
134      DeviceRealize parent_dc_realize;
135  };
136  typedef struct VMXNET3Class VMXNET3Class;
137  
138  DECLARE_CLASS_CHECKERS(VMXNET3Class, VMXNET3_DEVICE,
139                         TYPE_VMXNET3)
140  
141  static inline void vmxnet3_ring_init(PCIDevice *d,
142                                       Vmxnet3Ring *ring,
143                                       hwaddr pa,
144                                       uint32_t size,
145                                       uint32_t cell_size,
146                                       bool zero_region)
147  {
148      ring->pa = pa;
149      ring->size = size;
150      ring->cell_size = cell_size;
151      ring->gen = VMXNET3_INIT_GEN;
152      ring->next = 0;
153  
154      if (zero_region) {
155          vmw_shmem_set(d, pa, 0, size * cell_size);
156      }
157  }
158  
/*
 * Print the state of ring 'r' (base, size, cell size, generation and next
 * index) through the printf-like macro passed as 'macro'; 'ring_name' and
 * 'ridx' label the output.
 */
#define VMXNET3_RING_DUMP(macro, ring_name, ridx, r)                         \
    macro("%s#%d: base %" PRIx64 " size %u cell_size %u gen %d next %u",  \
          (ring_name), (ridx),                                               \
          (r)->pa, (r)->size, (r)->cell_size, (r)->gen, (r)->next)
163  
164  static inline void vmxnet3_ring_inc(Vmxnet3Ring *ring)
165  {
166      if (++ring->next >= ring->size) {
167          ring->next = 0;
168          ring->gen ^= 1;
169      }
170  }
171  
172  static inline void vmxnet3_ring_dec(Vmxnet3Ring *ring)
173  {
174      if (ring->next-- == 0) {
175          ring->next = ring->size - 1;
176          ring->gen ^= 1;
177      }
178  }
179  
180  static inline hwaddr vmxnet3_ring_curr_cell_pa(Vmxnet3Ring *ring)
181  {
182      return ring->pa + ring->next * ring->cell_size;
183  }
184  
185  static inline void vmxnet3_ring_read_curr_cell(PCIDevice *d, Vmxnet3Ring *ring,
186                                                 void *buff)
187  {
188      vmw_shmem_read(d, vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
189  }
190  
191  static inline void vmxnet3_ring_write_curr_cell(PCIDevice *d, Vmxnet3Ring *ring,
192                                                  void *buff)
193  {
194      vmw_shmem_write(d, vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
195  }
196  
197  static inline size_t vmxnet3_ring_curr_cell_idx(Vmxnet3Ring *ring)
198  {
199      return ring->next;
200  }
201  
202  static inline uint8_t vmxnet3_ring_curr_gen(Vmxnet3Ring *ring)
203  {
204      return ring->gen;
205  }
206  
207  /* Debug trace-related functions */
/*
 * Trace every field of a TX descriptor through VMW_PKPRN.
 * The descriptor is only read, never modified.
 */
static inline void
vmxnet3_dump_tx_descr(struct Vmxnet3_TxDesc *descr)
{
    VMW_PKPRN("TX DESCR: "
              "addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
              "dtype: %d, ext1: %d, msscof: %d, hlen: %d, om: %d, "
              "eop: %d, cq: %d, ext2: %d, ti: %d, tci: %d",
              descr->addr, descr->len, descr->gen, descr->rsvd,
              descr->dtype, descr->ext1, descr->msscof, descr->hlen, descr->om,
              descr->eop, descr->cq, descr->ext2, descr->ti, descr->tci);
}
219  
/*
 * Trace the fields of a virtio-net header (flags, GSO type and size, header
 * length, checksum start and offset) through VMW_PKPRN.
 */
static inline void
vmxnet3_dump_virt_hdr(struct virtio_net_hdr *vhdr)
{
    VMW_PKPRN("VHDR: flags 0x%x, gso_type: 0x%x, hdr_len: %d, gso_size: %d, "
              "csum_start: %d, csum_offset: %d",
              vhdr->flags, vhdr->gso_type, vhdr->hdr_len, vhdr->gso_size,
              vhdr->csum_start, vhdr->csum_offset);
}
228  
/*
 * Trace every field of an RX descriptor through VMW_PKPRN.
 * The descriptor is only read, never modified.
 */
static inline void
vmxnet3_dump_rx_descr(struct Vmxnet3_RxDesc *descr)
{
    VMW_PKPRN("RX DESCR: addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
              "dtype: %d, ext1: %d, btype: %d",
              descr->addr, descr->len, descr->gen,
              descr->rsvd, descr->dtype, descr->ext1, descr->btype);
}
237  
238  /* Interrupt management */
239  
240  /*
241   * This function returns sign whether interrupt line is in asserted state
242   * This depends on the type of interrupt used. For INTX interrupt line will
243   * be asserted until explicit deassertion, for MSI(X) interrupt line will
244   * be deasserted automatically due to notification semantics of the MSI(X)
245   * interrupts
246   */
247  static bool _vmxnet3_assert_interrupt_line(VMXNET3State *s, uint32_t int_idx)
248  {
249      PCIDevice *d = PCI_DEVICE(s);
250  
251      if (s->msix_used && msix_enabled(d)) {
252          VMW_IRPRN("Sending MSI-X notification for vector %u", int_idx);
253          msix_notify(d, int_idx);
254          return false;
255      }
256      if (msi_enabled(d)) {
257          VMW_IRPRN("Sending MSI notification for vector %u", int_idx);
258          msi_notify(d, int_idx);
259          return false;
260      }
261  
262      VMW_IRPRN("Asserting line for interrupt %u", int_idx);
263      pci_irq_assert(d);
264      return true;
265  }
266  
267  static void _vmxnet3_deassert_interrupt_line(VMXNET3State *s, int lidx)
268  {
269      PCIDevice *d = PCI_DEVICE(s);
270  
271      /*
272       * This function should never be called for MSI(X) interrupts
273       * because deassertion never required for message interrupts
274       */
275      assert(!s->msix_used || !msix_enabled(d));
276      /*
277       * This function should never be called for MSI(X) interrupts
278       * because deassertion never required for message interrupts
279       */
280      assert(!msi_enabled(d));
281  
282      VMW_IRPRN("Deasserting line for interrupt %u", lidx);
283      pci_irq_deassert(d);
284  }
285  
286  static void vmxnet3_update_interrupt_line_state(VMXNET3State *s, int lidx)
287  {
288      if (!s->interrupt_states[lidx].is_pending &&
289         s->interrupt_states[lidx].is_asserted) {
290          VMW_IRPRN("New interrupt line state for index %d is DOWN", lidx);
291          _vmxnet3_deassert_interrupt_line(s, lidx);
292          s->interrupt_states[lidx].is_asserted = false;
293          return;
294      }
295  
296      if (s->interrupt_states[lidx].is_pending &&
297         !s->interrupt_states[lidx].is_masked &&
298         !s->interrupt_states[lidx].is_asserted) {
299          VMW_IRPRN("New interrupt line state for index %d is UP", lidx);
300          s->interrupt_states[lidx].is_asserted =
301              _vmxnet3_assert_interrupt_line(s, lidx);
302          s->interrupt_states[lidx].is_pending = false;
303          return;
304      }
305  }
306  
307  static void vmxnet3_trigger_interrupt(VMXNET3State *s, int lidx)
308  {
309      PCIDevice *d = PCI_DEVICE(s);
310      s->interrupt_states[lidx].is_pending = true;
311      vmxnet3_update_interrupt_line_state(s, lidx);
312  
313      if (s->msix_used && msix_enabled(d) && s->auto_int_masking) {
314          goto do_automask;
315      }
316  
317      if (msi_enabled(d) && s->auto_int_masking) {
318          goto do_automask;
319      }
320  
321      return;
322  
323  do_automask:
324      s->interrupt_states[lidx].is_masked = true;
325      vmxnet3_update_interrupt_line_state(s, lidx);
326  }
327  
328  static bool vmxnet3_interrupt_asserted(VMXNET3State *s, int lidx)
329  {
330      return s->interrupt_states[lidx].is_asserted;
331  }
332  
333  static void vmxnet3_clear_interrupt(VMXNET3State *s, int int_idx)
334  {
335      s->interrupt_states[int_idx].is_pending = false;
336      if (s->auto_int_masking) {
337          s->interrupt_states[int_idx].is_masked = true;
338      }
339      vmxnet3_update_interrupt_line_state(s, int_idx);
340  }
341  
342  static void
343  vmxnet3_on_interrupt_mask_changed(VMXNET3State *s, int lidx, bool is_masked)
344  {
345      s->interrupt_states[lidx].is_masked = is_masked;
346      vmxnet3_update_interrupt_line_state(s, lidx);
347  }
348  
349  static bool vmxnet3_verify_driver_magic(PCIDevice *d, hwaddr dshmem)
350  {
351      return (VMXNET3_READ_DRV_SHARED32(d, dshmem, magic) == VMXNET3_REV1_MAGIC);
352  }
353  
/* Extract byte number 'byte_num' (0 = least significant) from 'x' */
#define VMXNET3_GET_BYTE(x, byte_num) \
    (((x) >> ((byte_num) * 8)) & 0xFF)

/* Place the low 8 bits of 'val' at byte position 'byte_num' of a uint32_t */
#define VMXNET3_MAKE_BYTE(byte_num, val) \
    (((uint32_t)((val) & 0xFF)) << ((byte_num) * 8))
357  
358  static void vmxnet3_set_variable_mac(VMXNET3State *s, uint32_t h, uint32_t l)
359  {
360      s->conf.macaddr.a[0] = VMXNET3_GET_BYTE(l,  0);
361      s->conf.macaddr.a[1] = VMXNET3_GET_BYTE(l,  1);
362      s->conf.macaddr.a[2] = VMXNET3_GET_BYTE(l,  2);
363      s->conf.macaddr.a[3] = VMXNET3_GET_BYTE(l,  3);
364      s->conf.macaddr.a[4] = VMXNET3_GET_BYTE(h, 0);
365      s->conf.macaddr.a[5] = VMXNET3_GET_BYTE(h, 1);
366  
367      VMW_CFPRN("Variable MAC: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));
368  
369      qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
370  }
371  
372  static uint64_t vmxnet3_get_mac_low(MACAddr *addr)
373  {
374      return VMXNET3_MAKE_BYTE(0, addr->a[0]) |
375             VMXNET3_MAKE_BYTE(1, addr->a[1]) |
376             VMXNET3_MAKE_BYTE(2, addr->a[2]) |
377             VMXNET3_MAKE_BYTE(3, addr->a[3]);
378  }
379  
380  static uint64_t vmxnet3_get_mac_high(MACAddr *addr)
381  {
382      return VMXNET3_MAKE_BYTE(0, addr->a[4]) |
383             VMXNET3_MAKE_BYTE(1, addr->a[5]);
384  }
385  
386  static void
387  vmxnet3_inc_tx_consumption_counter(VMXNET3State *s, int qidx)
388  {
389      vmxnet3_ring_inc(&s->txq_descr[qidx].tx_ring);
390  }
391  
392  static inline void
393  vmxnet3_inc_rx_consumption_counter(VMXNET3State *s, int qidx, int ridx)
394  {
395      vmxnet3_ring_inc(&s->rxq_descr[qidx].rx_ring[ridx]);
396  }
397  
398  static inline void
399  vmxnet3_inc_tx_completion_counter(VMXNET3State *s, int qidx)
400  {
401      vmxnet3_ring_inc(&s->txq_descr[qidx].comp_ring);
402  }
403  
404  static void
405  vmxnet3_inc_rx_completion_counter(VMXNET3State *s, int qidx)
406  {
407      vmxnet3_ring_inc(&s->rxq_descr[qidx].comp_ring);
408  }
409  
410  static void
411  vmxnet3_dec_rx_completion_counter(VMXNET3State *s, int qidx)
412  {
413      vmxnet3_ring_dec(&s->rxq_descr[qidx].comp_ring);
414  }
415  
416  static void vmxnet3_complete_packet(VMXNET3State *s, int qidx, uint32_t tx_ridx)
417  {
418      struct Vmxnet3_TxCompDesc txcq_descr;
419      PCIDevice *d = PCI_DEVICE(s);
420  
421      VMXNET3_RING_DUMP(VMW_RIPRN, "TXC", qidx, &s->txq_descr[qidx].comp_ring);
422  
423      memset(&txcq_descr, 0, sizeof(txcq_descr));
424      txcq_descr.txdIdx = tx_ridx;
425      txcq_descr.gen = vmxnet3_ring_curr_gen(&s->txq_descr[qidx].comp_ring);
426      txcq_descr.val1 = cpu_to_le32(txcq_descr.val1);
427      txcq_descr.val2 = cpu_to_le32(txcq_descr.val2);
428      vmxnet3_ring_write_curr_cell(d, &s->txq_descr[qidx].comp_ring, &txcq_descr);
429  
430      /* Flush changes in TX descriptor before changing the counter value */
431      smp_wmb();
432  
433      vmxnet3_inc_tx_completion_counter(s, qidx);
434      vmxnet3_trigger_interrupt(s, s->txq_descr[qidx].intr_idx);
435  }
436  
437  static bool
438  vmxnet3_setup_tx_offloads(VMXNET3State *s)
439  {
440      switch (s->offload_mode) {
441      case VMXNET3_OM_NONE:
442          return net_tx_pkt_build_vheader(s->tx_pkt, false, false, 0);
443  
444      case VMXNET3_OM_CSUM:
445          VMW_PKPRN("L4 CSO requested\n");
446          return net_tx_pkt_build_vheader(s->tx_pkt, false, true, 0);
447  
448      case VMXNET3_OM_TSO:
449          VMW_PKPRN("GSO offload requested.");
450          if (!net_tx_pkt_build_vheader(s->tx_pkt, true, true,
451              s->cso_or_gso_size)) {
452              return false;
453          }
454          net_tx_pkt_update_ip_checksums(s->tx_pkt);
455          break;
456  
457      default:
458          g_assert_not_reached();
459          return false;
460      }
461  
462      return true;
463  }
464  
465  static void
466  vmxnet3_tx_retrieve_metadata(VMXNET3State *s,
467                               const struct Vmxnet3_TxDesc *txd)
468  {
469      s->offload_mode = txd->om;
470      s->cso_or_gso_size = txd->msscof;
471      s->tci = txd->tci;
472      s->needs_vlan = txd->ti;
473  }
474  
/* Outcome of processing one packet, used to pick the statistic to update */
typedef enum {
    VMXNET3_PKT_STATUS_OK = 0,
    VMXNET3_PKT_STATUS_ERROR = 1,
    VMXNET3_PKT_STATUS_DISCARD = 2,    /* only for tx */
    VMXNET3_PKT_STATUS_OUT_OF_BUF = 3  /* only for rx */
} Vmxnet3PktStatus;
481  
482  static void
483  vmxnet3_on_tx_done_update_stats(VMXNET3State *s, int qidx,
484      Vmxnet3PktStatus status)
485  {
486      size_t tot_len = net_tx_pkt_get_total_len(s->tx_pkt);
487      struct UPT1_TxStats *stats = &s->txq_descr[qidx].txq_stats;
488  
489      switch (status) {
490      case VMXNET3_PKT_STATUS_OK:
491          switch (net_tx_pkt_get_packet_type(s->tx_pkt)) {
492          case ETH_PKT_BCAST:
493              stats->bcastPktsTxOK++;
494              stats->bcastBytesTxOK += tot_len;
495              break;
496          case ETH_PKT_MCAST:
497              stats->mcastPktsTxOK++;
498              stats->mcastBytesTxOK += tot_len;
499              break;
500          case ETH_PKT_UCAST:
501              stats->ucastPktsTxOK++;
502              stats->ucastBytesTxOK += tot_len;
503              break;
504          default:
505              g_assert_not_reached();
506          }
507  
508          if (s->offload_mode == VMXNET3_OM_TSO) {
509              /*
510               * According to VMWARE headers this statistic is a number
511               * of packets after segmentation but since we don't have
512               * this information in QEMU model, the best we can do is to
513               * provide number of non-segmented packets
514               */
515              stats->TSOPktsTxOK++;
516              stats->TSOBytesTxOK += tot_len;
517          }
518          break;
519  
520      case VMXNET3_PKT_STATUS_DISCARD:
521          stats->pktsTxDiscard++;
522          break;
523  
524      case VMXNET3_PKT_STATUS_ERROR:
525          stats->pktsTxError++;
526          break;
527  
528      default:
529          g_assert_not_reached();
530      }
531  }
532  
533  static void
534  vmxnet3_on_rx_done_update_stats(VMXNET3State *s,
535                                  int qidx,
536                                  Vmxnet3PktStatus status)
537  {
538      struct UPT1_RxStats *stats = &s->rxq_descr[qidx].rxq_stats;
539      size_t tot_len = net_rx_pkt_get_total_len(s->rx_pkt);
540  
541      switch (status) {
542      case VMXNET3_PKT_STATUS_OUT_OF_BUF:
543          stats->pktsRxOutOfBuf++;
544          break;
545  
546      case VMXNET3_PKT_STATUS_ERROR:
547          stats->pktsRxError++;
548          break;
549      case VMXNET3_PKT_STATUS_OK:
550          switch (net_rx_pkt_get_packet_type(s->rx_pkt)) {
551          case ETH_PKT_BCAST:
552              stats->bcastPktsRxOK++;
553              stats->bcastBytesRxOK += tot_len;
554              break;
555          case ETH_PKT_MCAST:
556              stats->mcastPktsRxOK++;
557              stats->mcastBytesRxOK += tot_len;
558              break;
559          case ETH_PKT_UCAST:
560              stats->ucastPktsRxOK++;
561              stats->ucastBytesRxOK += tot_len;
562              break;
563          default:
564              g_assert_not_reached();
565          }
566  
567          if (tot_len > s->mtu) {
568              stats->LROPktsRxOK++;
569              stats->LROBytesRxOK += tot_len;
570          }
571          break;
572      default:
573          g_assert_not_reached();
574      }
575  }
576  
577  static inline void
578  vmxnet3_ring_read_curr_txdesc(PCIDevice *pcidev, Vmxnet3Ring *ring,
579                                struct Vmxnet3_TxDesc *txd)
580  {
581      vmxnet3_ring_read_curr_cell(pcidev, ring, txd);
582      txd->addr = le64_to_cpu(txd->addr);
583      txd->val1 = le32_to_cpu(txd->val1);
584      txd->val2 = le32_to_cpu(txd->val2);
585  }
586  
587  static inline bool
588  vmxnet3_pop_next_tx_descr(VMXNET3State *s,
589                            int qidx,
590                            struct Vmxnet3_TxDesc *txd,
591                            uint32_t *descr_idx)
592  {
593      Vmxnet3Ring *ring = &s->txq_descr[qidx].tx_ring;
594      PCIDevice *d = PCI_DEVICE(s);
595  
596      vmxnet3_ring_read_curr_txdesc(d, ring, txd);
597      if (txd->gen == vmxnet3_ring_curr_gen(ring)) {
598          /* Only read after generation field verification */
599          smp_rmb();
600          /* Re-read to be sure we got the latest version */
601          vmxnet3_ring_read_curr_txdesc(d, ring, txd);
602          VMXNET3_RING_DUMP(VMW_RIPRN, "TX", qidx, ring);
603          *descr_idx = vmxnet3_ring_curr_cell_idx(ring);
604          vmxnet3_inc_tx_consumption_counter(s, qidx);
605          return true;
606      }
607  
608      return false;
609  }
610  
611  static bool
612  vmxnet3_send_packet(VMXNET3State *s, uint32_t qidx)
613  {
614      Vmxnet3PktStatus status = VMXNET3_PKT_STATUS_OK;
615  
616      if (!vmxnet3_setup_tx_offloads(s)) {
617          status = VMXNET3_PKT_STATUS_ERROR;
618          goto func_exit;
619      }
620  
621      /* debug prints */
622      vmxnet3_dump_virt_hdr(net_tx_pkt_get_vhdr(s->tx_pkt));
623      net_tx_pkt_dump(s->tx_pkt);
624  
625      if (!net_tx_pkt_send(s->tx_pkt, qemu_get_queue(s->nic))) {
626          status = VMXNET3_PKT_STATUS_DISCARD;
627          goto func_exit;
628      }
629  
630  func_exit:
631      vmxnet3_on_tx_done_update_stats(s, qidx, status);
632      return (status == VMXNET3_PKT_STATUS_OK);
633  }
634  
635  static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
636  {
637      struct Vmxnet3_TxDesc txd;
638      uint32_t txd_idx;
639      uint32_t data_len;
640      hwaddr data_pa;
641  
642      for (;;) {
643          if (!vmxnet3_pop_next_tx_descr(s, qidx, &txd, &txd_idx)) {
644              break;
645          }
646  
647          vmxnet3_dump_tx_descr(&txd);
648  
649          if (!s->skip_current_tx_pkt) {
650              data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE;
651              data_pa = txd.addr;
652  
653              if (!net_tx_pkt_add_raw_fragment_pci(s->tx_pkt, PCI_DEVICE(s),
654                                                   data_pa, data_len)) {
655                  s->skip_current_tx_pkt = true;
656              }
657          }
658  
659          if (s->tx_sop) {
660              vmxnet3_tx_retrieve_metadata(s, &txd);
661              s->tx_sop = false;
662          }
663  
664          if (txd.eop) {
665              if (!s->skip_current_tx_pkt && net_tx_pkt_parse(s->tx_pkt)) {
666                  if (s->needs_vlan) {
667                      net_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci);
668                  }
669  
670                  vmxnet3_send_packet(s, qidx);
671              } else {
672                  vmxnet3_on_tx_done_update_stats(s, qidx,
673                                                  VMXNET3_PKT_STATUS_ERROR);
674              }
675  
676              vmxnet3_complete_packet(s, qidx, txd_idx);
677              s->tx_sop = true;
678              s->skip_current_tx_pkt = false;
679              net_tx_pkt_reset(s->tx_pkt,
680                               net_tx_pkt_unmap_frag_pci, PCI_DEVICE(s));
681          }
682      }
683  
684      net_tx_pkt_reset(s->tx_pkt, net_tx_pkt_unmap_frag_pci, PCI_DEVICE(s));
685  }
686  
687  static inline void
688  vmxnet3_read_next_rx_descr(VMXNET3State *s, int qidx, int ridx,
689                             struct Vmxnet3_RxDesc *dbuf, uint32_t *didx)
690  {
691      PCIDevice *d = PCI_DEVICE(s);
692  
693      Vmxnet3Ring *ring = &s->rxq_descr[qidx].rx_ring[ridx];
694      *didx = vmxnet3_ring_curr_cell_idx(ring);
695      vmxnet3_ring_read_curr_cell(d, ring, dbuf);
696      dbuf->addr = le64_to_cpu(dbuf->addr);
697      dbuf->val1 = le32_to_cpu(dbuf->val1);
698      dbuf->ext1 = le32_to_cpu(dbuf->ext1);
699  }
700  
701  static inline uint8_t
702  vmxnet3_get_rx_ring_gen(VMXNET3State *s, int qidx, int ridx)
703  {
704      return s->rxq_descr[qidx].rx_ring[ridx].gen;
705  }
706  
707  static inline hwaddr
708  vmxnet3_pop_rxc_descr(VMXNET3State *s, int qidx, uint32_t *descr_gen)
709  {
710      uint8_t ring_gen;
711      struct Vmxnet3_RxCompDesc rxcd;
712  
713      hwaddr daddr =
714          vmxnet3_ring_curr_cell_pa(&s->rxq_descr[qidx].comp_ring);
715  
716      pci_dma_read(PCI_DEVICE(s),
717                   daddr, &rxcd, sizeof(struct Vmxnet3_RxCompDesc));
718      rxcd.val1 = le32_to_cpu(rxcd.val1);
719      rxcd.val2 = le32_to_cpu(rxcd.val2);
720      rxcd.val3 = le32_to_cpu(rxcd.val3);
721      ring_gen = vmxnet3_ring_curr_gen(&s->rxq_descr[qidx].comp_ring);
722  
723      if (rxcd.gen != ring_gen) {
724          *descr_gen = ring_gen;
725          vmxnet3_inc_rx_completion_counter(s, qidx);
726          return daddr;
727      }
728  
729      return 0;
730  }
731  
732  static inline void
733  vmxnet3_revert_rxc_descr(VMXNET3State *s, int qidx)
734  {
735      vmxnet3_dec_rx_completion_counter(s, qidx);
736  }
737  
/* RX queue index used by the descriptor lookup helpers below */
#define RXQ_IDX      (0)
/* Indexes of the two RX rings of a queue: head/body ring and body-only ring */
#define RX_HEAD_BODY_RING (0)
#define RX_BODY_ONLY_RING (1)
741  
742  static bool
743  vmxnet3_get_next_head_rx_descr(VMXNET3State *s,
744                                 struct Vmxnet3_RxDesc *descr_buf,
745                                 uint32_t *descr_idx,
746                                 uint32_t *ridx)
747  {
748      for (;;) {
749          uint32_t ring_gen;
750          vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
751                                     descr_buf, descr_idx);
752  
753          /* If no more free descriptors - return */
754          ring_gen = vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING);
755          if (descr_buf->gen != ring_gen) {
756              return false;
757          }
758  
759          /* Only read after generation field verification */
760          smp_rmb();
761          /* Re-read to be sure we got the latest version */
762          vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
763                                     descr_buf, descr_idx);
764  
765          /* Mark current descriptor as used/skipped */
766          vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
767  
768          /* If this is what we are looking for - return */
769          if (descr_buf->btype == VMXNET3_RXD_BTYPE_HEAD) {
770              *ridx = RX_HEAD_BODY_RING;
771              return true;
772          }
773      }
774  }
775  
776  static bool
777  vmxnet3_get_next_body_rx_descr(VMXNET3State *s,
778                                 struct Vmxnet3_RxDesc *d,
779                                 uint32_t *didx,
780                                 uint32_t *ridx)
781  {
782      vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);
783  
784      /* Try to find corresponding descriptor in head/body ring */
785      if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING)) {
786          /* Only read after generation field verification */
787          smp_rmb();
788          /* Re-read to be sure we got the latest version */
789          vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);
790          if (d->btype == VMXNET3_RXD_BTYPE_BODY) {
791              vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
792              *ridx = RX_HEAD_BODY_RING;
793              return true;
794          }
795      }
796  
797      /*
798       * If there is no free descriptors on head/body ring or next free
799       * descriptor is a head descriptor switch to body only ring
800       */
801      vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);
802  
803      /* If no more free descriptors - return */
804      if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_BODY_ONLY_RING)) {
805          /* Only read after generation field verification */
806          smp_rmb();
807          /* Re-read to be sure we got the latest version */
808          vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);
809          assert(d->btype == VMXNET3_RXD_BTYPE_BODY);
810          *ridx = RX_BODY_ONLY_RING;
811          vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_BODY_ONLY_RING);
812          return true;
813      }
814  
815      return false;
816  }
817  
818  static inline bool
819  vmxnet3_get_next_rx_descr(VMXNET3State *s, bool is_head,
820                            struct Vmxnet3_RxDesc *descr_buf,
821                            uint32_t *descr_idx,
822                            uint32_t *ridx)
823  {
824      if (is_head || !s->rx_packets_compound) {
825          return vmxnet3_get_next_head_rx_descr(s, descr_buf, descr_idx, ridx);
826      } else {
827          return vmxnet3_get_next_body_rx_descr(s, descr_buf, descr_idx, ridx);
828      }
829  }
830  
831  /* In case packet was csum offloaded (either NEEDS_CSUM or DATA_VALID),
832   * the implementation always passes an RxCompDesc with a "Checksum
833   * calculated and found correct" to the OS (cnc=0 and tuc=1, see
834   * vmxnet3_rx_update_descr). This emulates the observed ESXi behavior.
835   *
836   * Therefore, if packet has the NEEDS_CSUM set, we must calculate
837   * and place a fully computed checksum into the tcp/udp header.
838   * Otherwise, the OS driver will receive a checksum-correct indication
839   * (CHECKSUM_UNNECESSARY), but with the actual tcp/udp checksum field
840   * having just the pseudo header csum value.
841   *
842   * While this is not a problem if packet is destined for local delivery,
843   * in the case the host OS performs forwarding, it will forward an
844   * incorrectly checksummed packet.
845   */
846  static void vmxnet3_rx_need_csum_calculate(struct NetRxPkt *pkt,
847                                             const void *pkt_data,
848                                             size_t pkt_len)
849  {
850      struct virtio_net_hdr *vhdr;
851      bool hasip4, hasip6;
852      EthL4HdrProto l4hdr_proto;
853      uint8_t *data;
854      int len;
855  
856      vhdr = net_rx_pkt_get_vhdr(pkt);
857      if (!VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
858          return;
859      }
860  
861      net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
862      if (!(hasip4 || hasip6) ||
863          (l4hdr_proto != ETH_L4_HDR_PROTO_TCP &&
864           l4hdr_proto != ETH_L4_HDR_PROTO_UDP)) {
865          return;
866      }
867  
868      vmxnet3_dump_virt_hdr(vhdr);
869  
870      /* Validate packet len: csum_start + scum_offset + length of csum field */
871      if (pkt_len < (vhdr->csum_start + vhdr->csum_offset + 2)) {
872          VMW_PKPRN("packet len:%zu < csum_start(%d) + csum_offset(%d) + 2, "
873                    "cannot calculate checksum",
874                    pkt_len, vhdr->csum_start, vhdr->csum_offset);
875          return;
876      }
877  
878      data = (uint8_t *)pkt_data + vhdr->csum_start;
879      len = pkt_len - vhdr->csum_start;
880      /* Put the checksum obtained into the packet */
881      stw_be_p(data + vhdr->csum_offset,
882               net_checksum_finish_nozero(net_checksum_add(len, data)));
883  
884      vhdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
885      vhdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
886  }
887  
888  static void vmxnet3_rx_update_descr(struct NetRxPkt *pkt,
889      struct Vmxnet3_RxCompDesc *rxcd)
890  {
891      int csum_ok, is_gso;
892      bool hasip4, hasip6;
893      EthL4HdrProto l4hdr_proto;
894      struct virtio_net_hdr *vhdr;
895      uint8_t offload_type;
896  
897      if (net_rx_pkt_is_vlan_stripped(pkt)) {
898          rxcd->ts = 1;
899          rxcd->tci = net_rx_pkt_get_vlan_tag(pkt);
900      }
901  
902      vhdr = net_rx_pkt_get_vhdr(pkt);
903      /*
904       * Checksum is valid when lower level tell so or when lower level
905       * requires checksum offload telling that packet produced/bridged
906       * locally and did travel over network after last checksum calculation
907       * or production
908       */
909      csum_ok = VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_DATA_VALID) ||
910                VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM);
911  
912      offload_type = vhdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
913      is_gso = (offload_type != VIRTIO_NET_HDR_GSO_NONE) ? 1 : 0;
914  
915      if (!csum_ok && !is_gso) {
916          goto nocsum;
917      }
918  
919      net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
920      if ((l4hdr_proto != ETH_L4_HDR_PROTO_TCP &&
921           l4hdr_proto != ETH_L4_HDR_PROTO_UDP) ||
922          (!hasip4 && !hasip6)) {
923          goto nocsum;
924      }
925  
926      rxcd->cnc = 0;
927      rxcd->v4 = hasip4 ? 1 : 0;
928      rxcd->v6 = hasip6 ? 1 : 0;
929      rxcd->tcp = l4hdr_proto == ETH_L4_HDR_PROTO_TCP;
930      rxcd->udp = l4hdr_proto == ETH_L4_HDR_PROTO_UDP;
931      rxcd->fcs = rxcd->tuc = rxcd->ipc = 1;
932      return;
933  
934  nocsum:
935      rxcd->cnc = 1;
936      return;
937  }
938  
939  static void
940  vmxnet3_pci_dma_writev(PCIDevice *pci_dev,
941                         const struct iovec *iov,
942                         size_t start_iov_off,
943                         hwaddr target_addr,
944                         size_t bytes_to_copy)
945  {
946      size_t curr_off = 0;
947      size_t copied = 0;
948  
949      while (bytes_to_copy) {
950          if (start_iov_off < (curr_off + iov->iov_len)) {
951              size_t chunk_len =
952                  MIN((curr_off + iov->iov_len) - start_iov_off, bytes_to_copy);
953  
954              pci_dma_write(pci_dev, target_addr + copied,
955                            iov->iov_base + start_iov_off - curr_off,
956                            chunk_len);
957  
958              copied += chunk_len;
959              start_iov_off += chunk_len;
960              curr_off = start_iov_off;
961              bytes_to_copy -= chunk_len;
962          } else {
963              curr_off += iov->iov_len;
964          }
965          iov++;
966      }
967  }
968  
969  static void
970  vmxnet3_pci_dma_write_rxcd(PCIDevice *pcidev, dma_addr_t pa,
971                             struct Vmxnet3_RxCompDesc *rxcd)
972  {
973      rxcd->val1 = cpu_to_le32(rxcd->val1);
974      rxcd->val2 = cpu_to_le32(rxcd->val2);
975      rxcd->val3 = cpu_to_le32(rxcd->val3);
976      pci_dma_write(pcidev, pa, rxcd, sizeof(*rxcd));
977  }
978  
/*
 * Deliver the packet currently held in s->rx_pkt to the guest through RX
 * queue RXQ_IDX.
 *
 * The payload is split over as many RX descriptors as needed (at most
 * s->max_rx_frags). For every fragment one RX completion descriptor is
 * produced. Each completion descriptor is written one step late (on the
 * next loop iteration, or after the loop) so that the final one can be
 * marked with eop/err before it is written.
 *
 * The RX queue interrupt is triggered and the queue statistics are updated
 * in every case.
 *
 * Returns true when the whole packet was copied, false when the fragment
 * limit was hit or descriptors ran out.
 */
static bool
vmxnet3_indicate_packet(VMXNET3State *s)
{
    struct Vmxnet3_RxDesc rxd;
    PCIDevice *d = PCI_DEVICE(s);
    bool is_head = true;
    uint32_t rxd_idx;
    uint32_t rx_ridx = 0;

    struct Vmxnet3_RxCompDesc rxcd;
    uint32_t new_rxcd_gen = VMXNET3_INIT_GEN;
    /* completion descriptor popped in this iteration, not yet filled */
    hwaddr new_rxcd_pa = 0;
    /* completion descriptor filled in rxcd, waiting to be written */
    hwaddr ready_rxcd_pa = 0;
    struct iovec *data = net_rx_pkt_get_iovec(s->rx_pkt);
    size_t bytes_copied = 0;
    size_t bytes_left = net_rx_pkt_get_total_len(s->rx_pkt);
    uint16_t num_frags = 0;
    size_t chunk_size;

    net_rx_pkt_dump(s->rx_pkt);

    while (bytes_left > 0) {

        /* cannot add more frags to packet */
        if (num_frags == s->max_rx_frags) {
            break;
        }

        /* a zero address means no completion descriptor is available */
        new_rxcd_pa = vmxnet3_pop_rxc_descr(s, RXQ_IDX, &new_rxcd_gen);
        if (!new_rxcd_pa) {
            break;
        }

        /*
         * On failure new_rxcd_pa stays non-zero, so the popped completion
         * descriptor is reverted after the loop.
         */
        if (!vmxnet3_get_next_rx_descr(s, is_head, &rxd, &rxd_idx, &rx_ridx)) {
            break;
        }

        /* copy as much as fits into this RX buffer */
        chunk_size = MIN(bytes_left, rxd.len);
        vmxnet3_pci_dma_writev(d, data, bytes_copied, rxd.addr, chunk_size);
        bytes_copied += chunk_size;
        bytes_left -= chunk_size;

        vmxnet3_dump_rx_descr(&rxd);

        /* write out the completion descriptor of the previous fragment */
        if (ready_rxcd_pa != 0) {
            vmxnet3_pci_dma_write_rxcd(d, ready_rxcd_pa, &rxcd);
        }

        memset(&rxcd, 0, sizeof(struct Vmxnet3_RxCompDesc));
        rxcd.rxdIdx = rxd_idx;
        rxcd.len = chunk_size;
        rxcd.sop = is_head;
        rxcd.gen = new_rxcd_gen;
        rxcd.rqID = RXQ_IDX + rx_ridx * s->rxq_num;

        /* VLAN/checksum info is only filled in for the last fragment */
        if (bytes_left == 0) {
            vmxnet3_rx_update_descr(s->rx_pkt, &rxcd);
        }

        VMW_RIPRN("RX Completion descriptor: rxRing: %lu rxIdx %lu len %lu "
                  "sop %d csum_correct %lu",
                  (unsigned long) rx_ridx,
                  (unsigned long) rxcd.rxdIdx,
                  (unsigned long) rxcd.len,
                  (int) rxcd.sop,
                  (unsigned long) rxcd.tuc);

        is_head = false;
        ready_rxcd_pa = new_rxcd_pa;
        new_rxcd_pa = 0;
        num_frags++;
    }

    /* the last filled completion descriptor ends the packet */
    if (ready_rxcd_pa != 0) {
        rxcd.eop = 1;
        /* flag an error when part of the packet could not be delivered */
        rxcd.err = (bytes_left != 0);

        vmxnet3_pci_dma_write_rxcd(d, ready_rxcd_pa, &rxcd);

        /* Flush RX descriptor changes */
        smp_wmb();
    }

    /* give back a completion descriptor that was popped but never used */
    if (new_rxcd_pa != 0) {
        vmxnet3_revert_rxc_descr(s, RXQ_IDX);
    }

    vmxnet3_trigger_interrupt(s, s->rxq_descr[RXQ_IDX].intr_idx);

    if (bytes_left == 0) {
        vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_OK);
        return true;
    } else if (num_frags == s->max_rx_frags) {
        /* packet needed more fragments than allowed */
        vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_ERROR);
        return false;
    } else {
        /* ran out of RX or RX completion descriptors */
        vmxnet3_on_rx_done_update_stats(s, RXQ_IDX,
                                        VMXNET3_PKT_STATUS_OUT_OF_BUF);
        return false;
    }
}
1080  
1081  static void
1082  vmxnet3_io_bar0_write(void *opaque, hwaddr addr,
1083                        uint64_t val, unsigned size)
1084  {
1085      VMXNET3State *s = opaque;
1086  
1087      if (!s->device_active) {
1088          return;
1089      }
1090  
1091      if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_TXPROD,
1092                          VMXNET3_DEVICE_MAX_TX_QUEUES, VMXNET3_REG_ALIGN)) {
1093          int tx_queue_idx =
1094              VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_TXPROD,
1095                                       VMXNET3_REG_ALIGN);
1096          if (tx_queue_idx <= s->txq_num) {
1097              vmxnet3_process_tx_queue(s, tx_queue_idx);
1098          } else {
1099              qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Illegal TX queue %d/%d\n",
1100                            tx_queue_idx, s->txq_num);
1101          }
1102          return;
1103      }
1104  
1105      if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
1106                          VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
1107          int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
1108                                           VMXNET3_REG_ALIGN);
1109  
1110          VMW_CBPRN("Interrupt mask for line %d written: 0x%" PRIx64, l, val);
1111  
1112          vmxnet3_on_interrupt_mask_changed(s, l, val);
1113          return;
1114      }
1115  
1116      if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD,
1117                          VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN) ||
1118         VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD2,
1119                          VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN)) {
1120          return;
1121      }
1122  
1123      VMW_WRPRN("BAR0 unknown write [%" PRIx64 "] = %" PRIx64 ", size %d",
1124                (uint64_t) addr, val, size);
1125  }
1126  
1127  static uint64_t
1128  vmxnet3_io_bar0_read(void *opaque, hwaddr addr, unsigned size)
1129  {
1130      VMXNET3State *s = opaque;
1131  
1132      if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
1133                          VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
1134          int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
1135                                           VMXNET3_REG_ALIGN);
1136          return s->interrupt_states[l].is_masked;
1137      }
1138  
1139      VMW_CBPRN("BAR0 unknown read [%" PRIx64 "], size %d", addr, size);
1140      return 0;
1141  }
1142  
1143  static void vmxnet3_reset_interrupt_states(VMXNET3State *s)
1144  {
1145      int i;
1146      for (i = 0; i < ARRAY_SIZE(s->interrupt_states); i++) {
1147          s->interrupt_states[i].is_asserted = false;
1148          s->interrupt_states[i].is_pending = false;
1149          s->interrupt_states[i].is_masked = true;
1150      }
1151  }
1152  
1153  static void vmxnet3_reset_mac(VMXNET3State *s)
1154  {
1155      memcpy(&s->conf.macaddr.a, &s->perm_mac.a, sizeof(s->perm_mac.a));
1156      VMW_CFPRN("MAC address set to: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));
1157  }
1158  
1159  static void vmxnet3_deactivate_device(VMXNET3State *s)
1160  {
1161      if (s->device_active) {
1162          VMW_CBPRN("Deactivating vmxnet3...");
1163          net_tx_pkt_uninit(s->tx_pkt);
1164          net_rx_pkt_uninit(s->rx_pkt);
1165          s->device_active = false;
1166      }
1167  }
1168  
1169  static void vmxnet3_reset(VMXNET3State *s)
1170  {
1171      VMW_CBPRN("Resetting vmxnet3...");
1172  
1173      vmxnet3_deactivate_device(s);
1174      vmxnet3_reset_interrupt_states(s);
1175      s->drv_shmem = 0;
1176      s->tx_sop = true;
1177      s->skip_current_tx_pkt = false;
1178  }
1179  
1180  static void vmxnet3_update_rx_mode(VMXNET3State *s)
1181  {
1182      PCIDevice *d = PCI_DEVICE(s);
1183  
1184      s->rx_mode = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem,
1185                                             devRead.rxFilterConf.rxMode);
1186      VMW_CFPRN("RX mode: 0x%08X", s->rx_mode);
1187  }
1188  
1189  static void vmxnet3_update_vlan_filters(VMXNET3State *s)
1190  {
1191      int i;
1192      PCIDevice *d = PCI_DEVICE(s);
1193  
1194      /* Copy configuration from shared memory */
1195      VMXNET3_READ_DRV_SHARED(d, s->drv_shmem,
1196                              devRead.rxFilterConf.vfTable,
1197                              s->vlan_table,
1198                              sizeof(s->vlan_table));
1199  
1200      /* Invert byte order when needed */
1201      for (i = 0; i < ARRAY_SIZE(s->vlan_table); i++) {
1202          s->vlan_table[i] = le32_to_cpu(s->vlan_table[i]);
1203      }
1204  
1205      /* Dump configuration for debugging purposes */
1206      VMW_CFPRN("Configured VLANs:");
1207      for (i = 0; i < sizeof(s->vlan_table) * 8; i++) {
1208          if (VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, i)) {
1209              VMW_CFPRN("\tVLAN %d is present", i);
1210          }
1211      }
1212  }
1213  
1214  static void vmxnet3_update_mcast_filters(VMXNET3State *s)
1215  {
1216      PCIDevice *d = PCI_DEVICE(s);
1217  
1218      uint16_t list_bytes =
1219          VMXNET3_READ_DRV_SHARED16(d, s->drv_shmem,
1220                                    devRead.rxFilterConf.mfTableLen);
1221  
1222      s->mcast_list_len = list_bytes / sizeof(s->mcast_list[0]);
1223  
1224      s->mcast_list = g_realloc(s->mcast_list, list_bytes);
1225      if (!s->mcast_list) {
1226          if (s->mcast_list_len == 0) {
1227              VMW_CFPRN("Current multicast list is empty");
1228          } else {
1229              VMW_ERPRN("Failed to allocate multicast list of %d elements",
1230                        s->mcast_list_len);
1231          }
1232          s->mcast_list_len = 0;
1233      } else {
1234          int i;
1235          hwaddr mcast_list_pa =
1236              VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem,
1237                                        devRead.rxFilterConf.mfTablePA);
1238  
1239          pci_dma_read(d, mcast_list_pa, s->mcast_list, list_bytes);
1240  
1241          VMW_CFPRN("Current multicast list len is %d:", s->mcast_list_len);
1242          for (i = 0; i < s->mcast_list_len; i++) {
1243              VMW_CFPRN("\t" MAC_FMT, MAC_ARG(s->mcast_list[i].a));
1244          }
1245      }
1246  }
1247  
1248  static void vmxnet3_setup_rx_filtering(VMXNET3State *s)
1249  {
1250      vmxnet3_update_rx_mode(s);
1251      vmxnet3_update_vlan_filters(s);
1252      vmxnet3_update_mcast_filters(s);
1253  }
1254  
1255  static uint32_t vmxnet3_get_interrupt_config(VMXNET3State *s)
1256  {
1257      uint32_t interrupt_mode = VMXNET3_IT_AUTO | (VMXNET3_IMM_AUTO << 2);
1258      VMW_CFPRN("Interrupt config is 0x%X", interrupt_mode);
1259      return interrupt_mode;
1260  }
1261  
1262  static void vmxnet3_fill_stats(VMXNET3State *s)
1263  {
1264      int i;
1265      PCIDevice *d = PCI_DEVICE(s);
1266  
1267      if (!s->device_active)
1268          return;
1269  
1270      for (i = 0; i < s->txq_num; i++) {
1271          pci_dma_write(d,
1272                        s->txq_descr[i].tx_stats_pa,
1273                        &s->txq_descr[i].txq_stats,
1274                        sizeof(s->txq_descr[i].txq_stats));
1275      }
1276  
1277      for (i = 0; i < s->rxq_num; i++) {
1278          pci_dma_write(d,
1279                        s->rxq_descr[i].rx_stats_pa,
1280                        &s->rxq_descr[i].rxq_stats,
1281                        sizeof(s->rxq_descr[i].rxq_stats));
1282      }
1283  }
1284  
1285  static void vmxnet3_adjust_by_guest_type(VMXNET3State *s)
1286  {
1287      struct Vmxnet3_GOSInfo gos;
1288      PCIDevice *d = PCI_DEVICE(s);
1289  
1290      VMXNET3_READ_DRV_SHARED(d, s->drv_shmem, devRead.misc.driverInfo.gos,
1291                              &gos, sizeof(gos));
1292      s->rx_packets_compound =
1293          (gos.gosType == VMXNET3_GOS_TYPE_WIN) ? false : true;
1294  
1295      VMW_CFPRN("Guest type specifics: RXCOMPOUND: %d", s->rx_packets_compound);
1296  }
1297  
1298  static void
1299  vmxnet3_dump_conf_descr(const char *name,
1300                          struct Vmxnet3_VariableLenConfDesc *pm_descr)
1301  {
1302      VMW_CFPRN("%s descriptor dump: Version %u, Length %u",
1303                name, pm_descr->confVer, pm_descr->confLen);
1304  
1305  };
1306  
1307  static void vmxnet3_update_pm_state(VMXNET3State *s)
1308  {
1309      struct Vmxnet3_VariableLenConfDesc pm_descr;
1310      PCIDevice *d = PCI_DEVICE(s);
1311  
1312      pm_descr.confLen =
1313          VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.pmConfDesc.confLen);
1314      pm_descr.confVer =
1315          VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.pmConfDesc.confVer);
1316      pm_descr.confPA =
1317          VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.pmConfDesc.confPA);
1318  
1319      vmxnet3_dump_conf_descr("PM State", &pm_descr);
1320  }
1321  
1322  static void vmxnet3_update_features(VMXNET3State *s)
1323  {
1324      uint32_t guest_features;
1325      int rxcso_supported;
1326      PCIDevice *d = PCI_DEVICE(s);
1327  
1328      guest_features = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem,
1329                                                 devRead.misc.uptFeatures);
1330  
1331      rxcso_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXCSUM);
1332      s->rx_vlan_stripping = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXVLAN);
1333      s->lro_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_LRO);
1334  
1335      VMW_CFPRN("Features configuration: LRO: %d, RXCSUM: %d, VLANSTRIP: %d",
1336                s->lro_supported, rxcso_supported,
1337                s->rx_vlan_stripping);
1338      if (s->peer_has_vhdr) {
1339          qemu_set_offload(qemu_get_queue(s->nic)->peer,
1340                           rxcso_supported,
1341                           s->lro_supported,
1342                           s->lro_supported,
1343                           0,
1344                           0,
1345                           0,
1346                           0);
1347      }
1348  }
1349  
1350  static bool vmxnet3_verify_intx(VMXNET3State *s, int intx)
1351  {
1352      return s->msix_used || msi_enabled(PCI_DEVICE(s))
1353          || intx == pci_get_byte(s->parent_obj.config + PCI_INTERRUPT_PIN) - 1;
1354  }
1355  
1356  static void vmxnet3_validate_interrupt_idx(bool is_msix, int idx)
1357  {
1358      int max_ints = is_msix ? VMXNET3_MAX_INTRS : VMXNET3_MAX_NMSIX_INTRS;
1359      if (idx >= max_ints) {
1360          hw_error("Bad interrupt index: %d\n", idx);
1361      }
1362  }
1363  
1364  static void vmxnet3_validate_interrupts(VMXNET3State *s)
1365  {
1366      int i;
1367  
1368      VMW_CFPRN("Verifying event interrupt index (%d)", s->event_int_idx);
1369      vmxnet3_validate_interrupt_idx(s->msix_used, s->event_int_idx);
1370  
1371      for (i = 0; i < s->txq_num; i++) {
1372          int idx = s->txq_descr[i].intr_idx;
1373          VMW_CFPRN("Verifying TX queue %d interrupt index (%d)", i, idx);
1374          vmxnet3_validate_interrupt_idx(s->msix_used, idx);
1375      }
1376  
1377      for (i = 0; i < s->rxq_num; i++) {
1378          int idx = s->rxq_descr[i].intr_idx;
1379          VMW_CFPRN("Verifying RX queue %d interrupt index (%d)", i, idx);
1380          vmxnet3_validate_interrupt_idx(s->msix_used, idx);
1381      }
1382  }
1383  
1384  static bool vmxnet3_validate_queues(VMXNET3State *s)
1385  {
1386      /*
1387      * txq_num and rxq_num are total number of queues
1388      * configured by guest. These numbers must not
1389      * exceed corresponding maximal values.
1390      */
1391  
1392      if (s->txq_num > VMXNET3_DEVICE_MAX_TX_QUEUES) {
1393          qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Bad TX queues number: %d\n",
1394                        s->txq_num);
1395          return false;
1396      }
1397  
1398      if (s->rxq_num > VMXNET3_DEVICE_MAX_RX_QUEUES) {
1399          qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Bad RX queues number: %d\n",
1400                        s->rxq_num);
1401          return false;
1402      }
1403  
1404      return true;
1405  }
1406  
/*
 * Activate the device using the configuration found in driver shared
 * memory (s->drv_shmem).
 *
 * Steps, in order: verify the driver magic, read and validate the queue
 * counts, refresh guest-type/feature/PM/RX-filter state, cache MTU, max RX
 * fragments and interrupt settings, initialize every TX, TX completion, RX
 * and RX completion ring, allocate the TX/RX packet wrappers, validate the
 * interrupt indexes, restore the MAC address and finally set
 * s->device_active.
 *
 * The function returns early, leaving s->device_active false, when the
 * magic check fails, the queue counts are invalid or the MTU is out of
 * range. It also returns without changes when the device is already active.
 */
static void vmxnet3_activate_device(VMXNET3State *s)
{
    int i;
    static const uint32_t VMXNET3_DEF_TX_THRESHOLD = 1;
    PCIDevice *d = PCI_DEVICE(s);
    hwaddr qdescr_table_pa;
    uint64_t pa;
    uint32_t size;

    /* Verify configuration consistency */
    if (!vmxnet3_verify_driver_magic(d, s->drv_shmem)) {
        VMW_ERPRN("Device configuration received from driver is invalid");
        return;
    }

    /* Verify if device is active */
    if (s->device_active) {
        VMW_CFPRN("Vmxnet3 device is active");
        return;
    }

    s->txq_num =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numTxQueues);
    s->rxq_num =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numRxQueues);

    VMW_CFPRN("Number of TX/RX queues %u/%u", s->txq_num, s->rxq_num);
    if (!vmxnet3_validate_queues(s)) {
        return;
    }

    vmxnet3_adjust_by_guest_type(s);
    vmxnet3_update_features(s);
    vmxnet3_update_pm_state(s);
    vmxnet3_setup_rx_filtering(s);
    /* Cache fields from shared memory */
    s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu);
    if (s->mtu < VMXNET3_MIN_MTU || s->mtu > VMXNET3_MAX_MTU) {
        qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Bad MTU size: %u\n", s->mtu);
        return;
    }
    VMW_CFPRN("MTU is %u", s->mtu);

    s->max_rx_frags =
        VMXNET3_READ_DRV_SHARED16(d, s->drv_shmem, devRead.misc.maxNumRxSG);

    /* A value of 0 is treated as a single fragment per packet */
    if (s->max_rx_frags == 0) {
        s->max_rx_frags = 1;
    }

    VMW_CFPRN("Max RX fragments is %u", s->max_rx_frags);

    s->event_int_idx =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.eventIntrIdx);
    /*
     * NOTE(review): the index is read from driver shared memory, so this
     * assert (and the per-queue ones below) looks guest-triggerable -
     * confirm whether a guest error log plus early return is preferable.
     */
    assert(vmxnet3_verify_intx(s, s->event_int_idx));
    VMW_CFPRN("Events interrupt line is %u", s->event_int_idx);

    s->auto_int_masking =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.autoMask);
    VMW_CFPRN("Automatic interrupt masking is %d", (int)s->auto_int_masking);

    /* Base of the queue descriptor table: TX descriptors first, then RX */
    qdescr_table_pa =
        VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.misc.queueDescPA);
    VMW_CFPRN("TX queues descriptors table is at 0x%" PRIx64, qdescr_table_pa);

    /*
     * Worst-case scenario is a packet that holds all TX rings space so
     * we calculate total size of all TX rings for max TX fragments number
     */
    s->max_tx_frags = 0;

    /* TX queues */
    for (i = 0; i < s->txq_num; i++) {
        hwaddr qdescr_pa =
            qdescr_table_pa + i * sizeof(struct Vmxnet3_TxQueueDesc);

        /* Read interrupt number for this TX queue */
        s->txq_descr[i].intr_idx =
            VMXNET3_READ_TX_QUEUE_DESCR8(d, qdescr_pa, conf.intrIdx);
        assert(vmxnet3_verify_intx(s, s->txq_descr[i].intr_idx));

        VMW_CFPRN("TX Queue %d interrupt: %d", i, s->txq_descr[i].intr_idx);

        /* Read rings memory locations for TX queues */
        pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.txRingBasePA);
        size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.txRingSize);
        /* Clamp the ring size requested by the driver to the maximum */
        if (size > VMXNET3_TX_RING_MAX_SIZE) {
            size = VMXNET3_TX_RING_MAX_SIZE;
        }

        vmxnet3_ring_init(d, &s->txq_descr[i].tx_ring, pa, size,
                          sizeof(struct Vmxnet3_TxDesc), false);
        VMXNET3_RING_DUMP(VMW_CFPRN, "TX", i, &s->txq_descr[i].tx_ring);

        s->max_tx_frags += size;

        /* TXC ring */
        pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.compRingBasePA);
        size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.compRingSize);
        if (size > VMXNET3_TC_RING_MAX_SIZE) {
            size = VMXNET3_TC_RING_MAX_SIZE;
        }
        vmxnet3_ring_init(d, &s->txq_descr[i].comp_ring, pa, size,
                          sizeof(struct Vmxnet3_TxCompDesc), true);
        VMXNET3_RING_DUMP(VMW_CFPRN, "TXC", i, &s->txq_descr[i].comp_ring);

        /* Remember where this queue's statistics live and start from zero */
        s->txq_descr[i].tx_stats_pa =
            qdescr_pa + offsetof(struct Vmxnet3_TxQueueDesc, stats);

        memset(&s->txq_descr[i].txq_stats, 0,
               sizeof(s->txq_descr[i].txq_stats));

        /* Fill device-managed parameters for queues */
        VMXNET3_WRITE_TX_QUEUE_DESCR32(d, qdescr_pa,
                                       ctrl.txThreshold,
                                       VMXNET3_DEF_TX_THRESHOLD);
    }

    /* Preallocate TX packet wrapper */
    VMW_CFPRN("Max TX fragments is %u", s->max_tx_frags);
    net_tx_pkt_init(&s->tx_pkt, s->max_tx_frags);
    net_rx_pkt_init(&s->rx_pkt);

    /* Read rings memory locations for RX queues */
    for (i = 0; i < s->rxq_num; i++) {
        int j;
        /* RX queue descriptors follow all TX queue descriptors */
        hwaddr qd_pa =
            qdescr_table_pa + s->txq_num * sizeof(struct Vmxnet3_TxQueueDesc) +
            i * sizeof(struct Vmxnet3_RxQueueDesc);

        /* Read interrupt number for this RX queue */
        /*
         * NOTE(review): this uses the TX queue descriptor accessor on an RX
         * queue descriptor; presumably conf.intrIdx sits at the same offset
         * in both layouts - confirm against vmxnet3_defs.h.
         */
        s->rxq_descr[i].intr_idx =
            VMXNET3_READ_TX_QUEUE_DESCR8(d, qd_pa, conf.intrIdx);
        assert(vmxnet3_verify_intx(s, s->rxq_descr[i].intr_idx));

        VMW_CFPRN("RX Queue %d interrupt: %d", i, s->rxq_descr[i].intr_idx);

        /* Read rings memory locations */
        for (j = 0; j < VMXNET3_RX_RINGS_PER_QUEUE; j++) {
            /* RX rings */
            pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.rxRingBasePA[j]);
            size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.rxRingSize[j]);
            if (size > VMXNET3_RX_RING_MAX_SIZE) {
                size = VMXNET3_RX_RING_MAX_SIZE;
            }
            vmxnet3_ring_init(d, &s->rxq_descr[i].rx_ring[j], pa, size,
                              sizeof(struct Vmxnet3_RxDesc), false);
            VMW_CFPRN("RX queue %d:%d: Base: %" PRIx64 ", Size: %d",
                      i, j, pa, size);
        }

        /* RXC ring */
        pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.compRingBasePA);
        size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.compRingSize);
        if (size > VMXNET3_RC_RING_MAX_SIZE) {
            size = VMXNET3_RC_RING_MAX_SIZE;
        }
        vmxnet3_ring_init(d, &s->rxq_descr[i].comp_ring, pa, size,
                          sizeof(struct Vmxnet3_RxCompDesc), true);
        VMW_CFPRN("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size);

        /* Remember where this queue's statistics live and start from zero */
        s->rxq_descr[i].rx_stats_pa =
            qd_pa + offsetof(struct Vmxnet3_RxQueueDesc, stats);
        memset(&s->rxq_descr[i].rxq_stats, 0,
               sizeof(s->rxq_descr[i].rxq_stats));
    }

    vmxnet3_validate_interrupts(s);

    /* Make sure everything is in place before device activation */
    smp_wmb();

    vmxnet3_reset_mac(s);

    s->device_active = true;
}
1583  
1584  static void vmxnet3_handle_command(VMXNET3State *s, uint64_t cmd)
1585  {
1586      s->last_command = cmd;
1587  
1588      switch (cmd) {
1589      case VMXNET3_CMD_GET_PERM_MAC_HI:
1590          VMW_CBPRN("Set: Get upper part of permanent MAC");
1591          break;
1592  
1593      case VMXNET3_CMD_GET_PERM_MAC_LO:
1594          VMW_CBPRN("Set: Get lower part of permanent MAC");
1595          break;
1596  
1597      case VMXNET3_CMD_GET_STATS:
1598          VMW_CBPRN("Set: Get device statistics");
1599          vmxnet3_fill_stats(s);
1600          break;
1601  
1602      case VMXNET3_CMD_ACTIVATE_DEV:
1603          VMW_CBPRN("Set: Activating vmxnet3 device");
1604          vmxnet3_activate_device(s);
1605          break;
1606  
1607      case VMXNET3_CMD_UPDATE_RX_MODE:
1608          VMW_CBPRN("Set: Update rx mode");
1609          vmxnet3_update_rx_mode(s);
1610          break;
1611  
1612      case VMXNET3_CMD_UPDATE_VLAN_FILTERS:
1613          VMW_CBPRN("Set: Update VLAN filters");
1614          vmxnet3_update_vlan_filters(s);
1615          break;
1616  
1617      case VMXNET3_CMD_UPDATE_MAC_FILTERS:
1618          VMW_CBPRN("Set: Update MAC filters");
1619          vmxnet3_update_mcast_filters(s);
1620          break;
1621  
1622      case VMXNET3_CMD_UPDATE_FEATURE:
1623          VMW_CBPRN("Set: Update features");
1624          vmxnet3_update_features(s);
1625          break;
1626  
1627      case VMXNET3_CMD_UPDATE_PMCFG:
1628          VMW_CBPRN("Set: Update power management config");
1629          vmxnet3_update_pm_state(s);
1630          break;
1631  
1632      case VMXNET3_CMD_GET_LINK:
1633          VMW_CBPRN("Set: Get link");
1634          break;
1635  
1636      case VMXNET3_CMD_RESET_DEV:
1637          VMW_CBPRN("Set: Reset device");
1638          vmxnet3_reset(s);
1639          break;
1640  
1641      case VMXNET3_CMD_QUIESCE_DEV:
1642          VMW_CBPRN("Set: VMXNET3_CMD_QUIESCE_DEV - deactivate the device");
1643          vmxnet3_deactivate_device(s);
1644          break;
1645  
1646      case VMXNET3_CMD_GET_CONF_INTR:
1647          VMW_CBPRN("Set: VMXNET3_CMD_GET_CONF_INTR - interrupt configuration");
1648          break;
1649  
1650      case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
1651          VMW_CBPRN("Set: VMXNET3_CMD_GET_ADAPTIVE_RING_INFO - "
1652                    "adaptive ring info flags");
1653          break;
1654  
1655      case VMXNET3_CMD_GET_DID_LO:
1656          VMW_CBPRN("Set: Get lower part of device ID");
1657          break;
1658  
1659      case VMXNET3_CMD_GET_DID_HI:
1660          VMW_CBPRN("Set: Get upper part of device ID");
1661          break;
1662  
1663      case VMXNET3_CMD_GET_DEV_EXTRA_INFO:
1664          VMW_CBPRN("Set: Get device extra info");
1665          break;
1666  
1667      default:
1668          VMW_CBPRN("Received unknown command: %" PRIx64, cmd);
1669          break;
1670      }
1671  }
1672  
1673  static uint64_t vmxnet3_get_command_status(VMXNET3State *s)
1674  {
1675      uint64_t ret;
1676  
1677      switch (s->last_command) {
1678      case VMXNET3_CMD_ACTIVATE_DEV:
1679          ret = (s->device_active) ? 0 : 1;
1680          VMW_CFPRN("Device active: %" PRIx64, ret);
1681          break;
1682  
1683      case VMXNET3_CMD_RESET_DEV:
1684      case VMXNET3_CMD_QUIESCE_DEV:
1685      case VMXNET3_CMD_GET_QUEUE_STATUS:
1686      case VMXNET3_CMD_GET_DEV_EXTRA_INFO:
1687          ret = 0;
1688          break;
1689  
1690      case VMXNET3_CMD_GET_LINK:
1691          ret = s->link_status_and_speed;
1692          VMW_CFPRN("Link and speed: %" PRIx64, ret);
1693          break;
1694  
1695      case VMXNET3_CMD_GET_PERM_MAC_LO:
1696          ret = vmxnet3_get_mac_low(&s->perm_mac);
1697          break;
1698  
1699      case VMXNET3_CMD_GET_PERM_MAC_HI:
1700          ret = vmxnet3_get_mac_high(&s->perm_mac);
1701          break;
1702  
1703      case VMXNET3_CMD_GET_CONF_INTR:
1704          ret = vmxnet3_get_interrupt_config(s);
1705          break;
1706  
1707      case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
1708          ret = VMXNET3_DISABLE_ADAPTIVE_RING;
1709          break;
1710  
1711      case VMXNET3_CMD_GET_DID_LO:
1712          ret = PCI_DEVICE_ID_VMWARE_VMXNET3;
1713          break;
1714  
1715      case VMXNET3_CMD_GET_DID_HI:
1716          ret = VMXNET3_DEVICE_REVISION;
1717          break;
1718  
1719      default:
1720          VMW_WRPRN("Received request for unknown command: %x", s->last_command);
1721          ret = 0;
1722          break;
1723      }
1724  
1725      return ret;
1726  }
1727  
1728  static void vmxnet3_set_events(VMXNET3State *s, uint32_t val)
1729  {
1730      uint32_t events;
1731      PCIDevice *d = PCI_DEVICE(s);
1732  
1733      VMW_CBPRN("Setting events: 0x%x", val);
1734      events = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, ecr) | val;
1735      VMXNET3_WRITE_DRV_SHARED32(d, s->drv_shmem, ecr, events);
1736  }
1737  
1738  static void vmxnet3_ack_events(VMXNET3State *s, uint32_t val)
1739  {
1740      PCIDevice *d = PCI_DEVICE(s);
1741      uint32_t events;
1742  
1743      VMW_CBPRN("Clearing events: 0x%x", val);
1744      events = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, ecr) & ~val;
1745      VMXNET3_WRITE_DRV_SHARED32(d, s->drv_shmem, ecr, events);
1746  }
1747  
1748  static void
1749  vmxnet3_io_bar1_write(void *opaque,
1750                        hwaddr addr,
1751                        uint64_t val,
1752                        unsigned size)
1753  {
1754      VMXNET3State *s = opaque;
1755  
1756      switch (addr) {
1757      /* Vmxnet3 Revision Report Selection */
1758      case VMXNET3_REG_VRRS:
1759          VMW_CBPRN("Write BAR1 [VMXNET3_REG_VRRS] = %" PRIx64 ", size %d",
1760                    val, size);
1761          break;
1762  
1763      /* UPT Version Report Selection */
1764      case VMXNET3_REG_UVRS:
1765          VMW_CBPRN("Write BAR1 [VMXNET3_REG_UVRS] = %" PRIx64 ", size %d",
1766                    val, size);
1767          break;
1768  
1769      /* Driver Shared Address Low */
1770      case VMXNET3_REG_DSAL:
1771          VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAL] = %" PRIx64 ", size %d",
1772                    val, size);
1773          /*
1774           * Guest driver will first write the low part of the shared
1775           * memory address. We save it to temp variable and set the
1776           * shared address only after we get the high part
1777           */
1778          if (val == 0) {
1779              vmxnet3_deactivate_device(s);
1780          }
1781          s->temp_shared_guest_driver_memory = val;
1782          s->drv_shmem = 0;
1783          break;
1784  
1785      /* Driver Shared Address High */
1786      case VMXNET3_REG_DSAH:
1787          VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAH] = %" PRIx64 ", size %d",
1788                    val, size);
1789          /*
1790           * Set the shared memory between guest driver and device.
1791           * We already should have low address part.
1792           */
1793          s->drv_shmem = s->temp_shared_guest_driver_memory | (val << 32);
1794          break;
1795  
1796      /* Command */
1797      case VMXNET3_REG_CMD:
1798          VMW_CBPRN("Write BAR1 [VMXNET3_REG_CMD] = %" PRIx64 ", size %d",
1799                    val, size);
1800          vmxnet3_handle_command(s, val);
1801          break;
1802  
1803      /* MAC Address Low */
1804      case VMXNET3_REG_MACL:
1805          VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACL] = %" PRIx64 ", size %d",
1806                    val, size);
1807          s->temp_mac = val;
1808          break;
1809  
1810      /* MAC Address High */
1811      case VMXNET3_REG_MACH:
1812          VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACH] = %" PRIx64 ", size %d",
1813                    val, size);
1814          vmxnet3_set_variable_mac(s, val, s->temp_mac);
1815          break;
1816  
1817      /* Interrupt Cause Register */
1818      case VMXNET3_REG_ICR:
1819          VMW_CBPRN("Write BAR1 [VMXNET3_REG_ICR] = %" PRIx64 ", size %d",
1820                    val, size);
1821          qemu_log_mask(LOG_GUEST_ERROR,
1822                        "%s: write to read-only register VMXNET3_REG_ICR\n",
1823                        TYPE_VMXNET3);
1824          break;
1825  
1826      /* Event Cause Register */
1827      case VMXNET3_REG_ECR:
1828          VMW_CBPRN("Write BAR1 [VMXNET3_REG_ECR] = %" PRIx64 ", size %d",
1829                    val, size);
1830          vmxnet3_ack_events(s, val);
1831          break;
1832  
1833      default:
1834          VMW_CBPRN("Unknown Write to BAR1 [%" PRIx64 "] = %" PRIx64 ", size %d",
1835                    addr, val, size);
1836          break;
1837      }
1838  }
1839  
1840  static uint64_t
1841  vmxnet3_io_bar1_read(void *opaque, hwaddr addr, unsigned size)
1842  {
1843          VMXNET3State *s = opaque;
1844          uint64_t ret = 0;
1845  
1846          switch (addr) {
1847          /* Vmxnet3 Revision Report Selection */
1848          case VMXNET3_REG_VRRS:
1849              VMW_CBPRN("Read BAR1 [VMXNET3_REG_VRRS], size %d", size);
1850              ret = VMXNET3_DEVICE_REVISION;
1851              break;
1852  
1853          /* UPT Version Report Selection */
1854          case VMXNET3_REG_UVRS:
1855              VMW_CBPRN("Read BAR1 [VMXNET3_REG_UVRS], size %d", size);
1856              ret = VMXNET3_UPT_REVISION;
1857              break;
1858  
1859          /* Command */
1860          case VMXNET3_REG_CMD:
1861              VMW_CBPRN("Read BAR1 [VMXNET3_REG_CMD], size %d", size);
1862              ret = vmxnet3_get_command_status(s);
1863              break;
1864  
1865          /* MAC Address Low */
1866          case VMXNET3_REG_MACL:
1867              VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACL], size %d", size);
1868              ret = vmxnet3_get_mac_low(&s->conf.macaddr);
1869              break;
1870  
1871          /* MAC Address High */
1872          case VMXNET3_REG_MACH:
1873              VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACH], size %d", size);
1874              ret = vmxnet3_get_mac_high(&s->conf.macaddr);
1875              break;
1876  
1877          /*
1878           * Interrupt Cause Register
1879           * Used for legacy interrupts only so interrupt index always 0
1880           */
1881          case VMXNET3_REG_ICR:
1882              VMW_CBPRN("Read BAR1 [VMXNET3_REG_ICR], size %d", size);
1883              if (vmxnet3_interrupt_asserted(s, 0)) {
1884                  vmxnet3_clear_interrupt(s, 0);
1885                  ret = true;
1886              } else {
1887                  ret = false;
1888              }
1889              break;
1890  
1891          default:
1892              VMW_CBPRN("Unknown read BAR1[%" PRIx64 "], %d bytes", addr, size);
1893              break;
1894          }
1895  
1896          return ret;
1897  }
1898  
1899  static int
1900  vmxnet3_can_receive(NetClientState *nc)
1901  {
1902      VMXNET3State *s = qemu_get_nic_opaque(nc);
1903      return s->device_active &&
1904             VMXNET_FLAG_IS_SET(s->link_status_and_speed, VMXNET3_LINK_STATUS_UP);
1905  }
1906  
1907  static inline bool
1908  vmxnet3_is_registered_vlan(VMXNET3State *s, const void *data)
1909  {
1910      uint16_t vlan_tag = eth_get_pkt_tci(data) & VLAN_VID_MASK;
1911      if (IS_SPECIAL_VLAN_ID(vlan_tag)) {
1912          return true;
1913      }
1914  
1915      return VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, vlan_tag);
1916  }
1917  
1918  static bool
1919  vmxnet3_is_allowed_mcast_group(VMXNET3State *s, const uint8_t *group_mac)
1920  {
1921      int i;
1922      for (i = 0; i < s->mcast_list_len; i++) {
1923          if (!memcmp(group_mac, s->mcast_list[i].a, sizeof(s->mcast_list[i]))) {
1924              return true;
1925          }
1926      }
1927      return false;
1928  }
1929  
1930  static bool
1931  vmxnet3_rx_filter_may_indicate(VMXNET3State *s, const void *data,
1932      size_t size)
1933  {
1934      struct eth_header *ehdr = PKT_GET_ETH_HDR(data);
1935  
1936      if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_PROMISC)) {
1937          return true;
1938      }
1939  
1940      if (!vmxnet3_is_registered_vlan(s, data)) {
1941          return false;
1942      }
1943  
1944      switch (net_rx_pkt_get_packet_type(s->rx_pkt)) {
1945      case ETH_PKT_UCAST:
1946          if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_UCAST)) {
1947              return false;
1948          }
1949          if (memcmp(s->conf.macaddr.a, ehdr->h_dest, ETH_ALEN)) {
1950              return false;
1951          }
1952          break;
1953  
1954      case ETH_PKT_BCAST:
1955          if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_BCAST)) {
1956              return false;
1957          }
1958          break;
1959  
1960      case ETH_PKT_MCAST:
1961          if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_ALL_MULTI)) {
1962              return true;
1963          }
1964          if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_MCAST)) {
1965              return false;
1966          }
1967          if (!vmxnet3_is_allowed_mcast_group(s, ehdr->h_dest)) {
1968              return false;
1969          }
1970          break;
1971  
1972      default:
1973          g_assert_not_reached();
1974      }
1975  
1976      return true;
1977  }
1978  
1979  static ssize_t
1980  vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1981  {
1982      VMXNET3State *s = qemu_get_nic_opaque(nc);
1983      size_t bytes_indicated;
1984  
1985      if (!vmxnet3_can_receive(nc)) {
1986          VMW_PKPRN("Cannot receive now");
1987          return -1;
1988      }
1989  
1990      if (s->peer_has_vhdr) {
1991          net_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf);
1992          buf += sizeof(struct virtio_net_hdr);
1993          size -= sizeof(struct virtio_net_hdr);
1994      }
1995  
1996      net_rx_pkt_set_packet_type(s->rx_pkt,
1997          get_eth_packet_type(PKT_GET_ETH_HDR(buf)));
1998  
1999      if (vmxnet3_rx_filter_may_indicate(s, buf, size)) {
2000          struct iovec iov = {
2001              .iov_base = (void *)buf,
2002              .iov_len = size
2003          };
2004  
2005          net_rx_pkt_set_protocols(s->rx_pkt, &iov, 1, 0);
2006          vmxnet3_rx_need_csum_calculate(s->rx_pkt, buf, size);
2007          net_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping);
2008          bytes_indicated = vmxnet3_indicate_packet(s) ? size : -1;
2009          if (bytes_indicated < size) {
2010              VMW_PKPRN("RX: %zu of %zu bytes indicated", bytes_indicated, size);
2011          }
2012      } else {
2013          VMW_PKPRN("Packet dropped by RX filter");
2014          bytes_indicated = size;
2015      }
2016  
2017      assert(size > 0);
2018      assert(bytes_indicated != 0);
2019      return bytes_indicated;
2020  }
2021  
2022  static void vmxnet3_set_link_status(NetClientState *nc)
2023  {
2024      VMXNET3State *s = qemu_get_nic_opaque(nc);
2025  
2026      if (nc->link_down) {
2027          s->link_status_and_speed &= ~VMXNET3_LINK_STATUS_UP;
2028      } else {
2029          s->link_status_and_speed |= VMXNET3_LINK_STATUS_UP;
2030      }
2031  
2032      vmxnet3_set_events(s, VMXNET3_ECR_LINK);
2033      vmxnet3_trigger_interrupt(s, s->event_int_idx);
2034  }
2035  
2036  static NetClientInfo net_vmxnet3_info = {
2037          .type = NET_CLIENT_DRIVER_NIC,
2038          .size = sizeof(NICState),
2039          .receive = vmxnet3_receive,
2040          .link_status_changed = vmxnet3_set_link_status,
2041  };
2042  
2043  static bool vmxnet3_peer_has_vnet_hdr(VMXNET3State *s)
2044  {
2045      NetClientState *nc = qemu_get_queue(s->nic);
2046  
2047      if (qemu_has_vnet_hdr(nc->peer)) {
2048          return true;
2049      }
2050  
2051      return false;
2052  }
2053  
2054  static void vmxnet3_net_uninit(VMXNET3State *s)
2055  {
2056      g_free(s->mcast_list);
2057      vmxnet3_deactivate_device(s);
2058      qemu_del_nic(s->nic);
2059  }
2060  
2061  static void vmxnet3_net_init(VMXNET3State *s)
2062  {
2063      DeviceState *d = DEVICE(s);
2064  
2065      VMW_CBPRN("vmxnet3_net_init called...");
2066  
2067      qemu_macaddr_default_if_unset(&s->conf.macaddr);
2068  
2069      /* Windows guest will query the address that was set on init */
2070      memcpy(&s->perm_mac.a, &s->conf.macaddr.a, sizeof(s->perm_mac.a));
2071  
2072      s->mcast_list = NULL;
2073      s->mcast_list_len = 0;
2074  
2075      s->link_status_and_speed = VMXNET3_LINK_SPEED | VMXNET3_LINK_STATUS_UP;
2076  
2077      VMW_CFPRN("Permanent MAC: " MAC_FMT, MAC_ARG(s->perm_mac.a));
2078  
2079      s->nic = qemu_new_nic(&net_vmxnet3_info, &s->conf,
2080                            object_get_typename(OBJECT(s)),
2081                            d->id, &d->mem_reentrancy_guard, s);
2082  
2083      s->peer_has_vhdr = vmxnet3_peer_has_vnet_hdr(s);
2084      s->tx_sop = true;
2085      s->skip_current_tx_pkt = false;
2086      s->tx_pkt = NULL;
2087      s->rx_pkt = NULL;
2088      s->rx_vlan_stripping = false;
2089      s->lro_supported = false;
2090  
2091      if (s->peer_has_vhdr) {
2092          qemu_set_vnet_hdr_len(qemu_get_queue(s->nic)->peer,
2093              sizeof(struct virtio_net_hdr));
2094  
2095          qemu_using_vnet_hdr(qemu_get_queue(s->nic)->peer, 1);
2096      }
2097  
2098      qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
2099  }
2100  
2101  static void
2102  vmxnet3_unuse_msix_vectors(VMXNET3State *s, int num_vectors)
2103  {
2104      PCIDevice *d = PCI_DEVICE(s);
2105      int i;
2106      for (i = 0; i < num_vectors; i++) {
2107          msix_vector_unuse(d, i);
2108      }
2109  }
2110  
2111  static void
2112  vmxnet3_use_msix_vectors(VMXNET3State *s, int num_vectors)
2113  {
2114      PCIDevice *d = PCI_DEVICE(s);
2115      int i;
2116      for (i = 0; i < num_vectors; i++) {
2117          msix_vector_use(d, i);
2118      }
2119  }
2120  
2121  static bool
2122  vmxnet3_init_msix(VMXNET3State *s)
2123  {
2124      PCIDevice *d = PCI_DEVICE(s);
2125      int res = msix_init(d, VMXNET3_MAX_INTRS,
2126                          &s->msix_bar,
2127                          VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_TABLE,
2128                          &s->msix_bar,
2129                          VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA(s),
2130                          VMXNET3_MSIX_OFFSET(s), NULL);
2131  
2132      if (0 > res) {
2133          VMW_WRPRN("Failed to initialize MSI-X, error %d", res);
2134          s->msix_used = false;
2135      } else {
2136          vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS);
2137          s->msix_used = true;
2138      }
2139      return s->msix_used;
2140  }
2141  
2142  static void
2143  vmxnet3_cleanup_msix(VMXNET3State *s)
2144  {
2145      PCIDevice *d = PCI_DEVICE(s);
2146  
2147      if (s->msix_used) {
2148          vmxnet3_unuse_msix_vectors(s, VMXNET3_MAX_INTRS);
2149          msix_uninit(d, &s->msix_bar, &s->msix_bar);
2150      }
2151  }
2152  
2153  static void
2154  vmxnet3_cleanup_msi(VMXNET3State *s)
2155  {
2156      PCIDevice *d = PCI_DEVICE(s);
2157  
2158      msi_uninit(d);
2159  }
2160  
2161  static const MemoryRegionOps b0_ops = {
2162      .read = vmxnet3_io_bar0_read,
2163      .write = vmxnet3_io_bar0_write,
2164      .endianness = DEVICE_LITTLE_ENDIAN,
2165      .impl = {
2166              .min_access_size = 4,
2167              .max_access_size = 4,
2168      },
2169  };
2170  
2171  static const MemoryRegionOps b1_ops = {
2172      .read = vmxnet3_io_bar1_read,
2173      .write = vmxnet3_io_bar1_write,
2174      .endianness = DEVICE_LITTLE_ENDIAN,
2175      .impl = {
2176              .min_access_size = 4,
2177              .max_access_size = 4,
2178      },
2179  };
2180  
2181  static uint64_t vmxnet3_device_serial_num(VMXNET3State *s)
2182  {
2183      uint64_t dsn_payload;
2184      uint8_t *dsnp = (uint8_t *)&dsn_payload;
2185  
2186      dsnp[0] = 0xfe;
2187      dsnp[1] = s->conf.macaddr.a[3];
2188      dsnp[2] = s->conf.macaddr.a[4];
2189      dsnp[3] = s->conf.macaddr.a[5];
2190      dsnp[4] = s->conf.macaddr.a[0];
2191      dsnp[5] = s->conf.macaddr.a[1];
2192      dsnp[6] = s->conf.macaddr.a[2];
2193      dsnp[7] = 0xff;
2194      return dsn_payload;
2195  }
2196  
2197  
2198  #define VMXNET3_USE_64BIT         (true)
2199  #define VMXNET3_PER_VECTOR_MASK   (false)
2200  
2201  static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
2202  {
2203      VMXNET3State *s = VMXNET3(pci_dev);
2204      int ret;
2205  
2206      VMW_CBPRN("Starting init...");
2207  
2208      memory_region_init_io(&s->bar0, OBJECT(s), &b0_ops, s,
2209                            "vmxnet3-b0", VMXNET3_PT_REG_SIZE);
2210      pci_register_bar(pci_dev, VMXNET3_BAR0_IDX,
2211                       PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar0);
2212  
2213      memory_region_init_io(&s->bar1, OBJECT(s), &b1_ops, s,
2214                            "vmxnet3-b1", VMXNET3_VD_REG_SIZE);
2215      pci_register_bar(pci_dev, VMXNET3_BAR1_IDX,
2216                       PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar1);
2217  
2218      memory_region_init(&s->msix_bar, OBJECT(s), "vmxnet3-msix-bar",
2219                         VMXNET3_MSIX_BAR_SIZE);
2220      pci_register_bar(pci_dev, VMXNET3_MSIX_BAR_IDX,
2221                       PCI_BASE_ADDRESS_SPACE_MEMORY, &s->msix_bar);
2222  
2223      vmxnet3_reset_interrupt_states(s);
2224  
2225      /* Interrupt pin A */
2226      pci_dev->config[PCI_INTERRUPT_PIN] = 0x01;
2227  
2228      ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS,
2229                     VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK, NULL);
2230      /* Any error other than -ENOTSUP(board's MSI support is broken)
2231       * is a programming error. Fall back to INTx silently on -ENOTSUP */
2232      assert(!ret || ret == -ENOTSUP);
2233  
2234      if (!vmxnet3_init_msix(s)) {
2235          VMW_WRPRN("Failed to initialize MSI-X, configuration is inconsistent.");
2236      }
2237  
2238      vmxnet3_net_init(s);
2239  
2240      if (pci_is_express(pci_dev)) {
2241          if (pci_bus_is_express(pci_get_bus(pci_dev))) {
2242              pcie_endpoint_cap_init(pci_dev, VMXNET3_EXP_EP_OFFSET);
2243          }
2244  
2245          pcie_dev_ser_num_init(pci_dev, VMXNET3_DSN_OFFSET,
2246                                vmxnet3_device_serial_num(s));
2247      }
2248  }
2249  
2250  static void vmxnet3_instance_init(Object *obj)
2251  {
2252      VMXNET3State *s = VMXNET3(obj);
2253      device_add_bootindex_property(obj, &s->conf.bootindex,
2254                                    "bootindex", "/ethernet-phy@0",
2255                                    DEVICE(obj));
2256  }
2257  
2258  static void vmxnet3_pci_uninit(PCIDevice *pci_dev)
2259  {
2260      VMXNET3State *s = VMXNET3(pci_dev);
2261  
2262      VMW_CBPRN("Starting uninit...");
2263  
2264      vmxnet3_net_uninit(s);
2265  
2266      vmxnet3_cleanup_msix(s);
2267  
2268      vmxnet3_cleanup_msi(s);
2269  }
2270  
2271  static void vmxnet3_qdev_reset(DeviceState *dev)
2272  {
2273      PCIDevice *d = PCI_DEVICE(dev);
2274      VMXNET3State *s = VMXNET3(d);
2275  
2276      VMW_CBPRN("Starting QDEV reset...");
2277      vmxnet3_reset(s);
2278  }
2279  
/*
 * "needed" callback of the multicast list subsection: unconditionally
 * reports the subsection as needed.  @opaque is not used.
 */
static bool vmxnet3_mc_list_needed(void *opaque)
{
    (void)opaque;

    return true;
}
2284  
2285  static int vmxnet3_mcast_list_pre_load(void *opaque)
2286  {
2287      VMXNET3State *s = opaque;
2288  
2289      s->mcast_list = g_malloc(s->mcast_list_buff_size);
2290  
2291      return 0;
2292  }
2293  
2294  
2295  static int vmxnet3_pre_save(void *opaque)
2296  {
2297      VMXNET3State *s = opaque;
2298  
2299      s->mcast_list_buff_size = s->mcast_list_len * sizeof(MACAddr);
2300  
2301      return 0;
2302  }
2303  
2304  static const VMStateDescription vmxstate_vmxnet3_mcast_list = {
2305      .name = "vmxnet3/mcast_list",
2306      .version_id = 1,
2307      .minimum_version_id = 1,
2308      .pre_load = vmxnet3_mcast_list_pre_load,
2309      .needed = vmxnet3_mc_list_needed,
2310      .fields = (const VMStateField[]) {
2311          VMSTATE_VBUFFER_UINT32(mcast_list, VMXNET3State, 0, NULL,
2312              mcast_list_buff_size),
2313          VMSTATE_END_OF_LIST()
2314      }
2315  };
2316  
2317  static const VMStateDescription vmstate_vmxnet3_ring = {
2318      .name = "vmxnet3-ring",
2319      .version_id = 0,
2320      .fields = (const VMStateField[]) {
2321          VMSTATE_UINT64(pa, Vmxnet3Ring),
2322          VMSTATE_UINT32(size, Vmxnet3Ring),
2323          VMSTATE_UINT32(cell_size, Vmxnet3Ring),
2324          VMSTATE_UINT32(next, Vmxnet3Ring),
2325          VMSTATE_UINT8(gen, Vmxnet3Ring),
2326          VMSTATE_END_OF_LIST()
2327      }
2328  };
2329  
2330  static const VMStateDescription vmstate_vmxnet3_tx_stats = {
2331      .name = "vmxnet3-tx-stats",
2332      .version_id = 0,
2333      .fields = (const VMStateField[]) {
2334          VMSTATE_UINT64(TSOPktsTxOK, struct UPT1_TxStats),
2335          VMSTATE_UINT64(TSOBytesTxOK, struct UPT1_TxStats),
2336          VMSTATE_UINT64(ucastPktsTxOK, struct UPT1_TxStats),
2337          VMSTATE_UINT64(ucastBytesTxOK, struct UPT1_TxStats),
2338          VMSTATE_UINT64(mcastPktsTxOK, struct UPT1_TxStats),
2339          VMSTATE_UINT64(mcastBytesTxOK, struct UPT1_TxStats),
2340          VMSTATE_UINT64(bcastPktsTxOK, struct UPT1_TxStats),
2341          VMSTATE_UINT64(bcastBytesTxOK, struct UPT1_TxStats),
2342          VMSTATE_UINT64(pktsTxError, struct UPT1_TxStats),
2343          VMSTATE_UINT64(pktsTxDiscard, struct UPT1_TxStats),
2344          VMSTATE_END_OF_LIST()
2345      }
2346  };
2347  
2348  static const VMStateDescription vmstate_vmxnet3_txq_descr = {
2349      .name = "vmxnet3-txq-descr",
2350      .version_id = 0,
2351      .fields = (const VMStateField[]) {
2352          VMSTATE_STRUCT(tx_ring, Vmxnet3TxqDescr, 0, vmstate_vmxnet3_ring,
2353                         Vmxnet3Ring),
2354          VMSTATE_STRUCT(comp_ring, Vmxnet3TxqDescr, 0, vmstate_vmxnet3_ring,
2355                         Vmxnet3Ring),
2356          VMSTATE_UINT8(intr_idx, Vmxnet3TxqDescr),
2357          VMSTATE_UINT64(tx_stats_pa, Vmxnet3TxqDescr),
2358          VMSTATE_STRUCT(txq_stats, Vmxnet3TxqDescr, 0, vmstate_vmxnet3_tx_stats,
2359                         struct UPT1_TxStats),
2360          VMSTATE_END_OF_LIST()
2361      }
2362  };
2363  
2364  static const VMStateDescription vmstate_vmxnet3_rx_stats = {
2365      .name = "vmxnet3-rx-stats",
2366      .version_id = 0,
2367      .fields = (const VMStateField[]) {
2368          VMSTATE_UINT64(LROPktsRxOK, struct UPT1_RxStats),
2369          VMSTATE_UINT64(LROBytesRxOK, struct UPT1_RxStats),
2370          VMSTATE_UINT64(ucastPktsRxOK, struct UPT1_RxStats),
2371          VMSTATE_UINT64(ucastBytesRxOK, struct UPT1_RxStats),
2372          VMSTATE_UINT64(mcastPktsRxOK, struct UPT1_RxStats),
2373          VMSTATE_UINT64(mcastBytesRxOK, struct UPT1_RxStats),
2374          VMSTATE_UINT64(bcastPktsRxOK, struct UPT1_RxStats),
2375          VMSTATE_UINT64(bcastBytesRxOK, struct UPT1_RxStats),
2376          VMSTATE_UINT64(pktsRxOutOfBuf, struct UPT1_RxStats),
2377          VMSTATE_UINT64(pktsRxError, struct UPT1_RxStats),
2378          VMSTATE_END_OF_LIST()
2379      }
2380  };
2381  
2382  static const VMStateDescription vmstate_vmxnet3_rxq_descr = {
2383      .name = "vmxnet3-rxq-descr",
2384      .version_id = 0,
2385      .fields = (const VMStateField[]) {
2386          VMSTATE_STRUCT_ARRAY(rx_ring, Vmxnet3RxqDescr,
2387                               VMXNET3_RX_RINGS_PER_QUEUE, 0,
2388                               vmstate_vmxnet3_ring, Vmxnet3Ring),
2389          VMSTATE_STRUCT(comp_ring, Vmxnet3RxqDescr, 0, vmstate_vmxnet3_ring,
2390                         Vmxnet3Ring),
2391          VMSTATE_UINT8(intr_idx, Vmxnet3RxqDescr),
2392          VMSTATE_UINT64(rx_stats_pa, Vmxnet3RxqDescr),
2393          VMSTATE_STRUCT(rxq_stats, Vmxnet3RxqDescr, 0, vmstate_vmxnet3_rx_stats,
2394                         struct UPT1_RxStats),
2395          VMSTATE_END_OF_LIST()
2396      }
2397  };
2398  
2399  static int vmxnet3_post_load(void *opaque, int version_id)
2400  {
2401      VMXNET3State *s = opaque;
2402  
2403      net_tx_pkt_init(&s->tx_pkt, s->max_tx_frags);
2404      net_rx_pkt_init(&s->rx_pkt);
2405  
2406      if (s->msix_used) {
2407          vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS);
2408      }
2409  
2410      if (!vmxnet3_validate_queues(s)) {
2411          return -1;
2412      }
2413      vmxnet3_validate_interrupts(s);
2414  
2415      return 0;
2416  }
2417  
2418  static const VMStateDescription vmstate_vmxnet3_int_state = {
2419      .name = "vmxnet3-int-state",
2420      .version_id = 0,
2421      .fields = (const VMStateField[]) {
2422          VMSTATE_BOOL(is_masked, Vmxnet3IntState),
2423          VMSTATE_BOOL(is_pending, Vmxnet3IntState),
2424          VMSTATE_BOOL(is_asserted, Vmxnet3IntState),
2425          VMSTATE_END_OF_LIST()
2426      }
2427  };
2428  
2429  static const VMStateDescription vmstate_vmxnet3 = {
2430      .name = "vmxnet3",
2431      .version_id = 1,
2432      .minimum_version_id = 1,
2433      .pre_save = vmxnet3_pre_save,
2434      .post_load = vmxnet3_post_load,
2435      .fields = (const VMStateField[]) {
2436              VMSTATE_PCI_DEVICE(parent_obj, VMXNET3State),
2437              VMSTATE_MSIX(parent_obj, VMXNET3State),
2438              VMSTATE_BOOL(rx_packets_compound, VMXNET3State),
2439              VMSTATE_BOOL(rx_vlan_stripping, VMXNET3State),
2440              VMSTATE_BOOL(lro_supported, VMXNET3State),
2441              VMSTATE_UINT32(rx_mode, VMXNET3State),
2442              VMSTATE_UINT32(mcast_list_len, VMXNET3State),
2443              VMSTATE_UINT32(mcast_list_buff_size, VMXNET3State),
2444              VMSTATE_UINT32_ARRAY(vlan_table, VMXNET3State, VMXNET3_VFT_SIZE),
2445              VMSTATE_UINT32(mtu, VMXNET3State),
2446              VMSTATE_UINT16(max_rx_frags, VMXNET3State),
2447              VMSTATE_UINT32(max_tx_frags, VMXNET3State),
2448              VMSTATE_UINT8(event_int_idx, VMXNET3State),
2449              VMSTATE_BOOL(auto_int_masking, VMXNET3State),
2450              VMSTATE_UINT8(txq_num, VMXNET3State),
2451              VMSTATE_UINT8(rxq_num, VMXNET3State),
2452              VMSTATE_UINT32(device_active, VMXNET3State),
2453              VMSTATE_UINT32(last_command, VMXNET3State),
2454              VMSTATE_UINT32(link_status_and_speed, VMXNET3State),
2455              VMSTATE_UINT32(temp_mac, VMXNET3State),
2456              VMSTATE_UINT64(drv_shmem, VMXNET3State),
2457              VMSTATE_UINT64(temp_shared_guest_driver_memory, VMXNET3State),
2458  
2459              VMSTATE_STRUCT_ARRAY(txq_descr, VMXNET3State,
2460                  VMXNET3_DEVICE_MAX_TX_QUEUES, 0, vmstate_vmxnet3_txq_descr,
2461                  Vmxnet3TxqDescr),
2462              VMSTATE_STRUCT_ARRAY(rxq_descr, VMXNET3State,
2463                  VMXNET3_DEVICE_MAX_RX_QUEUES, 0, vmstate_vmxnet3_rxq_descr,
2464                  Vmxnet3RxqDescr),
2465              VMSTATE_STRUCT_ARRAY(interrupt_states, VMXNET3State,
2466                  VMXNET3_MAX_INTRS, 0, vmstate_vmxnet3_int_state,
2467                  Vmxnet3IntState),
2468  
2469              VMSTATE_END_OF_LIST()
2470      },
2471      .subsections = (const VMStateDescription * const []) {
2472          &vmxstate_vmxnet3_mcast_list,
2473          NULL
2474      }
2475  };
2476  
2477  static Property vmxnet3_properties[] = {
2478      DEFINE_NIC_PROPERTIES(VMXNET3State, conf),
2479      DEFINE_PROP_BIT("x-old-msi-offsets", VMXNET3State, compat_flags,
2480                      VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT, false),
2481      DEFINE_PROP_BIT("x-disable-pcie", VMXNET3State, compat_flags,
2482                      VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT, false),
2483      DEFINE_PROP_END_OF_LIST(),
2484  };
2485  
2486  static void vmxnet3_realize(DeviceState *qdev, Error **errp)
2487  {
2488      VMXNET3Class *vc = VMXNET3_DEVICE_GET_CLASS(qdev);
2489      PCIDevice *pci_dev = PCI_DEVICE(qdev);
2490      VMXNET3State *s = VMXNET3(qdev);
2491  
2492      if (!(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE)) {
2493          pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
2494      }
2495  
2496      vc->parent_dc_realize(qdev, errp);
2497  }
2498  
2499  static void vmxnet3_class_init(ObjectClass *class, void *data)
2500  {
2501      DeviceClass *dc = DEVICE_CLASS(class);
2502      PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
2503      VMXNET3Class *vc = VMXNET3_DEVICE_CLASS(class);
2504  
2505      c->realize = vmxnet3_pci_realize;
2506      c->exit = vmxnet3_pci_uninit;
2507      c->vendor_id = PCI_VENDOR_ID_VMWARE;
2508      c->device_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
2509      c->revision = PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION;
2510      c->romfile = "efi-vmxnet3.rom";
2511      c->class_id = PCI_CLASS_NETWORK_ETHERNET;
2512      c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE;
2513      c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
2514      device_class_set_parent_realize(dc, vmxnet3_realize,
2515                                      &vc->parent_dc_realize);
2516      dc->desc = "VMWare Paravirtualized Ethernet v3";
2517      dc->reset = vmxnet3_qdev_reset;
2518      dc->vmsd = &vmstate_vmxnet3;
2519      device_class_set_props(dc, vmxnet3_properties);
2520      set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
2521  }
2522  
2523  static const TypeInfo vmxnet3_info = {
2524      .name          = TYPE_VMXNET3,
2525      .parent        = TYPE_PCI_DEVICE,
2526      .class_size    = sizeof(VMXNET3Class),
2527      .instance_size = sizeof(VMXNET3State),
2528      .class_init    = vmxnet3_class_init,
2529      .instance_init = vmxnet3_instance_init,
2530      .interfaces = (InterfaceInfo[]) {
2531          { INTERFACE_PCIE_DEVICE },
2532          { INTERFACE_CONVENTIONAL_PCI_DEVICE },
2533          { }
2534      },
2535  };
2536  
2537  static void vmxnet3_register_types(void)
2538  {
2539      VMW_CBPRN("vmxnet3_register_types called...");
2540      type_register_static(&vmxnet3_info);
2541  }
2542  
2543  type_init(vmxnet3_register_types)
2544