1 /* 2 * QEMU VMWARE VMXNET3 paravirtual NIC 3 * 4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) 5 * 6 * Developed by Daynix Computing LTD (http://www.daynix.com) 7 * 8 * Authors: 9 * Dmitry Fleytman <dmitry@daynix.com> 10 * Tamir Shomer <tamirs@daynix.com> 11 * Yan Vugenfirer <yan@daynix.com> 12 * 13 * This work is licensed under the terms of the GNU GPL, version 2. 14 * See the COPYING file in the top-level directory. 15 * 16 */ 17 18 #include "qemu/osdep.h" 19 #include "hw/hw.h" 20 #include "hw/pci/pci.h" 21 #include "net/net.h" 22 #include "net/tap.h" 23 #include "net/checksum.h" 24 #include "sysemu/sysemu.h" 25 #include "qemu-common.h" 26 #include "qemu/bswap.h" 27 #include "hw/pci/msix.h" 28 #include "hw/pci/msi.h" 29 #include "migration/register.h" 30 31 #include "vmxnet3.h" 32 #include "vmxnet_debug.h" 33 #include "vmware_utils.h" 34 #include "net_tx_pkt.h" 35 #include "net_rx_pkt.h" 36 37 #define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1 38 #define VMXNET3_MSIX_BAR_SIZE 0x2000 39 #define MIN_BUF_SIZE 60 40 41 /* Compatibility flags for migration */ 42 #define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0 43 #define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS \ 44 (1 << VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT) 45 #define VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT 1 46 #define VMXNET3_COMPAT_FLAG_DISABLE_PCIE \ 47 (1 << VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT) 48 49 #define VMXNET3_EXP_EP_OFFSET (0x48) 50 #define VMXNET3_MSI_OFFSET(s) \ 51 ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x50 : 0x84) 52 #define VMXNET3_MSIX_OFFSET(s) \ 53 ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0 : 0x9c) 54 #define VMXNET3_DSN_OFFSET (0x100) 55 56 #define VMXNET3_BAR0_IDX (0) 57 #define VMXNET3_BAR1_IDX (1) 58 #define VMXNET3_MSIX_BAR_IDX (2) 59 60 #define VMXNET3_OFF_MSIX_TABLE (0x000) 61 #define VMXNET3_OFF_MSIX_PBA(s) \ 62 ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x800 : 0x1000) 63 64 /* Link speed in Mbps should be shifted by 16 */ 65 #define VMXNET3_LINK_SPEED (1000 << 16) 66 67 /* Link status: 1 - up, 0 - down. */ 68 #define VMXNET3_LINK_STATUS_UP 0x1 69 70 /* Least significant bit should be set for revision and version */ 71 #define VMXNET3_UPT_REVISION 0x1 72 #define VMXNET3_DEVICE_REVISION 0x1 73 74 /* Number of interrupt vectors for non-MSIx modes */ 75 #define VMXNET3_MAX_NMSIX_INTRS (1) 76 77 /* Macros for rings descriptors access */ 78 #define VMXNET3_READ_TX_QUEUE_DESCR8(_d, dpa, field) \ 79 (vmw_shmem_ld8(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field))) 80 81 #define VMXNET3_WRITE_TX_QUEUE_DESCR8(_d, dpa, field, value) \ 82 (vmw_shmem_st8(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field, value))) 83 84 #define VMXNET3_READ_TX_QUEUE_DESCR32(_d, dpa, field) \ 85 (vmw_shmem_ld32(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field))) 86 87 #define VMXNET3_WRITE_TX_QUEUE_DESCR32(_d, dpa, field, value) \ 88 (vmw_shmem_st32(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value)) 89 90 #define VMXNET3_READ_TX_QUEUE_DESCR64(_d, dpa, field) \ 91 (vmw_shmem_ld64(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field))) 92 93 #define VMXNET3_WRITE_TX_QUEUE_DESCR64(_d, dpa, field, value) \ 94 (vmw_shmem_st64(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value)) 95 96 #define VMXNET3_READ_RX_QUEUE_DESCR64(_d, dpa, field) \ 97 (vmw_shmem_ld64(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field))) 98 99 #define VMXNET3_READ_RX_QUEUE_DESCR32(_d, dpa, field) \ 100 (vmw_shmem_ld32(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field))) 101 102 #define VMXNET3_WRITE_RX_QUEUE_DESCR64(_d, dpa, field, value) \ 103 (vmw_shmem_st64(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value)) 104 105 #define VMXNET3_WRITE_RX_QUEUE_DESCR8(_d, dpa, field, value) \ 106 (vmw_shmem_st8(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value)) 107 108 /* Macros for guest driver shared area access */ 109 #define VMXNET3_READ_DRV_SHARED64(_d, shpa, field) \ 110 (vmw_shmem_ld64(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field))) 111 112 #define VMXNET3_READ_DRV_SHARED32(_d, shpa, field) \ 113 (vmw_shmem_ld32(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field))) 114 115 #define VMXNET3_WRITE_DRV_SHARED32(_d, shpa, field, val) \ 116 (vmw_shmem_st32(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field), val)) 117 118 #define VMXNET3_READ_DRV_SHARED16(_d, shpa, field) \ 119 (vmw_shmem_ld16(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field))) 120 121 #define VMXNET3_READ_DRV_SHARED8(_d, shpa, field) \ 122 (vmw_shmem_ld8(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field))) 123 124 #define VMXNET3_READ_DRV_SHARED(_d, shpa, field, b, l) \ 125 (vmw_shmem_read(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field), b, l)) 126 127 #define VMXNET_FLAG_IS_SET(field, flag) (((field) & (flag)) == (flag)) 128 129 typedef struct VMXNET3Class { 130 PCIDeviceClass parent_class; 131 DeviceRealize parent_dc_realize; 132 } VMXNET3Class; 133 134 #define TYPE_VMXNET3 "vmxnet3" 135 #define VMXNET3(obj) OBJECT_CHECK(VMXNET3State, (obj), TYPE_VMXNET3) 136 137 #define VMXNET3_DEVICE_CLASS(klass) \ 138 OBJECT_CLASS_CHECK(VMXNET3Class, (klass), TYPE_VMXNET3) 139 #define VMXNET3_DEVICE_GET_CLASS(obj) \ 140 OBJECT_GET_CLASS(VMXNET3Class, (obj), TYPE_VMXNET3) 141 142 /* Cyclic ring abstraction */ 143 typedef struct { 144 hwaddr pa; 145 uint32_t size; 146 uint32_t cell_size; 147 uint32_t next; 148 uint8_t gen; 149 } Vmxnet3Ring; 150 151 static inline void vmxnet3_ring_init(PCIDevice *d, 152 Vmxnet3Ring *ring, 153 hwaddr pa, 154 uint32_t size, 155 uint32_t cell_size, 156 bool zero_region) 157 { 158 ring->pa = pa; 159 ring->size = size; 160 ring->cell_size = cell_size; 161 ring->gen = VMXNET3_INIT_GEN; 162 ring->next = 0; 163 164 if (zero_region) { 165 vmw_shmem_set(d, pa, 0, size * cell_size); 166 } 167 } 168 169 #define VMXNET3_RING_DUMP(macro, ring_name, ridx, r) \ 170 macro("%s#%d: base %" PRIx64 " size %u cell_size %u gen %d next %u", \ 171 (ring_name), (ridx), \ 172 (r)->pa, (r)->size, (r)->cell_size, (r)->gen, (r)->next) 173 174 static inline void vmxnet3_ring_inc(Vmxnet3Ring *ring) 175 { 176 if (++ring->next >= ring->size) { 177 ring->next = 0; 178 ring->gen ^= 1; 179 } 180 } 181 182 static inline void vmxnet3_ring_dec(Vmxnet3Ring *ring) 183 { 184 if (ring->next-- == 0) { 185 ring->next = ring->size - 1; 186 ring->gen ^= 1; 187 } 188 } 189 190 static inline hwaddr vmxnet3_ring_curr_cell_pa(Vmxnet3Ring *ring) 191 { 192 return ring->pa + ring->next * ring->cell_size; 193 } 194 195 static inline void vmxnet3_ring_read_curr_cell(PCIDevice *d, Vmxnet3Ring *ring, 196 void *buff) 197 { 198 vmw_shmem_read(d, vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size); 199 } 200 201 static inline void vmxnet3_ring_write_curr_cell(PCIDevice *d, Vmxnet3Ring *ring, 202 void *buff) 203 { 204 vmw_shmem_write(d, vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size); 205 } 206 207 static inline size_t vmxnet3_ring_curr_cell_idx(Vmxnet3Ring *ring) 208 { 209 return ring->next; 210 } 211 212 static inline uint8_t vmxnet3_ring_curr_gen(Vmxnet3Ring *ring) 213 { 214 return ring->gen; 215 } 216 217 /* Debug trace-related functions */ 218 static inline void 219 vmxnet3_dump_tx_descr(struct Vmxnet3_TxDesc *descr) 220 { 221 VMW_PKPRN("TX DESCR: " 222 "addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, " 223 "dtype: %d, ext1: %d, msscof: %d, hlen: %d, om: %d, " 224 "eop: %d, cq: %d, ext2: %d, ti: %d, tci: %d", 225 le64_to_cpu(descr->addr), descr->len, descr->gen, descr->rsvd, 226 descr->dtype, descr->ext1, descr->msscof, descr->hlen, descr->om, 227 descr->eop, descr->cq, descr->ext2, descr->ti, descr->tci); 228 } 229 230 static inline void 231 vmxnet3_dump_virt_hdr(struct virtio_net_hdr *vhdr) 232 { 233 VMW_PKPRN("VHDR: flags 0x%x, gso_type: 0x%x, hdr_len: %d, gso_size: %d, " 234 "csum_start: %d, csum_offset: %d", 235 vhdr->flags, vhdr->gso_type, vhdr->hdr_len, vhdr->gso_size, 236 vhdr->csum_start, vhdr->csum_offset); 237 } 238 239 static inline void 240 vmxnet3_dump_rx_descr(struct Vmxnet3_RxDesc *descr) 241 { 242 VMW_PKPRN("RX DESCR: addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, " 243 "dtype: %d, ext1: %d, btype: %d", 244 le64_to_cpu(descr->addr), descr->len, descr->gen, 245 descr->rsvd, descr->dtype, descr->ext1, descr->btype); 246 } 247 248 /* Device state and helper functions */ 249 #define VMXNET3_RX_RINGS_PER_QUEUE (2) 250 251 typedef struct { 252 Vmxnet3Ring tx_ring; 253 Vmxnet3Ring comp_ring; 254 255 uint8_t intr_idx; 256 hwaddr tx_stats_pa; 257 struct UPT1_TxStats txq_stats; 258 } Vmxnet3TxqDescr; 259 260 typedef struct { 261 Vmxnet3Ring rx_ring[VMXNET3_RX_RINGS_PER_QUEUE]; 262 Vmxnet3Ring comp_ring; 263 uint8_t intr_idx; 264 hwaddr rx_stats_pa; 265 struct UPT1_RxStats rxq_stats; 266 } Vmxnet3RxqDescr; 267 268 typedef struct { 269 bool is_masked; 270 bool is_pending; 271 bool is_asserted; 272 } Vmxnet3IntState; 273 274 typedef struct { 275 PCIDevice parent_obj; 276 NICState *nic; 277 NICConf conf; 278 MemoryRegion bar0; 279 MemoryRegion bar1; 280 MemoryRegion msix_bar; 281 282 Vmxnet3RxqDescr rxq_descr[VMXNET3_DEVICE_MAX_RX_QUEUES]; 283 Vmxnet3TxqDescr txq_descr[VMXNET3_DEVICE_MAX_TX_QUEUES]; 284 285 /* Whether MSI-X support was installed successfully */ 286 bool msix_used; 287 hwaddr drv_shmem; 288 hwaddr temp_shared_guest_driver_memory; 289 290 uint8_t txq_num; 291 292 /* This boolean tells whether RX packet being indicated has to */ 293 /* be split into head and body chunks from different RX rings */ 294 bool rx_packets_compound; 295 296 bool rx_vlan_stripping; 297 bool lro_supported; 298 299 uint8_t rxq_num; 300 301 /* Network MTU */ 302 uint32_t mtu; 303 304 /* Maximum number of fragments for indicated TX packets */ 305 uint32_t max_tx_frags; 306 307 /* Maximum number of fragments for indicated RX packets */ 308 uint16_t max_rx_frags; 309 310 /* Index for events interrupt */ 311 uint8_t event_int_idx; 312 313 /* Whether automatic interrupts masking enabled */ 314 bool auto_int_masking; 315 316 bool peer_has_vhdr; 317 318 /* TX packets to QEMU interface */ 319 struct NetTxPkt *tx_pkt; 320 uint32_t offload_mode; 321 uint32_t cso_or_gso_size; 322 uint16_t tci; 323 bool needs_vlan; 324 325 struct NetRxPkt *rx_pkt; 326 327 bool tx_sop; 328 bool skip_current_tx_pkt; 329 330 uint32_t device_active; 331 uint32_t last_command; 332 333 uint32_t link_status_and_speed; 334 335 Vmxnet3IntState interrupt_states[VMXNET3_MAX_INTRS]; 336 337 uint32_t temp_mac; /* To store the low part first */ 338 339 MACAddr perm_mac; 340 uint32_t vlan_table[VMXNET3_VFT_SIZE]; 341 uint32_t rx_mode; 342 MACAddr *mcast_list; 343 uint32_t mcast_list_len; 344 uint32_t mcast_list_buff_size; /* needed for live migration. */ 345 346 /* Compatibility flags for migration */ 347 uint32_t compat_flags; 348 } VMXNET3State; 349 350 /* Interrupt management */ 351 352 /* 353 * This function returns sign whether interrupt line is in asserted state 354 * This depends on the type of interrupt used. For INTX interrupt line will 355 * be asserted until explicit deassertion, for MSI(X) interrupt line will 356 * be deasserted automatically due to notification semantics of the MSI(X) 357 * interrupts 358 */ 359 static bool _vmxnet3_assert_interrupt_line(VMXNET3State *s, uint32_t int_idx) 360 { 361 PCIDevice *d = PCI_DEVICE(s); 362 363 if (s->msix_used && msix_enabled(d)) { 364 VMW_IRPRN("Sending MSI-X notification for vector %u", int_idx); 365 msix_notify(d, int_idx); 366 return false; 367 } 368 if (msi_enabled(d)) { 369 VMW_IRPRN("Sending MSI notification for vector %u", int_idx); 370 msi_notify(d, int_idx); 371 return false; 372 } 373 374 VMW_IRPRN("Asserting line for interrupt %u", int_idx); 375 pci_irq_assert(d); 376 return true; 377 } 378 379 static void _vmxnet3_deassert_interrupt_line(VMXNET3State *s, int lidx) 380 { 381 PCIDevice *d = PCI_DEVICE(s); 382 383 /* 384 * This function should never be called for MSI(X) interrupts 385 * because deassertion never required for message interrupts 386 */ 387 assert(!s->msix_used || !msix_enabled(d)); 388 /* 389 * This function should never be called for MSI(X) interrupts 390 * because deassertion never required for message interrupts 391 */ 392 assert(!msi_enabled(d)); 393 394 VMW_IRPRN("Deasserting line for interrupt %u", lidx); 395 pci_irq_deassert(d); 396 } 397 398 static void vmxnet3_update_interrupt_line_state(VMXNET3State *s, int lidx) 399 { 400 if (!s->interrupt_states[lidx].is_pending && 401 s->interrupt_states[lidx].is_asserted) { 402 VMW_IRPRN("New interrupt line state for index %d is DOWN", lidx); 403 _vmxnet3_deassert_interrupt_line(s, lidx); 404 s->interrupt_states[lidx].is_asserted = false; 405 return; 406 } 407 408 if (s->interrupt_states[lidx].is_pending && 409 !s->interrupt_states[lidx].is_masked && 410 !s->interrupt_states[lidx].is_asserted) { 411 VMW_IRPRN("New interrupt line state for index %d is UP", lidx); 412 s->interrupt_states[lidx].is_asserted = 413 _vmxnet3_assert_interrupt_line(s, lidx); 414 s->interrupt_states[lidx].is_pending = false; 415 return; 416 } 417 } 418 419 static void vmxnet3_trigger_interrupt(VMXNET3State *s, int lidx) 420 { 421 PCIDevice *d = PCI_DEVICE(s); 422 s->interrupt_states[lidx].is_pending = true; 423 vmxnet3_update_interrupt_line_state(s, lidx); 424 425 if (s->msix_used && msix_enabled(d) && s->auto_int_masking) { 426 goto do_automask; 427 } 428 429 if (msi_enabled(d) && s->auto_int_masking) { 430 goto do_automask; 431 } 432 433 return; 434 435 do_automask: 436 s->interrupt_states[lidx].is_masked = true; 437 vmxnet3_update_interrupt_line_state(s, lidx); 438 } 439 440 static bool vmxnet3_interrupt_asserted(VMXNET3State *s, int lidx) 441 { 442 return s->interrupt_states[lidx].is_asserted; 443 } 444 445 static void vmxnet3_clear_interrupt(VMXNET3State *s, int int_idx) 446 { 447 s->interrupt_states[int_idx].is_pending = false; 448 if (s->auto_int_masking) { 449 s->interrupt_states[int_idx].is_masked = true; 450 } 451 vmxnet3_update_interrupt_line_state(s, int_idx); 452 } 453 454 static void 455 vmxnet3_on_interrupt_mask_changed(VMXNET3State *s, int lidx, bool is_masked) 456 { 457 s->interrupt_states[lidx].is_masked = is_masked; 458 vmxnet3_update_interrupt_line_state(s, lidx); 459 } 460 461 static bool vmxnet3_verify_driver_magic(PCIDevice *d, hwaddr dshmem) 462 { 463 return (VMXNET3_READ_DRV_SHARED32(d, dshmem, magic) == VMXNET3_REV1_MAGIC); 464 } 465 466 #define VMXNET3_GET_BYTE(x, byte_num) (((x) >> (byte_num)*8) & 0xFF) 467 #define VMXNET3_MAKE_BYTE(byte_num, val) \ 468 (((uint32_t)((val) & 0xFF)) << (byte_num)*8) 469 470 static void vmxnet3_set_variable_mac(VMXNET3State *s, uint32_t h, uint32_t l) 471 { 472 s->conf.macaddr.a[0] = VMXNET3_GET_BYTE(l, 0); 473 s->conf.macaddr.a[1] = VMXNET3_GET_BYTE(l, 1); 474 s->conf.macaddr.a[2] = VMXNET3_GET_BYTE(l, 2); 475 s->conf.macaddr.a[3] = VMXNET3_GET_BYTE(l, 3); 476 s->conf.macaddr.a[4] = VMXNET3_GET_BYTE(h, 0); 477 s->conf.macaddr.a[5] = VMXNET3_GET_BYTE(h, 1); 478 479 VMW_CFPRN("Variable MAC: " MAC_FMT, MAC_ARG(s->conf.macaddr.a)); 480 481 qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); 482 } 483 484 static uint64_t vmxnet3_get_mac_low(MACAddr *addr) 485 { 486 return VMXNET3_MAKE_BYTE(0, addr->a[0]) | 487 VMXNET3_MAKE_BYTE(1, addr->a[1]) | 488 VMXNET3_MAKE_BYTE(2, addr->a[2]) | 489 VMXNET3_MAKE_BYTE(3, addr->a[3]); 490 } 491 492 static uint64_t vmxnet3_get_mac_high(MACAddr *addr) 493 { 494 return VMXNET3_MAKE_BYTE(0, addr->a[4]) | 495 VMXNET3_MAKE_BYTE(1, addr->a[5]); 496 } 497 498 static void 499 vmxnet3_inc_tx_consumption_counter(VMXNET3State *s, int qidx) 500 { 501 vmxnet3_ring_inc(&s->txq_descr[qidx].tx_ring); 502 } 503 504 static inline void 505 vmxnet3_inc_rx_consumption_counter(VMXNET3State *s, int qidx, int ridx) 506 { 507 vmxnet3_ring_inc(&s->rxq_descr[qidx].rx_ring[ridx]); 508 } 509 510 static inline void 511 vmxnet3_inc_tx_completion_counter(VMXNET3State *s, int qidx) 512 { 513 vmxnet3_ring_inc(&s->txq_descr[qidx].comp_ring); 514 } 515 516 static void 517 vmxnet3_inc_rx_completion_counter(VMXNET3State *s, int qidx) 518 { 519 vmxnet3_ring_inc(&s->rxq_descr[qidx].comp_ring); 520 } 521 522 static void 523 vmxnet3_dec_rx_completion_counter(VMXNET3State *s, int qidx) 524 { 525 vmxnet3_ring_dec(&s->rxq_descr[qidx].comp_ring); 526 } 527 528 static void vmxnet3_complete_packet(VMXNET3State *s, int qidx, uint32_t tx_ridx) 529 { 530 struct Vmxnet3_TxCompDesc txcq_descr; 531 PCIDevice *d = PCI_DEVICE(s); 532 533 VMXNET3_RING_DUMP(VMW_RIPRN, "TXC", qidx, &s->txq_descr[qidx].comp_ring); 534 535 memset(&txcq_descr, 0, sizeof(txcq_descr)); 536 txcq_descr.txdIdx = tx_ridx; 537 txcq_descr.gen = vmxnet3_ring_curr_gen(&s->txq_descr[qidx].comp_ring); 538 539 vmxnet3_ring_write_curr_cell(d, &s->txq_descr[qidx].comp_ring, &txcq_descr); 540 541 /* Flush changes in TX descriptor before changing the counter value */ 542 smp_wmb(); 543 544 vmxnet3_inc_tx_completion_counter(s, qidx); 545 vmxnet3_trigger_interrupt(s, s->txq_descr[qidx].intr_idx); 546 } 547 548 static bool 549 vmxnet3_setup_tx_offloads(VMXNET3State *s) 550 { 551 switch (s->offload_mode) { 552 case VMXNET3_OM_NONE: 553 net_tx_pkt_build_vheader(s->tx_pkt, false, false, 0); 554 break; 555 556 case VMXNET3_OM_CSUM: 557 net_tx_pkt_build_vheader(s->tx_pkt, false, true, 0); 558 VMW_PKPRN("L4 CSO requested\n"); 559 break; 560 561 case VMXNET3_OM_TSO: 562 net_tx_pkt_build_vheader(s->tx_pkt, true, true, 563 s->cso_or_gso_size); 564 net_tx_pkt_update_ip_checksums(s->tx_pkt); 565 VMW_PKPRN("GSO offload requested."); 566 break; 567 568 default: 569 g_assert_not_reached(); 570 return false; 571 } 572 573 return true; 574 } 575 576 static void 577 vmxnet3_tx_retrieve_metadata(VMXNET3State *s, 578 const struct Vmxnet3_TxDesc *txd) 579 { 580 s->offload_mode = txd->om; 581 s->cso_or_gso_size = txd->msscof; 582 s->tci = txd->tci; 583 s->needs_vlan = txd->ti; 584 } 585 586 typedef enum { 587 VMXNET3_PKT_STATUS_OK, 588 VMXNET3_PKT_STATUS_ERROR, 589 VMXNET3_PKT_STATUS_DISCARD,/* only for tx */ 590 VMXNET3_PKT_STATUS_OUT_OF_BUF /* only for rx */ 591 } Vmxnet3PktStatus; 592 593 static void 594 vmxnet3_on_tx_done_update_stats(VMXNET3State *s, int qidx, 595 Vmxnet3PktStatus status) 596 { 597 size_t tot_len = net_tx_pkt_get_total_len(s->tx_pkt); 598 struct UPT1_TxStats *stats = &s->txq_descr[qidx].txq_stats; 599 600 switch (status) { 601 case VMXNET3_PKT_STATUS_OK: 602 switch (net_tx_pkt_get_packet_type(s->tx_pkt)) { 603 case ETH_PKT_BCAST: 604 stats->bcastPktsTxOK++; 605 stats->bcastBytesTxOK += tot_len; 606 break; 607 case ETH_PKT_MCAST: 608 stats->mcastPktsTxOK++; 609 stats->mcastBytesTxOK += tot_len; 610 break; 611 case ETH_PKT_UCAST: 612 stats->ucastPktsTxOK++; 613 stats->ucastBytesTxOK += tot_len; 614 break; 615 default: 616 g_assert_not_reached(); 617 } 618 619 if (s->offload_mode == VMXNET3_OM_TSO) { 620 /* 621 * According to VMWARE headers this statistic is a number 622 * of packets after segmentation but since we don't have 623 * this information in QEMU model, the best we can do is to 624 * provide number of non-segmented packets 625 */ 626 stats->TSOPktsTxOK++; 627 stats->TSOBytesTxOK += tot_len; 628 } 629 break; 630 631 case VMXNET3_PKT_STATUS_DISCARD: 632 stats->pktsTxDiscard++; 633 break; 634 635 case VMXNET3_PKT_STATUS_ERROR: 636 stats->pktsTxError++; 637 break; 638 639 default: 640 g_assert_not_reached(); 641 } 642 } 643 644 static void 645 vmxnet3_on_rx_done_update_stats(VMXNET3State *s, 646 int qidx, 647 Vmxnet3PktStatus status) 648 { 649 struct UPT1_RxStats *stats = &s->rxq_descr[qidx].rxq_stats; 650 size_t tot_len = net_rx_pkt_get_total_len(s->rx_pkt); 651 652 switch (status) { 653 case VMXNET3_PKT_STATUS_OUT_OF_BUF: 654 stats->pktsRxOutOfBuf++; 655 break; 656 657 case VMXNET3_PKT_STATUS_ERROR: 658 stats->pktsRxError++; 659 break; 660 case VMXNET3_PKT_STATUS_OK: 661 switch (net_rx_pkt_get_packet_type(s->rx_pkt)) { 662 case ETH_PKT_BCAST: 663 stats->bcastPktsRxOK++; 664 stats->bcastBytesRxOK += tot_len; 665 break; 666 case ETH_PKT_MCAST: 667 stats->mcastPktsRxOK++; 668 stats->mcastBytesRxOK += tot_len; 669 break; 670 case ETH_PKT_UCAST: 671 stats->ucastPktsRxOK++; 672 stats->ucastBytesRxOK += tot_len; 673 break; 674 default: 675 g_assert_not_reached(); 676 } 677 678 if (tot_len > s->mtu) { 679 stats->LROPktsRxOK++; 680 stats->LROBytesRxOK += tot_len; 681 } 682 break; 683 default: 684 g_assert_not_reached(); 685 } 686 } 687 688 static inline bool 689 vmxnet3_pop_next_tx_descr(VMXNET3State *s, 690 int qidx, 691 struct Vmxnet3_TxDesc *txd, 692 uint32_t *descr_idx) 693 { 694 Vmxnet3Ring *ring = &s->txq_descr[qidx].tx_ring; 695 PCIDevice *d = PCI_DEVICE(s); 696 697 vmxnet3_ring_read_curr_cell(d, ring, txd); 698 if (txd->gen == vmxnet3_ring_curr_gen(ring)) { 699 /* Only read after generation field verification */ 700 smp_rmb(); 701 /* Re-read to be sure we got the latest version */ 702 vmxnet3_ring_read_curr_cell(d, ring, txd); 703 VMXNET3_RING_DUMP(VMW_RIPRN, "TX", qidx, ring); 704 *descr_idx = vmxnet3_ring_curr_cell_idx(ring); 705 vmxnet3_inc_tx_consumption_counter(s, qidx); 706 return true; 707 } 708 709 return false; 710 } 711 712 static bool 713 vmxnet3_send_packet(VMXNET3State *s, uint32_t qidx) 714 { 715 Vmxnet3PktStatus status = VMXNET3_PKT_STATUS_OK; 716 717 if (!vmxnet3_setup_tx_offloads(s)) { 718 status = VMXNET3_PKT_STATUS_ERROR; 719 goto func_exit; 720 } 721 722 /* debug prints */ 723 vmxnet3_dump_virt_hdr(net_tx_pkt_get_vhdr(s->tx_pkt)); 724 net_tx_pkt_dump(s->tx_pkt); 725 726 if (!net_tx_pkt_send(s->tx_pkt, qemu_get_queue(s->nic))) { 727 status = VMXNET3_PKT_STATUS_DISCARD; 728 goto func_exit; 729 } 730 731 func_exit: 732 vmxnet3_on_tx_done_update_stats(s, qidx, status); 733 return (status == VMXNET3_PKT_STATUS_OK); 734 } 735 736 static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx) 737 { 738 struct Vmxnet3_TxDesc txd; 739 uint32_t txd_idx; 740 uint32_t data_len; 741 hwaddr data_pa; 742 743 for (;;) { 744 if (!vmxnet3_pop_next_tx_descr(s, qidx, &txd, &txd_idx)) { 745 break; 746 } 747 748 vmxnet3_dump_tx_descr(&txd); 749 750 if (!s->skip_current_tx_pkt) { 751 data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE; 752 data_pa = le64_to_cpu(txd.addr); 753 754 if (!net_tx_pkt_add_raw_fragment(s->tx_pkt, 755 data_pa, 756 data_len)) { 757 s->skip_current_tx_pkt = true; 758 } 759 } 760 761 if (s->tx_sop) { 762 vmxnet3_tx_retrieve_metadata(s, &txd); 763 s->tx_sop = false; 764 } 765 766 if (txd.eop) { 767 if (!s->skip_current_tx_pkt && net_tx_pkt_parse(s->tx_pkt)) { 768 if (s->needs_vlan) { 769 net_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci); 770 } 771 772 vmxnet3_send_packet(s, qidx); 773 } else { 774 vmxnet3_on_tx_done_update_stats(s, qidx, 775 VMXNET3_PKT_STATUS_ERROR); 776 } 777 778 vmxnet3_complete_packet(s, qidx, txd_idx); 779 s->tx_sop = true; 780 s->skip_current_tx_pkt = false; 781 net_tx_pkt_reset(s->tx_pkt); 782 } 783 } 784 } 785 786 static inline void 787 vmxnet3_read_next_rx_descr(VMXNET3State *s, int qidx, int ridx, 788 struct Vmxnet3_RxDesc *dbuf, uint32_t *didx) 789 { 790 PCIDevice *d = PCI_DEVICE(s); 791 792 Vmxnet3Ring *ring = &s->rxq_descr[qidx].rx_ring[ridx]; 793 *didx = vmxnet3_ring_curr_cell_idx(ring); 794 vmxnet3_ring_read_curr_cell(d, ring, dbuf); 795 } 796 797 static inline uint8_t 798 vmxnet3_get_rx_ring_gen(VMXNET3State *s, int qidx, int ridx) 799 { 800 return s->rxq_descr[qidx].rx_ring[ridx].gen; 801 } 802 803 static inline hwaddr 804 vmxnet3_pop_rxc_descr(VMXNET3State *s, int qidx, uint32_t *descr_gen) 805 { 806 uint8_t ring_gen; 807 struct Vmxnet3_RxCompDesc rxcd; 808 809 hwaddr daddr = 810 vmxnet3_ring_curr_cell_pa(&s->rxq_descr[qidx].comp_ring); 811 812 pci_dma_read(PCI_DEVICE(s), 813 daddr, &rxcd, sizeof(struct Vmxnet3_RxCompDesc)); 814 ring_gen = vmxnet3_ring_curr_gen(&s->rxq_descr[qidx].comp_ring); 815 816 if (rxcd.gen != ring_gen) { 817 *descr_gen = ring_gen; 818 vmxnet3_inc_rx_completion_counter(s, qidx); 819 return daddr; 820 } 821 822 return 0; 823 } 824 825 static inline void 826 vmxnet3_revert_rxc_descr(VMXNET3State *s, int qidx) 827 { 828 vmxnet3_dec_rx_completion_counter(s, qidx); 829 } 830 831 #define RXQ_IDX (0) 832 #define RX_HEAD_BODY_RING (0) 833 #define RX_BODY_ONLY_RING (1) 834 835 static bool 836 vmxnet3_get_next_head_rx_descr(VMXNET3State *s, 837 struct Vmxnet3_RxDesc *descr_buf, 838 uint32_t *descr_idx, 839 uint32_t *ridx) 840 { 841 for (;;) { 842 uint32_t ring_gen; 843 vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, 844 descr_buf, descr_idx); 845 846 /* If no more free descriptors - return */ 847 ring_gen = vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING); 848 if (descr_buf->gen != ring_gen) { 849 return false; 850 } 851 852 /* Only read after generation field verification */ 853 smp_rmb(); 854 /* Re-read to be sure we got the latest version */ 855 vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, 856 descr_buf, descr_idx); 857 858 /* Mark current descriptor as used/skipped */ 859 vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING); 860 861 /* If this is what we are looking for - return */ 862 if (descr_buf->btype == VMXNET3_RXD_BTYPE_HEAD) { 863 *ridx = RX_HEAD_BODY_RING; 864 return true; 865 } 866 } 867 } 868 869 static bool 870 vmxnet3_get_next_body_rx_descr(VMXNET3State *s, 871 struct Vmxnet3_RxDesc *d, 872 uint32_t *didx, 873 uint32_t *ridx) 874 { 875 vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx); 876 877 /* Try to find corresponding descriptor in head/body ring */ 878 if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING)) { 879 /* Only read after generation field verification */ 880 smp_rmb(); 881 /* Re-read to be sure we got the latest version */ 882 vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx); 883 if (d->btype == VMXNET3_RXD_BTYPE_BODY) { 884 vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING); 885 *ridx = RX_HEAD_BODY_RING; 886 return true; 887 } 888 } 889 890 /* 891 * If there is no free descriptors on head/body ring or next free 892 * descriptor is a head descriptor switch to body only ring 893 */ 894 vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx); 895 896 /* If no more free descriptors - return */ 897 if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_BODY_ONLY_RING)) { 898 /* Only read after generation field verification */ 899 smp_rmb(); 900 /* Re-read to be sure we got the latest version */ 901 vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx); 902 assert(d->btype == VMXNET3_RXD_BTYPE_BODY); 903 *ridx = RX_BODY_ONLY_RING; 904 vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_BODY_ONLY_RING); 905 return true; 906 } 907 908 return false; 909 } 910 911 static inline bool 912 vmxnet3_get_next_rx_descr(VMXNET3State *s, bool is_head, 913 struct Vmxnet3_RxDesc *descr_buf, 914 uint32_t *descr_idx, 915 uint32_t *ridx) 916 { 917 if (is_head || !s->rx_packets_compound) { 918 return vmxnet3_get_next_head_rx_descr(s, descr_buf, descr_idx, ridx); 919 } else { 920 return vmxnet3_get_next_body_rx_descr(s, descr_buf, descr_idx, ridx); 921 } 922 } 923 924 /* In case packet was csum offloaded (either NEEDS_CSUM or DATA_VALID), 925 * the implementation always passes an RxCompDesc with a "Checksum 926 * calculated and found correct" to the OS (cnc=0 and tuc=1, see 927 * vmxnet3_rx_update_descr). This emulates the observed ESXi behavior. 928 * 929 * Therefore, if packet has the NEEDS_CSUM set, we must calculate 930 * and place a fully computed checksum into the tcp/udp header. 931 * Otherwise, the OS driver will receive a checksum-correct indication 932 * (CHECKSUM_UNNECESSARY), but with the actual tcp/udp checksum field 933 * having just the pseudo header csum value. 934 * 935 * While this is not a problem if packet is destined for local delivery, 936 * in the case the host OS performs forwarding, it will forward an 937 * incorrectly checksummed packet. 938 */ 939 static void vmxnet3_rx_need_csum_calculate(struct NetRxPkt *pkt, 940 const void *pkt_data, 941 size_t pkt_len) 942 { 943 struct virtio_net_hdr *vhdr; 944 bool isip4, isip6, istcp, isudp; 945 uint8_t *data; 946 int len; 947 948 if (!net_rx_pkt_has_virt_hdr(pkt)) { 949 return; 950 } 951 952 vhdr = net_rx_pkt_get_vhdr(pkt); 953 if (!VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM)) { 954 return; 955 } 956 957 net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); 958 if (!(isip4 || isip6) || !(istcp || isudp)) { 959 return; 960 } 961 962 vmxnet3_dump_virt_hdr(vhdr); 963 964 /* Validate packet len: csum_start + scum_offset + length of csum field */ 965 if (pkt_len < (vhdr->csum_start + vhdr->csum_offset + 2)) { 966 VMW_PKPRN("packet len:%zu < csum_start(%d) + csum_offset(%d) + 2, " 967 "cannot calculate checksum", 968 pkt_len, vhdr->csum_start, vhdr->csum_offset); 969 return; 970 } 971 972 data = (uint8_t *)pkt_data + vhdr->csum_start; 973 len = pkt_len - vhdr->csum_start; 974 /* Put the checksum obtained into the packet */ 975 stw_be_p(data + vhdr->csum_offset, net_raw_checksum(data, len)); 976 977 vhdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM; 978 vhdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID; 979 } 980 981 static void vmxnet3_rx_update_descr(struct NetRxPkt *pkt, 982 struct Vmxnet3_RxCompDesc *rxcd) 983 { 984 int csum_ok, is_gso; 985 bool isip4, isip6, istcp, isudp; 986 struct virtio_net_hdr *vhdr; 987 uint8_t offload_type; 988 989 if (net_rx_pkt_is_vlan_stripped(pkt)) { 990 rxcd->ts = 1; 991 rxcd->tci = net_rx_pkt_get_vlan_tag(pkt); 992 } 993 994 if (!net_rx_pkt_has_virt_hdr(pkt)) { 995 goto nocsum; 996 } 997 998 vhdr = net_rx_pkt_get_vhdr(pkt); 999 /* 1000 * Checksum is valid when lower level tell so or when lower level 1001 * requires checksum offload telling that packet produced/bridged 1002 * locally and did travel over network after last checksum calculation 1003 * or production 1004 */ 1005 csum_ok = VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_DATA_VALID) || 1006 VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM); 1007 1008 offload_type = vhdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN; 1009 is_gso = (offload_type != VIRTIO_NET_HDR_GSO_NONE) ? 1 : 0; 1010 1011 if (!csum_ok && !is_gso) { 1012 goto nocsum; 1013 } 1014 1015 net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); 1016 if ((!istcp && !isudp) || (!isip4 && !isip6)) { 1017 goto nocsum; 1018 } 1019 1020 rxcd->cnc = 0; 1021 rxcd->v4 = isip4 ? 1 : 0; 1022 rxcd->v6 = isip6 ? 1 : 0; 1023 rxcd->tcp = istcp ? 1 : 0; 1024 rxcd->udp = isudp ? 1 : 0; 1025 rxcd->fcs = rxcd->tuc = rxcd->ipc = 1; 1026 return; 1027 1028 nocsum: 1029 rxcd->cnc = 1; 1030 return; 1031 } 1032 1033 static void 1034 vmxnet3_pci_dma_writev(PCIDevice *pci_dev, 1035 const struct iovec *iov, 1036 size_t start_iov_off, 1037 hwaddr target_addr, 1038 size_t bytes_to_copy) 1039 { 1040 size_t curr_off = 0; 1041 size_t copied = 0; 1042 1043 while (bytes_to_copy) { 1044 if (start_iov_off < (curr_off + iov->iov_len)) { 1045 size_t chunk_len = 1046 MIN((curr_off + iov->iov_len) - start_iov_off, bytes_to_copy); 1047 1048 pci_dma_write(pci_dev, target_addr + copied, 1049 iov->iov_base + start_iov_off - curr_off, 1050 chunk_len); 1051 1052 copied += chunk_len; 1053 start_iov_off += chunk_len; 1054 curr_off = start_iov_off; 1055 bytes_to_copy -= chunk_len; 1056 } else { 1057 curr_off += iov->iov_len; 1058 } 1059 iov++; 1060 } 1061 } 1062 1063 static bool 1064 vmxnet3_indicate_packet(VMXNET3State *s) 1065 { 1066 struct Vmxnet3_RxDesc rxd; 1067 PCIDevice *d = PCI_DEVICE(s); 1068 bool is_head = true; 1069 uint32_t rxd_idx; 1070 uint32_t rx_ridx = 0; 1071 1072 struct Vmxnet3_RxCompDesc rxcd; 1073 uint32_t new_rxcd_gen = VMXNET3_INIT_GEN; 1074 hwaddr new_rxcd_pa = 0; 1075 hwaddr ready_rxcd_pa = 0; 1076 struct iovec *data = net_rx_pkt_get_iovec(s->rx_pkt); 1077 size_t bytes_copied = 0; 1078 size_t bytes_left = net_rx_pkt_get_total_len(s->rx_pkt); 1079 uint16_t num_frags = 0; 1080 size_t chunk_size; 1081 1082 net_rx_pkt_dump(s->rx_pkt); 1083 1084 while (bytes_left > 0) { 1085 1086 /* cannot add more frags to packet */ 1087 if (num_frags == s->max_rx_frags) { 1088 break; 1089 } 1090 1091 new_rxcd_pa = vmxnet3_pop_rxc_descr(s, RXQ_IDX, &new_rxcd_gen); 1092 if (!new_rxcd_pa) { 1093 break; 1094 } 1095 1096 if (!vmxnet3_get_next_rx_descr(s, is_head, &rxd, &rxd_idx, &rx_ridx)) { 1097 break; 1098 } 1099 1100 chunk_size = MIN(bytes_left, rxd.len); 1101 vmxnet3_pci_dma_writev(d, data, bytes_copied, 1102 le64_to_cpu(rxd.addr), chunk_size); 1103 bytes_copied += chunk_size; 1104 bytes_left -= chunk_size; 1105 1106 vmxnet3_dump_rx_descr(&rxd); 1107 1108 if (ready_rxcd_pa != 0) { 1109 pci_dma_write(d, ready_rxcd_pa, &rxcd, sizeof(rxcd)); 1110 } 1111 1112 memset(&rxcd, 0, sizeof(struct Vmxnet3_RxCompDesc)); 1113 rxcd.rxdIdx = rxd_idx; 1114 rxcd.len = chunk_size; 1115 rxcd.sop = is_head; 1116 rxcd.gen = new_rxcd_gen; 1117 rxcd.rqID = RXQ_IDX + rx_ridx * s->rxq_num; 1118 1119 if (bytes_left == 0) { 1120 vmxnet3_rx_update_descr(s->rx_pkt, &rxcd); 1121 } 1122 1123 VMW_RIPRN("RX Completion descriptor: rxRing: %lu rxIdx %lu len %lu " 1124 "sop %d csum_correct %lu", 1125 (unsigned long) rx_ridx, 1126 (unsigned long) rxcd.rxdIdx, 1127 (unsigned long) rxcd.len, 1128 (int) rxcd.sop, 1129 (unsigned long) rxcd.tuc); 1130 1131 is_head = false; 1132 ready_rxcd_pa = new_rxcd_pa; 1133 new_rxcd_pa = 0; 1134 num_frags++; 1135 } 1136 1137 if (ready_rxcd_pa != 0) { 1138 rxcd.eop = 1; 1139 rxcd.err = (bytes_left != 0); 1140 1141 pci_dma_write(d, ready_rxcd_pa, &rxcd, sizeof(rxcd)); 1142 1143 /* Flush RX descriptor changes */ 1144 smp_wmb(); 1145 } 1146 1147 if (new_rxcd_pa != 0) { 1148 vmxnet3_revert_rxc_descr(s, RXQ_IDX); 1149 } 1150 1151 vmxnet3_trigger_interrupt(s, s->rxq_descr[RXQ_IDX].intr_idx); 1152 1153 if (bytes_left == 0) { 1154 vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_OK); 1155 return true; 1156 } else if (num_frags == s->max_rx_frags) { 1157 vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_ERROR); 1158 return false; 1159 } else { 1160 vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, 1161 VMXNET3_PKT_STATUS_OUT_OF_BUF); 1162 return false; 1163 } 1164 } 1165 1166 static void 1167 vmxnet3_io_bar0_write(void *opaque, hwaddr addr, 1168 uint64_t val, unsigned size) 1169 { 1170 VMXNET3State *s = opaque; 1171 1172 if (!s->device_active) { 1173 return; 1174 } 1175 1176 if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_TXPROD, 1177 VMXNET3_DEVICE_MAX_TX_QUEUES, VMXNET3_REG_ALIGN)) { 1178 int tx_queue_idx = 1179 VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_TXPROD, 1180 VMXNET3_REG_ALIGN); 1181 assert(tx_queue_idx <= s->txq_num); 1182 vmxnet3_process_tx_queue(s, tx_queue_idx); 1183 return; 1184 } 1185 1186 if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR, 1187 VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) { 1188 int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR, 1189 VMXNET3_REG_ALIGN); 1190 1191 VMW_CBPRN("Interrupt mask for line %d written: 0x%" PRIx64, l, val); 1192 1193 vmxnet3_on_interrupt_mask_changed(s, l, val); 1194 return; 1195 } 1196 1197 if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD, 1198 VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN) || 1199 VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD2, 1200 VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN)) { 1201 return; 1202 } 1203 1204 VMW_WRPRN("BAR0 unknown write [%" PRIx64 "] = %" PRIx64 ", size %d", 1205 (uint64_t) addr, val, size); 1206 } 1207 1208 static uint64_t 1209 vmxnet3_io_bar0_read(void *opaque, hwaddr addr, unsigned size) 1210 { 1211 VMXNET3State *s = opaque; 1212 1213 if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR, 1214 VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) { 1215 int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR, 1216 VMXNET3_REG_ALIGN); 1217 return s->interrupt_states[l].is_masked; 1218 } 1219 1220 VMW_CBPRN("BAR0 unknown read [%" PRIx64 "], size %d", addr, size); 1221 return 0; 1222 } 1223 1224 static void vmxnet3_reset_interrupt_states(VMXNET3State *s) 1225 { 1226 int i; 1227 for (i = 0; i < ARRAY_SIZE(s->interrupt_states); i++) { 1228 s->interrupt_states[i].is_asserted = false; 1229 s->interrupt_states[i].is_pending = false; 1230 s->interrupt_states[i].is_masked = true; 1231 } 1232 } 1233 1234 static void vmxnet3_reset_mac(VMXNET3State *s) 1235 { 1236 memcpy(&s->conf.macaddr.a, &s->perm_mac.a, sizeof(s->perm_mac.a)); 1237 VMW_CFPRN("MAC address set to: " MAC_FMT, MAC_ARG(s->conf.macaddr.a)); 1238 } 1239 1240 static void vmxnet3_deactivate_device(VMXNET3State *s) 1241 { 1242 if (s->device_active) { 1243 VMW_CBPRN("Deactivating vmxnet3..."); 1244 net_tx_pkt_reset(s->tx_pkt); 1245 net_tx_pkt_uninit(s->tx_pkt); 1246 net_rx_pkt_uninit(s->rx_pkt); 1247 s->device_active = false; 1248 } 1249 } 1250 1251 static void vmxnet3_reset(VMXNET3State *s) 1252 { 1253 VMW_CBPRN("Resetting vmxnet3..."); 1254 1255 vmxnet3_deactivate_device(s); 1256 vmxnet3_reset_interrupt_states(s); 1257 s->drv_shmem = 0; 1258 s->tx_sop = true; 1259 s->skip_current_tx_pkt = false; 1260 } 1261 1262 static void vmxnet3_update_rx_mode(VMXNET3State *s) 1263 { 1264 PCIDevice *d = PCI_DEVICE(s); 1265 1266 s->rx_mode = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, 1267 devRead.rxFilterConf.rxMode); 1268 VMW_CFPRN("RX mode: 0x%08X", s->rx_mode); 1269 } 1270 1271 static void vmxnet3_update_vlan_filters(VMXNET3State *s) 1272 { 1273 int i; 1274 PCIDevice *d = PCI_DEVICE(s); 1275 1276 /* Copy configuration from shared memory */ 1277 VMXNET3_READ_DRV_SHARED(d, s->drv_shmem, 1278 devRead.rxFilterConf.vfTable, 1279 s->vlan_table, 1280 sizeof(s->vlan_table)); 1281 1282 /* Invert byte order when needed */ 1283 for (i = 0; i < ARRAY_SIZE(s->vlan_table); i++) { 1284 s->vlan_table[i] = le32_to_cpu(s->vlan_table[i]); 1285 } 1286 1287 /* Dump configuration for debugging purposes */ 1288 VMW_CFPRN("Configured VLANs:"); 1289 for (i = 0; i < sizeof(s->vlan_table) * 8; i++) { 1290 if (VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, i)) { 1291 VMW_CFPRN("\tVLAN %d is present", i); 1292 } 1293 } 1294 } 1295 1296 static void vmxnet3_update_mcast_filters(VMXNET3State *s) 1297 { 1298 PCIDevice *d = PCI_DEVICE(s); 1299 1300 uint16_t list_bytes = 1301 VMXNET3_READ_DRV_SHARED16(d, s->drv_shmem, 1302 devRead.rxFilterConf.mfTableLen); 1303 1304 s->mcast_list_len = list_bytes / sizeof(s->mcast_list[0]); 1305 1306 s->mcast_list = g_realloc(s->mcast_list, list_bytes); 1307 if (!s->mcast_list) { 1308 if (s->mcast_list_len == 0) { 1309 VMW_CFPRN("Current multicast list is empty"); 1310 } else { 1311 VMW_ERPRN("Failed to allocate multicast list of %d elements", 1312 s->mcast_list_len); 1313 } 1314 s->mcast_list_len = 0; 1315 } else { 1316 int i; 1317 hwaddr mcast_list_pa = 1318 VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, 1319 devRead.rxFilterConf.mfTablePA); 1320 1321 pci_dma_read(d, mcast_list_pa, s->mcast_list, list_bytes); 1322 1323 VMW_CFPRN("Current multicast list len is %d:", s->mcast_list_len); 1324 for (i = 0; i < s->mcast_list_len; i++) { 1325 VMW_CFPRN("\t" MAC_FMT, MAC_ARG(s->mcast_list[i].a)); 1326 } 1327 } 1328 } 1329 1330 static void vmxnet3_setup_rx_filtering(VMXNET3State *s) 1331 { 1332 vmxnet3_update_rx_mode(s); 1333 vmxnet3_update_vlan_filters(s); 1334 vmxnet3_update_mcast_filters(s); 1335 } 1336 1337 static uint32_t vmxnet3_get_interrupt_config(VMXNET3State *s) 1338 { 1339 uint32_t interrupt_mode = VMXNET3_IT_AUTO | (VMXNET3_IMM_AUTO << 2); 1340 VMW_CFPRN("Interrupt config is 0x%X", interrupt_mode); 1341 return interrupt_mode; 1342 } 1343 1344 static void vmxnet3_fill_stats(VMXNET3State *s) 1345 { 1346 int i; 1347 PCIDevice *d = PCI_DEVICE(s); 1348 1349 if (!s->device_active) 1350 return; 1351 1352 for (i = 0; i < s->txq_num; i++) { 1353 pci_dma_write(d, 1354 s->txq_descr[i].tx_stats_pa, 1355 &s->txq_descr[i].txq_stats, 1356 sizeof(s->txq_descr[i].txq_stats)); 1357 } 1358 1359 for (i = 0; i < s->rxq_num; i++) { 1360 pci_dma_write(d, 1361 s->rxq_descr[i].rx_stats_pa, 1362 &s->rxq_descr[i].rxq_stats, 1363 sizeof(s->rxq_descr[i].rxq_stats)); 1364 } 1365 } 1366 1367 static void vmxnet3_adjust_by_guest_type(VMXNET3State *s) 1368 { 1369 struct Vmxnet3_GOSInfo gos; 1370 PCIDevice *d = PCI_DEVICE(s); 1371 1372 VMXNET3_READ_DRV_SHARED(d, s->drv_shmem, devRead.misc.driverInfo.gos, 1373 &gos, sizeof(gos)); 1374 s->rx_packets_compound = 1375 (gos.gosType == VMXNET3_GOS_TYPE_WIN) ? false : true; 1376 1377 VMW_CFPRN("Guest type specifics: RXCOMPOUND: %d", s->rx_packets_compound); 1378 } 1379 1380 static void 1381 vmxnet3_dump_conf_descr(const char *name, 1382 struct Vmxnet3_VariableLenConfDesc *pm_descr) 1383 { 1384 VMW_CFPRN("%s descriptor dump: Version %u, Length %u", 1385 name, pm_descr->confVer, pm_descr->confLen); 1386 1387 }; 1388 1389 static void vmxnet3_update_pm_state(VMXNET3State *s) 1390 { 1391 struct Vmxnet3_VariableLenConfDesc pm_descr; 1392 PCIDevice *d = PCI_DEVICE(s); 1393 1394 pm_descr.confLen = 1395 VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.pmConfDesc.confLen); 1396 pm_descr.confVer = 1397 VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.pmConfDesc.confVer); 1398 pm_descr.confPA = 1399 VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.pmConfDesc.confPA); 1400 1401 vmxnet3_dump_conf_descr("PM State", &pm_descr); 1402 } 1403 1404 static void vmxnet3_update_features(VMXNET3State *s) 1405 { 1406 uint32_t guest_features; 1407 int rxcso_supported; 1408 PCIDevice *d = PCI_DEVICE(s); 1409 1410 guest_features = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, 1411 devRead.misc.uptFeatures); 1412 1413 rxcso_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXCSUM); 1414 s->rx_vlan_stripping = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXVLAN); 1415 s->lro_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_LRO); 1416 1417 VMW_CFPRN("Features configuration: LRO: %d, RXCSUM: %d, VLANSTRIP: %d", 1418 s->lro_supported, rxcso_supported, 1419 s->rx_vlan_stripping); 1420 if (s->peer_has_vhdr) { 1421 qemu_set_offload(qemu_get_queue(s->nic)->peer, 1422 rxcso_supported, 1423 s->lro_supported, 1424 s->lro_supported, 1425 0, 1426 0); 1427 } 1428 } 1429 1430 static bool vmxnet3_verify_intx(VMXNET3State *s, int intx) 1431 { 1432 return s->msix_used || msi_enabled(PCI_DEVICE(s)) 1433 || intx == pci_get_byte(s->parent_obj.config + PCI_INTERRUPT_PIN) - 1; 1434 } 1435 1436 static void vmxnet3_validate_interrupt_idx(bool is_msix, int idx) 1437 { 1438 int max_ints = is_msix ? VMXNET3_MAX_INTRS : VMXNET3_MAX_NMSIX_INTRS; 1439 if (idx >= max_ints) { 1440 hw_error("Bad interrupt index: %d\n", idx); 1441 } 1442 } 1443 1444 static void vmxnet3_validate_interrupts(VMXNET3State *s) 1445 { 1446 int i; 1447 1448 VMW_CFPRN("Verifying event interrupt index (%d)", s->event_int_idx); 1449 vmxnet3_validate_interrupt_idx(s->msix_used, s->event_int_idx); 1450 1451 for (i = 0; i < s->txq_num; i++) { 1452 int idx = s->txq_descr[i].intr_idx; 1453 VMW_CFPRN("Verifying TX queue %d interrupt index (%d)", i, idx); 1454 vmxnet3_validate_interrupt_idx(s->msix_used, idx); 1455 } 1456 1457 for (i = 0; i < s->rxq_num; i++) { 1458 int idx = s->rxq_descr[i].intr_idx; 1459 VMW_CFPRN("Verifying RX queue %d interrupt index (%d)", i, idx); 1460 vmxnet3_validate_interrupt_idx(s->msix_used, idx); 1461 } 1462 } 1463 1464 static void vmxnet3_validate_queues(VMXNET3State *s) 1465 { 1466 /* 1467 * txq_num and rxq_num are total number of queues 1468 * configured by guest. These numbers must not 1469 * exceed corresponding maximal values. 1470 */ 1471 1472 if (s->txq_num > VMXNET3_DEVICE_MAX_TX_QUEUES) { 1473 hw_error("Bad TX queues number: %d\n", s->txq_num); 1474 } 1475 1476 if (s->rxq_num > VMXNET3_DEVICE_MAX_RX_QUEUES) { 1477 hw_error("Bad RX queues number: %d\n", s->rxq_num); 1478 } 1479 } 1480 1481 static void vmxnet3_activate_device(VMXNET3State *s) 1482 { 1483 int i; 1484 static const uint32_t VMXNET3_DEF_TX_THRESHOLD = 1; 1485 PCIDevice *d = PCI_DEVICE(s); 1486 hwaddr qdescr_table_pa; 1487 uint64_t pa; 1488 uint32_t size; 1489 1490 /* Verify configuration consistency */ 1491 if (!vmxnet3_verify_driver_magic(d, s->drv_shmem)) { 1492 VMW_ERPRN("Device configuration received from driver is invalid"); 1493 return; 1494 } 1495 1496 /* Verify if device is active */ 1497 if (s->device_active) { 1498 VMW_CFPRN("Vmxnet3 device is active"); 1499 return; 1500 } 1501 1502 vmxnet3_adjust_by_guest_type(s); 1503 vmxnet3_update_features(s); 1504 vmxnet3_update_pm_state(s); 1505 vmxnet3_setup_rx_filtering(s); 1506 /* Cache fields from shared memory */ 1507 s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu); 1508 VMW_CFPRN("MTU is %u", s->mtu); 1509 1510 s->max_rx_frags = 1511 VMXNET3_READ_DRV_SHARED16(d, s->drv_shmem, devRead.misc.maxNumRxSG); 1512 1513 if (s->max_rx_frags == 0) { 1514 s->max_rx_frags = 1; 1515 } 1516 1517 VMW_CFPRN("Max RX fragments is %u", s->max_rx_frags); 1518 1519 s->event_int_idx = 1520 VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.eventIntrIdx); 1521 assert(vmxnet3_verify_intx(s, s->event_int_idx)); 1522 VMW_CFPRN("Events interrupt line is %u", s->event_int_idx); 1523 1524 s->auto_int_masking = 1525 VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.autoMask); 1526 VMW_CFPRN("Automatic interrupt masking is %d", (int)s->auto_int_masking); 1527 1528 s->txq_num = 1529 VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numTxQueues); 1530 s->rxq_num = 1531 VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numRxQueues); 1532 1533 VMW_CFPRN("Number of TX/RX queues %u/%u", s->txq_num, s->rxq_num); 1534 vmxnet3_validate_queues(s); 1535 1536 qdescr_table_pa = 1537 VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.misc.queueDescPA); 1538 VMW_CFPRN("TX queues descriptors table is at 0x%" PRIx64, qdescr_table_pa); 1539 1540 /* 1541 * Worst-case scenario is a packet that holds all TX rings space so 1542 * we calculate total size of all TX rings for max TX fragments number 1543 */ 1544 s->max_tx_frags = 0; 1545 1546 /* TX queues */ 1547 for (i = 0; i < s->txq_num; i++) { 1548 hwaddr qdescr_pa = 1549 qdescr_table_pa + i * sizeof(struct Vmxnet3_TxQueueDesc); 1550 1551 /* Read interrupt number for this TX queue */ 1552 s->txq_descr[i].intr_idx = 1553 VMXNET3_READ_TX_QUEUE_DESCR8(d, qdescr_pa, conf.intrIdx); 1554 assert(vmxnet3_verify_intx(s, s->txq_descr[i].intr_idx)); 1555 1556 VMW_CFPRN("TX Queue %d interrupt: %d", i, s->txq_descr[i].intr_idx); 1557 1558 /* Read rings memory locations for TX queues */ 1559 pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.txRingBasePA); 1560 size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.txRingSize); 1561 1562 vmxnet3_ring_init(d, &s->txq_descr[i].tx_ring, pa, size, 1563 sizeof(struct Vmxnet3_TxDesc), false); 1564 VMXNET3_RING_DUMP(VMW_CFPRN, "TX", i, &s->txq_descr[i].tx_ring); 1565 1566 s->max_tx_frags += size; 1567 1568 /* TXC ring */ 1569 pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.compRingBasePA); 1570 size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.compRingSize); 1571 vmxnet3_ring_init(d, &s->txq_descr[i].comp_ring, pa, size, 1572 sizeof(struct Vmxnet3_TxCompDesc), true); 1573 VMXNET3_RING_DUMP(VMW_CFPRN, "TXC", i, &s->txq_descr[i].comp_ring); 1574 1575 s->txq_descr[i].tx_stats_pa = 1576 qdescr_pa + offsetof(struct Vmxnet3_TxQueueDesc, stats); 1577 1578 memset(&s->txq_descr[i].txq_stats, 0, 1579 sizeof(s->txq_descr[i].txq_stats)); 1580 1581 /* Fill device-managed parameters for queues */ 1582 VMXNET3_WRITE_TX_QUEUE_DESCR32(d, qdescr_pa, 1583 ctrl.txThreshold, 1584 VMXNET3_DEF_TX_THRESHOLD); 1585 } 1586 1587 /* Preallocate TX packet wrapper */ 1588 VMW_CFPRN("Max TX fragments is %u", s->max_tx_frags); 1589 net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s), 1590 s->max_tx_frags, s->peer_has_vhdr); 1591 net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr); 1592 1593 /* Read rings memory locations for RX queues */ 1594 for (i = 0; i < s->rxq_num; i++) { 1595 int j; 1596 hwaddr qd_pa = 1597 qdescr_table_pa + s->txq_num * sizeof(struct Vmxnet3_TxQueueDesc) + 1598 i * sizeof(struct Vmxnet3_RxQueueDesc); 1599 1600 /* Read interrupt number for this RX queue */ 1601 s->rxq_descr[i].intr_idx = 1602 VMXNET3_READ_TX_QUEUE_DESCR8(d, qd_pa, conf.intrIdx); 1603 assert(vmxnet3_verify_intx(s, s->rxq_descr[i].intr_idx)); 1604 1605 VMW_CFPRN("RX Queue %d interrupt: %d", i, s->rxq_descr[i].intr_idx); 1606 1607 /* Read rings memory locations */ 1608 for (j = 0; j < VMXNET3_RX_RINGS_PER_QUEUE; j++) { 1609 /* RX rings */ 1610 pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.rxRingBasePA[j]); 1611 size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.rxRingSize[j]); 1612 vmxnet3_ring_init(d, &s->rxq_descr[i].rx_ring[j], pa, size, 1613 sizeof(struct Vmxnet3_RxDesc), false); 1614 VMW_CFPRN("RX queue %d:%d: Base: %" PRIx64 ", Size: %d", 1615 i, j, pa, size); 1616 } 1617 1618 /* RXC ring */ 1619 pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.compRingBasePA); 1620 size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.compRingSize); 1621 vmxnet3_ring_init(d, &s->rxq_descr[i].comp_ring, pa, size, 1622 sizeof(struct Vmxnet3_RxCompDesc), true); 1623 VMW_CFPRN("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size); 1624 1625 s->rxq_descr[i].rx_stats_pa = 1626 qd_pa + offsetof(struct Vmxnet3_RxQueueDesc, stats); 1627 memset(&s->rxq_descr[i].rxq_stats, 0, 1628 sizeof(s->rxq_descr[i].rxq_stats)); 1629 } 1630 1631 vmxnet3_validate_interrupts(s); 1632 1633 /* Make sure everything is in place before device activation */ 1634 smp_wmb(); 1635 1636 vmxnet3_reset_mac(s); 1637 1638 s->device_active = true; 1639 } 1640 1641 static void vmxnet3_handle_command(VMXNET3State *s, uint64_t cmd) 1642 { 1643 s->last_command = cmd; 1644 1645 switch (cmd) { 1646 case VMXNET3_CMD_GET_PERM_MAC_HI: 1647 VMW_CBPRN("Set: Get upper part of permanent MAC"); 1648 break; 1649 1650 case VMXNET3_CMD_GET_PERM_MAC_LO: 1651 VMW_CBPRN("Set: Get lower part of permanent MAC"); 1652 break; 1653 1654 case VMXNET3_CMD_GET_STATS: 1655 VMW_CBPRN("Set: Get device statistics"); 1656 vmxnet3_fill_stats(s); 1657 break; 1658 1659 case VMXNET3_CMD_ACTIVATE_DEV: 1660 VMW_CBPRN("Set: Activating vmxnet3 device"); 1661 vmxnet3_activate_device(s); 1662 break; 1663 1664 case VMXNET3_CMD_UPDATE_RX_MODE: 1665 VMW_CBPRN("Set: Update rx mode"); 1666 vmxnet3_update_rx_mode(s); 1667 break; 1668 1669 case VMXNET3_CMD_UPDATE_VLAN_FILTERS: 1670 VMW_CBPRN("Set: Update VLAN filters"); 1671 vmxnet3_update_vlan_filters(s); 1672 break; 1673 1674 case VMXNET3_CMD_UPDATE_MAC_FILTERS: 1675 VMW_CBPRN("Set: Update MAC filters"); 1676 vmxnet3_update_mcast_filters(s); 1677 break; 1678 1679 case VMXNET3_CMD_UPDATE_FEATURE: 1680 VMW_CBPRN("Set: Update features"); 1681 vmxnet3_update_features(s); 1682 break; 1683 1684 case VMXNET3_CMD_UPDATE_PMCFG: 1685 VMW_CBPRN("Set: Update power management config"); 1686 vmxnet3_update_pm_state(s); 1687 break; 1688 1689 case VMXNET3_CMD_GET_LINK: 1690 VMW_CBPRN("Set: Get link"); 1691 break; 1692 1693 case VMXNET3_CMD_RESET_DEV: 1694 VMW_CBPRN("Set: Reset device"); 1695 vmxnet3_reset(s); 1696 break; 1697 1698 case VMXNET3_CMD_QUIESCE_DEV: 1699 VMW_CBPRN("Set: VMXNET3_CMD_QUIESCE_DEV - deactivate the device"); 1700 vmxnet3_deactivate_device(s); 1701 break; 1702 1703 case VMXNET3_CMD_GET_CONF_INTR: 1704 VMW_CBPRN("Set: VMXNET3_CMD_GET_CONF_INTR - interrupt configuration"); 1705 break; 1706 1707 case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO: 1708 VMW_CBPRN("Set: VMXNET3_CMD_GET_ADAPTIVE_RING_INFO - " 1709 "adaptive ring info flags"); 1710 break; 1711 1712 case VMXNET3_CMD_GET_DID_LO: 1713 VMW_CBPRN("Set: Get lower part of device ID"); 1714 break; 1715 1716 case VMXNET3_CMD_GET_DID_HI: 1717 VMW_CBPRN("Set: Get upper part of device ID"); 1718 break; 1719 1720 case VMXNET3_CMD_GET_DEV_EXTRA_INFO: 1721 VMW_CBPRN("Set: Get device extra info"); 1722 break; 1723 1724 default: 1725 VMW_CBPRN("Received unknown command: %" PRIx64, cmd); 1726 break; 1727 } 1728 } 1729 1730 static uint64_t vmxnet3_get_command_status(VMXNET3State *s) 1731 { 1732 uint64_t ret; 1733 1734 switch (s->last_command) { 1735 case VMXNET3_CMD_ACTIVATE_DEV: 1736 ret = (s->device_active) ? 0 : 1; 1737 VMW_CFPRN("Device active: %" PRIx64, ret); 1738 break; 1739 1740 case VMXNET3_CMD_RESET_DEV: 1741 case VMXNET3_CMD_QUIESCE_DEV: 1742 case VMXNET3_CMD_GET_QUEUE_STATUS: 1743 case VMXNET3_CMD_GET_DEV_EXTRA_INFO: 1744 ret = 0; 1745 break; 1746 1747 case VMXNET3_CMD_GET_LINK: 1748 ret = s->link_status_and_speed; 1749 VMW_CFPRN("Link and speed: %" PRIx64, ret); 1750 break; 1751 1752 case VMXNET3_CMD_GET_PERM_MAC_LO: 1753 ret = vmxnet3_get_mac_low(&s->perm_mac); 1754 break; 1755 1756 case VMXNET3_CMD_GET_PERM_MAC_HI: 1757 ret = vmxnet3_get_mac_high(&s->perm_mac); 1758 break; 1759 1760 case VMXNET3_CMD_GET_CONF_INTR: 1761 ret = vmxnet3_get_interrupt_config(s); 1762 break; 1763 1764 case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO: 1765 ret = VMXNET3_DISABLE_ADAPTIVE_RING; 1766 break; 1767 1768 case VMXNET3_CMD_GET_DID_LO: 1769 ret = PCI_DEVICE_ID_VMWARE_VMXNET3; 1770 break; 1771 1772 case VMXNET3_CMD_GET_DID_HI: 1773 ret = VMXNET3_DEVICE_REVISION; 1774 break; 1775 1776 default: 1777 VMW_WRPRN("Received request for unknown command: %x", s->last_command); 1778 ret = 0; 1779 break; 1780 } 1781 1782 return ret; 1783 } 1784 1785 static void vmxnet3_set_events(VMXNET3State *s, uint32_t val) 1786 { 1787 uint32_t events; 1788 PCIDevice *d = PCI_DEVICE(s); 1789 1790 VMW_CBPRN("Setting events: 0x%x", val); 1791 events = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, ecr) | val; 1792 VMXNET3_WRITE_DRV_SHARED32(d, s->drv_shmem, ecr, events); 1793 } 1794 1795 static void vmxnet3_ack_events(VMXNET3State *s, uint32_t val) 1796 { 1797 PCIDevice *d = PCI_DEVICE(s); 1798 uint32_t events; 1799 1800 VMW_CBPRN("Clearing events: 0x%x", val); 1801 events = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, ecr) & ~val; 1802 VMXNET3_WRITE_DRV_SHARED32(d, s->drv_shmem, ecr, events); 1803 } 1804 1805 static void 1806 vmxnet3_io_bar1_write(void *opaque, 1807 hwaddr addr, 1808 uint64_t val, 1809 unsigned size) 1810 { 1811 VMXNET3State *s = opaque; 1812 1813 switch (addr) { 1814 /* Vmxnet3 Revision Report Selection */ 1815 case VMXNET3_REG_VRRS: 1816 VMW_CBPRN("Write BAR1 [VMXNET3_REG_VRRS] = %" PRIx64 ", size %d", 1817 val, size); 1818 break; 1819 1820 /* UPT Version Report Selection */ 1821 case VMXNET3_REG_UVRS: 1822 VMW_CBPRN("Write BAR1 [VMXNET3_REG_UVRS] = %" PRIx64 ", size %d", 1823 val, size); 1824 break; 1825 1826 /* Driver Shared Address Low */ 1827 case VMXNET3_REG_DSAL: 1828 VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAL] = %" PRIx64 ", size %d", 1829 val, size); 1830 /* 1831 * Guest driver will first write the low part of the shared 1832 * memory address. We save it to temp variable and set the 1833 * shared address only after we get the high part 1834 */ 1835 if (val == 0) { 1836 vmxnet3_deactivate_device(s); 1837 } 1838 s->temp_shared_guest_driver_memory = val; 1839 s->drv_shmem = 0; 1840 break; 1841 1842 /* Driver Shared Address High */ 1843 case VMXNET3_REG_DSAH: 1844 VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAH] = %" PRIx64 ", size %d", 1845 val, size); 1846 /* 1847 * Set the shared memory between guest driver and device. 1848 * We already should have low address part. 1849 */ 1850 s->drv_shmem = s->temp_shared_guest_driver_memory | (val << 32); 1851 break; 1852 1853 /* Command */ 1854 case VMXNET3_REG_CMD: 1855 VMW_CBPRN("Write BAR1 [VMXNET3_REG_CMD] = %" PRIx64 ", size %d", 1856 val, size); 1857 vmxnet3_handle_command(s, val); 1858 break; 1859 1860 /* MAC Address Low */ 1861 case VMXNET3_REG_MACL: 1862 VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACL] = %" PRIx64 ", size %d", 1863 val, size); 1864 s->temp_mac = val; 1865 break; 1866 1867 /* MAC Address High */ 1868 case VMXNET3_REG_MACH: 1869 VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACH] = %" PRIx64 ", size %d", 1870 val, size); 1871 vmxnet3_set_variable_mac(s, val, s->temp_mac); 1872 break; 1873 1874 /* Interrupt Cause Register */ 1875 case VMXNET3_REG_ICR: 1876 VMW_CBPRN("Write BAR1 [VMXNET3_REG_ICR] = %" PRIx64 ", size %d", 1877 val, size); 1878 g_assert_not_reached(); 1879 break; 1880 1881 /* Event Cause Register */ 1882 case VMXNET3_REG_ECR: 1883 VMW_CBPRN("Write BAR1 [VMXNET3_REG_ECR] = %" PRIx64 ", size %d", 1884 val, size); 1885 vmxnet3_ack_events(s, val); 1886 break; 1887 1888 default: 1889 VMW_CBPRN("Unknown Write to BAR1 [%" PRIx64 "] = %" PRIx64 ", size %d", 1890 addr, val, size); 1891 break; 1892 } 1893 } 1894 1895 static uint64_t 1896 vmxnet3_io_bar1_read(void *opaque, hwaddr addr, unsigned size) 1897 { 1898 VMXNET3State *s = opaque; 1899 uint64_t ret = 0; 1900 1901 switch (addr) { 1902 /* Vmxnet3 Revision Report Selection */ 1903 case VMXNET3_REG_VRRS: 1904 VMW_CBPRN("Read BAR1 [VMXNET3_REG_VRRS], size %d", size); 1905 ret = VMXNET3_DEVICE_REVISION; 1906 break; 1907 1908 /* UPT Version Report Selection */ 1909 case VMXNET3_REG_UVRS: 1910 VMW_CBPRN("Read BAR1 [VMXNET3_REG_UVRS], size %d", size); 1911 ret = VMXNET3_UPT_REVISION; 1912 break; 1913 1914 /* Command */ 1915 case VMXNET3_REG_CMD: 1916 VMW_CBPRN("Read BAR1 [VMXNET3_REG_CMD], size %d", size); 1917 ret = vmxnet3_get_command_status(s); 1918 break; 1919 1920 /* MAC Address Low */ 1921 case VMXNET3_REG_MACL: 1922 VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACL], size %d", size); 1923 ret = vmxnet3_get_mac_low(&s->conf.macaddr); 1924 break; 1925 1926 /* MAC Address High */ 1927 case VMXNET3_REG_MACH: 1928 VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACH], size %d", size); 1929 ret = vmxnet3_get_mac_high(&s->conf.macaddr); 1930 break; 1931 1932 /* 1933 * Interrupt Cause Register 1934 * Used for legacy interrupts only so interrupt index always 0 1935 */ 1936 case VMXNET3_REG_ICR: 1937 VMW_CBPRN("Read BAR1 [VMXNET3_REG_ICR], size %d", size); 1938 if (vmxnet3_interrupt_asserted(s, 0)) { 1939 vmxnet3_clear_interrupt(s, 0); 1940 ret = true; 1941 } else { 1942 ret = false; 1943 } 1944 break; 1945 1946 default: 1947 VMW_CBPRN("Unknow read BAR1[%" PRIx64 "], %d bytes", addr, size); 1948 break; 1949 } 1950 1951 return ret; 1952 } 1953 1954 static int 1955 vmxnet3_can_receive(NetClientState *nc) 1956 { 1957 VMXNET3State *s = qemu_get_nic_opaque(nc); 1958 return s->device_active && 1959 VMXNET_FLAG_IS_SET(s->link_status_and_speed, VMXNET3_LINK_STATUS_UP); 1960 } 1961 1962 static inline bool 1963 vmxnet3_is_registered_vlan(VMXNET3State *s, const void *data) 1964 { 1965 uint16_t vlan_tag = eth_get_pkt_tci(data) & VLAN_VID_MASK; 1966 if (IS_SPECIAL_VLAN_ID(vlan_tag)) { 1967 return true; 1968 } 1969 1970 return VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, vlan_tag); 1971 } 1972 1973 static bool 1974 vmxnet3_is_allowed_mcast_group(VMXNET3State *s, const uint8_t *group_mac) 1975 { 1976 int i; 1977 for (i = 0; i < s->mcast_list_len; i++) { 1978 if (!memcmp(group_mac, s->mcast_list[i].a, sizeof(s->mcast_list[i]))) { 1979 return true; 1980 } 1981 } 1982 return false; 1983 } 1984 1985 static bool 1986 vmxnet3_rx_filter_may_indicate(VMXNET3State *s, const void *data, 1987 size_t size) 1988 { 1989 struct eth_header *ehdr = PKT_GET_ETH_HDR(data); 1990 1991 if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_PROMISC)) { 1992 return true; 1993 } 1994 1995 if (!vmxnet3_is_registered_vlan(s, data)) { 1996 return false; 1997 } 1998 1999 switch (net_rx_pkt_get_packet_type(s->rx_pkt)) { 2000 case ETH_PKT_UCAST: 2001 if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_UCAST)) { 2002 return false; 2003 } 2004 if (memcmp(s->conf.macaddr.a, ehdr->h_dest, ETH_ALEN)) { 2005 return false; 2006 } 2007 break; 2008 2009 case ETH_PKT_BCAST: 2010 if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_BCAST)) { 2011 return false; 2012 } 2013 break; 2014 2015 case ETH_PKT_MCAST: 2016 if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_ALL_MULTI)) { 2017 return true; 2018 } 2019 if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_MCAST)) { 2020 return false; 2021 } 2022 if (!vmxnet3_is_allowed_mcast_group(s, ehdr->h_dest)) { 2023 return false; 2024 } 2025 break; 2026 2027 default: 2028 g_assert_not_reached(); 2029 } 2030 2031 return true; 2032 } 2033 2034 static ssize_t 2035 vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size) 2036 { 2037 VMXNET3State *s = qemu_get_nic_opaque(nc); 2038 size_t bytes_indicated; 2039 uint8_t min_buf[MIN_BUF_SIZE]; 2040 2041 if (!vmxnet3_can_receive(nc)) { 2042 VMW_PKPRN("Cannot receive now"); 2043 return -1; 2044 } 2045 2046 if (s->peer_has_vhdr) { 2047 net_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf); 2048 buf += sizeof(struct virtio_net_hdr); 2049 size -= sizeof(struct virtio_net_hdr); 2050 } 2051 2052 /* Pad to minimum Ethernet frame length */ 2053 if (size < sizeof(min_buf)) { 2054 memcpy(min_buf, buf, size); 2055 memset(&min_buf[size], 0, sizeof(min_buf) - size); 2056 buf = min_buf; 2057 size = sizeof(min_buf); 2058 } 2059 2060 net_rx_pkt_set_packet_type(s->rx_pkt, 2061 get_eth_packet_type(PKT_GET_ETH_HDR(buf))); 2062 2063 if (vmxnet3_rx_filter_may_indicate(s, buf, size)) { 2064 net_rx_pkt_set_protocols(s->rx_pkt, buf, size); 2065 vmxnet3_rx_need_csum_calculate(s->rx_pkt, buf, size); 2066 net_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping); 2067 bytes_indicated = vmxnet3_indicate_packet(s) ? size : -1; 2068 if (bytes_indicated < size) { 2069 VMW_PKPRN("RX: %zu of %zu bytes indicated", bytes_indicated, size); 2070 } 2071 } else { 2072 VMW_PKPRN("Packet dropped by RX filter"); 2073 bytes_indicated = size; 2074 } 2075 2076 assert(size > 0); 2077 assert(bytes_indicated != 0); 2078 return bytes_indicated; 2079 } 2080 2081 static void vmxnet3_set_link_status(NetClientState *nc) 2082 { 2083 VMXNET3State *s = qemu_get_nic_opaque(nc); 2084 2085 if (nc->link_down) { 2086 s->link_status_and_speed &= ~VMXNET3_LINK_STATUS_UP; 2087 } else { 2088 s->link_status_and_speed |= VMXNET3_LINK_STATUS_UP; 2089 } 2090 2091 vmxnet3_set_events(s, VMXNET3_ECR_LINK); 2092 vmxnet3_trigger_interrupt(s, s->event_int_idx); 2093 } 2094 2095 static NetClientInfo net_vmxnet3_info = { 2096 .type = NET_CLIENT_DRIVER_NIC, 2097 .size = sizeof(NICState), 2098 .receive = vmxnet3_receive, 2099 .link_status_changed = vmxnet3_set_link_status, 2100 }; 2101 2102 static bool vmxnet3_peer_has_vnet_hdr(VMXNET3State *s) 2103 { 2104 NetClientState *nc = qemu_get_queue(s->nic); 2105 2106 if (qemu_has_vnet_hdr(nc->peer)) { 2107 return true; 2108 } 2109 2110 return false; 2111 } 2112 2113 static void vmxnet3_net_uninit(VMXNET3State *s) 2114 { 2115 g_free(s->mcast_list); 2116 vmxnet3_deactivate_device(s); 2117 qemu_del_nic(s->nic); 2118 } 2119 2120 static void vmxnet3_net_init(VMXNET3State *s) 2121 { 2122 DeviceState *d = DEVICE(s); 2123 2124 VMW_CBPRN("vmxnet3_net_init called..."); 2125 2126 qemu_macaddr_default_if_unset(&s->conf.macaddr); 2127 2128 /* Windows guest will query the address that was set on init */ 2129 memcpy(&s->perm_mac.a, &s->conf.macaddr.a, sizeof(s->perm_mac.a)); 2130 2131 s->mcast_list = NULL; 2132 s->mcast_list_len = 0; 2133 2134 s->link_status_and_speed = VMXNET3_LINK_SPEED | VMXNET3_LINK_STATUS_UP; 2135 2136 VMW_CFPRN("Permanent MAC: " MAC_FMT, MAC_ARG(s->perm_mac.a)); 2137 2138 s->nic = qemu_new_nic(&net_vmxnet3_info, &s->conf, 2139 object_get_typename(OBJECT(s)), 2140 d->id, s); 2141 2142 s->peer_has_vhdr = vmxnet3_peer_has_vnet_hdr(s); 2143 s->tx_sop = true; 2144 s->skip_current_tx_pkt = false; 2145 s->tx_pkt = NULL; 2146 s->rx_pkt = NULL; 2147 s->rx_vlan_stripping = false; 2148 s->lro_supported = false; 2149 2150 if (s->peer_has_vhdr) { 2151 qemu_set_vnet_hdr_len(qemu_get_queue(s->nic)->peer, 2152 sizeof(struct virtio_net_hdr)); 2153 2154 qemu_using_vnet_hdr(qemu_get_queue(s->nic)->peer, 1); 2155 } 2156 2157 qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); 2158 } 2159 2160 static void 2161 vmxnet3_unuse_msix_vectors(VMXNET3State *s, int num_vectors) 2162 { 2163 PCIDevice *d = PCI_DEVICE(s); 2164 int i; 2165 for (i = 0; i < num_vectors; i++) { 2166 msix_vector_unuse(d, i); 2167 } 2168 } 2169 2170 static bool 2171 vmxnet3_use_msix_vectors(VMXNET3State *s, int num_vectors) 2172 { 2173 PCIDevice *d = PCI_DEVICE(s); 2174 int i; 2175 for (i = 0; i < num_vectors; i++) { 2176 int res = msix_vector_use(d, i); 2177 if (0 > res) { 2178 VMW_WRPRN("Failed to use MSI-X vector %d, error %d", i, res); 2179 vmxnet3_unuse_msix_vectors(s, i); 2180 return false; 2181 } 2182 } 2183 return true; 2184 } 2185 2186 static bool 2187 vmxnet3_init_msix(VMXNET3State *s) 2188 { 2189 PCIDevice *d = PCI_DEVICE(s); 2190 int res = msix_init(d, VMXNET3_MAX_INTRS, 2191 &s->msix_bar, 2192 VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_TABLE, 2193 &s->msix_bar, 2194 VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA(s), 2195 VMXNET3_MSIX_OFFSET(s), NULL); 2196 2197 if (0 > res) { 2198 VMW_WRPRN("Failed to initialize MSI-X, error %d", res); 2199 s->msix_used = false; 2200 } else { 2201 if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) { 2202 VMW_WRPRN("Failed to use MSI-X vectors, error %d", res); 2203 msix_uninit(d, &s->msix_bar, &s->msix_bar); 2204 s->msix_used = false; 2205 } else { 2206 s->msix_used = true; 2207 } 2208 } 2209 return s->msix_used; 2210 } 2211 2212 static void 2213 vmxnet3_cleanup_msix(VMXNET3State *s) 2214 { 2215 PCIDevice *d = PCI_DEVICE(s); 2216 2217 if (s->msix_used) { 2218 vmxnet3_unuse_msix_vectors(s, VMXNET3_MAX_INTRS); 2219 msix_uninit(d, &s->msix_bar, &s->msix_bar); 2220 } 2221 } 2222 2223 static void 2224 vmxnet3_cleanup_msi(VMXNET3State *s) 2225 { 2226 PCIDevice *d = PCI_DEVICE(s); 2227 2228 msi_uninit(d); 2229 } 2230 2231 static void 2232 vmxnet3_msix_save(QEMUFile *f, void *opaque) 2233 { 2234 PCIDevice *d = PCI_DEVICE(opaque); 2235 msix_save(d, f); 2236 } 2237 2238 static int 2239 vmxnet3_msix_load(QEMUFile *f, void *opaque, int version_id) 2240 { 2241 PCIDevice *d = PCI_DEVICE(opaque); 2242 msix_load(d, f); 2243 return 0; 2244 } 2245 2246 static const MemoryRegionOps b0_ops = { 2247 .read = vmxnet3_io_bar0_read, 2248 .write = vmxnet3_io_bar0_write, 2249 .endianness = DEVICE_LITTLE_ENDIAN, 2250 .impl = { 2251 .min_access_size = 4, 2252 .max_access_size = 4, 2253 }, 2254 }; 2255 2256 static const MemoryRegionOps b1_ops = { 2257 .read = vmxnet3_io_bar1_read, 2258 .write = vmxnet3_io_bar1_write, 2259 .endianness = DEVICE_LITTLE_ENDIAN, 2260 .impl = { 2261 .min_access_size = 4, 2262 .max_access_size = 4, 2263 }, 2264 }; 2265 2266 static SaveVMHandlers savevm_vmxnet3_msix = { 2267 .save_state = vmxnet3_msix_save, 2268 .load_state = vmxnet3_msix_load, 2269 }; 2270 2271 static uint64_t vmxnet3_device_serial_num(VMXNET3State *s) 2272 { 2273 uint64_t dsn_payload; 2274 uint8_t *dsnp = (uint8_t *)&dsn_payload; 2275 2276 dsnp[0] = 0xfe; 2277 dsnp[1] = s->conf.macaddr.a[3]; 2278 dsnp[2] = s->conf.macaddr.a[4]; 2279 dsnp[3] = s->conf.macaddr.a[5]; 2280 dsnp[4] = s->conf.macaddr.a[0]; 2281 dsnp[5] = s->conf.macaddr.a[1]; 2282 dsnp[6] = s->conf.macaddr.a[2]; 2283 dsnp[7] = 0xff; 2284 return dsn_payload; 2285 } 2286 2287 2288 #define VMXNET3_USE_64BIT (true) 2289 #define VMXNET3_PER_VECTOR_MASK (false) 2290 2291 static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp) 2292 { 2293 DeviceState *dev = DEVICE(pci_dev); 2294 VMXNET3State *s = VMXNET3(pci_dev); 2295 int ret; 2296 2297 VMW_CBPRN("Starting init..."); 2298 2299 memory_region_init_io(&s->bar0, OBJECT(s), &b0_ops, s, 2300 "vmxnet3-b0", VMXNET3_PT_REG_SIZE); 2301 pci_register_bar(pci_dev, VMXNET3_BAR0_IDX, 2302 PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar0); 2303 2304 memory_region_init_io(&s->bar1, OBJECT(s), &b1_ops, s, 2305 "vmxnet3-b1", VMXNET3_VD_REG_SIZE); 2306 pci_register_bar(pci_dev, VMXNET3_BAR1_IDX, 2307 PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar1); 2308 2309 memory_region_init(&s->msix_bar, OBJECT(s), "vmxnet3-msix-bar", 2310 VMXNET3_MSIX_BAR_SIZE); 2311 pci_register_bar(pci_dev, VMXNET3_MSIX_BAR_IDX, 2312 PCI_BASE_ADDRESS_SPACE_MEMORY, &s->msix_bar); 2313 2314 vmxnet3_reset_interrupt_states(s); 2315 2316 /* Interrupt pin A */ 2317 pci_dev->config[PCI_INTERRUPT_PIN] = 0x01; 2318 2319 ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS, 2320 VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK, NULL); 2321 /* Any error other than -ENOTSUP(board's MSI support is broken) 2322 * is a programming error. Fall back to INTx silently on -ENOTSUP */ 2323 assert(!ret || ret == -ENOTSUP); 2324 2325 if (!vmxnet3_init_msix(s)) { 2326 VMW_WRPRN("Failed to initialize MSI-X, configuration is inconsistent."); 2327 } 2328 2329 vmxnet3_net_init(s); 2330 2331 if (pci_is_express(pci_dev)) { 2332 if (pci_bus_is_express(pci_dev->bus)) { 2333 pcie_endpoint_cap_init(pci_dev, VMXNET3_EXP_EP_OFFSET); 2334 } 2335 2336 pcie_dev_ser_num_init(pci_dev, VMXNET3_DSN_OFFSET, 2337 vmxnet3_device_serial_num(s)); 2338 } 2339 2340 register_savevm_live(dev, "vmxnet3-msix", -1, 1, &savevm_vmxnet3_msix, s); 2341 } 2342 2343 static void vmxnet3_instance_init(Object *obj) 2344 { 2345 VMXNET3State *s = VMXNET3(obj); 2346 device_add_bootindex_property(obj, &s->conf.bootindex, 2347 "bootindex", "/ethernet-phy@0", 2348 DEVICE(obj), NULL); 2349 } 2350 2351 static void vmxnet3_pci_uninit(PCIDevice *pci_dev) 2352 { 2353 DeviceState *dev = DEVICE(pci_dev); 2354 VMXNET3State *s = VMXNET3(pci_dev); 2355 2356 VMW_CBPRN("Starting uninit..."); 2357 2358 unregister_savevm(dev, "vmxnet3-msix", s); 2359 2360 vmxnet3_net_uninit(s); 2361 2362 vmxnet3_cleanup_msix(s); 2363 2364 vmxnet3_cleanup_msi(s); 2365 } 2366 2367 static void vmxnet3_qdev_reset(DeviceState *dev) 2368 { 2369 PCIDevice *d = PCI_DEVICE(dev); 2370 VMXNET3State *s = VMXNET3(d); 2371 2372 VMW_CBPRN("Starting QDEV reset..."); 2373 vmxnet3_reset(s); 2374 } 2375 2376 static bool vmxnet3_mc_list_needed(void *opaque) 2377 { 2378 return true; 2379 } 2380 2381 static int vmxnet3_mcast_list_pre_load(void *opaque) 2382 { 2383 VMXNET3State *s = opaque; 2384 2385 s->mcast_list = g_malloc(s->mcast_list_buff_size); 2386 2387 return 0; 2388 } 2389 2390 2391 static int vmxnet3_pre_save(void *opaque) 2392 { 2393 VMXNET3State *s = opaque; 2394 2395 s->mcast_list_buff_size = s->mcast_list_len * sizeof(MACAddr); 2396 2397 return 0; 2398 } 2399 2400 static const VMStateDescription vmxstate_vmxnet3_mcast_list = { 2401 .name = "vmxnet3/mcast_list", 2402 .version_id = 1, 2403 .minimum_version_id = 1, 2404 .pre_load = vmxnet3_mcast_list_pre_load, 2405 .needed = vmxnet3_mc_list_needed, 2406 .fields = (VMStateField[]) { 2407 VMSTATE_VBUFFER_UINT32(mcast_list, VMXNET3State, 0, NULL, 2408 mcast_list_buff_size), 2409 VMSTATE_END_OF_LIST() 2410 } 2411 }; 2412 2413 static const VMStateDescription vmstate_vmxnet3_ring = { 2414 .name = "vmxnet3-ring", 2415 .version_id = 0, 2416 .fields = (VMStateField[]) { 2417 VMSTATE_UINT64(pa, Vmxnet3Ring), 2418 VMSTATE_UINT32(size, Vmxnet3Ring), 2419 VMSTATE_UINT32(cell_size, Vmxnet3Ring), 2420 VMSTATE_UINT32(next, Vmxnet3Ring), 2421 VMSTATE_UINT8(gen, Vmxnet3Ring), 2422 VMSTATE_END_OF_LIST() 2423 } 2424 }; 2425 2426 static const VMStateDescription vmstate_vmxnet3_tx_stats = { 2427 .name = "vmxnet3-tx-stats", 2428 .version_id = 0, 2429 .fields = (VMStateField[]) { 2430 VMSTATE_UINT64(TSOPktsTxOK, struct UPT1_TxStats), 2431 VMSTATE_UINT64(TSOBytesTxOK, struct UPT1_TxStats), 2432 VMSTATE_UINT64(ucastPktsTxOK, struct UPT1_TxStats), 2433 VMSTATE_UINT64(ucastBytesTxOK, struct UPT1_TxStats), 2434 VMSTATE_UINT64(mcastPktsTxOK, struct UPT1_TxStats), 2435 VMSTATE_UINT64(mcastBytesTxOK, struct UPT1_TxStats), 2436 VMSTATE_UINT64(bcastPktsTxOK, struct UPT1_TxStats), 2437 VMSTATE_UINT64(bcastBytesTxOK, struct UPT1_TxStats), 2438 VMSTATE_UINT64(pktsTxError, struct UPT1_TxStats), 2439 VMSTATE_UINT64(pktsTxDiscard, struct UPT1_TxStats), 2440 VMSTATE_END_OF_LIST() 2441 } 2442 }; 2443 2444 static const VMStateDescription vmstate_vmxnet3_txq_descr = { 2445 .name = "vmxnet3-txq-descr", 2446 .version_id = 0, 2447 .fields = (VMStateField[]) { 2448 VMSTATE_STRUCT(tx_ring, Vmxnet3TxqDescr, 0, vmstate_vmxnet3_ring, 2449 Vmxnet3Ring), 2450 VMSTATE_STRUCT(comp_ring, Vmxnet3TxqDescr, 0, vmstate_vmxnet3_ring, 2451 Vmxnet3Ring), 2452 VMSTATE_UINT8(intr_idx, Vmxnet3TxqDescr), 2453 VMSTATE_UINT64(tx_stats_pa, Vmxnet3TxqDescr), 2454 VMSTATE_STRUCT(txq_stats, Vmxnet3TxqDescr, 0, vmstate_vmxnet3_tx_stats, 2455 struct UPT1_TxStats), 2456 VMSTATE_END_OF_LIST() 2457 } 2458 }; 2459 2460 static const VMStateDescription vmstate_vmxnet3_rx_stats = { 2461 .name = "vmxnet3-rx-stats", 2462 .version_id = 0, 2463 .fields = (VMStateField[]) { 2464 VMSTATE_UINT64(LROPktsRxOK, struct UPT1_RxStats), 2465 VMSTATE_UINT64(LROBytesRxOK, struct UPT1_RxStats), 2466 VMSTATE_UINT64(ucastPktsRxOK, struct UPT1_RxStats), 2467 VMSTATE_UINT64(ucastBytesRxOK, struct UPT1_RxStats), 2468 VMSTATE_UINT64(mcastPktsRxOK, struct UPT1_RxStats), 2469 VMSTATE_UINT64(mcastBytesRxOK, struct UPT1_RxStats), 2470 VMSTATE_UINT64(bcastPktsRxOK, struct UPT1_RxStats), 2471 VMSTATE_UINT64(bcastBytesRxOK, struct UPT1_RxStats), 2472 VMSTATE_UINT64(pktsRxOutOfBuf, struct UPT1_RxStats), 2473 VMSTATE_UINT64(pktsRxError, struct UPT1_RxStats), 2474 VMSTATE_END_OF_LIST() 2475 } 2476 }; 2477 2478 static const VMStateDescription vmstate_vmxnet3_rxq_descr = { 2479 .name = "vmxnet3-rxq-descr", 2480 .version_id = 0, 2481 .fields = (VMStateField[]) { 2482 VMSTATE_STRUCT_ARRAY(rx_ring, Vmxnet3RxqDescr, 2483 VMXNET3_RX_RINGS_PER_QUEUE, 0, 2484 vmstate_vmxnet3_ring, Vmxnet3Ring), 2485 VMSTATE_STRUCT(comp_ring, Vmxnet3RxqDescr, 0, vmstate_vmxnet3_ring, 2486 Vmxnet3Ring), 2487 VMSTATE_UINT8(intr_idx, Vmxnet3RxqDescr), 2488 VMSTATE_UINT64(rx_stats_pa, Vmxnet3RxqDescr), 2489 VMSTATE_STRUCT(rxq_stats, Vmxnet3RxqDescr, 0, vmstate_vmxnet3_rx_stats, 2490 struct UPT1_RxStats), 2491 VMSTATE_END_OF_LIST() 2492 } 2493 }; 2494 2495 static int vmxnet3_post_load(void *opaque, int version_id) 2496 { 2497 VMXNET3State *s = opaque; 2498 PCIDevice *d = PCI_DEVICE(s); 2499 2500 net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s), 2501 s->max_tx_frags, s->peer_has_vhdr); 2502 net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr); 2503 2504 if (s->msix_used) { 2505 if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) { 2506 VMW_WRPRN("Failed to re-use MSI-X vectors"); 2507 msix_uninit(d, &s->msix_bar, &s->msix_bar); 2508 s->msix_used = false; 2509 return -1; 2510 } 2511 } 2512 2513 vmxnet3_validate_queues(s); 2514 vmxnet3_validate_interrupts(s); 2515 2516 return 0; 2517 } 2518 2519 static const VMStateDescription vmstate_vmxnet3_int_state = { 2520 .name = "vmxnet3-int-state", 2521 .version_id = 0, 2522 .fields = (VMStateField[]) { 2523 VMSTATE_BOOL(is_masked, Vmxnet3IntState), 2524 VMSTATE_BOOL(is_pending, Vmxnet3IntState), 2525 VMSTATE_BOOL(is_asserted, Vmxnet3IntState), 2526 VMSTATE_END_OF_LIST() 2527 } 2528 }; 2529 2530 static bool vmxnet3_vmstate_need_pcie_device(void *opaque) 2531 { 2532 VMXNET3State *s = VMXNET3(opaque); 2533 2534 return !(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE); 2535 } 2536 2537 static bool vmxnet3_vmstate_test_pci_device(void *opaque, int version_id) 2538 { 2539 return !vmxnet3_vmstate_need_pcie_device(opaque); 2540 } 2541 2542 static const VMStateDescription vmstate_vmxnet3_pcie_device = { 2543 .name = "vmxnet3/pcie", 2544 .version_id = 1, 2545 .minimum_version_id = 1, 2546 .needed = vmxnet3_vmstate_need_pcie_device, 2547 .fields = (VMStateField[]) { 2548 VMSTATE_PCI_DEVICE(parent_obj, VMXNET3State), 2549 VMSTATE_END_OF_LIST() 2550 } 2551 }; 2552 2553 static const VMStateDescription vmstate_vmxnet3 = { 2554 .name = "vmxnet3", 2555 .version_id = 1, 2556 .minimum_version_id = 1, 2557 .pre_save = vmxnet3_pre_save, 2558 .post_load = vmxnet3_post_load, 2559 .fields = (VMStateField[]) { 2560 VMSTATE_STRUCT_TEST(parent_obj, VMXNET3State, 2561 vmxnet3_vmstate_test_pci_device, 0, 2562 vmstate_pci_device, PCIDevice), 2563 VMSTATE_BOOL(rx_packets_compound, VMXNET3State), 2564 VMSTATE_BOOL(rx_vlan_stripping, VMXNET3State), 2565 VMSTATE_BOOL(lro_supported, VMXNET3State), 2566 VMSTATE_UINT32(rx_mode, VMXNET3State), 2567 VMSTATE_UINT32(mcast_list_len, VMXNET3State), 2568 VMSTATE_UINT32(mcast_list_buff_size, VMXNET3State), 2569 VMSTATE_UINT32_ARRAY(vlan_table, VMXNET3State, VMXNET3_VFT_SIZE), 2570 VMSTATE_UINT32(mtu, VMXNET3State), 2571 VMSTATE_UINT16(max_rx_frags, VMXNET3State), 2572 VMSTATE_UINT32(max_tx_frags, VMXNET3State), 2573 VMSTATE_UINT8(event_int_idx, VMXNET3State), 2574 VMSTATE_BOOL(auto_int_masking, VMXNET3State), 2575 VMSTATE_UINT8(txq_num, VMXNET3State), 2576 VMSTATE_UINT8(rxq_num, VMXNET3State), 2577 VMSTATE_UINT32(device_active, VMXNET3State), 2578 VMSTATE_UINT32(last_command, VMXNET3State), 2579 VMSTATE_UINT32(link_status_and_speed, VMXNET3State), 2580 VMSTATE_UINT32(temp_mac, VMXNET3State), 2581 VMSTATE_UINT64(drv_shmem, VMXNET3State), 2582 VMSTATE_UINT64(temp_shared_guest_driver_memory, VMXNET3State), 2583 2584 VMSTATE_STRUCT_ARRAY(txq_descr, VMXNET3State, 2585 VMXNET3_DEVICE_MAX_TX_QUEUES, 0, vmstate_vmxnet3_txq_descr, 2586 Vmxnet3TxqDescr), 2587 VMSTATE_STRUCT_ARRAY(rxq_descr, VMXNET3State, 2588 VMXNET3_DEVICE_MAX_RX_QUEUES, 0, vmstate_vmxnet3_rxq_descr, 2589 Vmxnet3RxqDescr), 2590 VMSTATE_STRUCT_ARRAY(interrupt_states, VMXNET3State, 2591 VMXNET3_MAX_INTRS, 0, vmstate_vmxnet3_int_state, 2592 Vmxnet3IntState), 2593 2594 VMSTATE_END_OF_LIST() 2595 }, 2596 .subsections = (const VMStateDescription*[]) { 2597 &vmxstate_vmxnet3_mcast_list, 2598 &vmstate_vmxnet3_pcie_device, 2599 NULL 2600 } 2601 }; 2602 2603 static Property vmxnet3_properties[] = { 2604 DEFINE_NIC_PROPERTIES(VMXNET3State, conf), 2605 DEFINE_PROP_BIT("x-old-msi-offsets", VMXNET3State, compat_flags, 2606 VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT, false), 2607 DEFINE_PROP_BIT("x-disable-pcie", VMXNET3State, compat_flags, 2608 VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT, false), 2609 DEFINE_PROP_END_OF_LIST(), 2610 }; 2611 2612 static void vmxnet3_realize(DeviceState *qdev, Error **errp) 2613 { 2614 VMXNET3Class *vc = VMXNET3_DEVICE_GET_CLASS(qdev); 2615 PCIDevice *pci_dev = PCI_DEVICE(qdev); 2616 VMXNET3State *s = VMXNET3(qdev); 2617 2618 if (!(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE)) { 2619 pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; 2620 } 2621 2622 vc->parent_dc_realize(qdev, errp); 2623 } 2624 2625 static void vmxnet3_class_init(ObjectClass *class, void *data) 2626 { 2627 DeviceClass *dc = DEVICE_CLASS(class); 2628 PCIDeviceClass *c = PCI_DEVICE_CLASS(class); 2629 VMXNET3Class *vc = VMXNET3_DEVICE_CLASS(class); 2630 2631 c->realize = vmxnet3_pci_realize; 2632 c->exit = vmxnet3_pci_uninit; 2633 c->vendor_id = PCI_VENDOR_ID_VMWARE; 2634 c->device_id = PCI_DEVICE_ID_VMWARE_VMXNET3; 2635 c->revision = PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION; 2636 c->romfile = "efi-vmxnet3.rom"; 2637 c->class_id = PCI_CLASS_NETWORK_ETHERNET; 2638 c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE; 2639 c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3; 2640 vc->parent_dc_realize = dc->realize; 2641 dc->realize = vmxnet3_realize; 2642 dc->desc = "VMWare Paravirtualized Ethernet v3"; 2643 dc->reset = vmxnet3_qdev_reset; 2644 dc->vmsd = &vmstate_vmxnet3; 2645 dc->props = vmxnet3_properties; 2646 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); 2647 } 2648 2649 static const TypeInfo vmxnet3_info = { 2650 .name = TYPE_VMXNET3, 2651 .parent = TYPE_PCI_DEVICE, 2652 .class_size = sizeof(VMXNET3Class), 2653 .instance_size = sizeof(VMXNET3State), 2654 .class_init = vmxnet3_class_init, 2655 .instance_init = vmxnet3_instance_init, 2656 }; 2657 2658 static void vmxnet3_register_types(void) 2659 { 2660 VMW_CBPRN("vmxnet3_register_types called..."); 2661 type_register_static(&vmxnet3_info); 2662 } 2663 2664 type_init(vmxnet3_register_types) 2665