/*
 * QEMU VMWARE VMXNET3 paravirtual NIC
 *
 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
 *
 * Developed by Daynix Computing LTD (http://www.daynix.com)
 *
 * Authors:
 * Dmitry Fleytman <dmitry@daynix.com>
 * Tamir Shomer <tamirs@daynix.com>
 * Yan Vugenfirer <yan@daynix.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "hw/hw.h"
#include "hw/pci/pci.h"
#include "net/net.h"
#include "net/tap.h"
#include "net/checksum.h"
#include "sysemu/sysemu.h"
#include "qemu-common.h"
#include "qemu/bswap.h"
#include "hw/pci/msix.h"
#include "hw/pci/msi.h"

#include "vmxnet3.h"
#include "vmxnet_debug.h"
#include "vmware_utils.h"
#include "net_tx_pkt.h"
#include "net_rx_pkt.h"

#define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
#define VMXNET3_MSIX_BAR_SIZE 0x2000
#define MIN_BUF_SIZE 60

/* Compatibility flags for migration */
#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0
#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS \
    (1 << VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT)
#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT 1
#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE \
    (1 << VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT)

#define VMXNET3_EXP_EP_OFFSET (0x48)
#define VMXNET3_MSI_OFFSET(s) \
    ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x50 : 0x84)
#define VMXNET3_MSIX_OFFSET(s) \
    ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0 : 0x9c)
#define VMXNET3_DSN_OFFSET (0x100)

#define VMXNET3_BAR0_IDX (0)
#define VMXNET3_BAR1_IDX (1)
#define VMXNET3_MSIX_BAR_IDX (2)

#define VMXNET3_OFF_MSIX_TABLE (0x000)
#define VMXNET3_OFF_MSIX_PBA(s) \
    ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x800 : 0x1000)

/* Link speed in Mbps should be shifted by 16 */
#define VMXNET3_LINK_SPEED (1000 << 16)

/* Link status: 1 - up, 0 - down. */
#define VMXNET3_LINK_STATUS_UP 0x1

/* Least significant bit should be set for revision and version */
#define VMXNET3_UPT_REVISION 0x1
#define VMXNET3_DEVICE_REVISION 0x1

/* Number of interrupt vectors for non-MSIx modes */
#define VMXNET3_MAX_NMSIX_INTRS (1)

/* Macros for ring descriptor access */
#define VMXNET3_READ_TX_QUEUE_DESCR8(_d, dpa, field) \
    (vmw_shmem_ld8(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))

#define VMXNET3_WRITE_TX_QUEUE_DESCR8(_d, dpa, field, value) \
    (vmw_shmem_st8(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))

#define VMXNET3_READ_TX_QUEUE_DESCR32(_d, dpa, field) \
    (vmw_shmem_ld32(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))

#define VMXNET3_WRITE_TX_QUEUE_DESCR32(_d, dpa, field, value) \
    (vmw_shmem_st32(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))

#define VMXNET3_READ_TX_QUEUE_DESCR64(_d, dpa, field) \
    (vmw_shmem_ld64(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))

#define VMXNET3_WRITE_TX_QUEUE_DESCR64(_d, dpa, field, value) \
    (vmw_shmem_st64(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))

#define VMXNET3_READ_RX_QUEUE_DESCR64(_d, dpa, field) \
    (vmw_shmem_ld64(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))

#define VMXNET3_READ_RX_QUEUE_DESCR32(_d, dpa, field) \
    (vmw_shmem_ld32(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))

#define VMXNET3_WRITE_RX_QUEUE_DESCR64(_d, dpa, field, value) \
    (vmw_shmem_st64(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value))

#define VMXNET3_WRITE_RX_QUEUE_DESCR8(_d, dpa, field, value) \
    (vmw_shmem_st8(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value))

/* Macros for guest driver shared area access */
#define VMXNET3_READ_DRV_SHARED64(_d, shpa, field) \
    (vmw_shmem_ld64(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_READ_DRV_SHARED32(_d, shpa, field) \
    (vmw_shmem_ld32(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_WRITE_DRV_SHARED32(_d, shpa, field, val) \
    (vmw_shmem_st32(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field), val))

#define VMXNET3_READ_DRV_SHARED16(_d, shpa, field) \
    (vmw_shmem_ld16(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_READ_DRV_SHARED8(_d, shpa, field) \
    (vmw_shmem_ld8(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_READ_DRV_SHARED(_d, shpa, field, b, l) \
    (vmw_shmem_read(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field), b, l))

#define VMXNET_FLAG_IS_SET(field, flag) (((field) & (flag)) == (flag))

typedef struct VMXNET3Class {
    PCIDeviceClass parent_class;
    DeviceRealize parent_dc_realize;
} VMXNET3Class;

#define TYPE_VMXNET3 "vmxnet3"
#define VMXNET3(obj) OBJECT_CHECK(VMXNET3State, (obj), TYPE_VMXNET3)

#define VMXNET3_DEVICE_CLASS(klass) \
    OBJECT_CLASS_CHECK(VMXNET3Class, (klass), TYPE_VMXNET3)
#define VMXNET3_DEVICE_GET_CLASS(obj) \
    OBJECT_GET_CLASS(VMXNET3Class, (obj), TYPE_VMXNET3)

/* Cyclic ring abstraction */
typedef struct {
    hwaddr pa;
    size_t size;
    size_t cell_size;
    size_t next;
    uint8_t gen;
} Vmxnet3Ring;
static inline void vmxnet3_ring_init(PCIDevice *d,
                                     Vmxnet3Ring *ring,
                                     hwaddr pa,
                                     size_t size,
                                     size_t cell_size,
                                     bool zero_region)
{
    ring->pa = pa;
    ring->size = size;
    ring->cell_size = cell_size;
    ring->gen = VMXNET3_INIT_GEN;
    ring->next = 0;

    if (zero_region) {
        vmw_shmem_set(d, pa, 0, size * cell_size);
    }
}

#define VMXNET3_RING_DUMP(macro, ring_name, ridx, r)                         \
    macro("%s#%d: base %" PRIx64 " size %zu cell_size %zu gen %d next %zu",  \
          (ring_name), (ridx),                                               \
          (r)->pa, (r)->size, (r)->cell_size, (r)->gen, (r)->next)

static inline void vmxnet3_ring_inc(Vmxnet3Ring *ring)
{
    if (++ring->next >= ring->size) {
        ring->next = 0;
        ring->gen ^= 1;
    }
}

static inline void vmxnet3_ring_dec(Vmxnet3Ring *ring)
{
    if (ring->next-- == 0) {
        ring->next = ring->size - 1;
        ring->gen ^= 1;
    }
}

static inline hwaddr vmxnet3_ring_curr_cell_pa(Vmxnet3Ring *ring)
{
    return ring->pa + ring->next * ring->cell_size;
}

static inline void vmxnet3_ring_read_curr_cell(PCIDevice *d, Vmxnet3Ring *ring,
                                               void *buff)
{
    vmw_shmem_read(d, vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
}

static inline void vmxnet3_ring_write_curr_cell(PCIDevice *d, Vmxnet3Ring *ring,
                                                void *buff)
{
    vmw_shmem_write(d, vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
}

static inline size_t vmxnet3_ring_curr_cell_idx(Vmxnet3Ring *ring)
{
    return ring->next;
}

static inline uint8_t vmxnet3_ring_curr_gen(Vmxnet3Ring *ring)
{
    return ring->gen;
}
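/*
 * Worked example of the generation-bit protocol above (illustrative only,
 * not used by the device code): for a ring with size = 4 and gen starting
 * at VMXNET3_INIT_GEN = G, five vmxnet3_ring_inc() calls yield
 *
 *     next: 0 -> 1 -> 2 -> 3 -> 0      gen: G, G, G, G, G ^ 1
 *
 * i.e. every wrap-around toggles gen. A descriptor is considered ready
 * only when the gen field written by the producer matches the ring's
 * current gen, which is how the TX/RX pop routines below tell fresh
 * descriptors from stale ones.
 */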
/* Debug trace-related functions */
static inline void
vmxnet3_dump_tx_descr(struct Vmxnet3_TxDesc *descr)
{
    VMW_PKPRN("TX DESCR: "
              "addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
              "dtype: %d, ext1: %d, msscof: %d, hlen: %d, om: %d, "
              "eop: %d, cq: %d, ext2: %d, ti: %d, tci: %d",
              le64_to_cpu(descr->addr), descr->len, descr->gen, descr->rsvd,
              descr->dtype, descr->ext1, descr->msscof, descr->hlen, descr->om,
              descr->eop, descr->cq, descr->ext2, descr->ti, descr->tci);
}

static inline void
vmxnet3_dump_virt_hdr(struct virtio_net_hdr *vhdr)
{
    VMW_PKPRN("VHDR: flags 0x%x, gso_type: 0x%x, hdr_len: %d, gso_size: %d, "
              "csum_start: %d, csum_offset: %d",
              vhdr->flags, vhdr->gso_type, vhdr->hdr_len, vhdr->gso_size,
              vhdr->csum_start, vhdr->csum_offset);
}

static inline void
vmxnet3_dump_rx_descr(struct Vmxnet3_RxDesc *descr)
{
    VMW_PKPRN("RX DESCR: addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
              "dtype: %d, ext1: %d, btype: %d",
              le64_to_cpu(descr->addr), descr->len, descr->gen,
              descr->rsvd, descr->dtype, descr->ext1, descr->btype);
}

/* Device state and helper functions */
#define VMXNET3_RX_RINGS_PER_QUEUE (2)

typedef struct {
    Vmxnet3Ring tx_ring;
    Vmxnet3Ring comp_ring;

    uint8_t intr_idx;
    hwaddr tx_stats_pa;
    struct UPT1_TxStats txq_stats;
} Vmxnet3TxqDescr;

typedef struct {
    Vmxnet3Ring rx_ring[VMXNET3_RX_RINGS_PER_QUEUE];
    Vmxnet3Ring comp_ring;
    uint8_t intr_idx;
    hwaddr rx_stats_pa;
    struct UPT1_RxStats rxq_stats;
} Vmxnet3RxqDescr;

typedef struct {
    bool is_masked;
    bool is_pending;
    bool is_asserted;
} Vmxnet3IntState;

typedef struct {
    PCIDevice parent_obj;
    NICState *nic;
    NICConf conf;
    MemoryRegion bar0;
    MemoryRegion bar1;
    MemoryRegion msix_bar;

    Vmxnet3RxqDescr rxq_descr[VMXNET3_DEVICE_MAX_RX_QUEUES];
    Vmxnet3TxqDescr txq_descr[VMXNET3_DEVICE_MAX_TX_QUEUES];

    /* Whether MSI-X support was installed successfully */
    bool msix_used;
    /* Whether MSI support was installed successfully */
    bool msi_used;
    hwaddr drv_shmem;
    hwaddr temp_shared_guest_driver_memory;

    uint8_t txq_num;

    /*
     * Whether the RX packet being indicated has to be split into
     * head and body chunks from different RX rings
     */
    bool rx_packets_compound;

    bool rx_vlan_stripping;
    bool lro_supported;

    uint8_t rxq_num;

    /* Network MTU */
    uint32_t mtu;

    /* Maximum number of fragments for indicated TX packets */
    uint32_t max_tx_frags;

    /* Maximum number of fragments for indicated RX packets */
    uint16_t max_rx_frags;

    /* Index for events interrupt */
    uint8_t event_int_idx;

    /* Whether automatic interrupt masking is enabled */
    bool auto_int_masking;

    bool peer_has_vhdr;

    /* TX packets to QEMU interface */
    struct NetTxPkt *tx_pkt;
    uint32_t offload_mode;
    uint32_t cso_or_gso_size;
    uint16_t tci;
    bool needs_vlan;

    struct NetRxPkt *rx_pkt;

    bool tx_sop;
    bool skip_current_tx_pkt;

    uint32_t device_active;
    uint32_t last_command;

    uint32_t link_status_and_speed;

    Vmxnet3IntState interrupt_states[VMXNET3_MAX_INTRS];

    uint32_t temp_mac; /* To store the low part first */

    MACAddr perm_mac;
    uint32_t vlan_table[VMXNET3_VFT_SIZE];
    uint32_t rx_mode;
    MACAddr *mcast_list;
    uint32_t mcast_list_len;
    uint32_t mcast_list_buff_size; /* needed for live migration. */

    /* Compatibility flags for migration */
    uint32_t compat_flags;
} VMXNET3State;

/* Interrupt management */
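/*
 * Summary of the interrupt bookkeeping below (an illustrative sketch, not
 * normative documentation): every interrupt source is tracked by the
 * (is_masked, is_pending, is_asserted) triple defined above, and
 * vmxnet3_update_interrupt_line_state() applies two transitions:
 *
 *     pending && !masked && !asserted  ->  assert the line (INTx stays up,
 *                                          MSI/MSI-X fire and drop back)
 *     !pending && asserted             ->  deassert the line (INTx only)
 *
 * With auto-masking enabled, the device masks a vector as soon as it
 * fires, and the guest driver unmasks it again through the IMR registers
 * in BAR0.
 */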
/*
 * This function returns whether the interrupt line is in the asserted
 * state after the call. This depends on the type of interrupt used: for
 * INTx the line will be asserted until explicit deassertion, while for
 * MSI(X) the line will be deasserted automatically due to the
 * notification semantics of MSI(X) interrupts.
 */
static bool _vmxnet3_assert_interrupt_line(VMXNET3State *s, uint32_t int_idx)
{
    PCIDevice *d = PCI_DEVICE(s);

    if (s->msix_used && msix_enabled(d)) {
        VMW_IRPRN("Sending MSI-X notification for vector %u", int_idx);
        msix_notify(d, int_idx);
        return false;
    }
    if (s->msi_used && msi_enabled(d)) {
        VMW_IRPRN("Sending MSI notification for vector %u", int_idx);
        msi_notify(d, int_idx);
        return false;
    }

    VMW_IRPRN("Asserting line for interrupt %u", int_idx);
    pci_irq_assert(d);
    return true;
}

static void _vmxnet3_deassert_interrupt_line(VMXNET3State *s, int lidx)
{
    PCIDevice *d = PCI_DEVICE(s);

    /*
     * This function should never be called for MSI(X) interrupts
     * because deassertion is never required for message interrupts
     */
    assert(!s->msix_used || !msix_enabled(d));
    assert(!s->msi_used || !msi_enabled(d));

    VMW_IRPRN("Deasserting line for interrupt %u", lidx);
    pci_irq_deassert(d);
}

static void vmxnet3_update_interrupt_line_state(VMXNET3State *s, int lidx)
{
    if (!s->interrupt_states[lidx].is_pending &&
        s->interrupt_states[lidx].is_asserted) {
        VMW_IRPRN("New interrupt line state for index %d is DOWN", lidx);
        _vmxnet3_deassert_interrupt_line(s, lidx);
        s->interrupt_states[lidx].is_asserted = false;
        return;
    }

    if (s->interrupt_states[lidx].is_pending &&
        !s->interrupt_states[lidx].is_masked &&
        !s->interrupt_states[lidx].is_asserted) {
        VMW_IRPRN("New interrupt line state for index %d is UP", lidx);
        s->interrupt_states[lidx].is_asserted =
            _vmxnet3_assert_interrupt_line(s, lidx);
        s->interrupt_states[lidx].is_pending = false;
        return;
    }
}

static void vmxnet3_trigger_interrupt(VMXNET3State *s, int lidx)
{
    PCIDevice *d = PCI_DEVICE(s);
    s->interrupt_states[lidx].is_pending = true;
    vmxnet3_update_interrupt_line_state(s, lidx);

    if (s->msix_used && msix_enabled(d) && s->auto_int_masking) {
        goto do_automask;
    }

    if (s->msi_used && msi_enabled(d) && s->auto_int_masking) {
        goto do_automask;
    }

    return;

do_automask:
    s->interrupt_states[lidx].is_masked = true;
    vmxnet3_update_interrupt_line_state(s, lidx);
}

static bool vmxnet3_interrupt_asserted(VMXNET3State *s, int lidx)
{
    return s->interrupt_states[lidx].is_asserted;
}

static void vmxnet3_clear_interrupt(VMXNET3State *s, int int_idx)
{
    s->interrupt_states[int_idx].is_pending = false;
    if (s->auto_int_masking) {
        s->interrupt_states[int_idx].is_masked = true;
    }
    vmxnet3_update_interrupt_line_state(s, int_idx);
}

static void
vmxnet3_on_interrupt_mask_changed(VMXNET3State *s, int lidx, bool is_masked)
{
    s->interrupt_states[lidx].is_masked = is_masked;
    vmxnet3_update_interrupt_line_state(s, lidx);
}

static bool vmxnet3_verify_driver_magic(PCIDevice *d, hwaddr dshmem)
{
    return (VMXNET3_READ_DRV_SHARED32(d, dshmem, magic) == VMXNET3_REV1_MAGIC);
}

#define VMXNET3_GET_BYTE(x, byte_num) (((x) >> (byte_num)*8) & 0xFF)
#define VMXNET3_MAKE_BYTE(byte_num, val) \
    (((uint32_t)((val) & 0xFF)) << (byte_num)*8)
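/*
 * Worked example (illustrative only): for MAC 00:11:22:33:44:55 stored in
 * a[0..5], the helpers below pack the address little-endian:
 *
 *     vmxnet3_get_mac_low()  = MAKE_BYTE(0, 0x00) | ... | MAKE_BYTE(3, 0x33)
 *                            = 0x33221100
 *     vmxnet3_get_mac_high() = MAKE_BYTE(0, 0x44) | MAKE_BYTE(1, 0x55)
 *                            = 0x00005544
 *
 * vmxnet3_set_variable_mac() performs the inverse unpacking when the
 * guest writes the MACL/MACH registers.
 */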
static void vmxnet3_set_variable_mac(VMXNET3State *s, uint32_t h, uint32_t l)
{
    s->conf.macaddr.a[0] = VMXNET3_GET_BYTE(l, 0);
    s->conf.macaddr.a[1] = VMXNET3_GET_BYTE(l, 1);
    s->conf.macaddr.a[2] = VMXNET3_GET_BYTE(l, 2);
    s->conf.macaddr.a[3] = VMXNET3_GET_BYTE(l, 3);
    s->conf.macaddr.a[4] = VMXNET3_GET_BYTE(h, 0);
    s->conf.macaddr.a[5] = VMXNET3_GET_BYTE(h, 1);

    VMW_CFPRN("Variable MAC: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));

    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
}

static uint64_t vmxnet3_get_mac_low(MACAddr *addr)
{
    return VMXNET3_MAKE_BYTE(0, addr->a[0]) |
           VMXNET3_MAKE_BYTE(1, addr->a[1]) |
           VMXNET3_MAKE_BYTE(2, addr->a[2]) |
           VMXNET3_MAKE_BYTE(3, addr->a[3]);
}

static uint64_t vmxnet3_get_mac_high(MACAddr *addr)
{
    return VMXNET3_MAKE_BYTE(0, addr->a[4]) |
           VMXNET3_MAKE_BYTE(1, addr->a[5]);
}

static void
vmxnet3_inc_tx_consumption_counter(VMXNET3State *s, int qidx)
{
    vmxnet3_ring_inc(&s->txq_descr[qidx].tx_ring);
}

static inline void
vmxnet3_inc_rx_consumption_counter(VMXNET3State *s, int qidx, int ridx)
{
    vmxnet3_ring_inc(&s->rxq_descr[qidx].rx_ring[ridx]);
}

static inline void
vmxnet3_inc_tx_completion_counter(VMXNET3State *s, int qidx)
{
    vmxnet3_ring_inc(&s->txq_descr[qidx].comp_ring);
}

static void
vmxnet3_inc_rx_completion_counter(VMXNET3State *s, int qidx)
{
    vmxnet3_ring_inc(&s->rxq_descr[qidx].comp_ring);
}

static void
vmxnet3_dec_rx_completion_counter(VMXNET3State *s, int qidx)
{
    vmxnet3_ring_dec(&s->rxq_descr[qidx].comp_ring);
}

static void vmxnet3_complete_packet(VMXNET3State *s, int qidx, uint32_t tx_ridx)
{
    struct Vmxnet3_TxCompDesc txcq_descr;
    PCIDevice *d = PCI_DEVICE(s);

    VMXNET3_RING_DUMP(VMW_RIPRN, "TXC", qidx, &s->txq_descr[qidx].comp_ring);

    txcq_descr.txdIdx = tx_ridx;
    txcq_descr.gen = vmxnet3_ring_curr_gen(&s->txq_descr[qidx].comp_ring);

    vmxnet3_ring_write_curr_cell(d, &s->txq_descr[qidx].comp_ring, &txcq_descr);

    /* Flush changes in TX descriptor before changing the counter value */
    smp_wmb();

    vmxnet3_inc_tx_completion_counter(s, qidx);
    vmxnet3_trigger_interrupt(s, s->txq_descr[qidx].intr_idx);
}

static bool
vmxnet3_setup_tx_offloads(VMXNET3State *s)
{
    switch (s->offload_mode) {
    case VMXNET3_OM_NONE:
        net_tx_pkt_build_vheader(s->tx_pkt, false, false, 0);
        break;

    case VMXNET3_OM_CSUM:
        net_tx_pkt_build_vheader(s->tx_pkt, false, true, 0);
        VMW_PKPRN("L4 CSO requested");
        break;

    case VMXNET3_OM_TSO:
        net_tx_pkt_build_vheader(s->tx_pkt, true, true,
                                 s->cso_or_gso_size);
        net_tx_pkt_update_ip_checksums(s->tx_pkt);
        VMW_PKPRN("GSO offload requested.");
        break;

    default:
        g_assert_not_reached();
        return false;
    }

    return true;
}
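/*
 * Illustrative mapping (sketch, under the assumption that net_tx_pkt
 * derives the GSO type from the parsed headers): a guest TSO send over
 * IPv4/TCP with om = VMXNET3_OM_TSO and msscof = 1460 ends up, via the
 * metadata cached by vmxnet3_tx_retrieve_metadata() below, as roughly
 *
 *     vhdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
 *     vhdr->gso_size = 1460;                       // from msscof
 *     vhdr->flags   |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
 *
 * while om = VMXNET3_OM_CSUM requests checksum offload only, and
 * om = VMXNET3_OM_NONE produces an empty virtio header.
 */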
static void
vmxnet3_tx_retrieve_metadata(VMXNET3State *s,
                             const struct Vmxnet3_TxDesc *txd)
{
    s->offload_mode = txd->om;
    s->cso_or_gso_size = txd->msscof;
    s->tci = txd->tci;
    s->needs_vlan = txd->ti;
}

typedef enum {
    VMXNET3_PKT_STATUS_OK,
    VMXNET3_PKT_STATUS_ERROR,
    VMXNET3_PKT_STATUS_DISCARD,    /* only for tx */
    VMXNET3_PKT_STATUS_OUT_OF_BUF  /* only for rx */
} Vmxnet3PktStatus;

static void
vmxnet3_on_tx_done_update_stats(VMXNET3State *s, int qidx,
                                Vmxnet3PktStatus status)
{
    size_t tot_len = net_tx_pkt_get_total_len(s->tx_pkt);
    struct UPT1_TxStats *stats = &s->txq_descr[qidx].txq_stats;

    switch (status) {
    case VMXNET3_PKT_STATUS_OK:
        switch (net_tx_pkt_get_packet_type(s->tx_pkt)) {
        case ETH_PKT_BCAST:
            stats->bcastPktsTxOK++;
            stats->bcastBytesTxOK += tot_len;
            break;
        case ETH_PKT_MCAST:
            stats->mcastPktsTxOK++;
            stats->mcastBytesTxOK += tot_len;
            break;
        case ETH_PKT_UCAST:
            stats->ucastPktsTxOK++;
            stats->ucastBytesTxOK += tot_len;
            break;
        default:
            g_assert_not_reached();
        }

        if (s->offload_mode == VMXNET3_OM_TSO) {
            /*
             * According to VMWARE headers this statistic is the number
             * of packets after segmentation, but since we don't have
             * this information in the QEMU model, the best we can do is
             * to provide the number of non-segmented packets
             */
            stats->TSOPktsTxOK++;
            stats->TSOBytesTxOK += tot_len;
        }
        break;

    case VMXNET3_PKT_STATUS_DISCARD:
        stats->pktsTxDiscard++;
        break;

    case VMXNET3_PKT_STATUS_ERROR:
        stats->pktsTxError++;
        break;

    default:
        g_assert_not_reached();
    }
}

static void
vmxnet3_on_rx_done_update_stats(VMXNET3State *s,
                                int qidx,
                                Vmxnet3PktStatus status)
{
    struct UPT1_RxStats *stats = &s->rxq_descr[qidx].rxq_stats;
    size_t tot_len = net_rx_pkt_get_total_len(s->rx_pkt);

    switch (status) {
    case VMXNET3_PKT_STATUS_OUT_OF_BUF:
        stats->pktsRxOutOfBuf++;
        break;

    case VMXNET3_PKT_STATUS_ERROR:
        stats->pktsRxError++;
        break;
    case VMXNET3_PKT_STATUS_OK:
        switch (net_rx_pkt_get_packet_type(s->rx_pkt)) {
        case ETH_PKT_BCAST:
            stats->bcastPktsRxOK++;
            stats->bcastBytesRxOK += tot_len;
            break;
        case ETH_PKT_MCAST:
            stats->mcastPktsRxOK++;
            stats->mcastBytesRxOK += tot_len;
            break;
        case ETH_PKT_UCAST:
            stats->ucastPktsRxOK++;
            stats->ucastBytesRxOK += tot_len;
            break;
        default:
            g_assert_not_reached();
        }

        if (tot_len > s->mtu) {
            stats->LROPktsRxOK++;
            stats->LROBytesRxOK += tot_len;
        }
        break;
    default:
        g_assert_not_reached();
    }
}

static inline bool
vmxnet3_pop_next_tx_descr(VMXNET3State *s,
                          int qidx,
                          struct Vmxnet3_TxDesc *txd,
                          uint32_t *descr_idx)
{
    Vmxnet3Ring *ring = &s->txq_descr[qidx].tx_ring;
    PCIDevice *d = PCI_DEVICE(s);

    vmxnet3_ring_read_curr_cell(d, ring, txd);
    if (txd->gen == vmxnet3_ring_curr_gen(ring)) {
        /* Only read after generation field verification */
        smp_rmb();
        /* Re-read to be sure we got the latest version */
        vmxnet3_ring_read_curr_cell(d, ring, txd);
        VMXNET3_RING_DUMP(VMW_RIPRN, "TX", qidx, ring);
        *descr_idx = vmxnet3_ring_curr_cell_idx(ring);
        vmxnet3_inc_tx_consumption_counter(s, qidx);
        return true;
    }

    return false;
}
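/*
 * Illustrative TX flow (sketch): a packet spread over three descriptors
 * arrives as
 *
 *     descr[0]: SOP, carries om/msscof/ti/tci metadata, eop = 0
 *     descr[1]: continuation fragment,                  eop = 0
 *     descr[2]: last fragment,                          eop = 1
 *
 * vmxnet3_process_tx_queue() below accumulates the fragments into
 * s->tx_pkt, applies VLAN/offload setup once EOP is seen, sends the
 * packet and writes a single completion carrying the index of descr[2].
 */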
static bool
vmxnet3_send_packet(VMXNET3State *s, uint32_t qidx)
{
    Vmxnet3PktStatus status = VMXNET3_PKT_STATUS_OK;

    if (!vmxnet3_setup_tx_offloads(s)) {
        status = VMXNET3_PKT_STATUS_ERROR;
        goto func_exit;
    }

    /* debug prints */
    vmxnet3_dump_virt_hdr(net_tx_pkt_get_vhdr(s->tx_pkt));
    net_tx_pkt_dump(s->tx_pkt);

    if (!net_tx_pkt_send(s->tx_pkt, qemu_get_queue(s->nic))) {
        status = VMXNET3_PKT_STATUS_DISCARD;
        goto func_exit;
    }

func_exit:
    vmxnet3_on_tx_done_update_stats(s, qidx, status);
    return (status == VMXNET3_PKT_STATUS_OK);
}

static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
{
    struct Vmxnet3_TxDesc txd;
    uint32_t txd_idx;
    uint32_t data_len;
    hwaddr data_pa;

    for (;;) {
        if (!vmxnet3_pop_next_tx_descr(s, qidx, &txd, &txd_idx)) {
            break;
        }

        vmxnet3_dump_tx_descr(&txd);

        if (!s->skip_current_tx_pkt) {
            data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE;
            data_pa = le64_to_cpu(txd.addr);

            if (!net_tx_pkt_add_raw_fragment(s->tx_pkt,
                                             data_pa,
                                             data_len)) {
                s->skip_current_tx_pkt = true;
            }
        }

        if (s->tx_sop) {
            vmxnet3_tx_retrieve_metadata(s, &txd);
            s->tx_sop = false;
        }

        if (txd.eop) {
            if (!s->skip_current_tx_pkt && net_tx_pkt_parse(s->tx_pkt)) {
                if (s->needs_vlan) {
                    net_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci);
                }

                vmxnet3_send_packet(s, qidx);
            } else {
                vmxnet3_on_tx_done_update_stats(s, qidx,
                                                VMXNET3_PKT_STATUS_ERROR);
            }

            vmxnet3_complete_packet(s, qidx, txd_idx);
            s->tx_sop = true;
            s->skip_current_tx_pkt = false;
            net_tx_pkt_reset(s->tx_pkt);
        }
    }
}

static inline void
vmxnet3_read_next_rx_descr(VMXNET3State *s, int qidx, int ridx,
                           struct Vmxnet3_RxDesc *dbuf, uint32_t *didx)
{
    PCIDevice *d = PCI_DEVICE(s);

    Vmxnet3Ring *ring = &s->rxq_descr[qidx].rx_ring[ridx];
    *didx = vmxnet3_ring_curr_cell_idx(ring);
    vmxnet3_ring_read_curr_cell(d, ring, dbuf);
}

static inline uint8_t
vmxnet3_get_rx_ring_gen(VMXNET3State *s, int qidx, int ridx)
{
    return s->rxq_descr[qidx].rx_ring[ridx].gen;
}

static inline hwaddr
vmxnet3_pop_rxc_descr(VMXNET3State *s, int qidx, uint32_t *descr_gen)
{
    uint8_t ring_gen;
    struct Vmxnet3_RxCompDesc rxcd;

    hwaddr daddr =
        vmxnet3_ring_curr_cell_pa(&s->rxq_descr[qidx].comp_ring);

    pci_dma_read(PCI_DEVICE(s),
                 daddr, &rxcd, sizeof(struct Vmxnet3_RxCompDesc));
    ring_gen = vmxnet3_ring_curr_gen(&s->rxq_descr[qidx].comp_ring);

    if (rxcd.gen != ring_gen) {
        *descr_gen = ring_gen;
        vmxnet3_inc_rx_completion_counter(s, qidx);
        return daddr;
    }

    return 0;
}

static inline void
vmxnet3_revert_rxc_descr(VMXNET3State *s, int qidx)
{
    vmxnet3_dec_rx_completion_counter(s, qidx);
}

#define RXQ_IDX (0)
#define RX_HEAD_BODY_RING (0)
#define RX_BODY_ONLY_RING (1)

static bool
vmxnet3_get_next_head_rx_descr(VMXNET3State *s,
                               struct Vmxnet3_RxDesc *descr_buf,
                               uint32_t *descr_idx,
                               uint32_t *ridx)
{
    for (;;) {
        uint32_t ring_gen;
        vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
                                   descr_buf, descr_idx);

        /* If no more free descriptors - return */
        ring_gen = vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING);
        if (descr_buf->gen != ring_gen) {
            return false;
        }

        /* Only read after generation field verification */
        smp_rmb();
        /* Re-read to be sure we got the latest version */
        vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
                                   descr_buf, descr_idx);

        /* Mark current descriptor as used/skipped */
        vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);

        /* If this is what we are looking for - return */
        if (descr_buf->btype == VMXNET3_RXD_BTYPE_HEAD) {
            *ridx = RX_HEAD_BODY_RING;
            return true;
        }
    }
}
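/*
 * Illustrative RX layout (sketch): with compound packets enabled, a 7 KB
 * LRO-merged packet indicated into 2 KB guest buffers could consume
 *
 *     head/body ring: 1 HEAD descriptor  (first 2 KB, sop = 1)
 *     body rings:     3 BODY descriptors (remaining chunks, eop on last)
 *
 * vmxnet3_get_next_body_rx_descr() below first looks for BODY descriptors
 * on the head/body ring and only then falls back to the body-only ring,
 * which is why both rings are consulted while assembling a multi-fragment
 * indication.
 */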
static bool
vmxnet3_get_next_body_rx_descr(VMXNET3State *s,
                               struct Vmxnet3_RxDesc *d,
                               uint32_t *didx,
                               uint32_t *ridx)
{
    vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);

    /* Try to find corresponding descriptor in head/body ring */
    if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING)) {
        /* Only read after generation field verification */
        smp_rmb();
        /* Re-read to be sure we got the latest version */
        vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);
        if (d->btype == VMXNET3_RXD_BTYPE_BODY) {
            vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
            *ridx = RX_HEAD_BODY_RING;
            return true;
        }
    }

    /*
     * If there are no free descriptors on the head/body ring, or the next
     * free descriptor is a head descriptor, switch to the body-only ring
     */
    vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);

    /* If no more free descriptors - return */
    if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_BODY_ONLY_RING)) {
        /* Only read after generation field verification */
        smp_rmb();
        /* Re-read to be sure we got the latest version */
        vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);
        assert(d->btype == VMXNET3_RXD_BTYPE_BODY);
        *ridx = RX_BODY_ONLY_RING;
        vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_BODY_ONLY_RING);
        return true;
    }

    return false;
}

static inline bool
vmxnet3_get_next_rx_descr(VMXNET3State *s, bool is_head,
                          struct Vmxnet3_RxDesc *descr_buf,
                          uint32_t *descr_idx,
                          uint32_t *ridx)
{
    if (is_head || !s->rx_packets_compound) {
        return vmxnet3_get_next_head_rx_descr(s, descr_buf, descr_idx, ridx);
    } else {
        return vmxnet3_get_next_body_rx_descr(s, descr_buf, descr_idx, ridx);
    }
}

/*
 * In case the packet was csum offloaded (either NEEDS_CSUM or DATA_VALID),
 * the implementation always passes an RxCompDesc with a "Checksum
 * calculated and found correct" to the OS (cnc=0 and tuc=1, see
 * vmxnet3_rx_update_descr). This emulates the observed ESXi behavior.
 *
 * Therefore, if the packet has NEEDS_CSUM set, we must calculate
 * and place a fully computed checksum into the tcp/udp header.
 * Otherwise, the OS driver will receive a checksum-correct indication
 * (CHECKSUM_UNNECESSARY), but with the actual tcp/udp checksum field
 * having just the pseudo header csum value.
 *
 * While this is not a problem if the packet is destined for local delivery,
 * in the case where the host OS performs forwarding, it will forward an
 * incorrectly checksummed packet.
 */
static void vmxnet3_rx_need_csum_calculate(struct NetRxPkt *pkt,
                                           const void *pkt_data,
                                           size_t pkt_len)
{
    struct virtio_net_hdr *vhdr;
    bool isip4, isip6, istcp, isudp;
    uint8_t *data;
    int len;

    if (!net_rx_pkt_has_virt_hdr(pkt)) {
        return;
    }

    vhdr = net_rx_pkt_get_vhdr(pkt);
    if (!VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
        return;
    }

    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    if (!(isip4 || isip6) || !(istcp || isudp)) {
        return;
    }

    vmxnet3_dump_virt_hdr(vhdr);

    /* Validate packet len: csum_start + csum_offset + length of csum field */
    if (pkt_len < (vhdr->csum_start + vhdr->csum_offset + 2)) {
        VMW_PKPRN("packet len:%zu < csum_start(%d) + csum_offset(%d) + 2, "
                  "cannot calculate checksum",
                  pkt_len, vhdr->csum_start, vhdr->csum_offset);
        return;
    }

    data = (uint8_t *)pkt_data + vhdr->csum_start;
    len = pkt_len - vhdr->csum_start;
    /* Put the checksum obtained into the packet */
    stw_be_p(data + vhdr->csum_offset, net_raw_checksum(data, len));

    vhdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    vhdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
}
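/*
 * Worked example (illustrative): for a TCP/IPv4 frame with
 * csum_start = 34 (14 B Ethernet + 20 B IP) and csum_offset = 16, the
 * code above checksums everything from the TCP header to the end of the
 * packet with net_raw_checksum() and stores the 16-bit result at byte
 * 34 + 16 = 50, i.e. the TCP checksum field, replacing the
 * pseudo-header-only value the offloading sender left there.
 */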
static void vmxnet3_rx_update_descr(struct NetRxPkt *pkt,
                                    struct Vmxnet3_RxCompDesc *rxcd)
{
    int csum_ok, is_gso;
    bool isip4, isip6, istcp, isudp;
    struct virtio_net_hdr *vhdr;
    uint8_t offload_type;

    if (net_rx_pkt_is_vlan_stripped(pkt)) {
        rxcd->ts = 1;
        rxcd->tci = net_rx_pkt_get_vlan_tag(pkt);
    }

    if (!net_rx_pkt_has_virt_hdr(pkt)) {
        goto nocsum;
    }

    vhdr = net_rx_pkt_get_vhdr(pkt);
    /*
     * Checksum is valid when the lower level tells so, or when the lower
     * level requires checksum offload, which means the packet was
     * produced/bridged locally and did not travel over the network after
     * the last checksum calculation or production
     */
    csum_ok = VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_DATA_VALID) ||
              VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM);

    offload_type = vhdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
    is_gso = (offload_type != VIRTIO_NET_HDR_GSO_NONE) ? 1 : 0;

    if (!csum_ok && !is_gso) {
        goto nocsum;
    }

    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    if ((!istcp && !isudp) || (!isip4 && !isip6)) {
        goto nocsum;
    }

    rxcd->cnc = 0;
    rxcd->v4 = isip4 ? 1 : 0;
    rxcd->v6 = isip6 ? 1 : 0;
    rxcd->tcp = istcp ? 1 : 0;
    rxcd->udp = isudp ? 1 : 0;
    rxcd->fcs = rxcd->tuc = rxcd->ipc = 1;
    return;

nocsum:
    rxcd->cnc = 1;
    return;
}

static void
vmxnet3_pci_dma_writev(PCIDevice *pci_dev,
                       const struct iovec *iov,
                       size_t start_iov_off,
                       hwaddr target_addr,
                       size_t bytes_to_copy)
{
    size_t curr_off = 0;
    size_t copied = 0;

    while (bytes_to_copy) {
        if (start_iov_off < (curr_off + iov->iov_len)) {
            size_t chunk_len =
                MIN((curr_off + iov->iov_len) - start_iov_off, bytes_to_copy);

            pci_dma_write(pci_dev, target_addr + copied,
                          iov->iov_base + start_iov_off - curr_off,
                          chunk_len);

            copied += chunk_len;
            start_iov_off += chunk_len;
            curr_off = start_iov_off;
            bytes_to_copy -= chunk_len;
        } else {
            curr_off += iov->iov_len;
        }
        iov++;
    }
}
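/*
 * Illustrative call (sketch): copying bytes [5..25) of a packet held in
 * two iovec entries of 10 and 30 bytes into guest memory at guest_pa
 * (a hypothetical address):
 *
 *     vmxnet3_pci_dma_writev(d, iov, 5, guest_pa, 20);
 *
 * first writes bytes 5..9 of iov[0], then bytes 0..14 of iov[1],
 * advancing the target address by the amount copied; this is how
 * vmxnet3_indicate_packet() below spreads one received packet across
 * multiple RX buffers.
 */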
static bool
vmxnet3_indicate_packet(VMXNET3State *s)
{
    struct Vmxnet3_RxDesc rxd;
    PCIDevice *d = PCI_DEVICE(s);
    bool is_head = true;
    uint32_t rxd_idx;
    uint32_t rx_ridx = 0;

    struct Vmxnet3_RxCompDesc rxcd;
    uint32_t new_rxcd_gen = VMXNET3_INIT_GEN;
    hwaddr new_rxcd_pa = 0;
    hwaddr ready_rxcd_pa = 0;
    struct iovec *data = net_rx_pkt_get_iovec(s->rx_pkt);
    size_t bytes_copied = 0;
    size_t bytes_left = net_rx_pkt_get_total_len(s->rx_pkt);
    uint16_t num_frags = 0;
    size_t chunk_size;

    net_rx_pkt_dump(s->rx_pkt);

    while (bytes_left > 0) {

        /* cannot add more frags to packet */
        if (num_frags == s->max_rx_frags) {
            break;
        }

        new_rxcd_pa = vmxnet3_pop_rxc_descr(s, RXQ_IDX, &new_rxcd_gen);
        if (!new_rxcd_pa) {
            break;
        }

        if (!vmxnet3_get_next_rx_descr(s, is_head, &rxd, &rxd_idx, &rx_ridx)) {
            break;
        }

        chunk_size = MIN(bytes_left, rxd.len);
        vmxnet3_pci_dma_writev(d, data, bytes_copied,
                               le64_to_cpu(rxd.addr), chunk_size);
        bytes_copied += chunk_size;
        bytes_left -= chunk_size;

        vmxnet3_dump_rx_descr(&rxd);

        if (ready_rxcd_pa != 0) {
            pci_dma_write(d, ready_rxcd_pa, &rxcd, sizeof(rxcd));
        }

        memset(&rxcd, 0, sizeof(struct Vmxnet3_RxCompDesc));
        rxcd.rxdIdx = rxd_idx;
        rxcd.len = chunk_size;
        rxcd.sop = is_head;
        rxcd.gen = new_rxcd_gen;
        rxcd.rqID = RXQ_IDX + rx_ridx * s->rxq_num;

        if (bytes_left == 0) {
            vmxnet3_rx_update_descr(s->rx_pkt, &rxcd);
        }

        VMW_RIPRN("RX Completion descriptor: rxRing: %lu rxIdx %lu len %lu "
                  "sop %d csum_correct %lu",
                  (unsigned long) rx_ridx,
                  (unsigned long) rxcd.rxdIdx,
                  (unsigned long) rxcd.len,
                  (int) rxcd.sop,
                  (unsigned long) rxcd.tuc);

        is_head = false;
        ready_rxcd_pa = new_rxcd_pa;
        new_rxcd_pa = 0;
        num_frags++;
    }

    if (ready_rxcd_pa != 0) {
        rxcd.eop = 1;
        rxcd.err = (bytes_left != 0);

        pci_dma_write(d, ready_rxcd_pa, &rxcd, sizeof(rxcd));

        /* Flush RX descriptor changes */
        smp_wmb();
    }

    if (new_rxcd_pa != 0) {
        vmxnet3_revert_rxc_descr(s, RXQ_IDX);
    }

    vmxnet3_trigger_interrupt(s, s->rxq_descr[RXQ_IDX].intr_idx);

    if (bytes_left == 0) {
        vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_OK);
        return true;
    } else if (num_frags == s->max_rx_frags) {
        vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_ERROR);
        return false;
    } else {
        vmxnet3_on_rx_done_update_stats(s, RXQ_IDX,
                                        VMXNET3_PKT_STATUS_OUT_OF_BUF);
        return false;
    }
}

static void
vmxnet3_io_bar0_write(void *opaque, hwaddr addr,
                      uint64_t val, unsigned size)
{
    VMXNET3State *s = opaque;

    if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_TXPROD,
                             VMXNET3_DEVICE_MAX_TX_QUEUES, VMXNET3_REG_ALIGN)) {
        int tx_queue_idx =
            VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_TXPROD,
                                     VMXNET3_REG_ALIGN);
        assert(tx_queue_idx < s->txq_num);
        vmxnet3_process_tx_queue(s, tx_queue_idx);
        return;
    }

    if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
                             VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
        int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
                                         VMXNET3_REG_ALIGN);

        VMW_CBPRN("Interrupt mask for line %d written: 0x%" PRIx64, l, val);

        vmxnet3_on_interrupt_mask_changed(s, l, val);
        return;
    }

    if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD,
                             VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN) ||
        VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD2,
                             VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN)) {
        return;
    }

    VMW_WRPRN("BAR0 unknown write [%" PRIx64 "] = %" PRIx64 ", size %d",
              (uint64_t) addr, val, size);
}

static uint64_t
vmxnet3_io_bar0_read(void *opaque, hwaddr addr, unsigned size)
{
    VMXNET3State *s = opaque;

    if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
                             VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
        int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
                                         VMXNET3_REG_ALIGN);
        return s->interrupt_states[l].is_masked;
    }

    VMW_CBPRN("BAR0 unknown read [%" PRIx64 "], size %d", addr, size);
    return 0;
}

static void vmxnet3_reset_interrupt_states(VMXNET3State *s)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(s->interrupt_states); i++) {
        s->interrupt_states[i].is_asserted = false;
        s->interrupt_states[i].is_pending = false;
        s->interrupt_states[i].is_masked = true;
    }
}

static void vmxnet3_reset_mac(VMXNET3State *s)
{
    memcpy(&s->conf.macaddr.a, &s->perm_mac.a, sizeof(s->perm_mac.a));
    VMW_CFPRN("MAC address set to: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));
}

static void vmxnet3_deactivate_device(VMXNET3State *s)
{
    if (s->device_active) {
        VMW_CBPRN("Deactivating vmxnet3...");
        net_tx_pkt_reset(s->tx_pkt);
        net_tx_pkt_uninit(s->tx_pkt);
        net_rx_pkt_uninit(s->rx_pkt);
        s->device_active = false;
    }
}

static void vmxnet3_reset(VMXNET3State *s)
{
    VMW_CBPRN("Resetting vmxnet3...");

    vmxnet3_deactivate_device(s);
    vmxnet3_reset_interrupt_states(s);
    s->drv_shmem = 0;
    s->tx_sop = true;
    s->skip_current_tx_pkt = false;
}

static void vmxnet3_update_rx_mode(VMXNET3State *s)
{
    PCIDevice *d = PCI_DEVICE(s);

    s->rx_mode = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem,
                                           devRead.rxFilterConf.rxMode);
    VMW_CFPRN("RX mode: 0x%08X", s->rx_mode);
}
static void vmxnet3_update_vlan_filters(VMXNET3State *s)
{
    int i;
    PCIDevice *d = PCI_DEVICE(s);

    /* Copy configuration from shared memory */
    VMXNET3_READ_DRV_SHARED(d, s->drv_shmem,
                            devRead.rxFilterConf.vfTable,
                            s->vlan_table,
                            sizeof(s->vlan_table));

    /* Invert byte order when needed */
    for (i = 0; i < ARRAY_SIZE(s->vlan_table); i++) {
        s->vlan_table[i] = le32_to_cpu(s->vlan_table[i]);
    }

    /* Dump configuration for debugging purposes */
    VMW_CFPRN("Configured VLANs:");
    for (i = 0; i < sizeof(s->vlan_table) * 8; i++) {
        if (VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, i)) {
            VMW_CFPRN("\tVLAN %d is present", i);
        }
    }
}
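/*
 * Worked example (illustrative): the VLAN filter is a 4096-bit bitmap
 * kept in VMXNET3_VFT_SIZE 32-bit words, so VLAN ID 100 lives in word
 * 100 / 32 = 3, bit 100 % 32 = 4, and
 * VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, 100) effectively tests
 * (s->vlan_table[3] >> 4) & 1.
 */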
static void vmxnet3_update_mcast_filters(VMXNET3State *s)
{
    PCIDevice *d = PCI_DEVICE(s);

    uint16_t list_bytes =
        VMXNET3_READ_DRV_SHARED16(d, s->drv_shmem,
                                  devRead.rxFilterConf.mfTableLen);

    s->mcast_list_len = list_bytes / sizeof(s->mcast_list[0]);

    s->mcast_list = g_realloc(s->mcast_list, list_bytes);
    if (!s->mcast_list) {
        if (s->mcast_list_len == 0) {
            VMW_CFPRN("Current multicast list is empty");
        } else {
            VMW_ERPRN("Failed to allocate multicast list of %d elements",
                      s->mcast_list_len);
        }
        s->mcast_list_len = 0;
    } else {
        int i;
        hwaddr mcast_list_pa =
            VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem,
                                      devRead.rxFilterConf.mfTablePA);

        pci_dma_read(d, mcast_list_pa, s->mcast_list, list_bytes);

        VMW_CFPRN("Current multicast list len is %d:", s->mcast_list_len);
        for (i = 0; i < s->mcast_list_len; i++) {
            VMW_CFPRN("\t" MAC_FMT, MAC_ARG(s->mcast_list[i].a));
        }
    }
}

static void vmxnet3_setup_rx_filtering(VMXNET3State *s)
{
    vmxnet3_update_rx_mode(s);
    vmxnet3_update_vlan_filters(s);
    vmxnet3_update_mcast_filters(s);
}

static uint32_t vmxnet3_get_interrupt_config(VMXNET3State *s)
{
    uint32_t interrupt_mode = VMXNET3_IT_AUTO | (VMXNET3_IMM_AUTO << 2);
    VMW_CFPRN("Interrupt config is 0x%X", interrupt_mode);
    return interrupt_mode;
}

static void vmxnet3_fill_stats(VMXNET3State *s)
{
    int i;
    PCIDevice *d = PCI_DEVICE(s);

    if (!s->device_active) {
        return;
    }

    for (i = 0; i < s->txq_num; i++) {
        pci_dma_write(d,
                      s->txq_descr[i].tx_stats_pa,
                      &s->txq_descr[i].txq_stats,
                      sizeof(s->txq_descr[i].txq_stats));
    }

    for (i = 0; i < s->rxq_num; i++) {
        pci_dma_write(d,
                      s->rxq_descr[i].rx_stats_pa,
                      &s->rxq_descr[i].rxq_stats,
                      sizeof(s->rxq_descr[i].rxq_stats));
    }
}

static void vmxnet3_adjust_by_guest_type(VMXNET3State *s)
{
    struct Vmxnet3_GOSInfo gos;
    PCIDevice *d = PCI_DEVICE(s);

    VMXNET3_READ_DRV_SHARED(d, s->drv_shmem, devRead.misc.driverInfo.gos,
                            &gos, sizeof(gos));
    s->rx_packets_compound =
        (gos.gosType == VMXNET3_GOS_TYPE_WIN) ? false : true;

    VMW_CFPRN("Guest type specifics: RXCOMPOUND: %d", s->rx_packets_compound);
}

static void
vmxnet3_dump_conf_descr(const char *name,
                        struct Vmxnet3_VariableLenConfDesc *pm_descr)
{
    VMW_CFPRN("%s descriptor dump: Version %u, Length %u",
              name, pm_descr->confVer, pm_descr->confLen);
}

static void vmxnet3_update_pm_state(VMXNET3State *s)
{
    struct Vmxnet3_VariableLenConfDesc pm_descr;
    PCIDevice *d = PCI_DEVICE(s);

    pm_descr.confLen =
        VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.pmConfDesc.confLen);
    pm_descr.confVer =
        VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.pmConfDesc.confVer);
    pm_descr.confPA =
        VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.pmConfDesc.confPA);

    vmxnet3_dump_conf_descr("PM State", &pm_descr);
}

static void vmxnet3_update_features(VMXNET3State *s)
{
    uint32_t guest_features;
    int rxcso_supported;
    PCIDevice *d = PCI_DEVICE(s);

    guest_features = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem,
                                               devRead.misc.uptFeatures);

    rxcso_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXCSUM);
    s->rx_vlan_stripping = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXVLAN);
    s->lro_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_LRO);

    VMW_CFPRN("Features configuration: LRO: %d, RXCSUM: %d, VLANSTRIP: %d",
              s->lro_supported, rxcso_supported,
              s->rx_vlan_stripping);
    if (s->peer_has_vhdr) {
        qemu_set_offload(qemu_get_queue(s->nic)->peer,
                         rxcso_supported,
                         s->lro_supported,
                         s->lro_supported,
                         0,
                         0);
    }
}

static bool vmxnet3_verify_intx(VMXNET3State *s, int intx)
{
    return s->msix_used || s->msi_used || (intx ==
           (pci_get_byte(s->parent_obj.config + PCI_INTERRUPT_PIN) - 1));
}

static void vmxnet3_validate_interrupt_idx(bool is_msix, int idx)
{
    int max_ints = is_msix ? VMXNET3_MAX_INTRS : VMXNET3_MAX_NMSIX_INTRS;
    if (idx >= max_ints) {
        hw_error("Bad interrupt index: %d\n", idx);
    }
}

static void vmxnet3_validate_interrupts(VMXNET3State *s)
{
    int i;

    VMW_CFPRN("Verifying event interrupt index (%d)", s->event_int_idx);
    vmxnet3_validate_interrupt_idx(s->msix_used, s->event_int_idx);

    for (i = 0; i < s->txq_num; i++) {
        int idx = s->txq_descr[i].intr_idx;
        VMW_CFPRN("Verifying TX queue %d interrupt index (%d)", i, idx);
        vmxnet3_validate_interrupt_idx(s->msix_used, idx);
    }

    for (i = 0; i < s->rxq_num; i++) {
        int idx = s->rxq_descr[i].intr_idx;
        VMW_CFPRN("Verifying RX queue %d interrupt index (%d)", i, idx);
        vmxnet3_validate_interrupt_idx(s->msix_used, idx);
    }
}
static void vmxnet3_validate_queues(VMXNET3State *s)
{
    /*
     * txq_num and rxq_num are the total numbers of queues configured
     * by the guest. These numbers must not exceed the corresponding
     * maximal values.
     */

    if (s->txq_num > VMXNET3_DEVICE_MAX_TX_QUEUES) {
        hw_error("Bad TX queues number: %d\n", s->txq_num);
    }

    if (s->rxq_num > VMXNET3_DEVICE_MAX_RX_QUEUES) {
        hw_error("Bad RX queues number: %d\n", s->rxq_num);
    }
}

static void vmxnet3_activate_device(VMXNET3State *s)
{
    int i;
    static const uint32_t VMXNET3_DEF_TX_THRESHOLD = 1;
    PCIDevice *d = PCI_DEVICE(s);
    hwaddr qdescr_table_pa;
    uint64_t pa;
    uint32_t size;

    /* Verify configuration consistency */
    if (!vmxnet3_verify_driver_magic(d, s->drv_shmem)) {
        VMW_ERPRN("Device configuration received from driver is invalid");
        return;
    }

    /* Verify if device is active */
    if (s->device_active) {
        VMW_CFPRN("Vmxnet3 device is active");
        return;
    }

    vmxnet3_adjust_by_guest_type(s);
    vmxnet3_update_features(s);
    vmxnet3_update_pm_state(s);
    vmxnet3_setup_rx_filtering(s);
    /* Cache fields from shared memory */
    s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu);
    VMW_CFPRN("MTU is %u", s->mtu);

    s->max_rx_frags =
        VMXNET3_READ_DRV_SHARED16(d, s->drv_shmem, devRead.misc.maxNumRxSG);

    if (s->max_rx_frags == 0) {
        s->max_rx_frags = 1;
    }

    VMW_CFPRN("Max RX fragments is %u", s->max_rx_frags);

    s->event_int_idx =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.eventIntrIdx);
    assert(vmxnet3_verify_intx(s, s->event_int_idx));
    VMW_CFPRN("Events interrupt line is %u", s->event_int_idx);

    s->auto_int_masking =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.autoMask);
    VMW_CFPRN("Automatic interrupt masking is %d", (int)s->auto_int_masking);

    s->txq_num =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numTxQueues);
    s->rxq_num =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numRxQueues);

    VMW_CFPRN("Number of TX/RX queues %u/%u", s->txq_num, s->rxq_num);
    vmxnet3_validate_queues(s);

    qdescr_table_pa =
        VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.misc.queueDescPA);
    VMW_CFPRN("TX queues descriptors table is at 0x%" PRIx64, qdescr_table_pa);

    /*
     * The worst-case scenario is a packet that occupies all of the TX ring
     * space, so we calculate the total size of all TX rings to obtain the
     * maximum number of TX fragments
     */
    s->max_tx_frags = 0;

    /* TX queues */
    for (i = 0; i < s->txq_num; i++) {
        hwaddr qdescr_pa =
            qdescr_table_pa + i * sizeof(struct Vmxnet3_TxQueueDesc);

        /* Read interrupt number for this TX queue */
        s->txq_descr[i].intr_idx =
            VMXNET3_READ_TX_QUEUE_DESCR8(d, qdescr_pa, conf.intrIdx);
        assert(vmxnet3_verify_intx(s, s->txq_descr[i].intr_idx));

        VMW_CFPRN("TX Queue %d interrupt: %d", i, s->txq_descr[i].intr_idx);

        /* Read rings memory locations for TX queues */
        pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.txRingBasePA);
        size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.txRingSize);

        vmxnet3_ring_init(d, &s->txq_descr[i].tx_ring, pa, size,
                          sizeof(struct Vmxnet3_TxDesc), false);
        VMXNET3_RING_DUMP(VMW_CFPRN, "TX", i, &s->txq_descr[i].tx_ring);

        s->max_tx_frags += size;

        /* TXC ring */
        pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.compRingBasePA);
        size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.compRingSize);
        vmxnet3_ring_init(d, &s->txq_descr[i].comp_ring, pa, size,
                          sizeof(struct Vmxnet3_TxCompDesc), true);
        VMXNET3_RING_DUMP(VMW_CFPRN, "TXC", i, &s->txq_descr[i].comp_ring);

        s->txq_descr[i].tx_stats_pa =
            qdescr_pa + offsetof(struct Vmxnet3_TxQueueDesc, stats);

        memset(&s->txq_descr[i].txq_stats, 0,
               sizeof(s->txq_descr[i].txq_stats));

        /* Fill device-managed parameters for queues */
        VMXNET3_WRITE_TX_QUEUE_DESCR32(d, qdescr_pa,
                                       ctrl.txThreshold,
                                       VMXNET3_DEF_TX_THRESHOLD);
    }

    /* Preallocate TX packet wrapper */
    VMW_CFPRN("Max TX fragments is %u", s->max_tx_frags);
    net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s),
                    s->max_tx_frags, s->peer_has_vhdr);
    net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);

    /* Read rings memory locations for RX queues */
    for (i = 0; i < s->rxq_num; i++) {
        int j;
        hwaddr qd_pa =
            qdescr_table_pa + s->txq_num * sizeof(struct Vmxnet3_TxQueueDesc) +
            i * sizeof(struct Vmxnet3_RxQueueDesc);

        /* Read interrupt number for this RX queue */
        s->rxq_descr[i].intr_idx =
            VMXNET3_READ_TX_QUEUE_DESCR8(d, qd_pa, conf.intrIdx);
        assert(vmxnet3_verify_intx(s, s->rxq_descr[i].intr_idx));

        VMW_CFPRN("RX Queue %d interrupt: %d", i, s->rxq_descr[i].intr_idx);

        /* Read rings memory locations */
        for (j = 0; j < VMXNET3_RX_RINGS_PER_QUEUE; j++) {
            /* RX rings */
            pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.rxRingBasePA[j]);
            size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.rxRingSize[j]);
            vmxnet3_ring_init(d, &s->rxq_descr[i].rx_ring[j], pa, size,
                              sizeof(struct Vmxnet3_RxDesc), false);
            VMW_CFPRN("RX queue %d:%d: Base: %" PRIx64 ", Size: %d",
                      i, j, pa, size);
        }

        /* RXC ring */
        pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.compRingBasePA);
        size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.compRingSize);
        vmxnet3_ring_init(d, &s->rxq_descr[i].comp_ring, pa, size,
                          sizeof(struct Vmxnet3_RxCompDesc), true);
        VMW_CFPRN("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size);

        s->rxq_descr[i].rx_stats_pa =
            qd_pa + offsetof(struct Vmxnet3_RxQueueDesc, stats);
        memset(&s->rxq_descr[i].rxq_stats, 0,
               sizeof(s->rxq_descr[i].rxq_stats));
    }

    vmxnet3_validate_interrupts(s);

    /* Make sure everything is in place before device activation */
    smp_wmb();

    vmxnet3_reset_mac(s);

    s->device_active = true;
}

static void vmxnet3_handle_command(VMXNET3State *s, uint64_t cmd)
{
    s->last_command = cmd;

    switch (cmd) {
    case VMXNET3_CMD_GET_PERM_MAC_HI:
        VMW_CBPRN("Set: Get upper part of permanent MAC");
        break;

    case VMXNET3_CMD_GET_PERM_MAC_LO:
        VMW_CBPRN("Set: Get lower part of permanent MAC");
        break;

    case VMXNET3_CMD_GET_STATS:
        VMW_CBPRN("Set: Get device statistics");
        vmxnet3_fill_stats(s);
        break;

    case VMXNET3_CMD_ACTIVATE_DEV:
        VMW_CBPRN("Set: Activating vmxnet3 device");
        vmxnet3_activate_device(s);
        break;

    case VMXNET3_CMD_UPDATE_RX_MODE:
        VMW_CBPRN("Set: Update rx mode");
        vmxnet3_update_rx_mode(s);
        break;

    case VMXNET3_CMD_UPDATE_VLAN_FILTERS:
        VMW_CBPRN("Set: Update VLAN filters");
        vmxnet3_update_vlan_filters(s);
        break;

    case VMXNET3_CMD_UPDATE_MAC_FILTERS:
        VMW_CBPRN("Set: Update MAC filters");
        vmxnet3_update_mcast_filters(s);
        break;
    case VMXNET3_CMD_UPDATE_FEATURE:
        VMW_CBPRN("Set: Update features");
        vmxnet3_update_features(s);
        break;

    case VMXNET3_CMD_UPDATE_PMCFG:
        VMW_CBPRN("Set: Update power management config");
        vmxnet3_update_pm_state(s);
        break;

    case VMXNET3_CMD_GET_LINK:
        VMW_CBPRN("Set: Get link");
        break;

    case VMXNET3_CMD_RESET_DEV:
        VMW_CBPRN("Set: Reset device");
        vmxnet3_reset(s);
        break;

    case VMXNET3_CMD_QUIESCE_DEV:
        VMW_CBPRN("Set: VMXNET3_CMD_QUIESCE_DEV - deactivate the device");
        vmxnet3_deactivate_device(s);
        break;

    case VMXNET3_CMD_GET_CONF_INTR:
        VMW_CBPRN("Set: VMXNET3_CMD_GET_CONF_INTR - interrupt configuration");
        break;

    case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
        VMW_CBPRN("Set: VMXNET3_CMD_GET_ADAPTIVE_RING_INFO - "
                  "adaptive ring info flags");
        break;

    case VMXNET3_CMD_GET_DID_LO:
        VMW_CBPRN("Set: Get lower part of device ID");
        break;

    case VMXNET3_CMD_GET_DID_HI:
        VMW_CBPRN("Set: Get upper part of device ID");
        break;

    case VMXNET3_CMD_GET_DEV_EXTRA_INFO:
        VMW_CBPRN("Set: Get device extra info");
        break;

    default:
        VMW_CBPRN("Received unknown command: %" PRIx64, cmd);
        break;
    }
}

static uint64_t vmxnet3_get_command_status(VMXNET3State *s)
{
    uint64_t ret;

    switch (s->last_command) {
    case VMXNET3_CMD_ACTIVATE_DEV:
        ret = (s->device_active) ? 0 : 1;
        VMW_CFPRN("Device active: %" PRIx64, ret);
        break;

    case VMXNET3_CMD_RESET_DEV:
    case VMXNET3_CMD_QUIESCE_DEV:
    case VMXNET3_CMD_GET_QUEUE_STATUS:
    case VMXNET3_CMD_GET_DEV_EXTRA_INFO:
        ret = 0;
        break;

    case VMXNET3_CMD_GET_LINK:
        ret = s->link_status_and_speed;
        VMW_CFPRN("Link and speed: %" PRIx64, ret);
        break;

    case VMXNET3_CMD_GET_PERM_MAC_LO:
        ret = vmxnet3_get_mac_low(&s->perm_mac);
        break;

    case VMXNET3_CMD_GET_PERM_MAC_HI:
        ret = vmxnet3_get_mac_high(&s->perm_mac);
        break;

    case VMXNET3_CMD_GET_CONF_INTR:
        ret = vmxnet3_get_interrupt_config(s);
        break;

    case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
        ret = VMXNET3_DISABLE_ADAPTIVE_RING;
        break;

    case VMXNET3_CMD_GET_DID_LO:
        ret = PCI_DEVICE_ID_VMWARE_VMXNET3;
        break;

    case VMXNET3_CMD_GET_DID_HI:
        ret = VMXNET3_DEVICE_REVISION;
        break;

    default:
        VMW_WRPRN("Received request for unknown command: %x", s->last_command);
        ret = 0;
        break;
    }

    return ret;
}

static void vmxnet3_set_events(VMXNET3State *s, uint32_t val)
{
    uint32_t events;
    PCIDevice *d = PCI_DEVICE(s);

    VMW_CBPRN("Setting events: 0x%x", val);
    events = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, ecr) | val;
    VMXNET3_WRITE_DRV_SHARED32(d, s->drv_shmem, ecr, events);
}

static void vmxnet3_ack_events(VMXNET3State *s, uint32_t val)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t events;

    VMW_CBPRN("Clearing events: 0x%x", val);
    events = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, ecr) & ~val;
    VMXNET3_WRITE_DRV_SHARED32(d, s->drv_shmem, ecr, events);
}
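/*
 * Illustrative guest-side sequence (sketch, addresses hypothetical): to
 * publish a driver-shared area at guest physical address 0x123456000 and
 * activate the device, a guest would issue
 *
 *     write BAR1[VMXNET3_REG_DSAL] = 0x23456000;  // low 32 bits, cached
 *     write BAR1[VMXNET3_REG_DSAH] = 0x1;         // high bits, commits
 *     write BAR1[VMXNET3_REG_CMD]  = VMXNET3_CMD_ACTIVATE_DEV;
 *     read  BAR1[VMXNET3_REG_CMD];                // returns 0 on success
 *
 * handled by vmxnet3_io_bar1_write()/vmxnet3_io_bar1_read() below;
 * writing 0 to DSAL instead deactivates the device.
 */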
static void
vmxnet3_io_bar1_write(void *opaque,
                      hwaddr addr,
                      uint64_t val,
                      unsigned size)
{
    VMXNET3State *s = opaque;

    switch (addr) {
    /* Vmxnet3 Revision Report Selection */
    case VMXNET3_REG_VRRS:
        VMW_CBPRN("Write BAR1 [VMXNET3_REG_VRRS] = %" PRIx64 ", size %d",
                  val, size);
        break;

    /* UPT Version Report Selection */
    case VMXNET3_REG_UVRS:
        VMW_CBPRN("Write BAR1 [VMXNET3_REG_UVRS] = %" PRIx64 ", size %d",
                  val, size);
        break;

    /* Driver Shared Address Low */
    case VMXNET3_REG_DSAL:
        VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAL] = %" PRIx64 ", size %d",
                  val, size);
        /*
         * The guest driver will first write the low part of the shared
         * memory address. We save it to a temp variable and set the
         * shared address only after we get the high part
         */
        if (val == 0) {
            vmxnet3_deactivate_device(s);
        }
        s->temp_shared_guest_driver_memory = val;
        s->drv_shmem = 0;
        break;

    /* Driver Shared Address High */
    case VMXNET3_REG_DSAH:
        VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAH] = %" PRIx64 ", size %d",
                  val, size);
        /*
         * Set the shared memory between guest driver and device.
         * We should already have the low address part.
         */
        s->drv_shmem = s->temp_shared_guest_driver_memory | (val << 32);
        break;

    /* Command */
    case VMXNET3_REG_CMD:
        VMW_CBPRN("Write BAR1 [VMXNET3_REG_CMD] = %" PRIx64 ", size %d",
                  val, size);
        vmxnet3_handle_command(s, val);
        break;

    /* MAC Address Low */
    case VMXNET3_REG_MACL:
        VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACL] = %" PRIx64 ", size %d",
                  val, size);
        s->temp_mac = val;
        break;

    /* MAC Address High */
    case VMXNET3_REG_MACH:
        VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACH] = %" PRIx64 ", size %d",
                  val, size);
        vmxnet3_set_variable_mac(s, val, s->temp_mac);
        break;

    /* Interrupt Cause Register */
    case VMXNET3_REG_ICR:
        VMW_CBPRN("Write BAR1 [VMXNET3_REG_ICR] = %" PRIx64 ", size %d",
                  val, size);
        g_assert_not_reached();
        break;

    /* Event Cause Register */
    case VMXNET3_REG_ECR:
        VMW_CBPRN("Write BAR1 [VMXNET3_REG_ECR] = %" PRIx64 ", size %d",
                  val, size);
        vmxnet3_ack_events(s, val);
        break;

    default:
        VMW_CBPRN("Unknown Write to BAR1 [%" PRIx64 "] = %" PRIx64 ", size %d",
                  addr, val, size);
        break;
    }
}

static uint64_t
vmxnet3_io_bar1_read(void *opaque, hwaddr addr, unsigned size)
{
    VMXNET3State *s = opaque;
    uint64_t ret = 0;

    switch (addr) {
    /* Vmxnet3 Revision Report Selection */
    case VMXNET3_REG_VRRS:
        VMW_CBPRN("Read BAR1 [VMXNET3_REG_VRRS], size %d", size);
        ret = VMXNET3_DEVICE_REVISION;
        break;

    /* UPT Version Report Selection */
    case VMXNET3_REG_UVRS:
        VMW_CBPRN("Read BAR1 [VMXNET3_REG_UVRS], size %d", size);
        ret = VMXNET3_UPT_REVISION;
        break;

    /* Command */
    case VMXNET3_REG_CMD:
        VMW_CBPRN("Read BAR1 [VMXNET3_REG_CMD], size %d", size);
        ret = vmxnet3_get_command_status(s);
        break;

    /* MAC Address Low */
    case VMXNET3_REG_MACL:
        VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACL], size %d", size);
        ret = vmxnet3_get_mac_low(&s->conf.macaddr);
        break;

    /* MAC Address High */
    case VMXNET3_REG_MACH:
        VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACH], size %d", size);
        ret = vmxnet3_get_mac_high(&s->conf.macaddr);
        break;

    /*
     * Interrupt Cause Register
     * Used for legacy interrupts only so interrupt index always 0
     */
    case VMXNET3_REG_ICR:
        VMW_CBPRN("Read BAR1 [VMXNET3_REG_ICR], size %d", size);
        if (vmxnet3_interrupt_asserted(s, 0)) {
            vmxnet3_clear_interrupt(s, 0);
            ret = true;
        } else {
            ret = false;
        }
        break;

    default:
        VMW_CBPRN("Unknown read BAR1[%" PRIx64 "], %d bytes", addr, size);
        break;
    }

    return ret;
}

static int
vmxnet3_can_receive(NetClientState *nc)
{
    VMXNET3State *s = qemu_get_nic_opaque(nc);
    return s->device_active &&
           VMXNET_FLAG_IS_SET(s->link_status_and_speed, VMXNET3_LINK_STATUS_UP);
}

static inline bool
vmxnet3_is_registered_vlan(VMXNET3State *s, const void *data)
{
    uint16_t vlan_tag = eth_get_pkt_tci(data) & VLAN_VID_MASK;
    if (IS_SPECIAL_VLAN_ID(vlan_tag)) {
        return true;
    }

    return VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, vlan_tag);
}

static bool
vmxnet3_is_allowed_mcast_group(VMXNET3State *s, const uint8_t *group_mac)
{
    int i;
    for (i = 0; i < s->mcast_list_len; i++) {
        if (!memcmp(group_mac, s->mcast_list[i].a, sizeof(s->mcast_list[i]))) {
            return true;
        }
    }
    return false;
}

static bool
vmxnet3_rx_filter_may_indicate(VMXNET3State *s, const void *data,
                               size_t size)
{
    struct eth_header *ehdr = PKT_GET_ETH_HDR(data);

    if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_PROMISC)) {
        return true;
    }

    if (!vmxnet3_is_registered_vlan(s, data)) {
        return false;
    }

    switch (net_rx_pkt_get_packet_type(s->rx_pkt)) {
    case ETH_PKT_UCAST:
        if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_UCAST)) {
            return false;
        }
        if (memcmp(s->conf.macaddr.a, ehdr->h_dest, ETH_ALEN)) {
            return false;
        }
        break;

    case ETH_PKT_BCAST:
        if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_BCAST)) {
            return false;
        }
        break;

    case ETH_PKT_MCAST:
        if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_ALL_MULTI)) {
            return true;
        }
        if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_MCAST)) {
            return false;
        }
        if (!vmxnet3_is_allowed_mcast_group(s, ehdr->h_dest)) {
            return false;
        }
        break;

    default:
        g_assert_not_reached();
    }

    return true;
}

static ssize_t
vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    VMXNET3State *s = qemu_get_nic_opaque(nc);
    /* Signed so that a failed indication (-1) survives the comparisons below */
    ssize_t bytes_indicated;
    uint8_t min_buf[MIN_BUF_SIZE];

    if (!vmxnet3_can_receive(nc)) {
        VMW_PKPRN("Cannot receive now");
        return -1;
    }

    if (s->peer_has_vhdr) {
        net_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf);
        buf += sizeof(struct virtio_net_hdr);
        size -= sizeof(struct virtio_net_hdr);
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        memcpy(min_buf, buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        buf = min_buf;
        size = sizeof(min_buf);
    }

    net_rx_pkt_set_packet_type(s->rx_pkt,
                               get_eth_packet_type(PKT_GET_ETH_HDR(buf)));

    if (vmxnet3_rx_filter_may_indicate(s, buf, size)) {
        net_rx_pkt_set_protocols(s->rx_pkt, buf, size);
        vmxnet3_rx_need_csum_calculate(s->rx_pkt, buf, size);
        net_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping);
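        /*
         * vmxnet3_indicate_packet() copies the prepared packet into the
         * guest RX rings; a failure (e.g. no available descriptors) is
         * reported to the caller as -1.
         */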
        bytes_indicated = vmxnet3_indicate_packet(s) ? size : -1;
        if (bytes_indicated < (ssize_t)size) {
            VMW_PKPRN("RX: %zd of %zu bytes indicated", bytes_indicated, size);
        }
    } else {
        VMW_PKPRN("Packet dropped by RX filter");
        bytes_indicated = size;
    }

    assert(size > 0);
    assert(bytes_indicated != 0);
    return bytes_indicated;
}

static void vmxnet3_set_link_status(NetClientState *nc)
{
    VMXNET3State *s = qemu_get_nic_opaque(nc);

    if (nc->link_down) {
        s->link_status_and_speed &= ~VMXNET3_LINK_STATUS_UP;
    } else {
        s->link_status_and_speed |= VMXNET3_LINK_STATUS_UP;
    }

    vmxnet3_set_events(s, VMXNET3_ECR_LINK);
    vmxnet3_trigger_interrupt(s, s->event_int_idx);
}

static NetClientInfo net_vmxnet3_info = {
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
    .size = sizeof(NICState),
    .receive = vmxnet3_receive,
    .link_status_changed = vmxnet3_set_link_status,
};

static bool vmxnet3_peer_has_vnet_hdr(VMXNET3State *s)
{
    NetClientState *nc = qemu_get_queue(s->nic);

    return qemu_has_vnet_hdr(nc->peer);
}

static void vmxnet3_net_uninit(VMXNET3State *s)
{
    g_free(s->mcast_list);
    vmxnet3_deactivate_device(s);
    qemu_del_nic(s->nic);
}

static void vmxnet3_net_init(VMXNET3State *s)
{
    DeviceState *d = DEVICE(s);

    VMW_CBPRN("vmxnet3_net_init called...");

    qemu_macaddr_default_if_unset(&s->conf.macaddr);

    /* Windows guest will query the address that was set on init */
    memcpy(&s->perm_mac.a, &s->conf.macaddr.a, sizeof(s->perm_mac.a));

    s->mcast_list = NULL;
    s->mcast_list_len = 0;

    s->link_status_and_speed = VMXNET3_LINK_SPEED | VMXNET3_LINK_STATUS_UP;

    VMW_CFPRN("Permanent MAC: " MAC_FMT, MAC_ARG(s->perm_mac.a));

    s->nic = qemu_new_nic(&net_vmxnet3_info, &s->conf,
                          object_get_typename(OBJECT(s)),
                          d->id, s);

    s->peer_has_vhdr = vmxnet3_peer_has_vnet_hdr(s);
    s->tx_sop = true;
    s->skip_current_tx_pkt = false;
    s->tx_pkt = NULL;
    s->rx_pkt = NULL;
    s->rx_vlan_stripping = false;
    s->lro_supported = false;

    if (s->peer_has_vhdr) {
        qemu_set_vnet_hdr_len(qemu_get_queue(s->nic)->peer,
                              sizeof(struct virtio_net_hdr));

        qemu_using_vnet_hdr(qemu_get_queue(s->nic)->peer, 1);
    }

    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
}

static void
vmxnet3_unuse_msix_vectors(VMXNET3State *s, int num_vectors)
{
    PCIDevice *d = PCI_DEVICE(s);
    int i;
    for (i = 0; i < num_vectors; i++) {
        msix_vector_unuse(d, i);
    }
}

static bool
vmxnet3_use_msix_vectors(VMXNET3State *s, int num_vectors)
{
    PCIDevice *d = PCI_DEVICE(s);
    int i;
    for (i = 0; i < num_vectors; i++) {
        int res = msix_vector_use(d, i);
        if (0 > res) {
            VMW_WRPRN("Failed to use MSI-X vector %d, error %d", i, res);
            vmxnet3_unuse_msix_vectors(s, i);
            return false;
        }
    }
    return true;
}
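/*
 * The MSI-X table and PBA live in their own BAR (VMXNET3_MSIX_BAR_IDX).
 * If the table cannot be set up, or any of the vectors cannot be claimed,
 * msix_used stays false and the device simply runs without MSI-X.
 */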
static bool
vmxnet3_init_msix(VMXNET3State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    int res = msix_init(d, VMXNET3_MAX_INTRS,
                        &s->msix_bar,
                        VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_TABLE,
                        &s->msix_bar,
                        VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA(s),
                        VMXNET3_MSIX_OFFSET(s));

    if (0 > res) {
        VMW_WRPRN("Failed to initialize MSI-X, error %d", res);
        s->msix_used = false;
    } else {
        if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
            VMW_WRPRN("Failed to use MSI-X vectors");
            msix_uninit(d, &s->msix_bar, &s->msix_bar);
            s->msix_used = false;
        } else {
            s->msix_used = true;
        }
    }
    return s->msix_used;
}

static void
vmxnet3_cleanup_msix(VMXNET3State *s)
{
    PCIDevice *d = PCI_DEVICE(s);

    if (s->msix_used) {
        vmxnet3_unuse_msix_vectors(s, VMXNET3_MAX_INTRS);
        msix_uninit(d, &s->msix_bar, &s->msix_bar);
    }
}

#define VMXNET3_USE_64BIT         (true)
#define VMXNET3_PER_VECTOR_MASK   (false)

static bool
vmxnet3_init_msi(VMXNET3State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    int res;

    res = msi_init(d, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS,
                   VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK);
    if (0 > res) {
        VMW_WRPRN("Failed to initialize MSI, error %d", res);
        s->msi_used = false;
    } else {
        s->msi_used = true;
    }

    return s->msi_used;
}

static void
vmxnet3_cleanup_msi(VMXNET3State *s)
{
    PCIDevice *d = PCI_DEVICE(s);

    if (s->msi_used) {
        msi_uninit(d);
    }
}

static void
vmxnet3_msix_save(QEMUFile *f, void *opaque)
{
    PCIDevice *d = PCI_DEVICE(opaque);
    msix_save(d, f);
}

static int
vmxnet3_msix_load(QEMUFile *f, void *opaque, int version_id)
{
    PCIDevice *d = PCI_DEVICE(opaque);
    msix_load(d, f);
    return 0;
}

static const MemoryRegionOps b0_ops = {
    .read = vmxnet3_io_bar0_read,
    .write = vmxnet3_io_bar0_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};

static const MemoryRegionOps b1_ops = {
    .read = vmxnet3_io_bar1_read,
    .write = vmxnet3_io_bar1_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};

/* Derive a stable PCIe Device Serial Number from the configured MAC address */
static uint64_t vmxnet3_device_serial_num(VMXNET3State *s)
{
    uint64_t dsn_payload;
    uint8_t *dsnp = (uint8_t *)&dsn_payload;

    dsnp[0] = 0xfe;
    dsnp[1] = s->conf.macaddr.a[3];
    dsnp[2] = s->conf.macaddr.a[4];
    dsnp[3] = s->conf.macaddr.a[5];
    dsnp[4] = s->conf.macaddr.a[0];
    dsnp[5] = s->conf.macaddr.a[1];
    dsnp[6] = s->conf.macaddr.a[2];
    dsnp[7] = 0xff;
    return dsn_payload;
}

static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
{
    DeviceState *dev = DEVICE(pci_dev);
    VMXNET3State *s = VMXNET3(pci_dev);

    VMW_CBPRN("Starting init...");

    memory_region_init_io(&s->bar0, OBJECT(s), &b0_ops, s,
                          "vmxnet3-b0", VMXNET3_PT_REG_SIZE);
    pci_register_bar(pci_dev, VMXNET3_BAR0_IDX,
                     PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar0);

    memory_region_init_io(&s->bar1, OBJECT(s), &b1_ops, s,
                          "vmxnet3-b1", VMXNET3_VD_REG_SIZE);
    pci_register_bar(pci_dev, VMXNET3_BAR1_IDX,
                     PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar1);

    memory_region_init(&s->msix_bar, OBJECT(s), "vmxnet3-msix-bar",
                       VMXNET3_MSIX_BAR_SIZE);
    pci_register_bar(pci_dev, VMXNET3_MSIX_BAR_IDX,
                     PCI_BASE_ADDRESS_SPACE_MEMORY, &s->msix_bar);

    vmxnet3_reset_interrupt_states(s);

    /* Interrupt pin A */
    pci_dev->config[PCI_INTERRUPT_PIN] = 0x01;
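
    /*
     * Try MSI-X first, then plain MSI; the INTx pin configured above
     * remains available for non-MSI operation.  A failure here is only
     * reported, it does not fail realize().
     */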
    if (!vmxnet3_init_msix(s)) {
        VMW_WRPRN("Failed to initialize MSI-X, configuration is inconsistent.");
    }

    if (!vmxnet3_init_msi(s)) {
        VMW_WRPRN("Failed to initialize MSI, configuration is inconsistent.");
    }

    vmxnet3_net_init(s);

    if (pci_is_express(pci_dev)) {
        if (pci_bus_is_express(pci_dev->bus)) {
            pcie_endpoint_cap_init(pci_dev, VMXNET3_EXP_EP_OFFSET);
        }

        pcie_dev_ser_num_init(pci_dev, VMXNET3_DSN_OFFSET,
                              vmxnet3_device_serial_num(s));
    }

    register_savevm(dev, "vmxnet3-msix", -1, 1,
                    vmxnet3_msix_save, vmxnet3_msix_load, s);
}

static void vmxnet3_instance_init(Object *obj)
{
    VMXNET3State *s = VMXNET3(obj);
    device_add_bootindex_property(obj, &s->conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(obj), NULL);
}

static void vmxnet3_pci_uninit(PCIDevice *pci_dev)
{
    DeviceState *dev = DEVICE(pci_dev);
    VMXNET3State *s = VMXNET3(pci_dev);

    VMW_CBPRN("Starting uninit...");

    unregister_savevm(dev, "vmxnet3-msix", s);

    vmxnet3_net_uninit(s);

    vmxnet3_cleanup_msix(s);

    vmxnet3_cleanup_msi(s);
}

static void vmxnet3_qdev_reset(DeviceState *dev)
{
    PCIDevice *d = PCI_DEVICE(dev);
    VMXNET3State *s = VMXNET3(d);

    VMW_CBPRN("Starting QDEV reset...");
    vmxnet3_reset(s);
}

static bool vmxnet3_mc_list_needed(void *opaque)
{
    return true;
}

static int vmxnet3_mcast_list_pre_load(void *opaque)
{
    VMXNET3State *s = opaque;

    s->mcast_list = g_malloc(s->mcast_list_buff_size);

    return 0;
}

static void vmxnet3_pre_save(void *opaque)
{
    VMXNET3State *s = opaque;

    s->mcast_list_buff_size = s->mcast_list_len * sizeof(MACAddr);
}

static const VMStateDescription vmstate_vmxnet3_mcast_list = {
    .name = "vmxnet3/mcast_list",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = vmxnet3_mcast_list_pre_load,
    .needed = vmxnet3_mc_list_needed,
    .fields = (VMStateField[]) {
        VMSTATE_VBUFFER_UINT32(mcast_list, VMXNET3State, 0, NULL, 0,
                               mcast_list_buff_size),
        VMSTATE_END_OF_LIST()
    }
};
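
/*
 * The ring, statistics and per-queue descriptor state below is streamed
 * field by field in fixed big-endian order rather than through VMState
 * field lists, so each get/put pair must read and write fields in exactly
 * the same order to keep the migration wire format stable.
 */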
static void vmxnet3_get_ring_from_file(QEMUFile *f, Vmxnet3Ring *r)
{
    r->pa = qemu_get_be64(f);
    r->size = qemu_get_be32(f);
    r->cell_size = qemu_get_be32(f);
    r->next = qemu_get_be32(f);
    r->gen = qemu_get_byte(f);
}

static void vmxnet3_put_ring_to_file(QEMUFile *f, Vmxnet3Ring *r)
{
    qemu_put_be64(f, r->pa);
    qemu_put_be32(f, r->size);
    qemu_put_be32(f, r->cell_size);
    qemu_put_be32(f, r->next);
    qemu_put_byte(f, r->gen);
}

static void vmxnet3_get_tx_stats_from_file(QEMUFile *f,
                                           struct UPT1_TxStats *tx_stat)
{
    tx_stat->TSOPktsTxOK = qemu_get_be64(f);
    tx_stat->TSOBytesTxOK = qemu_get_be64(f);
    tx_stat->ucastPktsTxOK = qemu_get_be64(f);
    tx_stat->ucastBytesTxOK = qemu_get_be64(f);
    tx_stat->mcastPktsTxOK = qemu_get_be64(f);
    tx_stat->mcastBytesTxOK = qemu_get_be64(f);
    tx_stat->bcastPktsTxOK = qemu_get_be64(f);
    tx_stat->bcastBytesTxOK = qemu_get_be64(f);
    tx_stat->pktsTxError = qemu_get_be64(f);
    tx_stat->pktsTxDiscard = qemu_get_be64(f);
}

static void vmxnet3_put_tx_stats_to_file(QEMUFile *f,
                                         struct UPT1_TxStats *tx_stat)
{
    qemu_put_be64(f, tx_stat->TSOPktsTxOK);
    qemu_put_be64(f, tx_stat->TSOBytesTxOK);
    qemu_put_be64(f, tx_stat->ucastPktsTxOK);
    qemu_put_be64(f, tx_stat->ucastBytesTxOK);
    qemu_put_be64(f, tx_stat->mcastPktsTxOK);
    qemu_put_be64(f, tx_stat->mcastBytesTxOK);
    qemu_put_be64(f, tx_stat->bcastPktsTxOK);
    qemu_put_be64(f, tx_stat->bcastBytesTxOK);
    qemu_put_be64(f, tx_stat->pktsTxError);
    qemu_put_be64(f, tx_stat->pktsTxDiscard);
}

static int vmxnet3_get_txq_descr(QEMUFile *f, void *pv, size_t size)
{
    Vmxnet3TxqDescr *r = pv;

    vmxnet3_get_ring_from_file(f, &r->tx_ring);
    vmxnet3_get_ring_from_file(f, &r->comp_ring);
    r->intr_idx = qemu_get_byte(f);
    r->tx_stats_pa = qemu_get_be64(f);

    vmxnet3_get_tx_stats_from_file(f, &r->txq_stats);

    return 0;
}

static void vmxnet3_put_txq_descr(QEMUFile *f, void *pv, size_t size)
{
    Vmxnet3TxqDescr *r = pv;

    vmxnet3_put_ring_to_file(f, &r->tx_ring);
    vmxnet3_put_ring_to_file(f, &r->comp_ring);
    qemu_put_byte(f, r->intr_idx);
    qemu_put_be64(f, r->tx_stats_pa);
    vmxnet3_put_tx_stats_to_file(f, &r->txq_stats);
}

static const VMStateInfo txq_descr_info = {
    .name = "txq_descr",
    .get = vmxnet3_get_txq_descr,
    .put = vmxnet3_put_txq_descr
};

static void vmxnet3_get_rx_stats_from_file(QEMUFile *f,
                                           struct UPT1_RxStats *rx_stat)
{
    rx_stat->LROPktsRxOK = qemu_get_be64(f);
    rx_stat->LROBytesRxOK = qemu_get_be64(f);
    rx_stat->ucastPktsRxOK = qemu_get_be64(f);
    rx_stat->ucastBytesRxOK = qemu_get_be64(f);
    rx_stat->mcastPktsRxOK = qemu_get_be64(f);
    rx_stat->mcastBytesRxOK = qemu_get_be64(f);
    rx_stat->bcastPktsRxOK = qemu_get_be64(f);
    rx_stat->bcastBytesRxOK = qemu_get_be64(f);
    rx_stat->pktsRxOutOfBuf = qemu_get_be64(f);
    rx_stat->pktsRxError = qemu_get_be64(f);
}

static void vmxnet3_put_rx_stats_to_file(QEMUFile *f,
                                         struct UPT1_RxStats *rx_stat)
{
    qemu_put_be64(f, rx_stat->LROPktsRxOK);
    qemu_put_be64(f, rx_stat->LROBytesRxOK);
    qemu_put_be64(f, rx_stat->ucastPktsRxOK);
    qemu_put_be64(f, rx_stat->ucastBytesRxOK);
    qemu_put_be64(f, rx_stat->mcastPktsRxOK);
    qemu_put_be64(f, rx_stat->mcastBytesRxOK);
    qemu_put_be64(f, rx_stat->bcastPktsRxOK);
    qemu_put_be64(f, rx_stat->bcastBytesRxOK);
    qemu_put_be64(f, rx_stat->pktsRxOutOfBuf);
    qemu_put_be64(f, rx_stat->pktsRxError);
}

static int vmxnet3_get_rxq_descr(QEMUFile *f, void *pv, size_t size)
{
    Vmxnet3RxqDescr *r = pv;
    int i;

    for (i = 0; i < VMXNET3_RX_RINGS_PER_QUEUE; i++) {
        vmxnet3_get_ring_from_file(f, &r->rx_ring[i]);
    }

    vmxnet3_get_ring_from_file(f, &r->comp_ring);
    r->intr_idx = qemu_get_byte(f);
    r->rx_stats_pa = qemu_get_be64(f);

    vmxnet3_get_rx_stats_from_file(f, &r->rxq_stats);

    return 0;
}

static void vmxnet3_put_rxq_descr(QEMUFile *f, void *pv, size_t size)
{
    Vmxnet3RxqDescr *r = pv;
    int i;

    for (i = 0; i < VMXNET3_RX_RINGS_PER_QUEUE; i++) {
        vmxnet3_put_ring_to_file(f, &r->rx_ring[i]);
    }

    vmxnet3_put_ring_to_file(f, &r->comp_ring);
    qemu_put_byte(f, r->intr_idx);
    qemu_put_be64(f, r->rx_stats_pa);
    vmxnet3_put_rx_stats_to_file(f, &r->rxq_stats);
}
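
/*
 * After an incoming migration the runtime TX/RX packet helpers do not
 * exist yet and MSI-X vector usage is not restored automatically, so
 * both are re-established here; the loaded queue and interrupt indices
 * are then sanity-checked.
 */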
static int vmxnet3_post_load(void *opaque, int version_id)
{
    VMXNET3State *s = opaque;
    PCIDevice *d = PCI_DEVICE(s);

    net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s),
                    s->max_tx_frags, s->peer_has_vhdr);
    net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);

    if (s->msix_used) {
        if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
            VMW_WRPRN("Failed to re-use MSI-X vectors");
            msix_uninit(d, &s->msix_bar, &s->msix_bar);
            s->msix_used = false;
            return -1;
        }
    }

    vmxnet3_validate_queues(s);
    vmxnet3_validate_interrupts(s);

    return 0;
}

static const VMStateInfo rxq_descr_info = {
    .name = "rxq_descr",
    .get = vmxnet3_get_rxq_descr,
    .put = vmxnet3_put_rxq_descr
};

static int vmxnet3_get_int_state(QEMUFile *f, void *pv, size_t size)
{
    Vmxnet3IntState *r = pv;

    r->is_masked = qemu_get_byte(f);
    r->is_pending = qemu_get_byte(f);
    r->is_asserted = qemu_get_byte(f);

    return 0;
}

static void vmxnet3_put_int_state(QEMUFile *f, void *pv, size_t size)
{
    Vmxnet3IntState *r = pv;

    qemu_put_byte(f, r->is_masked);
    qemu_put_byte(f, r->is_pending);
    qemu_put_byte(f, r->is_asserted);
}

static const VMStateInfo int_state_info = {
    .name = "int_state",
    .get = vmxnet3_get_int_state,
    .put = vmxnet3_put_int_state
};

static bool vmxnet3_vmstate_need_pcie_device(void *opaque)
{
    VMXNET3State *s = VMXNET3(opaque);

    return !(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE);
}

static bool vmxnet3_vmstate_test_pci_device(void *opaque, int version_id)
{
    return !vmxnet3_vmstate_need_pcie_device(opaque);
}

static const VMStateDescription vmstate_vmxnet3_pcie_device = {
    .name = "vmxnet3/pcie",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmxnet3_vmstate_need_pcie_device,
    .fields = (VMStateField[]) {
        VMSTATE_PCIE_DEVICE(parent_obj, VMXNET3State),
        VMSTATE_END_OF_LIST()
    }
};
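
/*
 * Top-level migration description.  The PCI vs. PCIe parent state is
 * selected by the x-disable-pcie compat flag: for a given configuration
 * exactly one of the VMSTATE_STRUCT_TEST entry below and the
 * "vmxnet3/pcie" subsection is active.
 */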
static const VMStateDescription vmstate_vmxnet3 = {
    .name = "vmxnet3",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_save = vmxnet3_pre_save,
    .post_load = vmxnet3_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_TEST(parent_obj, VMXNET3State,
                            vmxnet3_vmstate_test_pci_device, 0,
                            vmstate_pci_device, PCIDevice),
        VMSTATE_BOOL(rx_packets_compound, VMXNET3State),
        VMSTATE_BOOL(rx_vlan_stripping, VMXNET3State),
        VMSTATE_BOOL(lro_supported, VMXNET3State),
        VMSTATE_UINT32(rx_mode, VMXNET3State),
        VMSTATE_UINT32(mcast_list_len, VMXNET3State),
        VMSTATE_UINT32(mcast_list_buff_size, VMXNET3State),
        VMSTATE_UINT32_ARRAY(vlan_table, VMXNET3State, VMXNET3_VFT_SIZE),
        VMSTATE_UINT32(mtu, VMXNET3State),
        VMSTATE_UINT16(max_rx_frags, VMXNET3State),
        VMSTATE_UINT32(max_tx_frags, VMXNET3State),
        VMSTATE_UINT8(event_int_idx, VMXNET3State),
        VMSTATE_BOOL(auto_int_masking, VMXNET3State),
        VMSTATE_UINT8(txq_num, VMXNET3State),
        VMSTATE_UINT8(rxq_num, VMXNET3State),
        VMSTATE_UINT32(device_active, VMXNET3State),
        VMSTATE_UINT32(last_command, VMXNET3State),
        VMSTATE_UINT32(link_status_and_speed, VMXNET3State),
        VMSTATE_UINT32(temp_mac, VMXNET3State),
        VMSTATE_UINT64(drv_shmem, VMXNET3State),
        VMSTATE_UINT64(temp_shared_guest_driver_memory, VMXNET3State),

        VMSTATE_ARRAY(txq_descr, VMXNET3State,
                      VMXNET3_DEVICE_MAX_TX_QUEUES, 0, txq_descr_info,
                      Vmxnet3TxqDescr),
        VMSTATE_ARRAY(rxq_descr, VMXNET3State,
                      VMXNET3_DEVICE_MAX_RX_QUEUES, 0, rxq_descr_info,
                      Vmxnet3RxqDescr),
        VMSTATE_ARRAY(interrupt_states, VMXNET3State, VMXNET3_MAX_INTRS,
                      0, int_state_info, Vmxnet3IntState),

        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_vmxnet3_mcast_list,
        &vmstate_vmxnet3_pcie_device,
        NULL
    }
};

static Property vmxnet3_properties[] = {
    DEFINE_NIC_PROPERTIES(VMXNET3State, conf),
    DEFINE_PROP_BIT("x-old-msi-offsets", VMXNET3State, compat_flags,
                    VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT, false),
    DEFINE_PROP_BIT("x-disable-pcie", VMXNET3State, compat_flags,
                    VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT, false),
    DEFINE_PROP_END_OF_LIST(),
};

static void vmxnet3_realize(DeviceState *qdev, Error **errp)
{
    VMXNET3Class *vc = VMXNET3_DEVICE_GET_CLASS(qdev);
    PCIDevice *pci_dev = PCI_DEVICE(qdev);
    VMXNET3State *s = VMXNET3(qdev);

    if (!(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE)) {
        pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
    }

    vc->parent_dc_realize(qdev, errp);
}

static void vmxnet3_class_init(ObjectClass *class, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(class);
    PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
    VMXNET3Class *vc = VMXNET3_DEVICE_CLASS(class);

    c->realize = vmxnet3_pci_realize;
    c->exit = vmxnet3_pci_uninit;
    c->vendor_id = PCI_VENDOR_ID_VMWARE;
    c->device_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
    c->revision = PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION;
    c->class_id = PCI_CLASS_NETWORK_ETHERNET;
    c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE;
    c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
    vc->parent_dc_realize = dc->realize;
    dc->realize = vmxnet3_realize;
    dc->desc = "VMWare Paravirtualized Ethernet v3";
    dc->reset = vmxnet3_qdev_reset;
    dc->vmsd = &vmstate_vmxnet3;
    dc->props = vmxnet3_properties;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
}

static const TypeInfo vmxnet3_info = {
    .name = TYPE_VMXNET3,
    .parent = TYPE_PCI_DEVICE,
    .class_size = sizeof(VMXNET3Class),
    .instance_size = sizeof(VMXNET3State),
    .class_init = vmxnet3_class_init,
    .instance_init = vmxnet3_instance_init,
};

static void vmxnet3_register_types(void)
{
    VMW_CBPRN("vmxnet3_register_types called...");
    type_register_static(&vmxnet3_info);
}

type_init(vmxnet3_register_types)