1 /* 2 * QEMU Intel 82576 SR/IOV Ethernet Controller Emulation 3 * 4 * Datasheet: 5 * https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/82576eg-gbe-datasheet.pdf 6 * 7 * Copyright (c) 2020-2023 Red Hat, Inc. 8 * Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com) 9 * Developed by Daynix Computing LTD (http://www.daynix.com) 10 * 11 * Authors: 12 * Akihiko Odaki <akihiko.odaki@daynix.com> 13 * Gal Hammmer <gal.hammer@sap.com> 14 * Marcel Apfelbaum <marcel.apfelbaum@gmail.com> 15 * Dmitry Fleytman <dmitry@daynix.com> 16 * Leonid Bloch <leonid@daynix.com> 17 * Yan Vugenfirer <yan@daynix.com> 18 * 19 * Based on work done by: 20 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc. 21 * Copyright (c) 2008 Qumranet 22 * Based on work done by: 23 * Copyright (c) 2007 Dan Aloni 24 * Copyright (c) 2004 Antony T Curtis 25 * 26 * This library is free software; you can redistribute it and/or 27 * modify it under the terms of the GNU Lesser General Public 28 * License as published by the Free Software Foundation; either 29 * version 2.1 of the License, or (at your option) any later version. 30 * 31 * This library is distributed in the hope that it will be useful, 32 * but WITHOUT ANY WARRANTY; without even the implied warranty of 33 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 34 * Lesser General Public License for more details. 35 * 36 * You should have received a copy of the GNU Lesser General Public 37 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 38 */ 39 40 #include "qemu/osdep.h" 41 #include "qemu/units.h" 42 #include "net/eth.h" 43 #include "net/net.h" 44 #include "net/tap.h" 45 #include "qemu/module.h" 46 #include "qemu/range.h" 47 #include "sysemu/sysemu.h" 48 #include "hw/hw.h" 49 #include "hw/net/mii.h" 50 #include "hw/pci/pci.h" 51 #include "hw/pci/pcie.h" 52 #include "hw/pci/pcie_sriov.h" 53 #include "hw/pci/msi.h" 54 #include "hw/pci/msix.h" 55 #include "hw/qdev-properties.h" 56 #include "migration/vmstate.h" 57 58 #include "igb_common.h" 59 #include "igb_core.h" 60 61 #include "trace.h" 62 #include "qapi/error.h" 63 #include "qom/object.h" 64 65 #define TYPE_IGB "igb" 66 OBJECT_DECLARE_SIMPLE_TYPE(IGBState, IGB) 67 68 struct IGBState { 69 PCIDevice parent_obj; 70 NICState *nic; 71 NICConf conf; 72 73 MemoryRegion mmio; 74 MemoryRegion flash; 75 MemoryRegion io; 76 MemoryRegion msix; 77 78 uint32_t ioaddr; 79 80 IGBCore core; 81 }; 82 83 #define IGB_CAP_SRIOV_OFFSET (0x160) 84 #define IGB_VF_OFFSET (0x80) 85 #define IGB_VF_STRIDE (2) 86 87 #define E1000E_MMIO_IDX 0 88 #define E1000E_FLASH_IDX 1 89 #define E1000E_IO_IDX 2 90 #define E1000E_MSIX_IDX 3 91 92 #define E1000E_MMIO_SIZE (128 * KiB) 93 #define E1000E_FLASH_SIZE (128 * KiB) 94 #define E1000E_IO_SIZE (32) 95 #define E1000E_MSIX_SIZE (16 * KiB) 96 97 static void igb_write_config(PCIDevice *dev, uint32_t addr, 98 uint32_t val, int len) 99 { 100 IGBState *s = IGB(dev); 101 102 trace_igb_write_config(addr, val, len); 103 pci_default_write_config(dev, addr, val, len); 104 105 if (range_covers_byte(addr, len, PCI_COMMAND) && 106 (dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { 107 igb_start_recv(&s->core); 108 } 109 } 110 111 uint64_t 112 igb_mmio_read(void *opaque, hwaddr addr, unsigned size) 113 { 114 IGBState *s = opaque; 115 return igb_core_read(&s->core, addr, size); 116 } 117 118 void 119 igb_mmio_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) 120 { 121 IGBState *s = opaque; 122 igb_core_write(&s->core, addr, val, size); 123 } 124 125 static bool 126 igb_io_get_reg_index(IGBState *s, uint32_t *idx) 127 { 128 if (s->ioaddr < 0x1FFFF) { 129 *idx = s->ioaddr; 130 return true; 131 } 132 133 if (s->ioaddr < 0x7FFFF) { 134 trace_e1000e_wrn_io_addr_undefined(s->ioaddr); 135 return false; 136 } 137 138 if (s->ioaddr < 0xFFFFF) { 139 trace_e1000e_wrn_io_addr_flash(s->ioaddr); 140 return false; 141 } 142 143 trace_e1000e_wrn_io_addr_unknown(s->ioaddr); 144 return false; 145 } 146 147 static uint64_t 148 igb_io_read(void *opaque, hwaddr addr, unsigned size) 149 { 150 IGBState *s = opaque; 151 uint32_t idx = 0; 152 uint64_t val; 153 154 switch (addr) { 155 case E1000_IOADDR: 156 trace_e1000e_io_read_addr(s->ioaddr); 157 return s->ioaddr; 158 case E1000_IODATA: 159 if (igb_io_get_reg_index(s, &idx)) { 160 val = igb_core_read(&s->core, idx, sizeof(val)); 161 trace_e1000e_io_read_data(idx, val); 162 return val; 163 } 164 return 0; 165 default: 166 trace_e1000e_wrn_io_read_unknown(addr); 167 return 0; 168 } 169 } 170 171 static void 172 igb_io_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) 173 { 174 IGBState *s = opaque; 175 uint32_t idx = 0; 176 177 switch (addr) { 178 case E1000_IOADDR: 179 trace_e1000e_io_write_addr(val); 180 s->ioaddr = (uint32_t) val; 181 return; 182 case E1000_IODATA: 183 if (igb_io_get_reg_index(s, &idx)) { 184 trace_e1000e_io_write_data(idx, val); 185 igb_core_write(&s->core, idx, val, sizeof(val)); 186 } 187 return; 188 default: 189 trace_e1000e_wrn_io_write_unknown(addr); 190 return; 191 } 192 } 193 194 static const MemoryRegionOps mmio_ops = { 195 .read = igb_mmio_read, 196 .write = igb_mmio_write, 197 .endianness = DEVICE_LITTLE_ENDIAN, 198 .impl = { 199 .min_access_size = 4, 200 .max_access_size = 4, 201 }, 202 }; 203 204 static const MemoryRegionOps io_ops = { 205 .read = igb_io_read, 206 .write = igb_io_write, 207 .endianness = DEVICE_LITTLE_ENDIAN, 208 .impl = { 209 .min_access_size = 4, 210 .max_access_size = 4, 211 }, 212 }; 213 214 static bool 215 igb_nc_can_receive(NetClientState *nc) 216 { 217 IGBState *s = qemu_get_nic_opaque(nc); 218 return igb_can_receive(&s->core); 219 } 220 221 static ssize_t 222 igb_nc_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) 223 { 224 IGBState *s = qemu_get_nic_opaque(nc); 225 return igb_receive_iov(&s->core, iov, iovcnt); 226 } 227 228 static ssize_t 229 igb_nc_receive(NetClientState *nc, const uint8_t *buf, size_t size) 230 { 231 IGBState *s = qemu_get_nic_opaque(nc); 232 return igb_receive(&s->core, buf, size); 233 } 234 235 static void 236 igb_set_link_status(NetClientState *nc) 237 { 238 IGBState *s = qemu_get_nic_opaque(nc); 239 igb_core_set_link_status(&s->core); 240 } 241 242 static NetClientInfo net_igb_info = { 243 .type = NET_CLIENT_DRIVER_NIC, 244 .size = sizeof(NICState), 245 .can_receive = igb_nc_can_receive, 246 .receive = igb_nc_receive, 247 .receive_iov = igb_nc_receive_iov, 248 .link_status_changed = igb_set_link_status, 249 }; 250 251 /* 252 * EEPROM (NVM) contents documented in section 6.1, table 6-1: 253 * and in 6.10 Software accessed words. 254 */ 255 static const uint16_t igb_eeprom_template[] = { 256 /* Address |Compat.|OEM sp.| ImRev | OEM sp. */ 257 0x0000, 0x0000, 0x0000, 0x0d34, 0xffff, 0x2010, 0xffff, 0xffff, 258 /* PBA |ICtrl1 | SSID | SVID | DevID |-------|ICtrl2 */ 259 0x1040, 0xffff, 0x002b, 0x0000, 0x8086, 0x10c9, 0x0000, 0x70c3, 260 /* SwPin0| DevID | EESZ |-------|ICtrl3 |PCI-tc | MSIX | APtr */ 261 0x0004, 0x10c9, 0x5c00, 0x0000, 0x2880, 0x0014, 0x4a40, 0x0060, 262 /* PCIe Init. Conf 1,2,3 |PCICtrl| LD1,3 |DDevID |DevRev | LD0,2 */ 263 0x6cfb, 0xc7b0, 0x0abe, 0x0403, 0x0783, 0x10a6, 0x0001, 0x0602, 264 /* SwPin1| FunC |LAN-PWR|ManHwC |ICtrl3 | IOVct |VDevID |-------*/ 265 0x0004, 0x0020, 0x0000, 0x004a, 0x2080, 0x00f5, 0x10ca, 0x0000, 266 /*---------------| LD1,3 | LD0,2 | ROEnd | ROSta | Wdog | VPD */ 267 0x0000, 0x0000, 0x4784, 0x4602, 0x0000, 0x0000, 0x1000, 0xffff, 268 /* PCSet0| Ccfg0 |PXEver |IBAcap |PCSet1 | Ccfg1 |iSCVer | ?? */ 269 0x0100, 0x4000, 0x131f, 0x4013, 0x0100, 0x4000, 0xffff, 0xffff, 270 /* PCSet2| Ccfg2 |PCSet3 | Ccfg3 | ?? |AltMacP| ?? |CHKSUM */ 271 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x00e0, 0xffff, 0x0000, 272 /* NC-SIC */ 273 0x0003, 274 }; 275 276 static void igb_core_realize(IGBState *s) 277 { 278 s->core.owner = &s->parent_obj; 279 s->core.owner_nic = s->nic; 280 } 281 282 static void 283 igb_init_msix(IGBState *s) 284 { 285 int i, res; 286 287 res = msix_init(PCI_DEVICE(s), IGB_MSIX_VEC_NUM, 288 &s->msix, 289 E1000E_MSIX_IDX, 0, 290 &s->msix, 291 E1000E_MSIX_IDX, 0x2000, 292 0x70, NULL); 293 294 if (res < 0) { 295 trace_e1000e_msix_init_fail(res); 296 } else { 297 for (i = 0; i < IGB_MSIX_VEC_NUM; i++) { 298 msix_vector_use(PCI_DEVICE(s), i); 299 } 300 } 301 } 302 303 static void 304 igb_cleanup_msix(IGBState *s) 305 { 306 msix_unuse_all_vectors(PCI_DEVICE(s)); 307 msix_uninit(PCI_DEVICE(s), &s->msix, &s->msix); 308 } 309 310 static void 311 igb_init_net_peer(IGBState *s, PCIDevice *pci_dev, uint8_t *macaddr) 312 { 313 DeviceState *dev = DEVICE(pci_dev); 314 NetClientState *nc; 315 int i; 316 317 s->nic = qemu_new_nic(&net_igb_info, &s->conf, 318 object_get_typename(OBJECT(s)), dev->id, s); 319 320 s->core.max_queue_num = s->conf.peers.queues ? s->conf.peers.queues - 1 : 0; 321 322 trace_e1000e_mac_set_permanent(MAC_ARG(macaddr)); 323 memcpy(s->core.permanent_mac, macaddr, sizeof(s->core.permanent_mac)); 324 325 qemu_format_nic_info_str(qemu_get_queue(s->nic), macaddr); 326 327 /* Setup virtio headers */ 328 for (i = 0; i < s->conf.peers.queues; i++) { 329 nc = qemu_get_subqueue(s->nic, i); 330 if (!nc->peer || !qemu_has_vnet_hdr(nc->peer)) { 331 trace_e1000e_cfg_support_virtio(false); 332 return; 333 } 334 } 335 336 trace_e1000e_cfg_support_virtio(true); 337 s->core.has_vnet = true; 338 339 for (i = 0; i < s->conf.peers.queues; i++) { 340 nc = qemu_get_subqueue(s->nic, i); 341 qemu_set_vnet_hdr_len(nc->peer, sizeof(struct virtio_net_hdr)); 342 qemu_using_vnet_hdr(nc->peer, true); 343 } 344 } 345 346 static int 347 igb_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc) 348 { 349 Error *local_err = NULL; 350 int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset, 351 PCI_PM_SIZEOF, &local_err); 352 353 if (local_err) { 354 error_report_err(local_err); 355 return ret; 356 } 357 358 pci_set_word(pdev->config + offset + PCI_PM_PMC, 359 PCI_PM_CAP_VER_1_1 | 360 pmc); 361 362 pci_set_word(pdev->wmask + offset + PCI_PM_CTRL, 363 PCI_PM_CTRL_STATE_MASK | 364 PCI_PM_CTRL_PME_ENABLE | 365 PCI_PM_CTRL_DATA_SEL_MASK); 366 367 pci_set_word(pdev->w1cmask + offset + PCI_PM_CTRL, 368 PCI_PM_CTRL_PME_STATUS); 369 370 return ret; 371 } 372 373 static void igb_pci_realize(PCIDevice *pci_dev, Error **errp) 374 { 375 IGBState *s = IGB(pci_dev); 376 uint8_t *macaddr; 377 int ret; 378 379 trace_e1000e_cb_pci_realize(); 380 381 pci_dev->config_write = igb_write_config; 382 383 pci_dev->config[PCI_CACHE_LINE_SIZE] = 0x10; 384 pci_dev->config[PCI_INTERRUPT_PIN] = 1; 385 386 /* Define IO/MMIO regions */ 387 memory_region_init_io(&s->mmio, OBJECT(s), &mmio_ops, s, 388 "igb-mmio", E1000E_MMIO_SIZE); 389 pci_register_bar(pci_dev, E1000E_MMIO_IDX, 390 PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio); 391 392 /* 393 * We provide a dummy implementation for the flash BAR 394 * for drivers that may theoretically probe for its presence. 395 */ 396 memory_region_init(&s->flash, OBJECT(s), 397 "igb-flash", E1000E_FLASH_SIZE); 398 pci_register_bar(pci_dev, E1000E_FLASH_IDX, 399 PCI_BASE_ADDRESS_SPACE_MEMORY, &s->flash); 400 401 memory_region_init_io(&s->io, OBJECT(s), &io_ops, s, 402 "igb-io", E1000E_IO_SIZE); 403 pci_register_bar(pci_dev, E1000E_IO_IDX, 404 PCI_BASE_ADDRESS_SPACE_IO, &s->io); 405 406 memory_region_init(&s->msix, OBJECT(s), "igb-msix", 407 E1000E_MSIX_SIZE); 408 pci_register_bar(pci_dev, E1000E_MSIX_IDX, 409 PCI_BASE_ADDRESS_MEM_TYPE_64, &s->msix); 410 411 /* Create networking backend */ 412 qemu_macaddr_default_if_unset(&s->conf.macaddr); 413 macaddr = s->conf.macaddr.a; 414 415 /* Add PCI capabilities in reverse order */ 416 assert(pcie_endpoint_cap_init(pci_dev, 0xa0) > 0); 417 418 igb_init_msix(s); 419 420 ret = msi_init(pci_dev, 0x50, 1, true, true, NULL); 421 if (ret) { 422 trace_e1000e_msi_init_fail(ret); 423 } 424 425 if (igb_add_pm_capability(pci_dev, 0x40, PCI_PM_CAP_DSI) < 0) { 426 hw_error("Failed to initialize PM capability"); 427 } 428 429 /* PCIe extended capabilities (in order) */ 430 if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) { 431 hw_error("Failed to initialize AER capability"); 432 } 433 434 pcie_ari_init(pci_dev, 0x150, 1); 435 436 pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, "igbvf", 437 IGB_82576_VF_DEV_ID, IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS, 438 IGB_VF_OFFSET, IGB_VF_STRIDE); 439 440 pcie_sriov_pf_init_vf_bar(pci_dev, 0, 441 PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH, 442 16 * KiB); 443 pcie_sriov_pf_init_vf_bar(pci_dev, 3, 444 PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH, 445 16 * KiB); 446 447 igb_init_net_peer(s, pci_dev, macaddr); 448 449 /* Initialize core */ 450 igb_core_realize(s); 451 452 igb_core_pci_realize(&s->core, 453 igb_eeprom_template, 454 sizeof(igb_eeprom_template), 455 macaddr); 456 } 457 458 static void igb_pci_uninit(PCIDevice *pci_dev) 459 { 460 IGBState *s = IGB(pci_dev); 461 462 trace_e1000e_cb_pci_uninit(); 463 464 igb_core_pci_uninit(&s->core); 465 466 pcie_sriov_pf_exit(pci_dev); 467 pcie_cap_exit(pci_dev); 468 469 qemu_del_nic(s->nic); 470 471 igb_cleanup_msix(s); 472 msi_uninit(pci_dev); 473 } 474 475 static void igb_qdev_reset_hold(Object *obj) 476 { 477 PCIDevice *d = PCI_DEVICE(obj); 478 IGBState *s = IGB(obj); 479 480 trace_e1000e_cb_qdev_reset_hold(); 481 482 pcie_sriov_pf_disable_vfs(d); 483 igb_core_reset(&s->core); 484 } 485 486 static int igb_pre_save(void *opaque) 487 { 488 IGBState *s = opaque; 489 490 trace_e1000e_cb_pre_save(); 491 492 igb_core_pre_save(&s->core); 493 494 return 0; 495 } 496 497 static int igb_post_load(void *opaque, int version_id) 498 { 499 IGBState *s = opaque; 500 501 trace_e1000e_cb_post_load(); 502 return igb_core_post_load(&s->core); 503 } 504 505 static const VMStateDescription igb_vmstate_tx = { 506 .name = "igb-tx", 507 .version_id = 1, 508 .minimum_version_id = 1, 509 .fields = (VMStateField[]) { 510 VMSTATE_UINT16(vlan, struct igb_tx), 511 VMSTATE_UINT16(mss, struct igb_tx), 512 VMSTATE_BOOL(tse, struct igb_tx), 513 VMSTATE_BOOL(ixsm, struct igb_tx), 514 VMSTATE_BOOL(txsm, struct igb_tx), 515 VMSTATE_BOOL(first, struct igb_tx), 516 VMSTATE_BOOL(skip_cp, struct igb_tx), 517 VMSTATE_END_OF_LIST() 518 } 519 }; 520 521 static const VMStateDescription igb_vmstate_intr_timer = { 522 .name = "igb-intr-timer", 523 .version_id = 1, 524 .minimum_version_id = 1, 525 .fields = (VMStateField[]) { 526 VMSTATE_TIMER_PTR(timer, IGBIntrDelayTimer), 527 VMSTATE_BOOL(running, IGBIntrDelayTimer), 528 VMSTATE_END_OF_LIST() 529 } 530 }; 531 532 #define VMSTATE_IGB_INTR_DELAY_TIMER(_f, _s) \ 533 VMSTATE_STRUCT(_f, _s, 0, \ 534 igb_vmstate_intr_timer, IGBIntrDelayTimer) 535 536 #define VMSTATE_IGB_INTR_DELAY_TIMER_ARRAY(_f, _s, _num) \ 537 VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ 538 igb_vmstate_intr_timer, IGBIntrDelayTimer) 539 540 static const VMStateDescription igb_vmstate = { 541 .name = "igb", 542 .version_id = 1, 543 .minimum_version_id = 1, 544 .pre_save = igb_pre_save, 545 .post_load = igb_post_load, 546 .fields = (VMStateField[]) { 547 VMSTATE_PCI_DEVICE(parent_obj, IGBState), 548 VMSTATE_MSIX(parent_obj, IGBState), 549 550 VMSTATE_UINT32(ioaddr, IGBState), 551 VMSTATE_UINT8(core.rx_desc_len, IGBState), 552 VMSTATE_UINT16_ARRAY(core.eeprom, IGBState, IGB_EEPROM_SIZE), 553 VMSTATE_UINT16_ARRAY(core.phy, IGBState, MAX_PHY_REG_ADDRESS + 1), 554 VMSTATE_UINT32_ARRAY(core.mac, IGBState, E1000E_MAC_SIZE), 555 VMSTATE_UINT8_ARRAY(core.permanent_mac, IGBState, ETH_ALEN), 556 557 VMSTATE_IGB_INTR_DELAY_TIMER_ARRAY(core.eitr, IGBState, 558 IGB_INTR_NUM), 559 560 VMSTATE_UINT32_ARRAY(core.eitr_guest_value, IGBState, IGB_INTR_NUM), 561 562 VMSTATE_STRUCT_ARRAY(core.tx, IGBState, IGB_NUM_QUEUES, 0, 563 igb_vmstate_tx, struct igb_tx), 564 565 VMSTATE_INT64(core.timadj, IGBState), 566 567 VMSTATE_END_OF_LIST() 568 } 569 }; 570 571 static Property igb_properties[] = { 572 DEFINE_NIC_PROPERTIES(IGBState, conf), 573 DEFINE_PROP_END_OF_LIST(), 574 }; 575 576 static void igb_class_init(ObjectClass *class, void *data) 577 { 578 DeviceClass *dc = DEVICE_CLASS(class); 579 ResettableClass *rc = RESETTABLE_CLASS(class); 580 PCIDeviceClass *c = PCI_DEVICE_CLASS(class); 581 582 c->realize = igb_pci_realize; 583 c->exit = igb_pci_uninit; 584 c->vendor_id = PCI_VENDOR_ID_INTEL; 585 c->device_id = E1000_DEV_ID_82576; 586 c->revision = 1; 587 c->class_id = PCI_CLASS_NETWORK_ETHERNET; 588 589 rc->phases.hold = igb_qdev_reset_hold; 590 591 dc->desc = "Intel 82576 Gigabit Ethernet Controller"; 592 dc->vmsd = &igb_vmstate; 593 594 device_class_set_props(dc, igb_properties); 595 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); 596 } 597 598 static void igb_instance_init(Object *obj) 599 { 600 IGBState *s = IGB(obj); 601 device_add_bootindex_property(obj, &s->conf.bootindex, 602 "bootindex", "/ethernet-phy@0", 603 DEVICE(obj)); 604 } 605 606 static const TypeInfo igb_info = { 607 .name = TYPE_IGB, 608 .parent = TYPE_PCI_DEVICE, 609 .instance_size = sizeof(IGBState), 610 .class_init = igb_class_init, 611 .instance_init = igb_instance_init, 612 .interfaces = (InterfaceInfo[]) { 613 { INTERFACE_PCIE_DEVICE }, 614 { } 615 }, 616 }; 617 618 static void igb_register_types(void) 619 { 620 type_register_static(&igb_info); 621 } 622 623 type_init(igb_register_types) 624