/*
 * QEMU e1000 emulation
 *
 * Software developer's manual:
 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
 *
 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
 * Copyright (c) 2008 Qumranet
 * Based on work done by:
 * Copyright (c) 2007 Dan Aloni
 * Copyright (c) 2004 Antony T Curtis
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */


#include "hw/hw.h"
#include "hw/pci/pci.h"
#include "net/net.h"
#include "net/checksum.h"
#include "hw/loader.h"
#include "sysemu/sysemu.h"
#include "sysemu/dma.h"
#include "qemu/iov.h"

#include "e1000_regs.h"

#define E1000_DEBUG

#ifdef E1000_DEBUG
enum {
    DEBUG_GENERAL, DEBUG_IO, DEBUG_MMIO, DEBUG_INTERRUPT,
    DEBUG_RX, DEBUG_TX, DEBUG_MDIC, DEBUG_EEPROM,
    DEBUG_UNKNOWN, DEBUG_TXSUM, DEBUG_TXERR, DEBUG_RXERR,
    DEBUG_RXFILTER, DEBUG_PHY, DEBUG_NOTYET,
};
#define DBGBIT(x)    (1<<DEBUG_##x)
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif

#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/* this is the size past which hardware will drop packets when setting LPE=0 */
#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
/* this is the size past which hardware will drop packets when setting LPE=1 */
#define MAXIMUM_ETHERNET_LPE_SIZE 16384

#define MAXIMUM_ETHERNET_HDR_LEN (14+4)

/*
 * HW models:
 *  E1000_DEV_ID_82540EM works with Windows and Linux
 *  E1000_DEV_ID_82573L OK with Windows and Linux 2.6.22,
 *   appears to perform better than 82540EM, but breaks with Linux 2.6.18
 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
 *  Others never tested
 */
enum { E1000_DEVID = E1000_DEV_ID_82540EM };

/*
 * May need to specify additional MAC-to-PHY entries --
 * Intel's Windows driver refuses to initialize unless they match
 */
enum {
    PHY_ID2_INIT = E1000_DEVID == E1000_DEV_ID_82573L ?         0xcc2 :
                   E1000_DEVID == E1000_DEV_ID_82544GC_COPPER ? 0xc30 :
                   /* default to E1000_DEV_ID_82540EM */        0xc20
};
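/*
 * Illustrative note: guest drivers typically probe the PHY by reading
 * PHY_ID1/PHY_ID2 through the MDIC register and comparing them with the
 * IDs they expect for the MAC they detected, which is why PHY_ID2_INIT
 * above has to track E1000_DEVID.
 */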
typedef struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;
    MemoryRegion mmio;
    MemoryRegion io;

    uint32_t mac_reg[0x8000];
    uint16_t phy_reg[0x20];
    uint16_t eeprom_data[64];

    uint32_t rxbuf_size;
    uint32_t rxbuf_min_shift;
    struct e1000_tx {
        unsigned char header[256];
        unsigned char vlan_header[4];
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        uint16_t size;
        unsigned char sum_needed;
        unsigned char vlan_needed;
        uint8_t ipcss;
        uint8_t ipcso;
        uint16_t ipcse;
        uint8_t tucss;
        uint8_t tucso;
        uint16_t tucse;
        uint8_t hdr_len;
        uint16_t mss;
        uint32_t paylen;
        uint16_t tso_frames;
        char tse;
        int8_t ip;
        int8_t tcp;
        char cptse;     // current packet tse bit
    } tx;

    struct {
        uint32_t val_in;    // shifted in from guest driver
        uint16_t bitnum_in;
        uint16_t bitnum_out;
        uint16_t reading;
        uint32_t old_eecd;
    } eecd_state;

    QEMUTimer *autoneg_timer;

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
    uint32_t compat_flags;
} E1000State;

#define TYPE_E1000 "e1000"

#define E1000(obj) \
    OBJECT_CHECK(E1000State, (obj), TYPE_E1000)

#define defreg(x)    x = (E1000_##x>>2)
enum {
    defreg(CTRL),    defreg(EECD),    defreg(EERD),    defreg(GPRC),
    defreg(GPTC),    defreg(ICR),     defreg(ICS),     defreg(IMC),
    defreg(IMS),     defreg(LEDCTL),  defreg(MANC),    defreg(MDIC),
    defreg(MPC),     defreg(PBA),     defreg(RCTL),    defreg(RDBAH),
    defreg(RDBAL),   defreg(RDH),     defreg(RDLEN),   defreg(RDT),
    defreg(STATUS),  defreg(SWSM),    defreg(TCTL),    defreg(TDBAH),
    defreg(TDBAL),   defreg(TDH),     defreg(TDLEN),   defreg(TDT),
    defreg(TORH),    defreg(TORL),    defreg(TOTH),    defreg(TOTL),
    defreg(TPR),     defreg(TPT),     defreg(TXDCTL),  defreg(WUFC),
    defreg(RA),      defreg(MTA),     defreg(CRCERRS), defreg(VFTA),
    defreg(VET),     defreg(RDTR),    defreg(RADV),    defreg(TADV),
    defreg(ITR),
};
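/*
 * defreg() example (register offsets from e1000_regs.h): E1000_RDT is
 * 0x2818, so defreg(RDT) defines RDT as 0x2818 >> 2 == 0xa06, and
 * mac_reg[RDT] aliases the 32-bit register at MMIO offset 0x2818.
 */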
static void
e1000_link_down(E1000State *s)
{
    s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
    s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
}

static void
e1000_link_up(E1000State *s)
{
    s->mac_reg[STATUS] |= E1000_STATUS_LU;
    s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
}

static void
set_phy_ctrl(E1000State *s, int index, uint16_t val)
{
    /*
     * QEMU 1.3 does not support link auto-negotiation emulation, so if we
     * migrate during auto negotiation, after migration the link will be
     * down.
     */
    if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
        return;
    }
    if ((val & MII_CR_AUTO_NEG_EN) && (val & MII_CR_RESTART_AUTO_NEG)) {
        e1000_link_down(s);
        s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
        DBGOUT(PHY, "Start link auto negotiation\n");
        timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
    }
}

static void
e1000_autoneg_timer(void *opaque)
{
    E1000State *s = opaque;
    if (!qemu_get_queue(s->nic)->link_down) {
        e1000_link_up(s);
    }
    s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
    DBGOUT(PHY, "Auto negotiation is completed\n");
}

static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [PHY_CTRL] = set_phy_ctrl,
};

enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };

enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
static const char phy_regcap[0x20] = {
    [PHY_STATUS] = PHY_R,       [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_ID1] = PHY_R,          [M88E1000_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_CTRL] = PHY_RW,        [PHY_1000T_CTRL] = PHY_RW,
    [PHY_LP_ABILITY] = PHY_R,   [PHY_1000T_STATUS] = PHY_R,
    [PHY_AUTONEG_ADV] = PHY_RW, [M88E1000_RX_ERR_CNTR] = PHY_R,
    [PHY_ID2] = PHY_R,          [M88E1000_PHY_SPEC_STATUS] = PHY_R
};

static const uint16_t phy_reg_init[] = {
    [PHY_CTRL] = 0x1140,
    [PHY_STATUS] = 0x794d, /* link initially up with not completed autoneg */
    [PHY_ID1] = 0x141,   [PHY_ID2] = PHY_ID2_INIT,
    [PHY_1000T_CTRL] = 0x0e00,  [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,  [PHY_AUTONEG_ADV] = 0xde1,
    [PHY_LP_ABILITY] = 0x1e0,   [PHY_1000T_STATUS] = 0x3c00,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
};

static const uint32_t mac_reg_init[] = {
    [PBA] =     0x00100030,
    [LEDCTL] =  0x602,
    [CTRL] =    E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS] =  0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC] =    E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};

/* Helper function, *curr == 0 means the value is not set */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value && (*curr == 0 || value < *curr)) {
        *curr = value;
    }
}
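/*
 * Worked example (illustrative): with ITR == 1000 and no RADV/TADV
 * contribution, mit_update_delay() leaves mit_delay at 1000, and the
 * mitigation timer armed below fires 1000 * 256ns == 256us later,
 * throttling interrupts to roughly 3900 per second.
 */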
static void
set_interrupt_cause(E1000State *s, int index, uint32_t val)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t pending_ints;
    uint32_t mit_delay;

    if (val && (E1000_DEVID >= E1000_DEV_ID_82547EI_MOBILE)) {
        /* Only for 8257x */
        val |= E1000_ICR_INT_ASSERTED;
    }
    s->mac_reg[ICR] = val;

    /*
     * Make sure ICR and ICS registers have the same value.
     * The spec says that the ICS register is write-only.  However in practice,
     * on real hardware ICS is readable, and for reads it has the same value as
     * ICR (except that ICS does not have the clear on read behaviour of ICR).
     *
     * The VxWorks PRO/1000 driver uses this behaviour.
     */
    s->mac_reg[ICS] = val;

    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
    if (!s->mit_irq_level && pending_ints) {
        /*
         * Here we detect a potential rising edge.  We postpone raising the
         * interrupt line if we are inside the mitigation delay window
         * (s->mit_timer_on == 1).
         * We provide a partial implementation of interrupt mitigation,
         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
         * RADV and TADV, 256ns units for ITR).  RDTR is only used to enable
         * RADV; relative timers based on TIDV and RDTR are not implemented.
         */
        if (s->mit_timer_on) {
            return;
        }
        if (s->compat_flags & E1000_FLAG_MIT) {
            /* Compute the next mitigation delay according to pending
             * interrupts and the current values of RADV (provided
             * RDTR!=0), TADV and ITR.
             * Then rearm the timer.
             */
            mit_delay = 0;
            if (s->mit_ide &&
                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
            }
            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
            }
            mit_update_delay(&mit_delay, s->mac_reg[ITR]);

            if (mit_delay) {
                s->mit_timer_on = 1;
                timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                          mit_delay * 256);
            }
            s->mit_ide = 0;
        }
    }

    s->mit_irq_level = (pending_ints != 0);
    pci_set_irq(d, s->mit_irq_level);
}

static void
e1000_mit_timer(void *opaque)
{
    E1000State *s = opaque;

    s->mit_timer_on = 0;
    /* Call set_interrupt_cause to update the irq level (if necessary). */
    set_interrupt_cause(s, 0, s->mac_reg[ICR]);
}

static void
set_ics(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
        s->mac_reg[IMS]);
    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
}

static int
rxbufsize(uint32_t v)
{
    v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
         E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
         E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
    switch (v) {
    case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
        return 16384;
    case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
        return 8192;
    case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
        return 4096;
    case E1000_RCTL_SZ_1024:
        return 1024;
    case E1000_RCTL_SZ_512:
        return 512;
    case E1000_RCTL_SZ_256:
        return 256;
    }
    return 2048;
}
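/*
 * Decoding example (illustrative): a guest that programs RCTL with BSEX
 * set together with the SZ_4096 encoding gets 4096-byte buffers from the
 * switch above; the reset value (BSEX clear, size bits zero) falls
 * through to the 2048-byte default.
 */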
static void e1000_reset(void *opaque)
{
    E1000State *d = opaque;
    uint8_t *macaddr = d->conf.macaddr.a;
    int i;

    timer_del(d->autoneg_timer);
    timer_del(d->mit_timer);
    d->mit_timer_on = 0;
    d->mit_irq_level = 0;
    d->mit_ide = 0;
    memset(d->phy_reg, 0, sizeof d->phy_reg);
    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
    memset(d->mac_reg, 0, sizeof d->mac_reg);
    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
    d->rxbuf_min_shift = 1;
    memset(&d->tx, 0, sizeof d->tx);

    if (qemu_get_queue(d->nic)->link_down) {
        e1000_link_down(d);
    }

    /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
    d->mac_reg[RA] = 0;
    d->mac_reg[RA + 1] = E1000_RAH_AV;
    for (i = 0; i < 4; i++) {
        d->mac_reg[RA] |= macaddr[i] << (8 * i);
        d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
    }
    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
}

static void
set_ctrl(E1000State *s, int index, uint32_t val)
{
    /* RST is self clearing */
    s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
}

static void
set_rx_control(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[RCTL] = val;
    s->rxbuf_size = rxbufsize(val);
    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
           s->mac_reg[RCTL]);
    qemu_flush_queued_packets(qemu_get_queue(s->nic));
}

static void
set_mdic(E1000State *s, int index, uint32_t val)
{
    uint32_t data = val & E1000_MDIC_DATA_MASK;
    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);

    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
    else if (val & E1000_MDIC_OP_READ) {
        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
        if (!(phy_regcap[addr] & PHY_R)) {
            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else
            val = (val ^ data) | s->phy_reg[addr];
    } else if (val & E1000_MDIC_OP_WRITE) {
        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
        if (!(phy_regcap[addr] & PHY_W)) {
            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else {
            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
                phyreg_writeops[addr](s, index, data);
            }
            s->phy_reg[addr] = data;
        }
    }
    s->mac_reg[MDIC] = val | E1000_MDIC_READY;

    if (val & E1000_MDIC_INT_EN) {
        set_ics(s, 0, E1000_ICR_MDAC);
    }
}

static uint32_t
get_eecd(E1000State *s, int index)
{
    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;

    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
           s->eecd_state.bitnum_out, s->eecd_state.reading);
    if (!s->eecd_state.reading ||
        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
        ret |= E1000_EECD_DO;
    return ret;
}

static void
set_eecd(E1000State *s, int index, uint32_t val)
{
    uint32_t oldval = s->eecd_state.old_eecd;

    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    if (!(E1000_EECD_CS & val))             // CS inactive; nothing to do
        return;
    if (E1000_EECD_CS & (val ^ oldval)) {   // CS rise edge; reset state
        s->eecd_state.val_in = 0;
        s->eecd_state.bitnum_in = 0;
        s->eecd_state.bitnum_out = 0;
        s->eecd_state.reading = 0;
    }
    if (!(E1000_EECD_SK & (val ^ oldval)))  // no clock edge
        return;
    if (!(E1000_EECD_SK & val)) {           // falling edge
        s->eecd_state.bitnum_out++;
        return;
    }
    s->eecd_state.val_in <<= 1;
    if (val & E1000_EECD_DI)
        s->eecd_state.val_in |= 1;
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
            EEPROM_READ_OPCODE_MICROWIRE);
    }
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
           s->eecd_state.reading);
}
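/*
 * Illustrative Microwire read, as decoded by set_eecd()/get_eecd()
 * above: the guest raises CS, clocks 9 bits in on DI (the 3-bit READ
 * opcode followed by a 6-bit word address), then clocks 16 data bits
 * out of DO, most significant bit first.  bitnum_out is primed to
 * (addr << 4) - 1 because it is incremented on the falling SK edge
 * that precedes each data bit.
 */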
static uint32_t
flash_eerd_read(E1000State *s, int x)
{
    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;

    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
        return (s->mac_reg[EERD]);

    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
        return (E1000_EEPROM_RW_REG_DONE | r);

    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
           E1000_EEPROM_RW_REG_DONE | r);
}

static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;

    if (cse && cse < n)
        n = cse + 1;
    if (sloc < n-1) {
        sum = net_checksum_add(n-css, data+css);
        stw_be_p(data + sloc, net_checksum_finish(sum));
    }
}

static inline int
vlan_enabled(E1000State *s)
{
    return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
}

static inline int
vlan_rx_filter_enabled(E1000State *s)
{
    return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
}

static inline int
is_vlan_packet(E1000State *s, const uint8_t *buf)
{
    return (be16_to_cpup((uint16_t *)(buf + 12)) ==
            le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
}

static inline int
is_vlan_txd(uint32_t txd_lower)
{
    return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
}

/* FCS aka Ethernet CRC-32.  We don't get it from backends and can't
 * fill it in, just pad descriptor length by 4 bytes unless guest
 * told us to strip it off the packet. */
static inline int
fcs_len(E1000State *s)
{
    return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
}

static void
e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
{
    NetClientState *nc = qemu_get_queue(s->nic);
    if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
        nc->info->receive(nc, buf, size);
    } else {
        qemu_send_packet(nc, buf, size);
    }
}
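/*
 * Worked TSO example (illustrative): with mss == 1460, hdr_len == 54
 * and paylen == 4380, xmit_seg() below runs three times; each pass
 * rewrites the IP total-length and TCP sequence-number fields for one
 * mss-sized segment before handing it to e1000_send_packet().
 */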
static void
xmit_seg(E1000State *s)
{
    uint16_t len, *sp;
    unsigned int frames = s->tx.tso_frames, css, sofar, n;
    struct e1000_tx *tp = &s->tx;

    if (tp->tse && tp->cptse) {
        css = tp->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (tp->ip) {        // IPv4
            stw_be_p(tp->data+css+2, tp->size - css);
            stw_be_p(tp->data+css+4,
                     be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
        } else               // IPv6
            stw_be_p(tp->data+css+4, tp->size - css);
        css = tp->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
        if (tp->tcp) {
            sofar = frames * tp->mss;
            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
            if (tp->paylen - sofar > tp->mss)
                tp->data[css + 13] &= ~9;    // PSH, FIN
        } else               // UDP
            stw_be_p(tp->data+css+4, len);
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            // add pseudo-header length before checksum calculation
            sp = (uint16_t *)(tp->data + tp->tucso);
            phsum = be16_to_cpup(sp) + len;
            phsum = (phsum >> 16) + (phsum & 0xffff);
            stw_be_p(sp, phsum);
        }
        tp->tso_frames++;
    }

    if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
        putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
        putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
    if (tp->vlan_needed) {
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else
        e1000_send_packet(s, tp->data, tp->size);
    s->mac_reg[TPT]++;
    s->mac_reg[GPTC]++;
    n = s->mac_reg[TOTL];
    if ((s->mac_reg[TOTL] += s->tx.size) < n)
        s->mac_reg[TOTH]++;
}
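/*
 * Descriptor decoding used below (illustrative): DEXT set with DTYP
 * 0000b marks a TSO/checksum context descriptor, DEXT set with DTYP
 * 0001b a data descriptor, and DEXT clear a legacy descriptor.
 */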
static void
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
    unsigned int msh = 0xfffff;
    uint64_t addr;
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    struct e1000_tx *tp = &s->tx;

    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
    if (dtype == E1000_TXD_CMD_DEXT) {    // context descriptor
        op = le32_to_cpu(xp->cmd_and_length);
        tp->ipcss = xp->lower_setup.ip_fields.ipcss;
        tp->ipcso = xp->lower_setup.ip_fields.ipcso;
        tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
        tp->tucss = xp->upper_setup.tcp_fields.tucss;
        tp->tucso = xp->upper_setup.tcp_fields.tucso;
        tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
        tp->paylen = op & 0xfffff;
        tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
        tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
        tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
        tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
        tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
        tp->tso_frames = 0;
        if (tp->tucso == 0) {    // this is probably wrong
            DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
            tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
        }
        return;
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
        // data descriptor
        if (tp->size == 0) {
            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
        }
        tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
    } else {
        // legacy descriptor
        tp->cptse = 0;
    }

    if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
        tp->vlan_needed = 1;
        stw_be_p(tp->vlan_header,
                 le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
        stw_be_p(tp->vlan_header + 2,
                 le16_to_cpu(dp->upper.fields.special));
    }

    addr = le64_to_cpu(dp->buffer_addr);
    if (tp->tse && tp->cptse) {
        msh = tp->hdr_len + tp->mss;
        do {
            bytes = split_size;
            if (tp->size + bytes > msh)
                bytes = msh - tp->size;

            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
            pci_dma_read(d, addr, tp->data + tp->size, bytes);
            sz = tp->size + bytes;
            if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
                memmove(tp->header, tp->data, tp->hdr_len);
            }
            tp->size = sz;
            addr += bytes;
            if (sz == msh) {
                xmit_seg(s);
                memmove(tp->data, tp->header, tp->hdr_len);
                tp->size = tp->hdr_len;
            }
        } while (split_size -= bytes);
    } else if (!tp->tse && tp->cptse) {
        // context descriptor TSE is not set, while data descriptor TSE is set
        DBGOUT(TXERR, "TCP segmentation error\n");
    } else {
        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
        pci_dma_read(d, addr, tp->data + tp->size, split_size);
        tp->size += split_size;
    }

    if (!(txd_lower & E1000_TXD_CMD_EOP))
        return;
    if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
        xmit_seg(s);
    }
    tp->tso_frames = 0;
    tp->sum_needed = 0;
    tp->vlan_needed = 0;
    tp->size = 0;
    tp->cptse = 0;
}

static uint32_t
txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);

    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
        return 0;
    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
    dp->upper.data = cpu_to_le32(txd_upper);
    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
                  &dp->upper, sizeof(dp->upper));
    return E1000_ICR_TXDW;
}

static uint64_t tx_desc_base(E1000State *s)
{
    uint64_t bah = s->mac_reg[TDBAH];
    uint64_t bal = s->mac_reg[TDBAL] & ~0xf;

    return (bah << 32) + bal;
}
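/*
 * Ring arithmetic sketch (illustrative): with TDLEN == 4096 the ring
 * holds 4096 / sizeof(struct e1000_tx_desc) == 256 descriptors, so
 * start_xmit() below wraps TDH back to 0 after descriptor 255.
 */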
static void
start_xmit(E1000State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    dma_addr_t base;
    struct e1000_tx_desc desc;
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;

    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
        DBGOUT(TX, "tx disabled\n");
        return;
    }

    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
        base = tx_desc_base(s) +
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
        pci_dma_read(d, base, &desc, sizeof(desc));

        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
               desc.upper.data);

        process_tx_desc(s, &desc);
        cause |= txdesc_writeback(s, base, &desc);

        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
            s->mac_reg[TDH] = 0;
        /*
         * the following could happen only if guest sw assigns
         * bogus values to TDT/TDLEN.
         * there's nothing too intelligent we could do about this.
         */
        if (s->mac_reg[TDH] == tdh_start) {
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
            break;
        }
    }
    set_ics(s, 0, cause);
}

static int
receive_filter(E1000State *s, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const int mta_shift[] = {4, 3, 2, 0};
    uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;

    if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
        uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
        uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
                                     ((vid >> 5) & 0x7f));
        if ((vfta & (1 << (vid & 0x1f))) == 0)
            return 0;
    }

    if (rctl & E1000_RCTL_UPE)                      // promiscuous
        return 1;

    if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE))    // promiscuous mcast
        return 1;

    if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
        return 1;

    for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
        if (!(rp[1] & E1000_RAH_AV))
            continue;
        ra[0] = cpu_to_le32(rp[0]);
        ra[1] = cpu_to_le32(rp[1]);
        if (!memcmp(buf, (uint8_t *)ra, 6)) {
            DBGOUT(RXFILTER,
                   "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
                   (int)(rp - s->mac_reg - RA)/2,
                   buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
            return 1;
        }
    }
    DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);

    f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
    f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
    if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
        return 1;
    DBGOUT(RXFILTER,
           "dropping, inexact filter mismatch: "
           "%02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
           (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
           s->mac_reg[MTA + (f >> 5)]);

    return 0;
}
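/*
 * Hash example (illustrative): with RCTL.MO == 0 the multicast filter
 * above uses the top 12 bits of the destination address.  For
 * 01:80:c2:00:00:0e, f == (((0x0e << 8) | 0x00) >> 4) & 0xfff == 0xe0,
 * which selects bit 0 of MTA[7].
 */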
static void
e1000_set_link_status(NetClientState *nc)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    uint32_t old_status = s->mac_reg[STATUS];

    if (nc->link_down) {
        e1000_link_down(s);
    } else {
        e1000_link_up(s);
    }

    if (s->mac_reg[STATUS] != old_status)
        set_ics(s, 0, E1000_ICR_LSC);
}

static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
{
    int bufs;
    /* Fast-path short packets */
    if (total_size <= s->rxbuf_size) {
        return s->mac_reg[RDH] != s->mac_reg[RDT];
    }
    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
        bufs = s->mac_reg[RDLEN] / sizeof(struct e1000_rx_desc) +
               s->mac_reg[RDT] - s->mac_reg[RDH];
    } else {
        return false;
    }
    return total_size <= bufs * s->rxbuf_size;
}

static int
e1000_can_receive(NetClientState *nc)
{
    E1000State *s = qemu_get_nic_opaque(nc);

    return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
        (s->mac_reg[RCTL] & E1000_RCTL_EN) && e1000_has_rxbufs(s, 1);
}

static uint64_t rx_desc_base(E1000State *s)
{
    uint64_t bah = s->mac_reg[RDBAH];
    uint64_t bal = s->mac_reg[RDBAL] & ~0xf;

    return (bah << 32) + bal;
}

static ssize_t
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0;
    uint8_t min_buf[MIN_BUF_SIZE];
    struct iovec min_iov;
    uint8_t *filter_buf = iov->iov_base;
    size_t size = iov_size(iov, iovcnt);
    size_t iov_ofs = 0;
    size_t desc_offset;
    size_t desc_size;
    size_t total_size;

    if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
        return -1;
    }

    if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
        return -1;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        iov_to_buf(iov, iovcnt, 0, min_buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        min_iov.iov_base = filter_buf = min_buf;
        min_iov.iov_len = size = sizeof(min_buf);
        iovcnt = 1;
        iov = &min_iov;
    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
        /* This is very unlikely, but may happen. */
        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
        filter_buf = min_buf;
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
        (size > MAXIMUM_ETHERNET_VLAN_SIZE
        && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
        && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
        return size;
    }

    if (!receive_filter(s, filter_buf, size)) {
        return size;
    }

    if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
        vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
                                                             + 14)));
        iov_ofs = 4;
        if (filter_buf == iov->iov_base) {
            memmove(filter_buf + 4, filter_buf, 12);
        } else {
            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
            while (iov->iov_len <= iov_ofs) {
                iov_ofs -= iov->iov_len;
                iov++;
            }
        }
        vlan_status = E1000_RXD_STAT_VP;
        size -= 4;
    }

    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    total_size = size + fcs_len(s);
    if (!e1000_has_rxbufs(s, total_size)) {
        set_ics(s, 0, E1000_ICS_RXO);
        return -1;
    }
    do {
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        if (desc.buffer_addr) {
            if (desc_offset < size) {
                size_t iov_copy;
                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                do {
                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
                    copy_size -= iov_copy;
                    ba += iov_copy;
                    iov_ofs += iov_copy;
                    if (iov_ofs == iov->iov_len) {
                        iov++;
                        iov_ofs = 0;
                    }
                } while (copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { // as per intel docs; skip descriptors with null buf addr
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        pci_dma_write(d, base, &desc, sizeof(desc));

        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        if (s->mac_reg[RDH] == rdh_start) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            set_ics(s, 0, E1000_ICS_RXO);
            return -1;
        }
    } while (desc_offset < total_size);

    s->mac_reg[GPRC]++;
    s->mac_reg[TPR]++;
    /* TOR - Total Octets Received:
     * This register includes bytes received in a packet from the <Destination
     * Address> field through the <CRC> field, inclusively.
     */
    n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
    if (n < s->mac_reg[TORL])
        s->mac_reg[TORH]++;
    s->mac_reg[TORL] = n;

    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}
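/*
 * Illustrative: a 5000-byte frame (accepted only with RCTL.LPE or
 * RCTL.SBP set) plus 4 bytes of FCS padding is spread by the loop above
 * across three 2048-byte buffers as 2048 + 2048 + 908 bytes; only the
 * last descriptor gets EOP set.
 */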
static ssize_t
e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    const struct iovec iov = {
        .iov_base = (uint8_t *)buf,
        .iov_len = size
    };

    return e1000_receive_iov(nc, &iov, 1);
}

static uint32_t
mac_readreg(E1000State *s, int index)
{
    return s->mac_reg[index];
}

static uint32_t
mac_icr_read(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[ICR];

    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
    set_interrupt_cause(s, 0, 0);
    return ret;
}

static uint32_t
mac_read_clr4(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[index];

    s->mac_reg[index] = 0;
    return ret;
}

static uint32_t
mac_read_clr8(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[index];

    s->mac_reg[index] = 0;
    s->mac_reg[index-1] = 0;
    return ret;
}

static void
mac_writereg(E1000State *s, int index, uint32_t val)
{
    uint32_t macaddr[2];

    s->mac_reg[index] = val;

    if (index == RA || index == RA + 1) {
        macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
        macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
        qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
    }
}

static void
set_rdt(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xffff;
    if (e1000_has_rxbufs(s, 1)) {
        qemu_flush_queued_packets(qemu_get_queue(s->nic));
    }
}

static void
set_16bit(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xffff;
}

static void
set_dlen(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xfff80;
}

static void
set_tctl(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val;
    s->mac_reg[TDT] &= 0xffff;
    start_xmit(s);
}

static void
set_icr(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_icr %x\n", val);
    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
}
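/*
 * Acknowledge example (illustrative): a guest clears the causes it has
 * serviced by writing them back to ICR; set_icr() above drops exactly
 * those bits, while set_imc()/set_ims() below adjust the mask and then
 * call set_ics(s, 0, 0) to re-evaluate the interrupt line level.
 */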
static void
set_imc(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] &= ~val;
    set_ics(s, 0, 0);
}

static void
set_ims(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] |= val;
    set_ics(s, 0, 0);
}

#define getreg(x)    [x] = mac_readreg
static uint32_t (*macreg_readops[])(E1000State *, int) = {
    getreg(PBA),    getreg(RCTL),   getreg(TDH),    getreg(TXDCTL),
    getreg(WUFC),   getreg(TDT),    getreg(CTRL),   getreg(LEDCTL),
    getreg(MANC),   getreg(MDIC),   getreg(SWSM),   getreg(STATUS),
    getreg(TORL),   getreg(TOTL),   getreg(IMS),    getreg(TCTL),
    getreg(RDH),    getreg(RDT),    getreg(VET),    getreg(ICS),
    getreg(TDBAL),  getreg(TDBAH),  getreg(RDBAH),  getreg(RDBAL),
    getreg(TDLEN),  getreg(RDLEN),  getreg(RDTR),   getreg(RADV),
    getreg(TADV),   getreg(ITR),

    [TOTH] = mac_read_clr8, [TORH] = mac_read_clr8, [GPRC] = mac_read_clr4,
    [GPTC] = mac_read_clr4, [TPR] = mac_read_clr4,  [TPT] = mac_read_clr4,
    [ICR] = mac_icr_read,   [EECD] = get_eecd,      [EERD] = flash_eerd_read,
    [CRCERRS ... MPC] = &mac_readreg,
    [RA ... RA+31] = &mac_readreg,
    [MTA ... MTA+127] = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
};
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
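/*
 * Dispatch example (illustrative): a guest write to MMIO offset 0x3818
 * (E1000_TDT) is reduced by e1000_mmio_write() below to index TDT and
 * lands in macreg_writeops[TDT] == set_tctl, which is how ringing the
 * TX tail doorbell kicks start_xmit().
 */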
#define putreg(x)    [x] = mac_writereg
static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
    putreg(PBA),    putreg(EERD),   putreg(SWSM),   putreg(WUFC),
    putreg(TDBAL),  putreg(TDBAH),  putreg(TXDCTL), putreg(RDBAH),
    putreg(RDBAL),  putreg(LEDCTL), putreg(VET),
    [TDLEN] = set_dlen,  [RDLEN] = set_dlen,      [TCTL] = set_tctl,
    [TDT] = set_tctl,    [MDIC] = set_mdic,       [ICS] = set_ics,
    [TDH] = set_16bit,   [RDH] = set_16bit,       [RDT] = set_rdt,
    [IMC] = set_imc,     [IMS] = set_ims,         [ICR] = set_icr,
    [EECD] = set_eecd,   [RCTL] = set_rx_control, [CTRL] = set_ctrl,
    [RDTR] = set_16bit,  [RADV] = set_16bit,      [TADV] = set_16bit,
    [ITR] = set_16bit,
    [RA ... RA+31] = &mac_writereg,
    [MTA ... MTA+127] = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
};

enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };

static void
e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
                 unsigned size)
{
    E1000State *s = opaque;
    unsigned int index = (addr & 0x1ffff) >> 2;

    if (index < NWRITEOPS && macreg_writeops[index]) {
        macreg_writeops[index](s, index, val);
    } else if (index < NREADOPS && macreg_readops[index]) {
        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val);
    } else {
        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
               index<<2, val);
    }
}

static uint64_t
e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
{
    E1000State *s = opaque;
    unsigned int index = (addr & 0x1ffff) >> 2;

    if (index < NREADOPS && macreg_readops[index]) {
        return macreg_readops[index](s, index);
    }
    DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
    return 0;
}

static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};

static uint64_t e1000_io_read(void *opaque, hwaddr addr,
                              unsigned size)
{
    E1000State *s = opaque;

    (void)s;
    return 0;
}

static void e1000_io_write(void *opaque, hwaddr addr,
                           uint64_t val, unsigned size)
{
    E1000State *s = opaque;

    (void)s;
}

static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};

static bool is_version_1(void *opaque, int version_id)
{
    return version_id == 1;
}

static void e1000_pre_save(void *opaque)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /* If the mitigation timer is active, emulate a timeout now. */
    if (s->mit_timer_on) {
        e1000_mit_timer(s);
    }

    if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
        return;
    }

    /*
     * If link is down and auto-negotiation is ongoing, complete
     * auto-negotiation immediately.  This allows us to look at
     * MII_SR_AUTONEG_COMPLETE to infer link status on load.
     */
    if (nc->link_down &&
        s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
        s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG) {
        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
    }
}
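/*
 * Illustrative save/load timeline: pre_save() above completes a pending
 * auto-negotiation when the backend link is down, so the
 * MII_SR_AUTONEG_COMPLETE bit it stores lets post_load() below tell a
 * finished negotiation apart from one that must be restarted with a
 * fresh 500ms timer.
 */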
static int e1000_post_load(void *opaque, int version_id)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    if (!(s->compat_flags & E1000_FLAG_MIT)) {
        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
            s->mac_reg[TADV] = 0;
        s->mit_irq_level = false;
    }
    s->mit_ide = 0;
    s->mit_timer_on = false;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in mac_reg[STATUS].
     * Alternatively, restart link negotiation if it was in progress. */
    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;

    if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
        return 0;
    }

    if (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
        s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG &&
        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
        nc->link_down = false;
        timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
    }

    return 0;
}

static bool e1000_mit_state_needed(void *opaque)
{
    E1000State *s = opaque;

    return s->compat_flags & E1000_FLAG_MIT;
}

static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
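/*
 * Compatibility note (illustrative): the mit_state subsection is only
 * emitted when the "mitigation" property is on, so running with
 * mitigation=off produces a migration stream that QEMU 1.3 and older,
 * which know nothing about these fields, can still load.
 */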
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField []) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base. */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        VMSTATE_UINT8(tx.ipcss, E1000State),
        VMSTATE_UINT8(tx.ipcso, E1000State),
        VMSTATE_UINT16(tx.ipcse, E1000State),
        VMSTATE_UINT8(tx.tucss, E1000State),
        VMSTATE_UINT8(tx.tucso, E1000State),
        VMSTATE_UINT16(tx.tucse, E1000State),
        VMSTATE_UINT32(tx.paylen, E1000State),
        VMSTATE_UINT8(tx.hdr_len, E1000State),
        VMSTATE_UINT16(tx.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(tx.ip, E1000State),
        VMSTATE_INT8(tx.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (VMStateSubsection[]) {
        {
            .vmsd = &vmstate_e1000_mit_state,
            .needed = e1000_mit_state_needed,
        }, {
            /* empty */
        }
    }
};

static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, E1000_DEVID, 0x8086, E1000_DEVID, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};

/* PCI interface */

static void
e1000_mmio_setup(E1000State *d)
{
    int i;
    const uint32_t excluded_regs[] = {
        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
    };

    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
                          "e1000-mmio", PNPMMIO_SIZE);
    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
                                     excluded_regs[i+1] - excluded_regs[i] - 4);
    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io",
                          IOPORT_SIZE);
}

static void
e1000_cleanup(NetClientState *nc)
{
    E1000State *s = qemu_get_nic_opaque(nc);

    s->nic = NULL;
}

static void
pci_e1000_uninit(PCIDevice *dev)
{
    E1000State *d = E1000(dev);

    timer_del(d->autoneg_timer);
    timer_free(d->autoneg_timer);
    timer_del(d->mit_timer);
    timer_free(d->mit_timer);
    memory_region_destroy(&d->mmio);
    memory_region_destroy(&d->io);
    qemu_del_nic(d->nic);
}

static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .cleanup = e1000_cleanup,
    .link_status_changed = e1000_set_link_status,
};
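/*
 * Typical usage (illustrative):
 *
 *     qemu-system-x86_64 -netdev user,id=n0 \
 *         -device e1000,netdev=n0,mac=52:54:00:12:34:56
 */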
static int pci_e1000_init(PCIDevice *pci_dev)
{
    DeviceState *dev = DEVICE(pci_dev);
    E1000State *d = E1000(pci_dev);
    uint8_t *pci_conf;
    uint16_t checksum = 0;
    int i;
    uint8_t *macaddr;

    pci_conf = pci_dev->config;

    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;

    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */

    e1000_mmio_setup(d);

    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);

    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);

    memmove(d->eeprom_data, e1000_eeprom_template,
        sizeof e1000_eeprom_template);
    qemu_macaddr_default_if_unset(&d->conf.macaddr);
    macaddr = d->conf.macaddr.a;
    for (i = 0; i < 3; i++)
        d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
    for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
        checksum += d->eeprom_data[i];
    checksum = (uint16_t) EEPROM_SUM - checksum;
    d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;

    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
                          object_get_typename(OBJECT(d)), dev->id, d);

    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);

    add_boot_device_path(d->conf.bootindex, dev, "/ethernet-phy@0");

    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);

    return 0;
}

static void qdev_e1000_reset(DeviceState *dev)
{
    E1000State *d = E1000(dev);
    e1000_reset(d);
}

static Property e1000_properties[] = {
    DEFINE_NIC_PROPERTIES(E1000State, conf),
    DEFINE_PROP_BIT("autonegotiation", E1000State,
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
    DEFINE_PROP_BIT("mitigation", E1000State,
                    compat_flags, E1000_FLAG_MIT_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};

static void e1000_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->init = pci_e1000_init;
    k->exit = pci_e1000_uninit;
    k->romfile = "efi-e1000.rom";
    k->vendor_id = PCI_VENDOR_ID_INTEL;
    k->device_id = E1000_DEVID;
    k->revision = 0x03;
    k->class_id = PCI_CLASS_NETWORK_ETHERNET;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    dc->desc = "Intel Gigabit Ethernet";
    dc->reset = qdev_e1000_reset;
    dc->vmsd = &vmstate_e1000;
    dc->props = e1000_properties;
}

static const TypeInfo e1000_info = {
    .name          = TYPE_E1000,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(E1000State),
    .class_init    = e1000_class_init,
};

static void e1000_register_types(void)
{
    type_register_static(&e1000_info);
}

type_init(e1000_register_types)