1 /* 2 * QEMU e1000 emulation 3 * 4 * Software developer's manual: 5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf 6 * 7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc. 8 * Copyright (c) 2008 Qumranet 9 * Based on work done by: 10 * Copyright (c) 2007 Dan Aloni 11 * Copyright (c) 2004 Antony T Curtis 12 * 13 * This library is free software; you can redistribute it and/or 14 * modify it under the terms of the GNU Lesser General Public 15 * License as published by the Free Software Foundation; either 16 * version 2 of the License, or (at your option) any later version. 17 * 18 * This library is distributed in the hope that it will be useful, 19 * but WITHOUT ANY WARRANTY; without even the implied warranty of 20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 * Lesser General Public License for more details. 22 * 23 * You should have received a copy of the GNU Lesser General Public 24 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 25 */ 26 27 28 #include "hw/hw.h" 29 #include "hw/pci/pci.h" 30 #include "net/net.h" 31 #include "net/checksum.h" 32 #include "hw/loader.h" 33 #include "sysemu/sysemu.h" 34 #include "sysemu/dma.h" 35 36 #include "e1000_regs.h" 37 38 #define E1000_DEBUG 39 40 #ifdef E1000_DEBUG 41 enum { 42 DEBUG_GENERAL, DEBUG_IO, DEBUG_MMIO, DEBUG_INTERRUPT, 43 DEBUG_RX, DEBUG_TX, DEBUG_MDIC, DEBUG_EEPROM, 44 DEBUG_UNKNOWN, DEBUG_TXSUM, DEBUG_TXERR, DEBUG_RXERR, 45 DEBUG_RXFILTER, DEBUG_PHY, DEBUG_NOTYET, 46 }; 47 #define DBGBIT(x) (1<<DEBUG_##x) 48 static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL); 49 50 #define DBGOUT(what, fmt, ...) do { \ 51 if (debugflags & DBGBIT(what)) \ 52 fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \ 53 } while (0) 54 #else 55 #define DBGOUT(what, fmt, ...) do {} while (0) 56 #endif 57 58 #define IOPORT_SIZE 0x40 59 #define PNPMMIO_SIZE 0x20000 60 #define MIN_BUF_SIZE 60 /* Min. octets in an ethernet frame sans FCS */ 61 62 /* this is the size past which hardware will drop packets when setting LPE=0 */ 63 #define MAXIMUM_ETHERNET_VLAN_SIZE 1522 64 /* this is the size past which hardware will drop packets when setting LPE=1 */ 65 #define MAXIMUM_ETHERNET_LPE_SIZE 16384 66 67 /* 68 * HW models: 69 * E1000_DEV_ID_82540EM works with Windows and Linux 70 * E1000_DEV_ID_82573L OK with windoze and Linux 2.6.22, 71 * appears to perform better than 82540EM, but breaks with Linux 2.6.18 72 * E1000_DEV_ID_82544GC_COPPER appears to work; not well tested 73 * Others never tested 74 */ 75 enum { E1000_DEVID = E1000_DEV_ID_82540EM }; 76 77 /* 78 * May need to specify additional MAC-to-PHY entries -- 79 * Intel's Windows driver refuses to initialize unless they match 80 */ 81 enum { 82 PHY_ID2_INIT = E1000_DEVID == E1000_DEV_ID_82573L ? 0xcc2 : 83 E1000_DEVID == E1000_DEV_ID_82544GC_COPPER ? 0xc30 : 84 /* default to E1000_DEV_ID_82540EM */ 0xc20 85 }; 86 87 typedef struct E1000State_st { 88 /*< private >*/ 89 PCIDevice parent_obj; 90 /*< public >*/ 91 92 NICState *nic; 93 NICConf conf; 94 MemoryRegion mmio; 95 MemoryRegion io; 96 97 uint32_t mac_reg[0x8000]; 98 uint16_t phy_reg[0x20]; 99 uint16_t eeprom_data[64]; 100 101 uint32_t rxbuf_size; 102 uint32_t rxbuf_min_shift; 103 struct e1000_tx { 104 unsigned char header[256]; 105 unsigned char vlan_header[4]; 106 /* Fields vlan and data must not be reordered or separated. */ 107 unsigned char vlan[4]; 108 unsigned char data[0x10000]; 109 uint16_t size; 110 unsigned char sum_needed; 111 unsigned char vlan_needed; 112 uint8_t ipcss; 113 uint8_t ipcso; 114 uint16_t ipcse; 115 uint8_t tucss; 116 uint8_t tucso; 117 uint16_t tucse; 118 uint8_t hdr_len; 119 uint16_t mss; 120 uint32_t paylen; 121 uint16_t tso_frames; 122 char tse; 123 int8_t ip; 124 int8_t tcp; 125 char cptse; // current packet tse bit 126 } tx; 127 128 struct { 129 uint32_t val_in; // shifted in from guest driver 130 uint16_t bitnum_in; 131 uint16_t bitnum_out; 132 uint16_t reading; 133 uint32_t old_eecd; 134 } eecd_state; 135 136 QEMUTimer *autoneg_timer; 137 138 QEMUTimer *mit_timer; /* Mitigation timer. */ 139 bool mit_timer_on; /* Mitigation timer is running. */ 140 bool mit_irq_level; /* Tracks interrupt pin level. */ 141 uint32_t mit_ide; /* Tracks E1000_TXD_CMD_IDE bit. */ 142 143 /* Compatibility flags for migration to/from qemu 1.3.0 and older */ 144 #define E1000_FLAG_AUTONEG_BIT 0 145 #define E1000_FLAG_MIT_BIT 1 146 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT) 147 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT) 148 uint32_t compat_flags; 149 } E1000State; 150 151 #define TYPE_E1000 "e1000" 152 153 #define E1000(obj) \ 154 OBJECT_CHECK(E1000State, (obj), TYPE_E1000) 155 156 #define defreg(x) x = (E1000_##x>>2) 157 enum { 158 defreg(CTRL), defreg(EECD), defreg(EERD), defreg(GPRC), 159 defreg(GPTC), defreg(ICR), defreg(ICS), defreg(IMC), 160 defreg(IMS), defreg(LEDCTL), defreg(MANC), defreg(MDIC), 161 defreg(MPC), defreg(PBA), defreg(RCTL), defreg(RDBAH), 162 defreg(RDBAL), defreg(RDH), defreg(RDLEN), defreg(RDT), 163 defreg(STATUS), defreg(SWSM), defreg(TCTL), defreg(TDBAH), 164 defreg(TDBAL), defreg(TDH), defreg(TDLEN), defreg(TDT), 165 defreg(TORH), defreg(TORL), defreg(TOTH), defreg(TOTL), 166 defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC), 167 defreg(RA), defreg(MTA), defreg(CRCERRS),defreg(VFTA), 168 defreg(VET), defreg(RDTR), defreg(RADV), defreg(TADV), 169 defreg(ITR), 170 }; 171 172 static void 173 e1000_link_down(E1000State *s) 174 { 175 s->mac_reg[STATUS] &= ~E1000_STATUS_LU; 176 s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS; 177 } 178 179 static void 180 e1000_link_up(E1000State *s) 181 { 182 s->mac_reg[STATUS] |= E1000_STATUS_LU; 183 s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS; 184 } 185 186 static void 187 set_phy_ctrl(E1000State *s, int index, uint16_t val) 188 { 189 /* 190 * QEMU 1.3 does not support link auto-negotiation emulation, so if we 191 * migrate during auto negotiation, after migration the link will be 192 * down. 193 */ 194 if (!(s->compat_flags & E1000_FLAG_AUTONEG)) { 195 return; 196 } 197 if ((val & MII_CR_AUTO_NEG_EN) && (val & MII_CR_RESTART_AUTO_NEG)) { 198 e1000_link_down(s); 199 s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE; 200 DBGOUT(PHY, "Start link auto negotiation\n"); 201 timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500); 202 } 203 } 204 205 static void 206 e1000_autoneg_timer(void *opaque) 207 { 208 E1000State *s = opaque; 209 if (!qemu_get_queue(s->nic)->link_down) { 210 e1000_link_up(s); 211 } 212 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; 213 DBGOUT(PHY, "Auto negotiation is completed\n"); 214 } 215 216 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = { 217 [PHY_CTRL] = set_phy_ctrl, 218 }; 219 220 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) }; 221 222 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W }; 223 static const char phy_regcap[0x20] = { 224 [PHY_STATUS] = PHY_R, [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW, 225 [PHY_ID1] = PHY_R, [M88E1000_PHY_SPEC_CTRL] = PHY_RW, 226 [PHY_CTRL] = PHY_RW, [PHY_1000T_CTRL] = PHY_RW, 227 [PHY_LP_ABILITY] = PHY_R, [PHY_1000T_STATUS] = PHY_R, 228 [PHY_AUTONEG_ADV] = PHY_RW, [M88E1000_RX_ERR_CNTR] = PHY_R, 229 [PHY_ID2] = PHY_R, [M88E1000_PHY_SPEC_STATUS] = PHY_R 230 }; 231 232 static const uint16_t phy_reg_init[] = { 233 [PHY_CTRL] = 0x1140, 234 [PHY_STATUS] = 0x794d, /* link initially up with not completed autoneg */ 235 [PHY_ID1] = 0x141, [PHY_ID2] = PHY_ID2_INIT, 236 [PHY_1000T_CTRL] = 0x0e00, [M88E1000_PHY_SPEC_CTRL] = 0x360, 237 [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60, [PHY_AUTONEG_ADV] = 0xde1, 238 [PHY_LP_ABILITY] = 0x1e0, [PHY_1000T_STATUS] = 0x3c00, 239 [M88E1000_PHY_SPEC_STATUS] = 0xac00, 240 }; 241 242 static const uint32_t mac_reg_init[] = { 243 [PBA] = 0x00100030, 244 [LEDCTL] = 0x602, 245 [CTRL] = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 | 246 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU, 247 [STATUS] = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE | 248 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK | 249 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD | 250 E1000_STATUS_LU, 251 [MANC] = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN | 252 E1000_MANC_ARP_EN | E1000_MANC_0298_EN | 253 E1000_MANC_RMCP_EN, 254 }; 255 256 /* Helper function, *curr == 0 means the value is not set */ 257 static inline void 258 mit_update_delay(uint32_t *curr, uint32_t value) 259 { 260 if (value && (*curr == 0 || value < *curr)) { 261 *curr = value; 262 } 263 } 264 265 static void 266 set_interrupt_cause(E1000State *s, int index, uint32_t val) 267 { 268 PCIDevice *d = PCI_DEVICE(s); 269 uint32_t pending_ints; 270 uint32_t mit_delay; 271 272 if (val && (E1000_DEVID >= E1000_DEV_ID_82547EI_MOBILE)) { 273 /* Only for 8257x */ 274 val |= E1000_ICR_INT_ASSERTED; 275 } 276 s->mac_reg[ICR] = val; 277 278 /* 279 * Make sure ICR and ICS registers have the same value. 280 * The spec says that the ICS register is write-only. However in practice, 281 * on real hardware ICS is readable, and for reads it has the same value as 282 * ICR (except that ICS does not have the clear on read behaviour of ICR). 283 * 284 * The VxWorks PRO/1000 driver uses this behaviour. 285 */ 286 s->mac_reg[ICS] = val; 287 288 pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]); 289 if (!s->mit_irq_level && pending_ints) { 290 /* 291 * Here we detect a potential raising edge. We postpone raising the 292 * interrupt line if we are inside the mitigation delay window 293 * (s->mit_timer_on == 1). 294 * We provide a partial implementation of interrupt mitigation, 295 * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for 296 * RADV and TADV, 256ns units for ITR). RDTR is only used to enable 297 * RADV; relative timers based on TIDV and RDTR are not implemented. 298 */ 299 if (s->mit_timer_on) { 300 return; 301 } 302 if (s->compat_flags & E1000_FLAG_MIT) { 303 /* Compute the next mitigation delay according to pending 304 * interrupts and the current values of RADV (provided 305 * RDTR!=0), TADV and ITR. 306 * Then rearm the timer. 307 */ 308 mit_delay = 0; 309 if (s->mit_ide && 310 (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) { 311 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4); 312 } 313 if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) { 314 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4); 315 } 316 mit_update_delay(&mit_delay, s->mac_reg[ITR]); 317 318 if (mit_delay) { 319 s->mit_timer_on = 1; 320 timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 321 mit_delay * 256); 322 } 323 s->mit_ide = 0; 324 } 325 } 326 327 s->mit_irq_level = (pending_ints != 0); 328 qemu_set_irq(d->irq[0], s->mit_irq_level); 329 } 330 331 static void 332 e1000_mit_timer(void *opaque) 333 { 334 E1000State *s = opaque; 335 336 s->mit_timer_on = 0; 337 /* Call set_interrupt_cause to update the irq level (if necessary). */ 338 set_interrupt_cause(s, 0, s->mac_reg[ICR]); 339 } 340 341 static void 342 set_ics(E1000State *s, int index, uint32_t val) 343 { 344 DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR], 345 s->mac_reg[IMS]); 346 set_interrupt_cause(s, 0, val | s->mac_reg[ICR]); 347 } 348 349 static int 350 rxbufsize(uint32_t v) 351 { 352 v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 | 353 E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 | 354 E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256; 355 switch (v) { 356 case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384: 357 return 16384; 358 case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192: 359 return 8192; 360 case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096: 361 return 4096; 362 case E1000_RCTL_SZ_1024: 363 return 1024; 364 case E1000_RCTL_SZ_512: 365 return 512; 366 case E1000_RCTL_SZ_256: 367 return 256; 368 } 369 return 2048; 370 } 371 372 static void e1000_reset(void *opaque) 373 { 374 E1000State *d = opaque; 375 uint8_t *macaddr = d->conf.macaddr.a; 376 int i; 377 378 timer_del(d->autoneg_timer); 379 timer_del(d->mit_timer); 380 d->mit_timer_on = 0; 381 d->mit_irq_level = 0; 382 d->mit_ide = 0; 383 memset(d->phy_reg, 0, sizeof d->phy_reg); 384 memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init); 385 memset(d->mac_reg, 0, sizeof d->mac_reg); 386 memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init); 387 d->rxbuf_min_shift = 1; 388 memset(&d->tx, 0, sizeof d->tx); 389 390 if (qemu_get_queue(d->nic)->link_down) { 391 e1000_link_down(d); 392 } 393 394 /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */ 395 d->mac_reg[RA] = 0; 396 d->mac_reg[RA + 1] = E1000_RAH_AV; 397 for (i = 0; i < 4; i++) { 398 d->mac_reg[RA] |= macaddr[i] << (8 * i); 399 d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0; 400 } 401 } 402 403 static void 404 set_ctrl(E1000State *s, int index, uint32_t val) 405 { 406 /* RST is self clearing */ 407 s->mac_reg[CTRL] = val & ~E1000_CTRL_RST; 408 } 409 410 static void 411 set_rx_control(E1000State *s, int index, uint32_t val) 412 { 413 s->mac_reg[RCTL] = val; 414 s->rxbuf_size = rxbufsize(val); 415 s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1; 416 DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT], 417 s->mac_reg[RCTL]); 418 qemu_flush_queued_packets(qemu_get_queue(s->nic)); 419 } 420 421 static void 422 set_mdic(E1000State *s, int index, uint32_t val) 423 { 424 uint32_t data = val & E1000_MDIC_DATA_MASK; 425 uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); 426 427 if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy # 428 val = s->mac_reg[MDIC] | E1000_MDIC_ERROR; 429 else if (val & E1000_MDIC_OP_READ) { 430 DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr); 431 if (!(phy_regcap[addr] & PHY_R)) { 432 DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr); 433 val |= E1000_MDIC_ERROR; 434 } else 435 val = (val ^ data) | s->phy_reg[addr]; 436 } else if (val & E1000_MDIC_OP_WRITE) { 437 DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data); 438 if (!(phy_regcap[addr] & PHY_W)) { 439 DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr); 440 val |= E1000_MDIC_ERROR; 441 } else { 442 if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) { 443 phyreg_writeops[addr](s, index, data); 444 } 445 s->phy_reg[addr] = data; 446 } 447 } 448 s->mac_reg[MDIC] = val | E1000_MDIC_READY; 449 450 if (val & E1000_MDIC_INT_EN) { 451 set_ics(s, 0, E1000_ICR_MDAC); 452 } 453 } 454 455 static uint32_t 456 get_eecd(E1000State *s, int index) 457 { 458 uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd; 459 460 DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n", 461 s->eecd_state.bitnum_out, s->eecd_state.reading); 462 if (!s->eecd_state.reading || 463 ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >> 464 ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1) 465 ret |= E1000_EECD_DO; 466 return ret; 467 } 468 469 static void 470 set_eecd(E1000State *s, int index, uint32_t val) 471 { 472 uint32_t oldval = s->eecd_state.old_eecd; 473 474 s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS | 475 E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ); 476 if (!(E1000_EECD_CS & val)) // CS inactive; nothing to do 477 return; 478 if (E1000_EECD_CS & (val ^ oldval)) { // CS rise edge; reset state 479 s->eecd_state.val_in = 0; 480 s->eecd_state.bitnum_in = 0; 481 s->eecd_state.bitnum_out = 0; 482 s->eecd_state.reading = 0; 483 } 484 if (!(E1000_EECD_SK & (val ^ oldval))) // no clock edge 485 return; 486 if (!(E1000_EECD_SK & val)) { // falling edge 487 s->eecd_state.bitnum_out++; 488 return; 489 } 490 s->eecd_state.val_in <<= 1; 491 if (val & E1000_EECD_DI) 492 s->eecd_state.val_in |= 1; 493 if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) { 494 s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1; 495 s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) == 496 EEPROM_READ_OPCODE_MICROWIRE); 497 } 498 DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n", 499 s->eecd_state.bitnum_in, s->eecd_state.bitnum_out, 500 s->eecd_state.reading); 501 } 502 503 static uint32_t 504 flash_eerd_read(E1000State *s, int x) 505 { 506 unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START; 507 508 if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0) 509 return (s->mac_reg[EERD]); 510 511 if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG) 512 return (E1000_EEPROM_RW_REG_DONE | r); 513 514 return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) | 515 E1000_EEPROM_RW_REG_DONE | r); 516 } 517 518 static void 519 putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse) 520 { 521 uint32_t sum; 522 523 if (cse && cse < n) 524 n = cse + 1; 525 if (sloc < n-1) { 526 sum = net_checksum_add(n-css, data+css); 527 cpu_to_be16wu((uint16_t *)(data + sloc), 528 net_checksum_finish(sum)); 529 } 530 } 531 532 static inline int 533 vlan_enabled(E1000State *s) 534 { 535 return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0); 536 } 537 538 static inline int 539 vlan_rx_filter_enabled(E1000State *s) 540 { 541 return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0); 542 } 543 544 static inline int 545 is_vlan_packet(E1000State *s, const uint8_t *buf) 546 { 547 return (be16_to_cpup((uint16_t *)(buf + 12)) == 548 le16_to_cpup((uint16_t *)(s->mac_reg + VET))); 549 } 550 551 static inline int 552 is_vlan_txd(uint32_t txd_lower) 553 { 554 return ((txd_lower & E1000_TXD_CMD_VLE) != 0); 555 } 556 557 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't 558 * fill it in, just pad descriptor length by 4 bytes unless guest 559 * told us to strip it off the packet. */ 560 static inline int 561 fcs_len(E1000State *s) 562 { 563 return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4; 564 } 565 566 static void 567 e1000_send_packet(E1000State *s, const uint8_t *buf, int size) 568 { 569 NetClientState *nc = qemu_get_queue(s->nic); 570 if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) { 571 nc->info->receive(nc, buf, size); 572 } else { 573 qemu_send_packet(nc, buf, size); 574 } 575 } 576 577 static void 578 xmit_seg(E1000State *s) 579 { 580 uint16_t len, *sp; 581 unsigned int frames = s->tx.tso_frames, css, sofar, n; 582 struct e1000_tx *tp = &s->tx; 583 584 if (tp->tse && tp->cptse) { 585 css = tp->ipcss; 586 DBGOUT(TXSUM, "frames %d size %d ipcss %d\n", 587 frames, tp->size, css); 588 if (tp->ip) { // IPv4 589 cpu_to_be16wu((uint16_t *)(tp->data+css+2), 590 tp->size - css); 591 cpu_to_be16wu((uint16_t *)(tp->data+css+4), 592 be16_to_cpup((uint16_t *)(tp->data+css+4))+frames); 593 } else // IPv6 594 cpu_to_be16wu((uint16_t *)(tp->data+css+4), 595 tp->size - css); 596 css = tp->tucss; 597 len = tp->size - css; 598 DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len); 599 if (tp->tcp) { 600 sofar = frames * tp->mss; 601 cpu_to_be32wu((uint32_t *)(tp->data+css+4), // seq 602 be32_to_cpupu((uint32_t *)(tp->data+css+4))+sofar); 603 if (tp->paylen - sofar > tp->mss) 604 tp->data[css + 13] &= ~9; // PSH, FIN 605 } else // UDP 606 cpu_to_be16wu((uint16_t *)(tp->data+css+4), len); 607 if (tp->sum_needed & E1000_TXD_POPTS_TXSM) { 608 unsigned int phsum; 609 // add pseudo-header length before checksum calculation 610 sp = (uint16_t *)(tp->data + tp->tucso); 611 phsum = be16_to_cpup(sp) + len; 612 phsum = (phsum >> 16) + (phsum & 0xffff); 613 cpu_to_be16wu(sp, phsum); 614 } 615 tp->tso_frames++; 616 } 617 618 if (tp->sum_needed & E1000_TXD_POPTS_TXSM) 619 putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse); 620 if (tp->sum_needed & E1000_TXD_POPTS_IXSM) 621 putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse); 622 if (tp->vlan_needed) { 623 memmove(tp->vlan, tp->data, 4); 624 memmove(tp->data, tp->data + 4, 8); 625 memcpy(tp->data + 8, tp->vlan_header, 4); 626 e1000_send_packet(s, tp->vlan, tp->size + 4); 627 } else 628 e1000_send_packet(s, tp->data, tp->size); 629 s->mac_reg[TPT]++; 630 s->mac_reg[GPTC]++; 631 n = s->mac_reg[TOTL]; 632 if ((s->mac_reg[TOTL] += s->tx.size) < n) 633 s->mac_reg[TOTH]++; 634 } 635 636 static void 637 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) 638 { 639 PCIDevice *d = PCI_DEVICE(s); 640 uint32_t txd_lower = le32_to_cpu(dp->lower.data); 641 uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D); 642 unsigned int split_size = txd_lower & 0xffff, bytes, sz, op; 643 unsigned int msh = 0xfffff; 644 uint64_t addr; 645 struct e1000_context_desc *xp = (struct e1000_context_desc *)dp; 646 struct e1000_tx *tp = &s->tx; 647 648 s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE); 649 if (dtype == E1000_TXD_CMD_DEXT) { // context descriptor 650 op = le32_to_cpu(xp->cmd_and_length); 651 tp->ipcss = xp->lower_setup.ip_fields.ipcss; 652 tp->ipcso = xp->lower_setup.ip_fields.ipcso; 653 tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse); 654 tp->tucss = xp->upper_setup.tcp_fields.tucss; 655 tp->tucso = xp->upper_setup.tcp_fields.tucso; 656 tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse); 657 tp->paylen = op & 0xfffff; 658 tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len; 659 tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss); 660 tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0; 661 tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0; 662 tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0; 663 tp->tso_frames = 0; 664 if (tp->tucso == 0) { // this is probably wrong 665 DBGOUT(TXSUM, "TCP/UDP: cso 0!\n"); 666 tp->tucso = tp->tucss + (tp->tcp ? 16 : 6); 667 } 668 return; 669 } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) { 670 // data descriptor 671 if (tp->size == 0) { 672 tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8; 673 } 674 tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0; 675 } else { 676 // legacy descriptor 677 tp->cptse = 0; 678 } 679 680 if (vlan_enabled(s) && is_vlan_txd(txd_lower) && 681 (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) { 682 tp->vlan_needed = 1; 683 cpu_to_be16wu((uint16_t *)(tp->vlan_header), 684 le16_to_cpup((uint16_t *)(s->mac_reg + VET))); 685 cpu_to_be16wu((uint16_t *)(tp->vlan_header + 2), 686 le16_to_cpu(dp->upper.fields.special)); 687 } 688 689 addr = le64_to_cpu(dp->buffer_addr); 690 if (tp->tse && tp->cptse) { 691 msh = tp->hdr_len + tp->mss; 692 do { 693 bytes = split_size; 694 if (tp->size + bytes > msh) 695 bytes = msh - tp->size; 696 697 bytes = MIN(sizeof(tp->data) - tp->size, bytes); 698 pci_dma_read(d, addr, tp->data + tp->size, bytes); 699 sz = tp->size + bytes; 700 if (sz >= tp->hdr_len && tp->size < tp->hdr_len) { 701 memmove(tp->header, tp->data, tp->hdr_len); 702 } 703 tp->size = sz; 704 addr += bytes; 705 if (sz == msh) { 706 xmit_seg(s); 707 memmove(tp->data, tp->header, tp->hdr_len); 708 tp->size = tp->hdr_len; 709 } 710 } while (split_size -= bytes); 711 } else if (!tp->tse && tp->cptse) { 712 // context descriptor TSE is not set, while data descriptor TSE is set 713 DBGOUT(TXERR, "TCP segmentation error\n"); 714 } else { 715 split_size = MIN(sizeof(tp->data) - tp->size, split_size); 716 pci_dma_read(d, addr, tp->data + tp->size, split_size); 717 tp->size += split_size; 718 } 719 720 if (!(txd_lower & E1000_TXD_CMD_EOP)) 721 return; 722 if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) { 723 xmit_seg(s); 724 } 725 tp->tso_frames = 0; 726 tp->sum_needed = 0; 727 tp->vlan_needed = 0; 728 tp->size = 0; 729 tp->cptse = 0; 730 } 731 732 static uint32_t 733 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp) 734 { 735 PCIDevice *d = PCI_DEVICE(s); 736 uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data); 737 738 if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS))) 739 return 0; 740 txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) & 741 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU); 742 dp->upper.data = cpu_to_le32(txd_upper); 743 pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp), 744 &dp->upper, sizeof(dp->upper)); 745 return E1000_ICR_TXDW; 746 } 747 748 static uint64_t tx_desc_base(E1000State *s) 749 { 750 uint64_t bah = s->mac_reg[TDBAH]; 751 uint64_t bal = s->mac_reg[TDBAL] & ~0xf; 752 753 return (bah << 32) + bal; 754 } 755 756 static void 757 start_xmit(E1000State *s) 758 { 759 PCIDevice *d = PCI_DEVICE(s); 760 dma_addr_t base; 761 struct e1000_tx_desc desc; 762 uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE; 763 764 if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) { 765 DBGOUT(TX, "tx disabled\n"); 766 return; 767 } 768 769 while (s->mac_reg[TDH] != s->mac_reg[TDT]) { 770 base = tx_desc_base(s) + 771 sizeof(struct e1000_tx_desc) * s->mac_reg[TDH]; 772 pci_dma_read(d, base, &desc, sizeof(desc)); 773 774 DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH], 775 (void *)(intptr_t)desc.buffer_addr, desc.lower.data, 776 desc.upper.data); 777 778 process_tx_desc(s, &desc); 779 cause |= txdesc_writeback(s, base, &desc); 780 781 if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN]) 782 s->mac_reg[TDH] = 0; 783 /* 784 * the following could happen only if guest sw assigns 785 * bogus values to TDT/TDLEN. 786 * there's nothing too intelligent we could do about this. 787 */ 788 if (s->mac_reg[TDH] == tdh_start) { 789 DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n", 790 tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]); 791 break; 792 } 793 } 794 set_ics(s, 0, cause); 795 } 796 797 static int 798 receive_filter(E1000State *s, const uint8_t *buf, int size) 799 { 800 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; 801 static const int mta_shift[] = {4, 3, 2, 0}; 802 uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp; 803 804 if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) { 805 uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14)); 806 uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) + 807 ((vid >> 5) & 0x7f)); 808 if ((vfta & (1 << (vid & 0x1f))) == 0) 809 return 0; 810 } 811 812 if (rctl & E1000_RCTL_UPE) // promiscuous 813 return 1; 814 815 if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE)) // promiscuous mcast 816 return 1; 817 818 if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast)) 819 return 1; 820 821 for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) { 822 if (!(rp[1] & E1000_RAH_AV)) 823 continue; 824 ra[0] = cpu_to_le32(rp[0]); 825 ra[1] = cpu_to_le32(rp[1]); 826 if (!memcmp(buf, (uint8_t *)ra, 6)) { 827 DBGOUT(RXFILTER, 828 "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n", 829 (int)(rp - s->mac_reg - RA)/2, 830 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); 831 return 1; 832 } 833 } 834 DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n", 835 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); 836 837 f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3]; 838 f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff; 839 if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f))) 840 return 1; 841 DBGOUT(RXFILTER, 842 "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n", 843 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], 844 (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5, 845 s->mac_reg[MTA + (f >> 5)]); 846 847 return 0; 848 } 849 850 static void 851 e1000_set_link_status(NetClientState *nc) 852 { 853 E1000State *s = qemu_get_nic_opaque(nc); 854 uint32_t old_status = s->mac_reg[STATUS]; 855 856 if (nc->link_down) { 857 e1000_link_down(s); 858 } else { 859 e1000_link_up(s); 860 } 861 862 if (s->mac_reg[STATUS] != old_status) 863 set_ics(s, 0, E1000_ICR_LSC); 864 } 865 866 static bool e1000_has_rxbufs(E1000State *s, size_t total_size) 867 { 868 int bufs; 869 /* Fast-path short packets */ 870 if (total_size <= s->rxbuf_size) { 871 return s->mac_reg[RDH] != s->mac_reg[RDT]; 872 } 873 if (s->mac_reg[RDH] < s->mac_reg[RDT]) { 874 bufs = s->mac_reg[RDT] - s->mac_reg[RDH]; 875 } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) { 876 bufs = s->mac_reg[RDLEN] / sizeof(struct e1000_rx_desc) + 877 s->mac_reg[RDT] - s->mac_reg[RDH]; 878 } else { 879 return false; 880 } 881 return total_size <= bufs * s->rxbuf_size; 882 } 883 884 static int 885 e1000_can_receive(NetClientState *nc) 886 { 887 E1000State *s = qemu_get_nic_opaque(nc); 888 889 return (s->mac_reg[STATUS] & E1000_STATUS_LU) && 890 (s->mac_reg[RCTL] & E1000_RCTL_EN) && e1000_has_rxbufs(s, 1); 891 } 892 893 static uint64_t rx_desc_base(E1000State *s) 894 { 895 uint64_t bah = s->mac_reg[RDBAH]; 896 uint64_t bal = s->mac_reg[RDBAL] & ~0xf; 897 898 return (bah << 32) + bal; 899 } 900 901 static ssize_t 902 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size) 903 { 904 E1000State *s = qemu_get_nic_opaque(nc); 905 PCIDevice *d = PCI_DEVICE(s); 906 struct e1000_rx_desc desc; 907 dma_addr_t base; 908 unsigned int n, rdt; 909 uint32_t rdh_start; 910 uint16_t vlan_special = 0; 911 uint8_t vlan_status = 0, vlan_offset = 0; 912 uint8_t min_buf[MIN_BUF_SIZE]; 913 size_t desc_offset; 914 size_t desc_size; 915 size_t total_size; 916 917 if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) { 918 return -1; 919 } 920 921 if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) { 922 return -1; 923 } 924 925 /* Pad to minimum Ethernet frame length */ 926 if (size < sizeof(min_buf)) { 927 memcpy(min_buf, buf, size); 928 memset(&min_buf[size], 0, sizeof(min_buf) - size); 929 buf = min_buf; 930 size = sizeof(min_buf); 931 } 932 933 /* Discard oversized packets if !LPE and !SBP. */ 934 if ((size > MAXIMUM_ETHERNET_LPE_SIZE || 935 (size > MAXIMUM_ETHERNET_VLAN_SIZE 936 && !(s->mac_reg[RCTL] & E1000_RCTL_LPE))) 937 && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) { 938 return size; 939 } 940 941 if (!receive_filter(s, buf, size)) 942 return size; 943 944 if (vlan_enabled(s) && is_vlan_packet(s, buf)) { 945 vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(buf + 14))); 946 memmove((uint8_t *)buf + 4, buf, 12); 947 vlan_status = E1000_RXD_STAT_VP; 948 vlan_offset = 4; 949 size -= 4; 950 } 951 952 rdh_start = s->mac_reg[RDH]; 953 desc_offset = 0; 954 total_size = size + fcs_len(s); 955 if (!e1000_has_rxbufs(s, total_size)) { 956 set_ics(s, 0, E1000_ICS_RXO); 957 return -1; 958 } 959 do { 960 desc_size = total_size - desc_offset; 961 if (desc_size > s->rxbuf_size) { 962 desc_size = s->rxbuf_size; 963 } 964 base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH]; 965 pci_dma_read(d, base, &desc, sizeof(desc)); 966 desc.special = vlan_special; 967 desc.status |= (vlan_status | E1000_RXD_STAT_DD); 968 if (desc.buffer_addr) { 969 if (desc_offset < size) { 970 size_t copy_size = size - desc_offset; 971 if (copy_size > s->rxbuf_size) { 972 copy_size = s->rxbuf_size; 973 } 974 pci_dma_write(d, le64_to_cpu(desc.buffer_addr), 975 buf + desc_offset + vlan_offset, copy_size); 976 } 977 desc_offset += desc_size; 978 desc.length = cpu_to_le16(desc_size); 979 if (desc_offset >= total_size) { 980 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM; 981 } else { 982 /* Guest zeroing out status is not a hardware requirement. 983 Clear EOP in case guest didn't do it. */ 984 desc.status &= ~E1000_RXD_STAT_EOP; 985 } 986 } else { // as per intel docs; skip descriptors with null buf addr 987 DBGOUT(RX, "Null RX descriptor!!\n"); 988 } 989 pci_dma_write(d, base, &desc, sizeof(desc)); 990 991 if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN]) 992 s->mac_reg[RDH] = 0; 993 /* see comment in start_xmit; same here */ 994 if (s->mac_reg[RDH] == rdh_start) { 995 DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n", 996 rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]); 997 set_ics(s, 0, E1000_ICS_RXO); 998 return -1; 999 } 1000 } while (desc_offset < total_size); 1001 1002 s->mac_reg[GPRC]++; 1003 s->mac_reg[TPR]++; 1004 /* TOR - Total Octets Received: 1005 * This register includes bytes received in a packet from the <Destination 1006 * Address> field through the <CRC> field, inclusively. 1007 */ 1008 n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4; 1009 if (n < s->mac_reg[TORL]) 1010 s->mac_reg[TORH]++; 1011 s->mac_reg[TORL] = n; 1012 1013 n = E1000_ICS_RXT0; 1014 if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH]) 1015 rdt += s->mac_reg[RDLEN] / sizeof(desc); 1016 if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >> 1017 s->rxbuf_min_shift) 1018 n |= E1000_ICS_RXDMT0; 1019 1020 set_ics(s, 0, n); 1021 1022 return size; 1023 } 1024 1025 static uint32_t 1026 mac_readreg(E1000State *s, int index) 1027 { 1028 return s->mac_reg[index]; 1029 } 1030 1031 static uint32_t 1032 mac_icr_read(E1000State *s, int index) 1033 { 1034 uint32_t ret = s->mac_reg[ICR]; 1035 1036 DBGOUT(INTERRUPT, "ICR read: %x\n", ret); 1037 set_interrupt_cause(s, 0, 0); 1038 return ret; 1039 } 1040 1041 static uint32_t 1042 mac_read_clr4(E1000State *s, int index) 1043 { 1044 uint32_t ret = s->mac_reg[index]; 1045 1046 s->mac_reg[index] = 0; 1047 return ret; 1048 } 1049 1050 static uint32_t 1051 mac_read_clr8(E1000State *s, int index) 1052 { 1053 uint32_t ret = s->mac_reg[index]; 1054 1055 s->mac_reg[index] = 0; 1056 s->mac_reg[index-1] = 0; 1057 return ret; 1058 } 1059 1060 static void 1061 mac_writereg(E1000State *s, int index, uint32_t val) 1062 { 1063 s->mac_reg[index] = val; 1064 } 1065 1066 static void 1067 set_rdt(E1000State *s, int index, uint32_t val) 1068 { 1069 s->mac_reg[index] = val & 0xffff; 1070 if (e1000_has_rxbufs(s, 1)) { 1071 qemu_flush_queued_packets(qemu_get_queue(s->nic)); 1072 } 1073 } 1074 1075 static void 1076 set_16bit(E1000State *s, int index, uint32_t val) 1077 { 1078 s->mac_reg[index] = val & 0xffff; 1079 } 1080 1081 static void 1082 set_dlen(E1000State *s, int index, uint32_t val) 1083 { 1084 s->mac_reg[index] = val & 0xfff80; 1085 } 1086 1087 static void 1088 set_tctl(E1000State *s, int index, uint32_t val) 1089 { 1090 s->mac_reg[index] = val; 1091 s->mac_reg[TDT] &= 0xffff; 1092 start_xmit(s); 1093 } 1094 1095 static void 1096 set_icr(E1000State *s, int index, uint32_t val) 1097 { 1098 DBGOUT(INTERRUPT, "set_icr %x\n", val); 1099 set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val); 1100 } 1101 1102 static void 1103 set_imc(E1000State *s, int index, uint32_t val) 1104 { 1105 s->mac_reg[IMS] &= ~val; 1106 set_ics(s, 0, 0); 1107 } 1108 1109 static void 1110 set_ims(E1000State *s, int index, uint32_t val) 1111 { 1112 s->mac_reg[IMS] |= val; 1113 set_ics(s, 0, 0); 1114 } 1115 1116 #define getreg(x) [x] = mac_readreg 1117 static uint32_t (*macreg_readops[])(E1000State *, int) = { 1118 getreg(PBA), getreg(RCTL), getreg(TDH), getreg(TXDCTL), 1119 getreg(WUFC), getreg(TDT), getreg(CTRL), getreg(LEDCTL), 1120 getreg(MANC), getreg(MDIC), getreg(SWSM), getreg(STATUS), 1121 getreg(TORL), getreg(TOTL), getreg(IMS), getreg(TCTL), 1122 getreg(RDH), getreg(RDT), getreg(VET), getreg(ICS), 1123 getreg(TDBAL), getreg(TDBAH), getreg(RDBAH), getreg(RDBAL), 1124 getreg(TDLEN), getreg(RDLEN), getreg(RDTR), getreg(RADV), 1125 getreg(TADV), getreg(ITR), 1126 1127 [TOTH] = mac_read_clr8, [TORH] = mac_read_clr8, [GPRC] = mac_read_clr4, 1128 [GPTC] = mac_read_clr4, [TPR] = mac_read_clr4, [TPT] = mac_read_clr4, 1129 [ICR] = mac_icr_read, [EECD] = get_eecd, [EERD] = flash_eerd_read, 1130 [CRCERRS ... MPC] = &mac_readreg, 1131 [RA ... RA+31] = &mac_readreg, 1132 [MTA ... MTA+127] = &mac_readreg, 1133 [VFTA ... VFTA+127] = &mac_readreg, 1134 }; 1135 enum { NREADOPS = ARRAY_SIZE(macreg_readops) }; 1136 1137 #define putreg(x) [x] = mac_writereg 1138 static void (*macreg_writeops[])(E1000State *, int, uint32_t) = { 1139 putreg(PBA), putreg(EERD), putreg(SWSM), putreg(WUFC), 1140 putreg(TDBAL), putreg(TDBAH), putreg(TXDCTL), putreg(RDBAH), 1141 putreg(RDBAL), putreg(LEDCTL), putreg(VET), 1142 [TDLEN] = set_dlen, [RDLEN] = set_dlen, [TCTL] = set_tctl, 1143 [TDT] = set_tctl, [MDIC] = set_mdic, [ICS] = set_ics, 1144 [TDH] = set_16bit, [RDH] = set_16bit, [RDT] = set_rdt, 1145 [IMC] = set_imc, [IMS] = set_ims, [ICR] = set_icr, 1146 [EECD] = set_eecd, [RCTL] = set_rx_control, [CTRL] = set_ctrl, 1147 [RDTR] = set_16bit, [RADV] = set_16bit, [TADV] = set_16bit, 1148 [ITR] = set_16bit, 1149 [RA ... RA+31] = &mac_writereg, 1150 [MTA ... MTA+127] = &mac_writereg, 1151 [VFTA ... VFTA+127] = &mac_writereg, 1152 }; 1153 1154 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) }; 1155 1156 static void 1157 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val, 1158 unsigned size) 1159 { 1160 E1000State *s = opaque; 1161 unsigned int index = (addr & 0x1ffff) >> 2; 1162 1163 if (index < NWRITEOPS && macreg_writeops[index]) { 1164 macreg_writeops[index](s, index, val); 1165 } else if (index < NREADOPS && macreg_readops[index]) { 1166 DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val); 1167 } else { 1168 DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n", 1169 index<<2, val); 1170 } 1171 } 1172 1173 static uint64_t 1174 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size) 1175 { 1176 E1000State *s = opaque; 1177 unsigned int index = (addr & 0x1ffff) >> 2; 1178 1179 if (index < NREADOPS && macreg_readops[index]) 1180 { 1181 return macreg_readops[index](s, index); 1182 } 1183 DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2); 1184 return 0; 1185 } 1186 1187 static const MemoryRegionOps e1000_mmio_ops = { 1188 .read = e1000_mmio_read, 1189 .write = e1000_mmio_write, 1190 .endianness = DEVICE_LITTLE_ENDIAN, 1191 .impl = { 1192 .min_access_size = 4, 1193 .max_access_size = 4, 1194 }, 1195 }; 1196 1197 static uint64_t e1000_io_read(void *opaque, hwaddr addr, 1198 unsigned size) 1199 { 1200 E1000State *s = opaque; 1201 1202 (void)s; 1203 return 0; 1204 } 1205 1206 static void e1000_io_write(void *opaque, hwaddr addr, 1207 uint64_t val, unsigned size) 1208 { 1209 E1000State *s = opaque; 1210 1211 (void)s; 1212 } 1213 1214 static const MemoryRegionOps e1000_io_ops = { 1215 .read = e1000_io_read, 1216 .write = e1000_io_write, 1217 .endianness = DEVICE_LITTLE_ENDIAN, 1218 }; 1219 1220 static bool is_version_1(void *opaque, int version_id) 1221 { 1222 return version_id == 1; 1223 } 1224 1225 static void e1000_pre_save(void *opaque) 1226 { 1227 E1000State *s = opaque; 1228 NetClientState *nc = qemu_get_queue(s->nic); 1229 1230 /* If the mitigation timer is active, emulate a timeout now. */ 1231 if (s->mit_timer_on) { 1232 e1000_mit_timer(s); 1233 } 1234 1235 if (!(s->compat_flags & E1000_FLAG_AUTONEG)) { 1236 return; 1237 } 1238 1239 /* 1240 * If link is down and auto-negotiation is ongoing, complete 1241 * auto-negotiation immediately. This allows is to look at 1242 * MII_SR_AUTONEG_COMPLETE to infer link status on load. 1243 */ 1244 if (nc->link_down && 1245 s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN && 1246 s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG) { 1247 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; 1248 } 1249 } 1250 1251 static int e1000_post_load(void *opaque, int version_id) 1252 { 1253 E1000State *s = opaque; 1254 NetClientState *nc = qemu_get_queue(s->nic); 1255 1256 if (!(s->compat_flags & E1000_FLAG_MIT)) { 1257 s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] = 1258 s->mac_reg[TADV] = 0; 1259 s->mit_irq_level = false; 1260 } 1261 s->mit_ide = 0; 1262 s->mit_timer_on = false; 1263 1264 /* nc.link_down can't be migrated, so infer link_down according 1265 * to link status bit in mac_reg[STATUS]. 1266 * Alternatively, restart link negotiation if it was in progress. */ 1267 nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0; 1268 1269 if (!(s->compat_flags & E1000_FLAG_AUTONEG)) { 1270 return 0; 1271 } 1272 1273 if (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN && 1274 s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG && 1275 !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) { 1276 nc->link_down = false; 1277 timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500); 1278 } 1279 1280 return 0; 1281 } 1282 1283 static bool e1000_mit_state_needed(void *opaque) 1284 { 1285 E1000State *s = opaque; 1286 1287 return s->compat_flags & E1000_FLAG_MIT; 1288 } 1289 1290 static const VMStateDescription vmstate_e1000_mit_state = { 1291 .name = "e1000/mit_state", 1292 .version_id = 1, 1293 .minimum_version_id = 1, 1294 .minimum_version_id_old = 1, 1295 .fields = (VMStateField[]) { 1296 VMSTATE_UINT32(mac_reg[RDTR], E1000State), 1297 VMSTATE_UINT32(mac_reg[RADV], E1000State), 1298 VMSTATE_UINT32(mac_reg[TADV], E1000State), 1299 VMSTATE_UINT32(mac_reg[ITR], E1000State), 1300 VMSTATE_BOOL(mit_irq_level, E1000State), 1301 VMSTATE_END_OF_LIST() 1302 } 1303 }; 1304 1305 static const VMStateDescription vmstate_e1000 = { 1306 .name = "e1000", 1307 .version_id = 2, 1308 .minimum_version_id = 1, 1309 .minimum_version_id_old = 1, 1310 .pre_save = e1000_pre_save, 1311 .post_load = e1000_post_load, 1312 .fields = (VMStateField []) { 1313 VMSTATE_PCI_DEVICE(parent_obj, E1000State), 1314 VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */ 1315 VMSTATE_UNUSED(4), /* Was mmio_base. */ 1316 VMSTATE_UINT32(rxbuf_size, E1000State), 1317 VMSTATE_UINT32(rxbuf_min_shift, E1000State), 1318 VMSTATE_UINT32(eecd_state.val_in, E1000State), 1319 VMSTATE_UINT16(eecd_state.bitnum_in, E1000State), 1320 VMSTATE_UINT16(eecd_state.bitnum_out, E1000State), 1321 VMSTATE_UINT16(eecd_state.reading, E1000State), 1322 VMSTATE_UINT32(eecd_state.old_eecd, E1000State), 1323 VMSTATE_UINT8(tx.ipcss, E1000State), 1324 VMSTATE_UINT8(tx.ipcso, E1000State), 1325 VMSTATE_UINT16(tx.ipcse, E1000State), 1326 VMSTATE_UINT8(tx.tucss, E1000State), 1327 VMSTATE_UINT8(tx.tucso, E1000State), 1328 VMSTATE_UINT16(tx.tucse, E1000State), 1329 VMSTATE_UINT32(tx.paylen, E1000State), 1330 VMSTATE_UINT8(tx.hdr_len, E1000State), 1331 VMSTATE_UINT16(tx.mss, E1000State), 1332 VMSTATE_UINT16(tx.size, E1000State), 1333 VMSTATE_UINT16(tx.tso_frames, E1000State), 1334 VMSTATE_UINT8(tx.sum_needed, E1000State), 1335 VMSTATE_INT8(tx.ip, E1000State), 1336 VMSTATE_INT8(tx.tcp, E1000State), 1337 VMSTATE_BUFFER(tx.header, E1000State), 1338 VMSTATE_BUFFER(tx.data, E1000State), 1339 VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64), 1340 VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20), 1341 VMSTATE_UINT32(mac_reg[CTRL], E1000State), 1342 VMSTATE_UINT32(mac_reg[EECD], E1000State), 1343 VMSTATE_UINT32(mac_reg[EERD], E1000State), 1344 VMSTATE_UINT32(mac_reg[GPRC], E1000State), 1345 VMSTATE_UINT32(mac_reg[GPTC], E1000State), 1346 VMSTATE_UINT32(mac_reg[ICR], E1000State), 1347 VMSTATE_UINT32(mac_reg[ICS], E1000State), 1348 VMSTATE_UINT32(mac_reg[IMC], E1000State), 1349 VMSTATE_UINT32(mac_reg[IMS], E1000State), 1350 VMSTATE_UINT32(mac_reg[LEDCTL], E1000State), 1351 VMSTATE_UINT32(mac_reg[MANC], E1000State), 1352 VMSTATE_UINT32(mac_reg[MDIC], E1000State), 1353 VMSTATE_UINT32(mac_reg[MPC], E1000State), 1354 VMSTATE_UINT32(mac_reg[PBA], E1000State), 1355 VMSTATE_UINT32(mac_reg[RCTL], E1000State), 1356 VMSTATE_UINT32(mac_reg[RDBAH], E1000State), 1357 VMSTATE_UINT32(mac_reg[RDBAL], E1000State), 1358 VMSTATE_UINT32(mac_reg[RDH], E1000State), 1359 VMSTATE_UINT32(mac_reg[RDLEN], E1000State), 1360 VMSTATE_UINT32(mac_reg[RDT], E1000State), 1361 VMSTATE_UINT32(mac_reg[STATUS], E1000State), 1362 VMSTATE_UINT32(mac_reg[SWSM], E1000State), 1363 VMSTATE_UINT32(mac_reg[TCTL], E1000State), 1364 VMSTATE_UINT32(mac_reg[TDBAH], E1000State), 1365 VMSTATE_UINT32(mac_reg[TDBAL], E1000State), 1366 VMSTATE_UINT32(mac_reg[TDH], E1000State), 1367 VMSTATE_UINT32(mac_reg[TDLEN], E1000State), 1368 VMSTATE_UINT32(mac_reg[TDT], E1000State), 1369 VMSTATE_UINT32(mac_reg[TORH], E1000State), 1370 VMSTATE_UINT32(mac_reg[TORL], E1000State), 1371 VMSTATE_UINT32(mac_reg[TOTH], E1000State), 1372 VMSTATE_UINT32(mac_reg[TOTL], E1000State), 1373 VMSTATE_UINT32(mac_reg[TPR], E1000State), 1374 VMSTATE_UINT32(mac_reg[TPT], E1000State), 1375 VMSTATE_UINT32(mac_reg[TXDCTL], E1000State), 1376 VMSTATE_UINT32(mac_reg[WUFC], E1000State), 1377 VMSTATE_UINT32(mac_reg[VET], E1000State), 1378 VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32), 1379 VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128), 1380 VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128), 1381 VMSTATE_END_OF_LIST() 1382 }, 1383 .subsections = (VMStateSubsection[]) { 1384 { 1385 .vmsd = &vmstate_e1000_mit_state, 1386 .needed = e1000_mit_state_needed, 1387 }, { 1388 /* empty */ 1389 } 1390 } 1391 }; 1392 1393 static const uint16_t e1000_eeprom_template[64] = { 1394 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0x0000, 0x0000, 0x0000, 1395 0x3000, 0x1000, 0x6403, E1000_DEVID, 0x8086, E1000_DEVID, 0x8086, 0x3040, 1396 0x0008, 0x2000, 0x7e14, 0x0048, 0x1000, 0x00d8, 0x0000, 0x2700, 1397 0x6cc9, 0x3150, 0x0722, 0x040b, 0x0984, 0x0000, 0xc000, 0x0706, 1398 0x1008, 0x0000, 0x0f04, 0x7fff, 0x4d01, 0xffff, 0xffff, 0xffff, 1399 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 1400 0x0100, 0x4000, 0x121c, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 1401 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 1402 }; 1403 1404 /* PCI interface */ 1405 1406 static void 1407 e1000_mmio_setup(E1000State *d) 1408 { 1409 int i; 1410 const uint32_t excluded_regs[] = { 1411 E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS, 1412 E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE 1413 }; 1414 1415 memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d, 1416 "e1000-mmio", PNPMMIO_SIZE); 1417 memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]); 1418 for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++) 1419 memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4, 1420 excluded_regs[i+1] - excluded_regs[i] - 4); 1421 memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE); 1422 } 1423 1424 static void 1425 e1000_cleanup(NetClientState *nc) 1426 { 1427 E1000State *s = qemu_get_nic_opaque(nc); 1428 1429 s->nic = NULL; 1430 } 1431 1432 static void 1433 pci_e1000_uninit(PCIDevice *dev) 1434 { 1435 E1000State *d = E1000(dev); 1436 1437 timer_del(d->autoneg_timer); 1438 timer_free(d->autoneg_timer); 1439 timer_del(d->mit_timer); 1440 timer_free(d->mit_timer); 1441 memory_region_destroy(&d->mmio); 1442 memory_region_destroy(&d->io); 1443 qemu_del_nic(d->nic); 1444 } 1445 1446 static NetClientInfo net_e1000_info = { 1447 .type = NET_CLIENT_OPTIONS_KIND_NIC, 1448 .size = sizeof(NICState), 1449 .can_receive = e1000_can_receive, 1450 .receive = e1000_receive, 1451 .cleanup = e1000_cleanup, 1452 .link_status_changed = e1000_set_link_status, 1453 }; 1454 1455 static int pci_e1000_init(PCIDevice *pci_dev) 1456 { 1457 DeviceState *dev = DEVICE(pci_dev); 1458 E1000State *d = E1000(pci_dev); 1459 uint8_t *pci_conf; 1460 uint16_t checksum = 0; 1461 int i; 1462 uint8_t *macaddr; 1463 1464 pci_conf = pci_dev->config; 1465 1466 /* TODO: RST# value should be 0, PCI spec 6.2.4 */ 1467 pci_conf[PCI_CACHE_LINE_SIZE] = 0x10; 1468 1469 pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */ 1470 1471 e1000_mmio_setup(d); 1472 1473 pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio); 1474 1475 pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io); 1476 1477 memmove(d->eeprom_data, e1000_eeprom_template, 1478 sizeof e1000_eeprom_template); 1479 qemu_macaddr_default_if_unset(&d->conf.macaddr); 1480 macaddr = d->conf.macaddr.a; 1481 for (i = 0; i < 3; i++) 1482 d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i]; 1483 for (i = 0; i < EEPROM_CHECKSUM_REG; i++) 1484 checksum += d->eeprom_data[i]; 1485 checksum = (uint16_t) EEPROM_SUM - checksum; 1486 d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum; 1487 1488 d->nic = qemu_new_nic(&net_e1000_info, &d->conf, 1489 object_get_typename(OBJECT(d)), dev->id, d); 1490 1491 qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr); 1492 1493 add_boot_device_path(d->conf.bootindex, dev, "/ethernet-phy@0"); 1494 1495 d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d); 1496 d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d); 1497 1498 return 0; 1499 } 1500 1501 static void qdev_e1000_reset(DeviceState *dev) 1502 { 1503 E1000State *d = E1000(dev); 1504 e1000_reset(d); 1505 } 1506 1507 static Property e1000_properties[] = { 1508 DEFINE_NIC_PROPERTIES(E1000State, conf), 1509 DEFINE_PROP_BIT("autonegotiation", E1000State, 1510 compat_flags, E1000_FLAG_AUTONEG_BIT, true), 1511 DEFINE_PROP_BIT("mitigation", E1000State, 1512 compat_flags, E1000_FLAG_MIT_BIT, true), 1513 DEFINE_PROP_END_OF_LIST(), 1514 }; 1515 1516 static void e1000_class_init(ObjectClass *klass, void *data) 1517 { 1518 DeviceClass *dc = DEVICE_CLASS(klass); 1519 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1520 1521 k->init = pci_e1000_init; 1522 k->exit = pci_e1000_uninit; 1523 k->romfile = "efi-e1000.rom"; 1524 k->vendor_id = PCI_VENDOR_ID_INTEL; 1525 k->device_id = E1000_DEVID; 1526 k->revision = 0x03; 1527 k->class_id = PCI_CLASS_NETWORK_ETHERNET; 1528 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); 1529 dc->desc = "Intel Gigabit Ethernet"; 1530 dc->reset = qdev_e1000_reset; 1531 dc->vmsd = &vmstate_e1000; 1532 dc->props = e1000_properties; 1533 } 1534 1535 static const TypeInfo e1000_info = { 1536 .name = TYPE_E1000, 1537 .parent = TYPE_PCI_DEVICE, 1538 .instance_size = sizeof(E1000State), 1539 .class_init = e1000_class_init, 1540 }; 1541 1542 static void e1000_register_types(void) 1543 { 1544 type_register_static(&e1000_info); 1545 } 1546 1547 type_init(e1000_register_types) 1548