xref: /openbmc/qemu/hw/net/e1000.c (revision 5accc840)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "hw/hw.h"
29 #include "hw/pci/pci.h"
30 #include "net/net.h"
31 #include "net/checksum.h"
32 #include "hw/loader.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 
36 #include "e1000_regs.h"
37 
#define E1000_DEBUG

#ifdef E1000_DEBUG
/* Debug topics; each enumerator is a bit index consumed by DBGBIT(). */
enum {
    DEBUG_GENERAL,	DEBUG_IO,	DEBUG_MMIO,	DEBUG_INTERRUPT,
    DEBUG_RX,		DEBUG_TX,	DEBUG_MDIC,	DEBUG_EEPROM,
    DEBUG_UNKNOWN,	DEBUG_TXSUM,	DEBUG_TXERR,	DEBUG_RXERR,
    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
};
#define DBGBIT(x)	(1<<DEBUG_##x)
/* Mask of topics that actually print; TX errors and general on by default. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/* Print to stderr only when the topic's bit is set in debugflags. */
#define	DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
#define	DBGOUT(what, fmt, ...) do {} while (0)
#endif

#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/* this is the size past which hardware will drop packets when setting LPE=0 */
#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
/* this is the size past which hardware will drop packets when setting LPE=1 */
#define MAXIMUM_ETHERNET_LPE_SIZE 16384
66 
/*
 * HW models:
 *  E1000_DEV_ID_82540EM works with Windows and Linux
 *  E1000_DEV_ID_82573L OK with windoze and Linux 2.6.22,
 *	appears to perform better than 82540EM, but breaks with Linux 2.6.18
 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
 *  Others never tested
 */
enum { E1000_DEVID = E1000_DEV_ID_82540EM };

/*
 * May need to specify additional MAC-to-PHY entries --
 * Intel's Windows driver refuses to initialize unless they match
 */
/* PHY ID2 register value advertised to match the emulated MAC model. */
enum {
    PHY_ID2_INIT = E1000_DEVID == E1000_DEV_ID_82573L ?		0xcc2 :
                   E1000_DEVID == E1000_DEV_ID_82544GC_COPPER ?	0xc30 :
                   /* default to E1000_DEV_ID_82540EM */	0xc20
};
86 
/* Per-device emulation state for the e1000 NIC. */
typedef struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;
    MemoryRegion mmio;
    MemoryRegion io;

    uint32_t mac_reg[0x8000];   /* MAC registers, indexed by byte offset >> 2 */
    uint16_t phy_reg[0x20];     /* PHY (MII) registers */
    uint16_t eeprom_data[64];   /* EEPROM contents, 16-bit words */

    uint32_t rxbuf_size;        /* cached RX buffer size decoded from RCTL */
    uint32_t rxbuf_min_shift;   /* cached RCTL.RDMTS-derived threshold shift */
    /* Transmit state accumulated across descriptors until EOP. */
    struct e1000_tx {
        unsigned char header[256];      /* saved copy of the TSO header */
        unsigned char vlan_header[4];   /* 802.1Q tag to insert at transmit */
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        uint16_t size;                  /* bytes currently staged in data[] */
        unsigned char sum_needed;       /* TXSM/IXSM bits from the descriptor */
        unsigned char vlan_needed;      /* nonzero when a VLAN tag must go out */
        /* Checksum-offload context, latched from the context descriptor: */
        uint8_t ipcss;      /* IP checksum start */
        uint8_t ipcso;      /* IP checksum offset */
        uint16_t ipcse;     /* IP checksum end */
        uint8_t tucss;      /* TCP/UDP checksum start */
        uint8_t tucso;      /* TCP/UDP checksum offset */
        uint16_t tucse;     /* TCP/UDP checksum end */
        uint8_t hdr_len;    /* TSO header length */
        uint16_t mss;       /* TSO maximum segment size */
        uint32_t paylen;    /* TSO total payload length */
        uint16_t tso_frames;    /* segments already emitted for this TSO run */
        char tse;           /* TSE bit from the context descriptor */
        int8_t ip;          /* nonzero = IPv4, zero = IPv6 */
        int8_t tcp;         /* nonzero = TCP, zero = UDP */
        char cptse;     // current packet tse bit
    } tx;

    /* Microwire EEPROM bit-bang state, driven through the EECD register. */
    struct {
        uint32_t val_in;	// shifted in from guest driver
        uint16_t bitnum_in;
        uint16_t bitnum_out;
        uint16_t reading;
        uint32_t old_eecd;
    } eecd_state;

    QEMUTimer *autoneg_timer;  /* completes emulated link auto-negotiation */

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
    uint32_t compat_flags;
} E1000State;

#define TYPE_E1000 "e1000"

#define E1000(obj) \
    OBJECT_CHECK(E1000State, (obj), TYPE_E1000)
155 
/* Register indices into mac_reg[]: each is the register's byte offset / 4. */
#define	defreg(x)	x = (E1000_##x>>2)
enum {
    defreg(CTRL),	defreg(EECD),	defreg(EERD),	defreg(GPRC),
    defreg(GPTC),	defreg(ICR),	defreg(ICS),	defreg(IMC),
    defreg(IMS),	defreg(LEDCTL),	defreg(MANC),	defreg(MDIC),
    defreg(MPC),	defreg(PBA),	defreg(RCTL),	defreg(RDBAH),
    defreg(RDBAL),	defreg(RDH),	defreg(RDLEN),	defreg(RDT),
    defreg(STATUS),	defreg(SWSM),	defreg(TCTL),	defreg(TDBAH),
    defreg(TDBAL),	defreg(TDH),	defreg(TDLEN),	defreg(TDT),
    defreg(TORH),	defreg(TORL),	defreg(TOTH),	defreg(TOTL),
    defreg(TPR),	defreg(TPT),	defreg(TXDCTL),	defreg(WUFC),
    defreg(RA),		defreg(MTA),	defreg(CRCERRS),defreg(VFTA),
    defreg(VET),        defreg(RDTR),   defreg(RADV),   defreg(TADV),
    defreg(ITR),
};
171 
172 static void
173 e1000_link_down(E1000State *s)
174 {
175     s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
176     s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
177 }
178 
179 static void
180 e1000_link_up(E1000State *s)
181 {
182     s->mac_reg[STATUS] |= E1000_STATUS_LU;
183     s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
184 }
185 
/*
 * Side-effect hook for guest writes to the PHY CTRL register (via MDIC).
 * Restarting auto-negotiation drops the link and arms autoneg_timer to
 * bring it back up ~500ms later.  The register value itself is stored by
 * the caller, set_mdic().
 */
static void
set_phy_ctrl(E1000State *s, int index, uint16_t val)
{
    /*
     * QEMU 1.3 does not support link auto-negotiation emulation, so if we
     * migrate during auto negotiation, after migration the link will be
     * down.
     */
    if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
        return;
    }
    if ((val & MII_CR_AUTO_NEG_EN) && (val & MII_CR_RESTART_AUTO_NEG)) {
        e1000_link_down(s);
        /* Negotiation in progress: clear the "complete" status bit. */
        s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
        DBGOUT(PHY, "Start link auto negotiation\n");
        timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
    }
}
204 
/*
 * Fires after the delay armed in set_phy_ctrl(): marks auto-negotiation
 * complete and, unless the backend peer's link is down, brings the
 * emulated link back up.
 */
static void
e1000_autoneg_timer(void *opaque)
{
    E1000State *s = opaque;
    if (!qemu_get_queue(s->nic)->link_down) {
        e1000_link_up(s);
    }
    s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
    DBGOUT(PHY, "Auto negotiation is completed\n");
}
215 
/* Per-PHY-register write hooks, invoked by set_mdic() before the store. */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [PHY_CTRL] = set_phy_ctrl,
};

/* Number of slots in phyreg_writeops[]; bound-checked in set_mdic(). */
enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
221 
/* Per-PHY-register access rights; set_mdic() rejects anything not listed. */
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
static const char phy_regcap[0x20] = {
    [PHY_STATUS] = PHY_R,	[M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_ID1] = PHY_R,		[M88E1000_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_CTRL] = PHY_RW,	[PHY_1000T_CTRL] = PHY_RW,
    [PHY_LP_ABILITY] = PHY_R,	[PHY_1000T_STATUS] = PHY_R,
    [PHY_AUTONEG_ADV] = PHY_RW,	[M88E1000_RX_ERR_CNTR] = PHY_R,
    [PHY_ID2] = PHY_R,		[M88E1000_PHY_SPEC_STATUS] = PHY_R
};
231 
/* PHY register power-on defaults, copied into phy_reg by e1000_reset(). */
static const uint16_t phy_reg_init[] = {
    [PHY_CTRL] = 0x1140,
    [PHY_STATUS] = 0x794d, /* link initially up with not completed autoneg */
    [PHY_ID1] = 0x141,				[PHY_ID2] = PHY_ID2_INIT,
    [PHY_1000T_CTRL] = 0x0e00,			[M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,	[PHY_AUTONEG_ADV] = 0xde1,
    [PHY_LP_ABILITY] = 0x1e0,			[PHY_1000T_STATUS] = 0x3c00,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
};
241 
/* MAC register power-on defaults, copied into mac_reg by e1000_reset(). */
static const uint32_t mac_reg_init[] = {
    [PBA] =     0x00100030,
    [LEDCTL] =  0x602,
    [CTRL] =    E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS] =  0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC] =    E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};
255 
/*
 * Fold a candidate mitigation delay into *curr, keeping the smallest
 * nonzero value.  *curr == 0 means "not set yet"; value == 0 is ignored.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        /* A zero candidate never overrides anything. */
        return;
    }
    if (*curr == 0 || value < *curr) {
        *curr = value;
    }
}
264 
/*
 * Store the interrupt cause (mirrored in ICR and ICS) and update the
 * INTx pin level.  When the MIT compat flag is on, a rising edge may be
 * postponed by the mitigation timer derived from ITR/RADV/TADV.
 */
static void
set_interrupt_cause(E1000State *s, int index, uint32_t val)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t pending_ints;
    uint32_t mit_delay;

    if (val && (E1000_DEVID >= E1000_DEV_ID_82547EI_MOBILE)) {
        /* Only for 8257x */
        val |= E1000_ICR_INT_ASSERTED;
    }
    s->mac_reg[ICR] = val;

    /*
     * Make sure ICR and ICS registers have the same value.
     * The spec says that the ICS register is write-only.  However in practice,
     * on real hardware ICS is readable, and for reads it has the same value as
     * ICR (except that ICS does not have the clear on read behaviour of ICR).
     *
     * The VxWorks PRO/1000 driver uses this behaviour.
     */
    s->mac_reg[ICS] = val;

    /* Causes that are both raised and unmasked drive the pin. */
    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
    if (!s->mit_irq_level && pending_ints) {
        /*
         * Here we detect a potential raising edge. We postpone raising the
         * interrupt line if we are inside the mitigation delay window
         * (s->mit_timer_on == 1).
         * We provide a partial implementation of interrupt mitigation,
         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
         * RADV; relative timers based on TIDV and RDTR are not implemented.
         */
        if (s->mit_timer_on) {
            return;
        }
        if (s->compat_flags & E1000_FLAG_MIT) {
            /* Compute the next mitigation delay according to pending
             * interrupts and the current values of RADV (provided
             * RDTR!=0), TADV and ITR.
             * Then rearm the timer.
             */
            mit_delay = 0;
            if (s->mit_ide &&
                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
            }
            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
            }
            mit_update_delay(&mit_delay, s->mac_reg[ITR]);

            if (mit_delay) {
                s->mit_timer_on = 1;
                /* mit_delay is in 256ns units; convert to nanoseconds. */
                timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                          mit_delay * 256);
            }
            s->mit_ide = 0;
        }
    }

    s->mit_irq_level = (pending_ints != 0);
    qemu_set_irq(d->irq[0], s->mit_irq_level);
}
330 
/*
 * Mitigation window expired: allow interrupts again and re-evaluate the
 * pin level from the current ICR contents.
 */
static void
e1000_mit_timer(void *opaque)
{
    E1000State *s = opaque;

    s->mit_timer_on = 0;
    /* Call set_interrupt_cause to update the irq level (if necessary). */
    set_interrupt_cause(s, 0, s->mac_reg[ICR]);
}
340 
/* Raise interrupt causes: OR val into the already-pending ICR bits. */
static void
set_ics(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
        s->mac_reg[IMS]);
    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
}
348 
/*
 * Decode the receive buffer size in bytes from an RCTL value (the BSEX
 * bit plus the SZ field).  Any encoding not matched below yields the
 * default of 2048 bytes.
 */
static int
rxbufsize(uint32_t v)
{
    v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
         E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
         E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
    switch (v) {
    case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
        return 16384;
    case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
        return 8192;
    case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
        return 4096;
    case E1000_RCTL_SZ_1024:
        return 1024;
    case E1000_RCTL_SZ_512:
        return 512;
    case E1000_RCTL_SZ_256:
        return 256;
    }
    return 2048;
}
371 
/*
 * Full device reset: cancel pending timers, clear mitigation state,
 * restore PHY/MAC registers to their power-on defaults, wipe the TX
 * staging state and preload RAL/RAH from the configured MAC address.
 */
static void e1000_reset(void *opaque)
{
    E1000State *d = opaque;
    uint8_t *macaddr = d->conf.macaddr.a;
    int i;

    timer_del(d->autoneg_timer);
    timer_del(d->mit_timer);
    d->mit_timer_on = 0;
    d->mit_irq_level = 0;
    d->mit_ide = 0;
    memset(d->phy_reg, 0, sizeof d->phy_reg);
    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
    memset(d->mac_reg, 0, sizeof d->mac_reg);
    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
    d->rxbuf_min_shift = 1;
    memset(&d->tx, 0, sizeof d->tx);

    /* Keep the link down if the backend peer is down. */
    if (qemu_get_queue(d->nic)->link_down) {
        e1000_link_down(d);
    }

    /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
    d->mac_reg[RA] = 0;
    d->mac_reg[RA + 1] = E1000_RAH_AV;
    for (i = 0; i < 4; i++) {
        d->mac_reg[RA] |= macaddr[i] << (8 * i);
        d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
    }
}
402 
403 static void
404 set_ctrl(E1000State *s, int index, uint32_t val)
405 {
406     /* RST is self clearing */
407     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
408 }
409 
/*
 * Write handler for RCTL: cache the decoded RX buffer size and the
 * free-descriptor threshold shift, then kick the backend queue since
 * reception may just have become possible.
 */
static void
set_rx_control(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[RCTL] = val;
    s->rxbuf_size = rxbufsize(val);
    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
           s->mac_reg[RCTL]);
    qemu_flush_queued_packets(qemu_get_queue(s->nic));
}
420 
/*
 * Emulate the MDIC register: carry out the PHY read or write it encodes.
 * Only PHY address 1 responds; registers not allowed by phy_regcap get
 * the ERROR bit.  READY is always set on completion, and an MDI-access
 * interrupt is raised when the guest requested one.
 */
static void
set_mdic(E1000State *s, int index, uint32_t val)
{
    uint32_t data = val & E1000_MDIC_DATA_MASK;
    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);

    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
    else if (val & E1000_MDIC_OP_READ) {
        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
        if (!(phy_regcap[addr] & PHY_R)) {
            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else
            /* clear the data field, then merge in the PHY register value */
            val = (val ^ data) | s->phy_reg[addr];
    } else if (val & E1000_MDIC_OP_WRITE) {
        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
        if (!(phy_regcap[addr] & PHY_W)) {
            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else {
            /* run the per-register side-effect hook before storing */
            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
                phyreg_writeops[addr](s, index, data);
            }
            s->phy_reg[addr] = data;
        }
    }
    s->mac_reg[MDIC] = val | E1000_MDIC_READY;

    if (val & E1000_MDIC_INT_EN) {
        set_ics(s, 0, E1000_ICR_MDAC);
    }
}
454 
/*
 * Read handler for EECD: reconstruct the software-visible EEPROM pins.
 * DO (data out) carries the current bit of an in-progress microwire
 * read, MSB first within each 16-bit word; it idles high when no read
 * is in progress.
 */
static uint32_t
get_eecd(E1000State *s, int index)
{
    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;

    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
           s->eecd_state.bitnum_out, s->eecd_state.reading);
    if (!s->eecd_state.reading ||
        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
        ret |= E1000_EECD_DO;
    return ret;
}
468 
/*
 * Write handler for EECD: emulate the bit-banged microwire EEPROM
 * protocol.  Bits are shifted in on SK rising edges while CS is held;
 * after 9 input bits (start bit + 3-bit opcode + address) a read
 * command positions bitnum_out so get_eecd() streams the word back out.
 */
static void
set_eecd(E1000State *s, int index, uint32_t val)
{
    uint32_t oldval = s->eecd_state.old_eecd;

    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    if (!(E1000_EECD_CS & val))			// CS inactive; nothing to do
	return;
    if (E1000_EECD_CS & (val ^ oldval)) {	// CS rise edge; reset state
	s->eecd_state.val_in = 0;
	s->eecd_state.bitnum_in = 0;
	s->eecd_state.bitnum_out = 0;
	s->eecd_state.reading = 0;
    }
    if (!(E1000_EECD_SK & (val ^ oldval)))	// no clock edge
        return;
    if (!(E1000_EECD_SK & val)) {		// falling edge
        s->eecd_state.bitnum_out++;
        return;
    }
    /* SK rising edge: latch the DI pin into the input shift register. */
    s->eecd_state.val_in <<= 1;
    if (val & E1000_EECD_DI)
        s->eecd_state.val_in |= 1;
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
        /* full command received; decode word address and opcode */
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
            EEPROM_READ_OPCODE_MICROWIRE);
    }
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
           s->eecd_state.reading);
}
502 
/*
 * Read handler for the EERD (EEPROM read) register: once the guest has
 * set the START bit, return the addressed EEPROM word together with the
 * DONE flag.  Addresses past EEPROM_CHECKSUM_REG return DONE with no data.
 */
static uint32_t
flash_eerd_read(E1000State *s, int x)
{
    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;

    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
        return (s->mac_reg[EERD]);

    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
        return (E1000_EEPROM_RW_REG_DONE | r);

    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
           E1000_EEPROM_RW_REG_DONE | r);
}
517 
/*
 * Checksum offload helper: sum data[css..n) (n clipped to cse+1 when
 * cse != 0) and store the finished 16-bit checksum big-endian at
 * data + sloc.  Nothing is written if sloc falls at or past the end.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;

    if (cse && cse < n)
        n = cse + 1;
    if (sloc < n-1) {
        sum = net_checksum_add(n-css, data+css);
        cpu_to_be16wu((uint16_t *)(data + sloc),
                      net_checksum_finish(sum));
    }
}
531 
532 static inline int
533 vlan_enabled(E1000State *s)
534 {
535     return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
536 }
537 
538 static inline int
539 vlan_rx_filter_enabled(E1000State *s)
540 {
541     return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
542 }
543 
/*
 * Nonzero when the frame's EtherType (big-endian, at byte offset 12)
 * matches the configured VLAN EtherType in the VET register.
 */
static inline int
is_vlan_packet(E1000State *s, const uint8_t *buf)
{
    return (be16_to_cpup((uint16_t *)(buf + 12)) ==
                le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
}
550 
551 static inline int
552 is_vlan_txd(uint32_t txd_lower)
553 {
554     return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
555 }
556 
557 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
558  * fill it in, just pad descriptor length by 4 bytes unless guest
559  * told us to strip it off the packet. */
560 static inline int
561 fcs_len(E1000State *s)
562 {
563     return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
564 }
565 
566 static void
567 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
568 {
569     NetClientState *nc = qemu_get_queue(s->nic);
570     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
571         nc->info->receive(nc, buf, size);
572     } else {
573         qemu_send_packet(nc, buf, size);
574     }
575 }
576 
/*
 * Transmit the frame currently staged in s->tx.  For a TSO segment this
 * first patches the IP length/identification and TCP sequence number
 * (or UDP length), then applies any requested checksum offloads,
 * inserts the VLAN tag if needed, sends the frame and updates the
 * TX statistics counters.
 */
static void
xmit_seg(E1000State *s)
{
    uint16_t len, *sp;
    unsigned int frames = s->tx.tso_frames, css, sofar, n;
    struct e1000_tx *tp = &s->tx;

    if (tp->tse && tp->cptse) {
        css = tp->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (tp->ip) {		// IPv4
            /* total length, then bump the IP identification per segment */
            cpu_to_be16wu((uint16_t *)(tp->data+css+2),
                          tp->size - css);
            cpu_to_be16wu((uint16_t *)(tp->data+css+4),
                          be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
        } else			// IPv6
            cpu_to_be16wu((uint16_t *)(tp->data+css+4),
                          tp->size - css);
        css = tp->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
        if (tp->tcp) {
            /* advance the TCP sequence number by the payload sent so far */
            sofar = frames * tp->mss;
            cpu_to_be32wu((uint32_t *)(tp->data+css+4),	// seq
                be32_to_cpupu((uint32_t *)(tp->data+css+4))+sofar);
            if (tp->paylen - sofar > tp->mss)
                tp->data[css + 13] &= ~9;		// PSH, FIN
        } else	// UDP
            cpu_to_be16wu((uint16_t *)(tp->data+css+4), len);
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            // add pseudo-header length before checksum calculation
            sp = (uint16_t *)(tp->data + tp->tucso);
            phsum = be16_to_cpup(sp) + len;
            phsum = (phsum >> 16) + (phsum & 0xffff);
            cpu_to_be16wu(sp, phsum);
        }
        tp->tso_frames++;
    }

    if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
        putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
        putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
    if (tp->vlan_needed) {
        /* shift the MAC addresses into vlan[]+data[] and splice the tag in;
         * vlan[] immediately precedes data[] in the struct, so the frame is
         * sent contiguously starting at tp->vlan. */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else
        e1000_send_packet(s, tp->data, tp->size);
    /* packet counters, then the 64-bit total-octets counter with carry */
    s->mac_reg[TPT]++;
    s->mac_reg[GPTC]++;
    n = s->mac_reg[TOTL];
    if ((s->mac_reg[TOTL] += s->tx.size) < n)
        s->mac_reg[TOTH]++;
}
635 
/*
 * Process one TX descriptor.  A context descriptor only latches checksum
 * and TSO offload parameters into s->tx.  Data and legacy descriptors
 * DMA their payload into tp->data — segmenting at hdr_len + mss when TSO
 * is active — and the accumulated frame is transmitted once a descriptor
 * carries the EOP bit.
 */
static void
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
    unsigned int msh = 0xfffff;
    uint64_t addr;
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    struct e1000_tx *tp = &s->tx;

    /* remember the IDE bit for interrupt mitigation (TADV) */
    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
    if (dtype == E1000_TXD_CMD_DEXT) {	// context descriptor
        op = le32_to_cpu(xp->cmd_and_length);
        tp->ipcss = xp->lower_setup.ip_fields.ipcss;
        tp->ipcso = xp->lower_setup.ip_fields.ipcso;
        tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
        tp->tucss = xp->upper_setup.tcp_fields.tucss;
        tp->tucso = xp->upper_setup.tcp_fields.tucso;
        tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
        tp->paylen = op & 0xfffff;
        tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
        tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
        tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
        tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
        tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
        tp->tso_frames = 0;
        if (tp->tucso == 0) {	// this is probably wrong
            DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
            tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
        }
        return;
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
        // data descriptor
        if (tp->size == 0) {
            /* first descriptor of a packet: latch checksum option bits */
            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
        }
        tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
    } else {
        // legacy descriptor
        tp->cptse = 0;
    }

    if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
        /* build the 802.1Q tag: VET EtherType + descriptor "special" field */
        tp->vlan_needed = 1;
        cpu_to_be16wu((uint16_t *)(tp->vlan_header),
                      le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
        cpu_to_be16wu((uint16_t *)(tp->vlan_header + 2),
                      le16_to_cpu(dp->upper.fields.special));
    }

    addr = le64_to_cpu(dp->buffer_addr);
    if (tp->tse && tp->cptse) {
        /* TSO: fill up to header + one MSS, emit a segment, then restore
         * the saved header and continue with the remaining payload. */
        msh = tp->hdr_len + tp->mss;
        do {
            bytes = split_size;
            if (tp->size + bytes > msh)
                bytes = msh - tp->size;

            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
            pci_dma_read(d, addr, tp->data + tp->size, bytes);
            sz = tp->size + bytes;
            if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
                /* header just became complete; save it for later segments */
                memmove(tp->header, tp->data, tp->hdr_len);
            }
            tp->size = sz;
            addr += bytes;
            if (sz == msh) {
                xmit_seg(s);
                memmove(tp->data, tp->header, tp->hdr_len);
                tp->size = tp->hdr_len;
            }
        } while (split_size -= bytes);
    } else if (!tp->tse && tp->cptse) {
        // context descriptor TSE is not set, while data descriptor TSE is set
        DBGOUT(TXERR, "TCP segmentation error\n");
    } else {
        /* plain descriptor: append the buffer, clamped to data[] capacity */
        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
        pci_dma_read(d, addr, tp->data + tp->size, split_size);
        tp->size += split_size;
    }

    if (!(txd_lower & E1000_TXD_CMD_EOP))
        return;
    /* end of packet: transmit unless a TSO header is still incomplete */
    if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
        xmit_seg(s);
    }
    tp->tso_frames = 0;
    tp->sum_needed = 0;
    tp->vlan_needed = 0;
    tp->size = 0;
    tp->cptse = 0;
}
731 
/*
 * Write back TX descriptor status when the guest asked for a report
 * (RS or RPS bit): set DD, clear the error bits, and DMA the upper
 * dword back into the ring.  Returns the interrupt cause to raise
 * (E1000_ICR_TXDW) or 0 when no write-back was requested.
 */
static uint32_t
txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);

    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
        return 0;
    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
    dp->upper.data = cpu_to_le32(txd_upper);
    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
                  &dp->upper, sizeof(dp->upper));
    return E1000_ICR_TXDW;
}
747 
748 static uint64_t tx_desc_base(E1000State *s)
749 {
750     uint64_t bah = s->mac_reg[TDBAH];
751     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
752 
753     return (bah << 32) + bal;
754 }
755 
/*
 * Drain the TX ring: process descriptors from TDH up to TDT, writing
 * back status where requested, then raise the accumulated TX interrupt
 * causes.  A full wrap back to the starting TDH indicates bogus guest
 * TDT/TDLEN values, so the loop bails out rather than spin forever.
 */
static void
start_xmit(E1000State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    dma_addr_t base;
    struct e1000_tx_desc desc;
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;

    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
        DBGOUT(TX, "tx disabled\n");
        return;
    }

    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
        base = tx_desc_base(s) +
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
        pci_dma_read(d, base, &desc, sizeof(desc));

        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
               desc.upper.data);

        process_tx_desc(s, &desc);
        cause |= txdesc_writeback(s, base, &desc);

        /* advance TDH, wrapping at the end of the ring */
        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
            s->mac_reg[TDH] = 0;
        /*
         * the following could happen only if guest sw assigns
         * bogus values to TDT/TDLEN.
         * there's nothing too intelligent we could do about this.
         */
        if (s->mac_reg[TDH] == tdh_start) {
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
            break;
        }
    }
    set_ics(s, 0, cause);
}
796 
/*
 * Apply the receive filters to an incoming frame; returns nonzero to
 * accept it.  Order: VLAN table (VFTA) when the VLAN filter is on, then
 * unicast/multicast promiscuous modes, broadcast, a perfect match
 * against the RA address array, and finally the multicast table (MTA)
 * hash lookup.
 */
static int
receive_filter(E1000State *s, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const int mta_shift[] = {4, 3, 2, 0};
    uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;

    if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
        /* look the 12-bit VLAN ID up in the 4096-bit VFTA bitmap */
        uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
        uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
                                     ((vid >> 5) & 0x7f));
        if ((vfta & (1 << (vid & 0x1f))) == 0)
            return 0;
    }

    if (rctl & E1000_RCTL_UPE)			// promiscuous
        return 1;

    if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE))	// promiscuous mcast
        return 1;

    if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
        return 1;

    /* perfect match against each valid (AV) receive-address entry */
    for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
        if (!(rp[1] & E1000_RAH_AV))
            continue;
        ra[0] = cpu_to_le32(rp[0]);
        ra[1] = cpu_to_le32(rp[1]);
        if (!memcmp(buf, (uint8_t *)ra, 6)) {
            DBGOUT(RXFILTER,
                   "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
                   (int)(rp - s->mac_reg - RA)/2,
                   buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
            return 1;
        }
    }
    DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);

    /* 12-bit hash into the multicast table; shift chosen by RCTL.MO */
    f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
    f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
    if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
        return 1;
    DBGOUT(RXFILTER,
           "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
           (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
           s->mac_reg[MTA + (f >> 5)]);

    return 0;
}
849 
850 static void
851 e1000_set_link_status(NetClientState *nc)
852 {
853     E1000State *s = qemu_get_nic_opaque(nc);
854     uint32_t old_status = s->mac_reg[STATUS];
855 
856     if (nc->link_down) {
857         e1000_link_down(s);
858     } else {
859         e1000_link_up(s);
860     }
861 
862     if (s->mac_reg[STATUS] != old_status)
863         set_ics(s, 0, E1000_ICR_LSC);
864 }
865 
/*
 * True when the RX descriptor ring has enough free descriptors for
 * total_size bytes.  In this emulation RDH == RDT means the ring is
 * empty (no usable descriptors).
 */
static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
{
    int bufs;
    /* Fast-path short packets */
    if (total_size <= s->rxbuf_size) {
        return s->mac_reg[RDH] != s->mac_reg[RDT];
    }
    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
        /* RDT wrapped behind RDH: count across the ring boundary */
        bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
            s->mac_reg[RDT] - s->mac_reg[RDH];
    } else {
        return false;
    }
    return total_size <= bufs * s->rxbuf_size;
}
883 
884 static int
885 e1000_can_receive(NetClientState *nc)
886 {
887     E1000State *s = qemu_get_nic_opaque(nc);
888 
889     return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
890         (s->mac_reg[RCTL] & E1000_RCTL_EN) && e1000_has_rxbufs(s, 1);
891 }
892 
893 static uint64_t rx_desc_base(E1000State *s)
894 {
895     uint64_t bah = s->mac_reg[RDBAH];
896     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
897 
898     return (bah << 32) + bal;
899 }
900 
/*
 * Deliver one packet from the net layer into the guest's RX descriptor
 * ring via DMA.  Returns the number of bytes consumed, or -1 when the
 * packet cannot be accepted right now (link down, RX disabled, or ring
 * full/overrun) so the net layer may queue and retry it.
 */
static ssize_t
e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0, vlan_offset = 0;
    uint8_t min_buf[MIN_BUF_SIZE];
    size_t desc_offset;
    size_t desc_size;
    size_t total_size;

    /* Drop silently unless the link is up and receive is enabled. */
    if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
        return -1;
    }

    if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
        return -1;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        memcpy(min_buf, buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        buf = min_buf;
        size = sizeof(min_buf);
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
        (size > MAXIMUM_ETHERNET_VLAN_SIZE
        && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
        && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
        return size;
    }

    /* Filtered-out packets count as consumed. */
    if (!receive_filter(s, buf, size))
        return size;

    /* Strip the 802.1Q tag: remember the VLAN word for the descriptor's
     * 'special' field and slide the MAC header forward over the tag.
     * NOTE(review): the memmove casts away const and writes into the
     * caller's buffer whenever buf != min_buf — presumably tolerated by
     * the net layer, but TODO confirm. */
    if (vlan_enabled(s) && is_vlan_packet(s, buf)) {
        vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(buf + 14)));
        memmove((uint8_t *)buf + 4, buf, 12);
        vlan_status = E1000_RXD_STAT_VP;
        vlan_offset = 4;
        size -= 4;
    }

    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    total_size = size + fcs_len(s);
    if (!e1000_has_rxbufs(s, total_size)) {
            /* Ring full: signal receiver overrun and let the caller retry. */
            set_ics(s, 0, E1000_ICS_RXO);
            return -1;
    }
    /* Scatter the frame across as many descriptors as needed, at most
     * rxbuf_size bytes per descriptor. */
    do {
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        /* NOTE(review): DD is set here and written back together with the
         * payload below; confirm no ordering issue for a guest that polls
         * DD concurrently with the data DMA. */
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        if (desc.buffer_addr) {
            if (desc_offset < size) {
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                pci_dma_write(d, le64_to_cpu(desc.buffer_addr),
                              buf + desc_offset + vlan_offset, copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { // as per intel docs; skip descriptors with null buf addr
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        pci_dma_write(d, base, &desc, sizeof(desc));

        /* Advance RDH, wrapping at the end of the ring. */
        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        if (s->mac_reg[RDH] == rdh_start) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            set_ics(s, 0, E1000_ICS_RXO);
            return -1;
        }
    } while (desc_offset < total_size);

    /* Update statistics: good packets / total packets received. */
    s->mac_reg[GPRC]++;
    s->mac_reg[TPR]++;
    /* TOR - Total Octets Received:
     * This register includes bytes received in a packet from the <Destination
     * Address> field through the <CRC> field, inclusively.
     */
    n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
    if (n < s->mac_reg[TORL])
        s->mac_reg[TORH]++;
    s->mac_reg[TORL] = n;

    /* Raise RXT0, plus RXDMT0 when free descriptors drop below the
     * threshold derived from rxbuf_min_shift. */
    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}
1024 
/* Default read handler: return the register value unchanged. */
static uint32_t
mac_readreg(E1000State *s, int index)
{
    return s->mac_reg[index];
}
1030 
1031 static uint32_t
1032 mac_icr_read(E1000State *s, int index)
1033 {
1034     uint32_t ret = s->mac_reg[ICR];
1035 
1036     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1037     set_interrupt_cause(s, 0, 0);
1038     return ret;
1039 }
1040 
1041 static uint32_t
1042 mac_read_clr4(E1000State *s, int index)
1043 {
1044     uint32_t ret = s->mac_reg[index];
1045 
1046     s->mac_reg[index] = 0;
1047     return ret;
1048 }
1049 
1050 static uint32_t
1051 mac_read_clr8(E1000State *s, int index)
1052 {
1053     uint32_t ret = s->mac_reg[index];
1054 
1055     s->mac_reg[index] = 0;
1056     s->mac_reg[index-1] = 0;
1057     return ret;
1058 }
1059 
/* Default write handler: store the value with no side effects. */
static void
mac_writereg(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val;
}
1065 
1066 static void
1067 set_rdt(E1000State *s, int index, uint32_t val)
1068 {
1069     s->mac_reg[index] = val & 0xffff;
1070     if (e1000_has_rxbufs(s, 1)) {
1071         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1072     }
1073 }
1074 
/* Write handler for registers that are only 16 bits wide. */
static void
set_16bit(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xffff;
}
1080 
/* TDLEN/RDLEN write handler: descriptor ring lengths must be 128-byte
 * aligned, hence the 0xfff80 mask. */
static void
set_dlen(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xfff80;
}
1086 
/* Shared write handler for TCTL and TDT (see macreg_writeops): either
 * write kicks the transmitter. */
static void
set_tctl(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val;
    s->mac_reg[TDT] &= 0xffff;  /* TDT is only 16 bits wide */
    start_xmit(s);
}
1094 
/* ICR write handler: writing 1 to a bit clears that interrupt cause. */
static void
set_icr(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_icr %x\n", val);
    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
}
1101 
/* IMC write handler: mask (disable) the given interrupt causes, then
 * re-evaluate the interrupt line. */
static void
set_imc(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] &= ~val;
    set_ics(s, 0, 0);
}
1108 
/* IMS write handler: unmask (enable) the given interrupt causes, then
 * re-evaluate the interrupt line. */
static void
set_ims(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] |= val;
    set_ics(s, 0, 0);
}
1115 
/* Dispatch table for MMIO register reads, indexed by register number
 * (byte offset >> 2).  NB: these are designated initializers — for
 * overlapping entries the later initializer wins, so the specific
 * handlers below override the [A ... B] range defaults where relevant. */
#define getreg(x)	[x] = mac_readreg
static uint32_t (*macreg_readops[])(E1000State *, int) = {
    getreg(PBA),	getreg(RCTL),	getreg(TDH),	getreg(TXDCTL),
    getreg(WUFC),	getreg(TDT),	getreg(CTRL),	getreg(LEDCTL),
    getreg(MANC),	getreg(MDIC),	getreg(SWSM),	getreg(STATUS),
    getreg(TORL),	getreg(TOTL),	getreg(IMS),	getreg(TCTL),
    getreg(RDH),	getreg(RDT),	getreg(VET),	getreg(ICS),
    getreg(TDBAL),	getreg(TDBAH),	getreg(RDBAH),	getreg(RDBAL),
    getreg(TDLEN),      getreg(RDLEN),  getreg(RDTR),   getreg(RADV),
    getreg(TADV),       getreg(ITR),

    [TOTH] = mac_read_clr8,	[TORH] = mac_read_clr8,	[GPRC] = mac_read_clr4,
    [GPTC] = mac_read_clr4,	[TPR] = mac_read_clr4,	[TPT] = mac_read_clr4,
    [ICR] = mac_icr_read,	[EECD] = get_eecd,	[EERD] = flash_eerd_read,
    [CRCERRS ... MPC] = &mac_readreg,
    [RA ... RA+31] = &mac_readreg,
    [MTA ... MTA+127] = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
};
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1136 
/* Dispatch table for MMIO register writes, indexed like macreg_readops.
 * Registers without an entry are read-only or unimplemented (see
 * e1000_mmio_write). */
#define putreg(x)	[x] = mac_writereg
static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
    putreg(PBA),	putreg(EERD),	putreg(SWSM),	putreg(WUFC),
    putreg(TDBAL),	putreg(TDBAH),	putreg(TXDCTL),	putreg(RDBAH),
    putreg(RDBAL),	putreg(LEDCTL), putreg(VET),
    [TDLEN] = set_dlen,	[RDLEN] = set_dlen,	[TCTL] = set_tctl,
    [TDT] = set_tctl,	[MDIC] = set_mdic,	[ICS] = set_ics,
    [TDH] = set_16bit,	[RDH] = set_16bit,	[RDT] = set_rdt,
    [IMC] = set_imc,	[IMS] = set_ims,	[ICR] = set_icr,
    [EECD] = set_eecd,	[RCTL] = set_rx_control, [CTRL] = set_ctrl,
    [RDTR] = set_16bit, [RADV] = set_16bit,     [TADV] = set_16bit,
    [ITR] = set_16bit,
    [RA ... RA+31] = &mac_writereg,
    [MTA ... MTA+127] = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
};

enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1155 
1156 static void
1157 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1158                  unsigned size)
1159 {
1160     E1000State *s = opaque;
1161     unsigned int index = (addr & 0x1ffff) >> 2;
1162 
1163     if (index < NWRITEOPS && macreg_writeops[index]) {
1164         macreg_writeops[index](s, index, val);
1165     } else if (index < NREADOPS && macreg_readops[index]) {
1166         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val);
1167     } else {
1168         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1169                index<<2, val);
1170     }
1171 }
1172 
1173 static uint64_t
1174 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1175 {
1176     E1000State *s = opaque;
1177     unsigned int index = (addr & 0x1ffff) >> 2;
1178 
1179     if (index < NREADOPS && macreg_readops[index])
1180     {
1181         return macreg_readops[index](s, index);
1182     }
1183     DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1184     return 0;
1185 }
1186 
/* Register file is little-endian; all accesses are performed through the
 * 4-byte handlers above (impl min/max access size = 4). */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1196 
1197 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1198                               unsigned size)
1199 {
1200     E1000State *s = opaque;
1201 
1202     (void)s;
1203     return 0;
1204 }
1205 
1206 static void e1000_io_write(void *opaque, hwaddr addr,
1207                            uint64_t val, unsigned size)
1208 {
1209     E1000State *s = opaque;
1210 
1211     (void)s;
1212 }
1213 
/* Stub ops for the (unimplemented) I/O-port BAR. */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1219 
/* VMState predicate: true only for version 1 of the migration stream. */
static bool is_version_1(void *opaque, int version_id)
{
    (void)opaque;   /* unused, required by the callback signature */
    return version_id == 1;
}
1224 
1225 static void e1000_pre_save(void *opaque)
1226 {
1227     E1000State *s = opaque;
1228     NetClientState *nc = qemu_get_queue(s->nic);
1229 
1230     /* If the mitigation timer is active, emulate a timeout now. */
1231     if (s->mit_timer_on) {
1232         e1000_mit_timer(s);
1233     }
1234 
1235     if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
1236         return;
1237     }
1238 
1239     /*
1240      * If link is down and auto-negotiation is ongoing, complete
1241      * auto-negotiation immediately.  This allows is to look at
1242      * MII_SR_AUTONEG_COMPLETE to infer link status on load.
1243      */
1244     if (nc->link_down &&
1245         s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
1246         s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG) {
1247          s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1248     }
1249 }
1250 
1251 static int e1000_post_load(void *opaque, int version_id)
1252 {
1253     E1000State *s = opaque;
1254     NetClientState *nc = qemu_get_queue(s->nic);
1255 
1256     if (!(s->compat_flags & E1000_FLAG_MIT)) {
1257         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1258             s->mac_reg[TADV] = 0;
1259         s->mit_irq_level = false;
1260     }
1261     s->mit_ide = 0;
1262     s->mit_timer_on = false;
1263 
1264     /* nc.link_down can't be migrated, so infer link_down according
1265      * to link status bit in mac_reg[STATUS].
1266      * Alternatively, restart link negotiation if it was in progress. */
1267     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1268 
1269     if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
1270         return 0;
1271     }
1272 
1273     if (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
1274         s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG &&
1275         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1276         nc->link_down = false;
1277         timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1278     }
1279 
1280     return 0;
1281 }
1282 
1283 static bool e1000_mit_state_needed(void *opaque)
1284 {
1285     E1000State *s = opaque;
1286 
1287     return s->compat_flags & E1000_FLAG_MIT;
1288 }
1289 
/* Optional migration subsection carrying interrupt-mitigation state;
 * sent only when e1000_mit_state_needed() returns true. */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1304 
/* Main migration description.  Field order and types are ABI: changing
 * them breaks migration compatibility with older QEMU versions. */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields      = (VMStateField []) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* EEPROM bit-bang state machine. */
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* In-flight transmit/TSO context. */
        VMSTATE_UINT8(tx.ipcss, E1000State),
        VMSTATE_UINT8(tx.ipcso, E1000State),
        VMSTATE_UINT16(tx.ipcse, E1000State),
        VMSTATE_UINT8(tx.tucss, E1000State),
        VMSTATE_UINT8(tx.tucso, E1000State),
        VMSTATE_UINT16(tx.tucse, E1000State),
        VMSTATE_UINT32(tx.paylen, E1000State),
        VMSTATE_UINT8(tx.hdr_len, E1000State),
        VMSTATE_UINT16(tx.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(tx.ip, E1000State),
        VMSTATE_INT8(tx.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individual MAC registers. */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Receive-address, multicast and VLAN filter tables. */
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (VMStateSubsection[]) {
        {
            .vmsd = &vmstate_e1000_mit_state,
            .needed = e1000_mit_state_needed,
        }, {
            /* empty */
        }
    }
};
1392 
/* Default EEPROM image, 64 16-bit words.  pci_e1000_init() copies this
 * into eeprom_data, then overwrites words 0-2 with the MAC address and
 * recomputes the checksum word (EEPROM_CHECKSUM_REG). */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, E1000_DEVID, 0x8086, E1000_DEVID, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1403 
1404 /* PCI interface */
1405 
1406 static void
1407 e1000_mmio_setup(E1000State *d)
1408 {
1409     int i;
1410     const uint32_t excluded_regs[] = {
1411         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1412         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1413     };
1414 
1415     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1416                           "e1000-mmio", PNPMMIO_SIZE);
1417     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1418     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1419         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1420                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1421     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1422 }
1423 
/* Net-layer cleanup callback: the NIC is being torn down, drop our
 * reference so other paths stop using it. */
static void
e1000_cleanup(NetClientState *nc)
{
    E1000State *s = qemu_get_nic_opaque(nc);

    s->nic = NULL;
}
1431 
/* PCI exit hook: stop and free both timers, destroy the memory regions
 * and unregister the NIC. */
static void
pci_e1000_uninit(PCIDevice *dev)
{
    E1000State *d = E1000(dev);

    timer_del(d->autoneg_timer);
    timer_free(d->autoneg_timer);
    timer_del(d->mit_timer);
    timer_free(d->mit_timer);
    memory_region_destroy(&d->mmio);
    memory_region_destroy(&d->io);
    qemu_del_nic(d->nic);
}
1445 
/* Callbacks registered with the net layer for this NIC. */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .cleanup = e1000_cleanup,
    .link_status_changed = e1000_set_link_status,
};
1454 
1455 static int pci_e1000_init(PCIDevice *pci_dev)
1456 {
1457     DeviceState *dev = DEVICE(pci_dev);
1458     E1000State *d = E1000(pci_dev);
1459     uint8_t *pci_conf;
1460     uint16_t checksum = 0;
1461     int i;
1462     uint8_t *macaddr;
1463 
1464     pci_conf = pci_dev->config;
1465 
1466     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1467     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1468 
1469     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1470 
1471     e1000_mmio_setup(d);
1472 
1473     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1474 
1475     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1476 
1477     memmove(d->eeprom_data, e1000_eeprom_template,
1478         sizeof e1000_eeprom_template);
1479     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1480     macaddr = d->conf.macaddr.a;
1481     for (i = 0; i < 3; i++)
1482         d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1483     for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1484         checksum += d->eeprom_data[i];
1485     checksum = (uint16_t) EEPROM_SUM - checksum;
1486     d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1487 
1488     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1489                           object_get_typename(OBJECT(d)), dev->id, d);
1490 
1491     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1492 
1493     add_boot_device_path(d->conf.bootindex, dev, "/ethernet-phy@0");
1494 
1495     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1496     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1497 
1498     return 0;
1499 }
1500 
1501 static void qdev_e1000_reset(DeviceState *dev)
1502 {
1503     E1000State *d = E1000(dev);
1504     e1000_reset(d);
1505 }
1506 
/* User-configurable properties; the two flag bits exist for migration
 * compatibility with older machine types (both default to on). */
static Property e1000_properties[] = {
    DEFINE_NIC_PROPERTIES(E1000State, conf),
    DEFINE_PROP_BIT("autonegotiation", E1000State,
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
    DEFINE_PROP_BIT("mitigation", E1000State,
                    compat_flags, E1000_FLAG_MIT_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};
1515 
1516 static void e1000_class_init(ObjectClass *klass, void *data)
1517 {
1518     DeviceClass *dc = DEVICE_CLASS(klass);
1519     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1520 
1521     k->init = pci_e1000_init;
1522     k->exit = pci_e1000_uninit;
1523     k->romfile = "efi-e1000.rom";
1524     k->vendor_id = PCI_VENDOR_ID_INTEL;
1525     k->device_id = E1000_DEVID;
1526     k->revision = 0x03;
1527     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1528     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1529     dc->desc = "Intel Gigabit Ethernet";
1530     dc->reset = qdev_e1000_reset;
1531     dc->vmsd = &vmstate_e1000;
1532     dc->props = e1000_properties;
1533 }
1534 
/* QOM type registration data for the e1000 PCI device. */
static const TypeInfo e1000_info = {
    .name          = TYPE_E1000,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(E1000State),
    .class_init    = e1000_class_init,
};
1541 
/* Register the e1000 type with QOM; invoked via type_init() below. */
static void e1000_register_types(void)
{
    type_register_static(&e1000_info);
}
1546 
1547 type_init(e1000_register_types)
1548