1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "hw/hw.h"
29 #include "hw/pci/pci.h"
30 #include "net/net.h"
31 #include "net/checksum.h"
32 #include "hw/loader.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 #include "qemu/iov.h"
36 
37 #include "e1000_regs.h"
38 
39 #define E1000_DEBUG
40 
41 #ifdef E1000_DEBUG
42 enum {
43     DEBUG_GENERAL,	DEBUG_IO,	DEBUG_MMIO,	DEBUG_INTERRUPT,
44     DEBUG_RX,		DEBUG_TX,	DEBUG_MDIC,	DEBUG_EEPROM,
45     DEBUG_UNKNOWN,	DEBUG_TXSUM,	DEBUG_TXERR,	DEBUG_RXERR,
46     DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
47 };
48 #define DBGBIT(x)	(1<<DEBUG_##x)
49 static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
50 
51 #define	DBGOUT(what, fmt, ...) do { \
52     if (debugflags & DBGBIT(what)) \
53         fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
54     } while (0)
55 #else
56 #define	DBGOUT(what, fmt, ...) do {} while (0)
57 #endif
58 
59 #define IOPORT_SIZE       0x40
60 #define PNPMMIO_SIZE      0x20000
61 #define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
62 
63 /* this is the size past which hardware will drop packets when setting LPE=0 */
64 #define MAXIMUM_ETHERNET_VLAN_SIZE 1522
65 /* this is the size past which hardware will drop packets when setting LPE=1 */
66 #define MAXIMUM_ETHERNET_LPE_SIZE 16384
67 
68 #define MAXIMUM_ETHERNET_HDR_LEN (14+4)
69 
70 /*
71  * HW models:
72  *  E1000_DEV_ID_82540EM works with Windows and Linux
73  *  E1000_DEV_ID_82573L OK with Windows and Linux 2.6.22,
74  *	appears to perform better than 82540EM, but breaks with Linux 2.6.18
75  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
76  *  Others never tested
77  */
78 enum { E1000_DEVID = E1000_DEV_ID_82540EM };
79 
80 /*
81  * May need to specify additional MAC-to-PHY entries --
82  * Intel's Windows driver refuses to initialize unless they match
83  */
84 enum {
85     PHY_ID2_INIT = E1000_DEVID == E1000_DEV_ID_82573L ?		0xcc2 :
86                    E1000_DEVID == E1000_DEV_ID_82544GC_COPPER ?	0xc30 :
87                    /* default to E1000_DEV_ID_82540EM */	0xc20
88 };
89 
90 typedef struct E1000State_st {
91     /*< private >*/
92     PCIDevice parent_obj;
93     /*< public >*/
94 
95     NICState *nic;
96     NICConf conf;
97     MemoryRegion mmio;
98     MemoryRegion io;
99 
100     uint32_t mac_reg[0x8000];
101     uint16_t phy_reg[0x20];
102     uint16_t eeprom_data[64];
103 
104     uint32_t rxbuf_size;
105     uint32_t rxbuf_min_shift;
106     struct e1000_tx {
107         unsigned char header[256];
108         unsigned char vlan_header[4];
109         /* Fields vlan and data must not be reordered or separated. */
110         unsigned char vlan[4];
111         unsigned char data[0x10000];
112         uint16_t size;
113         unsigned char sum_needed;
114         unsigned char vlan_needed;
115         uint8_t ipcss;
116         uint8_t ipcso;
117         uint16_t ipcse;
118         uint8_t tucss;
119         uint8_t tucso;
120         uint16_t tucse;
121         uint8_t hdr_len;
122         uint16_t mss;
123         uint32_t paylen;
124         uint16_t tso_frames;
125         char tse;
126         int8_t ip;
127         int8_t tcp;
128         char cptse;     // current packet tse bit
129     } tx;
130 
131     struct {
132         uint32_t val_in;	// shifted in from guest driver
133         uint16_t bitnum_in;
134         uint16_t bitnum_out;
135         uint16_t reading;
136         uint32_t old_eecd;
137     } eecd_state;
138 
139     QEMUTimer *autoneg_timer;
140 
141     QEMUTimer *mit_timer;      /* Mitigation timer. */
142     bool mit_timer_on;         /* Mitigation timer is running. */
143     bool mit_irq_level;        /* Tracks interrupt pin level. */
144     uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
145 
146 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
147 #define E1000_FLAG_AUTONEG_BIT 0
148 #define E1000_FLAG_MIT_BIT 1
149 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
150 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
151     uint32_t compat_flags;
152 } E1000State;
153 
154 #define TYPE_E1000 "e1000"
155 
156 #define E1000(obj) \
157     OBJECT_CHECK(E1000State, (obj), TYPE_E1000)
158 
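/* Alias each E1000_<REG> byte offset to its index in the 32-bit mac_reg[] array. */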
159 #define	defreg(x)	x = (E1000_##x>>2)
160 enum {
161     defreg(CTRL),	defreg(EECD),	defreg(EERD),	defreg(GPRC),
162     defreg(GPTC),	defreg(ICR),	defreg(ICS),	defreg(IMC),
163     defreg(IMS),	defreg(LEDCTL),	defreg(MANC),	defreg(MDIC),
164     defreg(MPC),	defreg(PBA),	defreg(RCTL),	defreg(RDBAH),
165     defreg(RDBAL),	defreg(RDH),	defreg(RDLEN),	defreg(RDT),
166     defreg(STATUS),	defreg(SWSM),	defreg(TCTL),	defreg(TDBAH),
167     defreg(TDBAL),	defreg(TDH),	defreg(TDLEN),	defreg(TDT),
168     defreg(TORH),	defreg(TORL),	defreg(TOTH),	defreg(TOTL),
169     defreg(TPR),	defreg(TPT),	defreg(TXDCTL),	defreg(WUFC),
170     defreg(RA),		defreg(MTA),	defreg(CRCERRS),defreg(VFTA),
171     defreg(VET),        defreg(RDTR),   defreg(RADV),   defreg(TADV),
172     defreg(ITR),
173 };
174 
175 static void
176 e1000_link_down(E1000State *s)
177 {
178     s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
179     s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
180 }
181 
182 static void
183 e1000_link_up(E1000State *s)
184 {
185     s->mac_reg[STATUS] |= E1000_STATUS_LU;
186     s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
187 }
188 
189 static void
190 set_phy_ctrl(E1000State *s, int index, uint16_t val)
191 {
192     /*
193      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
194      * migrate during auto negotiation, after migration the link will be
195      * down.
196      */
197     if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
198         return;
199     }
200     if ((val & MII_CR_AUTO_NEG_EN) && (val & MII_CR_RESTART_AUTO_NEG)) {
201         e1000_link_down(s);
202         s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
203         DBGOUT(PHY, "Start link auto negotiation\n");
204         timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
205     }
206 }
207 
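/*
 * Auto-negotiation timer: bring the link back up (unless the backend's link
 * is down) and report auto-negotiation as complete in the PHY status.
 */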
208 static void
209 e1000_autoneg_timer(void *opaque)
210 {
211     E1000State *s = opaque;
212     if (!qemu_get_queue(s->nic)->link_down) {
213         e1000_link_up(s);
214     }
215     s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
216     DBGOUT(PHY, "Auto negotiation is completed\n");
217 }
218 
219 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
220     [PHY_CTRL] = set_phy_ctrl,
221 };
222 
223 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
224 
225 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
226 static const char phy_regcap[0x20] = {
227     [PHY_STATUS] = PHY_R,	[M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
228     [PHY_ID1] = PHY_R,		[M88E1000_PHY_SPEC_CTRL] = PHY_RW,
229     [PHY_CTRL] = PHY_RW,	[PHY_1000T_CTRL] = PHY_RW,
230     [PHY_LP_ABILITY] = PHY_R,	[PHY_1000T_STATUS] = PHY_R,
231     [PHY_AUTONEG_ADV] = PHY_RW,	[M88E1000_RX_ERR_CNTR] = PHY_R,
232     [PHY_ID2] = PHY_R,		[M88E1000_PHY_SPEC_STATUS] = PHY_R
233 };
234 
235 static const uint16_t phy_reg_init[] = {
236     [PHY_CTRL] = 0x1140,
237     [PHY_STATUS] = 0x794d, /* link initially up, autoneg not yet completed */
238     [PHY_ID1] = 0x141,				[PHY_ID2] = PHY_ID2_INIT,
239     [PHY_1000T_CTRL] = 0x0e00,			[M88E1000_PHY_SPEC_CTRL] = 0x360,
240     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,	[PHY_AUTONEG_ADV] = 0xde1,
241     [PHY_LP_ABILITY] = 0x1e0,			[PHY_1000T_STATUS] = 0x3c00,
242     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
243 };
244 
245 static const uint32_t mac_reg_init[] = {
246     [PBA] =     0x00100030,
247     [LEDCTL] =  0x602,
248     [CTRL] =    E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
249                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
250     [STATUS] =  0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
251                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
252                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
253                 E1000_STATUS_LU,
254     [MANC] =    E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
255                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
256                 E1000_MANC_RMCP_EN,
257 };
258 
259 /* Helper function, *curr == 0 means the value is not set */
260 static inline void
261 mit_update_delay(uint32_t *curr, uint32_t value)
262 {
263     if (value && (*curr == 0 || value < *curr)) {
264         *curr = value;
265     }
266 }
267 
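/*
 * Update ICR/ICS with the given cause bits and drive the PCI interrupt line.
 * On a rising edge, raising the line may be postponed by the interrupt
 * mitigation timer (see the comment below on RADV/TADV/ITR emulation).
 */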
268 static void
269 set_interrupt_cause(E1000State *s, int index, uint32_t val)
270 {
271     PCIDevice *d = PCI_DEVICE(s);
272     uint32_t pending_ints;
273     uint32_t mit_delay;
274 
275     if (val && (E1000_DEVID >= E1000_DEV_ID_82547EI_MOBILE)) {
276         /* Only for 8257x */
277         val |= E1000_ICR_INT_ASSERTED;
278     }
279     s->mac_reg[ICR] = val;
280 
281     /*
282      * Make sure ICR and ICS registers have the same value.
283      * The spec says that the ICS register is write-only.  However in practice,
284      * on real hardware ICS is readable, and for reads it has the same value as
285      * ICR (except that ICS does not have the clear on read behaviour of ICR).
286      *
287      * The VxWorks PRO/1000 driver uses this behaviour.
288      */
289     s->mac_reg[ICS] = val;
290 
291     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
292     if (!s->mit_irq_level && pending_ints) {
293         /*
294          * Here we detect a potential raising edge. We postpone raising the
295          * interrupt line if we are inside the mitigation delay window
296          * (s->mit_timer_on == 1).
297          * We provide a partial implementation of interrupt mitigation,
298          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
299          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
300          * RADV; relative timers based on TIDV and RDTR are not implemented.
301          */
302         if (s->mit_timer_on) {
303             return;
304         }
305         if (s->compat_flags & E1000_FLAG_MIT) {
306             /* Compute the next mitigation delay according to pending
307              * interrupts and the current values of RADV (provided
308              * RDTR!=0), TADV and ITR.
309              * Then rearm the timer.
310              */
311             mit_delay = 0;
312             if (s->mit_ide &&
313                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
314                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
315             }
316             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
317                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
318             }
319             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
320 
321             if (mit_delay) {
322                 s->mit_timer_on = 1;
323                 timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
324                           mit_delay * 256);
325             }
326             s->mit_ide = 0;
327         }
328     }
329 
330     s->mit_irq_level = (pending_ints != 0);
331     pci_set_irq(d, s->mit_irq_level);
332 }
333 
334 static void
335 e1000_mit_timer(void *opaque)
336 {
337     E1000State *s = opaque;
338 
339     s->mit_timer_on = 0;
340     /* Call set_interrupt_cause to update the irq level (if necessary). */
341     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
342 }
343 
344 static void
345 set_ics(E1000State *s, int index, uint32_t val)
346 {
347     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
348         s->mac_reg[IMS]);
349     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
350 }
351 
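/* Decode the RCTL buffer-size bits (BSIZE/BSEX) into bytes; default is 2048. */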
352 static int
353 rxbufsize(uint32_t v)
354 {
355     v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
356          E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
357          E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
358     switch (v) {
359     case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
360         return 16384;
361     case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
362         return 8192;
363     case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
364         return 4096;
365     case E1000_RCTL_SZ_1024:
366         return 1024;
367     case E1000_RCTL_SZ_512:
368         return 512;
369     case E1000_RCTL_SZ_256:
370         return 256;
371     }
372     return 2048;
373 }
374 
375 static void e1000_reset(void *opaque)
376 {
377     E1000State *d = opaque;
378     uint8_t *macaddr = d->conf.macaddr.a;
379     int i;
380 
381     timer_del(d->autoneg_timer);
382     timer_del(d->mit_timer);
383     d->mit_timer_on = 0;
384     d->mit_irq_level = 0;
385     d->mit_ide = 0;
386     memset(d->phy_reg, 0, sizeof d->phy_reg);
387     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
388     memset(d->mac_reg, 0, sizeof d->mac_reg);
389     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
390     d->rxbuf_min_shift = 1;
391     memset(&d->tx, 0, sizeof d->tx);
392 
393     if (qemu_get_queue(d->nic)->link_down) {
394         e1000_link_down(d);
395     }
396 
397     /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
398     d->mac_reg[RA] = 0;
399     d->mac_reg[RA + 1] = E1000_RAH_AV;
400     for (i = 0; i < 4; i++) {
401         d->mac_reg[RA] |= macaddr[i] << (8 * i);
402         d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
403     }
404     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
405 }
406 
407 static void
408 set_ctrl(E1000State *s, int index, uint32_t val)
409 {
410     /* RST is self clearing */
411     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
412 }
413 
414 static void
415 set_rx_control(E1000State *s, int index, uint32_t val)
416 {
417     s->mac_reg[RCTL] = val;
418     s->rxbuf_size = rxbufsize(val);
419     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
420     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
421            s->mac_reg[RCTL]);
422     qemu_flush_queued_packets(qemu_get_queue(s->nic));
423 }
424 
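/*
 * MDIC write: emulate MDIO access to the PHY registers (PHY address 1 only),
 * flagging reads/writes of unimplemented registers as errors, and raise an
 * MDAC interrupt if the guest asked for one.
 */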
425 static void
426 set_mdic(E1000State *s, int index, uint32_t val)
427 {
428     uint32_t data = val & E1000_MDIC_DATA_MASK;
429     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
430 
431     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
432         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
433     else if (val & E1000_MDIC_OP_READ) {
434         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
435         if (!(phy_regcap[addr] & PHY_R)) {
436             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
437             val |= E1000_MDIC_ERROR;
438         } else
439             val = (val ^ data) | s->phy_reg[addr];
440     } else if (val & E1000_MDIC_OP_WRITE) {
441         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
442         if (!(phy_regcap[addr] & PHY_W)) {
443             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
444             val |= E1000_MDIC_ERROR;
445         } else {
446             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
447                 phyreg_writeops[addr](s, index, data);
448             }
449             s->phy_reg[addr] = data;
450         }
451     }
452     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
453 
454     if (val & E1000_MDIC_INT_EN) {
455         set_ics(s, 0, E1000_ICR_MDAC);
456     }
457 }
458 
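/*
 * EECD emulation: the guest bit-bangs the Microwire EEPROM serial interface
 * (CS/SK/DI) through set_eecd() and reads the data back one bit at a time on
 * DO through get_eecd().
 */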
459 static uint32_t
460 get_eecd(E1000State *s, int index)
461 {
462     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
463 
464     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
465            s->eecd_state.bitnum_out, s->eecd_state.reading);
466     if (!s->eecd_state.reading ||
467         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
468           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
469         ret |= E1000_EECD_DO;
470     return ret;
471 }
472 
473 static void
474 set_eecd(E1000State *s, int index, uint32_t val)
475 {
476     uint32_t oldval = s->eecd_state.old_eecd;
477 
478     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
479             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
480     if (!(E1000_EECD_CS & val))			// CS inactive; nothing to do
481 	return;
482     if (E1000_EECD_CS & (val ^ oldval)) {	// CS rise edge; reset state
483 	s->eecd_state.val_in = 0;
484 	s->eecd_state.bitnum_in = 0;
485 	s->eecd_state.bitnum_out = 0;
486 	s->eecd_state.reading = 0;
487     }
488     if (!(E1000_EECD_SK & (val ^ oldval)))	// no clock edge
489         return;
490     if (!(E1000_EECD_SK & val)) {		// falling edge
491         s->eecd_state.bitnum_out++;
492         return;
493     }
494     s->eecd_state.val_in <<= 1;
495     if (val & E1000_EECD_DI)
496         s->eecd_state.val_in |= 1;
497     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
498         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
499         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
500             EEPROM_READ_OPCODE_MICROWIRE);
501     }
502     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
503            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
504            s->eecd_state.reading);
505 }
506 
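/*
 * EERD read: once the guest has started an EEPROM read, return the addressed
 * EEPROM word (if in range) together with the DONE bit.
 */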
507 static uint32_t
508 flash_eerd_read(E1000State *s, int x)
509 {
510     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
511 
512     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
513         return (s->mac_reg[EERD]);
514 
515     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
516         return (E1000_EEPROM_RW_REG_DONE | r);
517 
518     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
519            E1000_EEPROM_RW_REG_DONE | r);
520 }
521 
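/*
 * Insert an Internet checksum: sum data[css..cse] (or to the end of the
 * buffer if cse is 0) and store the result big-endian at offset sloc.
 */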
522 static void
523 putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
524 {
525     uint32_t sum;
526 
527     if (cse && cse < n)
528         n = cse + 1;
529     if (sloc < n-1) {
530         sum = net_checksum_add(n-css, data+css);
531         stw_be_p(data + sloc, net_checksum_finish(sum));
532     }
533 }
534 
535 static inline int
536 vlan_enabled(E1000State *s)
537 {
538     return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
539 }
540 
541 static inline int
542 vlan_rx_filter_enabled(E1000State *s)
543 {
544     return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
545 }
546 
547 static inline int
548 is_vlan_packet(E1000State *s, const uint8_t *buf)
549 {
550     return (be16_to_cpup((uint16_t *)(buf + 12)) ==
551                 le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
552 }
553 
554 static inline int
555 is_vlan_txd(uint32_t txd_lower)
556 {
557     return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
558 }
559 
560 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
561  * fill it in, just pad descriptor length by 4 bytes unless guest
562  * told us to strip it off the packet. */
563 static inline int
564 fcs_len(E1000State *s)
565 {
566     return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
567 }
568 
569 static void
570 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
571 {
572     NetClientState *nc = qemu_get_queue(s->nic);
573     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
574         nc->info->receive(nc, buf, size);
575     } else {
576         qemu_send_packet(nc, buf, size);
577     }
578 }
579 
580 static void
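/*
 * Transmit one frame/segment from tp->data: for TSO segments fix up the IP
 * total length and identification (or IPv6 payload length), the TCP sequence
 * number and PSH/FIN flags, and the pseudo-header checksum; then apply the
 * requested TCP/UDP and IP checksum offloads, reinsert the VLAN tag if
 * needed, hand the frame to the network layer and update the TX statistics.
 */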
581 xmit_seg(E1000State *s)
582 {
583     uint16_t len, *sp;
584     unsigned int frames = s->tx.tso_frames, css, sofar, n;
585     struct e1000_tx *tp = &s->tx;
586 
587     if (tp->tse && tp->cptse) {
588         css = tp->ipcss;
589         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
590                frames, tp->size, css);
591         if (tp->ip) {		// IPv4
592             stw_be_p(tp->data+css+2, tp->size - css);
593             stw_be_p(tp->data+css+4,
594                           be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
595         } else			// IPv6
596             stw_be_p(tp->data+css+4, tp->size - css);
597         css = tp->tucss;
598         len = tp->size - css;
599         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
600         if (tp->tcp) {
601             sofar = frames * tp->mss;
602             stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
603             if (tp->paylen - sofar > tp->mss)
604                 tp->data[css + 13] &= ~9;		// PSH, FIN
605         } else	// UDP
606             stw_be_p(tp->data+css+4, len);
607         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
608             unsigned int phsum;
609             // add pseudo-header length before checksum calculation
610             sp = (uint16_t *)(tp->data + tp->tucso);
611             phsum = be16_to_cpup(sp) + len;
612             phsum = (phsum >> 16) + (phsum & 0xffff);
613             stw_be_p(sp, phsum);
614         }
615         tp->tso_frames++;
616     }
617 
618     if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
619         putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
620     if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
621         putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
622     if (tp->vlan_needed) {
623         memmove(tp->vlan, tp->data, 4);
624         memmove(tp->data, tp->data + 4, 8);
625         memcpy(tp->data + 8, tp->vlan_header, 4);
626         e1000_send_packet(s, tp->vlan, tp->size + 4);
627     } else
628         e1000_send_packet(s, tp->data, tp->size);
629     s->mac_reg[TPT]++;
630     s->mac_reg[GPTC]++;
631     n = s->mac_reg[TOTL];
632     if ((s->mac_reg[TOTL] += s->tx.size) < n)
633         s->mac_reg[TOTH]++;
634 }
635 
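/*
 * Process one TX descriptor.  Context descriptors latch the checksum-offload
 * and TSO parameters; data and legacy descriptors have their payload DMAed
 * into tp->data (split into MSS-sized segments when TSO is active) and are
 * transmitted via xmit_seg() once the EOP descriptor is reached.
 */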
636 static void
637 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
638 {
639     PCIDevice *d = PCI_DEVICE(s);
640     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
641     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
642     unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
643     unsigned int msh = 0xfffff;
644     uint64_t addr;
645     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
646     struct e1000_tx *tp = &s->tx;
647 
648     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
649     if (dtype == E1000_TXD_CMD_DEXT) {	// context descriptor
650         op = le32_to_cpu(xp->cmd_and_length);
651         tp->ipcss = xp->lower_setup.ip_fields.ipcss;
652         tp->ipcso = xp->lower_setup.ip_fields.ipcso;
653         tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
654         tp->tucss = xp->upper_setup.tcp_fields.tucss;
655         tp->tucso = xp->upper_setup.tcp_fields.tucso;
656         tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
657         tp->paylen = op & 0xfffff;
658         tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
659         tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
660         tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
661         tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
662         tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
663         tp->tso_frames = 0;
664         if (tp->tucso == 0) {	// this is probably wrong
665             DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
666             tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
667         }
668         return;
669     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
670         // data descriptor
671         if (tp->size == 0) {
672             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
673         }
674         tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
675     } else {
676         // legacy descriptor
677         tp->cptse = 0;
678     }
679 
680     if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
681         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
682         tp->vlan_needed = 1;
683         stw_be_p(tp->vlan_header,
684                       le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
685         stw_be_p(tp->vlan_header + 2,
686                       le16_to_cpu(dp->upper.fields.special));
687     }
688 
689     addr = le64_to_cpu(dp->buffer_addr);
690     if (tp->tse && tp->cptse) {
691         msh = tp->hdr_len + tp->mss;
692         do {
693             bytes = split_size;
694             if (tp->size + bytes > msh)
695                 bytes = msh - tp->size;
696 
697             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
698             pci_dma_read(d, addr, tp->data + tp->size, bytes);
699             sz = tp->size + bytes;
700             if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
701                 memmove(tp->header, tp->data, tp->hdr_len);
702             }
703             tp->size = sz;
704             addr += bytes;
705             if (sz == msh) {
706                 xmit_seg(s);
707                 memmove(tp->data, tp->header, tp->hdr_len);
708                 tp->size = tp->hdr_len;
709             }
710         } while (split_size -= bytes);
711     } else if (!tp->tse && tp->cptse) {
712         // context descriptor TSE is not set, while data descriptor TSE is set
713         DBGOUT(TXERR, "TCP segmentation error\n");
714     } else {
715         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
716         pci_dma_read(d, addr, tp->data + tp->size, split_size);
717         tp->size += split_size;
718     }
719 
720     if (!(txd_lower & E1000_TXD_CMD_EOP))
721         return;
722     if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
723         xmit_seg(s);
724     }
725     tp->tso_frames = 0;
726     tp->sum_needed = 0;
727     tp->vlan_needed = 0;
728     tp->size = 0;
729     tp->cptse = 0;
730 }
731 
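/*
 * Write the DD status bit back to a descriptor if the guest requested status
 * reporting; returns the ICR cause to raise (TXDW) or 0.
 */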
732 static uint32_t
733 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
734 {
735     PCIDevice *d = PCI_DEVICE(s);
736     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
737 
738     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
739         return 0;
740     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
741                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
742     dp->upper.data = cpu_to_le32(txd_upper);
743     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
744                   &dp->upper, sizeof(dp->upper));
745     return E1000_ICR_TXDW;
746 }
747 
748 static uint64_t tx_desc_base(E1000State *s)
749 {
750     uint64_t bah = s->mac_reg[TDBAH];
751     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
752 
753     return (bah << 32) + bal;
754 }
755 
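/*
 * Walk the TX ring from TDH to TDT, processing and writing back each
 * descriptor, then raise the accumulated interrupt causes.
 */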
756 static void
757 start_xmit(E1000State *s)
758 {
759     PCIDevice *d = PCI_DEVICE(s);
760     dma_addr_t base;
761     struct e1000_tx_desc desc;
762     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
763 
764     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
765         DBGOUT(TX, "tx disabled\n");
766         return;
767     }
768 
769     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
770         base = tx_desc_base(s) +
771                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
772         pci_dma_read(d, base, &desc, sizeof(desc));
773 
774         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
775                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
776                desc.upper.data);
777 
778         process_tx_desc(s, &desc);
779         cause |= txdesc_writeback(s, base, &desc);
780 
781         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
782             s->mac_reg[TDH] = 0;
783         /*
784          * the following could happen only if guest sw assigns
785          * bogus values to TDT/TDLEN.
786          * there's nothing too intelligent we could do about this.
787          */
788         if (s->mac_reg[TDH] == tdh_start) {
789             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
790                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
791             break;
792         }
793     }
794     set_ics(s, 0, cause);
795 }
796 
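/*
 * Return nonzero if the incoming frame passes the receive filters: VLAN
 * (VFTA), promiscuous modes, broadcast, exact unicast match against the
 * receive address registers, and finally the multicast table array hash.
 */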
797 static int
798 receive_filter(E1000State *s, const uint8_t *buf, int size)
799 {
800     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
801     static const int mta_shift[] = {4, 3, 2, 0};
802     uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
803 
804     if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
805         uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
806         uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
807                                      ((vid >> 5) & 0x7f));
808         if ((vfta & (1 << (vid & 0x1f))) == 0)
809             return 0;
810     }
811 
812     if (rctl & E1000_RCTL_UPE)			// promiscuous
813         return 1;
814 
815     if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE))	// promiscuous mcast
816         return 1;
817 
818     if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
819         return 1;
820 
821     for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
822         if (!(rp[1] & E1000_RAH_AV))
823             continue;
824         ra[0] = cpu_to_le32(rp[0]);
825         ra[1] = cpu_to_le32(rp[1]);
826         if (!memcmp(buf, (uint8_t *)ra, 6)) {
827             DBGOUT(RXFILTER,
828                    "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
829                    (int)(rp - s->mac_reg - RA)/2,
830                    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
831             return 1;
832         }
833     }
834     DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
835            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
836 
837     f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
838     f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
839     if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
840         return 1;
841     DBGOUT(RXFILTER,
842            "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
843            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
844            (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
845            s->mac_reg[MTA + (f >> 5)]);
846 
847     return 0;
848 }
849 
850 static void
851 e1000_set_link_status(NetClientState *nc)
852 {
853     E1000State *s = qemu_get_nic_opaque(nc);
854     uint32_t old_status = s->mac_reg[STATUS];
855 
856     if (nc->link_down) {
857         e1000_link_down(s);
858     } else {
859         e1000_link_up(s);
860     }
861 
862     if (s->mac_reg[STATUS] != old_status)
863         set_ics(s, 0, E1000_ICR_LSC);
864 }
865 
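/* Do we have enough free RX descriptors between RDH and RDT for total_size bytes? */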
866 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
867 {
868     int bufs;
869     /* Fast-path short packets */
870     if (total_size <= s->rxbuf_size) {
871         return s->mac_reg[RDH] != s->mac_reg[RDT];
872     }
873     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
874         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
875     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
876         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
877             s->mac_reg[RDT] - s->mac_reg[RDH];
878     } else {
879         return false;
880     }
881     return total_size <= bufs * s->rxbuf_size;
882 }
883 
884 static int
885 e1000_can_receive(NetClientState *nc)
886 {
887     E1000State *s = qemu_get_nic_opaque(nc);
888 
889     return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
890         (s->mac_reg[RCTL] & E1000_RCTL_EN) && e1000_has_rxbufs(s, 1);
891 }
892 
893 static uint64_t rx_desc_base(E1000State *s)
894 {
895     uint64_t bah = s->mac_reg[RDBAH];
896     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
897 
898     return (bah << 32) + bal;
899 }
900 
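/*
 * Receive path: pad runt frames, apply size limits and receive filters,
 * optionally strip the VLAN tag, DMA the packet into the guest's RX
 * descriptors and raise RXT0 (RXDMT0 when the ring runs low, RXO when it
 * is full).
 */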
901 static ssize_t
902 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
903 {
904     E1000State *s = qemu_get_nic_opaque(nc);
905     PCIDevice *d = PCI_DEVICE(s);
906     struct e1000_rx_desc desc;
907     dma_addr_t base;
908     unsigned int n, rdt;
909     uint32_t rdh_start;
910     uint16_t vlan_special = 0;
911     uint8_t vlan_status = 0;
912     uint8_t min_buf[MIN_BUF_SIZE];
913     struct iovec min_iov;
914     uint8_t *filter_buf = iov->iov_base;
915     size_t size = iov_size(iov, iovcnt);
916     size_t iov_ofs = 0;
917     size_t desc_offset;
918     size_t desc_size;
919     size_t total_size;
920 
921     if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
922         return -1;
923     }
924 
925     if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
926         return -1;
927     }
928 
929     /* Pad to minimum Ethernet frame length */
930     if (size < sizeof(min_buf)) {
931         iov_to_buf(iov, iovcnt, 0, min_buf, size);
932         memset(&min_buf[size], 0, sizeof(min_buf) - size);
933         min_iov.iov_base = filter_buf = min_buf;
934         min_iov.iov_len = size = sizeof(min_buf);
935         iovcnt = 1;
936         iov = &min_iov;
937     } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
938         /* This is very unlikely, but may happen. */
939         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
940         filter_buf = min_buf;
941     }
942 
943     /* Discard oversized packets if !LPE and !SBP. */
944     if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
945         (size > MAXIMUM_ETHERNET_VLAN_SIZE
946         && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
947         && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
948         return size;
949     }
950 
951     if (!receive_filter(s, filter_buf, size)) {
952         return size;
953     }
954 
955     if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
956         vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
957                                                                 + 14)));
958         iov_ofs = 4;
959         if (filter_buf == iov->iov_base) {
960             memmove(filter_buf + 4, filter_buf, 12);
961         } else {
962             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
963             while (iov->iov_len <= iov_ofs) {
964                 iov_ofs -= iov->iov_len;
965                 iov++;
966             }
967         }
968         vlan_status = E1000_RXD_STAT_VP;
969         size -= 4;
970     }
971 
972     rdh_start = s->mac_reg[RDH];
973     desc_offset = 0;
974     total_size = size + fcs_len(s);
975     if (!e1000_has_rxbufs(s, total_size)) {
976             set_ics(s, 0, E1000_ICS_RXO);
977             return -1;
978     }
979     do {
980         desc_size = total_size - desc_offset;
981         if (desc_size > s->rxbuf_size) {
982             desc_size = s->rxbuf_size;
983         }
984         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
985         pci_dma_read(d, base, &desc, sizeof(desc));
986         desc.special = vlan_special;
987         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
988         if (desc.buffer_addr) {
989             if (desc_offset < size) {
990                 size_t iov_copy;
991                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
992                 size_t copy_size = size - desc_offset;
993                 if (copy_size > s->rxbuf_size) {
994                     copy_size = s->rxbuf_size;
995                 }
996                 do {
997                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
998                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
999                     copy_size -= iov_copy;
1000                     ba += iov_copy;
1001                     iov_ofs += iov_copy;
1002                     if (iov_ofs == iov->iov_len) {
1003                         iov++;
1004                         iov_ofs = 0;
1005                     }
1006                 } while (copy_size);
1007             }
1008             desc_offset += desc_size;
1009             desc.length = cpu_to_le16(desc_size);
1010             if (desc_offset >= total_size) {
1011                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
1012             } else {
1013                 /* Guest zeroing out status is not a hardware requirement.
1014                    Clear EOP in case guest didn't do it. */
1015                 desc.status &= ~E1000_RXD_STAT_EOP;
1016             }
1017         } else { // as per intel docs; skip descriptors with null buf addr
1018             DBGOUT(RX, "Null RX descriptor!!\n");
1019         }
1020         pci_dma_write(d, base, &desc, sizeof(desc));
1021 
1022         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
1023             s->mac_reg[RDH] = 0;
1024         /* see comment in start_xmit; same here */
1025         if (s->mac_reg[RDH] == rdh_start) {
1026             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1027                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1028             set_ics(s, 0, E1000_ICS_RXO);
1029             return -1;
1030         }
1031     } while (desc_offset < total_size);
1032 
1033     s->mac_reg[GPRC]++;
1034     s->mac_reg[TPR]++;
1035     /* TOR - Total Octets Received:
1036      * This register includes bytes received in a packet from the <Destination
1037      * Address> field through the <CRC> field, inclusively.
1038      */
1039     n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
1040     if (n < s->mac_reg[TORL])
1041         s->mac_reg[TORH]++;
1042     s->mac_reg[TORL] = n;
1043 
1044     n = E1000_ICS_RXT0;
1045     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1046         rdt += s->mac_reg[RDLEN] / sizeof(desc);
1047     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1048         s->rxbuf_min_shift)
1049         n |= E1000_ICS_RXDMT0;
1050 
1051     set_ics(s, 0, n);
1052 
1053     return size;
1054 }
1055 
1056 static ssize_t
1057 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1058 {
1059     const struct iovec iov = {
1060         .iov_base = (uint8_t *)buf,
1061         .iov_len = size
1062     };
1063 
1064     return e1000_receive_iov(nc, &iov, 1);
1065 }
1066 
1067 static uint32_t
1068 mac_readreg(E1000State *s, int index)
1069 {
1070     return s->mac_reg[index];
1071 }
1072 
1073 static uint32_t
1074 mac_icr_read(E1000State *s, int index)
1075 {
1076     uint32_t ret = s->mac_reg[ICR];
1077 
1078     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1079     set_interrupt_cause(s, 0, 0);
1080     return ret;
1081 }
1082 
1083 static uint32_t
1084 mac_read_clr4(E1000State *s, int index)
1085 {
1086     uint32_t ret = s->mac_reg[index];
1087 
1088     s->mac_reg[index] = 0;
1089     return ret;
1090 }
1091 
1092 static uint32_t
1093 mac_read_clr8(E1000State *s, int index)
1094 {
1095     uint32_t ret = s->mac_reg[index];
1096 
1097     s->mac_reg[index] = 0;
1098     s->mac_reg[index-1] = 0;
1099     return ret;
1100 }
1101 
1102 static void
1103 mac_writereg(E1000State *s, int index, uint32_t val)
1104 {
1105     uint32_t macaddr[2];
1106 
1107     s->mac_reg[index] = val;
1108 
1109     if (index == RA + 1) {
1110         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1111         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1112         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1113     }
1114 }
1115 
1116 static void
1117 set_rdt(E1000State *s, int index, uint32_t val)
1118 {
1119     s->mac_reg[index] = val & 0xffff;
1120     if (e1000_has_rxbufs(s, 1)) {
1121         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1122     }
1123 }
1124 
1125 static void
1126 set_16bit(E1000State *s, int index, uint32_t val)
1127 {
1128     s->mac_reg[index] = val & 0xffff;
1129 }
1130 
1131 static void
1132 set_dlen(E1000State *s, int index, uint32_t val)
1133 {
1134     s->mac_reg[index] = val & 0xfff80;
1135 }
1136 
1137 static void
1138 set_tctl(E1000State *s, int index, uint32_t val)
1139 {
1140     s->mac_reg[index] = val;
1141     s->mac_reg[TDT] &= 0xffff;
1142     start_xmit(s);
1143 }
1144 
1145 static void
1146 set_icr(E1000State *s, int index, uint32_t val)
1147 {
1148     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1149     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1150 }
1151 
1152 static void
1153 set_imc(E1000State *s, int index, uint32_t val)
1154 {
1155     s->mac_reg[IMS] &= ~val;
1156     set_ics(s, 0, 0);
1157 }
1158 
1159 static void
1160 set_ims(E1000State *s, int index, uint32_t val)
1161 {
1162     s->mac_reg[IMS] |= val;
1163     set_ics(s, 0, 0);
1164 }
1165 
1166 #define getreg(x)	[x] = mac_readreg
1167 static uint32_t (*macreg_readops[])(E1000State *, int) = {
1168     getreg(PBA),	getreg(RCTL),	getreg(TDH),	getreg(TXDCTL),
1169     getreg(WUFC),	getreg(TDT),	getreg(CTRL),	getreg(LEDCTL),
1170     getreg(MANC),	getreg(MDIC),	getreg(SWSM),	getreg(STATUS),
1171     getreg(TORL),	getreg(TOTL),	getreg(IMS),	getreg(TCTL),
1172     getreg(RDH),	getreg(RDT),	getreg(VET),	getreg(ICS),
1173     getreg(TDBAL),	getreg(TDBAH),	getreg(RDBAH),	getreg(RDBAL),
1174     getreg(TDLEN),      getreg(RDLEN),  getreg(RDTR),   getreg(RADV),
1175     getreg(TADV),       getreg(ITR),
1176 
1177     [TOTH] = mac_read_clr8,	[TORH] = mac_read_clr8,	[GPRC] = mac_read_clr4,
1178     [GPTC] = mac_read_clr4,	[TPR] = mac_read_clr4,	[TPT] = mac_read_clr4,
1179     [ICR] = mac_icr_read,	[EECD] = get_eecd,	[EERD] = flash_eerd_read,
1180     [CRCERRS ... MPC] = &mac_readreg,
1181     [RA ... RA+31] = &mac_readreg,
1182     [MTA ... MTA+127] = &mac_readreg,
1183     [VFTA ... VFTA+127] = &mac_readreg,
1184 };
1185 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1186 
1187 #define putreg(x)	[x] = mac_writereg
1188 static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
1189     putreg(PBA),	putreg(EERD),	putreg(SWSM),	putreg(WUFC),
1190     putreg(TDBAL),	putreg(TDBAH),	putreg(TXDCTL),	putreg(RDBAH),
1191     putreg(RDBAL),	putreg(LEDCTL), putreg(VET),
1192     [TDLEN] = set_dlen,	[RDLEN] = set_dlen,	[TCTL] = set_tctl,
1193     [TDT] = set_tctl,	[MDIC] = set_mdic,	[ICS] = set_ics,
1194     [TDH] = set_16bit,	[RDH] = set_16bit,	[RDT] = set_rdt,
1195     [IMC] = set_imc,	[IMS] = set_ims,	[ICR] = set_icr,
1196     [EECD] = set_eecd,	[RCTL] = set_rx_control, [CTRL] = set_ctrl,
1197     [RDTR] = set_16bit, [RADV] = set_16bit,     [TADV] = set_16bit,
1198     [ITR] = set_16bit,
1199     [RA ... RA+31] = &mac_writereg,
1200     [MTA ... MTA+127] = &mac_writereg,
1201     [VFTA ... VFTA+127] = &mac_writereg,
1202 };
1203 
1204 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1205 
1206 static void
1207 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1208                  unsigned size)
1209 {
1210     E1000State *s = opaque;
1211     unsigned int index = (addr & 0x1ffff) >> 2;
1212 
1213     if (index < NWRITEOPS && macreg_writeops[index]) {
1214         macreg_writeops[index](s, index, val);
1215     } else if (index < NREADOPS && macreg_readops[index]) {
1216         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val);
1217     } else {
1218         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1219                index<<2, val);
1220     }
1221 }
1222 
1223 static uint64_t
1224 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1225 {
1226     E1000State *s = opaque;
1227     unsigned int index = (addr & 0x1ffff) >> 2;
1228 
1229     if (index < NREADOPS && macreg_readops[index])
1230     {
1231         return macreg_readops[index](s, index);
1232     }
1233     DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1234     return 0;
1235 }
1236 
1237 static const MemoryRegionOps e1000_mmio_ops = {
1238     .read = e1000_mmio_read,
1239     .write = e1000_mmio_write,
1240     .endianness = DEVICE_LITTLE_ENDIAN,
1241     .impl = {
1242         .min_access_size = 4,
1243         .max_access_size = 4,
1244     },
1245 };
1246 
1247 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1248                               unsigned size)
1249 {
1250     E1000State *s = opaque;
1251 
1252     (void)s;
1253     return 0;
1254 }
1255 
1256 static void e1000_io_write(void *opaque, hwaddr addr,
1257                            uint64_t val, unsigned size)
1258 {
1259     E1000State *s = opaque;
1260 
1261     (void)s;
1262 }
1263 
1264 static const MemoryRegionOps e1000_io_ops = {
1265     .read = e1000_io_read,
1266     .write = e1000_io_write,
1267     .endianness = DEVICE_LITTLE_ENDIAN,
1268 };
1269 
1270 static bool is_version_1(void *opaque, int version_id)
1271 {
1272     return version_id == 1;
1273 }
1274 
1275 static void e1000_pre_save(void *opaque)
1276 {
1277     E1000State *s = opaque;
1278     NetClientState *nc = qemu_get_queue(s->nic);
1279 
1280     /* If the mitigation timer is active, emulate a timeout now. */
1281     if (s->mit_timer_on) {
1282         e1000_mit_timer(s);
1283     }
1284 
1285     if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
1286         return;
1287     }
1288 
1289     /*
1290      * If link is down and auto-negotiation is ongoing, complete
1291      * auto-negotiation immediately.  This allows us to look at
1292      * MII_SR_AUTONEG_COMPLETE to infer link status on load.
1293      */
1294     if (nc->link_down &&
1295         s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
1296         s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG) {
1297          s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1298     }
1299 }
1300 
1301 static int e1000_post_load(void *opaque, int version_id)
1302 {
1303     E1000State *s = opaque;
1304     NetClientState *nc = qemu_get_queue(s->nic);
1305 
1306     if (!(s->compat_flags & E1000_FLAG_MIT)) {
1307         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1308             s->mac_reg[TADV] = 0;
1309         s->mit_irq_level = false;
1310     }
1311     s->mit_ide = 0;
1312     s->mit_timer_on = false;
1313 
1314     /* nc.link_down can't be migrated, so infer link_down according
1315      * to link status bit in mac_reg[STATUS].
1316      * Alternatively, restart link negotiation if it was in progress. */
1317     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1318 
1319     if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
1320         return 0;
1321     }
1322 
1323     if (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
1324         s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG &&
1325         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1326         nc->link_down = false;
1327         timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1328     }
1329 
1330     return 0;
1331 }
1332 
1333 static bool e1000_mit_state_needed(void *opaque)
1334 {
1335     E1000State *s = opaque;
1336 
1337     return s->compat_flags & E1000_FLAG_MIT;
1338 }
1339 
1340 static const VMStateDescription vmstate_e1000_mit_state = {
1341     .name = "e1000/mit_state",
1342     .version_id = 1,
1343     .minimum_version_id = 1,
1344     .minimum_version_id_old = 1,
1345     .fields    = (VMStateField[]) {
1346         VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1347         VMSTATE_UINT32(mac_reg[RADV], E1000State),
1348         VMSTATE_UINT32(mac_reg[TADV], E1000State),
1349         VMSTATE_UINT32(mac_reg[ITR], E1000State),
1350         VMSTATE_BOOL(mit_irq_level, E1000State),
1351         VMSTATE_END_OF_LIST()
1352     }
1353 };
1354 
1355 static const VMStateDescription vmstate_e1000 = {
1356     .name = "e1000",
1357     .version_id = 2,
1358     .minimum_version_id = 1,
1359     .minimum_version_id_old = 1,
1360     .pre_save = e1000_pre_save,
1361     .post_load = e1000_post_load,
1362     .fields      = (VMStateField []) {
1363         VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1364         VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1365         VMSTATE_UNUSED(4), /* Was mmio_base.  */
1366         VMSTATE_UINT32(rxbuf_size, E1000State),
1367         VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1368         VMSTATE_UINT32(eecd_state.val_in, E1000State),
1369         VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1370         VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1371         VMSTATE_UINT16(eecd_state.reading, E1000State),
1372         VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1373         VMSTATE_UINT8(tx.ipcss, E1000State),
1374         VMSTATE_UINT8(tx.ipcso, E1000State),
1375         VMSTATE_UINT16(tx.ipcse, E1000State),
1376         VMSTATE_UINT8(tx.tucss, E1000State),
1377         VMSTATE_UINT8(tx.tucso, E1000State),
1378         VMSTATE_UINT16(tx.tucse, E1000State),
1379         VMSTATE_UINT32(tx.paylen, E1000State),
1380         VMSTATE_UINT8(tx.hdr_len, E1000State),
1381         VMSTATE_UINT16(tx.mss, E1000State),
1382         VMSTATE_UINT16(tx.size, E1000State),
1383         VMSTATE_UINT16(tx.tso_frames, E1000State),
1384         VMSTATE_UINT8(tx.sum_needed, E1000State),
1385         VMSTATE_INT8(tx.ip, E1000State),
1386         VMSTATE_INT8(tx.tcp, E1000State),
1387         VMSTATE_BUFFER(tx.header, E1000State),
1388         VMSTATE_BUFFER(tx.data, E1000State),
1389         VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1390         VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1391         VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1392         VMSTATE_UINT32(mac_reg[EECD], E1000State),
1393         VMSTATE_UINT32(mac_reg[EERD], E1000State),
1394         VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1395         VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1396         VMSTATE_UINT32(mac_reg[ICR], E1000State),
1397         VMSTATE_UINT32(mac_reg[ICS], E1000State),
1398         VMSTATE_UINT32(mac_reg[IMC], E1000State),
1399         VMSTATE_UINT32(mac_reg[IMS], E1000State),
1400         VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1401         VMSTATE_UINT32(mac_reg[MANC], E1000State),
1402         VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1403         VMSTATE_UINT32(mac_reg[MPC], E1000State),
1404         VMSTATE_UINT32(mac_reg[PBA], E1000State),
1405         VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1406         VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1407         VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1408         VMSTATE_UINT32(mac_reg[RDH], E1000State),
1409         VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1410         VMSTATE_UINT32(mac_reg[RDT], E1000State),
1411         VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1412         VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1413         VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1414         VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1415         VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1416         VMSTATE_UINT32(mac_reg[TDH], E1000State),
1417         VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1418         VMSTATE_UINT32(mac_reg[TDT], E1000State),
1419         VMSTATE_UINT32(mac_reg[TORH], E1000State),
1420         VMSTATE_UINT32(mac_reg[TORL], E1000State),
1421         VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1422         VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1423         VMSTATE_UINT32(mac_reg[TPR], E1000State),
1424         VMSTATE_UINT32(mac_reg[TPT], E1000State),
1425         VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1426         VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1427         VMSTATE_UINT32(mac_reg[VET], E1000State),
1428         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1429         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1430         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1431         VMSTATE_END_OF_LIST()
1432     },
1433     .subsections = (VMStateSubsection[]) {
1434         {
1435             .vmsd = &vmstate_e1000_mit_state,
1436             .needed = e1000_mit_state_needed,
1437         }, {
1438             /* empty */
1439         }
1440     }
1441 };
1442 
1443 static const uint16_t e1000_eeprom_template[64] = {
1444     0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1445     0x3000, 0x1000, 0x6403, E1000_DEVID, 0x8086, E1000_DEVID, 0x8086, 0x3040,
1446     0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1447     0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1448     0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1449     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1450     0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1451     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1452 };
1453 
1454 /* PCI interface */
1455 
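/*
 * Register the MMIO and I/O regions.  MMIO is coalesced for all register
 * ranges except the registers listed in excluded_regs, whose writes have
 * side effects and must be handled immediately.
 */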
1456 static void
1457 e1000_mmio_setup(E1000State *d)
1458 {
1459     int i;
1460     const uint32_t excluded_regs[] = {
1461         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1462         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1463     };
1464 
1465     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1466                           "e1000-mmio", PNPMMIO_SIZE);
1467     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1468     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1469         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1470                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1471     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1472 }
1473 
1474 static void
1475 e1000_cleanup(NetClientState *nc)
1476 {
1477     E1000State *s = qemu_get_nic_opaque(nc);
1478 
1479     s->nic = NULL;
1480 }
1481 
1482 static void
1483 pci_e1000_uninit(PCIDevice *dev)
1484 {
1485     E1000State *d = E1000(dev);
1486 
1487     timer_del(d->autoneg_timer);
1488     timer_free(d->autoneg_timer);
1489     timer_del(d->mit_timer);
1490     timer_free(d->mit_timer);
1491     memory_region_destroy(&d->mmio);
1492     memory_region_destroy(&d->io);
1493     qemu_del_nic(d->nic);
1494 }
1495 
1496 static NetClientInfo net_e1000_info = {
1497     .type = NET_CLIENT_OPTIONS_KIND_NIC,
1498     .size = sizeof(NICState),
1499     .can_receive = e1000_can_receive,
1500     .receive = e1000_receive,
1501     .receive_iov = e1000_receive_iov,
1502     .cleanup = e1000_cleanup,
1503     .link_status_changed = e1000_set_link_status,
1504 };
1505 
1506 static int pci_e1000_init(PCIDevice *pci_dev)
1507 {
1508     DeviceState *dev = DEVICE(pci_dev);
1509     E1000State *d = E1000(pci_dev);
1510     uint8_t *pci_conf;
1511     uint16_t checksum = 0;
1512     int i;
1513     uint8_t *macaddr;
1514 
1515     pci_conf = pci_dev->config;
1516 
1517     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1518     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1519 
1520     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1521 
1522     e1000_mmio_setup(d);
1523 
1524     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1525 
1526     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1527 
1528     memmove(d->eeprom_data, e1000_eeprom_template,
1529         sizeof e1000_eeprom_template);
1530     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1531     macaddr = d->conf.macaddr.a;
1532     for (i = 0; i < 3; i++)
1533         d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1534     for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1535         checksum += d->eeprom_data[i];
1536     checksum = (uint16_t) EEPROM_SUM - checksum;
1537     d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1538 
1539     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1540                           object_get_typename(OBJECT(d)), dev->id, d);
1541 
1542     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1543 
1544     add_boot_device_path(d->conf.bootindex, dev, "/ethernet-phy@0");
1545 
1546     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1547     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1548 
1549     return 0;
1550 }
1551 
1552 static void qdev_e1000_reset(DeviceState *dev)
1553 {
1554     E1000State *d = E1000(dev);
1555     e1000_reset(d);
1556 }
1557 
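/*
 * Guest-visible qdev properties.  A typical (illustrative) command line:
 *   -netdev user,id=net0 -device e1000,netdev=net0,mac=52:54:00:12:34:56
 * "autonegotiation" and "mitigation" exist for migration compatibility with
 * QEMU 1.3 and older and normally stay at their default of on.
 */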
1558 static Property e1000_properties[] = {
1559     DEFINE_NIC_PROPERTIES(E1000State, conf),
1560     DEFINE_PROP_BIT("autonegotiation", E1000State,
1561                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1562     DEFINE_PROP_BIT("mitigation", E1000State,
1563                     compat_flags, E1000_FLAG_MIT_BIT, true),
1564     DEFINE_PROP_END_OF_LIST(),
1565 };
1566 
1567 static void e1000_class_init(ObjectClass *klass, void *data)
1568 {
1569     DeviceClass *dc = DEVICE_CLASS(klass);
1570     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1571 
1572     k->init = pci_e1000_init;
1573     k->exit = pci_e1000_uninit;
1574     k->romfile = "efi-e1000.rom";
1575     k->vendor_id = PCI_VENDOR_ID_INTEL;
1576     k->device_id = E1000_DEVID;
1577     k->revision = 0x03;
1578     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1579     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1580     dc->desc = "Intel Gigabit Ethernet";
1581     dc->reset = qdev_e1000_reset;
1582     dc->vmsd = &vmstate_e1000;
1583     dc->props = e1000_properties;
1584 }
1585 
1586 static const TypeInfo e1000_info = {
1587     .name          = TYPE_E1000,
1588     .parent        = TYPE_PCI_DEVICE,
1589     .instance_size = sizeof(E1000State),
1590     .class_init    = e1000_class_init,
1591 };
1592 
1593 static void e1000_register_types(void)
1594 {
1595     type_register_static(&e1000_info);
1596 }
1597 
1598 type_init(e1000_register_types)
1599