xref: /openbmc/qemu/hw/net/e1000.c (revision 56983463)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "hw/hw.h"
29 #include "hw/pci/pci.h"
30 #include "net/net.h"
31 #include "net/checksum.h"
32 #include "hw/loader.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 
36 #include "e1000_regs.h"
37 
/* Build-time switch: when defined, the trace machinery below is compiled in. */
#define E1000_DEBUG

#ifdef E1000_DEBUG
/*
 * Trace categories.  Each enumerator's value is the bit position that
 * selects the category inside debugflags.
 */
enum {
    DEBUG_GENERAL,
    DEBUG_IO,
    DEBUG_MMIO,
    DEBUG_INTERRUPT,
    DEBUG_RX,
    DEBUG_TX,
    DEBUG_MDIC,
    DEBUG_EEPROM,
    DEBUG_UNKNOWN,
    DEBUG_TXSUM,
    DEBUG_TXERR,
    DEBUG_RXERR,
    DEBUG_RXFILTER,
    DEBUG_PHY,
    DEBUG_NOTYET,
};

/* Mask for trace category x, where x is the enumerator name minus DEBUG_. */
#define DBGBIT(x) (1 << DEBUG_##x)

/* Set of categories that are actually printed; TXERR and GENERAL by default. */
static int debugflags = DBGBIT(GENERAL) | DBGBIT(TXERR);

/*
 * Emit a printf-style message on stderr, prefixed with "e1000: ", but only
 * if the category `what` is enabled in debugflags.
 */
#define DBGOUT(what, fmt, ...) \
    do { \
        if (debugflags & DBGBIT(what)) { \
            fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
        } \
    } while (0)
#else
/* Tracing compiled out: DBGOUT expands to an empty statement. */
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif
57 
#define IOPORT_SIZE     0x40
#define PNPMMIO_SIZE    0x20000

/* Minimum number of octets in an Ethernet frame, not counting the FCS. */
#define MIN_BUF_SIZE    60

/* With LPE=0, hardware drops packets larger than this. */
#define MAXIMUM_ETHERNET_VLAN_SIZE  1522
/* With LPE=1, hardware drops packets larger than this. */
#define MAXIMUM_ETHERNET_LPE_SIZE   16384
66 
67 /*
68  * HW models:
69  *  E1000_DEV_ID_82540EM works with Windows and Linux
70  *  E1000_DEV_ID_82573L OK with windoze and Linux 2.6.22,
71  *	appears to perform better than 82540EM, but breaks with Linux 2.6.18
72  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
73  *  Others never tested
74  */
75 enum { E1000_DEVID = E1000_DEV_ID_82540EM };
76 
77 /*
78  * May need to specify additional MAC-to-PHY entries --
79  * Intel's Windows driver refuses to initialize unless they match
80  */
81 enum {
82     PHY_ID2_INIT = E1000_DEVID == E1000_DEV_ID_82573L ?		0xcc2 :
83                    E1000_DEVID == E1000_DEV_ID_82544GC_COPPER ?	0xc30 :
84                    /* default to E1000_DEV_ID_82540EM */	0xc20
85 };
86 
87 typedef struct E1000State_st {
88     PCIDevice dev;
89     NICState *nic;
90     NICConf conf;
91     MemoryRegion mmio;
92     MemoryRegion io;
93 
94     uint32_t mac_reg[0x8000];
95     uint16_t phy_reg[0x20];
96     uint16_t eeprom_data[64];
97 
98     uint32_t rxbuf_size;
99     uint32_t rxbuf_min_shift;
100     struct e1000_tx {
101         unsigned char header[256];
102         unsigned char vlan_header[4];
103         /* Fields vlan and data must not be reordered or separated. */
104         unsigned char vlan[4];
105         unsigned char data[0x10000];
106         uint16_t size;
107         unsigned char sum_needed;
108         unsigned char vlan_needed;
109         uint8_t ipcss;
110         uint8_t ipcso;
111         uint16_t ipcse;
112         uint8_t tucss;
113         uint8_t tucso;
114         uint16_t tucse;
115         uint8_t hdr_len;
116         uint16_t mss;
117         uint32_t paylen;
118         uint16_t tso_frames;
119         char tse;
120         int8_t ip;
121         int8_t tcp;
122         char cptse;     // current packet tse bit
123     } tx;
124 
125     struct {
126         uint32_t val_in;	// shifted in from guest driver
127         uint16_t bitnum_in;
128         uint16_t bitnum_out;
129         uint16_t reading;
130         uint32_t old_eecd;
131     } eecd_state;
132 
133     QEMUTimer *autoneg_timer;
134 
135 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
136 #define E1000_FLAG_AUTONEG_BIT 0
137 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
138     uint32_t compat_flags;
139 } E1000State;
140 
141 #define	defreg(x)	x = (E1000_##x>>2)
142 enum {
143     defreg(CTRL),	defreg(EECD),	defreg(EERD),	defreg(GPRC),
144     defreg(GPTC),	defreg(ICR),	defreg(ICS),	defreg(IMC),
145     defreg(IMS),	defreg(LEDCTL),	defreg(MANC),	defreg(MDIC),
146     defreg(MPC),	defreg(PBA),	defreg(RCTL),	defreg(RDBAH),
147     defreg(RDBAL),	defreg(RDH),	defreg(RDLEN),	defreg(RDT),
148     defreg(STATUS),	defreg(SWSM),	defreg(TCTL),	defreg(TDBAH),
149     defreg(TDBAL),	defreg(TDH),	defreg(TDLEN),	defreg(TDT),
150     defreg(TORH),	defreg(TORL),	defreg(TOTH),	defreg(TOTL),
151     defreg(TPR),	defreg(TPT),	defreg(TXDCTL),	defreg(WUFC),
152     defreg(RA),		defreg(MTA),	defreg(CRCERRS),defreg(VFTA),
153     defreg(VET),
154 };
155 
156 static void
157 e1000_link_down(E1000State *s)
158 {
159     s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
160     s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
161 }
162 
163 static void
164 e1000_link_up(E1000State *s)
165 {
166     s->mac_reg[STATUS] |= E1000_STATUS_LU;
167     s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
168 }
169 
170 static void
171 set_phy_ctrl(E1000State *s, int index, uint16_t val)
172 {
173     /*
174      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
175      * migrate during auto negotiation, after migration the link will be
176      * down.
177      */
178     if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
179         return;
180     }
181     if ((val & MII_CR_AUTO_NEG_EN) && (val & MII_CR_RESTART_AUTO_NEG)) {
182         e1000_link_down(s);
183         s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
184         DBGOUT(PHY, "Start link auto negotiation\n");
185         qemu_mod_timer(s->autoneg_timer, qemu_get_clock_ms(vm_clock) + 500);
186     }
187 }
188 
189 static void
190 e1000_autoneg_timer(void *opaque)
191 {
192     E1000State *s = opaque;
193     if (!qemu_get_queue(s->nic)->link_down) {
194         e1000_link_up(s);
195     }
196     s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
197     DBGOUT(PHY, "Auto negotiation is completed\n");
198 }
199 
200 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
201     [PHY_CTRL] = set_phy_ctrl,
202 };
203 
204 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
205 
206 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
207 static const char phy_regcap[0x20] = {
208     [PHY_STATUS] = PHY_R,	[M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
209     [PHY_ID1] = PHY_R,		[M88E1000_PHY_SPEC_CTRL] = PHY_RW,
210     [PHY_CTRL] = PHY_RW,	[PHY_1000T_CTRL] = PHY_RW,
211     [PHY_LP_ABILITY] = PHY_R,	[PHY_1000T_STATUS] = PHY_R,
212     [PHY_AUTONEG_ADV] = PHY_RW,	[M88E1000_RX_ERR_CNTR] = PHY_R,
213     [PHY_ID2] = PHY_R,		[M88E1000_PHY_SPEC_STATUS] = PHY_R
214 };
215 
216 static const uint16_t phy_reg_init[] = {
217     [PHY_CTRL] = 0x1140,
218     [PHY_STATUS] = 0x794d, /* link initially up with not completed autoneg */
219     [PHY_ID1] = 0x141,				[PHY_ID2] = PHY_ID2_INIT,
220     [PHY_1000T_CTRL] = 0x0e00,			[M88E1000_PHY_SPEC_CTRL] = 0x360,
221     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,	[PHY_AUTONEG_ADV] = 0xde1,
222     [PHY_LP_ABILITY] = 0x1e0,			[PHY_1000T_STATUS] = 0x3c00,
223     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
224 };
225 
226 static const uint32_t mac_reg_init[] = {
227     [PBA] =     0x00100030,
228     [LEDCTL] =  0x602,
229     [CTRL] =    E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
230                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
231     [STATUS] =  0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
232                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
233                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
234                 E1000_STATUS_LU,
235     [MANC] =    E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
236                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
237                 E1000_MANC_RMCP_EN,
238 };
239 
240 static void
241 set_interrupt_cause(E1000State *s, int index, uint32_t val)
242 {
243     if (val && (E1000_DEVID >= E1000_DEV_ID_82547EI_MOBILE)) {
244         /* Only for 8257x */
245         val |= E1000_ICR_INT_ASSERTED;
246     }
247     s->mac_reg[ICR] = val;
248 
249     /*
250      * Make sure ICR and ICS registers have the same value.
251      * The spec says that the ICS register is write-only.  However in practice,
252      * on real hardware ICS is readable, and for reads it has the same value as
253      * ICR (except that ICS does not have the clear on read behaviour of ICR).
254      *
255      * The VxWorks PRO/1000 driver uses this behaviour.
256      */
257     s->mac_reg[ICS] = val;
258 
259     qemu_set_irq(s->dev.irq[0], (s->mac_reg[IMS] & s->mac_reg[ICR]) != 0);
260 }
261 
262 static void
263 set_ics(E1000State *s, int index, uint32_t val)
264 {
265     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
266         s->mac_reg[IMS]);
267     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
268 }
269 
270 static int
271 rxbufsize(uint32_t v)
272 {
273     v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
274          E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
275          E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
276     switch (v) {
277     case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
278         return 16384;
279     case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
280         return 8192;
281     case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
282         return 4096;
283     case E1000_RCTL_SZ_1024:
284         return 1024;
285     case E1000_RCTL_SZ_512:
286         return 512;
287     case E1000_RCTL_SZ_256:
288         return 256;
289     }
290     return 2048;
291 }
292 
293 static void e1000_reset(void *opaque)
294 {
295     E1000State *d = opaque;
296     uint8_t *macaddr = d->conf.macaddr.a;
297     int i;
298 
299     qemu_del_timer(d->autoneg_timer);
300     memset(d->phy_reg, 0, sizeof d->phy_reg);
301     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
302     memset(d->mac_reg, 0, sizeof d->mac_reg);
303     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
304     d->rxbuf_min_shift = 1;
305     memset(&d->tx, 0, sizeof d->tx);
306 
307     if (qemu_get_queue(d->nic)->link_down) {
308         e1000_link_down(d);
309     }
310 
311     /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
312     d->mac_reg[RA] = 0;
313     d->mac_reg[RA + 1] = E1000_RAH_AV;
314     for (i = 0; i < 4; i++) {
315         d->mac_reg[RA] |= macaddr[i] << (8 * i);
316         d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
317     }
318 }
319 
320 static void
321 set_ctrl(E1000State *s, int index, uint32_t val)
322 {
323     /* RST is self clearing */
324     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
325 }
326 
327 static void
328 set_rx_control(E1000State *s, int index, uint32_t val)
329 {
330     s->mac_reg[RCTL] = val;
331     s->rxbuf_size = rxbufsize(val);
332     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
333     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
334            s->mac_reg[RCTL]);
335     qemu_flush_queued_packets(qemu_get_queue(s->nic));
336 }
337 
338 static void
339 set_mdic(E1000State *s, int index, uint32_t val)
340 {
341     uint32_t data = val & E1000_MDIC_DATA_MASK;
342     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
343 
344     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
345         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
346     else if (val & E1000_MDIC_OP_READ) {
347         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
348         if (!(phy_regcap[addr] & PHY_R)) {
349             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
350             val |= E1000_MDIC_ERROR;
351         } else
352             val = (val ^ data) | s->phy_reg[addr];
353     } else if (val & E1000_MDIC_OP_WRITE) {
354         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
355         if (!(phy_regcap[addr] & PHY_W)) {
356             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
357             val |= E1000_MDIC_ERROR;
358         } else {
359             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
360                 phyreg_writeops[addr](s, index, data);
361             }
362             s->phy_reg[addr] = data;
363         }
364     }
365     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
366 
367     if (val & E1000_MDIC_INT_EN) {
368         set_ics(s, 0, E1000_ICR_MDAC);
369     }
370 }
371 
372 static uint32_t
373 get_eecd(E1000State *s, int index)
374 {
375     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
376 
377     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
378            s->eecd_state.bitnum_out, s->eecd_state.reading);
379     if (!s->eecd_state.reading ||
380         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
381           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
382         ret |= E1000_EECD_DO;
383     return ret;
384 }
385 
386 static void
387 set_eecd(E1000State *s, int index, uint32_t val)
388 {
389     uint32_t oldval = s->eecd_state.old_eecd;
390 
391     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
392             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
393     if (!(E1000_EECD_CS & val))			// CS inactive; nothing to do
394 	return;
395     if (E1000_EECD_CS & (val ^ oldval)) {	// CS rise edge; reset state
396 	s->eecd_state.val_in = 0;
397 	s->eecd_state.bitnum_in = 0;
398 	s->eecd_state.bitnum_out = 0;
399 	s->eecd_state.reading = 0;
400     }
401     if (!(E1000_EECD_SK & (val ^ oldval)))	// no clock edge
402         return;
403     if (!(E1000_EECD_SK & val)) {		// falling edge
404         s->eecd_state.bitnum_out++;
405         return;
406     }
407     s->eecd_state.val_in <<= 1;
408     if (val & E1000_EECD_DI)
409         s->eecd_state.val_in |= 1;
410     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
411         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
412         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
413             EEPROM_READ_OPCODE_MICROWIRE);
414     }
415     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
416            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
417            s->eecd_state.reading);
418 }
419 
420 static uint32_t
421 flash_eerd_read(E1000State *s, int x)
422 {
423     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
424 
425     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
426         return (s->mac_reg[EERD]);
427 
428     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
429         return (E1000_EEPROM_RW_REG_DONE | r);
430 
431     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
432            E1000_EEPROM_RW_REG_DONE | r);
433 }
434 
/*
 * Compute a checksum over part of a packet and store it, big-endian, in
 * the packet itself.
 *
 * @data: packet bytes
 * @n:    packet length
 * @sloc: offset at which the 16-bit checksum is stored
 * @css:  offset at which checksumming starts
 * @cse:  offset of the last byte to checksum; 0 (or a value >= n) means
 *        "up to the end of the packet"
 *
 * Nothing is written unless sloc is below the (possibly shortened) length
 * minus one.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t end = n;
    uint32_t sum;

    if (cse && cse < end) {
        end = cse + 1;
    }
    if (!(sloc < end - 1)) {
        return;
    }

    sum = net_checksum_add(end - css, data + css);
    cpu_to_be16wu((uint16_t *)(data + sloc), net_checksum_finish(sum));
}
448 
449 static inline int
450 vlan_enabled(E1000State *s)
451 {
452     return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
453 }
454 
455 static inline int
456 vlan_rx_filter_enabled(E1000State *s)
457 {
458     return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
459 }
460 
461 static inline int
462 is_vlan_packet(E1000State *s, const uint8_t *buf)
463 {
464     return (be16_to_cpup((uint16_t *)(buf + 12)) ==
465                 le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
466 }
467 
468 static inline int
469 is_vlan_txd(uint32_t txd_lower)
470 {
471     return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
472 }
473 
474 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
475  * fill it in, just pad descriptor length by 4 bytes unless guest
476  * told us to strip it off the packet. */
477 static inline int
478 fcs_len(E1000State *s)
479 {
480     return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
481 }
482 
483 static void
484 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
485 {
486     NetClientState *nc = qemu_get_queue(s->nic);
487     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
488         nc->info->receive(nc, buf, size);
489     } else {
490         qemu_send_packet(nc, buf, size);
491     }
492 }
493 
494 static void
495 xmit_seg(E1000State *s)
496 {
497     uint16_t len, *sp;
498     unsigned int frames = s->tx.tso_frames, css, sofar, n;
499     struct e1000_tx *tp = &s->tx;
500 
501     if (tp->tse && tp->cptse) {
502         css = tp->ipcss;
503         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
504                frames, tp->size, css);
505         if (tp->ip) {		// IPv4
506             cpu_to_be16wu((uint16_t *)(tp->data+css+2),
507                           tp->size - css);
508             cpu_to_be16wu((uint16_t *)(tp->data+css+4),
509                           be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
510         } else			// IPv6
511             cpu_to_be16wu((uint16_t *)(tp->data+css+4),
512                           tp->size - css);
513         css = tp->tucss;
514         len = tp->size - css;
515         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
516         if (tp->tcp) {
517             sofar = frames * tp->mss;
518             cpu_to_be32wu((uint32_t *)(tp->data+css+4),	// seq
519                 be32_to_cpupu((uint32_t *)(tp->data+css+4))+sofar);
520             if (tp->paylen - sofar > tp->mss)
521                 tp->data[css + 13] &= ~9;		// PSH, FIN
522         } else	// UDP
523             cpu_to_be16wu((uint16_t *)(tp->data+css+4), len);
524         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
525             unsigned int phsum;
526             // add pseudo-header length before checksum calculation
527             sp = (uint16_t *)(tp->data + tp->tucso);
528             phsum = be16_to_cpup(sp) + len;
529             phsum = (phsum >> 16) + (phsum & 0xffff);
530             cpu_to_be16wu(sp, phsum);
531         }
532         tp->tso_frames++;
533     }
534 
535     if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
536         putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
537     if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
538         putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
539     if (tp->vlan_needed) {
540         memmove(tp->vlan, tp->data, 4);
541         memmove(tp->data, tp->data + 4, 8);
542         memcpy(tp->data + 8, tp->vlan_header, 4);
543         e1000_send_packet(s, tp->vlan, tp->size + 4);
544     } else
545         e1000_send_packet(s, tp->data, tp->size);
546     s->mac_reg[TPT]++;
547     s->mac_reg[GPTC]++;
548     n = s->mac_reg[TOTL];
549     if ((s->mac_reg[TOTL] += s->tx.size) < n)
550         s->mac_reg[TOTH]++;
551 }
552 
553 static void
554 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
555 {
556     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
557     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
558     unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
559     unsigned int msh = 0xfffff;
560     uint64_t addr;
561     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
562     struct e1000_tx *tp = &s->tx;
563 
564     if (dtype == E1000_TXD_CMD_DEXT) {	// context descriptor
565         op = le32_to_cpu(xp->cmd_and_length);
566         tp->ipcss = xp->lower_setup.ip_fields.ipcss;
567         tp->ipcso = xp->lower_setup.ip_fields.ipcso;
568         tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
569         tp->tucss = xp->upper_setup.tcp_fields.tucss;
570         tp->tucso = xp->upper_setup.tcp_fields.tucso;
571         tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
572         tp->paylen = op & 0xfffff;
573         tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
574         tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
575         tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
576         tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
577         tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
578         tp->tso_frames = 0;
579         if (tp->tucso == 0) {	// this is probably wrong
580             DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
581             tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
582         }
583         return;
584     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
585         // data descriptor
586         if (tp->size == 0) {
587             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
588         }
589         tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
590     } else {
591         // legacy descriptor
592         tp->cptse = 0;
593     }
594 
595     if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
596         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
597         tp->vlan_needed = 1;
598         cpu_to_be16wu((uint16_t *)(tp->vlan_header),
599                       le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
600         cpu_to_be16wu((uint16_t *)(tp->vlan_header + 2),
601                       le16_to_cpu(dp->upper.fields.special));
602     }
603 
604     addr = le64_to_cpu(dp->buffer_addr);
605     if (tp->tse && tp->cptse) {
606         msh = tp->hdr_len + tp->mss;
607         do {
608             bytes = split_size;
609             if (tp->size + bytes > msh)
610                 bytes = msh - tp->size;
611 
612             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
613             pci_dma_read(&s->dev, addr, tp->data + tp->size, bytes);
614             sz = tp->size + bytes;
615             if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
616                 memmove(tp->header, tp->data, tp->hdr_len);
617             }
618             tp->size = sz;
619             addr += bytes;
620             if (sz == msh) {
621                 xmit_seg(s);
622                 memmove(tp->data, tp->header, tp->hdr_len);
623                 tp->size = tp->hdr_len;
624             }
625         } while (split_size -= bytes);
626     } else if (!tp->tse && tp->cptse) {
627         // context descriptor TSE is not set, while data descriptor TSE is set
628         DBGOUT(TXERR, "TCP segmentation error\n");
629     } else {
630         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
631         pci_dma_read(&s->dev, addr, tp->data + tp->size, split_size);
632         tp->size += split_size;
633     }
634 
635     if (!(txd_lower & E1000_TXD_CMD_EOP))
636         return;
637     if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
638         xmit_seg(s);
639     }
640     tp->tso_frames = 0;
641     tp->sum_needed = 0;
642     tp->vlan_needed = 0;
643     tp->size = 0;
644     tp->cptse = 0;
645 }
646 
647 static uint32_t
648 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
649 {
650     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
651 
652     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
653         return 0;
654     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
655                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
656     dp->upper.data = cpu_to_le32(txd_upper);
657     pci_dma_write(&s->dev, base + ((char *)&dp->upper - (char *)dp),
658                   &dp->upper, sizeof(dp->upper));
659     return E1000_ICR_TXDW;
660 }
661 
662 static uint64_t tx_desc_base(E1000State *s)
663 {
664     uint64_t bah = s->mac_reg[TDBAH];
665     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
666 
667     return (bah << 32) + bal;
668 }
669 
670 static void
671 start_xmit(E1000State *s)
672 {
673     dma_addr_t base;
674     struct e1000_tx_desc desc;
675     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
676 
677     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
678         DBGOUT(TX, "tx disabled\n");
679         return;
680     }
681 
682     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
683         base = tx_desc_base(s) +
684                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
685         pci_dma_read(&s->dev, base, &desc, sizeof(desc));
686 
687         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
688                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
689                desc.upper.data);
690 
691         process_tx_desc(s, &desc);
692         cause |= txdesc_writeback(s, base, &desc);
693 
694         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
695             s->mac_reg[TDH] = 0;
696         /*
697          * the following could happen only if guest sw assigns
698          * bogus values to TDT/TDLEN.
699          * there's nothing too intelligent we could do about this.
700          */
701         if (s->mac_reg[TDH] == tdh_start) {
702             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
703                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
704             break;
705         }
706     }
707     set_ics(s, 0, cause);
708 }
709 
710 static int
711 receive_filter(E1000State *s, const uint8_t *buf, int size)
712 {
713     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
714     static const int mta_shift[] = {4, 3, 2, 0};
715     uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
716 
717     if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
718         uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
719         uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
720                                      ((vid >> 5) & 0x7f));
721         if ((vfta & (1 << (vid & 0x1f))) == 0)
722             return 0;
723     }
724 
725     if (rctl & E1000_RCTL_UPE)			// promiscuous
726         return 1;
727 
728     if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE))	// promiscuous mcast
729         return 1;
730 
731     if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
732         return 1;
733 
734     for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
735         if (!(rp[1] & E1000_RAH_AV))
736             continue;
737         ra[0] = cpu_to_le32(rp[0]);
738         ra[1] = cpu_to_le32(rp[1]);
739         if (!memcmp(buf, (uint8_t *)ra, 6)) {
740             DBGOUT(RXFILTER,
741                    "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
742                    (int)(rp - s->mac_reg - RA)/2,
743                    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
744             return 1;
745         }
746     }
747     DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
748            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
749 
750     f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
751     f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
752     if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
753         return 1;
754     DBGOUT(RXFILTER,
755            "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
756            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
757            (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
758            s->mac_reg[MTA + (f >> 5)]);
759 
760     return 0;
761 }
762 
763 static void
764 e1000_set_link_status(NetClientState *nc)
765 {
766     E1000State *s = qemu_get_nic_opaque(nc);
767     uint32_t old_status = s->mac_reg[STATUS];
768 
769     if (nc->link_down) {
770         e1000_link_down(s);
771     } else {
772         e1000_link_up(s);
773     }
774 
775     if (s->mac_reg[STATUS] != old_status)
776         set_ics(s, 0, E1000_ICR_LSC);
777 }
778 
779 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
780 {
781     int bufs;
782     /* Fast-path short packets */
783     if (total_size <= s->rxbuf_size) {
784         return s->mac_reg[RDH] != s->mac_reg[RDT];
785     }
786     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
787         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
788     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
789         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
790             s->mac_reg[RDT] - s->mac_reg[RDH];
791     } else {
792         return false;
793     }
794     return total_size <= bufs * s->rxbuf_size;
795 }
796 
797 static int
798 e1000_can_receive(NetClientState *nc)
799 {
800     E1000State *s = qemu_get_nic_opaque(nc);
801 
802     return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
803         (s->mac_reg[RCTL] & E1000_RCTL_EN) && e1000_has_rxbufs(s, 1);
804 }
805 
806 static uint64_t rx_desc_base(E1000State *s)
807 {
808     uint64_t bah = s->mac_reg[RDBAH];
809     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
810 
811     return (bah << 32) + bal;
812 }
813 
814 static ssize_t
815 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
816 {
817     E1000State *s = qemu_get_nic_opaque(nc);
818     struct e1000_rx_desc desc;
819     dma_addr_t base;
820     unsigned int n, rdt;
821     uint32_t rdh_start;
822     uint16_t vlan_special = 0;
823     uint8_t vlan_status = 0, vlan_offset = 0;
824     uint8_t min_buf[MIN_BUF_SIZE];
825     size_t desc_offset;
826     size_t desc_size;
827     size_t total_size;
828 
829     if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
830         return -1;
831     }
832 
833     if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
834         return -1;
835     }
836 
837     /* Pad to minimum Ethernet frame length */
838     if (size < sizeof(min_buf)) {
839         memcpy(min_buf, buf, size);
840         memset(&min_buf[size], 0, sizeof(min_buf) - size);
841         buf = min_buf;
842         size = sizeof(min_buf);
843     }
844 
845     /* Discard oversized packets if !LPE and !SBP. */
846     if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
847         (size > MAXIMUM_ETHERNET_VLAN_SIZE
848         && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
849         && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
850         return size;
851     }
852 
853     if (!receive_filter(s, buf, size))
854         return size;
855 
856     if (vlan_enabled(s) && is_vlan_packet(s, buf)) {
857         vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(buf + 14)));
858         memmove((uint8_t *)buf + 4, buf, 12);
859         vlan_status = E1000_RXD_STAT_VP;
860         vlan_offset = 4;
861         size -= 4;
862     }
863 
864     rdh_start = s->mac_reg[RDH];
865     desc_offset = 0;
866     total_size = size + fcs_len(s);
867     if (!e1000_has_rxbufs(s, total_size)) {
868             set_ics(s, 0, E1000_ICS_RXO);
869             return -1;
870     }
871     do {
872         desc_size = total_size - desc_offset;
873         if (desc_size > s->rxbuf_size) {
874             desc_size = s->rxbuf_size;
875         }
876         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
877         pci_dma_read(&s->dev, base, &desc, sizeof(desc));
878         desc.special = vlan_special;
879         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
880         if (desc.buffer_addr) {
881             if (desc_offset < size) {
882                 size_t copy_size = size - desc_offset;
883                 if (copy_size > s->rxbuf_size) {
884                     copy_size = s->rxbuf_size;
885                 }
886                 pci_dma_write(&s->dev, le64_to_cpu(desc.buffer_addr),
887                               buf + desc_offset + vlan_offset, copy_size);
888             }
889             desc_offset += desc_size;
890             desc.length = cpu_to_le16(desc_size);
891             if (desc_offset >= total_size) {
892                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
893             } else {
894                 /* Guest zeroing out status is not a hardware requirement.
895                    Clear EOP in case guest didn't do it. */
896                 desc.status &= ~E1000_RXD_STAT_EOP;
897             }
898         } else { // as per intel docs; skip descriptors with null buf addr
899             DBGOUT(RX, "Null RX descriptor!!\n");
900         }
901         pci_dma_write(&s->dev, base, &desc, sizeof(desc));
902 
903         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
904             s->mac_reg[RDH] = 0;
905         /* see comment in start_xmit; same here */
906         if (s->mac_reg[RDH] == rdh_start) {
907             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
908                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
909             set_ics(s, 0, E1000_ICS_RXO);
910             return -1;
911         }
912     } while (desc_offset < total_size);
913 
914     s->mac_reg[GPRC]++;
915     s->mac_reg[TPR]++;
916     /* TOR - Total Octets Received:
917      * This register includes bytes received in a packet from the <Destination
918      * Address> field through the <CRC> field, inclusively.
919      */
920     n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
921     if (n < s->mac_reg[TORL])
922         s->mac_reg[TORH]++;
923     s->mac_reg[TORL] = n;
924 
925     n = E1000_ICS_RXT0;
926     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
927         rdt += s->mac_reg[RDLEN] / sizeof(desc);
928     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
929         s->rxbuf_min_shift)
930         n |= E1000_ICS_RXDMT0;
931 
932     set_ics(s, 0, n);
933 
934     return size;
935 }
936 
937 static uint32_t
938 mac_readreg(E1000State *s, int index)
939 {
940     return s->mac_reg[index];
941 }
942 
943 static uint32_t
944 mac_icr_read(E1000State *s, int index)
945 {
946     uint32_t ret = s->mac_reg[ICR];
947 
948     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
949     set_interrupt_cause(s, 0, 0);
950     return ret;
951 }
952 
953 static uint32_t
954 mac_read_clr4(E1000State *s, int index)
955 {
956     uint32_t ret = s->mac_reg[index];
957 
958     s->mac_reg[index] = 0;
959     return ret;
960 }
961 
962 static uint32_t
963 mac_read_clr8(E1000State *s, int index)
964 {
965     uint32_t ret = s->mac_reg[index];
966 
967     s->mac_reg[index] = 0;
968     s->mac_reg[index-1] = 0;
969     return ret;
970 }
971 
972 static void
973 mac_writereg(E1000State *s, int index, uint32_t val)
974 {
975     s->mac_reg[index] = val;
976 }
977 
978 static void
979 set_rdt(E1000State *s, int index, uint32_t val)
980 {
981     s->mac_reg[index] = val & 0xffff;
982     if (e1000_has_rxbufs(s, 1)) {
983         qemu_flush_queued_packets(qemu_get_queue(s->nic));
984     }
985 }
986 
987 static void
988 set_16bit(E1000State *s, int index, uint32_t val)
989 {
990     s->mac_reg[index] = val & 0xffff;
991 }
992 
993 static void
994 set_dlen(E1000State *s, int index, uint32_t val)
995 {
996     s->mac_reg[index] = val & 0xfff80;
997 }
998 
999 static void
1000 set_tctl(E1000State *s, int index, uint32_t val)
1001 {
1002     s->mac_reg[index] = val;
1003     s->mac_reg[TDT] &= 0xffff;
1004     start_xmit(s);
1005 }
1006 
1007 static void
1008 set_icr(E1000State *s, int index, uint32_t val)
1009 {
1010     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1011     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1012 }
1013 
1014 static void
1015 set_imc(E1000State *s, int index, uint32_t val)
1016 {
1017     s->mac_reg[IMS] &= ~val;
1018     set_ics(s, 0, 0);
1019 }
1020 
1021 static void
1022 set_ims(E1000State *s, int index, uint32_t val)
1023 {
1024     s->mac_reg[IMS] |= val;
1025     set_ics(s, 0, 0);
1026 }
1027 
1028 #define getreg(x)	[x] = mac_readreg
1029 static uint32_t (*macreg_readops[])(E1000State *, int) = {
1030     getreg(PBA),	getreg(RCTL),	getreg(TDH),	getreg(TXDCTL),
1031     getreg(WUFC),	getreg(TDT),	getreg(CTRL),	getreg(LEDCTL),
1032     getreg(MANC),	getreg(MDIC),	getreg(SWSM),	getreg(STATUS),
1033     getreg(TORL),	getreg(TOTL),	getreg(IMS),	getreg(TCTL),
1034     getreg(RDH),	getreg(RDT),	getreg(VET),	getreg(ICS),
1035     getreg(TDBAL),	getreg(TDBAH),	getreg(RDBAH),	getreg(RDBAL),
1036     getreg(TDLEN),	getreg(RDLEN),
1037 
1038     [TOTH] = mac_read_clr8,	[TORH] = mac_read_clr8,	[GPRC] = mac_read_clr4,
1039     [GPTC] = mac_read_clr4,	[TPR] = mac_read_clr4,	[TPT] = mac_read_clr4,
1040     [ICR] = mac_icr_read,	[EECD] = get_eecd,	[EERD] = flash_eerd_read,
1041     [CRCERRS ... MPC] = &mac_readreg,
1042     [RA ... RA+31] = &mac_readreg,
1043     [MTA ... MTA+127] = &mac_readreg,
1044     [VFTA ... VFTA+127] = &mac_readreg,
1045 };
1046 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1047 
1048 #define putreg(x)	[x] = mac_writereg
1049 static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
1050     putreg(PBA),	putreg(EERD),	putreg(SWSM),	putreg(WUFC),
1051     putreg(TDBAL),	putreg(TDBAH),	putreg(TXDCTL),	putreg(RDBAH),
1052     putreg(RDBAL),	putreg(LEDCTL), putreg(VET),
1053     [TDLEN] = set_dlen,	[RDLEN] = set_dlen,	[TCTL] = set_tctl,
1054     [TDT] = set_tctl,	[MDIC] = set_mdic,	[ICS] = set_ics,
1055     [TDH] = set_16bit,	[RDH] = set_16bit,	[RDT] = set_rdt,
1056     [IMC] = set_imc,	[IMS] = set_ims,	[ICR] = set_icr,
1057     [EECD] = set_eecd,	[RCTL] = set_rx_control, [CTRL] = set_ctrl,
1058     [RA ... RA+31] = &mac_writereg,
1059     [MTA ... MTA+127] = &mac_writereg,
1060     [VFTA ... VFTA+127] = &mac_writereg,
1061 };
1062 
1063 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1064 
1065 static void
1066 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1067                  unsigned size)
1068 {
1069     E1000State *s = opaque;
1070     unsigned int index = (addr & 0x1ffff) >> 2;
1071 
1072     if (index < NWRITEOPS && macreg_writeops[index]) {
1073         macreg_writeops[index](s, index, val);
1074     } else if (index < NREADOPS && macreg_readops[index]) {
1075         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val);
1076     } else {
1077         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1078                index<<2, val);
1079     }
1080 }
1081 
1082 static uint64_t
1083 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1084 {
1085     E1000State *s = opaque;
1086     unsigned int index = (addr & 0x1ffff) >> 2;
1087 
1088     if (index < NREADOPS && macreg_readops[index])
1089     {
1090         return macreg_readops[index](s, index);
1091     }
1092     DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1093     return 0;
1094 }
1095 
1096 static const MemoryRegionOps e1000_mmio_ops = {
1097     .read = e1000_mmio_read,
1098     .write = e1000_mmio_write,
1099     .endianness = DEVICE_LITTLE_ENDIAN,
1100     .impl = {
1101         .min_access_size = 4,
1102         .max_access_size = 4,
1103     },
1104 };
1105 
1106 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1107                               unsigned size)
1108 {
1109     E1000State *s = opaque;
1110 
1111     (void)s;
1112     return 0;
1113 }
1114 
1115 static void e1000_io_write(void *opaque, hwaddr addr,
1116                            uint64_t val, unsigned size)
1117 {
1118     E1000State *s = opaque;
1119 
1120     (void)s;
1121 }
1122 
1123 static const MemoryRegionOps e1000_io_ops = {
1124     .read = e1000_io_read,
1125     .write = e1000_io_write,
1126     .endianness = DEVICE_LITTLE_ENDIAN,
1127 };
1128 
/*
 * VMState field test: true only when the incoming stream uses version 1
 * of the format.  'opaque' is not consulted.
 */
static bool is_version_1(void *opaque, int version_id)
{
    const int legacy_version = 1;

    return version_id == legacy_version;
}
1133 
1134 static void e1000_pre_save(void *opaque)
1135 {
1136     E1000State *s = opaque;
1137     NetClientState *nc = qemu_get_queue(s->nic);
1138 
1139     if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
1140         return;
1141     }
1142 
1143     /*
1144      * If link is down and auto-negotiation is ongoing, complete
1145      * auto-negotiation immediately.  This allows is to look at
1146      * MII_SR_AUTONEG_COMPLETE to infer link status on load.
1147      */
1148     if (nc->link_down &&
1149         s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
1150         s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG) {
1151          s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1152     }
1153 }
1154 
1155 static int e1000_post_load(void *opaque, int version_id)
1156 {
1157     E1000State *s = opaque;
1158     NetClientState *nc = qemu_get_queue(s->nic);
1159 
1160     /* nc.link_down can't be migrated, so infer link_down according
1161      * to link status bit in mac_reg[STATUS].
1162      * Alternatively, restart link negotiation if it was in progress. */
1163     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1164 
1165     if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
1166         return 0;
1167     }
1168 
1169     if (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
1170         s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG &&
1171         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1172         nc->link_down = false;
1173         qemu_mod_timer(s->autoneg_timer, qemu_get_clock_ms(vm_clock) + 500);
1174     }
1175 
1176     return 0;
1177 }
1178 
1179 static const VMStateDescription vmstate_e1000 = {
1180     .name = "e1000",
1181     .version_id = 2,
1182     .minimum_version_id = 1,
1183     .minimum_version_id_old = 1,
1184     .pre_save = e1000_pre_save,
1185     .post_load = e1000_post_load,
1186     .fields      = (VMStateField []) {
1187         VMSTATE_PCI_DEVICE(dev, E1000State),
1188         VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1189         VMSTATE_UNUSED(4), /* Was mmio_base.  */
1190         VMSTATE_UINT32(rxbuf_size, E1000State),
1191         VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1192         VMSTATE_UINT32(eecd_state.val_in, E1000State),
1193         VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1194         VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1195         VMSTATE_UINT16(eecd_state.reading, E1000State),
1196         VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1197         VMSTATE_UINT8(tx.ipcss, E1000State),
1198         VMSTATE_UINT8(tx.ipcso, E1000State),
1199         VMSTATE_UINT16(tx.ipcse, E1000State),
1200         VMSTATE_UINT8(tx.tucss, E1000State),
1201         VMSTATE_UINT8(tx.tucso, E1000State),
1202         VMSTATE_UINT16(tx.tucse, E1000State),
1203         VMSTATE_UINT32(tx.paylen, E1000State),
1204         VMSTATE_UINT8(tx.hdr_len, E1000State),
1205         VMSTATE_UINT16(tx.mss, E1000State),
1206         VMSTATE_UINT16(tx.size, E1000State),
1207         VMSTATE_UINT16(tx.tso_frames, E1000State),
1208         VMSTATE_UINT8(tx.sum_needed, E1000State),
1209         VMSTATE_INT8(tx.ip, E1000State),
1210         VMSTATE_INT8(tx.tcp, E1000State),
1211         VMSTATE_BUFFER(tx.header, E1000State),
1212         VMSTATE_BUFFER(tx.data, E1000State),
1213         VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1214         VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1215         VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1216         VMSTATE_UINT32(mac_reg[EECD], E1000State),
1217         VMSTATE_UINT32(mac_reg[EERD], E1000State),
1218         VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1219         VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1220         VMSTATE_UINT32(mac_reg[ICR], E1000State),
1221         VMSTATE_UINT32(mac_reg[ICS], E1000State),
1222         VMSTATE_UINT32(mac_reg[IMC], E1000State),
1223         VMSTATE_UINT32(mac_reg[IMS], E1000State),
1224         VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1225         VMSTATE_UINT32(mac_reg[MANC], E1000State),
1226         VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1227         VMSTATE_UINT32(mac_reg[MPC], E1000State),
1228         VMSTATE_UINT32(mac_reg[PBA], E1000State),
1229         VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1230         VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1231         VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1232         VMSTATE_UINT32(mac_reg[RDH], E1000State),
1233         VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1234         VMSTATE_UINT32(mac_reg[RDT], E1000State),
1235         VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1236         VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1237         VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1238         VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1239         VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1240         VMSTATE_UINT32(mac_reg[TDH], E1000State),
1241         VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1242         VMSTATE_UINT32(mac_reg[TDT], E1000State),
1243         VMSTATE_UINT32(mac_reg[TORH], E1000State),
1244         VMSTATE_UINT32(mac_reg[TORL], E1000State),
1245         VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1246         VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1247         VMSTATE_UINT32(mac_reg[TPR], E1000State),
1248         VMSTATE_UINT32(mac_reg[TPT], E1000State),
1249         VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1250         VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1251         VMSTATE_UINT32(mac_reg[VET], E1000State),
1252         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1253         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1254         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1255         VMSTATE_END_OF_LIST()
1256     }
1257 };
1258 
1259 static const uint16_t e1000_eeprom_template[64] = {
1260     0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1261     0x3000, 0x1000, 0x6403, E1000_DEVID, 0x8086, E1000_DEVID, 0x8086, 0x3040,
1262     0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1263     0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1264     0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1265     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1266     0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1267     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1268 };
1269 
1270 /* PCI interface */
1271 
1272 static void
1273 e1000_mmio_setup(E1000State *d)
1274 {
1275     int i;
1276     const uint32_t excluded_regs[] = {
1277         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1278         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1279     };
1280 
1281     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1282                           "e1000-mmio", PNPMMIO_SIZE);
1283     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1284     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1285         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1286                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1287     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1288 }
1289 
1290 static void
1291 e1000_cleanup(NetClientState *nc)
1292 {
1293     E1000State *s = qemu_get_nic_opaque(nc);
1294 
1295     s->nic = NULL;
1296 }
1297 
1298 static void
1299 pci_e1000_uninit(PCIDevice *dev)
1300 {
1301     E1000State *d = DO_UPCAST(E1000State, dev, dev);
1302 
1303     qemu_del_timer(d->autoneg_timer);
1304     qemu_free_timer(d->autoneg_timer);
1305     memory_region_destroy(&d->mmio);
1306     memory_region_destroy(&d->io);
1307     qemu_del_nic(d->nic);
1308 }
1309 
1310 static NetClientInfo net_e1000_info = {
1311     .type = NET_CLIENT_OPTIONS_KIND_NIC,
1312     .size = sizeof(NICState),
1313     .can_receive = e1000_can_receive,
1314     .receive = e1000_receive,
1315     .cleanup = e1000_cleanup,
1316     .link_status_changed = e1000_set_link_status,
1317 };
1318 
1319 static int pci_e1000_init(PCIDevice *pci_dev)
1320 {
1321     E1000State *d = DO_UPCAST(E1000State, dev, pci_dev);
1322     uint8_t *pci_conf;
1323     uint16_t checksum = 0;
1324     int i;
1325     uint8_t *macaddr;
1326 
1327     pci_conf = d->dev.config;
1328 
1329     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1330     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1331 
1332     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1333 
1334     e1000_mmio_setup(d);
1335 
1336     pci_register_bar(&d->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1337 
1338     pci_register_bar(&d->dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1339 
1340     memmove(d->eeprom_data, e1000_eeprom_template,
1341         sizeof e1000_eeprom_template);
1342     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1343     macaddr = d->conf.macaddr.a;
1344     for (i = 0; i < 3; i++)
1345         d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1346     for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1347         checksum += d->eeprom_data[i];
1348     checksum = (uint16_t) EEPROM_SUM - checksum;
1349     d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1350 
1351     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1352                           object_get_typename(OBJECT(d)), d->dev.qdev.id, d);
1353 
1354     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1355 
1356     add_boot_device_path(d->conf.bootindex, &pci_dev->qdev, "/ethernet-phy@0");
1357 
1358     d->autoneg_timer = qemu_new_timer_ms(vm_clock, e1000_autoneg_timer, d);
1359 
1360     return 0;
1361 }
1362 
1363 static void qdev_e1000_reset(DeviceState *dev)
1364 {
1365     E1000State *d = DO_UPCAST(E1000State, dev.qdev, dev);
1366     e1000_reset(d);
1367 }
1368 
1369 static Property e1000_properties[] = {
1370     DEFINE_NIC_PROPERTIES(E1000State, conf),
1371     DEFINE_PROP_BIT("autonegotiation", E1000State,
1372                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1373     DEFINE_PROP_END_OF_LIST(),
1374 };
1375 
1376 static void e1000_class_init(ObjectClass *klass, void *data)
1377 {
1378     DeviceClass *dc = DEVICE_CLASS(klass);
1379     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1380 
1381     k->init = pci_e1000_init;
1382     k->exit = pci_e1000_uninit;
1383     k->romfile = "efi-e1000.rom";
1384     k->vendor_id = PCI_VENDOR_ID_INTEL;
1385     k->device_id = E1000_DEVID;
1386     k->revision = 0x03;
1387     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1388     dc->desc = "Intel Gigabit Ethernet";
1389     dc->reset = qdev_e1000_reset;
1390     dc->vmsd = &vmstate_e1000;
1391     dc->props = e1000_properties;
1392 }
1393 
1394 static const TypeInfo e1000_info = {
1395     .name          = "e1000",
1396     .parent        = TYPE_PCI_DEVICE,
1397     .instance_size = sizeof(E1000State),
1398     .class_init    = e1000_class_init,
1399 };
1400 
1401 static void e1000_register_types(void)
1402 {
1403     type_register_static(&e1000_info);
1404 }
1405 
1406 type_init(e1000_register_types)
1407