xref: /openbmc/qemu/hw/net/e1000.c (revision 56c4bfb3)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "hw/hw.h"
29 #include "hw/pci/pci.h"
30 #include "net/net.h"
31 #include "net/checksum.h"
32 #include "hw/loader.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 
36 #include "e1000_regs.h"
37 
38 #define E1000_DEBUG
39 
40 #ifdef E1000_DEBUG
41 enum {
42     DEBUG_GENERAL,	DEBUG_IO,	DEBUG_MMIO,	DEBUG_INTERRUPT,
43     DEBUG_RX,		DEBUG_TX,	DEBUG_MDIC,	DEBUG_EEPROM,
44     DEBUG_UNKNOWN,	DEBUG_TXSUM,	DEBUG_TXERR,	DEBUG_RXERR,
45     DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
46 };
47 #define DBGBIT(x)	(1<<DEBUG_##x)
48 static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
49 
50 #define	DBGOUT(what, fmt, ...) do { \
51     if (debugflags & DBGBIT(what)) \
52         fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
53     } while (0)
54 #else
55 #define	DBGOUT(what, fmt, ...) do {} while (0)
56 #endif
57 
58 #define IOPORT_SIZE       0x40
59 #define PNPMMIO_SIZE      0x20000
60 #define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
61 
62 /* this is the size past which hardware will drop packets when setting LPE=0 */
63 #define MAXIMUM_ETHERNET_VLAN_SIZE 1522
64 /* this is the size past which hardware will drop packets when setting LPE=1 */
65 #define MAXIMUM_ETHERNET_LPE_SIZE 16384
66 
67 /*
68  * HW models:
69  *  E1000_DEV_ID_82540EM works with Windows and Linux
70  *  E1000_DEV_ID_82573L OK with Windows and Linux 2.6.22,
71  *	appears to perform better than 82540EM, but breaks with Linux 2.6.18
72  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
73  *  Others never tested
74  */
75 enum { E1000_DEVID = E1000_DEV_ID_82540EM };
76 
77 /*
78  * May need to specify additional MAC-to-PHY entries --
79  * Intel's Windows driver refuses to initialize unless they match
80  */
81 enum {
82     PHY_ID2_INIT = E1000_DEVID == E1000_DEV_ID_82573L ?		0xcc2 :
83                    E1000_DEVID == E1000_DEV_ID_82544GC_COPPER ?	0xc30 :
84                    /* default to E1000_DEV_ID_82540EM */	0xc20
85 };
86 
87 typedef struct E1000State_st {
88     /*< private >*/
89     PCIDevice parent_obj;
90     /*< public >*/
91 
92     NICState *nic;
93     NICConf conf;
94     MemoryRegion mmio;
95     MemoryRegion io;
96 
97     uint32_t mac_reg[0x8000];
98     uint16_t phy_reg[0x20];
99     uint16_t eeprom_data[64];
100 
101     uint32_t rxbuf_size;
102     uint32_t rxbuf_min_shift;
103     struct e1000_tx {
104         unsigned char header[256];
105         unsigned char vlan_header[4];
106         /* Fields vlan and data must not be reordered or separated. */
107         unsigned char vlan[4];
108         unsigned char data[0x10000];
109         uint16_t size;
110         unsigned char sum_needed;
111         unsigned char vlan_needed;
112         uint8_t ipcss;
113         uint8_t ipcso;
114         uint16_t ipcse;
115         uint8_t tucss;
116         uint8_t tucso;
117         uint16_t tucse;
118         uint8_t hdr_len;
119         uint16_t mss;
120         uint32_t paylen;
121         uint16_t tso_frames;
122         char tse;
123         int8_t ip;
124         int8_t tcp;
125         char cptse;     // current packet tse bit
126     } tx;
127 
128     struct {
129         uint32_t val_in;	// shifted in from guest driver
130         uint16_t bitnum_in;
131         uint16_t bitnum_out;
132         uint16_t reading;
133         uint32_t old_eecd;
134     } eecd_state;
135 
136     QEMUTimer *autoneg_timer;
137 
138 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
139 #define E1000_FLAG_AUTONEG_BIT 0
140 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
141     uint32_t compat_flags;
142 } E1000State;
143 
144 #define TYPE_E1000 "e1000"
145 
146 #define E1000(obj) \
147     OBJECT_CHECK(E1000State, (obj), TYPE_E1000)
148 
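/*
 * Map E1000_* register byte offsets onto indices into mac_reg[], which is
 * indexed in 32-bit words (hence the >> 2).
 */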
149 #define	defreg(x)	x = (E1000_##x>>2)
150 enum {
151     defreg(CTRL),	defreg(EECD),	defreg(EERD),	defreg(GPRC),
152     defreg(GPTC),	defreg(ICR),	defreg(ICS),	defreg(IMC),
153     defreg(IMS),	defreg(LEDCTL),	defreg(MANC),	defreg(MDIC),
154     defreg(MPC),	defreg(PBA),	defreg(RCTL),	defreg(RDBAH),
155     defreg(RDBAL),	defreg(RDH),	defreg(RDLEN),	defreg(RDT),
156     defreg(STATUS),	defreg(SWSM),	defreg(TCTL),	defreg(TDBAH),
157     defreg(TDBAL),	defreg(TDH),	defreg(TDLEN),	defreg(TDT),
158     defreg(TORH),	defreg(TORL),	defreg(TOTH),	defreg(TOTL),
159     defreg(TPR),	defreg(TPT),	defreg(TXDCTL),	defreg(WUFC),
160     defreg(RA),		defreg(MTA),	defreg(CRCERRS),defreg(VFTA),
161     defreg(VET),
162 };
163 
164 static void
165 e1000_link_down(E1000State *s)
166 {
167     s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
168     s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
169 }
170 
171 static void
172 e1000_link_up(E1000State *s)
173 {
174     s->mac_reg[STATUS] |= E1000_STATUS_LU;
175     s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
176 }
177 
178 static void
179 set_phy_ctrl(E1000State *s, int index, uint16_t val)
180 {
181     /*
182      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
183      * migrate during auto negotiation, after migration the link will be
184      * down.
185      */
186     if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
187         return;
188     }
189     if ((val & MII_CR_AUTO_NEG_EN) && (val & MII_CR_RESTART_AUTO_NEG)) {
190         e1000_link_down(s);
191         s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
192         DBGOUT(PHY, "Start link auto negotiation\n");
193         qemu_mod_timer(s->autoneg_timer, qemu_get_clock_ms(vm_clock) + 500);
194     }
195 }
196 
197 static void
198 e1000_autoneg_timer(void *opaque)
199 {
200     E1000State *s = opaque;
201     if (!qemu_get_queue(s->nic)->link_down) {
202         e1000_link_up(s);
203     }
204     s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
205     DBGOUT(PHY, "Auto negotiation is completed\n");
206 }
207 
208 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
209     [PHY_CTRL] = set_phy_ctrl,
210 };
211 
212 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
213 
214 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
215 static const char phy_regcap[0x20] = {
216     [PHY_STATUS] = PHY_R,	[M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
217     [PHY_ID1] = PHY_R,		[M88E1000_PHY_SPEC_CTRL] = PHY_RW,
218     [PHY_CTRL] = PHY_RW,	[PHY_1000T_CTRL] = PHY_RW,
219     [PHY_LP_ABILITY] = PHY_R,	[PHY_1000T_STATUS] = PHY_R,
220     [PHY_AUTONEG_ADV] = PHY_RW,	[M88E1000_RX_ERR_CNTR] = PHY_R,
221     [PHY_ID2] = PHY_R,		[M88E1000_PHY_SPEC_STATUS] = PHY_R
222 };
223 
224 static const uint16_t phy_reg_init[] = {
225     [PHY_CTRL] = 0x1140,
226     [PHY_STATUS] = 0x794d, /* link initially up, auto-negotiation not yet complete */
227     [PHY_ID1] = 0x141,				[PHY_ID2] = PHY_ID2_INIT,
228     [PHY_1000T_CTRL] = 0x0e00,			[M88E1000_PHY_SPEC_CTRL] = 0x360,
229     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,	[PHY_AUTONEG_ADV] = 0xde1,
230     [PHY_LP_ABILITY] = 0x1e0,			[PHY_1000T_STATUS] = 0x3c00,
231     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
232 };
233 
234 static const uint32_t mac_reg_init[] = {
235     [PBA] =     0x00100030,
236     [LEDCTL] =  0x602,
237     [CTRL] =    E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
238                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
239     [STATUS] =  0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
240                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
241                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
242                 E1000_STATUS_LU,
243     [MANC] =    E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
244                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
245                 E1000_MANC_RMCP_EN,
246 };
247 
248 static void
249 set_interrupt_cause(E1000State *s, int index, uint32_t val)
250 {
251     PCIDevice *d = PCI_DEVICE(s);
252 
253     if (val && (E1000_DEVID >= E1000_DEV_ID_82547EI_MOBILE)) {
254         /* Only for 8257x */
255         val |= E1000_ICR_INT_ASSERTED;
256     }
257     s->mac_reg[ICR] = val;
258 
259     /*
260      * Make sure ICR and ICS registers have the same value.
261      * The spec says that the ICS register is write-only.  However in practice,
262      * on real hardware ICS is readable, and for reads it has the same value as
263      * ICR (except that ICS does not have the clear on read behaviour of ICR).
264      *
265      * The VxWorks PRO/1000 driver uses this behaviour.
266      */
267     s->mac_reg[ICS] = val;
268 
269     qemu_set_irq(d->irq[0], (s->mac_reg[IMS] & s->mac_reg[ICR]) != 0);
270 }
271 
272 static void
273 set_ics(E1000State *s, int index, uint32_t val)
274 {
275     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
276         s->mac_reg[IMS]);
277     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
278 }
279 
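/*
 * Decode the RCTL buffer-size bits (BSEX plus the SZ field) into the
 * receive buffer size in bytes; any other encoding falls back to the
 * default of 2048 bytes.
 */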
280 static int
281 rxbufsize(uint32_t v)
282 {
283     v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
284          E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
285          E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
286     switch (v) {
287     case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
288         return 16384;
289     case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
290         return 8192;
291     case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
292         return 4096;
293     case E1000_RCTL_SZ_1024:
294         return 1024;
295     case E1000_RCTL_SZ_512:
296         return 512;
297     case E1000_RCTL_SZ_256:
298         return 256;
299     }
300     return 2048;
301 }
302 
303 static void e1000_reset(void *opaque)
304 {
305     E1000State *d = opaque;
306     uint8_t *macaddr = d->conf.macaddr.a;
307     int i;
308 
309     qemu_del_timer(d->autoneg_timer);
310     memset(d->phy_reg, 0, sizeof d->phy_reg);
311     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
312     memset(d->mac_reg, 0, sizeof d->mac_reg);
313     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
314     d->rxbuf_min_shift = 1;
315     memset(&d->tx, 0, sizeof d->tx);
316 
317     if (qemu_get_queue(d->nic)->link_down) {
318         e1000_link_down(d);
319     }
320 
321     /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
322     d->mac_reg[RA] = 0;
323     d->mac_reg[RA + 1] = E1000_RAH_AV;
324     for (i = 0; i < 4; i++) {
325         d->mac_reg[RA] |= macaddr[i] << (8 * i);
326         d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
327     }
328 }
329 
330 static void
331 set_ctrl(E1000State *s, int index, uint32_t val)
332 {
333     /* RST is self clearing */
334     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
335 }
336 
337 static void
338 set_rx_control(E1000State *s, int index, uint32_t val)
339 {
340     s->mac_reg[RCTL] = val;
341     s->rxbuf_size = rxbufsize(val);
342     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
343     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
344            s->mac_reg[RCTL]);
345     qemu_flush_queued_packets(qemu_get_queue(s->nic));
346 }
347 
348 static void
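/*
 * MDIC mediates guest access to the PHY registers.  Only PHY address 1 is
 * implemented; accesses to unimplemented registers set the ERROR bit.
 * READY is set on completion and, if requested, an MDAC interrupt is raised.
 */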
349 set_mdic(E1000State *s, int index, uint32_t val)
350 {
351     uint32_t data = val & E1000_MDIC_DATA_MASK;
352     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
353 
354     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
355         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
356     else if (val & E1000_MDIC_OP_READ) {
357         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
358         if (!(phy_regcap[addr] & PHY_R)) {
359             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
360             val |= E1000_MDIC_ERROR;
361         } else
362             val = (val ^ data) | s->phy_reg[addr];
363     } else if (val & E1000_MDIC_OP_WRITE) {
364         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
365         if (!(phy_regcap[addr] & PHY_W)) {
366             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
367             val |= E1000_MDIC_ERROR;
368         } else {
369             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
370                 phyreg_writeops[addr](s, index, data);
371             }
372             s->phy_reg[addr] = data;
373         }
374     }
375     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
376 
377     if (val & E1000_MDIC_INT_EN) {
378         set_ics(s, 0, E1000_ICR_MDAC);
379     }
380 }
381 
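/*
 * EECD emulates the bit-banged Microwire EEPROM interface: set_eecd()
 * shifts in a 3-bit opcode plus a 6-bit word address via DI on rising SK
 * edges, and once a READ opcode has been latched, get_eecd() shifts the
 * selected eeprom_data word out MSB-first through DO.
 */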
382 static uint32_t
383 get_eecd(E1000State *s, int index)
384 {
385     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
386 
387     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
388            s->eecd_state.bitnum_out, s->eecd_state.reading);
389     if (!s->eecd_state.reading ||
390         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
391           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
392         ret |= E1000_EECD_DO;
393     return ret;
394 }
395 
396 static void
397 set_eecd(E1000State *s, int index, uint32_t val)
398 {
399     uint32_t oldval = s->eecd_state.old_eecd;
400 
401     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
402             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
403     if (!(E1000_EECD_CS & val))			// CS inactive; nothing to do
404 	return;
405     if (E1000_EECD_CS & (val ^ oldval)) {	// CS rise edge; reset state
406 	s->eecd_state.val_in = 0;
407 	s->eecd_state.bitnum_in = 0;
408 	s->eecd_state.bitnum_out = 0;
409 	s->eecd_state.reading = 0;
410     }
411     if (!(E1000_EECD_SK & (val ^ oldval)))	// no clock edge
412         return;
413     if (!(E1000_EECD_SK & val)) {		// falling edge
414         s->eecd_state.bitnum_out++;
415         return;
416     }
417     s->eecd_state.val_in <<= 1;
418     if (val & E1000_EECD_DI)
419         s->eecd_state.val_in |= 1;
420     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
421         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
422         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
423             EEPROM_READ_OPCODE_MICROWIRE);
424     }
425     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
426            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
427            s->eecd_state.reading);
428 }
429 
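/*
 * EERD is the register-based EEPROM read interface: when START is set,
 * return the addressed EEPROM word in the data field along with DONE;
 * addresses beyond the checksum word return DONE with no data.
 */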
430 static uint32_t
431 flash_eerd_read(E1000State *s, int x)
432 {
433     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
434 
435     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
436         return (s->mac_reg[EERD]);
437 
438     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
439         return (E1000_EEPROM_RW_REG_DONE | r);
440 
441     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
442            E1000_EEPROM_RW_REG_DONE | r);
443 }
444 
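/*
 * Compute the 16-bit Internet checksum over data[css..cse] (or to the end
 * of the packet when cse is 0) and store it big-endian at offset sloc.
 */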
445 static void
446 putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
447 {
448     uint32_t sum;
449 
450     if (cse && cse < n)
451         n = cse + 1;
452     if (sloc < n-1) {
453         sum = net_checksum_add(n-css, data+css);
454         cpu_to_be16wu((uint16_t *)(data + sloc),
455                       net_checksum_finish(sum));
456     }
457 }
458 
459 static inline int
460 vlan_enabled(E1000State *s)
461 {
462     return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
463 }
464 
465 static inline int
466 vlan_rx_filter_enabled(E1000State *s)
467 {
468     return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
469 }
470 
471 static inline int
472 is_vlan_packet(E1000State *s, const uint8_t *buf)
473 {
474     return (be16_to_cpup((uint16_t *)(buf + 12)) ==
475                 le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
476 }
477 
478 static inline int
479 is_vlan_txd(uint32_t txd_lower)
480 {
481     return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
482 }
483 
484 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
485  * fill it in ourselves, so we just pad the descriptor length by 4 bytes
486  * unless the guest told us to strip it off the packet. */
487 static inline int
488 fcs_len(E1000State *s)
489 {
490     return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
491 }
492 
493 static void
494 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
495 {
496     NetClientState *nc = qemu_get_queue(s->nic);
497     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
498         nc->info->receive(nc, buf, size);
499     } else {
500         qemu_send_packet(nc, buf, size);
501     }
502 }
503 
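/*
 * Emit one frame from the accumulated transmit buffer.  For a TSO segment
 * this first patches the IP total/payload length, the IP identification and
 * the TCP sequence number (clearing PSH/FIN on all but the last segment),
 * then inserts the requested IP/TCP checksums, inserts the saved 802.1Q tag
 * after the MAC addresses if needed, hands the frame to the backend and
 * updates the transmit statistics counters.
 */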
504 static void
505 xmit_seg(E1000State *s)
506 {
507     uint16_t len, *sp;
508     unsigned int frames = s->tx.tso_frames, css, sofar, n;
509     struct e1000_tx *tp = &s->tx;
510 
511     if (tp->tse && tp->cptse) {
512         css = tp->ipcss;
513         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
514                frames, tp->size, css);
515         if (tp->ip) {		// IPv4
516             cpu_to_be16wu((uint16_t *)(tp->data+css+2),
517                           tp->size - css);
518             cpu_to_be16wu((uint16_t *)(tp->data+css+4),
519                           be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
520         } else			// IPv6
521             cpu_to_be16wu((uint16_t *)(tp->data+css+4),
522                           tp->size - css);
523         css = tp->tucss;
524         len = tp->size - css;
525         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
526         if (tp->tcp) {
527             sofar = frames * tp->mss;
528             cpu_to_be32wu((uint32_t *)(tp->data+css+4),	// seq
529                 be32_to_cpupu((uint32_t *)(tp->data+css+4))+sofar);
530             if (tp->paylen - sofar > tp->mss)
531                 tp->data[css + 13] &= ~9;		// PSH, FIN
532         } else	// UDP
533             cpu_to_be16wu((uint16_t *)(tp->data+css+4), len);
534         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
535             unsigned int phsum;
536             // add pseudo-header length before checksum calculation
537             sp = (uint16_t *)(tp->data + tp->tucso);
538             phsum = be16_to_cpup(sp) + len;
539             phsum = (phsum >> 16) + (phsum & 0xffff);
540             cpu_to_be16wu(sp, phsum);
541         }
542         tp->tso_frames++;
543     }
544 
545     if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
546         putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
547     if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
548         putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
549     if (tp->vlan_needed) {
550         memmove(tp->vlan, tp->data, 4);
551         memmove(tp->data, tp->data + 4, 8);
552         memcpy(tp->data + 8, tp->vlan_header, 4);
553         e1000_send_packet(s, tp->vlan, tp->size + 4);
554     } else
555         e1000_send_packet(s, tp->data, tp->size);
556     s->mac_reg[TPT]++;
557     s->mac_reg[GPTC]++;
558     n = s->mac_reg[TOTL];
559     if ((s->mac_reg[TOTL] += s->tx.size) < n)
560         s->mac_reg[TOTH]++;
561 }
562 
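/*
 * Process one transmit descriptor.  Context descriptors only latch the
 * offload parameters (checksum offsets, MSS, header length, TSE) into
 * s->tx.  Data and legacy descriptors DMA their buffer into the
 * accumulation buffer; xmit_seg() is called whenever a full TSO segment
 * (header plus MSS) is collected, or at the end of the packet (EOP).
 */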
563 static void
564 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
565 {
566     PCIDevice *d = PCI_DEVICE(s);
567     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
568     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
569     unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
570     unsigned int msh = 0xfffff;
571     uint64_t addr;
572     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
573     struct e1000_tx *tp = &s->tx;
574 
575     if (dtype == E1000_TXD_CMD_DEXT) {	// context descriptor
576         op = le32_to_cpu(xp->cmd_and_length);
577         tp->ipcss = xp->lower_setup.ip_fields.ipcss;
578         tp->ipcso = xp->lower_setup.ip_fields.ipcso;
579         tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
580         tp->tucss = xp->upper_setup.tcp_fields.tucss;
581         tp->tucso = xp->upper_setup.tcp_fields.tucso;
582         tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
583         tp->paylen = op & 0xfffff;
584         tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
585         tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
586         tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
587         tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
588         tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
589         tp->tso_frames = 0;
590         if (tp->tucso == 0) {	// this is probably wrong
591             DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
592             tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
593         }
594         return;
595     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
596         // data descriptor
597         if (tp->size == 0) {
598             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
599         }
600         tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
601     } else {
602         // legacy descriptor
603         tp->cptse = 0;
604     }
605 
606     if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
607         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
608         tp->vlan_needed = 1;
609         cpu_to_be16wu((uint16_t *)(tp->vlan_header),
610                       le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
611         cpu_to_be16wu((uint16_t *)(tp->vlan_header + 2),
612                       le16_to_cpu(dp->upper.fields.special));
613     }
614 
615     addr = le64_to_cpu(dp->buffer_addr);
616     if (tp->tse && tp->cptse) {
617         msh = tp->hdr_len + tp->mss;
618         do {
619             bytes = split_size;
620             if (tp->size + bytes > msh)
621                 bytes = msh - tp->size;
622 
623             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
624             pci_dma_read(d, addr, tp->data + tp->size, bytes);
625             sz = tp->size + bytes;
626             if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
627                 memmove(tp->header, tp->data, tp->hdr_len);
628             }
629             tp->size = sz;
630             addr += bytes;
631             if (sz == msh) {
632                 xmit_seg(s);
633                 memmove(tp->data, tp->header, tp->hdr_len);
634                 tp->size = tp->hdr_len;
635             }
636         } while (split_size -= bytes);
637     } else if (!tp->tse && tp->cptse) {
638         // context descriptor TSE is not set, while data descriptor TSE is set
639         DBGOUT(TXERR, "TCP segmentation error\n");
640     } else {
641         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
642         pci_dma_read(d, addr, tp->data + tp->size, split_size);
643         tp->size += split_size;
644     }
645 
646     if (!(txd_lower & E1000_TXD_CMD_EOP))
647         return;
648     if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
649         xmit_seg(s);
650     }
651     tp->tso_frames = 0;
652     tp->sum_needed = 0;
653     tp->vlan_needed = 0;
654     tp->size = 0;
655     tp->cptse = 0;
656 }
657 
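/*
 * If the guest requested status reporting (RS/RPS), write the DD bit back
 * to the descriptor and return the TXDW interrupt cause to accumulate.
 */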
658 static uint32_t
659 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
660 {
661     PCIDevice *d = PCI_DEVICE(s);
662     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
663 
664     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
665         return 0;
666     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
667                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
668     dp->upper.data = cpu_to_le32(txd_upper);
669     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
670                   &dp->upper, sizeof(dp->upper));
671     return E1000_ICR_TXDW;
672 }
673 
674 static uint64_t tx_desc_base(E1000State *s)
675 {
676     uint64_t bah = s->mac_reg[TDBAH];
677     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
678 
679     return (bah << 32) + bal;
680 }
681 
682 static void
683 start_xmit(E1000State *s)
684 {
685     PCIDevice *d = PCI_DEVICE(s);
686     dma_addr_t base;
687     struct e1000_tx_desc desc;
688     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
689 
690     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
691         DBGOUT(TX, "tx disabled\n");
692         return;
693     }
694 
695     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
696         base = tx_desc_base(s) +
697                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
698         pci_dma_read(d, base, &desc, sizeof(desc));
699 
700         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
701                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
702                desc.upper.data);
703 
704         process_tx_desc(s, &desc);
705         cause |= txdesc_writeback(s, base, &desc);
706 
707         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
708             s->mac_reg[TDH] = 0;
709         /*
710          * The following can happen only if the guest software assigns
711          * bogus values to TDT/TDLEN; there is nothing sensible we can
712          * do about it.
713          */
714         if (s->mac_reg[TDH] == tdh_start) {
715             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
716                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
717             break;
718         }
719     }
720     set_ics(s, 0, cause);
721 }
722 
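/*
 * Return nonzero if the incoming frame should be accepted: it must pass
 * the VLAN filter table when VLAN filtering is enabled, and then match
 * promiscuous mode, broadcast/multicast-promiscuous settings, an exact
 * unicast address in the receive address registers, or the multicast
 * table array hash.
 */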
723 static int
724 receive_filter(E1000State *s, const uint8_t *buf, int size)
725 {
726     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
727     static const int mta_shift[] = {4, 3, 2, 0};
728     uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
729 
730     if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
731         uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
732         uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
733                                      ((vid >> 5) & 0x7f));
734         if ((vfta & (1 << (vid & 0x1f))) == 0)
735             return 0;
736     }
737 
738     if (rctl & E1000_RCTL_UPE)			// promiscuous
739         return 1;
740 
741     if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE))	// promiscuous mcast
742         return 1;
743 
744     if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
745         return 1;
746 
747     for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
748         if (!(rp[1] & E1000_RAH_AV))
749             continue;
750         ra[0] = cpu_to_le32(rp[0]);
751         ra[1] = cpu_to_le32(rp[1]);
752         if (!memcmp(buf, (uint8_t *)ra, 6)) {
753             DBGOUT(RXFILTER,
754                    "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
755                    (int)(rp - s->mac_reg - RA)/2,
756                    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
757             return 1;
758         }
759     }
760     DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
761            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
762 
763     f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
764     f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
765     if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
766         return 1;
767     DBGOUT(RXFILTER,
768            "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
769            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
770            (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
771            s->mac_reg[MTA + (f >> 5)]);
772 
773     return 0;
774 }
775 
776 static void
777 e1000_set_link_status(NetClientState *nc)
778 {
779     E1000State *s = qemu_get_nic_opaque(nc);
780     uint32_t old_status = s->mac_reg[STATUS];
781 
782     if (nc->link_down) {
783         e1000_link_down(s);
784     } else {
785         e1000_link_up(s);
786     }
787 
788     if (s->mac_reg[STATUS] != old_status)
789         set_ics(s, 0, E1000_ICR_LSC);
790 }
791 
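/*
 * Check whether enough free receive descriptors (between RDH and RDT) are
 * available to hold a packet of total_size bytes at the current receive
 * buffer size.
 */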
792 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
793 {
794     int bufs;
795     /* Fast-path short packets */
796     if (total_size <= s->rxbuf_size) {
797         return s->mac_reg[RDH] != s->mac_reg[RDT];
798     }
799     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
800         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
801     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
802         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
803             s->mac_reg[RDT] - s->mac_reg[RDH];
804     } else {
805         return false;
806     }
807     return total_size <= bufs * s->rxbuf_size;
808 }
809 
810 static int
811 e1000_can_receive(NetClientState *nc)
812 {
813     E1000State *s = qemu_get_nic_opaque(nc);
814 
815     return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
816         (s->mac_reg[RCTL] & E1000_RCTL_EN) && e1000_has_rxbufs(s, 1);
817 }
818 
819 static uint64_t rx_desc_base(E1000State *s)
820 {
821     uint64_t bah = s->mac_reg[RDBAH];
822     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
823 
824     return (bah << 32) + bal;
825 }
826 
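/*
 * Receive path: pad runt frames to the minimum Ethernet length, drop
 * oversized frames unless LPE (long packet enable) or SBP (store bad
 * packets) is set, apply the receive filter, strip a VLAN tag into the
 * descriptor's special field, then copy the payload into guest memory one
 * receive descriptor at a time and raise the appropriate RXT0/RXDMT0/RXO
 * interrupt causes.
 */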
827 static ssize_t
828 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
829 {
830     E1000State *s = qemu_get_nic_opaque(nc);
831     PCIDevice *d = PCI_DEVICE(s);
832     struct e1000_rx_desc desc;
833     dma_addr_t base;
834     unsigned int n, rdt;
835     uint32_t rdh_start;
836     uint16_t vlan_special = 0;
837     uint8_t vlan_status = 0, vlan_offset = 0;
838     uint8_t min_buf[MIN_BUF_SIZE];
839     size_t desc_offset;
840     size_t desc_size;
841     size_t total_size;
842 
843     if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
844         return -1;
845     }
846 
847     if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
848         return -1;
849     }
850 
851     /* Pad to minimum Ethernet frame length */
852     if (size < sizeof(min_buf)) {
853         memcpy(min_buf, buf, size);
854         memset(&min_buf[size], 0, sizeof(min_buf) - size);
855         buf = min_buf;
856         size = sizeof(min_buf);
857     }
858 
859     /* Discard oversized packets if !LPE and !SBP. */
860     if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
861         (size > MAXIMUM_ETHERNET_VLAN_SIZE
862         && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
863         && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
864         return size;
865     }
866 
867     if (!receive_filter(s, buf, size))
868         return size;
869 
870     if (vlan_enabled(s) && is_vlan_packet(s, buf)) {
871         vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(buf + 14)));
872         memmove((uint8_t *)buf + 4, buf, 12);
873         vlan_status = E1000_RXD_STAT_VP;
874         vlan_offset = 4;
875         size -= 4;
876     }
877 
878     rdh_start = s->mac_reg[RDH];
879     desc_offset = 0;
880     total_size = size + fcs_len(s);
881     if (!e1000_has_rxbufs(s, total_size)) {
882             set_ics(s, 0, E1000_ICS_RXO);
883             return -1;
884     }
885     do {
886         desc_size = total_size - desc_offset;
887         if (desc_size > s->rxbuf_size) {
888             desc_size = s->rxbuf_size;
889         }
890         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
891         pci_dma_read(d, base, &desc, sizeof(desc));
892         desc.special = vlan_special;
893         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
894         if (desc.buffer_addr) {
895             if (desc_offset < size) {
896                 size_t copy_size = size - desc_offset;
897                 if (copy_size > s->rxbuf_size) {
898                     copy_size = s->rxbuf_size;
899                 }
900                 pci_dma_write(d, le64_to_cpu(desc.buffer_addr),
901                               buf + desc_offset + vlan_offset, copy_size);
902             }
903             desc_offset += desc_size;
904             desc.length = cpu_to_le16(desc_size);
905             if (desc_offset >= total_size) {
906                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
907             } else {
908                 /* Guest zeroing out status is not a hardware requirement.
909                    Clear EOP in case guest didn't do it. */
910                 desc.status &= ~E1000_RXD_STAT_EOP;
911             }
912         } else { // as per Intel docs; skip descriptors with null buf addr
913             DBGOUT(RX, "Null RX descriptor!!\n");
914         }
915         pci_dma_write(d, base, &desc, sizeof(desc));
916 
917         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
918             s->mac_reg[RDH] = 0;
919         /* see comment in start_xmit; same here */
920         if (s->mac_reg[RDH] == rdh_start) {
921             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
922                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
923             set_ics(s, 0, E1000_ICS_RXO);
924             return -1;
925         }
926     } while (desc_offset < total_size);
927 
928     s->mac_reg[GPRC]++;
929     s->mac_reg[TPR]++;
930     /* TOR - Total Octets Received:
931      * This register includes bytes received in a packet from the <Destination
932      * Address> field through the <CRC> field, inclusively.
933      */
934     n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
935     if (n < s->mac_reg[TORL])
936         s->mac_reg[TORH]++;
937     s->mac_reg[TORL] = n;
938 
939     n = E1000_ICS_RXT0;
940     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
941         rdt += s->mac_reg[RDLEN] / sizeof(desc);
942     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
943         s->rxbuf_min_shift)
944         n |= E1000_ICS_RXDMT0;
945 
946     set_ics(s, 0, n);
947 
948     return size;
949 }
950 
951 static uint32_t
952 mac_readreg(E1000State *s, int index)
953 {
954     return s->mac_reg[index];
955 }
956 
957 static uint32_t
958 mac_icr_read(E1000State *s, int index)
959 {
960     uint32_t ret = s->mac_reg[ICR];
961 
962     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
963     set_interrupt_cause(s, 0, 0);
964     return ret;
965 }
966 
967 static uint32_t
968 mac_read_clr4(E1000State *s, int index)
969 {
970     uint32_t ret = s->mac_reg[index];
971 
972     s->mac_reg[index] = 0;
973     return ret;
974 }
975 
976 static uint32_t
977 mac_read_clr8(E1000State *s, int index)
978 {
979     uint32_t ret = s->mac_reg[index];
980 
981     s->mac_reg[index] = 0;
982     s->mac_reg[index-1] = 0;
983     return ret;
984 }
985 
986 static void
987 mac_writereg(E1000State *s, int index, uint32_t val)
988 {
989     s->mac_reg[index] = val;
990 }
991 
992 static void
993 set_rdt(E1000State *s, int index, uint32_t val)
994 {
995     s->mac_reg[index] = val & 0xffff;
996     if (e1000_has_rxbufs(s, 1)) {
997         qemu_flush_queued_packets(qemu_get_queue(s->nic));
998     }
999 }
1000 
1001 static void
1002 set_16bit(E1000State *s, int index, uint32_t val)
1003 {
1004     s->mac_reg[index] = val & 0xffff;
1005 }
1006 
1007 static void
1008 set_dlen(E1000State *s, int index, uint32_t val)
1009 {
1010     s->mac_reg[index] = val & 0xfff80;
1011 }
1012 
1013 static void
1014 set_tctl(E1000State *s, int index, uint32_t val)
1015 {
1016     s->mac_reg[index] = val;
1017     s->mac_reg[TDT] &= 0xffff;
1018     start_xmit(s);
1019 }
1020 
1021 static void
1022 set_icr(E1000State *s, int index, uint32_t val)
1023 {
1024     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1025     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1026 }
1027 
1028 static void
1029 set_imc(E1000State *s, int index, uint32_t val)
1030 {
1031     s->mac_reg[IMS] &= ~val;
1032     set_ics(s, 0, 0);
1033 }
1034 
1035 static void
1036 set_ims(E1000State *s, int index, uint32_t val)
1037 {
1038     s->mac_reg[IMS] |= val;
1039     set_ics(s, 0, 0);
1040 }
1041 
1042 #define getreg(x)	[x] = mac_readreg
1043 static uint32_t (*macreg_readops[])(E1000State *, int) = {
1044     getreg(PBA),	getreg(RCTL),	getreg(TDH),	getreg(TXDCTL),
1045     getreg(WUFC),	getreg(TDT),	getreg(CTRL),	getreg(LEDCTL),
1046     getreg(MANC),	getreg(MDIC),	getreg(SWSM),	getreg(STATUS),
1047     getreg(TORL),	getreg(TOTL),	getreg(IMS),	getreg(TCTL),
1048     getreg(RDH),	getreg(RDT),	getreg(VET),	getreg(ICS),
1049     getreg(TDBAL),	getreg(TDBAH),	getreg(RDBAH),	getreg(RDBAL),
1050     getreg(TDLEN),	getreg(RDLEN),
1051 
1052     [TOTH] = mac_read_clr8,	[TORH] = mac_read_clr8,	[GPRC] = mac_read_clr4,
1053     [GPTC] = mac_read_clr4,	[TPR] = mac_read_clr4,	[TPT] = mac_read_clr4,
1054     [ICR] = mac_icr_read,	[EECD] = get_eecd,	[EERD] = flash_eerd_read,
1055     [CRCERRS ... MPC] = &mac_readreg,
1056     [RA ... RA+31] = &mac_readreg,
1057     [MTA ... MTA+127] = &mac_readreg,
1058     [VFTA ... VFTA+127] = &mac_readreg,
1059 };
1060 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1061 
1062 #define putreg(x)	[x] = mac_writereg
1063 static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
1064     putreg(PBA),	putreg(EERD),	putreg(SWSM),	putreg(WUFC),
1065     putreg(TDBAL),	putreg(TDBAH),	putreg(TXDCTL),	putreg(RDBAH),
1066     putreg(RDBAL),	putreg(LEDCTL), putreg(VET),
1067     [TDLEN] = set_dlen,	[RDLEN] = set_dlen,	[TCTL] = set_tctl,
1068     [TDT] = set_tctl,	[MDIC] = set_mdic,	[ICS] = set_ics,
1069     [TDH] = set_16bit,	[RDH] = set_16bit,	[RDT] = set_rdt,
1070     [IMC] = set_imc,	[IMS] = set_ims,	[ICR] = set_icr,
1071     [EECD] = set_eecd,	[RCTL] = set_rx_control, [CTRL] = set_ctrl,
1072     [RA ... RA+31] = &mac_writereg,
1073     [MTA ... MTA+127] = &mac_writereg,
1074     [VFTA ... VFTA+127] = &mac_writereg,
1075 };
1076 
1077 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1078 
1079 static void
1080 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1081                  unsigned size)
1082 {
1083     E1000State *s = opaque;
1084     unsigned int index = (addr & 0x1ffff) >> 2;
1085 
1086     if (index < NWRITEOPS && macreg_writeops[index]) {
1087         macreg_writeops[index](s, index, val);
1088     } else if (index < NREADOPS && macreg_readops[index]) {
1089         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val);
1090     } else {
1091         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1092                index<<2, val);
1093     }
1094 }
1095 
1096 static uint64_t
1097 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1098 {
1099     E1000State *s = opaque;
1100     unsigned int index = (addr & 0x1ffff) >> 2;
1101 
1102     if (index < NREADOPS && macreg_readops[index])
1103     {
1104         return macreg_readops[index](s, index);
1105     }
1106     DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1107     return 0;
1108 }
1109 
1110 static const MemoryRegionOps e1000_mmio_ops = {
1111     .read = e1000_mmio_read,
1112     .write = e1000_mmio_write,
1113     .endianness = DEVICE_LITTLE_ENDIAN,
1114     .impl = {
1115         .min_access_size = 4,
1116         .max_access_size = 4,
1117     },
1118 };
1119 
1120 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1121                               unsigned size)
1122 {
1123     E1000State *s = opaque;
1124 
1125     (void)s;
1126     return 0;
1127 }
1128 
1129 static void e1000_io_write(void *opaque, hwaddr addr,
1130                            uint64_t val, unsigned size)
1131 {
1132     E1000State *s = opaque;
1133 
1134     (void)s;
1135 }
1136 
1137 static const MemoryRegionOps e1000_io_ops = {
1138     .read = e1000_io_read,
1139     .write = e1000_io_write,
1140     .endianness = DEVICE_LITTLE_ENDIAN,
1141 };
1142 
1143 static bool is_version_1(void *opaque, int version_id)
1144 {
1145     return version_id == 1;
1146 }
1147 
1148 static void e1000_pre_save(void *opaque)
1149 {
1150     E1000State *s = opaque;
1151     NetClientState *nc = qemu_get_queue(s->nic);
1152 
1153     if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
1154         return;
1155     }
1156 
1157     /*
1158      * If link is down and auto-negotiation is ongoing, complete
1159      * auto-negotiation immediately.  This allows us to look at
1160      * MII_SR_AUTONEG_COMPLETE to infer link status on load.
1161      */
1162     if (nc->link_down &&
1163         s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
1164         s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG) {
1165          s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1166     }
1167 }
1168 
1169 static int e1000_post_load(void *opaque, int version_id)
1170 {
1171     E1000State *s = opaque;
1172     NetClientState *nc = qemu_get_queue(s->nic);
1173 
1174     /* nc.link_down can't be migrated, so infer link_down from the
1175      * link status bit in mac_reg[STATUS].
1176      * Alternatively, restart link negotiation if it was in progress. */
1177     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1178 
1179     if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
1180         return 0;
1181     }
1182 
1183     if (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
1184         s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG &&
1185         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1186         nc->link_down = false;
1187         qemu_mod_timer(s->autoneg_timer, qemu_get_clock_ms(vm_clock) + 500);
1188     }
1189 
1190     return 0;
1191 }
1192 
1193 static const VMStateDescription vmstate_e1000 = {
1194     .name = "e1000",
1195     .version_id = 2,
1196     .minimum_version_id = 1,
1197     .minimum_version_id_old = 1,
1198     .pre_save = e1000_pre_save,
1199     .post_load = e1000_post_load,
1200     .fields      = (VMStateField []) {
1201         VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1202         VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1203         VMSTATE_UNUSED(4), /* Was mmio_base.  */
1204         VMSTATE_UINT32(rxbuf_size, E1000State),
1205         VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1206         VMSTATE_UINT32(eecd_state.val_in, E1000State),
1207         VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1208         VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1209         VMSTATE_UINT16(eecd_state.reading, E1000State),
1210         VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1211         VMSTATE_UINT8(tx.ipcss, E1000State),
1212         VMSTATE_UINT8(tx.ipcso, E1000State),
1213         VMSTATE_UINT16(tx.ipcse, E1000State),
1214         VMSTATE_UINT8(tx.tucss, E1000State),
1215         VMSTATE_UINT8(tx.tucso, E1000State),
1216         VMSTATE_UINT16(tx.tucse, E1000State),
1217         VMSTATE_UINT32(tx.paylen, E1000State),
1218         VMSTATE_UINT8(tx.hdr_len, E1000State),
1219         VMSTATE_UINT16(tx.mss, E1000State),
1220         VMSTATE_UINT16(tx.size, E1000State),
1221         VMSTATE_UINT16(tx.tso_frames, E1000State),
1222         VMSTATE_UINT8(tx.sum_needed, E1000State),
1223         VMSTATE_INT8(tx.ip, E1000State),
1224         VMSTATE_INT8(tx.tcp, E1000State),
1225         VMSTATE_BUFFER(tx.header, E1000State),
1226         VMSTATE_BUFFER(tx.data, E1000State),
1227         VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1228         VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1229         VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1230         VMSTATE_UINT32(mac_reg[EECD], E1000State),
1231         VMSTATE_UINT32(mac_reg[EERD], E1000State),
1232         VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1233         VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1234         VMSTATE_UINT32(mac_reg[ICR], E1000State),
1235         VMSTATE_UINT32(mac_reg[ICS], E1000State),
1236         VMSTATE_UINT32(mac_reg[IMC], E1000State),
1237         VMSTATE_UINT32(mac_reg[IMS], E1000State),
1238         VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1239         VMSTATE_UINT32(mac_reg[MANC], E1000State),
1240         VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1241         VMSTATE_UINT32(mac_reg[MPC], E1000State),
1242         VMSTATE_UINT32(mac_reg[PBA], E1000State),
1243         VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1244         VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1245         VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1246         VMSTATE_UINT32(mac_reg[RDH], E1000State),
1247         VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1248         VMSTATE_UINT32(mac_reg[RDT], E1000State),
1249         VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1250         VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1251         VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1252         VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1253         VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1254         VMSTATE_UINT32(mac_reg[TDH], E1000State),
1255         VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1256         VMSTATE_UINT32(mac_reg[TDT], E1000State),
1257         VMSTATE_UINT32(mac_reg[TORH], E1000State),
1258         VMSTATE_UINT32(mac_reg[TORL], E1000State),
1259         VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1260         VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1261         VMSTATE_UINT32(mac_reg[TPR], E1000State),
1262         VMSTATE_UINT32(mac_reg[TPT], E1000State),
1263         VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1264         VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1265         VMSTATE_UINT32(mac_reg[VET], E1000State),
1266         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1267         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1268         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1269         VMSTATE_END_OF_LIST()
1270     }
1271 };
1272 
1273 static const uint16_t e1000_eeprom_template[64] = {
1274     0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1275     0x3000, 0x1000, 0x6403, E1000_DEVID, 0x8086, E1000_DEVID, 0x8086, 0x3040,
1276     0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1277     0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1278     0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1279     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1280     0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1281     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1282 };
1283 
1284 /* PCI interface */
1285 
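/*
 * Set up the MMIO and I/O port regions.  MMIO write coalescing is enabled
 * for the whole BAR except for small windows around the registers whose
 * writes have immediate side effects (MDIC, ICR, ICS, IMS, IMC, TCTL, TDT).
 */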
1286 static void
1287 e1000_mmio_setup(E1000State *d)
1288 {
1289     int i;
1290     const uint32_t excluded_regs[] = {
1291         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1292         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1293     };
1294 
1295     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1296                           "e1000-mmio", PNPMMIO_SIZE);
1297     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1298     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1299         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1300                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1301     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1302 }
1303 
1304 static void
1305 e1000_cleanup(NetClientState *nc)
1306 {
1307     E1000State *s = qemu_get_nic_opaque(nc);
1308 
1309     s->nic = NULL;
1310 }
1311 
1312 static void
1313 pci_e1000_uninit(PCIDevice *dev)
1314 {
1315     E1000State *d = E1000(dev);
1316 
1317     qemu_del_timer(d->autoneg_timer);
1318     qemu_free_timer(d->autoneg_timer);
1319     memory_region_destroy(&d->mmio);
1320     memory_region_destroy(&d->io);
1321     qemu_del_nic(d->nic);
1322 }
1323 
1324 static NetClientInfo net_e1000_info = {
1325     .type = NET_CLIENT_OPTIONS_KIND_NIC,
1326     .size = sizeof(NICState),
1327     .can_receive = e1000_can_receive,
1328     .receive = e1000_receive,
1329     .cleanup = e1000_cleanup,
1330     .link_status_changed = e1000_set_link_status,
1331 };
1332 
1333 static int pci_e1000_init(PCIDevice *pci_dev)
1334 {
1335     DeviceState *dev = DEVICE(pci_dev);
1336     E1000State *d = E1000(pci_dev);
1337     uint8_t *pci_conf;
1338     uint16_t checksum = 0;
1339     int i;
1340     uint8_t *macaddr;
1341 
1342     pci_conf = pci_dev->config;
1343 
1344     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1345     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1346 
1347     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1348 
1349     e1000_mmio_setup(d);
1350 
1351     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1352 
1353     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1354 
1355     memmove(d->eeprom_data, e1000_eeprom_template,
1356         sizeof e1000_eeprom_template);
1357     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1358     macaddr = d->conf.macaddr.a;
1359     for (i = 0; i < 3; i++)
1360         d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1361     for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1362         checksum += d->eeprom_data[i];
1363     checksum = (uint16_t) EEPROM_SUM - checksum;
1364     d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1365 
1366     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1367                           object_get_typename(OBJECT(d)), dev->id, d);
1368 
1369     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1370 
1371     add_boot_device_path(d->conf.bootindex, dev, "/ethernet-phy@0");
1372 
1373     d->autoneg_timer = qemu_new_timer_ms(vm_clock, e1000_autoneg_timer, d);
1374 
1375     return 0;
1376 }
1377 
1378 static void qdev_e1000_reset(DeviceState *dev)
1379 {
1380     E1000State *d = E1000(dev);
1381     e1000_reset(d);
1382 }
1383 
1384 static Property e1000_properties[] = {
1385     DEFINE_NIC_PROPERTIES(E1000State, conf),
1386     DEFINE_PROP_BIT("autonegotiation", E1000State,
1387                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1388     DEFINE_PROP_END_OF_LIST(),
1389 };
1390 
1391 static void e1000_class_init(ObjectClass *klass, void *data)
1392 {
1393     DeviceClass *dc = DEVICE_CLASS(klass);
1394     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1395 
1396     k->init = pci_e1000_init;
1397     k->exit = pci_e1000_uninit;
1398     k->romfile = "efi-e1000.rom";
1399     k->vendor_id = PCI_VENDOR_ID_INTEL;
1400     k->device_id = E1000_DEVID;
1401     k->revision = 0x03;
1402     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1403     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1404     dc->desc = "Intel Gigabit Ethernet";
1405     dc->reset = qdev_e1000_reset;
1406     dc->vmsd = &vmstate_e1000;
1407     dc->props = e1000_properties;
1408 }
1409 
1410 static const TypeInfo e1000_info = {
1411     .name          = TYPE_E1000,
1412     .parent        = TYPE_PCI_DEVICE,
1413     .instance_size = sizeof(E1000State),
1414     .class_init    = e1000_class_init,
1415 };
1416 
1417 static void e1000_register_types(void)
1418 {
1419     type_register_static(&e1000_info);
1420 }
1421 
1422 type_init(e1000_register_types)
1423