xref: /openbmc/qemu/hw/net/e1000.c (revision 9c4218e9)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/hw.h"
30 #include "hw/pci/pci.h"
31 #include "net/net.h"
32 #include "net/checksum.h"
33 #include "hw/loader.h"
34 #include "sysemu/sysemu.h"
35 #include "sysemu/dma.h"
36 #include "qemu/iov.h"
37 #include "qemu/range.h"
38 
39 #include "e1000_regs.h"
40 
41 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
42 
43 #define E1000_DEBUG
44 
45 #ifdef E1000_DEBUG
46 enum {
47     DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
48     DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
49     DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
50     DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
51 };
52 #define DBGBIT(x)    (1<<DEBUG_##x)
53 static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
54 
55 #define DBGOUT(what, fmt, ...) do { \
56     if (debugflags & DBGBIT(what)) \
57         fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
58     } while (0)
59 #else
60 #define DBGOUT(what, fmt, ...) do {} while (0)
61 #endif
62 
63 #define IOPORT_SIZE       0x40
64 #define PNPMMIO_SIZE      0x20000
65 #define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
66 
67 /* this is the size past which hardware will drop packets when setting LPE=0 */
68 #define MAXIMUM_ETHERNET_VLAN_SIZE 1522
69 /* this is the size past which hardware will drop packets when setting LPE=1 */
70 #define MAXIMUM_ETHERNET_LPE_SIZE 16384
71 
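/* 14 bytes of Ethernet header plus an optional 4-byte 802.1Q VLAN tag */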
72 #define MAXIMUM_ETHERNET_HDR_LEN (14+4)
73 
74 /*
75  * HW models:
76  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
77  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
78  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
79  *  Others never tested
80  */
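/*
 * The emulated model determines the PCI device ID and PHY ID2 value exposed
 * to the guest.  On the QEMU command line the variants are typically
 * selected by device name, e.g. "-device e1000" for the default 82540EM or
 * "-device e1000-82545em" (names as registered for this file's device
 * types; mentioned here only as an illustration).
 */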
81 
82 typedef struct E1000State_st {
83     /*< private >*/
84     PCIDevice parent_obj;
85     /*< public >*/
86 
87     NICState *nic;
88     NICConf conf;
89     MemoryRegion mmio;
90     MemoryRegion io;
91 
92     uint32_t mac_reg[0x8000];
93     uint16_t phy_reg[0x20];
94     uint16_t eeprom_data[64];
95 
96     uint32_t rxbuf_size;
97     uint32_t rxbuf_min_shift;
98     struct e1000_tx {
99         unsigned char header[256];
100         unsigned char vlan_header[4];
101         /* Fields vlan and data must not be reordered or separated. */
102         unsigned char vlan[4];
103         unsigned char data[0x10000];
104         uint16_t size;
105         unsigned char sum_needed;
106         unsigned char vlan_needed;
107         uint8_t ipcss;
108         uint8_t ipcso;
109         uint16_t ipcse;
110         uint8_t tucss;
111         uint8_t tucso;
112         uint16_t tucse;
113         uint8_t hdr_len;
114         uint16_t mss;
115         uint32_t paylen;
116         uint16_t tso_frames;
117         char tse;
118         int8_t ip;
119         int8_t tcp;
120         char cptse;     // current packet tse bit
121     } tx;
122 
123     struct {
124         uint32_t val_in;    /* shifted in from guest driver */
125         uint16_t bitnum_in;
126         uint16_t bitnum_out;
127         uint16_t reading;
128         uint32_t old_eecd;
129     } eecd_state;
130 
131     QEMUTimer *autoneg_timer;
132 
133     QEMUTimer *mit_timer;      /* Mitigation timer. */
134     bool mit_timer_on;         /* Mitigation timer is running. */
135     bool mit_irq_level;        /* Tracks interrupt pin level. */
136     uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
137 
138 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
139 #define E1000_FLAG_AUTONEG_BIT 0
140 #define E1000_FLAG_MIT_BIT 1
141 #define E1000_FLAG_MAC_BIT 2
142 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
143 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
144 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
145     uint32_t compat_flags;
146 } E1000State;
147 
148 #define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
149 
150 typedef struct E1000BaseClass {
151     PCIDeviceClass parent_class;
152     uint16_t phy_id2;
153 } E1000BaseClass;
154 
155 #define TYPE_E1000_BASE "e1000-base"
156 
157 #define E1000(obj) \
158     OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)
159 
160 #define E1000_DEVICE_CLASS(klass) \
161      OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
162 #define E1000_DEVICE_GET_CLASS(obj) \
163     OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
164 
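/*
 * mac_reg[] is indexed by 32-bit register number, so the byte offsets
 * defined in e1000_regs.h (E1000_<reg>) are shifted right by 2 here.
 */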
165 #define defreg(x)    x = (E1000_##x>>2)
166 enum {
167     defreg(CTRL),    defreg(EECD),    defreg(EERD),    defreg(GPRC),
168     defreg(GPTC),    defreg(ICR),     defreg(ICS),     defreg(IMC),
169     defreg(IMS),     defreg(LEDCTL),  defreg(MANC),    defreg(MDIC),
170     defreg(MPC),     defreg(PBA),     defreg(RCTL),    defreg(RDBAH),
171     defreg(RDBAL),   defreg(RDH),     defreg(RDLEN),   defreg(RDT),
172     defreg(STATUS),  defreg(SWSM),    defreg(TCTL),    defreg(TDBAH),
173     defreg(TDBAL),   defreg(TDH),     defreg(TDLEN),   defreg(TDT),
174     defreg(TORH),    defreg(TORL),    defreg(TOTH),    defreg(TOTL),
175     defreg(TPR),     defreg(TPT),     defreg(TXDCTL),  defreg(WUFC),
176     defreg(RA),      defreg(MTA),     defreg(CRCERRS), defreg(VFTA),
177     defreg(VET),     defreg(RDTR),    defreg(RADV),    defreg(TADV),
178     defreg(ITR),     defreg(FCRUC),   defreg(TDFH),    defreg(TDFT),
179     defreg(TDFHS),   defreg(TDFTS),   defreg(TDFPC),   defreg(RDFH),
180     defreg(RDFT),    defreg(RDFHS),   defreg(RDFTS),   defreg(RDFPC),
181     defreg(IPAV),    defreg(WUC),     defreg(WUS),     defreg(AIT),
182     defreg(IP6AT),   defreg(IP4AT),   defreg(FFLT),    defreg(FFMT),
183     defreg(FFVT),    defreg(WUPM),    defreg(PBM),     defreg(SCC),
184     defreg(ECOL),    defreg(MCC),     defreg(LATECOL), defreg(COLC),
185     defreg(DC),      defreg(TNCRS),   defreg(SEC),     defreg(CEXTERR),
186     defreg(RLEC),    defreg(XONRXC),  defreg(XONTXC),  defreg(XOFFRXC),
187     defreg(XOFFTXC), defreg(RFC),     defreg(RJC),     defreg(RNBC),
188     defreg(TSCTFC),  defreg(MGTPRC),  defreg(MGTPDC),  defreg(MGTPTC),
189     defreg(RUC),     defreg(ROC),     defreg(GORCL),   defreg(GORCH),
190     defreg(GOTCL),   defreg(GOTCH),   defreg(BPRC),    defreg(MPRC),
191     defreg(TSCTC),   defreg(PRC64),   defreg(PRC127),  defreg(PRC255),
192     defreg(PRC511),  defreg(PRC1023), defreg(PRC1522), defreg(PTC64),
193     defreg(PTC127),  defreg(PTC255),  defreg(PTC511),  defreg(PTC1023),
194     defreg(PTC1522), defreg(MPTC),    defreg(BPTC)
195 };
196 
197 static void
198 e1000_link_down(E1000State *s)
199 {
200     s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
201     s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
202     s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
203     s->phy_reg[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK;
204 }
205 
206 static void
207 e1000_link_up(E1000State *s)
208 {
209     s->mac_reg[STATUS] |= E1000_STATUS_LU;
210     s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
211 
212     /* E1000_STATUS_LU is tested by e1000_can_receive() */
213     qemu_flush_queued_packets(qemu_get_queue(s->nic));
214 }
215 
216 static bool
217 have_autoneg(E1000State *s)
218 {
219     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
220 }
221 
222 static void
223 set_phy_ctrl(E1000State *s, int index, uint16_t val)
224 {
225     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
226     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
227                                    MII_CR_RESET |
228                                    MII_CR_RESTART_AUTO_NEG);
229 
230     /*
231      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
232  * migrate during auto-negotiation, after migration the link will be
233      * down.
234      */
235     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
236         e1000_link_down(s);
237         DBGOUT(PHY, "Start link auto negotiation\n");
238         timer_mod(s->autoneg_timer,
239                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
240     }
241 }
242 
243 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
244     [PHY_CTRL] = set_phy_ctrl,
245 };
246 
247 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
248 
249 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
250 static const char phy_regcap[0x20] = {
251     [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
252     [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
253     [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
254     [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
255     [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
256     [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
257     [PHY_AUTONEG_EXP] = PHY_R,
258 };
259 
260 /* PHY_ID2 documented in 8254x_GBe_SDM.pdf, p. 250 */
261 static const uint16_t phy_reg_init[] = {
262     [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
263                    MII_CR_FULL_DUPLEX |
264                    MII_CR_AUTO_NEG_EN,
265 
266     [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
267                    MII_SR_LINK_STATUS |   /* link initially up */
268                    MII_SR_AUTONEG_CAPS |
269                    /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
270                    MII_SR_PREAMBLE_SUPPRESS |
271                    MII_SR_EXTENDED_STATUS |
272                    MII_SR_10T_HD_CAPS |
273                    MII_SR_10T_FD_CAPS |
274                    MII_SR_100X_HD_CAPS |
275                    MII_SR_100X_FD_CAPS,
276 
277     [PHY_ID1] = 0x141,
278     /* [PHY_ID2] configured per DevId, from e1000_reset() */
279     [PHY_AUTONEG_ADV] = 0xde1,
280     [PHY_LP_ABILITY] = 0x1e0,
281     [PHY_1000T_CTRL] = 0x0e00,
282     [PHY_1000T_STATUS] = 0x3c00,
283     [M88E1000_PHY_SPEC_CTRL] = 0x360,
284     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
285     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
286 };
287 
288 static const uint32_t mac_reg_init[] = {
289     [PBA]     = 0x00100030,
290     [LEDCTL]  = 0x602,
291     [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
292                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
293     [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
294                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
295                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
296                 E1000_STATUS_LU,
297     [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
298                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
299                 E1000_MANC_RMCP_EN,
300 };
301 
302 /* Helper function, *curr == 0 means the value is not set */
303 static inline void
304 mit_update_delay(uint32_t *curr, uint32_t value)
305 {
306     if (value && (*curr == 0 || value < *curr)) {
307         *curr = value;
308     }
309 }
310 
311 static void
312 set_interrupt_cause(E1000State *s, int index, uint32_t val)
313 {
314     PCIDevice *d = PCI_DEVICE(s);
315     uint32_t pending_ints;
316     uint32_t mit_delay;
317 
318     s->mac_reg[ICR] = val;
319 
320     /*
321      * Make sure ICR and ICS registers have the same value.
322      * The spec says that the ICS register is write-only.  However in practice,
323      * on real hardware ICS is readable, and for reads it has the same value as
324      * ICR (except that ICS does not have the clear on read behaviour of ICR).
325      *
326      * The VxWorks PRO/1000 driver uses this behaviour.
327      */
328     s->mac_reg[ICS] = val;
329 
330     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
331     if (!s->mit_irq_level && pending_ints) {
332         /*
333          * Here we detect a potential rising edge. We postpone raising the
334          * interrupt line if we are inside the mitigation delay window
335          * (s->mit_timer_on == 1).
336          * We provide a partial implementation of interrupt mitigation,
337          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
338          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
339          * RADV; relative timers based on TIDV and RDTR are not implemented.
340          */
341         if (s->mit_timer_on) {
342             return;
343         }
344         if (chkflag(MIT)) {
345             /* Compute the next mitigation delay according to pending
346              * interrupts and the current values of RADV (provided
347              * RDTR!=0), TADV and ITR.
348              * Then rearm the timer.
349              */
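            /* RADV/TADV are in units of 1024ns and ITR in units of 256ns
             * (see above), so RADV/TADV are scaled by 4 here and the
             * accumulated delay by 256ns when arming the timer.
             */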
350             mit_delay = 0;
351             if (s->mit_ide &&
352                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
353                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
354             }
355             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
356                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
357             }
358             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
359 
360             if (mit_delay) {
361                 s->mit_timer_on = 1;
362                 timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
363                           mit_delay * 256);
364             }
365             s->mit_ide = 0;
366         }
367     }
368 
369     s->mit_irq_level = (pending_ints != 0);
370     pci_set_irq(d, s->mit_irq_level);
371 }
372 
373 static void
374 e1000_mit_timer(void *opaque)
375 {
376     E1000State *s = opaque;
377 
378     s->mit_timer_on = 0;
379     /* Call set_interrupt_cause to update the irq level (if necessary). */
380     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
381 }
382 
383 static void
384 set_ics(E1000State *s, int index, uint32_t val)
385 {
386     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
387         s->mac_reg[IMS]);
388     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
389 }
390 
391 static void
392 e1000_autoneg_timer(void *opaque)
393 {
394     E1000State *s = opaque;
395     if (!qemu_get_queue(s->nic)->link_down) {
396         e1000_link_up(s);
397         s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK;
398         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
399         DBGOUT(PHY, "Auto negotiation is completed\n");
400         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
401     }
402 }
403 
404 static int
405 rxbufsize(uint32_t v)
406 {
407     v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
408          E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
409          E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
410     switch (v) {
411     case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
412         return 16384;
413     case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
414         return 8192;
415     case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
416         return 4096;
417     case E1000_RCTL_SZ_1024:
418         return 1024;
419     case E1000_RCTL_SZ_512:
420         return 512;
421     case E1000_RCTL_SZ_256:
422         return 256;
423     }
424     return 2048;
425 }
426 
427 static void e1000_reset(void *opaque)
428 {
429     E1000State *d = opaque;
430     E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
431     uint8_t *macaddr = d->conf.macaddr.a;
432     int i;
433 
434     timer_del(d->autoneg_timer);
435     timer_del(d->mit_timer);
436     d->mit_timer_on = 0;
437     d->mit_irq_level = 0;
438     d->mit_ide = 0;
439     memset(d->phy_reg, 0, sizeof d->phy_reg);
440     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
441     d->phy_reg[PHY_ID2] = edc->phy_id2;
442     memset(d->mac_reg, 0, sizeof d->mac_reg);
443     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
444     d->rxbuf_min_shift = 1;
445     memset(&d->tx, 0, sizeof d->tx);
446 
447     if (qemu_get_queue(d->nic)->link_down) {
448         e1000_link_down(d);
449     }
450 
451     /* Throttle interrupts to prevent guest (e.g. Win 2012) from
452      * reinjecting interrupts endlessly. TODO: fix the non-ITR case.
453      */
454     d->mac_reg[ITR] = 250;
455 
456     /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
457     d->mac_reg[RA] = 0;
458     d->mac_reg[RA + 1] = E1000_RAH_AV;
459     for (i = 0; i < 4; i++) {
460         d->mac_reg[RA] |= macaddr[i] << (8 * i);
461         d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
462     }
463     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
464 }
465 
466 static void
467 set_ctrl(E1000State *s, int index, uint32_t val)
468 {
469     /* RST is self clearing */
470     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
471 }
472 
473 static void
474 set_rx_control(E1000State *s, int index, uint32_t val)
475 {
476     s->mac_reg[RCTL] = val;
477     s->rxbuf_size = rxbufsize(val);
478     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
479     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
480            s->mac_reg[RCTL]);
481     qemu_flush_queued_packets(qemu_get_queue(s->nic));
482 }
483 
484 static void
485 set_mdic(E1000State *s, int index, uint32_t val)
486 {
487     uint32_t data = val & E1000_MDIC_DATA_MASK;
488     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
489 
490     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
491         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
492     else if (val & E1000_MDIC_OP_READ) {
493         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
494         if (!(phy_regcap[addr] & PHY_R)) {
495             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
496             val |= E1000_MDIC_ERROR;
497         } else
498             val = (val ^ data) | s->phy_reg[addr];
499     } else if (val & E1000_MDIC_OP_WRITE) {
500         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
501         if (!(phy_regcap[addr] & PHY_W)) {
502             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
503             val |= E1000_MDIC_ERROR;
504         } else {
505             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
506                 phyreg_writeops[addr](s, index, data);
507             } else {
508                 s->phy_reg[addr] = data;
509             }
510         }
511     }
512     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
513 
514     if (val & E1000_MDIC_INT_EN) {
515         set_ics(s, 0, E1000_ICR_MDAC);
516     }
517 }
518 
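/*
 * Bit-banged (Microwire) EEPROM emulation: the guest toggles EECD.SK and
 * drives EECD.CS/DI; input bits are sampled on rising SK edges.  After 9
 * bits (start bit, READ opcode, 6-bit word address) have been shifted in,
 * the addressed 16-bit word of eeprom_data is shifted out MSB-first on
 * EECD.DO, with bitnum_out advancing on falling SK edges.
 */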
519 static uint32_t
520 get_eecd(E1000State *s, int index)
521 {
522     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
523 
524     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
525            s->eecd_state.bitnum_out, s->eecd_state.reading);
526     if (!s->eecd_state.reading ||
527         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
528           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
529         ret |= E1000_EECD_DO;
530     return ret;
531 }
532 
533 static void
534 set_eecd(E1000State *s, int index, uint32_t val)
535 {
536     uint32_t oldval = s->eecd_state.old_eecd;
537 
538     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
539             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
540     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
541         return;
542     }
543     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
544         s->eecd_state.val_in = 0;
545         s->eecd_state.bitnum_in = 0;
546         s->eecd_state.bitnum_out = 0;
547         s->eecd_state.reading = 0;
548     }
549     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
550         return;
551     }
552     if (!(E1000_EECD_SK & val)) {               /* falling edge */
553         s->eecd_state.bitnum_out++;
554         return;
555     }
556     s->eecd_state.val_in <<= 1;
557     if (val & E1000_EECD_DI)
558         s->eecd_state.val_in |= 1;
559     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
560         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
561         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
562             EEPROM_READ_OPCODE_MICROWIRE);
563     }
564     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
565            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
566            s->eecd_state.reading);
567 }
568 
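/*
 * EERD-based EEPROM reads: the guest writes START together with a word
 * address and then reads back DONE plus the word from eeprom_data in the
 * data field; addresses beyond the checksum word return DONE with no data.
 */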
569 static uint32_t
570 flash_eerd_read(E1000State *s, int x)
571 {
572     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
573 
574     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
575         return (s->mac_reg[EERD]);
576 
577     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
578         return (E1000_EEPROM_RW_REG_DONE | r);
579 
580     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
581            E1000_EEPROM_RW_REG_DONE | r);
582 }
583 
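/*
 * Compute an Internet checksum over data[css..cse] (to the end of the
 * buffer when cse == 0) and store it big-endian at offset sloc.
 */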
584 static void
585 putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
586 {
587     uint32_t sum;
588 
589     if (cse && cse < n)
590         n = cse + 1;
591     if (sloc < n-1) {
592         sum = net_checksum_add(n-css, data+css);
593         stw_be_p(data + sloc, net_checksum_finish(sum));
594     }
595 }
596 
597 static inline void
598 inc_reg_if_not_full(E1000State *s, int index)
599 {
600     if (s->mac_reg[index] != 0xffffffff) {
601         s->mac_reg[index]++;
602     }
603 }
604 
605 static inline void
606 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
607 {
608     if (!memcmp(arr, bcast, sizeof bcast)) {
609         inc_reg_if_not_full(s, BPTC);
610     } else if (arr[0] & 1) {
611         inc_reg_if_not_full(s, MPTC);
612     }
613 }
614 
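/*
 * Add 'size' to a 64-bit statistic kept in two consecutive 32-bit
 * registers (e.g. TOTL/TOTH), saturating at all-ones instead of wrapping.
 */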
615 static void
616 grow_8reg_if_not_full(E1000State *s, int index, int size)
617 {
618     uint64_t sum = s->mac_reg[index] | (uint64_t)s->mac_reg[index+1] << 32;
619 
620     if (sum + size < sum) {
621         sum = ~0ULL;
622     } else {
623         sum += size;
624     }
625     s->mac_reg[index] = sum;
626     s->mac_reg[index+1] = sum >> 32;
627 }
628 
629 static void
630 increase_size_stats(E1000State *s, const int *size_regs, int size)
631 {
632     if (size > 1023) {
633         inc_reg_if_not_full(s, size_regs[5]);
634     } else if (size > 511) {
635         inc_reg_if_not_full(s, size_regs[4]);
636     } else if (size > 255) {
637         inc_reg_if_not_full(s, size_regs[3]);
638     } else if (size > 127) {
639         inc_reg_if_not_full(s, size_regs[2]);
640     } else if (size > 64) {
641         inc_reg_if_not_full(s, size_regs[1]);
642     } else if (size == 64) {
643         inc_reg_if_not_full(s, size_regs[0]);
644     }
645 }
646 
647 static inline int
648 vlan_enabled(E1000State *s)
649 {
650     return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
651 }
652 
653 static inline int
654 vlan_rx_filter_enabled(E1000State *s)
655 {
656     return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
657 }
658 
659 static inline int
660 is_vlan_packet(E1000State *s, const uint8_t *buf)
661 {
662     return (be16_to_cpup((uint16_t *)(buf + 12)) ==
663                 le16_to_cpu(s->mac_reg[VET]));
664 }
665 
666 static inline int
667 is_vlan_txd(uint32_t txd_lower)
668 {
669     return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
670 }
671 
672 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
673  * fill it in, so just pad the descriptor length by 4 bytes unless the
674  * guest told us to strip it off the packet. */
675 static inline int
676 fcs_len(E1000State *s)
677 {
678     return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
679 }
680 
681 static void
682 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
683 {
684     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
685                                     PTC1023, PTC1522 };
686 
687     NetClientState *nc = qemu_get_queue(s->nic);
688     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
689         nc->info->receive(nc, buf, size);
690     } else {
691         qemu_send_packet(nc, buf, size);
692     }
693     inc_tx_bcast_or_mcast_count(s, buf);
694     increase_size_stats(s, PTCregs, size);
695 }
696 
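/*
 * Transmit one segment of the current packet: for TSO, patch the IP total
 * length/identification and the TCP sequence number and PSH/FIN flags (or
 * the UDP length) for this segment, insert the requested checksums, then
 * hand the frame (re-inserting the VLAN tag if needed) to
 * e1000_send_packet() and update the transmit statistics.
 */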
697 static void
698 xmit_seg(E1000State *s)
699 {
700     uint16_t len, *sp;
701     unsigned int frames = s->tx.tso_frames, css, sofar;
702     struct e1000_tx *tp = &s->tx;
703 
704     if (tp->tse && tp->cptse) {
705         css = tp->ipcss;
706         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
707                frames, tp->size, css);
708         if (tp->ip) {    /* IPv4 */
709             stw_be_p(tp->data+css+2, tp->size - css);
710             stw_be_p(tp->data+css+4,
711                      be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
712         } else {         /* IPv6 */
713             stw_be_p(tp->data+css+4, tp->size - css);
714         }
715         css = tp->tucss;
716         len = tp->size - css;
717         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
718         if (tp->tcp) {
719             sofar = frames * tp->mss;
720             stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
721             if (tp->paylen - sofar > tp->mss) {
722                 tp->data[css + 13] &= ~9;    /* PSH, FIN */
723             } else if (frames) {
724                 inc_reg_if_not_full(s, TSCTC);
725             }
726         } else    /* UDP */
727             stw_be_p(tp->data+css+4, len);
728         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
729             unsigned int phsum;
730             // add pseudo-header length before checksum calculation
731             sp = (uint16_t *)(tp->data + tp->tucso);
732             phsum = be16_to_cpup(sp) + len;
733             phsum = (phsum >> 16) + (phsum & 0xffff);
734             stw_be_p(sp, phsum);
735         }
736         tp->tso_frames++;
737     }
738 
739     if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
740         putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
741     if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
742         putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
743     if (tp->vlan_needed) {
744         memmove(tp->vlan, tp->data, 4);
745         memmove(tp->data, tp->data + 4, 8);
746         memcpy(tp->data + 8, tp->vlan_header, 4);
747         e1000_send_packet(s, tp->vlan, tp->size + 4);
748     } else {
749         e1000_send_packet(s, tp->data, tp->size);
750     }
751 
752     inc_reg_if_not_full(s, TPT);
753     grow_8reg_if_not_full(s, TOTL, s->tx.size);
754     s->mac_reg[GPTC] = s->mac_reg[TPT];
755     s->mac_reg[GOTCL] = s->mac_reg[TOTL];
756     s->mac_reg[GOTCH] = s->mac_reg[TOTH];
757 }
758 
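/*
 * Process one transmit descriptor.  A context descriptor only latches the
 * offload parameters into s->tx; data and legacy descriptors DMA their
 * buffer into tp->data (segmenting on the fly when TSE is in effect), and
 * xmit_seg() is called whenever a full segment or the end of the packet
 * (EOP) has been accumulated.
 */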
759 static void
760 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
761 {
762     PCIDevice *d = PCI_DEVICE(s);
763     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
764     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
765     unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
766     unsigned int msh = 0xfffff;
767     uint64_t addr;
768     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
769     struct e1000_tx *tp = &s->tx;
770 
771     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
772     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
773         op = le32_to_cpu(xp->cmd_and_length);
774         tp->ipcss = xp->lower_setup.ip_fields.ipcss;
775         tp->ipcso = xp->lower_setup.ip_fields.ipcso;
776         tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
777         tp->tucss = xp->upper_setup.tcp_fields.tucss;
778         tp->tucso = xp->upper_setup.tcp_fields.tucso;
779         tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
780         tp->paylen = op & 0xfffff;
781         tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
782         tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
783         tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
784         tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
785         tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
786         tp->tso_frames = 0;
787         if (tp->tucso == 0) {    /* this is probably wrong */
788             DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
789             tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
790         }
791         return;
792     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
793         // data descriptor
794         if (tp->size == 0) {
795             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
796         }
797         tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
798     } else {
799         // legacy descriptor
800         tp->cptse = 0;
801     }
802 
803     if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
804         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
805         tp->vlan_needed = 1;
806         stw_be_p(tp->vlan_header,
807                       le16_to_cpu(s->mac_reg[VET]));
808         stw_be_p(tp->vlan_header + 2,
809                       le16_to_cpu(dp->upper.fields.special));
810     }
811 
812     addr = le64_to_cpu(dp->buffer_addr);
813     if (tp->tse && tp->cptse) {
814         msh = tp->hdr_len + tp->mss;
815         do {
816             bytes = split_size;
817             if (tp->size + bytes > msh)
818                 bytes = msh - tp->size;
819 
820             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
821             pci_dma_read(d, addr, tp->data + tp->size, bytes);
822             sz = tp->size + bytes;
823             if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
824                 memmove(tp->header, tp->data, tp->hdr_len);
825             }
826             tp->size = sz;
827             addr += bytes;
828             if (sz == msh) {
829                 xmit_seg(s);
830                 memmove(tp->data, tp->header, tp->hdr_len);
831                 tp->size = tp->hdr_len;
832             }
833             split_size -= bytes;
834         } while (bytes && split_size);
835     } else if (!tp->tse && tp->cptse) {
836         // context descriptor TSE is not set, while data descriptor TSE is set
837         DBGOUT(TXERR, "TCP segmentation error\n");
838     } else {
839         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
840         pci_dma_read(d, addr, tp->data + tp->size, split_size);
841         tp->size += split_size;
842     }
843 
844     if (!(txd_lower & E1000_TXD_CMD_EOP))
845         return;
846     if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
847         xmit_seg(s);
848     }
849     tp->tso_frames = 0;
850     tp->sum_needed = 0;
851     tp->vlan_needed = 0;
852     tp->size = 0;
853     tp->cptse = 0;
854 }
855 
856 static uint32_t
857 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
858 {
859     PCIDevice *d = PCI_DEVICE(s);
860     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
861 
862     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
863         return 0;
864     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
865                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
866     dp->upper.data = cpu_to_le32(txd_upper);
867     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
868                   &dp->upper, sizeof(dp->upper));
869     return E1000_ICR_TXDW;
870 }
871 
872 static uint64_t tx_desc_base(E1000State *s)
873 {
874     uint64_t bah = s->mac_reg[TDBAH];
875     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
876 
877     return (bah << 32) + bal;
878 }
879 
880 static void
881 start_xmit(E1000State *s)
882 {
883     PCIDevice *d = PCI_DEVICE(s);
884     dma_addr_t base;
885     struct e1000_tx_desc desc;
886     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
887 
888     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
889         DBGOUT(TX, "tx disabled\n");
890         return;
891     }
892 
893     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
894         base = tx_desc_base(s) +
895                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
896         pci_dma_read(d, base, &desc, sizeof(desc));
897 
898         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
899                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
900                desc.upper.data);
901 
902         process_tx_desc(s, &desc);
903         cause |= txdesc_writeback(s, base, &desc);
904 
905         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
906             s->mac_reg[TDH] = 0;
907         /*
908          * the following could happen only if guest sw assigns
909          * bogus values to TDT/TDLEN.
910          * there's nothing too intelligent we could do about this.
911          */
912         if (s->mac_reg[TDH] == tdh_start) {
913             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
914                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
915             break;
916         }
917     }
918     set_ics(s, 0, cause);
919 }
920 
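/*
 * Return 1 if the frame passes the receive filters, checked in order:
 * VLAN filter table (when enabled), promiscuous unicast/multicast,
 * broadcast accept, exact receive-address (RA) match, and finally the
 * multicast table array (MTA) hash.
 */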
921 static int
922 receive_filter(E1000State *s, const uint8_t *buf, int size)
923 {
924     static const int mta_shift[] = {4, 3, 2, 0};
925     uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
926     int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
927 
928     if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
929         uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
930         uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
931                                      ((vid >> 5) & 0x7f));
932         if ((vfta & (1 << (vid & 0x1f))) == 0)
933             return 0;
934     }
935 
936     if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
937         return 1;
938     }
939 
940     if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
941         inc_reg_if_not_full(s, MPRC);
942         return 1;
943     }
944 
945     if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
946         inc_reg_if_not_full(s, BPRC);
947         return 1;
948     }
949 
950     for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
951         if (!(rp[1] & E1000_RAH_AV))
952             continue;
953         ra[0] = cpu_to_le32(rp[0]);
954         ra[1] = cpu_to_le32(rp[1]);
955         if (!memcmp(buf, (uint8_t *)ra, 6)) {
956             DBGOUT(RXFILTER,
957                    "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
958                    (int)(rp - s->mac_reg - RA)/2,
959                    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
960             return 1;
961         }
962     }
963     DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
964            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
965 
966     f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
967     f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
968     if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f))) {
969         inc_reg_if_not_full(s, MPRC);
970         return 1;
971     }
972     DBGOUT(RXFILTER,
973            "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
974            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
975            (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
976            s->mac_reg[MTA + (f >> 5)]);
977 
978     return 0;
979 }
980 
981 static void
982 e1000_set_link_status(NetClientState *nc)
983 {
984     E1000State *s = qemu_get_nic_opaque(nc);
985     uint32_t old_status = s->mac_reg[STATUS];
986 
987     if (nc->link_down) {
988         e1000_link_down(s);
989     } else {
990         if (have_autoneg(s) &&
991             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
992             /* emulate auto-negotiation if supported */
993             timer_mod(s->autoneg_timer,
994                       qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
995         } else {
996             e1000_link_up(s);
997         }
998     }
999 
1000     if (s->mac_reg[STATUS] != old_status)
1001         set_ics(s, 0, E1000_ICR_LSC);
1002 }
1003 
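/* Return true when enough receive descriptors are free to hold total_size
 * bytes; each descriptor holds at most rxbuf_size bytes. */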
1004 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
1005 {
1006     int bufs;
1007     /* Fast-path short packets */
1008     if (total_size <= s->rxbuf_size) {
1009         return s->mac_reg[RDH] != s->mac_reg[RDT];
1010     }
1011     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
1012         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
1013     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
1014         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
1015             s->mac_reg[RDT] - s->mac_reg[RDH];
1016     } else {
1017         return false;
1018     }
1019     return total_size <= bufs * s->rxbuf_size;
1020 }
1021 
1022 static int
1023 e1000_can_receive(NetClientState *nc)
1024 {
1025     E1000State *s = qemu_get_nic_opaque(nc);
1026 
1027     return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
1028         (s->mac_reg[RCTL] & E1000_RCTL_EN) &&
1029         (s->parent_obj.config[PCI_COMMAND] & PCI_COMMAND_MASTER) &&
1030         e1000_has_rxbufs(s, 1);
1031 }
1032 
1033 static uint64_t rx_desc_base(E1000State *s)
1034 {
1035     uint64_t bah = s->mac_reg[RDBAH];
1036     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
1037 
1038     return (bah << 32) + bal;
1039 }
1040 
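/*
 * Receive path: pad runt frames to the 60-byte minimum, drop oversized
 * frames unless LPE/SBP allows them, apply the receive filters, strip the
 * VLAN tag when VLAN handling is enabled, DMA the payload into the guest's
 * receive descriptor buffers, then update the statistics registers and
 * raise RXT0 (plus RXDMT0 when free descriptors fall below the threshold).
 */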
1041 static ssize_t
1042 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
1043 {
1044     E1000State *s = qemu_get_nic_opaque(nc);
1045     PCIDevice *d = PCI_DEVICE(s);
1046     struct e1000_rx_desc desc;
1047     dma_addr_t base;
1048     unsigned int n, rdt;
1049     uint32_t rdh_start;
1050     uint16_t vlan_special = 0;
1051     uint8_t vlan_status = 0;
1052     uint8_t min_buf[MIN_BUF_SIZE];
1053     struct iovec min_iov;
1054     uint8_t *filter_buf = iov->iov_base;
1055     size_t size = iov_size(iov, iovcnt);
1056     size_t iov_ofs = 0;
1057     size_t desc_offset;
1058     size_t desc_size;
1059     size_t total_size;
1060     static const int PRCregs[6] = { PRC64, PRC127, PRC255, PRC511,
1061                                     PRC1023, PRC1522 };
1062 
1063     if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
1064         return -1;
1065     }
1066 
1067     if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
1068         return -1;
1069     }
1070 
1071     /* Pad to minimum Ethernet frame length */
1072     if (size < sizeof(min_buf)) {
1073         iov_to_buf(iov, iovcnt, 0, min_buf, size);
1074         memset(&min_buf[size], 0, sizeof(min_buf) - size);
1075         inc_reg_if_not_full(s, RUC);
1076         min_iov.iov_base = filter_buf = min_buf;
1077         min_iov.iov_len = size = sizeof(min_buf);
1078         iovcnt = 1;
1079         iov = &min_iov;
1080     } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
1081         /* This is very unlikely, but may happen. */
1082         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
1083         filter_buf = min_buf;
1084     }
1085 
1086     /* Discard oversized packets if !LPE and !SBP. */
1087     if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
1088         (size > MAXIMUM_ETHERNET_VLAN_SIZE
1089         && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
1090         && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
1091         inc_reg_if_not_full(s, ROC);
1092         return size;
1093     }
1094 
1095     if (!receive_filter(s, filter_buf, size)) {
1096         return size;
1097     }
1098 
1099     if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
1100         vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
1101                                                                 + 14)));
1102         iov_ofs = 4;
1103         if (filter_buf == iov->iov_base) {
1104             memmove(filter_buf + 4, filter_buf, 12);
1105         } else {
1106             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
1107             while (iov->iov_len <= iov_ofs) {
1108                 iov_ofs -= iov->iov_len;
1109                 iov++;
1110             }
1111         }
1112         vlan_status = E1000_RXD_STAT_VP;
1113         size -= 4;
1114     }
1115 
1116     rdh_start = s->mac_reg[RDH];
1117     desc_offset = 0;
1118     total_size = size + fcs_len(s);
1119     if (!e1000_has_rxbufs(s, total_size)) {
1120             set_ics(s, 0, E1000_ICS_RXO);
1121             return -1;
1122     }
1123     do {
1124         desc_size = total_size - desc_offset;
1125         if (desc_size > s->rxbuf_size) {
1126             desc_size = s->rxbuf_size;
1127         }
1128         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
1129         pci_dma_read(d, base, &desc, sizeof(desc));
1130         desc.special = vlan_special;
1131         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
1132         if (desc.buffer_addr) {
1133             if (desc_offset < size) {
1134                 size_t iov_copy;
1135                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
1136                 size_t copy_size = size - desc_offset;
1137                 if (copy_size > s->rxbuf_size) {
1138                     copy_size = s->rxbuf_size;
1139                 }
1140                 do {
1141                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
1142                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
1143                     copy_size -= iov_copy;
1144                     ba += iov_copy;
1145                     iov_ofs += iov_copy;
1146                     if (iov_ofs == iov->iov_len) {
1147                         iov++;
1148                         iov_ofs = 0;
1149                     }
1150                 } while (copy_size);
1151             }
1152             desc_offset += desc_size;
1153             desc.length = cpu_to_le16(desc_size);
1154             if (desc_offset >= total_size) {
1155                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
1156             } else {
1157                 /* Guest zeroing out status is not a hardware requirement.
1158                    Clear EOP in case guest didn't do it. */
1159                 desc.status &= ~E1000_RXD_STAT_EOP;
1160             }
1161         } else { // as per intel docs; skip descriptors with null buf addr
1162             DBGOUT(RX, "Null RX descriptor!!\n");
1163         }
1164         pci_dma_write(d, base, &desc, sizeof(desc));
1165 
1166         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
1167             s->mac_reg[RDH] = 0;
1168         /* see comment in start_xmit; same here */
1169         if (s->mac_reg[RDH] == rdh_start) {
1170             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1171                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1172             set_ics(s, 0, E1000_ICS_RXO);
1173             return -1;
1174         }
1175     } while (desc_offset < total_size);
1176 
1177     increase_size_stats(s, PRCregs, total_size);
1178     inc_reg_if_not_full(s, TPR);
1179     s->mac_reg[GPRC] = s->mac_reg[TPR];
1180     /* TOR - Total Octets Received:
1181      * This register includes bytes received in a packet from the <Destination
1182      * Address> field through the <CRC> field, inclusively.
1183      * Always include FCS length (4) in size.
1184      */
1185     grow_8reg_if_not_full(s, TORL, size+4);
1186     s->mac_reg[GORCL] = s->mac_reg[TORL];
1187     s->mac_reg[GORCH] = s->mac_reg[TORH];
1188 
1189     n = E1000_ICS_RXT0;
1190     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1191         rdt += s->mac_reg[RDLEN] / sizeof(desc);
1192     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1193         s->rxbuf_min_shift)
1194         n |= E1000_ICS_RXDMT0;
1195 
1196     set_ics(s, 0, n);
1197 
1198     return size;
1199 }
1200 
1201 static ssize_t
1202 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1203 {
1204     const struct iovec iov = {
1205         .iov_base = (uint8_t *)buf,
1206         .iov_len = size
1207     };
1208 
1209     return e1000_receive_iov(nc, &iov, 1);
1210 }
1211 
1212 static uint32_t
1213 mac_readreg(E1000State *s, int index)
1214 {
1215     return s->mac_reg[index];
1216 }
1217 
1218 static uint32_t
1219 mac_low4_read(E1000State *s, int index)
1220 {
1221     return s->mac_reg[index] & 0xf;
1222 }
1223 
1224 static uint32_t
1225 mac_low11_read(E1000State *s, int index)
1226 {
1227     return s->mac_reg[index] & 0x7ff;
1228 }
1229 
1230 static uint32_t
1231 mac_low13_read(E1000State *s, int index)
1232 {
1233     return s->mac_reg[index] & 0x1fff;
1234 }
1235 
1236 static uint32_t
1237 mac_low16_read(E1000State *s, int index)
1238 {
1239     return s->mac_reg[index] & 0xffff;
1240 }
1241 
1242 static uint32_t
1243 mac_icr_read(E1000State *s, int index)
1244 {
1245     uint32_t ret = s->mac_reg[ICR];
1246 
1247     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1248     set_interrupt_cause(s, 0, 0);
1249     return ret;
1250 }
1251 
1252 static uint32_t
1253 mac_read_clr4(E1000State *s, int index)
1254 {
1255     uint32_t ret = s->mac_reg[index];
1256 
1257     s->mac_reg[index] = 0;
1258     return ret;
1259 }
1260 
1261 static uint32_t
1262 mac_read_clr8(E1000State *s, int index)
1263 {
1264     uint32_t ret = s->mac_reg[index];
1265 
1266     s->mac_reg[index] = 0;
1267     s->mac_reg[index-1] = 0;
1268     return ret;
1269 }
1270 
1271 static void
1272 mac_writereg(E1000State *s, int index, uint32_t val)
1273 {
1274     uint32_t macaddr[2];
1275 
1276     s->mac_reg[index] = val;
1277 
1278     if (index == RA + 1) {
1279         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1280         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1281         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1282     }
1283 }
1284 
1285 static void
1286 set_rdt(E1000State *s, int index, uint32_t val)
1287 {
1288     s->mac_reg[index] = val & 0xffff;
1289     if (e1000_has_rxbufs(s, 1)) {
1290         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1291     }
1292 }
1293 
1294 static void
1295 set_16bit(E1000State *s, int index, uint32_t val)
1296 {
1297     s->mac_reg[index] = val & 0xffff;
1298 }
1299 
1300 static void
1301 set_dlen(E1000State *s, int index, uint32_t val)
1302 {
1303     s->mac_reg[index] = val & 0xfff80;
1304 }
1305 
1306 static void
1307 set_tctl(E1000State *s, int index, uint32_t val)
1308 {
1309     s->mac_reg[index] = val;
1310     s->mac_reg[TDT] &= 0xffff;
1311     start_xmit(s);
1312 }
1313 
1314 static void
1315 set_icr(E1000State *s, int index, uint32_t val)
1316 {
1317     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1318     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1319 }
1320 
1321 static void
1322 set_imc(E1000State *s, int index, uint32_t val)
1323 {
1324     s->mac_reg[IMS] &= ~val;
1325     set_ics(s, 0, 0);
1326 }
1327 
1328 static void
1329 set_ims(E1000State *s, int index, uint32_t val)
1330 {
1331     s->mac_reg[IMS] |= val;
1332     set_ics(s, 0, 0);
1333 }
1334 
1335 #define getreg(x)    [x] = mac_readreg
1336 static uint32_t (*macreg_readops[])(E1000State *, int) = {
1337     getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
1338     getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
1339     getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
1340     getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
1341     getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
1342     getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
1343     getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
1344     getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
1345     getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
1346     getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
1347     getreg(TNCRS),    getreg(SEC),      getreg(CEXTERR),  getreg(RLEC),
1348     getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
1349     getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
1350     getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
1351     getreg(GOTCL),
1352 
1353     [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
1354     [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
1355     [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
1356     [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
1357     [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
1358     [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
1359     [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
1360     [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
1361     [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
1362     [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
1363     [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
1364     [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
1365     [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
1366     [MPTC]    = mac_read_clr4,
1367     [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
1368     [EERD]    = flash_eerd_read,
1369     [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
1370     [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
1371     [RDFPC]   = mac_low13_read,
1372     [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
1373     [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
1374     [TDFPC]   = mac_low13_read,
1375     [AIT]     = mac_low16_read,
1376 
1377     [CRCERRS ... MPC]   = &mac_readreg,
1378     [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
1379     [FFLT ... FFLT+6]   = &mac_low11_read,
1380     [RA ... RA+31]      = &mac_readreg,
1381     [WUPM ... WUPM+31]  = &mac_readreg,
1382     [MTA ... MTA+127]   = &mac_readreg,
1383     [VFTA ... VFTA+127] = &mac_readreg,
1384     [FFMT ... FFMT+254] = &mac_low4_read,
1385     [FFVT ... FFVT+254] = &mac_readreg,
1386     [PBM ... PBM+16383] = &mac_readreg,
1387 };
1388 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1389 
1390 #define putreg(x)    [x] = mac_writereg
1391 static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
1392     putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
1393     putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
1394     putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
1395     putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
1396     putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
1397     putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
1398     putreg(WUS),      putreg(AIT),
1399 
1400     [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
1401     [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
1402     [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
1403     [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
1404     [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
1405     [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
1406     [ITR]    = set_16bit,
1407 
1408     [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
1409     [FFLT ... FFLT+6]   = &mac_writereg,
1410     [RA ... RA+31]      = &mac_writereg,
1411     [WUPM ... WUPM+31]  = &mac_writereg,
1412     [MTA ... MTA+127]   = &mac_writereg,
1413     [VFTA ... VFTA+127] = &mac_writereg,
1414     [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
1415     [PBM ... PBM+16383] = &mac_writereg,
1416 };
1417 
1418 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1419 
1420 enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
1421 
1422 #define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
1423 /* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
1424  * f - flag bits (up to 6 possible flags)
1425  * n - flag needed
1426  * p - partially implemented */
1427 static const uint8_t mac_reg_access[0x8000] = {
1428     [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
1429     [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),
1430 
1431     [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
1432     [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
1433     [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
1434     [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
1435     [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
1436     [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
1437     [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
1438     [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
1439     [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
1440     [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
1441     [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
1442     [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
1443     [SEC]     = markflag(MAC),    [CEXTERR] = markflag(MAC),
1444     [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
1445     [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
1446     [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
1447     [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
1448     [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
1449     [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
1450     [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
1451     [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
1452     [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
1453     [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
1454     [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
1455     [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
1456     [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
1457     [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
1458     [BPTC]    = markflag(MAC),
1459 
1460     [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1461     [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1462     [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1463     [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1464     [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1465     [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1466     [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1467     [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1468     [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1469     [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1470     [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
1471 };
1472 
1473 static void
1474 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1475                  unsigned size)
1476 {
1477     E1000State *s = opaque;
1478     unsigned int index = (addr & 0x1ffff) >> 2;
1479 
1480     if (index < NWRITEOPS && macreg_writeops[index]) {
1481         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1482             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1483             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1484                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1485                        "It is not fully implemented.\n", index<<2);
1486             }
1487             macreg_writeops[index](s, index, val);
1488         } else {    /* "flag needed" bit is set, but the flag is not active */
1489             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1490                    index<<2);
1491         }
1492     } else if (index < NREADOPS && macreg_readops[index]) {
1493         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1494                index<<2, val);
1495     } else {
1496         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1497                index<<2, val);
1498     }
1499 }
1500 
1501 static uint64_t
1502 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1503 {
1504     E1000State *s = opaque;
1505     unsigned int index = (addr & 0x1ffff) >> 2;
1506 
1507     if (index < NREADOPS && macreg_readops[index]) {
1508         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1509             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1510             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1511                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1512                        "It is not fully implemented.\n", index<<2);
1513             }
1514             return macreg_readops[index](s, index);
1515         } else {    /* "flag needed" bit is set, but the flag is not active */
1516             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1517                    index<<2);
1518         }
1519     } else {
1520         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1521     }
1522     return 0;
1523 }
1524 
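     /*
      * With .impl.{min,max}_access_size fixed at 4, the memory core widens
      * narrower guest accesses and splits wider ones, so the read and write
      * callbacks above only ever see aligned 32-bit register accesses.
      */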
1525 static const MemoryRegionOps e1000_mmio_ops = {
1526     .read = e1000_mmio_read,
1527     .write = e1000_mmio_write,
1528     .endianness = DEVICE_LITTLE_ENDIAN,
1529     .impl = {
1530         .min_access_size = 4,
1531         .max_access_size = 4,
1532     },
1533 };
1534 
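     /*
      * The I/O-space BAR is a stub in this model: reads return 0 and writes
      * are discarded, rather than emulating the indirect IOADDR/IODATA
      * window that real 8254x parts expose here.
      */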
1535 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1536                               unsigned size)
1537 {
1538     E1000State *s = opaque;
1539 
1540     (void)s;
1541     return 0;
1542 }
1543 
1544 static void e1000_io_write(void *opaque, hwaddr addr,
1545                            uint64_t val, unsigned size)
1546 {
1547     E1000State *s = opaque;
1548 
1549     (void)s;
1550 }
1551 
1552 static const MemoryRegionOps e1000_io_ops = {
1553     .read = e1000_io_read,
1554     .write = e1000_io_write,
1555     .endianness = DEVICE_LITTLE_ENDIAN,
1556 };
1557 
1558 static bool is_version_1(void *opaque, int version_id)
1559 {
1560     return version_id == 1;
1561 }
1562 
1563 static void e1000_pre_save(void *opaque)
1564 {
1565     E1000State *s = opaque;
1566     NetClientState *nc = qemu_get_queue(s->nic);
1567 
1568     /* If the mitigation timer is active, emulate a timeout now. */
1569     if (s->mit_timer_on) {
1570         e1000_mit_timer(s);
1571     }
1572 
1573     /*
1574      * If link is down and auto-negotiation is supported and ongoing,
1575      * complete auto-negotiation immediately. This allows us to look
1576      * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1577      */
1578     if (nc->link_down && have_autoneg(s)) {
1579         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1580     }
1581 }
1582 
1583 static int e1000_post_load(void *opaque, int version_id)
1584 {
1585     E1000State *s = opaque;
1586     NetClientState *nc = qemu_get_queue(s->nic);
1587 
1588     if (!chkflag(MIT)) {
1589         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1590             s->mac_reg[TADV] = 0;
1591         s->mit_irq_level = false;
1592     }
1593     s->mit_ide = 0;
1594     s->mit_timer_on = false;
1595 
1596     /* nc.link_down can't be migrated, so infer it from the link status
1597      * bit in mac_reg[STATUS].  Alternatively, restart link negotiation
1598      * if it was in progress. */
1599     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1600 
1601     if (have_autoneg(s) &&
1602         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1603         nc->link_down = false;
1604         timer_mod(s->autoneg_timer,
1605                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1606     }
1607 
1608     return 0;
1609 }
1610 
1611 static bool e1000_mit_state_needed(void *opaque)
1612 {
1613     E1000State *s = opaque;
1614 
1615     return chkflag(MIT);
1616 }
1617 
1618 static bool e1000_full_mac_needed(void *opaque)
1619 {
1620     E1000State *s = opaque;
1621 
1622     return chkflag(MAC);
1623 }
1624 
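     /*
      * Subsections are only put on the wire when their .needed callback
      * returns true, so the migration stream stays compatible with older
      * QEMU versions whenever the corresponding compat flag is switched off.
      */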
1625 static const VMStateDescription vmstate_e1000_mit_state = {
1626     .name = "e1000/mit_state",
1627     .version_id = 1,
1628     .minimum_version_id = 1,
1629     .needed = e1000_mit_state_needed,
1630     .fields = (VMStateField[]) {
1631         VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1632         VMSTATE_UINT32(mac_reg[RADV], E1000State),
1633         VMSTATE_UINT32(mac_reg[TADV], E1000State),
1634         VMSTATE_UINT32(mac_reg[ITR], E1000State),
1635         VMSTATE_BOOL(mit_irq_level, E1000State),
1636         VMSTATE_END_OF_LIST()
1637     }
1638 };
1639 
1640 static const VMStateDescription vmstate_e1000_full_mac_state = {
1641     .name = "e1000/full_mac_state",
1642     .version_id = 1,
1643     .minimum_version_id = 1,
1644     .needed = e1000_full_mac_needed,
1645     .fields = (VMStateField[]) {
1646         VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
1647         VMSTATE_END_OF_LIST()
1648     }
1649 };
1650 
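     /*
      * Fields are streamed in exactly the order listed here, so existing
      * entries must not be reordered or removed; new state is better carried
      * in a subsection (as above) than by bumping version_id.
      */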
1651 static const VMStateDescription vmstate_e1000 = {
1652     .name = "e1000",
1653     .version_id = 2,
1654     .minimum_version_id = 1,
1655     .pre_save = e1000_pre_save,
1656     .post_load = e1000_post_load,
1657     .fields = (VMStateField[]) {
1658         VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1659         VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1660         VMSTATE_UNUSED(4), /* Was mmio_base.  */
1661         VMSTATE_UINT32(rxbuf_size, E1000State),
1662         VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1663         VMSTATE_UINT32(eecd_state.val_in, E1000State),
1664         VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1665         VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1666         VMSTATE_UINT16(eecd_state.reading, E1000State),
1667         VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1668         VMSTATE_UINT8(tx.ipcss, E1000State),
1669         VMSTATE_UINT8(tx.ipcso, E1000State),
1670         VMSTATE_UINT16(tx.ipcse, E1000State),
1671         VMSTATE_UINT8(tx.tucss, E1000State),
1672         VMSTATE_UINT8(tx.tucso, E1000State),
1673         VMSTATE_UINT16(tx.tucse, E1000State),
1674         VMSTATE_UINT32(tx.paylen, E1000State),
1675         VMSTATE_UINT8(tx.hdr_len, E1000State),
1676         VMSTATE_UINT16(tx.mss, E1000State),
1677         VMSTATE_UINT16(tx.size, E1000State),
1678         VMSTATE_UINT16(tx.tso_frames, E1000State),
1679         VMSTATE_UINT8(tx.sum_needed, E1000State),
1680         VMSTATE_INT8(tx.ip, E1000State),
1681         VMSTATE_INT8(tx.tcp, E1000State),
1682         VMSTATE_BUFFER(tx.header, E1000State),
1683         VMSTATE_BUFFER(tx.data, E1000State),
1684         VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1685         VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1686         VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1687         VMSTATE_UINT32(mac_reg[EECD], E1000State),
1688         VMSTATE_UINT32(mac_reg[EERD], E1000State),
1689         VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1690         VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1691         VMSTATE_UINT32(mac_reg[ICR], E1000State),
1692         VMSTATE_UINT32(mac_reg[ICS], E1000State),
1693         VMSTATE_UINT32(mac_reg[IMC], E1000State),
1694         VMSTATE_UINT32(mac_reg[IMS], E1000State),
1695         VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1696         VMSTATE_UINT32(mac_reg[MANC], E1000State),
1697         VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1698         VMSTATE_UINT32(mac_reg[MPC], E1000State),
1699         VMSTATE_UINT32(mac_reg[PBA], E1000State),
1700         VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1701         VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1702         VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1703         VMSTATE_UINT32(mac_reg[RDH], E1000State),
1704         VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1705         VMSTATE_UINT32(mac_reg[RDT], E1000State),
1706         VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1707         VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1708         VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1709         VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1710         VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1711         VMSTATE_UINT32(mac_reg[TDH], E1000State),
1712         VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1713         VMSTATE_UINT32(mac_reg[TDT], E1000State),
1714         VMSTATE_UINT32(mac_reg[TORH], E1000State),
1715         VMSTATE_UINT32(mac_reg[TORL], E1000State),
1716         VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1717         VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1718         VMSTATE_UINT32(mac_reg[TPR], E1000State),
1719         VMSTATE_UINT32(mac_reg[TPT], E1000State),
1720         VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1721         VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1722         VMSTATE_UINT32(mac_reg[VET], E1000State),
1723         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1724         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1725         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1726         VMSTATE_END_OF_LIST()
1727     },
1728     .subsections = (const VMStateDescription*[]) {
1729         &vmstate_e1000_mit_state,
1730         &vmstate_e1000_full_mac_state,
1731         NULL
1732     }
1733 };
1734 
1735 /*
1736  * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1737  * Note: A valid DevId will be inserted during pci_e1000_realize().
1738  */
1739 static const uint16_t e1000_eeprom_template[64] = {
1740     0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1741     0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1742     0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1743     0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1744     0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1745     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1746     0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1747     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1748 };
1749 
1750 /* PCI interface */
1751 
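     /*
      * Most of the register window is set up for coalesced MMIO so that KVM
      * can batch guest writes; the registers named in excluded_regs
      * (interrupts, MDIC, TCTL, TDT) have immediate side effects, so their
      * 4-byte slots are left out of the coalesced ranges.
      */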
1752 static void
1753 e1000_mmio_setup(E1000State *d)
1754 {
1755     int i;
1756     const uint32_t excluded_regs[] = {
1757         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1758         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1759     };
1760 
1761     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1762                           "e1000-mmio", PNPMMIO_SIZE);
1763     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1764     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1765         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1766                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1767     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d,
                               "e1000-io", IOPORT_SIZE);
1768 }
1769 
1770 static void
1771 pci_e1000_uninit(PCIDevice *dev)
1772 {
1773     E1000State *d = E1000(dev);
1774 
1775     timer_del(d->autoneg_timer);
1776     timer_free(d->autoneg_timer);
1777     timer_del(d->mit_timer);
1778     timer_free(d->mit_timer);
1779     qemu_del_nic(d->nic);
1780 }
1781 
1782 static NetClientInfo net_e1000_info = {
1783     .type = NET_CLIENT_OPTIONS_KIND_NIC,
1784     .size = sizeof(NICState),
1785     .can_receive = e1000_can_receive,
1786     .receive = e1000_receive,
1787     .receive_iov = e1000_receive_iov,
1788     .link_status_changed = e1000_set_link_status,
1789 };
1790 
1791 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1792                                 uint32_t val, int len)
1793 {
1794     E1000State *s = E1000(pci_dev);
1795 
1796     pci_default_write_config(pci_dev, address, val, len);
1797 
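         /*
          * Frames may have been queued while bus mastering was disabled;
          * once the guest sets PCI_COMMAND_MASTER the device can DMA into
          * the rings again, so ask the backend to retry delivery.
          */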
1798     if (range_covers_byte(address, len, PCI_COMMAND) &&
1799         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1800         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1801     }
1802 }
1803 
1804 
1805 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1806 {
1807     DeviceState *dev = DEVICE(pci_dev);
1808     E1000State *d = E1000(pci_dev);
1809     PCIDeviceClass *pdc = PCI_DEVICE_GET_CLASS(pci_dev);
1810     uint8_t *pci_conf;
1811     uint16_t checksum = 0;
1812     int i;
1813     uint8_t *macaddr;
1814 
1815     pci_dev->config_write = e1000_write_config;
1816 
1817     pci_conf = pci_dev->config;
1818 
1819     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1820     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1821 
1822     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1823 
1824     e1000_mmio_setup(d);
1825 
1826     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1827 
1828     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1829 
1830     memmove(d->eeprom_data, e1000_eeprom_template,
1831         sizeof e1000_eeprom_template);
1832     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1833     macaddr = d->conf.macaddr.a;
1834     for (i = 0; i < 3; i++)
1835         d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1836     d->eeprom_data[11] = d->eeprom_data[13] = pdc->device_id;
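         /*
          * The 64 EEPROM words are expected to sum to EEPROM_SUM (0xBABA in
          * the 8254x documentation), so store whatever value makes the
          * checksum word balance the total.
          */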
1837     for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1838         checksum += d->eeprom_data[i];
1839     checksum = (uint16_t) EEPROM_SUM - checksum;
1840     d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1841 
1842     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1843                           object_get_typename(OBJECT(d)), dev->id, d);
1844 
1845     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1846 
1847     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1848     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1849 }
1850 
1851 static void qdev_e1000_reset(DeviceState *dev)
1852 {
1853     E1000State *d = E1000(dev);
1854     e1000_reset(d);
1855 }
1856 
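     /*
      * All three compat bits default to on; machine-type compat properties
      * or an explicit command line can clear them, e.g. (illustrative
      * invocation, assuming a netdev called "net0" already exists):
      *   -device e1000,netdev=net0,mitigation=off,extra_mac_registers=off
      */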
1857 static Property e1000_properties[] = {
1858     DEFINE_NIC_PROPERTIES(E1000State, conf),
1859     DEFINE_PROP_BIT("autonegotiation", E1000State,
1860                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1861     DEFINE_PROP_BIT("mitigation", E1000State,
1862                     compat_flags, E1000_FLAG_MIT_BIT, true),
1863     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1864                     compat_flags, E1000_FLAG_MAC_BIT, true),
1865     DEFINE_PROP_END_OF_LIST(),
1866 };
1867 
1868 typedef struct E1000Info {
1869     const char *name;
1870     uint16_t   device_id;
1871     uint8_t    revision;
1872     uint16_t   phy_id2;
1873 } E1000Info;
1874 
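     /*
      * e1000_class_init() runs once per concrete model with the matching
      * E1000Info passed in as class_data; that is how the per-model PCI
      * device ID, revision and PHY ID2 reach the class structures.
      */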
1875 static void e1000_class_init(ObjectClass *klass, void *data)
1876 {
1877     DeviceClass *dc = DEVICE_CLASS(klass);
1878     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1879     E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1880     const E1000Info *info = data;
1881 
1882     k->realize = pci_e1000_realize;
1883     k->exit = pci_e1000_uninit;
1884     k->romfile = "efi-e1000.rom";
1885     k->vendor_id = PCI_VENDOR_ID_INTEL;
1886     k->device_id = info->device_id;
1887     k->revision = info->revision;
1888     e->phy_id2 = info->phy_id2;
1889     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1890     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1891     dc->desc = "Intel Gigabit Ethernet";
1892     dc->reset = qdev_e1000_reset;
1893     dc->vmsd = &vmstate_e1000;
1894     dc->props = e1000_properties;
1895 }
1896 
1897 static void e1000_instance_init(Object *obj)
1898 {
1899     E1000State *n = E1000(obj);
1900     device_add_bootindex_property(obj, &n->conf.bootindex,
1901                                   "bootindex", "/ethernet-phy@0",
1902                                   DEVICE(n), NULL);
1903 }
1904 
1905 static const TypeInfo e1000_base_info = {
1906     .name          = TYPE_E1000_BASE,
1907     .parent        = TYPE_PCI_DEVICE,
1908     .instance_size = sizeof(E1000State),
1909     .instance_init = e1000_instance_init,
1910     .class_size    = sizeof(E1000BaseClass),
1911     .abstract      = true,
1912 };
1913 
1914 static const E1000Info e1000_devices[] = {
1915     {
1916         .name      = "e1000",
1917         .device_id = E1000_DEV_ID_82540EM,
1918         .revision  = 0x03,
1919         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1920     },
1921     {
1922         .name      = "e1000-82544gc",
1923         .device_id = E1000_DEV_ID_82544GC_COPPER,
1924         .revision  = 0x03,
1925         .phy_id2   = E1000_PHY_ID2_82544x,
1926     },
1927     {
1928         .name      = "e1000-82545em",
1929         .device_id = E1000_DEV_ID_82545EM_COPPER,
1930         .revision  = 0x03,
1931         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1932     },
1933 };
1934 
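     /*
      * Register the abstract TYPE_E1000_BASE once, then synthesize a
      * concrete type for each entry of e1000_devices, reusing the table
      * entry as class_data so e1000_class_init() can pick up the per-model
      * IDs.
      */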
1935 static void e1000_register_types(void)
1936 {
1937     int i;
1938 
1939     type_register_static(&e1000_base_info);
1940     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1941         const E1000Info *info = &e1000_devices[i];
1942         TypeInfo type_info = {};
1943 
1944         type_info.name = info->name;
1945         type_info.parent = TYPE_E1000_BASE;
1946         type_info.class_data = (void *)info;
1947         type_info.class_init = e1000_class_init;
1948         type_info.instance_init = e1000_instance_init;
1949 
1950         type_register(&type_info);
1951     }
1952 }
1953 
1954 type_init(e1000_register_types)
1955