xref: /openbmc/qemu/hw/net/e1000.c (revision 87c9b5e0)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/hw.h"
30 #include "hw/pci/pci.h"
31 #include "net/net.h"
32 #include "net/checksum.h"
33 #include "hw/loader.h"
34 #include "sysemu/sysemu.h"
35 #include "sysemu/dma.h"
36 #include "qemu/iov.h"
37 #include "qemu/range.h"
38 
39 #include "e1000_regs.h"
40 
41 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
42 
43 #define E1000_DEBUG
44 
45 #ifdef E1000_DEBUG
46 enum {
47     DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
48     DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
49     DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
50     DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
51 };
52 #define DBGBIT(x)    (1<<DEBUG_##x)
53 static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
54 
55 #define DBGOUT(what, fmt, ...) do { \
56     if (debugflags & DBGBIT(what)) \
57         fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
58     } while (0)
59 #else
60 #define DBGOUT(what, fmt, ...) do {} while (0)
61 #endif
62 
63 #define IOPORT_SIZE       0x40
64 #define PNPMMIO_SIZE      0x20000
65 #define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
66 
67 /* this is the size past which hardware will drop packets when setting LPE=0 */
68 #define MAXIMUM_ETHERNET_VLAN_SIZE 1522
69 /* this is the size past which hardware will drop packets when setting LPE=1 */
70 #define MAXIMUM_ETHERNET_LPE_SIZE 16384
71 
72 #define MAXIMUM_ETHERNET_HDR_LEN (14+4)
73 
74 /*
75  * HW models:
76  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
77  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
78  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
79  *  Others never tested
80  */
81 
82 typedef struct E1000State_st {
83     /*< private >*/
84     PCIDevice parent_obj;
85     /*< public >*/
86 
87     NICState *nic;
88     NICConf conf;
89     MemoryRegion mmio;
90     MemoryRegion io;
91 
92     uint32_t mac_reg[0x8000];
93     uint16_t phy_reg[0x20];
94     uint16_t eeprom_data[64];
95 
96     uint32_t rxbuf_size;
97     uint32_t rxbuf_min_shift;
98     struct e1000_tx {
99         unsigned char header[256];
100         unsigned char vlan_header[4];
101         /* Fields vlan and data must not be reordered or separated. */
102         unsigned char vlan[4];
103         unsigned char data[0x10000];
104         uint16_t size;
105         unsigned char sum_needed;
106         unsigned char vlan_needed;
107         uint8_t ipcss;
108         uint8_t ipcso;
109         uint16_t ipcse;
110         uint8_t tucss;
111         uint8_t tucso;
112         uint16_t tucse;
113         uint8_t hdr_len;
114         uint16_t mss;
115         uint32_t paylen;
116         uint16_t tso_frames;
117         char tse;
118         int8_t ip;
119         int8_t tcp;
120         char cptse;     // current packet tse bit
121     } tx;
122 
123     struct {
124         uint32_t val_in;    /* shifted in from guest driver */
125         uint16_t bitnum_in;
126         uint16_t bitnum_out;
127         uint16_t reading;
128         uint32_t old_eecd;
129     } eecd_state;
130 
131     QEMUTimer *autoneg_timer;
132 
133     QEMUTimer *mit_timer;      /* Mitigation timer. */
134     bool mit_timer_on;         /* Mitigation timer is running. */
135     bool mit_irq_level;        /* Tracks interrupt pin level. */
136     uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
137 
138 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
139 #define E1000_FLAG_AUTONEG_BIT 0
140 #define E1000_FLAG_MIT_BIT 1
141 #define E1000_FLAG_MAC_BIT 2
142 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
143 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
144 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
145     uint32_t compat_flags;
146 } E1000State;
147 
148 #define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
149 
150 typedef struct E1000BaseClass {
151     PCIDeviceClass parent_class;
152     uint16_t phy_id2;
153 } E1000BaseClass;
154 
155 #define TYPE_E1000_BASE "e1000-base"
156 
157 #define E1000(obj) \
158     OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)
159 
160 #define E1000_DEVICE_CLASS(klass) \
161      OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
162 #define E1000_DEVICE_GET_CLASS(obj) \
163     OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
164 
165 #define defreg(x)    x = (E1000_##x>>2)
166 enum {
167     defreg(CTRL),    defreg(EECD),    defreg(EERD),    defreg(GPRC),
168     defreg(GPTC),    defreg(ICR),     defreg(ICS),     defreg(IMC),
169     defreg(IMS),     defreg(LEDCTL),  defreg(MANC),    defreg(MDIC),
170     defreg(MPC),     defreg(PBA),     defreg(RCTL),    defreg(RDBAH),
171     defreg(RDBAL),   defreg(RDH),     defreg(RDLEN),   defreg(RDT),
172     defreg(STATUS),  defreg(SWSM),    defreg(TCTL),    defreg(TDBAH),
173     defreg(TDBAL),   defreg(TDH),     defreg(TDLEN),   defreg(TDT),
174     defreg(TORH),    defreg(TORL),    defreg(TOTH),    defreg(TOTL),
175     defreg(TPR),     defreg(TPT),     defreg(TXDCTL),  defreg(WUFC),
176     defreg(RA),      defreg(MTA),     defreg(CRCERRS), defreg(VFTA),
177     defreg(VET),     defreg(RDTR),    defreg(RADV),    defreg(TADV),
178     defreg(ITR),     defreg(FCRUC),   defreg(TDFH),    defreg(TDFT),
179     defreg(TDFHS),   defreg(TDFTS),   defreg(TDFPC),   defreg(RDFH),
180     defreg(RDFT),    defreg(RDFHS),   defreg(RDFTS),   defreg(RDFPC),
181     defreg(IPAV),    defreg(WUC),     defreg(WUS),     defreg(AIT),
182     defreg(IP6AT),   defreg(IP4AT),   defreg(FFLT),    defreg(FFMT),
183     defreg(FFVT),    defreg(WUPM),    defreg(PBM),     defreg(SCC),
184     defreg(ECOL),    defreg(MCC),     defreg(LATECOL), defreg(COLC),
185     defreg(DC),      defreg(TNCRS),   defreg(SEC),     defreg(CEXTERR),
186     defreg(RLEC),    defreg(XONRXC),  defreg(XONTXC),  defreg(XOFFRXC),
187     defreg(XOFFTXC), defreg(RFC),     defreg(RJC),     defreg(RNBC),
188     defreg(TSCTFC),  defreg(MGTPRC),  defreg(MGTPDC),  defreg(MGTPTC),
189     defreg(RUC),     defreg(ROC),     defreg(GORCL),   defreg(GORCH),
190     defreg(GOTCL),   defreg(GOTCH),   defreg(BPRC),    defreg(MPRC),
191     defreg(TSCTC),   defreg(PRC64),   defreg(PRC127),  defreg(PRC255),
192     defreg(PRC511),  defreg(PRC1023), defreg(PRC1522), defreg(PTC64),
193     defreg(PTC127),  defreg(PTC255),  defreg(PTC511),  defreg(PTC1023),
194     defreg(PTC1522), defreg(MPTC),    defreg(BPTC)
195 };
196 
197 static void
198 e1000_link_down(E1000State *s)
199 {
200     s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
201     s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
202     s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
203     s->phy_reg[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK;
204 }
205 
206 static void
207 e1000_link_up(E1000State *s)
208 {
209     s->mac_reg[STATUS] |= E1000_STATUS_LU;
210     s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
211 
212     /* E1000_STATUS_LU is tested by e1000_can_receive() */
213     qemu_flush_queued_packets(qemu_get_queue(s->nic));
214 }
215 
216 static bool
217 have_autoneg(E1000State *s)
218 {
219     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
220 }
221 
222 static void
223 set_phy_ctrl(E1000State *s, int index, uint16_t val)
224 {
225     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self-clearing */
226     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
227                                    MII_CR_RESET |
228                                    MII_CR_RESTART_AUTO_NEG);
229 
230     /*
231      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
232      * migrate during auto-negotiation, the link will be down after
233      * migration.
234      */
235     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
236         e1000_link_down(s);
237         DBGOUT(PHY, "Start link auto negotiation\n");
238         timer_mod(s->autoneg_timer,
239                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
240     }
241 }
242 
243 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
244     [PHY_CTRL] = set_phy_ctrl,
245 };
246 
247 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
248 
249 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
250 static const char phy_regcap[0x20] = {
251     [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
252     [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
253     [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
254     [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
255     [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
256     [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
257     [PHY_AUTONEG_EXP] = PHY_R,
258 };
259 
260 /* PHY_ID2 documented in 8254x_GBe_SDM.pdf, p. 250 */
261 static const uint16_t phy_reg_init[] = {
262     [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
263                    MII_CR_FULL_DUPLEX |
264                    MII_CR_AUTO_NEG_EN,
265 
266     [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
267                    MII_SR_LINK_STATUS |   /* link initially up */
268                    MII_SR_AUTONEG_CAPS |
269                    /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
270                    MII_SR_PREAMBLE_SUPPRESS |
271                    MII_SR_EXTENDED_STATUS |
272                    MII_SR_10T_HD_CAPS |
273                    MII_SR_10T_FD_CAPS |
274                    MII_SR_100X_HD_CAPS |
275                    MII_SR_100X_FD_CAPS,
276 
277     [PHY_ID1] = 0x141,
278     /* [PHY_ID2] configured per DevId, from e1000_reset() */
279     [PHY_AUTONEG_ADV] = 0xde1,
280     [PHY_LP_ABILITY] = 0x1e0,
281     [PHY_1000T_CTRL] = 0x0e00,
282     [PHY_1000T_STATUS] = 0x3c00,
283     [M88E1000_PHY_SPEC_CTRL] = 0x360,
284     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
285     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
286 };
287 
288 static const uint32_t mac_reg_init[] = {
289     [PBA]     = 0x00100030,
290     [LEDCTL]  = 0x602,
291     [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
292                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
293     [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
294                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
295                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
296                 E1000_STATUS_LU,
297     [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
298                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
299                 E1000_MANC_RMCP_EN,
300 };
301 
302 /* Helper function, *curr == 0 means the value is not set */
303 static inline void
304 mit_update_delay(uint32_t *curr, uint32_t value)
305 {
306     if (value && (*curr == 0 || value < *curr)) {
307         *curr = value;
308     }
309 }
310 
311 static void
312 set_interrupt_cause(E1000State *s, int index, uint32_t val)
313 {
314     PCIDevice *d = PCI_DEVICE(s);
315     uint32_t pending_ints;
316     uint32_t mit_delay;
317 
318     s->mac_reg[ICR] = val;
319 
320     /*
321      * Make sure ICR and ICS registers have the same value.
322      * The spec says that the ICS register is write-only.  However in practice,
323      * on real hardware ICS is readable, and for reads it has the same value as
324      * ICR (except that ICS does not have the clear-on-read behaviour of ICR).
325      *
326      * The VxWorks PRO/1000 driver uses this behaviour.
327      */
328     s->mac_reg[ICS] = val;
329 
330     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
331     if (!s->mit_irq_level && pending_ints) {
332         /*
333          * Here we detect a potential rising edge. We postpone raising the
334          * interrupt line if we are inside the mitigation delay window
335          * (s->mit_timer_on == 1).
336          * We provide a partial implementation of interrupt mitigation,
337          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
338          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
339          * RADV; relative timers based on TIDV and RDTR are not implemented.
340          */
341         if (s->mit_timer_on) {
342             return;
343         }
344         if (chkflag(MIT)) {
345             /* Compute the next mitigation delay according to pending
346              * interrupts and the current values of RADV (provided
347              * RDTR!=0), TADV and ITR.
348              * Then rearm the timer.
349              */
350             mit_delay = 0;
351             if (s->mit_ide &&
352                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
353                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
354             }
355             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
356                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
357             }
358             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
359 
360             if (mit_delay) {
361                 s->mit_timer_on = 1;
362                 timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
363                           mit_delay * 256);
364             }
365             s->mit_ide = 0;
366         }
367     }
368 
369     s->mit_irq_level = (pending_ints != 0);
370     pci_set_irq(d, s->mit_irq_level);
371 }
372 
373 static void
374 e1000_mit_timer(void *opaque)
375 {
376     E1000State *s = opaque;
377 
378     s->mit_timer_on = 0;
379     /* Call set_interrupt_cause to update the irq level (if necessary). */
380     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
381 }
382 
383 static void
384 set_ics(E1000State *s, int index, uint32_t val)
385 {
386     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
387         s->mac_reg[IMS]);
388     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
389 }
390 
391 static void
392 e1000_autoneg_timer(void *opaque)
393 {
394     E1000State *s = opaque;
395     if (!qemu_get_queue(s->nic)->link_down) {
396         e1000_link_up(s);
397         s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK;
398         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
399         DBGOUT(PHY, "Auto negotiation is completed\n");
400         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
401     }
402 }
403 
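/*
 * Decode the RCTL buffer size bits (BSEX plus the SZ field) into the
 * receive buffer size in bytes; unrecognized combinations fall back to
 * the 2048-byte default.
 */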
404 static int
405 rxbufsize(uint32_t v)
406 {
407     v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
408          E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
409          E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
410     switch (v) {
411     case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
412         return 16384;
413     case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
414         return 8192;
415     case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
416         return 4096;
417     case E1000_RCTL_SZ_1024:
418         return 1024;
419     case E1000_RCTL_SZ_512:
420         return 512;
421     case E1000_RCTL_SZ_256:
422         return 256;
423     }
424     return 2048;
425 }
426 
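/*
 * Device reset: stop the autoneg/mitigation timers, reload the PHY and
 * MAC registers from their init tables (including the per-model PHY_ID2),
 * and seed RAL/RAH from the configured MAC address.
 */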
427 static void e1000_reset(void *opaque)
428 {
429     E1000State *d = opaque;
430     E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
431     uint8_t *macaddr = d->conf.macaddr.a;
432     int i;
433 
434     timer_del(d->autoneg_timer);
435     timer_del(d->mit_timer);
436     d->mit_timer_on = 0;
437     d->mit_irq_level = 0;
438     d->mit_ide = 0;
439     memset(d->phy_reg, 0, sizeof d->phy_reg);
440     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
441     d->phy_reg[PHY_ID2] = edc->phy_id2;
442     memset(d->mac_reg, 0, sizeof d->mac_reg);
443     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
444     d->rxbuf_min_shift = 1;
445     memset(&d->tx, 0, sizeof d->tx);
446 
447     if (qemu_get_queue(d->nic)->link_down) {
448         e1000_link_down(d);
449     }
450 
451     /* Throttle interrupts to prevent the guest (e.g. Win 2012) from
452      * reinjecting interrupts endlessly. TODO: fix the non-ITR case.
453      */
454     d->mac_reg[ITR] = 250;
455 
456     /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
457     d->mac_reg[RA] = 0;
458     d->mac_reg[RA + 1] = E1000_RAH_AV;
459     for (i = 0; i < 4; i++) {
460         d->mac_reg[RA] |= macaddr[i] << (8 * i);
461         d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
462     }
463     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
464 }
465 
466 static void
467 set_ctrl(E1000State *s, int index, uint32_t val)
468 {
469     /* RST is self-clearing */
470     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
471 }
472 
473 static void
474 set_rx_control(E1000State *s, int index, uint32_t val)
475 {
476     s->mac_reg[RCTL] = val;
477     s->rxbuf_size = rxbufsize(val);
478     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
479     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
480            s->mac_reg[RCTL]);
481     qemu_flush_queued_packets(qemu_get_queue(s->nic));
482 }
483 
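/*
 * MDIC: MDI control register.  The guest reads and writes PHY registers
 * through MDIC; only PHY address 1 is emulated.  Accesses to unhandled
 * registers set E1000_MDIC_ERROR, READY is set when the operation
 * completes, and an MDAC interrupt is raised if the guest requested one.
 */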
484 static void
485 set_mdic(E1000State *s, int index, uint32_t val)
486 {
487     uint32_t data = val & E1000_MDIC_DATA_MASK;
488     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
489 
490     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
491         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
492     else if (val & E1000_MDIC_OP_READ) {
493         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
494         if (!(phy_regcap[addr] & PHY_R)) {
495             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
496             val |= E1000_MDIC_ERROR;
497         } else
498             val = (val ^ data) | s->phy_reg[addr];
499     } else if (val & E1000_MDIC_OP_WRITE) {
500         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
501         if (!(phy_regcap[addr] & PHY_W)) {
502             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
503             val |= E1000_MDIC_ERROR;
504         } else {
505             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
506                 phyreg_writeops[addr](s, index, data);
507             } else {
508                 s->phy_reg[addr] = data;
509             }
510         }
511     }
512     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
513 
514     if (val & E1000_MDIC_INT_EN) {
515         set_ics(s, 0, E1000_ICR_MDAC);
516     }
517 }
518 
519 static uint32_t
520 get_eecd(E1000State *s, int index)
521 {
522     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
523 
524     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
525            s->eecd_state.bitnum_out, s->eecd_state.reading);
526     if (!s->eecd_state.reading ||
527         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
528           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
529         ret |= E1000_EECD_DO;
530     return ret;
531 }
532 
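/*
 * EECD: bit-banged Microwire EEPROM interface.  The guest toggles CS, SK
 * and DI; data bits are sampled on the rising SK edge.  After 9 bits have
 * been shifted in (a 3-bit opcode plus a 6-bit word address), a READ
 * opcode selects the EEPROM word that get_eecd() then shifts out on DO,
 * MSB first, advancing one bit per falling SK edge.
 */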
533 static void
534 set_eecd(E1000State *s, int index, uint32_t val)
535 {
536     uint32_t oldval = s->eecd_state.old_eecd;
537 
538     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
539             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
540     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
541         return;
542     }
543     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
544         s->eecd_state.val_in = 0;
545         s->eecd_state.bitnum_in = 0;
546         s->eecd_state.bitnum_out = 0;
547         s->eecd_state.reading = 0;
548     }
549     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
550         return;
551     }
552     if (!(E1000_EECD_SK & val)) {               /* falling edge */
553         s->eecd_state.bitnum_out++;
554         return;
555     }
556     s->eecd_state.val_in <<= 1;
557     if (val & E1000_EECD_DI)
558         s->eecd_state.val_in |= 1;
559     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
560         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
561         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
562             EEPROM_READ_OPCODE_MICROWIRE);
563     }
564     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
565            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
566            s->eecd_state.reading);
567 }
568 
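/*
 * EERD: word-at-a-time EEPROM access, the alternative to bit-banging
 * EECD.  Once the START bit has been written, reads return the addressed
 * EEPROM word together with the DONE flag; addresses beyond the checksum
 * word report DONE with no data.
 */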
569 static uint32_t
570 flash_eerd_read(E1000State *s, int x)
571 {
572     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
573 
574     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
575         return (s->mac_reg[EERD]);
576 
577     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
578         return (E1000_EEPROM_RW_REG_DONE | r);
579 
580     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
581            E1000_EEPROM_RW_REG_DONE | r);
582 }
583 
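/*
 * Store the 16-bit Internet checksum of data[css..cse] (through the end
 * of the buffer when cse == 0) at offset sloc, in network byte order.
 */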
584 static void
585 putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
586 {
587     uint32_t sum;
588 
589     if (cse && cse < n)
590         n = cse + 1;
591     if (sloc < n-1) {
592         sum = net_checksum_add(n-css, data+css);
593         stw_be_p(data + sloc, net_checksum_finish(sum));
594     }
595 }
596 
597 static inline void
598 inc_reg_if_not_full(E1000State *s, int index)
599 {
600     if (s->mac_reg[index] != 0xffffffff) {
601         s->mac_reg[index]++;
602     }
603 }
604 
605 static inline void
606 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
607 {
608     if (!memcmp(arr, bcast, sizeof bcast)) {
609         inc_reg_if_not_full(s, BPTC);
610     } else if (arr[0] & 1) {
611         inc_reg_if_not_full(s, MPTC);
612     }
613 }
614 
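/*
 * 64-bit statistics counters (e.g. TOTL/TOTH) are stored as adjacent
 * LOW/HIGH 32-bit registers.  Add 'size' octets to such a pair,
 * saturating at the maximum value instead of wrapping.
 */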
615 static void
616 grow_8reg_if_not_full(E1000State *s, int index, int size)
617 {
618     uint64_t sum = s->mac_reg[index] | (uint64_t)s->mac_reg[index+1] << 32;
619 
620     if (sum + size < sum) {
621         sum = ~0ULL;
622     } else {
623         sum += size;
624     }
625     s->mac_reg[index] = sum;
626     s->mac_reg[index+1] = sum >> 32;
627 }
628 
629 static void
630 increase_size_stats(E1000State *s, const int *size_regs, int size)
631 {
632     if (size > 1023) {
633         inc_reg_if_not_full(s, size_regs[5]);
634     } else if (size > 511) {
635         inc_reg_if_not_full(s, size_regs[4]);
636     } else if (size > 255) {
637         inc_reg_if_not_full(s, size_regs[3]);
638     } else if (size > 127) {
639         inc_reg_if_not_full(s, size_regs[2]);
640     } else if (size > 64) {
641         inc_reg_if_not_full(s, size_regs[1]);
642     } else if (size == 64) {
643         inc_reg_if_not_full(s, size_regs[0]);
644     }
645 }
646 
647 static inline int
648 vlan_enabled(E1000State *s)
649 {
650     return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
651 }
652 
653 static inline int
654 vlan_rx_filter_enabled(E1000State *s)
655 {
656     return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
657 }
658 
659 static inline int
660 is_vlan_packet(E1000State *s, const uint8_t *buf)
661 {
662     return (be16_to_cpup((uint16_t *)(buf + 12)) ==
663                 le16_to_cpu(s->mac_reg[VET]));
664 }
665 
666 static inline int
667 is_vlan_txd(uint32_t txd_lower)
668 {
669     return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
670 }
671 
672 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
673  * fill it in, so just pad the descriptor length by 4 bytes unless the
674  * guest told us to strip it off the packet. */
675 static inline int
676 fcs_len(E1000State *s)
677 {
678     return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
679 }
680 
681 static void
682 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
683 {
684     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
685                                     PTC1023, PTC1522 };
686 
687     NetClientState *nc = qemu_get_queue(s->nic);
688     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
689         nc->info->receive(nc, buf, size);
690     } else {
691         qemu_send_packet(nc, buf, size);
692     }
693     inc_tx_bcast_or_mcast_count(s, buf);
694     increase_size_stats(s, PTCregs, size);
695 }
696 
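/*
 * Transmit one frame from tp->data.  For TSO segments, first patch the
 * per-segment IP total length/identification (or IPv6 payload length),
 * the TCP sequence number and PSH/FIN flags, and the pseudo-header
 * checksum; then insert the requested IP/TCP checksums, add the VLAN tag
 * if the descriptor asked for one, hand the frame to e1000_send_packet()
 * and update the transmit statistics counters.
 */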
697 static void
698 xmit_seg(E1000State *s)
699 {
700     uint16_t len, *sp;
701     unsigned int frames = s->tx.tso_frames, css, sofar;
702     struct e1000_tx *tp = &s->tx;
703 
704     if (tp->tse && tp->cptse) {
705         css = tp->ipcss;
706         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
707                frames, tp->size, css);
708         if (tp->ip) {    /* IPv4 */
709             stw_be_p(tp->data+css+2, tp->size - css);
710             stw_be_p(tp->data+css+4,
711                      be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
712         } else {         /* IPv6 */
713             stw_be_p(tp->data+css+4, tp->size - css);
714         }
715         css = tp->tucss;
716         len = tp->size - css;
717         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
718         if (tp->tcp) {
719             sofar = frames * tp->mss;
720             stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
721             if (tp->paylen - sofar > tp->mss) {
722                 tp->data[css + 13] &= ~9;    /* PSH, FIN */
723             } else if (frames) {
724                 inc_reg_if_not_full(s, TSCTC);
725             }
726         } else    /* UDP */
727             stw_be_p(tp->data+css+4, len);
728         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
729             unsigned int phsum;
730             // add pseudo-header length before checksum calculation
731             sp = (uint16_t *)(tp->data + tp->tucso);
732             phsum = be16_to_cpup(sp) + len;
733             phsum = (phsum >> 16) + (phsum & 0xffff);
734             stw_be_p(sp, phsum);
735         }
736         tp->tso_frames++;
737     }
738 
739     if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
740         putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
741     if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
742         putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
743     if (tp->vlan_needed) {
744         memmove(tp->vlan, tp->data, 4);
745         memmove(tp->data, tp->data + 4, 8);
746         memcpy(tp->data + 8, tp->vlan_header, 4);
747         e1000_send_packet(s, tp->vlan, tp->size + 4);
748     } else {
749         e1000_send_packet(s, tp->data, tp->size);
750     }
751 
752     inc_reg_if_not_full(s, TPT);
753     grow_8reg_if_not_full(s, TOTL, s->tx.size);
754     s->mac_reg[GPTC] = s->mac_reg[TPT];
755     s->mac_reg[GOTCL] = s->mac_reg[TOTL];
756     s->mac_reg[GOTCH] = s->mac_reg[TOTH];
757 }
758 
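/*
 * Handle one transmit descriptor.  A context descriptor (DEXT set, DTYP_D
 * clear) only latches checksum/TSO parameters into s->tx; data and legacy
 * descriptors have their buffers DMA'd into tp->data.  TSO payloads are
 * split at hdr_len + mss, with xmit_seg() sending each segment and the
 * saved header being re-prepended for the next one.  The accumulated
 * frame goes out when a descriptor with EOP is processed.
 */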
759 static void
760 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
761 {
762     PCIDevice *d = PCI_DEVICE(s);
763     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
764     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
765     unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
766     unsigned int msh = 0xfffff;
767     uint64_t addr;
768     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
769     struct e1000_tx *tp = &s->tx;
770 
771     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
772     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
773         op = le32_to_cpu(xp->cmd_and_length);
774         tp->ipcss = xp->lower_setup.ip_fields.ipcss;
775         tp->ipcso = xp->lower_setup.ip_fields.ipcso;
776         tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
777         tp->tucss = xp->upper_setup.tcp_fields.tucss;
778         tp->tucso = xp->upper_setup.tcp_fields.tucso;
779         tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
780         tp->paylen = op & 0xfffff;
781         tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
782         tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
783         tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
784         tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
785         tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
786         tp->tso_frames = 0;
787         if (tp->tucso == 0) {    /* this is probably wrong */
788             DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
789             tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
790         }
791         return;
792     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
793         // data descriptor
794         if (tp->size == 0) {
795             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
796         }
797         tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
798     } else {
799         // legacy descriptor
800         tp->cptse = 0;
801     }
802 
803     if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
804         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
805         tp->vlan_needed = 1;
806         stw_be_p(tp->vlan_header,
807                       le16_to_cpu(s->mac_reg[VET]));
808         stw_be_p(tp->vlan_header + 2,
809                       le16_to_cpu(dp->upper.fields.special));
810     }
811 
812     addr = le64_to_cpu(dp->buffer_addr);
813     if (tp->tse && tp->cptse) {
814         msh = tp->hdr_len + tp->mss;
815         do {
816             bytes = split_size;
817             if (tp->size + bytes > msh)
818                 bytes = msh - tp->size;
819 
820             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
821             pci_dma_read(d, addr, tp->data + tp->size, bytes);
822             sz = tp->size + bytes;
823             if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
824                 memmove(tp->header, tp->data, tp->hdr_len);
825             }
826             tp->size = sz;
827             addr += bytes;
828             if (sz == msh) {
829                 xmit_seg(s);
830                 memmove(tp->data, tp->header, tp->hdr_len);
831                 tp->size = tp->hdr_len;
832             }
833             split_size -= bytes;
834         } while (bytes && split_size);
835     } else if (!tp->tse && tp->cptse) {
836         // context descriptor TSE is not set, while data descriptor TSE is set
837         DBGOUT(TXERR, "TCP segmentation error\n");
838     } else {
839         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
840         pci_dma_read(d, addr, tp->data + tp->size, split_size);
841         tp->size += split_size;
842     }
843 
844     if (!(txd_lower & E1000_TXD_CMD_EOP))
845         return;
846     if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
847         xmit_seg(s);
848     }
849     tp->tso_frames = 0;
850     tp->sum_needed = 0;
851     tp->vlan_needed = 0;
852     tp->size = 0;
853     tp->cptse = 0;
854 }
855 
856 static uint32_t
857 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
858 {
859     PCIDevice *d = PCI_DEVICE(s);
860     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
861 
862     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
863         return 0;
864     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
865                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
866     dp->upper.data = cpu_to_le32(txd_upper);
867     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
868                   &dp->upper, sizeof(dp->upper));
869     return E1000_ICR_TXDW;
870 }
871 
872 static uint64_t tx_desc_base(E1000State *s)
873 {
874     uint64_t bah = s->mac_reg[TDBAH];
875     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
876 
877     return (bah << 32) + bal;
878 }
879 
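/*
 * Transmit ring processing: consume descriptors from TDH up to TDT,
 * process each one and write back its status, then raise TXQE (plus TXDW
 * for descriptors that requested status reporting) via set_ics().
 */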
880 static void
881 start_xmit(E1000State *s)
882 {
883     PCIDevice *d = PCI_DEVICE(s);
884     dma_addr_t base;
885     struct e1000_tx_desc desc;
886     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
887 
888     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
889         DBGOUT(TX, "tx disabled\n");
890         return;
891     }
892 
893     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
894         base = tx_desc_base(s) +
895                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
896         pci_dma_read(d, base, &desc, sizeof(desc));
897 
898         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
899                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
900                desc.upper.data);
901 
902         process_tx_desc(s, &desc);
903         cause |= txdesc_writeback(s, base, &desc);
904 
905         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
906             s->mac_reg[TDH] = 0;
907         /*
908          * The following could happen only if guest software assigns
909          * bogus values to TDT/TDLEN; there's nothing too intelligent
910          * we could do about this.
911          */
912         if (s->mac_reg[TDH] == tdh_start ||
913             tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
914             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
915                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
916             break;
917         }
918     }
919     set_ics(s, 0, cause);
920 }
921 
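/*
 * Receive filtering: return nonzero if the frame should be accepted.
 * Checks the VLAN filter table (when VFE is set), the promiscuous
 * unicast/multicast bits, broadcast acceptance, the perfect-match receive
 * address registers and finally the multicast hash table (MTA), updating
 * the BPRC/MPRC counters for accepted multicast/broadcast frames.
 */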
922 static int
923 receive_filter(E1000State *s, const uint8_t *buf, int size)
924 {
925     static const int mta_shift[] = {4, 3, 2, 0};
926     uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
927     int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
928 
929     if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
930         uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
931         uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
932                                      ((vid >> 5) & 0x7f));
933         if ((vfta & (1 << (vid & 0x1f))) == 0)
934             return 0;
935     }
936 
937     if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
938         return 1;
939     }
940 
941     if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
942         inc_reg_if_not_full(s, MPRC);
943         return 1;
944     }
945 
946     if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
947         inc_reg_if_not_full(s, BPRC);
948         return 1;
949     }
950 
951     for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
952         if (!(rp[1] & E1000_RAH_AV))
953             continue;
954         ra[0] = cpu_to_le32(rp[0]);
955         ra[1] = cpu_to_le32(rp[1]);
956         if (!memcmp(buf, (uint8_t *)ra, 6)) {
957             DBGOUT(RXFILTER,
958                    "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
959                    (int)(rp - s->mac_reg - RA)/2,
960                    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
961             return 1;
962         }
963     }
964     DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
965            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
966 
967     f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
968     f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
969     if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f))) {
970         inc_reg_if_not_full(s, MPRC);
971         return 1;
972     }
973     DBGOUT(RXFILTER,
974            "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
975            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
976            (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
977            s->mac_reg[MTA + (f >> 5)]);
978 
979     return 0;
980 }
981 
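/*
 * Callback from the net layer when the backend link state changes: take
 * the emulated link down, or bring it up (after emulated auto-negotiation
 * where supported), and raise an LSC interrupt if STATUS changed.
 */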
982 static void
983 e1000_set_link_status(NetClientState *nc)
984 {
985     E1000State *s = qemu_get_nic_opaque(nc);
986     uint32_t old_status = s->mac_reg[STATUS];
987 
988     if (nc->link_down) {
989         e1000_link_down(s);
990     } else {
991         if (have_autoneg(s) &&
992             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
993             /* emulate auto-negotiation if supported */
994             timer_mod(s->autoneg_timer,
995                       qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
996         } else {
997             e1000_link_up(s);
998         }
999     }
1000 
1001     if (s->mac_reg[STATUS] != old_status)
1002         set_ics(s, 0, E1000_ICR_LSC);
1003 }
1004 
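/*
 * Return true if the receive ring has enough free descriptors between
 * RDH and RDT to hold total_size bytes at the current buffer size.
 */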
1005 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
1006 {
1007     int bufs;
1008     /* Fast-path short packets */
1009     if (total_size <= s->rxbuf_size) {
1010         return s->mac_reg[RDH] != s->mac_reg[RDT];
1011     }
1012     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
1013         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
1014     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
1015         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
1016             s->mac_reg[RDT] - s->mac_reg[RDH];
1017     } else {
1018         return false;
1019     }
1020     return total_size <= bufs * s->rxbuf_size;
1021 }
1022 
1023 static int
1024 e1000_can_receive(NetClientState *nc)
1025 {
1026     E1000State *s = qemu_get_nic_opaque(nc);
1027 
1028     return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
1029         (s->mac_reg[RCTL] & E1000_RCTL_EN) &&
1030         (s->parent_obj.config[PCI_COMMAND] & PCI_COMMAND_MASTER) &&
1031         e1000_has_rxbufs(s, 1);
1032 }
1033 
1034 static uint64_t rx_desc_base(E1000State *s)
1035 {
1036     uint64_t bah = s->mac_reg[RDBAH];
1037     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
1038 
1039     return (bah << 32) + bal;
1040 }
1041 
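/*
 * Receive path: pad runt frames to the 60-byte minimum, drop oversized
 * frames unless LPE or SBP allows them, apply receive_filter(), strip the
 * VLAN tag when VLAN handling is enabled, then DMA the payload into the
 * guest's receive descriptors and signal RXT0 (plus RXDMT0 when the ring
 * runs low, or RXO on overflow) through set_ics().
 */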
1042 static ssize_t
1043 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
1044 {
1045     E1000State *s = qemu_get_nic_opaque(nc);
1046     PCIDevice *d = PCI_DEVICE(s);
1047     struct e1000_rx_desc desc;
1048     dma_addr_t base;
1049     unsigned int n, rdt;
1050     uint32_t rdh_start;
1051     uint16_t vlan_special = 0;
1052     uint8_t vlan_status = 0;
1053     uint8_t min_buf[MIN_BUF_SIZE];
1054     struct iovec min_iov;
1055     uint8_t *filter_buf = iov->iov_base;
1056     size_t size = iov_size(iov, iovcnt);
1057     size_t iov_ofs = 0;
1058     size_t desc_offset;
1059     size_t desc_size;
1060     size_t total_size;
1061     static const int PRCregs[6] = { PRC64, PRC127, PRC255, PRC511,
1062                                     PRC1023, PRC1522 };
1063 
1064     if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
1065         return -1;
1066     }
1067 
1068     if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
1069         return -1;
1070     }
1071 
1072     /* Pad to minimum Ethernet frame length */
1073     if (size < sizeof(min_buf)) {
1074         iov_to_buf(iov, iovcnt, 0, min_buf, size);
1075         memset(&min_buf[size], 0, sizeof(min_buf) - size);
1076         inc_reg_if_not_full(s, RUC);
1077         min_iov.iov_base = filter_buf = min_buf;
1078         min_iov.iov_len = size = sizeof(min_buf);
1079         iovcnt = 1;
1080         iov = &min_iov;
1081     } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
1082         /* This is very unlikely, but may happen. */
1083         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
1084         filter_buf = min_buf;
1085     }
1086 
1087     /* Discard oversized packets if !LPE and !SBP. */
1088     if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
1089         (size > MAXIMUM_ETHERNET_VLAN_SIZE
1090         && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
1091         && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
1092         inc_reg_if_not_full(s, ROC);
1093         return size;
1094     }
1095 
1096     if (!receive_filter(s, filter_buf, size)) {
1097         return size;
1098     }
1099 
1100     if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
1101         vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
1102                                                                 + 14)));
1103         iov_ofs = 4;
1104         if (filter_buf == iov->iov_base) {
1105             memmove(filter_buf + 4, filter_buf, 12);
1106         } else {
1107             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
1108             while (iov->iov_len <= iov_ofs) {
1109                 iov_ofs -= iov->iov_len;
1110                 iov++;
1111             }
1112         }
1113         vlan_status = E1000_RXD_STAT_VP;
1114         size -= 4;
1115     }
1116 
1117     rdh_start = s->mac_reg[RDH];
1118     desc_offset = 0;
1119     total_size = size + fcs_len(s);
1120     if (!e1000_has_rxbufs(s, total_size)) {
1121             set_ics(s, 0, E1000_ICS_RXO);
1122             return -1;
1123     }
1124     do {
1125         desc_size = total_size - desc_offset;
1126         if (desc_size > s->rxbuf_size) {
1127             desc_size = s->rxbuf_size;
1128         }
1129         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
1130         pci_dma_read(d, base, &desc, sizeof(desc));
1131         desc.special = vlan_special;
1132         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
1133         if (desc.buffer_addr) {
1134             if (desc_offset < size) {
1135                 size_t iov_copy;
1136                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
1137                 size_t copy_size = size - desc_offset;
1138                 if (copy_size > s->rxbuf_size) {
1139                     copy_size = s->rxbuf_size;
1140                 }
1141                 do {
1142                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
1143                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
1144                     copy_size -= iov_copy;
1145                     ba += iov_copy;
1146                     iov_ofs += iov_copy;
1147                     if (iov_ofs == iov->iov_len) {
1148                         iov++;
1149                         iov_ofs = 0;
1150                     }
1151                 } while (copy_size);
1152             }
1153             desc_offset += desc_size;
1154             desc.length = cpu_to_le16(desc_size);
1155             if (desc_offset >= total_size) {
1156                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
1157             } else {
1158                 /* Guest zeroing out status is not a hardware requirement.
1159                    Clear EOP in case guest didn't do it. */
1160                 desc.status &= ~E1000_RXD_STAT_EOP;
1161             }
1162         } else { // as per Intel docs; skip descriptors with null buf addr
1163             DBGOUT(RX, "Null RX descriptor!!\n");
1164         }
1165         pci_dma_write(d, base, &desc, sizeof(desc));
1166 
1167         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
1168             s->mac_reg[RDH] = 0;
1169         /* see comment in start_xmit; same here */
1170         if (s->mac_reg[RDH] == rdh_start ||
1171             rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
1172             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1173                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1174             set_ics(s, 0, E1000_ICS_RXO);
1175             return -1;
1176         }
1177     } while (desc_offset < total_size);
1178 
1179     increase_size_stats(s, PRCregs, total_size);
1180     inc_reg_if_not_full(s, TPR);
1181     s->mac_reg[GPRC] = s->mac_reg[TPR];
1182     /* TOR - Total Octets Received:
1183      * This register includes bytes received in a packet from the <Destination
1184      * Address> field through the <CRC> field, inclusively.
1185      * Always include FCS length (4) in size.
1186      */
1187     grow_8reg_if_not_full(s, TORL, size+4);
1188     s->mac_reg[GORCL] = s->mac_reg[TORL];
1189     s->mac_reg[GORCH] = s->mac_reg[TORH];
1190 
1191     n = E1000_ICS_RXT0;
1192     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1193         rdt += s->mac_reg[RDLEN] / sizeof(desc);
1194     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1195         s->rxbuf_min_shift)
1196         n |= E1000_ICS_RXDMT0;
1197 
1198     set_ics(s, 0, n);
1199 
1200     return size;
1201 }
1202 
1203 static ssize_t
1204 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1205 {
1206     const struct iovec iov = {
1207         .iov_base = (uint8_t *)buf,
1208         .iov_len = size
1209     };
1210 
1211     return e1000_receive_iov(nc, &iov, 1);
1212 }
1213 
1214 static uint32_t
1215 mac_readreg(E1000State *s, int index)
1216 {
1217     return s->mac_reg[index];
1218 }
1219 
1220 static uint32_t
1221 mac_low4_read(E1000State *s, int index)
1222 {
1223     return s->mac_reg[index] & 0xf;
1224 }
1225 
1226 static uint32_t
1227 mac_low11_read(E1000State *s, int index)
1228 {
1229     return s->mac_reg[index] & 0x7ff;
1230 }
1231 
1232 static uint32_t
1233 mac_low13_read(E1000State *s, int index)
1234 {
1235     return s->mac_reg[index] & 0x1fff;
1236 }
1237 
1238 static uint32_t
1239 mac_low16_read(E1000State *s, int index)
1240 {
1241     return s->mac_reg[index] & 0xffff;
1242 }
1243 
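/* ICR is clear-on-read: return the pending causes, clear them, and let
 * set_interrupt_cause() deassert the interrupt line. */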
1244 static uint32_t
1245 mac_icr_read(E1000State *s, int index)
1246 {
1247     uint32_t ret = s->mac_reg[ICR];
1248 
1249     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1250     set_interrupt_cause(s, 0, 0);
1251     return ret;
1252 }
1253 
1254 static uint32_t
1255 mac_read_clr4(E1000State *s, int index)
1256 {
1257     uint32_t ret = s->mac_reg[index];
1258 
1259     s->mac_reg[index] = 0;
1260     return ret;
1261 }
1262 
1263 static uint32_t
1264 mac_read_clr8(E1000State *s, int index)
1265 {
1266     uint32_t ret = s->mac_reg[index];
1267 
1268     s->mac_reg[index] = 0;
1269     s->mac_reg[index-1] = 0;
1270     return ret;
1271 }
1272 
1273 static void
1274 mac_writereg(E1000State *s, int index, uint32_t val)
1275 {
1276     uint32_t macaddr[2];
1277 
1278     s->mac_reg[index] = val;
1279 
1280     if (index == RA + 1) {
1281         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1282         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1283         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1284     }
1285 }
1286 
1287 static void
1288 set_rdt(E1000State *s, int index, uint32_t val)
1289 {
1290     s->mac_reg[index] = val & 0xffff;
1291     if (e1000_has_rxbufs(s, 1)) {
1292         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1293     }
1294 }
1295 
1296 static void
1297 set_16bit(E1000State *s, int index, uint32_t val)
1298 {
1299     s->mac_reg[index] = val & 0xffff;
1300 }
1301 
1302 static void
1303 set_dlen(E1000State *s, int index, uint32_t val)
1304 {
1305     s->mac_reg[index] = val & 0xfff80;
1306 }
1307 
1308 static void
1309 set_tctl(E1000State *s, int index, uint32_t val)
1310 {
1311     s->mac_reg[index] = val;
1312     s->mac_reg[TDT] &= 0xffff;
1313     start_xmit(s);
1314 }
1315 
1316 static void
1317 set_icr(E1000State *s, int index, uint32_t val)
1318 {
1319     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1320     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1321 }
1322 
1323 static void
1324 set_imc(E1000State *s, int index, uint32_t val)
1325 {
1326     s->mac_reg[IMS] &= ~val;
1327     set_ics(s, 0, 0);
1328 }
1329 
1330 static void
1331 set_ims(E1000State *s, int index, uint32_t val)
1332 {
1333     s->mac_reg[IMS] |= val;
1334     set_ics(s, 0, 0);
1335 }
1336 
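/*
 * Register dispatch tables, indexed by (MMIO offset >> 2).  A NULL entry
 * means the register is not implemented for that access type; the
 * mac_reg_access[] table further down gates some registers behind
 * migration compatibility flags.
 */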
1337 #define getreg(x)    [x] = mac_readreg
1338 static uint32_t (*macreg_readops[])(E1000State *, int) = {
1339     getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
1340     getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
1341     getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
1342     getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
1343     getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
1344     getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
1345     getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
1346     getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
1347     getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
1348     getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
1349     getreg(TNCRS),    getreg(SEC),      getreg(CEXTERR),  getreg(RLEC),
1350     getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
1351     getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
1352     getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
1353     getreg(GOTCL),
1354 
1355     [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
1356     [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
1357     [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
1358     [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
1359     [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
1360     [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
1361     [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
1362     [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
1363     [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
1364     [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
1365     [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
1366     [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
1367     [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
1368     [MPTC]    = mac_read_clr4,
1369     [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
1370     [EERD]    = flash_eerd_read,
1371     [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
1372     [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
1373     [RDFPC]   = mac_low13_read,
1374     [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
1375     [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
1376     [TDFPC]   = mac_low13_read,
1377     [AIT]     = mac_low16_read,
1378 
1379     [CRCERRS ... MPC]   = &mac_readreg,
1380     [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
1381     [FFLT ... FFLT+6]   = &mac_low11_read,
1382     [RA ... RA+31]      = &mac_readreg,
1383     [WUPM ... WUPM+31]  = &mac_readreg,
1384     [MTA ... MTA+127]   = &mac_readreg,
1385     [VFTA ... VFTA+127] = &mac_readreg,
1386     [FFMT ... FFMT+254] = &mac_low4_read,
1387     [FFVT ... FFVT+254] = &mac_readreg,
1388     [PBM ... PBM+16383] = &mac_readreg,
1389 };
1390 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1391 
1392 #define putreg(x)    [x] = mac_writereg
1393 static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
1394     putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
1395     putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
1396     putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
1397     putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
1398     putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
1399     putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
1400     putreg(WUS),      putreg(AIT),
1401 
1402     [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
1403     [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
1404     [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
1405     [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
1406     [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
1407     [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
1408     [ITR]    = set_16bit,
1409 
1410     [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
1411     [FFLT ... FFLT+6]   = &mac_writereg,
1412     [RA ... RA+31]      = &mac_writereg,
1413     [WUPM ... WUPM+31]  = &mac_writereg,
1414     [MTA ... MTA+127]   = &mac_writereg,
1415     [VFTA ... VFTA+127] = &mac_writereg,
1416     [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
1417     [PBM ... PBM+16383] = &mac_writereg,
1418 };
1419 
1420 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1421 
1422 enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
1423 
1424 #define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
1425 /* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
1426  * f - flag bits (up to 6 possible flags)
1427  * n - flag needed
1428  * p - partially implemented */
1429 static const uint8_t mac_reg_access[0x8000] = {
1430     [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
1431     [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),
1432 
1433     [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
1434     [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
1435     [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
1436     [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
1437     [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
1438     [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
1439     [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
1440     [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
1441     [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
1442     [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
1443     [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
1444     [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
1445     [SEC]     = markflag(MAC),    [CEXTERR] = markflag(MAC),
1446     [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
1447     [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
1448     [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
1449     [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
1450     [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
1451     [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
1452     [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
1453     [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
1454     [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
1455     [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
1456     [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
1457     [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
1458     [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
1459     [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
1460     [BPTC]    = markflag(MAC),
1461 
1462     [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1463     [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1464     [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1465     [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1466     [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1467     [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1468     [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1469     [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1470     [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1471     [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1472     [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
1473 };
1474 
1475 static void
1476 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1477                  unsigned size)
1478 {
1479     E1000State *s = opaque;
1480     unsigned int index = (addr & 0x1ffff) >> 2;
1481 
1482     if (index < NWRITEOPS && macreg_writeops[index]) {
1483         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1484             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1485             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1486                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1487                        "It is not fully implemented.\n", index<<2);
1488             }
1489             macreg_writeops[index](s, index, val);
1490         } else {    /* "flag needed" bit is set, but the flag is not active */
1491             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1492                    index<<2);
1493         }
1494     } else if (index < NREADOPS && macreg_readops[index]) {
1495         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1496                index<<2, val);
1497     } else {
1498         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1499                index<<2, val);
1500     }
1501 }
1502 
1503 static uint64_t
1504 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1505 {
1506     E1000State *s = opaque;
1507     unsigned int index = (addr & 0x1ffff) >> 2;
1508 
1509     if (index < NREADOPS && macreg_readops[index]) {
1510         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1511             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1512             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1513                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1514                        "It is not fully implemented.\n", index<<2);
1515             }
1516             return macreg_readops[index](s, index);
1517         } else {    /* "flag needed" bit is set, but the flag is not active */
1518             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1519                    index<<2);
1520         }
1521     } else {
1522         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1523     }
1524     return 0;
1525 }
1526 
1527 static const MemoryRegionOps e1000_mmio_ops = {
1528     .read = e1000_mmio_read,
1529     .write = e1000_mmio_write,
1530     .endianness = DEVICE_LITTLE_ENDIAN,
1531     .impl = {
1532         .min_access_size = 4,
1533         .max_access_size = 4,
1534     },
1535 };
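
/*
 * Worked example of the offset-to-index mapping used by the two handlers
 * above (register offsets per the 8254x manual): a 32-bit guest write to
 * BAR0 offset 0x0100 (RCTL) arrives with addr == 0x0100, so
 *
 *     index = (0x0100 & 0x1ffff) >> 2 = 0x40
 *
 * and the value is dispatched to macreg_writeops[0x40], which ultimately
 * stores it in mac_reg[RCTL].
 */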
1536 
1537 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1538                               unsigned size)
1539 {
1540     E1000State *s = opaque;
1541 
1542     (void)s;
1543     return 0;
1544 }
1545 
1546 static void e1000_io_write(void *opaque, hwaddr addr,
1547                            uint64_t val, unsigned size)
1548 {
1549     E1000State *s = opaque;
1550 
1551     (void)s;
1552 }
1553 
1554 static const MemoryRegionOps e1000_io_ops = {
1555     .read = e1000_io_read,
1556     .write = e1000_io_write,
1557     .endianness = DEVICE_LITTLE_ENDIAN,
1558 };
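
/*
 * The I/O BAR registered in pci_e1000_realize() below is backed by the
 * stubs above: reads return 0 and writes are discarded.  The IOADDR/IODATA
 * indirect register window that the physical 8254x exposes through this
 * BAR is therefore not modelled.
 */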
1559 
1560 static bool is_version_1(void *opaque, int version_id)
1561 {
1562     return version_id == 1;
1563 }
1564 
1565 static void e1000_pre_save(void *opaque)
1566 {
1567     E1000State *s = opaque;
1568     NetClientState *nc = qemu_get_queue(s->nic);
1569 
1570     /* If the mitigation timer is active, emulate a timeout now. */
1571     if (s->mit_timer_on) {
1572         e1000_mit_timer(s);
1573     }
1574 
1575     /*
1576      * If link is down and auto-negotiation is supported and ongoing,
1577      * complete auto-negotiation immediately. This allows us to look
1578      * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1579      */
1580     if (nc->link_down && have_autoneg(s)) {
1581         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1582     }
1583 }
1584 
1585 static int e1000_post_load(void *opaque, int version_id)
1586 {
1587     E1000State *s = opaque;
1588     NetClientState *nc = qemu_get_queue(s->nic);
1589 
1590     if (!chkflag(MIT)) {
1591         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1592             s->mac_reg[TADV] = 0;
1593         s->mit_irq_level = false;
1594     }
1595     s->mit_ide = 0;
1596     s->mit_timer_on = false;
1597 
1598     /* nc.link_down can't be migrated, so infer it from the link status
1599      * (LU) bit in mac_reg[STATUS].
1600      * Alternatively, restart link negotiation if it was in progress. */
1601     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1602 
1603     if (have_autoneg(s) &&
1604         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1605         nc->link_down = false;
1606         timer_mod(s->autoneg_timer,
1607                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1608     }
1609 
1610     return 0;
1611 }
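
/*
 * Sketch of the resulting load behaviour (an inference from the code above,
 * not a spec statement): if auto-negotiation had not completed when the
 * source was saved, the destination marks the link up and re-arms
 * autoneg_timer, so e1000_autoneg_timer() is expected to fire roughly
 * 500 ms of virtual time later, finish negotiation and raise a link status
 * change interrupt, just as a guest-initiated renegotiation would.
 */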
1612 
1613 static bool e1000_mit_state_needed(void *opaque)
1614 {
1615     E1000State *s = opaque;
1616 
1617     return chkflag(MIT);
1618 }
1619 
1620 static bool e1000_full_mac_needed(void *opaque)
1621 {
1622     E1000State *s = opaque;
1623 
1624     return chkflag(MAC);
1625 }
1626 
1627 static const VMStateDescription vmstate_e1000_mit_state = {
1628     .name = "e1000/mit_state",
1629     .version_id = 1,
1630     .minimum_version_id = 1,
1631     .needed = e1000_mit_state_needed,
1632     .fields = (VMStateField[]) {
1633         VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1634         VMSTATE_UINT32(mac_reg[RADV], E1000State),
1635         VMSTATE_UINT32(mac_reg[TADV], E1000State),
1636         VMSTATE_UINT32(mac_reg[ITR], E1000State),
1637         VMSTATE_BOOL(mit_irq_level, E1000State),
1638         VMSTATE_END_OF_LIST()
1639     }
1640 };
1641 
1642 static const VMStateDescription vmstate_e1000_full_mac_state = {
1643     .name = "e1000/full_mac_state",
1644     .version_id = 1,
1645     .minimum_version_id = 1,
1646     .needed = e1000_full_mac_needed,
1647     .fields = (VMStateField[]) {
1648         VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
1649         VMSTATE_END_OF_LIST()
1650     }
1651 };
1652 
1653 static const VMStateDescription vmstate_e1000 = {
1654     .name = "e1000",
1655     .version_id = 2,
1656     .minimum_version_id = 1,
1657     .pre_save = e1000_pre_save,
1658     .post_load = e1000_post_load,
1659     .fields = (VMStateField[]) {
1660         VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1661         VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1662         VMSTATE_UNUSED(4), /* Was mmio_base.  */
1663         VMSTATE_UINT32(rxbuf_size, E1000State),
1664         VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1665         VMSTATE_UINT32(eecd_state.val_in, E1000State),
1666         VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1667         VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1668         VMSTATE_UINT16(eecd_state.reading, E1000State),
1669         VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1670         VMSTATE_UINT8(tx.ipcss, E1000State),
1671         VMSTATE_UINT8(tx.ipcso, E1000State),
1672         VMSTATE_UINT16(tx.ipcse, E1000State),
1673         VMSTATE_UINT8(tx.tucss, E1000State),
1674         VMSTATE_UINT8(tx.tucso, E1000State),
1675         VMSTATE_UINT16(tx.tucse, E1000State),
1676         VMSTATE_UINT32(tx.paylen, E1000State),
1677         VMSTATE_UINT8(tx.hdr_len, E1000State),
1678         VMSTATE_UINT16(tx.mss, E1000State),
1679         VMSTATE_UINT16(tx.size, E1000State),
1680         VMSTATE_UINT16(tx.tso_frames, E1000State),
1681         VMSTATE_UINT8(tx.sum_needed, E1000State),
1682         VMSTATE_INT8(tx.ip, E1000State),
1683         VMSTATE_INT8(tx.tcp, E1000State),
1684         VMSTATE_BUFFER(tx.header, E1000State),
1685         VMSTATE_BUFFER(tx.data, E1000State),
1686         VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1687         VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1688         VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1689         VMSTATE_UINT32(mac_reg[EECD], E1000State),
1690         VMSTATE_UINT32(mac_reg[EERD], E1000State),
1691         VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1692         VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1693         VMSTATE_UINT32(mac_reg[ICR], E1000State),
1694         VMSTATE_UINT32(mac_reg[ICS], E1000State),
1695         VMSTATE_UINT32(mac_reg[IMC], E1000State),
1696         VMSTATE_UINT32(mac_reg[IMS], E1000State),
1697         VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1698         VMSTATE_UINT32(mac_reg[MANC], E1000State),
1699         VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1700         VMSTATE_UINT32(mac_reg[MPC], E1000State),
1701         VMSTATE_UINT32(mac_reg[PBA], E1000State),
1702         VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1703         VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1704         VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1705         VMSTATE_UINT32(mac_reg[RDH], E1000State),
1706         VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1707         VMSTATE_UINT32(mac_reg[RDT], E1000State),
1708         VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1709         VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1710         VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1711         VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1712         VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1713         VMSTATE_UINT32(mac_reg[TDH], E1000State),
1714         VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1715         VMSTATE_UINT32(mac_reg[TDT], E1000State),
1716         VMSTATE_UINT32(mac_reg[TORH], E1000State),
1717         VMSTATE_UINT32(mac_reg[TORL], E1000State),
1718         VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1719         VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1720         VMSTATE_UINT32(mac_reg[TPR], E1000State),
1721         VMSTATE_UINT32(mac_reg[TPT], E1000State),
1722         VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1723         VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1724         VMSTATE_UINT32(mac_reg[VET], E1000State),
1725         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1726         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1727         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1728         VMSTATE_END_OF_LIST()
1729     },
1730     .subsections = (const VMStateDescription*[]) {
1731         &vmstate_e1000_mit_state,
1732         &vmstate_e1000_full_mac_state,
1733         NULL
1734     }
1735 };
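
/*
 * The two subsections above are guarded by their .needed callbacks, so
 * they only appear on the wire when interrupt mitigation or the full MAC
 * register set is actually in use.  With the corresponding compat flags
 * turned off, the stream degrades to the plain "e1000" section and remains
 * migratable to older QEMU versions that know nothing about them.
 */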
1736 
1737 /*
1738  * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1739  * Note: A valid DevId will be inserted during pci_e1000_init().
1740  */
1741 static const uint16_t e1000_eeprom_template[64] = {
1742     0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1743     0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1744     0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1745     0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1746     0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1747     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1748     0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1749     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1750 };
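
/*
 * The final word (index EEPROM_CHECKSUM_REG) is the checksum slot.
 * pci_e1000_realize() fills in the MAC address (words 0-2) and the device
 * ID (words 11 and 13), then chooses the checksum so that the first 64
 * words sum to EEPROM_SUM (0xBABA in the Intel EEPROM layout), which is
 * the invariant guest drivers verify at probe time.
 */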
1751 
1752 /* PCI interface */
1753 
1754 static void
1755 e1000_mmio_setup(E1000State *d)
1756 {
1757     int i;
1758     const uint32_t excluded_regs[] = {
1759         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1760         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1761     };
1762 
1763     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1764                           "e1000-mmio", PNPMMIO_SIZE);
1765     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1766     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1767         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1768                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1769     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1770 }
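
/*
 * Layout produced by the loop above: the whole 128 KiB MMIO window is
 * coalesced except for a 4-byte hole at each register named in
 * excluded_regs[] (the array is sorted and terminated by PNPMMIO_SIZE).
 * Coalesced writes may be batched by the hypervisor and replayed later,
 * which is acceptable for statistics and ring-base registers but not for
 * MDIC, the interrupt cause/mask registers, TCTL or TDT, whose side
 * effects have to happen immediately.
 */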
1771 
1772 static void
1773 pci_e1000_uninit(PCIDevice *dev)
1774 {
1775     E1000State *d = E1000(dev);
1776 
1777     timer_del(d->autoneg_timer);
1778     timer_free(d->autoneg_timer);
1779     timer_del(d->mit_timer);
1780     timer_free(d->mit_timer);
1781     qemu_del_nic(d->nic);
1782 }
1783 
1784 static NetClientInfo net_e1000_info = {
1785     .type = NET_CLIENT_OPTIONS_KIND_NIC,
1786     .size = sizeof(NICState),
1787     .can_receive = e1000_can_receive,
1788     .receive = e1000_receive,
1789     .receive_iov = e1000_receive_iov,
1790     .link_status_changed = e1000_set_link_status,
1791 };
1792 
1793 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1794                                 uint32_t val, int len)
1795 {
1796     E1000State *s = E1000(pci_dev);
1797 
1798     pci_default_write_config(pci_dev, address, val, len);
1799 
1800     if (range_covers_byte(address, len, PCI_COMMAND) &&
1801         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1802         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1803     }
1804 }
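
/*
 * While bus mastering is disabled, e1000_can_receive() is expected to turn
 * incoming frames away, leaving them queued in the net layer; the hook
 * above flushes that queue as soon as the guest sets PCI_COMMAND_MASTER
 * again, so reception resumes without waiting for fresh traffic.
 */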
1805 
1806 
1807 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1808 {
1809     DeviceState *dev = DEVICE(pci_dev);
1810     E1000State *d = E1000(pci_dev);
1811     PCIDeviceClass *pdc = PCI_DEVICE_GET_CLASS(pci_dev);
1812     uint8_t *pci_conf;
1813     uint16_t checksum = 0;
1814     int i;
1815     uint8_t *macaddr;
1816 
1817     pci_dev->config_write = e1000_write_config;
1818 
1819     pci_conf = pci_dev->config;
1820 
1821     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1822     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1823 
1824     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1825 
1826     e1000_mmio_setup(d);
1827 
1828     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1829 
1830     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1831 
1832     memmove(d->eeprom_data, e1000_eeprom_template,
1833         sizeof e1000_eeprom_template);
1834     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1835     macaddr = d->conf.macaddr.a;
1836     for (i = 0; i < 3; i++)
1837         d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1838     d->eeprom_data[11] = d->eeprom_data[13] = pdc->device_id;
1839     for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1840         checksum += d->eeprom_data[i];
1841     checksum = (uint16_t) EEPROM_SUM - checksum;
1842     d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1843 
1844     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1845                           object_get_typename(OBJECT(d)), dev->id, d);
1846 
1847     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1848 
1849     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1850     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1851 }
1852 
1853 static void qdev_e1000_reset(DeviceState *dev)
1854 {
1855     E1000State *d = E1000(dev);
1856     e1000_reset(d);
1857 }
1858 
1859 static Property e1000_properties[] = {
1860     DEFINE_NIC_PROPERTIES(E1000State, conf),
1861     DEFINE_PROP_BIT("autonegotiation", E1000State,
1862                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1863     DEFINE_PROP_BIT("mitigation", E1000State,
1864                     compat_flags, E1000_FLAG_MIT_BIT, true),
1865     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1866                     compat_flags, E1000_FLAG_MAC_BIT, true),
1867     DEFINE_PROP_END_OF_LIST(),
1868 };
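
/*
 * The three bit properties map onto the compat_flags bits consulted by
 * chkflag() and mac_reg_access[].  They default to on; machine-type compat
 * property lists are expected to switch them off for older machines so
 * that migration to and from earlier QEMU releases keeps working.
 */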
1869 
1870 typedef struct E1000Info {
1871     const char *name;
1872     uint16_t   device_id;
1873     uint8_t    revision;
1874     uint16_t   phy_id2;
1875 } E1000Info;
1876 
1877 static void e1000_class_init(ObjectClass *klass, void *data)
1878 {
1879     DeviceClass *dc = DEVICE_CLASS(klass);
1880     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1881     E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1882     const E1000Info *info = data;
1883 
1884     k->realize = pci_e1000_realize;
1885     k->exit = pci_e1000_uninit;
1886     k->romfile = "efi-e1000.rom";
1887     k->vendor_id = PCI_VENDOR_ID_INTEL;
1888     k->device_id = info->device_id;
1889     k->revision = info->revision;
1890     e->phy_id2 = info->phy_id2;
1891     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1892     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1893     dc->desc = "Intel Gigabit Ethernet";
1894     dc->reset = qdev_e1000_reset;
1895     dc->vmsd = &vmstate_e1000;
1896     dc->props = e1000_properties;
1897 }
1898 
1899 static void e1000_instance_init(Object *obj)
1900 {
1901     E1000State *n = E1000(obj);
1902     device_add_bootindex_property(obj, &n->conf.bootindex,
1903                                   "bootindex", "/ethernet-phy@0",
1904                                   DEVICE(n), NULL);
1905 }
1906 
1907 static const TypeInfo e1000_base_info = {
1908     .name          = TYPE_E1000_BASE,
1909     .parent        = TYPE_PCI_DEVICE,
1910     .instance_size = sizeof(E1000State),
1911     .instance_init = e1000_instance_init,
1912     .class_size    = sizeof(E1000BaseClass),
1913     .abstract      = true,
1914 };
1915 
1916 static const E1000Info e1000_devices[] = {
1917     {
1918         .name      = "e1000",
1919         .device_id = E1000_DEV_ID_82540EM,
1920         .revision  = 0x03,
1921         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1922     },
1923     {
1924         .name      = "e1000-82544gc",
1925         .device_id = E1000_DEV_ID_82544GC_COPPER,
1926         .revision  = 0x03,
1927         .phy_id2   = E1000_PHY_ID2_82544x,
1928     },
1929     {
1930         .name      = "e1000-82545em",
1931         .device_id = E1000_DEV_ID_82545EM_COPPER,
1932         .revision  = 0x03,
1933         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1934     },
1935 };
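
/*
 * Each entry above becomes a concrete QOM type derived from
 * TYPE_E1000_BASE when e1000_register_types() runs.  An illustrative
 * command line (not taken from this file):
 *
 *   qemu-system-x86_64 -netdev user,id=n0 \
 *                      -device e1000-82545em,netdev=n0,mac=52:54:00:12:34:56
 *
 * where plain "-device e1000" selects the default 82540EM model.
 */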
1936 
1937 static void e1000_register_types(void)
1938 {
1939     int i;
1940 
1941     type_register_static(&e1000_base_info);
1942     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1943         const E1000Info *info = &e1000_devices[i];
1944         TypeInfo type_info = {};
1945 
1946         type_info.name = info->name;
1947         type_info.parent = TYPE_E1000_BASE;
1948         type_info.class_data = (void *)info;
1949         type_info.class_init = e1000_class_init;
1950         type_info.instance_init = e1000_instance_init;
1951 
1952         type_register(&type_info);
1953     }
1954 }
1955 
1956 type_init(e1000_register_types)
1957