xref: /openbmc/qemu/hw/net/e1000.c (revision f1f7e4bf)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "hw/hw.h"
29 #include "hw/pci/pci.h"
30 #include "net/net.h"
31 #include "net/checksum.h"
32 #include "hw/loader.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 #include "qemu/iov.h"
36 #include "qemu/range.h"
37 
38 #include "e1000_regs.h"
39 
/* Ethernet broadcast destination address (all ones), used for TX statistics. */
static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
41 
/* Compile-time switch: when defined, DBGOUT() can print to stderr. */
#define E1000_DEBUG

#ifdef E1000_DEBUG
/* Debug categories; each enumerator is a bit position within debugflags. */
enum {
    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
};
/* Bit mask for debug category x (x is the enumerator name sans DEBUG_). */
#define DBGBIT(x)    (1<<DEBUG_##x)
/* Categories enabled by default: TXERR and GENERAL. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/*
 * Print an "e1000: "-prefixed, printf-style message to stderr, but only if
 * debug category 'what' is enabled in debugflags.
 */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
/* Debugging compiled out: DBGOUT() expands to an empty statement. */
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif
61 
/* NOTE(review): presumably the sizes of the I/O port and MMIO BARs - confirm
 * against the realize code, which is not visible here. */
#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/* this is the size past which hardware will drop packets when setting LPE=0 */
#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
/* this is the size past which hardware will drop packets when setting LPE=1 */
#define MAXIMUM_ETHERNET_LPE_SIZE 16384

/* NOTE(review): presumably a 14-byte Ethernet header plus a 4-byte VLAN tag. */
#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
72 
73 /*
74  * HW models:
75  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
76  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
77  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
78  *  Others never tested
79  */
80 
/*
 * State of one emulated e1000 device: the PCI parent object, the network
 * backend handle, the MAC/PHY/EEPROM register files, the transmit context
 * being assembled, the EEPROM bit-bang state and the interrupt mitigation
 * bookkeeping.
 */
typedef struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;
    MemoryRegion mmio;
    MemoryRegion io;

    /* MAC registers, indexed by the register enum (E1000_<reg> >> 2). */
    uint32_t mac_reg[0x8000];
    /* PHY registers, read and written through the MDIC register. */
    uint16_t phy_reg[0x20];
    /* EEPROM contents as 16-bit words. */
    uint16_t eeprom_data[64];

    /* Both values are derived from RCTL whenever it is written. */
    uint32_t rxbuf_size;
    uint32_t rxbuf_min_shift;
    /* Transmit context: the frame under construction plus offload settings. */
    struct e1000_tx {
        /* Copy of the first hdr_len bytes, replayed for each TSO segment. */
        unsigned char header[256];
        /* VLAN tag to insert (VET followed by the descriptor's special). */
        unsigned char vlan_header[4];
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        /* Number of bytes currently accumulated in data[]. */
        uint16_t size;
        /* Checksum offload options taken from the data descriptor. */
        unsigned char sum_needed;
        /* Non-zero when a VLAN tag must be inserted before sending. */
        unsigned char vlan_needed;
        /* Checksum start/offset/end values from the context descriptor. */
        uint8_t ipcss;
        uint8_t ipcso;
        uint16_t ipcse;
        uint8_t tucss;
        uint8_t tucso;
        uint16_t tucse;
        /* Segmentation parameters from the context descriptor. */
        uint8_t hdr_len;
        uint16_t mss;
        uint32_t paylen;
        /* Segments already emitted for the current TSO packet. */
        uint16_t tso_frames;
        /* TSE, IP and TCP command bits from the context descriptor. */
        char tse;
        int8_t ip;
        int8_t tcp;
        char cptse;     // current packet tse bit
    } tx;

    /* State of the bit-banged EEPROM interface driven through EECD. */
    struct {
        uint32_t val_in;    /* shifted in from guest driver */
        /* Count of bits clocked in since chip select was raised. */
        uint16_t bitnum_in;
        /* Index of the EEPROM bit currently presented on data-out. */
        uint16_t bitnum_out;
        /* Non-zero once a read opcode has been decoded. */
        uint16_t reading;
        /* Last guest-written EECD value (control bits only). */
        uint32_t old_eecd;
    } eecd_state;

    /* Fires when emulated link auto-negotiation completes. */
    QEMUTimer *autoneg_timer;

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_MAC_BIT 2
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
    /* Bitwise OR of the E1000_FLAG_* values above. */
    uint32_t compat_flags;
} E1000State;
146 
/*
 * Test compat flag E1000_FLAG_<x>.  Expands to an expression that uses a
 * variable named 's' (an E1000State pointer), which must be in scope.
 */
#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
148 
/*
 * Class data shared by all e1000 device models.  phy_id2 is the per-model
 * value loaded into the PHY_ID2 register on reset.
 */
typedef struct E1000BaseClass {
    PCIDeviceClass parent_class;
    uint16_t phy_id2;
} E1000BaseClass;
153 
/* QOM type name of the abstract base type for the e1000 models. */
#define TYPE_E1000_BASE "e1000-base"

/* Checked cast of a QOM object to E1000State. */
#define E1000(obj) \
    OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)

/* Checked cast of a class pointer to E1000BaseClass. */
#define E1000_DEVICE_CLASS(klass) \
     OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
/* Fetch the E1000BaseClass of an object instance. */
#define E1000_DEVICE_GET_CLASS(obj) \
    OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
163 
/*
 * Indices into mac_reg[]: each enumerator NAME is E1000_NAME shifted right
 * by two.  NOTE(review): E1000_NAME is presumably the register's byte offset
 * (so the index counts 32-bit words) - confirm in e1000_regs.h.
 */
#define defreg(x)    x = (E1000_##x>>2)
enum {
    defreg(CTRL),    defreg(EECD),    defreg(EERD),    defreg(GPRC),
    defreg(GPTC),    defreg(ICR),     defreg(ICS),     defreg(IMC),
    defreg(IMS),     defreg(LEDCTL),  defreg(MANC),    defreg(MDIC),
    defreg(MPC),     defreg(PBA),     defreg(RCTL),    defreg(RDBAH),
    defreg(RDBAL),   defreg(RDH),     defreg(RDLEN),   defreg(RDT),
    defreg(STATUS),  defreg(SWSM),    defreg(TCTL),    defreg(TDBAH),
    defreg(TDBAL),   defreg(TDH),     defreg(TDLEN),   defreg(TDT),
    defreg(TORH),    defreg(TORL),    defreg(TOTH),    defreg(TOTL),
    defreg(TPR),     defreg(TPT),     defreg(TXDCTL),  defreg(WUFC),
    defreg(RA),      defreg(MTA),     defreg(CRCERRS), defreg(VFTA),
    defreg(VET),     defreg(RDTR),    defreg(RADV),    defreg(TADV),
    defreg(ITR),     defreg(FCRUC),   defreg(TDFH),    defreg(TDFT),
    defreg(TDFHS),   defreg(TDFTS),   defreg(TDFPC),   defreg(RDFH),
    defreg(RDFT),    defreg(RDFHS),   defreg(RDFTS),   defreg(RDFPC),
    defreg(IPAV),    defreg(WUC),     defreg(WUS),     defreg(AIT),
    defreg(IP6AT),   defreg(IP4AT),   defreg(FFLT),    defreg(FFMT),
    defreg(FFVT),    defreg(WUPM),    defreg(PBM),     defreg(SCC),
    defreg(ECOL),    defreg(MCC),     defreg(LATECOL), defreg(COLC),
    defreg(DC),      defreg(TNCRS),   defreg(SEC),     defreg(CEXTERR),
    defreg(RLEC),    defreg(XONRXC),  defreg(XONTXC),  defreg(XOFFRXC),
    defreg(XOFFTXC), defreg(RFC),     defreg(RJC),     defreg(RNBC),
    defreg(TSCTFC),  defreg(MGTPRC),  defreg(MGTPDC),  defreg(MGTPTC),
    defreg(RUC),     defreg(ROC),     defreg(GORCL),   defreg(GORCH),
    defreg(GOTCL),   defreg(GOTCH),   defreg(BPRC),    defreg(MPRC),
    defreg(TSCTC),   defreg(PRC64),   defreg(PRC127),  defreg(PRC255),
    defreg(PRC511),  defreg(PRC1023), defreg(PRC1522), defreg(PTC64),
    defreg(PTC127),  defreg(PTC255),  defreg(PTC511),  defreg(PTC1023),
    defreg(PTC1522), defreg(MPTC),    defreg(BPTC)
};
195 
196 static void
197 e1000_link_down(E1000State *s)
198 {
199     s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
200     s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
201     s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
202     s->phy_reg[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK;
203 }
204 
205 static void
206 e1000_link_up(E1000State *s)
207 {
208     s->mac_reg[STATUS] |= E1000_STATUS_LU;
209     s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
210 
211     /* E1000_STATUS_LU is tested by e1000_can_receive() */
212     qemu_flush_queued_packets(qemu_get_queue(s->nic));
213 }
214 
215 static bool
216 have_autoneg(E1000State *s)
217 {
218     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
219 }
220 
221 static void
222 set_phy_ctrl(E1000State *s, int index, uint16_t val)
223 {
224     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
225     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
226                                    MII_CR_RESET |
227                                    MII_CR_RESTART_AUTO_NEG);
228 
229     /*
230      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
231      * migrate during auto negotiation, after migration the link will be
232      * down.
233      */
234     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
235         e1000_link_down(s);
236         DBGOUT(PHY, "Start link auto negotiation\n");
237         timer_mod(s->autoneg_timer,
238                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
239     }
240 }
241 
/*
 * Per-register PHY write handlers, indexed by PHY register number.  A
 * register with no entry is written by a plain store into phy_reg[].
 */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [PHY_CTRL] = set_phy_ctrl,
};

/* Number of slots in phyreg_writeops[]; used to bounds-check lookups. */
enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
247 
/* Access capability bits for PHY registers. */
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
/*
 * Access capabilities per PHY register number.  Registers not listed are
 * zero, i.e. neither readable nor writable through MDIC.
 */
static const char phy_regcap[0x20] = {
    [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
    [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
    [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
    [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
    [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
    [PHY_AUTONEG_EXP] = PHY_R,
};
258 
/*
 * Reset values of the PHY registers, copied into phy_reg[] on device reset.
 * PHY_ID2 is documented in 8254x_GBe_SDM.pdf, p. 250.
 */
static const uint16_t phy_reg_init[] = {
    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
                   MII_CR_FULL_DUPLEX |
                   MII_CR_AUTO_NEG_EN,

    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
                   MII_SR_LINK_STATUS |   /* link initially up */
                   MII_SR_AUTONEG_CAPS |
                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
                   MII_SR_PREAMBLE_SUPPRESS |
                   MII_SR_EXTENDED_STATUS |
                   MII_SR_10T_HD_CAPS |
                   MII_SR_10T_FD_CAPS |
                   MII_SR_100X_HD_CAPS |
                   MII_SR_100X_FD_CAPS,

    [PHY_ID1] = 0x141,
    /* [PHY_ID2] configured per DevId, from e1000_reset() */
    [PHY_AUTONEG_ADV] = 0xde1,
    [PHY_LP_ABILITY] = 0x1e0,
    [PHY_1000T_CTRL] = 0x0e00,
    [PHY_1000T_STATUS] = 0x3c00,
    [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
};
286 
/*
 * Reset values of the MAC registers, copied into mac_reg[] on device reset.
 * Registers not listed here start out as zero.
 */
static const uint32_t mac_reg_init[] = {
    [PBA]     = 0x00100030,
    [LEDCTL]  = 0x602,
    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    /* The link is reported as up (E1000_STATUS_LU) out of reset. */
    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};
300 
/*
 * Lower *curr to value when value is a usable (non-zero) delay and is
 * smaller than the current one.  A *curr of zero means "not set yet", in
 * which case any non-zero value is taken.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;                 /* zero means "no delay requested" */
    }
    if (*curr != 0 && *curr <= value) {
        return;                 /* existing delay is already the shorter one */
    }
    *curr = value;
}
309 
/*
 * Set the interrupt cause register to @val and update the PCI interrupt line.
 *
 * ICR and ICS are both set to @val.  The line level is then derived from
 * (IMS & ICR).  When the line would go from low to high, the transition is
 * postponed while the mitigation timer is running; otherwise, if the MIT
 * compat flag is set, a new mitigation window may be started.
 *
 * @index is unused.
 */
static void
set_interrupt_cause(E1000State *s, int index, uint32_t val)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t pending_ints;
    uint32_t mit_delay;

    s->mac_reg[ICR] = val;

    /*
     * Make sure ICR and ICS registers have the same value.
     * The spec says that the ICS register is write-only.  However in practice,
     * on real hardware ICS is readable, and for reads it has the same value as
     * ICR (except that ICS does not have the clear on read behaviour of ICR).
     *
     * The VxWorks PRO/1000 driver uses this behaviour.
     */
    s->mac_reg[ICS] = val;

    /* Only causes that are unmasked in IMS can assert the line. */
    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
    if (!s->mit_irq_level && pending_ints) {
        /*
         * Here we detect a potential rising edge. We postpone raising the
         * interrupt line if we are inside the mitigation delay window
         * (s->mit_timer_on == 1).
         * We provide a partial implementation of interrupt mitigation,
         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
         * RADV; relative timers based on TIDV and RDTR are not implemented.
         */
        if (s->mit_timer_on) {
            /* e1000_mit_timer() re-evaluates the line when the timer fires. */
            return;
        }
        if (chkflag(MIT)) {
            /* Compute the next mitigation delay according to pending
             * interrupts and the current values of RADV (provided
             * RDTR!=0), TADV and ITR.
             * Then rearm the timer.
             */
            mit_delay = 0;
            /* The "* 4" converts 1024ns units into the 256ns units of ITR. */
            if (s->mit_ide &&
                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
            }
            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
            }
            mit_update_delay(&mit_delay, s->mac_reg[ITR]);

            if (mit_delay) {
                s->mit_timer_on = 1;
                /* mit_delay is in 256ns units; the timer works in ns. */
                timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                          mit_delay * 256);
            }
            s->mit_ide = 0;
        }
    }

    /* Remember the level driven so the next call can detect a rising edge. */
    s->mit_irq_level = (pending_ints != 0);
    pci_set_irq(d, s->mit_irq_level);
}
371 
372 static void
373 e1000_mit_timer(void *opaque)
374 {
375     E1000State *s = opaque;
376 
377     s->mit_timer_on = 0;
378     /* Call set_interrupt_cause to update the irq level (if necessary). */
379     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
380 }
381 
382 static void
383 set_ics(E1000State *s, int index, uint32_t val)
384 {
385     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
386         s->mac_reg[IMS]);
387     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
388 }
389 
390 static void
391 e1000_autoneg_timer(void *opaque)
392 {
393     E1000State *s = opaque;
394     if (!qemu_get_queue(s->nic)->link_down) {
395         e1000_link_up(s);
396         s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK;
397         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
398         DBGOUT(PHY, "Auto negotiation is completed\n");
399         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
400     }
401 }
402 
403 static int
404 rxbufsize(uint32_t v)
405 {
406     v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
407          E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
408          E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
409     switch (v) {
410     case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
411         return 16384;
412     case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
413         return 8192;
414     case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
415         return 4096;
416     case E1000_RCTL_SZ_1024:
417         return 1024;
418     case E1000_RCTL_SZ_512:
419         return 512;
420     case E1000_RCTL_SZ_256:
421         return 256;
422     }
423     return 2048;
424 }
425 
426 static void e1000_reset(void *opaque)
427 {
428     E1000State *d = opaque;
429     E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
430     uint8_t *macaddr = d->conf.macaddr.a;
431     int i;
432 
433     timer_del(d->autoneg_timer);
434     timer_del(d->mit_timer);
435     d->mit_timer_on = 0;
436     d->mit_irq_level = 0;
437     d->mit_ide = 0;
438     memset(d->phy_reg, 0, sizeof d->phy_reg);
439     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
440     d->phy_reg[PHY_ID2] = edc->phy_id2;
441     memset(d->mac_reg, 0, sizeof d->mac_reg);
442     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
443     d->rxbuf_min_shift = 1;
444     memset(&d->tx, 0, sizeof d->tx);
445 
446     if (qemu_get_queue(d->nic)->link_down) {
447         e1000_link_down(d);
448     }
449 
450     /* Throttle interrupts to prevent guest (e.g Win 2012) from
451      * reinjecting interrupts endlessly. TODO: fix non ITR case.
452      */
453     d->mac_reg[ITR] = 250;
454 
455     /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
456     d->mac_reg[RA] = 0;
457     d->mac_reg[RA + 1] = E1000_RAH_AV;
458     for (i = 0; i < 4; i++) {
459         d->mac_reg[RA] |= macaddr[i] << (8 * i);
460         d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
461     }
462     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
463 }
464 
465 static void
466 set_ctrl(E1000State *s, int index, uint32_t val)
467 {
468     /* RST is self clearing */
469     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
470 }
471 
472 static void
473 set_rx_control(E1000State *s, int index, uint32_t val)
474 {
475     s->mac_reg[RCTL] = val;
476     s->rxbuf_size = rxbufsize(val);
477     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
478     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
479            s->mac_reg[RCTL]);
480     qemu_flush_queued_packets(qemu_get_queue(s->nic));
481 }
482 
483 static void
484 set_mdic(E1000State *s, int index, uint32_t val)
485 {
486     uint32_t data = val & E1000_MDIC_DATA_MASK;
487     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
488 
489     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
490         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
491     else if (val & E1000_MDIC_OP_READ) {
492         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
493         if (!(phy_regcap[addr] & PHY_R)) {
494             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
495             val |= E1000_MDIC_ERROR;
496         } else
497             val = (val ^ data) | s->phy_reg[addr];
498     } else if (val & E1000_MDIC_OP_WRITE) {
499         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
500         if (!(phy_regcap[addr] & PHY_W)) {
501             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
502             val |= E1000_MDIC_ERROR;
503         } else {
504             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
505                 phyreg_writeops[addr](s, index, data);
506             } else {
507                 s->phy_reg[addr] = data;
508             }
509         }
510     }
511     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
512 
513     if (val & E1000_MDIC_INT_EN) {
514         set_ics(s, 0, E1000_ICR_MDAC);
515     }
516 }
517 
518 static uint32_t
519 get_eecd(E1000State *s, int index)
520 {
521     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
522 
523     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
524            s->eecd_state.bitnum_out, s->eecd_state.reading);
525     if (!s->eecd_state.reading ||
526         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
527           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
528         ret |= E1000_EECD_DO;
529     return ret;
530 }
531 
/*
 * Write handler for EECD: advance the bit-banged EEPROM state machine.
 *
 * The guest drives chip select (CS), clock (SK) and data-in (DI) through
 * this register.  Input bits are sampled on rising clock edges; the output
 * bit position advances on falling clock edges.  @index is unused.
 */
static void
set_eecd(E1000State *s, int index, uint32_t val)
{
    uint32_t oldval = s->eecd_state.old_eecd;

    /* Remember only the guest-controlled bits for edge detection/readback. */
    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
        return;
    }
    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
        s->eecd_state.val_in = 0;
        s->eecd_state.bitnum_in = 0;
        s->eecd_state.bitnum_out = 0;
        s->eecd_state.reading = 0;
    }
    if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
        return;
    }
    if (!(E1000_EECD_SK & val)) {               /* falling edge */
        s->eecd_state.bitnum_out++;
        return;
    }
    /* Rising clock edge: shift the DI bit into val_in. */
    s->eecd_state.val_in <<= 1;
    if (val & E1000_EECD_DI)
        s->eecd_state.val_in |= 1;
    /*
     * After nine input bits the command is complete: the low six bits are
     * the word address and the next three are compared against the read
     * opcode.  bitnum_out is set one short of the word's first bit so that
     * the following falling edge advances onto it.
     */
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
            EEPROM_READ_OPCODE_MICROWIRE);
    }
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
           s->eecd_state.reading);
}
567 
568 static uint32_t
569 flash_eerd_read(E1000State *s, int x)
570 {
571     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
572 
573     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
574         return (s->mac_reg[EERD]);
575 
576     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
577         return (E1000_EEPROM_RW_REG_DONE | r);
578 
579     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
580            E1000_EEPROM_RW_REG_DONE | r);
581 }
582 
/*
 * Compute an Internet checksum over part of a frame and store it in place.
 *
 * @data: start of the frame
 * @n:    number of valid bytes in @data
 * @sloc: offset at which the 16-bit big-endian checksum is stored
 * @css:  offset of the first byte covered by the checksum
 * @cse:  offset of the last byte covered, or 0 for "up to the end"
 *
 * Nothing is written when @sloc does not lie before the last covered byte.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;
    uint32_t end = n;

    if (cse != 0 && cse < n) {
        end = cse + 1;      /* cse is inclusive */
    }
    if (sloc >= end - 1) {
        return;
    }

    sum = net_checksum_add(end - css, data + css);
    stw_be_p(data + sloc, net_checksum_finish(sum));
}
595 
596 static inline void
597 inc_reg_if_not_full(E1000State *s, int index)
598 {
599     if (s->mac_reg[index] != 0xffffffff) {
600         s->mac_reg[index]++;
601     }
602 }
603 
604 static inline void
605 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
606 {
607     if (!memcmp(arr, bcast, sizeof bcast)) {
608         inc_reg_if_not_full(s, BPTC);
609     } else if (arr[0] & 1) {
610         inc_reg_if_not_full(s, MPTC);
611     }
612 }
613 
614 static void
615 grow_8reg_if_not_full(E1000State *s, int index, int size)
616 {
617     uint64_t sum = s->mac_reg[index] | (uint64_t)s->mac_reg[index+1] << 32;
618 
619     if (sum + size < sum) {
620         sum = ~0ULL;
621     } else {
622         sum += size;
623     }
624     s->mac_reg[index] = sum;
625     s->mac_reg[index+1] = sum >> 32;
626 }
627 
628 static void
629 increase_size_stats(E1000State *s, const int *size_regs, int size)
630 {
631     if (size > 1023) {
632         inc_reg_if_not_full(s, size_regs[5]);
633     } else if (size > 511) {
634         inc_reg_if_not_full(s, size_regs[4]);
635     } else if (size > 255) {
636         inc_reg_if_not_full(s, size_regs[3]);
637     } else if (size > 127) {
638         inc_reg_if_not_full(s, size_regs[2]);
639     } else if (size > 64) {
640         inc_reg_if_not_full(s, size_regs[1]);
641     } else if (size == 64) {
642         inc_reg_if_not_full(s, size_regs[0]);
643     }
644 }
645 
646 static inline int
647 vlan_enabled(E1000State *s)
648 {
649     return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
650 }
651 
652 static inline int
653 vlan_rx_filter_enabled(E1000State *s)
654 {
655     return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
656 }
657 
658 static inline int
659 is_vlan_packet(E1000State *s, const uint8_t *buf)
660 {
661     return (be16_to_cpup((uint16_t *)(buf + 12)) ==
662                 le16_to_cpu(s->mac_reg[VET]));
663 }
664 
665 static inline int
666 is_vlan_txd(uint32_t txd_lower)
667 {
668     return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
669 }
670 
671 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
672  * fill it in, just pad descriptor length by 4 bytes unless guest
673  * told us to strip it off the packet. */
674 static inline int
675 fcs_len(E1000State *s)
676 {
677     return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
678 }
679 
680 static void
681 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
682 {
683     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
684                                     PTC1023, PTC1522 };
685 
686     NetClientState *nc = qemu_get_queue(s->nic);
687     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
688         nc->info->receive(nc, buf, size);
689     } else {
690         qemu_send_packet(nc, buf, size);
691     }
692     inc_tx_bcast_or_mcast_count(s, buf);
693     increase_size_stats(s, PTCregs, size);
694 }
695 
/*
 * Transmit the frame currently assembled in s->tx.
 *
 * For a TSO segment (both the context and the current packet have TSE set)
 * the IP and TCP/UDP headers are first patched in place for this segment.
 * Requested checksums are then inserted, an optional VLAN tag is spliced
 * in, the frame is sent, and the transmit statistics are updated.
 */
static void
xmit_seg(E1000State *s)
{
    uint16_t len, *sp;
    unsigned int frames = s->tx.tso_frames, css, sofar;
    struct e1000_tx *tp = &s->tx;

    if (tp->tse && tp->cptse) {
        css = tp->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (tp->ip) {    /* IPv4 */
            /* Rewrite the length field and add the segment count to the
             * 16-bit field at offset 4 of the IP header. */
            stw_be_p(tp->data+css+2, tp->size - css);
            stw_be_p(tp->data+css+4,
                     be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
        } else {         /* IPv6 */
            stw_be_p(tp->data+css+4, tp->size - css);
        }
        css = tp->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
        if (tp->tcp) {
            /* Payload bytes already sent in earlier segments. */
            sofar = frames * tp->mss;
            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
            if (tp->paylen - sofar > tp->mss) {
                /* Not the last segment: these flags must not be set yet. */
                tp->data[css + 13] &= ~9;    /* PSH, FIN */
            } else if (frames) {
                inc_reg_if_not_full(s, TSCTC);
            }
        } else    /* UDP */
            stw_be_p(tp->data+css+4, len);
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            // add pseudo-header length before checksum calculation
            sp = (uint16_t *)(tp->data + tp->tucso);
            phsum = be16_to_cpup(sp) + len;
            /* Fold the carry back into the low 16 bits. */
            phsum = (phsum >> 16) + (phsum & 0xffff);
            stw_be_p(sp, phsum);
        }
        tp->tso_frames++;
    }

    if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
        putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
        putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
    if (tp->vlan_needed) {
        /*
         * Splice the tag in after the first 12 bytes without moving the
         * payload: tp->vlan sits directly in front of tp->data, so those 12
         * bytes are shifted 4 bytes towards the front, the tag is written
         * into the gap, and the frame is sent starting at tp->vlan.
         */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else {
        e1000_send_packet(s, tp->data, tp->size);
    }

    inc_reg_if_not_full(s, TPT);
    grow_8reg_if_not_full(s, TOTL, s->tx.size);
    /* The "good" packet/octet counters simply mirror the totals. */
    s->mac_reg[GPTC] = s->mac_reg[TPT];
    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
}
757 
/*
 * Process one transmit descriptor fetched from the guest's TX ring.
 *
 * A context descriptor only updates the offload parameters in s->tx.  A
 * data or legacy descriptor has its buffer read from guest memory and
 * appended to the frame being assembled.  With TSO active, a segment is
 * sent each time header + MSS bytes have been gathered.  At end of packet
 * (EOP) the remaining frame is sent and the per-packet state is reset.
 *
 * @dp: host copy of the descriptor; multi-byte fields are little-endian.
 */
static void
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
    unsigned int msh = 0xfffff;
    uint64_t addr;
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    struct e1000_tx *tp = &s->tx;

    /* Accumulate the IDE bit for the interrupt mitigation logic. */
    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
        op = le32_to_cpu(xp->cmd_and_length);
        tp->ipcss = xp->lower_setup.ip_fields.ipcss;
        tp->ipcso = xp->lower_setup.ip_fields.ipcso;
        tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
        tp->tucss = xp->upper_setup.tcp_fields.tucss;
        tp->tucso = xp->upper_setup.tcp_fields.tucso;
        tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
        tp->paylen = op & 0xfffff;
        tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
        tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
        tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
        tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
        tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
        tp->tso_frames = 0;
        if (tp->tucso == 0) {    /* this is probably wrong */
            DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
            tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
        }
        return;
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
        // data descriptor
        /* Checksum options are taken from a packet's first descriptor only. */
        if (tp->size == 0) {
            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
        }
        tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
    } else {
        // legacy descriptor
        tp->cptse = 0;
    }

    /* Prepare the 4-byte VLAN tag: VET followed by the special field. */
    if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
        tp->vlan_needed = 1;
        stw_be_p(tp->vlan_header,
                      le16_to_cpu(s->mac_reg[VET]));
        stw_be_p(tp->vlan_header + 2,
                      le16_to_cpu(dp->upper.fields.special));
    }

    addr = le64_to_cpu(dp->buffer_addr);
    if (tp->tse && tp->cptse) {
        /* Size of one full segment: headers plus MSS bytes of payload. */
        msh = tp->hdr_len + tp->mss;
        do {
            bytes = split_size;
            if (tp->size + bytes > msh)
                bytes = msh - tp->size;

            /* Never read past the end of the frame buffer. */
            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
            pci_dma_read(d, addr, tp->data + tp->size, bytes);
            sz = tp->size + bytes;
            /* Headers just became complete: keep a copy for later segments. */
            if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
                memmove(tp->header, tp->data, tp->hdr_len);
            }
            tp->size = sz;
            addr += bytes;
            if (sz == msh) {
                /* Full segment: send it, then restart from the saved headers. */
                xmit_seg(s);
                memmove(tp->data, tp->header, tp->hdr_len);
                tp->size = tp->hdr_len;
            }
            split_size -= bytes;
        /* The "bytes" test stops the loop when no progress can be made. */
        } while (bytes && split_size);
    } else if (!tp->tse && tp->cptse) {
        // context descriptor TSE is not set, while data descriptor TSE is set
        DBGOUT(TXERR, "TCP segmentation error\n");
    } else {
        /* No segmentation: append the buffer, clamped to the space left. */
        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
        pci_dma_read(d, addr, tp->data + tp->size, split_size);
        tp->size += split_size;
    }

    if (!(txd_lower & E1000_TXD_CMD_EOP))
        return;
    /* Send what is left, unless it is a TSO remainder shorter than the
     * headers. */
    if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
        xmit_seg(s);
    }
    /* End of packet: reset the per-packet state. */
    tp->tso_frames = 0;
    tp->sum_needed = 0;
    tp->vlan_needed = 0;
    tp->size = 0;
    tp->cptse = 0;
}
854 
855 static uint32_t
856 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
857 {
858     PCIDevice *d = PCI_DEVICE(s);
859     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
860 
861     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
862         return 0;
863     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
864                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
865     dp->upper.data = cpu_to_le32(txd_upper);
866     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
867                   &dp->upper, sizeof(dp->upper));
868     return E1000_ICR_TXDW;
869 }
870 
871 static uint64_t tx_desc_base(E1000State *s)
872 {
873     uint64_t bah = s->mac_reg[TDBAH];
874     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
875 
876     return (bah << 32) + bal;
877 }
878 
879 static void
880 start_xmit(E1000State *s)
881 {
882     PCIDevice *d = PCI_DEVICE(s);
883     dma_addr_t base;
884     struct e1000_tx_desc desc;
885     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
886 
887     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
888         DBGOUT(TX, "tx disabled\n");
889         return;
890     }
891 
892     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
893         base = tx_desc_base(s) +
894                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
895         pci_dma_read(d, base, &desc, sizeof(desc));
896 
897         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
898                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
899                desc.upper.data);
900 
901         process_tx_desc(s, &desc);
902         cause |= txdesc_writeback(s, base, &desc);
903 
904         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
905             s->mac_reg[TDH] = 0;
906         /*
907          * the following could happen only if guest sw assigns
908          * bogus values to TDT/TDLEN.
909          * there's nothing too intelligent we could do about this.
910          */
911         if (s->mac_reg[TDH] == tdh_start) {
912             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
913                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
914             break;
915         }
916     }
917     set_ics(s, 0, cause);
918 }
919 
920 static int
921 receive_filter(E1000State *s, const uint8_t *buf, int size)
922 {
923     static const int mta_shift[] = {4, 3, 2, 0};
924     uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
925     int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
926 
927     if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
928         uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
929         uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
930                                      ((vid >> 5) & 0x7f));
931         if ((vfta & (1 << (vid & 0x1f))) == 0)
932             return 0;
933     }
934 
935     if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
936         return 1;
937     }
938 
939     if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
940         inc_reg_if_not_full(s, MPRC);
941         return 1;
942     }
943 
944     if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
945         inc_reg_if_not_full(s, BPRC);
946         return 1;
947     }
948 
949     for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
950         if (!(rp[1] & E1000_RAH_AV))
951             continue;
952         ra[0] = cpu_to_le32(rp[0]);
953         ra[1] = cpu_to_le32(rp[1]);
954         if (!memcmp(buf, (uint8_t *)ra, 6)) {
955             DBGOUT(RXFILTER,
956                    "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
957                    (int)(rp - s->mac_reg - RA)/2,
958                    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
959             return 1;
960         }
961     }
962     DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
963            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
964 
965     f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
966     f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
967     if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f))) {
968         inc_reg_if_not_full(s, MPRC);
969         return 1;
970     }
971     DBGOUT(RXFILTER,
972            "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
973            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
974            (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
975            s->mac_reg[MTA + (f >> 5)]);
976 
977     return 0;
978 }
979 
980 static void
981 e1000_set_link_status(NetClientState *nc)
982 {
983     E1000State *s = qemu_get_nic_opaque(nc);
984     uint32_t old_status = s->mac_reg[STATUS];
985 
986     if (nc->link_down) {
987         e1000_link_down(s);
988     } else {
989         if (have_autoneg(s) &&
990             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
991             /* emulate auto-negotiation if supported */
992             timer_mod(s->autoneg_timer,
993                       qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
994         } else {
995             e1000_link_up(s);
996         }
997     }
998 
999     if (s->mac_reg[STATUS] != old_status)
1000         set_ics(s, 0, E1000_ICR_LSC);
1001 }
1002 
1003 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
1004 {
1005     int bufs;
1006     /* Fast-path short packets */
1007     if (total_size <= s->rxbuf_size) {
1008         return s->mac_reg[RDH] != s->mac_reg[RDT];
1009     }
1010     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
1011         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
1012     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
1013         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
1014             s->mac_reg[RDT] - s->mac_reg[RDH];
1015     } else {
1016         return false;
1017     }
1018     return total_size <= bufs * s->rxbuf_size;
1019 }
1020 
1021 static int
1022 e1000_can_receive(NetClientState *nc)
1023 {
1024     E1000State *s = qemu_get_nic_opaque(nc);
1025 
1026     return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
1027         (s->mac_reg[RCTL] & E1000_RCTL_EN) &&
1028         (s->parent_obj.config[PCI_COMMAND] & PCI_COMMAND_MASTER) &&
1029         e1000_has_rxbufs(s, 1);
1030 }
1031 
1032 static uint64_t rx_desc_base(E1000State *s)
1033 {
1034     uint64_t bah = s->mac_reg[RDBAH];
1035     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
1036 
1037     return (bah << 32) + bal;
1038 }
1039 
1040 static ssize_t
1041 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
1042 {
1043     E1000State *s = qemu_get_nic_opaque(nc);
1044     PCIDevice *d = PCI_DEVICE(s);
1045     struct e1000_rx_desc desc;
1046     dma_addr_t base;
1047     unsigned int n, rdt;
1048     uint32_t rdh_start;
1049     uint16_t vlan_special = 0;
1050     uint8_t vlan_status = 0;
1051     uint8_t min_buf[MIN_BUF_SIZE];
1052     struct iovec min_iov;
1053     uint8_t *filter_buf = iov->iov_base;
1054     size_t size = iov_size(iov, iovcnt);
1055     size_t iov_ofs = 0;
1056     size_t desc_offset;
1057     size_t desc_size;
1058     size_t total_size;
1059     static const int PRCregs[6] = { PRC64, PRC127, PRC255, PRC511,
1060                                     PRC1023, PRC1522 };
1061 
1062     if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
1063         return -1;
1064     }
1065 
1066     if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
1067         return -1;
1068     }
1069 
1070     /* Pad to minimum Ethernet frame length */
1071     if (size < sizeof(min_buf)) {
1072         iov_to_buf(iov, iovcnt, 0, min_buf, size);
1073         memset(&min_buf[size], 0, sizeof(min_buf) - size);
1074         inc_reg_if_not_full(s, RUC);
1075         min_iov.iov_base = filter_buf = min_buf;
1076         min_iov.iov_len = size = sizeof(min_buf);
1077         iovcnt = 1;
1078         iov = &min_iov;
1079     } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
1080         /* This is very unlikely, but may happen. */
1081         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
1082         filter_buf = min_buf;
1083     }
1084 
1085     /* Discard oversized packets if !LPE and !SBP. */
1086     if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
1087         (size > MAXIMUM_ETHERNET_VLAN_SIZE
1088         && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
1089         && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
1090         inc_reg_if_not_full(s, ROC);
1091         return size;
1092     }
1093 
1094     if (!receive_filter(s, filter_buf, size)) {
1095         return size;
1096     }
1097 
1098     if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
1099         vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
1100                                                                 + 14)));
1101         iov_ofs = 4;
1102         if (filter_buf == iov->iov_base) {
1103             memmove(filter_buf + 4, filter_buf, 12);
1104         } else {
1105             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
1106             while (iov->iov_len <= iov_ofs) {
1107                 iov_ofs -= iov->iov_len;
1108                 iov++;
1109             }
1110         }
1111         vlan_status = E1000_RXD_STAT_VP;
1112         size -= 4;
1113     }
1114 
1115     rdh_start = s->mac_reg[RDH];
1116     desc_offset = 0;
1117     total_size = size + fcs_len(s);
1118     if (!e1000_has_rxbufs(s, total_size)) {
1119             set_ics(s, 0, E1000_ICS_RXO);
1120             return -1;
1121     }
1122     do {
1123         desc_size = total_size - desc_offset;
1124         if (desc_size > s->rxbuf_size) {
1125             desc_size = s->rxbuf_size;
1126         }
1127         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
1128         pci_dma_read(d, base, &desc, sizeof(desc));
1129         desc.special = vlan_special;
1130         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
1131         if (desc.buffer_addr) {
1132             if (desc_offset < size) {
1133                 size_t iov_copy;
1134                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
1135                 size_t copy_size = size - desc_offset;
1136                 if (copy_size > s->rxbuf_size) {
1137                     copy_size = s->rxbuf_size;
1138                 }
1139                 do {
1140                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
1141                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
1142                     copy_size -= iov_copy;
1143                     ba += iov_copy;
1144                     iov_ofs += iov_copy;
1145                     if (iov_ofs == iov->iov_len) {
1146                         iov++;
1147                         iov_ofs = 0;
1148                     }
1149                 } while (copy_size);
1150             }
1151             desc_offset += desc_size;
1152             desc.length = cpu_to_le16(desc_size);
1153             if (desc_offset >= total_size) {
1154                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
1155             } else {
1156                 /* Guest zeroing out status is not a hardware requirement.
1157                    Clear EOP in case guest didn't do it. */
1158                 desc.status &= ~E1000_RXD_STAT_EOP;
1159             }
1160         } else { // as per intel docs; skip descriptors with null buf addr
1161             DBGOUT(RX, "Null RX descriptor!!\n");
1162         }
1163         pci_dma_write(d, base, &desc, sizeof(desc));
1164 
1165         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
1166             s->mac_reg[RDH] = 0;
1167         /* see comment in start_xmit; same here */
1168         if (s->mac_reg[RDH] == rdh_start) {
1169             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1170                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1171             set_ics(s, 0, E1000_ICS_RXO);
1172             return -1;
1173         }
1174     } while (desc_offset < total_size);
1175 
1176     increase_size_stats(s, PRCregs, total_size);
1177     inc_reg_if_not_full(s, TPR);
1178     s->mac_reg[GPRC] = s->mac_reg[TPR];
1179     /* TOR - Total Octets Received:
1180      * This register includes bytes received in a packet from the <Destination
1181      * Address> field through the <CRC> field, inclusively.
1182      * Always include FCS length (4) in size.
1183      */
1184     grow_8reg_if_not_full(s, TORL, size+4);
1185     s->mac_reg[GORCL] = s->mac_reg[TORL];
1186     s->mac_reg[GORCH] = s->mac_reg[TORH];
1187 
1188     n = E1000_ICS_RXT0;
1189     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1190         rdt += s->mac_reg[RDLEN] / sizeof(desc);
1191     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1192         s->rxbuf_min_shift)
1193         n |= E1000_ICS_RXDMT0;
1194 
1195     set_ics(s, 0, n);
1196 
1197     return size;
1198 }
1199 
1200 static ssize_t
1201 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1202 {
1203     const struct iovec iov = {
1204         .iov_base = (uint8_t *)buf,
1205         .iov_len = size
1206     };
1207 
1208     return e1000_receive_iov(nc, &iov, 1);
1209 }
1210 
1211 static uint32_t
1212 mac_readreg(E1000State *s, int index)
1213 {
1214     return s->mac_reg[index];
1215 }
1216 
1217 static uint32_t
1218 mac_low4_read(E1000State *s, int index)
1219 {
1220     return s->mac_reg[index] & 0xf;
1221 }
1222 
1223 static uint32_t
1224 mac_low11_read(E1000State *s, int index)
1225 {
1226     return s->mac_reg[index] & 0x7ff;
1227 }
1228 
1229 static uint32_t
1230 mac_low13_read(E1000State *s, int index)
1231 {
1232     return s->mac_reg[index] & 0x1fff;
1233 }
1234 
1235 static uint32_t
1236 mac_low16_read(E1000State *s, int index)
1237 {
1238     return s->mac_reg[index] & 0xffff;
1239 }
1240 
1241 static uint32_t
1242 mac_icr_read(E1000State *s, int index)
1243 {
1244     uint32_t ret = s->mac_reg[ICR];
1245 
1246     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1247     set_interrupt_cause(s, 0, 0);
1248     return ret;
1249 }
1250 
1251 static uint32_t
1252 mac_read_clr4(E1000State *s, int index)
1253 {
1254     uint32_t ret = s->mac_reg[index];
1255 
1256     s->mac_reg[index] = 0;
1257     return ret;
1258 }
1259 
1260 static uint32_t
1261 mac_read_clr8(E1000State *s, int index)
1262 {
1263     uint32_t ret = s->mac_reg[index];
1264 
1265     s->mac_reg[index] = 0;
1266     s->mac_reg[index-1] = 0;
1267     return ret;
1268 }
1269 
1270 static void
1271 mac_writereg(E1000State *s, int index, uint32_t val)
1272 {
1273     uint32_t macaddr[2];
1274 
1275     s->mac_reg[index] = val;
1276 
1277     if (index == RA + 1) {
1278         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1279         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1280         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1281     }
1282 }
1283 
1284 static void
1285 set_rdt(E1000State *s, int index, uint32_t val)
1286 {
1287     s->mac_reg[index] = val & 0xffff;
1288     if (e1000_has_rxbufs(s, 1)) {
1289         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1290     }
1291 }
1292 
1293 static void
1294 set_16bit(E1000State *s, int index, uint32_t val)
1295 {
1296     s->mac_reg[index] = val & 0xffff;
1297 }
1298 
1299 static void
1300 set_dlen(E1000State *s, int index, uint32_t val)
1301 {
1302     s->mac_reg[index] = val & 0xfff80;
1303 }
1304 
1305 static void
1306 set_tctl(E1000State *s, int index, uint32_t val)
1307 {
1308     s->mac_reg[index] = val;
1309     s->mac_reg[TDT] &= 0xffff;
1310     start_xmit(s);
1311 }
1312 
1313 static void
1314 set_icr(E1000State *s, int index, uint32_t val)
1315 {
1316     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1317     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1318 }
1319 
1320 static void
1321 set_imc(E1000State *s, int index, uint32_t val)
1322 {
1323     s->mac_reg[IMS] &= ~val;
1324     set_ics(s, 0, 0);
1325 }
1326 
1327 static void
1328 set_ims(E1000State *s, int index, uint32_t val)
1329 {
1330     s->mac_reg[IMS] |= val;
1331     set_ics(s, 0, 0);
1332 }
1333 
/* Shorthand for a table entry that uses the plain read handler. */
#define getreg(x)    [x] = mac_readreg
/*
 * Read dispatch table, indexed by register index (the MMIO byte offset
 * divided by 4, as computed in e1000_mmio_read()).  Entries that are not
 * listed stay NULL, which the MMIO handlers treat as "no such readable
 * register".  The "[a ... b]" entries use range designators to fill a
 * whole run of consecutive indices with the same handler.
 */
static uint32_t (*macreg_readops[])(E1000State *, int) = {
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEC),      getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),

    /* Counters that are cleared when read. */
    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    /* Registers with dedicated read handlers or masked reads. */
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,
    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
    [RDFPC]   = mac_low13_read,
    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
    [TDFPC]   = mac_low13_read,
    [AIT]     = mac_low16_read,

    /* Register arrays. */
    [CRCERRS ... MPC]   = &mac_readreg,
    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
    [FFLT ... FFLT+6]   = &mac_low11_read,
    [RA ... RA+31]      = &mac_readreg,
    [WUPM ... WUPM+31]  = &mac_readreg,
    [MTA ... MTA+127]   = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
    [FFMT ... FFMT+254] = &mac_low4_read,
    [FFVT ... FFVT+254] = &mac_readreg,
    [PBM ... PBM+16383] = &mac_readreg,
};
/* Number of slots in macreg_readops; used to bounds-check register indices. */
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1388 
/* Shorthand for a table entry that uses the plain write handler. */
#define putreg(x)    [x] = mac_writereg
/*
 * Write dispatch table, indexed by register index (the MMIO byte offset
 * divided by 4, as computed in e1000_mmio_write()).  Entries that are not
 * listed stay NULL; e1000_mmio_write() then reports the register as
 * read-only or unknown instead of writing it.
 */
static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
    putreg(WUS),      putreg(AIT),

    /* Registers whose writes are masked or have side effects. */
    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
    [ITR]    = set_16bit,

    /* Register arrays. */
    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
    [FFLT ... FFLT+6]   = &mac_writereg,
    [RA ... RA+31]      = &mac_writereg,
    [WUPM ... WUPM+31]  = &mac_writereg,
    [MTA ... MTA+127]   = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
    [PBM ... PBM+16383] = &mac_writereg,
};

/* Number of slots in macreg_writeops; used to bounds-check register indices. */
enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1418 
/* Low two bits of each mac_reg_access[] entry. */
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

/* Encode "register is accessible only when compat flag x is set". */
#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implemented
 *
 * The MMIO handlers consult this table before dispatching: when the
 * "flag needed" bit is set, the access goes through only if the flag bits
 * intersect the device's compat_flags; the "partial" bit only triggers a
 * debug message.  Unlisted registers are 0, i.e. unrestricted. */
static const uint8_t mac_reg_access[0x8000] = {
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEC]     = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1471 
1472 static void
1473 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1474                  unsigned size)
1475 {
1476     E1000State *s = opaque;
1477     unsigned int index = (addr & 0x1ffff) >> 2;
1478 
1479     if (index < NWRITEOPS && macreg_writeops[index]) {
1480         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1481             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1482             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1483                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1484                        "It is not fully implemented.\n", index<<2);
1485             }
1486             macreg_writeops[index](s, index, val);
1487         } else {    /* "flag needed" bit is set, but the flag is not active */
1488             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1489                    index<<2);
1490         }
1491     } else if (index < NREADOPS && macreg_readops[index]) {
1492         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1493                index<<2, val);
1494     } else {
1495         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1496                index<<2, val);
1497     }
1498 }
1499 
1500 static uint64_t
1501 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1502 {
1503     E1000State *s = opaque;
1504     unsigned int index = (addr & 0x1ffff) >> 2;
1505 
1506     if (index < NREADOPS && macreg_readops[index]) {
1507         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1508             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1509             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1510                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1511                        "It is not fully implemented.\n", index<<2);
1512             }
1513             return macreg_readops[index](s, index);
1514         } else {    /* "flag needed" bit is set, but the flag is not active */
1515             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1516                    index<<2);
1517         }
1518     } else {
1519         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1520     }
1521     return 0;
1522 }
1523 
/*
 * Memory region callbacks for the register MMIO window: little-endian,
 * with the handlers implemented for 4-byte accesses only.
 */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1533 
1534 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1535                               unsigned size)
1536 {
1537     E1000State *s = opaque;
1538 
1539     (void)s;
1540     return 0;
1541 }
1542 
1543 static void e1000_io_write(void *opaque, hwaddr addr,
1544                            uint64_t val, unsigned size)
1545 {
1546     E1000State *s = opaque;
1547 
1548     (void)s;
1549 }
1550 
/*
 * Memory region callbacks for the I/O port window; both handlers are
 * stubs (reads return 0, writes are ignored).
 */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1556 
/*
 * VMState field test: true only when the incoming stream is version 1.
 * @opaque is unused.
 */
static bool is_version_1(void *opaque, int version_id)
{
    if (version_id != 1) {
        return false;
    }
    return true;
}
1561 
1562 static void e1000_pre_save(void *opaque)
1563 {
1564     E1000State *s = opaque;
1565     NetClientState *nc = qemu_get_queue(s->nic);
1566 
1567     /* If the mitigation timer is active, emulate a timeout now. */
1568     if (s->mit_timer_on) {
1569         e1000_mit_timer(s);
1570     }
1571 
1572     /*
1573      * If link is down and auto-negotiation is supported and ongoing,
1574      * complete auto-negotiation immediately. This allows us to look
1575      * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1576      */
1577     if (nc->link_down && have_autoneg(s)) {
1578         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1579     }
1580 }
1581 
1582 static int e1000_post_load(void *opaque, int version_id)
1583 {
1584     E1000State *s = opaque;
1585     NetClientState *nc = qemu_get_queue(s->nic);
1586 
1587     if (!chkflag(MIT)) {
1588         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1589             s->mac_reg[TADV] = 0;
1590         s->mit_irq_level = false;
1591     }
1592     s->mit_ide = 0;
1593     s->mit_timer_on = false;
1594 
1595     /* nc.link_down can't be migrated, so infer link_down according
1596      * to link status bit in mac_reg[STATUS].
1597      * Alternatively, restart link negotiation if it was in progress. */
1598     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1599 
1600     if (have_autoneg(s) &&
1601         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1602         nc->link_down = false;
1603         timer_mod(s->autoneg_timer,
1604                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1605     }
1606 
1607     return 0;
1608 }
1609 
1610 static bool e1000_mit_state_needed(void *opaque)
1611 {
1612     E1000State *s = opaque;
1613 
1614     return chkflag(MIT);
1615 }
1616 
1617 static bool e1000_full_mac_needed(void *opaque)
1618 {
1619     E1000State *s = opaque;
1620 
1621     return chkflag(MAC);
1622 }
1623 
/*
 * Migration description for the interrupt mitigation state: the four
 * mitigation registers plus the latched IRQ level.  It is emitted only
 * when e1000_mit_state_needed() returns true.  Presumably attached as a
 * subsection of vmstate_e1000 -- confirm against that definition.
 */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1638 
/*
 * Migration description carrying the entire mac_reg[] array (0x8000
 * entries).  It is emitted only when e1000_full_mac_needed() returns true.
 * Presumably attached as a subsection of vmstate_e1000 -- confirm against
 * that definition.
 */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1649 
1650 static const VMStateDescription vmstate_e1000 = {
1651     .name = "e1000",
1652     .version_id = 2,
1653     .minimum_version_id = 1,
1654     .pre_save = e1000_pre_save,
1655     .post_load = e1000_post_load,
1656     .fields = (VMStateField[]) {
1657         VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1658         VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1659         VMSTATE_UNUSED(4), /* Was mmio_base.  */
1660         VMSTATE_UINT32(rxbuf_size, E1000State),
1661         VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1662         VMSTATE_UINT32(eecd_state.val_in, E1000State),
1663         VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1664         VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1665         VMSTATE_UINT16(eecd_state.reading, E1000State),
1666         VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1667         VMSTATE_UINT8(tx.ipcss, E1000State),
1668         VMSTATE_UINT8(tx.ipcso, E1000State),
1669         VMSTATE_UINT16(tx.ipcse, E1000State),
1670         VMSTATE_UINT8(tx.tucss, E1000State),
1671         VMSTATE_UINT8(tx.tucso, E1000State),
1672         VMSTATE_UINT16(tx.tucse, E1000State),
1673         VMSTATE_UINT32(tx.paylen, E1000State),
1674         VMSTATE_UINT8(tx.hdr_len, E1000State),
1675         VMSTATE_UINT16(tx.mss, E1000State),
1676         VMSTATE_UINT16(tx.size, E1000State),
1677         VMSTATE_UINT16(tx.tso_frames, E1000State),
1678         VMSTATE_UINT8(tx.sum_needed, E1000State),
1679         VMSTATE_INT8(tx.ip, E1000State),
1680         VMSTATE_INT8(tx.tcp, E1000State),
1681         VMSTATE_BUFFER(tx.header, E1000State),
1682         VMSTATE_BUFFER(tx.data, E1000State),
1683         VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1684         VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1685         VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1686         VMSTATE_UINT32(mac_reg[EECD], E1000State),
1687         VMSTATE_UINT32(mac_reg[EERD], E1000State),
1688         VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1689         VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1690         VMSTATE_UINT32(mac_reg[ICR], E1000State),
1691         VMSTATE_UINT32(mac_reg[ICS], E1000State),
1692         VMSTATE_UINT32(mac_reg[IMC], E1000State),
1693         VMSTATE_UINT32(mac_reg[IMS], E1000State),
1694         VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1695         VMSTATE_UINT32(mac_reg[MANC], E1000State),
1696         VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1697         VMSTATE_UINT32(mac_reg[MPC], E1000State),
1698         VMSTATE_UINT32(mac_reg[PBA], E1000State),
1699         VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1700         VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1701         VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1702         VMSTATE_UINT32(mac_reg[RDH], E1000State),
1703         VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1704         VMSTATE_UINT32(mac_reg[RDT], E1000State),
1705         VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1706         VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1707         VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1708         VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1709         VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1710         VMSTATE_UINT32(mac_reg[TDH], E1000State),
1711         VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1712         VMSTATE_UINT32(mac_reg[TDT], E1000State),
1713         VMSTATE_UINT32(mac_reg[TORH], E1000State),
1714         VMSTATE_UINT32(mac_reg[TORL], E1000State),
1715         VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1716         VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1717         VMSTATE_UINT32(mac_reg[TPR], E1000State),
1718         VMSTATE_UINT32(mac_reg[TPT], E1000State),
1719         VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1720         VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1721         VMSTATE_UINT32(mac_reg[VET], E1000State),
1722         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1723         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1724         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1725         VMSTATE_END_OF_LIST()
1726     },
1727     .subsections = (const VMStateDescription*[]) {
1728         &vmstate_e1000_mit_state,
1729         &vmstate_e1000_full_mac_state,
1730         NULL
1731     }
1732 };
1733 
/*
 * Default EEPROM image: 64 16-bit words.  The contents are documented in
 * Tables 5-2 and 5-3, pp. 98-102 of the software developer's manual.
 *
 * This is only a template.  pci_e1000_realize() copies it into the device
 * state and then overwrites words 0x00-0x02 with the MAC address, words
 * 0x0b and 0x0d (the DevId placeholders below) with the PCI device id, and
 * word EEPROM_CHECKSUM_REG with the computed checksum.
 */
static const uint16_t e1000_eeprom_template[64] = {
    /* 0x00 */ 0x0000, 0x0000, 0x0000, 0x0000,
    /* 0x04 */ 0xffff, 0x0000, 0x0000, 0x0000,
    /* 0x08 */ 0x3000, 0x1000, 0x6403, 0x0000 /* DevId */,
    /* 0x0c */ 0x8086, 0x0000 /* DevId */, 0x8086, 0x3040,
    /* 0x10 */ 0x0008, 0x2000, 0x7e14, 0x0048,
    /* 0x14 */ 0x1000, 0x00d8, 0x0000, 0x2700,
    /* 0x18 */ 0x6cc9, 0x3150, 0x0722, 0x040b,
    /* 0x1c */ 0x0984, 0x0000, 0xc000, 0x0706,
    /* 0x20 */ 0x1008, 0x0000, 0x0f04, 0x7fff,
    /* 0x24 */ 0x4d01, 0xffff, 0xffff, 0xffff,
    /* 0x28 */ 0xffff, 0xffff, 0xffff, 0xffff,
    /* 0x2c */ 0xffff, 0xffff, 0xffff, 0xffff,
    /* 0x30 */ 0x0100, 0x4000, 0x121c, 0xffff,
    /* 0x34 */ 0xffff, 0xffff, 0xffff, 0xffff,
    /* 0x38 */ 0xffff, 0xffff, 0xffff, 0xffff,
    /* 0x3c */ 0xffff, 0xffff, 0xffff, 0x0000,
};
1748 
1749 /* PCI interface */
1750 
1751 static void
1752 e1000_mmio_setup(E1000State *d)
1753 {
1754     int i;
1755     const uint32_t excluded_regs[] = {
1756         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1757         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1758     };
1759 
1760     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1761                           "e1000-mmio", PNPMMIO_SIZE);
1762     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1763     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1764         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1765                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1766     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1767 }
1768 
1769 static void
1770 pci_e1000_uninit(PCIDevice *dev)
1771 {
1772     E1000State *d = E1000(dev);
1773 
1774     timer_del(d->autoneg_timer);
1775     timer_free(d->autoneg_timer);
1776     timer_del(d->mit_timer);
1777     timer_free(d->mit_timer);
1778     qemu_del_nic(d->nic);
1779 }
1780 
1781 static NetClientInfo net_e1000_info = {
1782     .type = NET_CLIENT_OPTIONS_KIND_NIC,
1783     .size = sizeof(NICState),
1784     .can_receive = e1000_can_receive,
1785     .receive = e1000_receive,
1786     .receive_iov = e1000_receive_iov,
1787     .link_status_changed = e1000_set_link_status,
1788 };
1789 
1790 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1791                                 uint32_t val, int len)
1792 {
1793     E1000State *s = E1000(pci_dev);
1794 
1795     pci_default_write_config(pci_dev, address, val, len);
1796 
1797     if (range_covers_byte(address, len, PCI_COMMAND) &&
1798         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1799         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1800     }
1801 }
1802 
1803 
1804 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1805 {
1806     DeviceState *dev = DEVICE(pci_dev);
1807     E1000State *d = E1000(pci_dev);
1808     PCIDeviceClass *pdc = PCI_DEVICE_GET_CLASS(pci_dev);
1809     uint8_t *pci_conf;
1810     uint16_t checksum = 0;
1811     int i;
1812     uint8_t *macaddr;
1813 
1814     pci_dev->config_write = e1000_write_config;
1815 
1816     pci_conf = pci_dev->config;
1817 
1818     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1819     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1820 
1821     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1822 
1823     e1000_mmio_setup(d);
1824 
1825     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1826 
1827     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1828 
1829     memmove(d->eeprom_data, e1000_eeprom_template,
1830         sizeof e1000_eeprom_template);
1831     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1832     macaddr = d->conf.macaddr.a;
1833     for (i = 0; i < 3; i++)
1834         d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1835     d->eeprom_data[11] = d->eeprom_data[13] = pdc->device_id;
1836     for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1837         checksum += d->eeprom_data[i];
1838     checksum = (uint16_t) EEPROM_SUM - checksum;
1839     d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1840 
1841     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1842                           object_get_typename(OBJECT(d)), dev->id, d);
1843 
1844     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1845 
1846     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1847     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1848 }
1849 
1850 static void qdev_e1000_reset(DeviceState *dev)
1851 {
1852     E1000State *d = E1000(dev);
1853     e1000_reset(d);
1854 }
1855 
1856 static Property e1000_properties[] = {
1857     DEFINE_NIC_PROPERTIES(E1000State, conf),
1858     DEFINE_PROP_BIT("autonegotiation", E1000State,
1859                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1860     DEFINE_PROP_BIT("mitigation", E1000State,
1861                     compat_flags, E1000_FLAG_MIT_BIT, true),
1862     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1863                     compat_flags, E1000_FLAG_MAC_BIT, true),
1864     DEFINE_PROP_END_OF_LIST(),
1865 };
1866 
/*
 * Static description of one concrete e1000 device model.  An entry is
 * passed as class_data to e1000_class_init(), which copies the fields
 * into the class structures.
 */
typedef struct E1000Info {
    const char *name;       /* QOM type name */
    uint16_t device_id;     /* PCI device id */
    uint8_t revision;       /* PCI revision */
    uint16_t phy_id2;       /* stored in E1000BaseClass::phy_id2 */
} E1000Info;
1873 
1874 static void e1000_class_init(ObjectClass *klass, void *data)
1875 {
1876     DeviceClass *dc = DEVICE_CLASS(klass);
1877     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1878     E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1879     const E1000Info *info = data;
1880 
1881     k->realize = pci_e1000_realize;
1882     k->exit = pci_e1000_uninit;
1883     k->romfile = "efi-e1000.rom";
1884     k->vendor_id = PCI_VENDOR_ID_INTEL;
1885     k->device_id = info->device_id;
1886     k->revision = info->revision;
1887     e->phy_id2 = info->phy_id2;
1888     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1889     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1890     dc->desc = "Intel Gigabit Ethernet";
1891     dc->reset = qdev_e1000_reset;
1892     dc->vmsd = &vmstate_e1000;
1893     dc->props = e1000_properties;
1894 }
1895 
1896 static void e1000_instance_init(Object *obj)
1897 {
1898     E1000State *n = E1000(obj);
1899     device_add_bootindex_property(obj, &n->conf.bootindex,
1900                                   "bootindex", "/ethernet-phy@0",
1901                                   DEVICE(n), NULL);
1902 }
1903 
1904 static const TypeInfo e1000_base_info = {
1905     .name          = TYPE_E1000_BASE,
1906     .parent        = TYPE_PCI_DEVICE,
1907     .instance_size = sizeof(E1000State),
1908     .instance_init = e1000_instance_init,
1909     .class_size    = sizeof(E1000BaseClass),
1910     .abstract      = true,
1911 };
1912 
1913 static const E1000Info e1000_devices[] = {
1914     {
1915         .name      = "e1000",
1916         .device_id = E1000_DEV_ID_82540EM,
1917         .revision  = 0x03,
1918         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1919     },
1920     {
1921         .name      = "e1000-82544gc",
1922         .device_id = E1000_DEV_ID_82544GC_COPPER,
1923         .revision  = 0x03,
1924         .phy_id2   = E1000_PHY_ID2_82544x,
1925     },
1926     {
1927         .name      = "e1000-82545em",
1928         .device_id = E1000_DEV_ID_82545EM_COPPER,
1929         .revision  = 0x03,
1930         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1931     },
1932 };
1933 
1934 static void e1000_register_types(void)
1935 {
1936     int i;
1937 
1938     type_register_static(&e1000_base_info);
1939     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1940         const E1000Info *info = &e1000_devices[i];
1941         TypeInfo type_info = {};
1942 
1943         type_info.name = info->name;
1944         type_info.parent = TYPE_E1000_BASE;
1945         type_info.class_data = (void *)info;
1946         type_info.class_init = e1000_class_init;
1947         type_info.instance_init = e1000_instance_init;
1948 
1949         type_register(&type_info);
1950     }
1951 }
1952 
1953 type_init(e1000_register_types)
1954