xref: /openbmc/qemu/hw/net/e1000.c (revision 886ce6f8)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "hw/hw.h"
29 #include "hw/pci/pci.h"
30 #include "net/net.h"
31 #include "net/checksum.h"
32 #include "hw/loader.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 #include "qemu/iov.h"
36 #include "qemu/range.h"
37 
38 #include "e1000_regs.h"
39 
/* Ethernet broadcast destination address: six octets of 0xff.  Compared
 * against the first bytes of outgoing frames when updating TX statistics. */
static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
41 
/* Debug tracing is compiled in because this macro is defined here; without
 * it every DBGOUT() expands to an empty statement. */
#define E1000_DEBUG

#ifdef E1000_DEBUG
/* Trace categories.  Each enumerator is a bit position within debugflags. */
enum {
    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
};
/* Bit mask for trace category x (x is the enumerator name minus DEBUG_). */
#define DBGBIT(x)    (1<<DEBUG_##x)
/* Categories that are printed: by default only TXERR and GENERAL. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/* Print a printf-style message to stderr, prefixed with "e1000: ", if the
 * category `what` is enabled in debugflags. */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif
61 
/* NOTE(review): presumably the sizes of the I/O-port and MMIO regions
 * (E1000State.io / E1000State.mmio); their users are outside this view. */
#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/* this is the size past which hardware will drop packets when setting LPE=0 */
#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
/* this is the size past which hardware will drop packets when setting LPE=1 */
#define MAXIMUM_ETHERNET_LPE_SIZE 16384

/* NOTE(review): presumably a 14-octet Ethernet header plus a 4-octet VLAN
 * tag; confirm against the users of this constant. */
#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
72 
73 /*
74  * HW models:
75  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
76  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
77  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
78  *  Others never tested
79  */
80 
/*
 * Per-device state of the emulated e1000 NIC: guest-visible register files,
 * the packet currently being assembled for transmission, the bit-banged
 * EEPROM interface state, and the timers used for link auto-negotiation and
 * interrupt mitigation.
 */
typedef struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;               /* conf.macaddr seeds RAL/RAH on reset */
    MemoryRegion mmio;
    MemoryRegion io;

    uint32_t mac_reg[0x8000];   /* MAC registers, indexed by byte offset / 4 */
    uint16_t phy_reg[0x20];     /* PHY registers, accessed through MDIC */
    uint16_t eeprom_data[64];   /* EEPROM contents as 16-bit words */

    uint32_t rxbuf_size;        /* derived from RCTL by set_rx_control() */
    uint32_t rxbuf_min_shift;   /* derived from RCTL; 1 after reset */
    /* Transmit state accumulated across descriptors of one packet. */
    struct e1000_tx {
        unsigned char header[256];      /* saved copy of the first hdr_len
                                         * bytes, re-used for each TSO segment */
        unsigned char vlan_header[4];   /* tag to insert: VET + descriptor
                                         * "special" field, big-endian */
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];          /* scratch directly before data so a
                                         * tagged frame can start here */
        unsigned char data[0x10000];    /* packet assembly buffer */
        uint16_t size;                  /* bytes currently held in data */
        unsigned char sum_needed;       /* E1000_TXD_POPTS_* checksum requests */
        unsigned char vlan_needed;      /* non-zero: insert vlan_header on send */
        /* Checksum offsets and segmentation parameters copied from the most
         * recent context descriptor. */
        uint8_t ipcss;
        uint8_t ipcso;
        uint16_t ipcse;
        uint8_t tucss;
        uint8_t tucso;
        uint16_t tucse;
        uint8_t hdr_len;
        uint16_t mss;
        uint32_t paylen;
        uint16_t tso_frames;            /* segments sent so far for this packet */
        char tse;                       /* TSE bit of the context descriptor */
        int8_t ip;                      /* 1 when the context selected IPv4 */
        int8_t tcp;                     /* 1 when the context selected TCP */
        char cptse;     // current packet tse bit
    } tx;

    /* State of the bit-banged EEPROM interface driven through EECD. */
    struct {
        uint32_t val_in;    /* shifted in from guest driver */
        uint16_t bitnum_in;     /* number of bits clocked in so far */
        uint16_t bitnum_out;    /* index of the EEPROM bit presented on DO */
        uint16_t reading;       /* non-zero once a read opcode was decoded */
        uint32_t old_eecd;      /* last guest-written EECD control bits */
    } eecd_state;

    QEMUTimer *autoneg_timer;  /* armed when the guest restarts autoneg */

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_MAC_BIT 2
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
    uint32_t compat_flags;
} E1000State;
146 
/* Test compatibility flag E1000_FLAG_<x>.  Relies on a variable named `s`
 * of type E1000State * being in scope at the point of use. */
#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
148 
/* Class data shared by all e1000 variants. */
typedef struct E1000BaseClass {
    PCIDeviceClass parent_class;
    uint16_t phy_id2;   /* value loaded into the PHY_ID2 register on reset */
} E1000BaseClass;

#define TYPE_E1000_BASE "e1000-base"

/* Checked cast of an object pointer to E1000State. */
#define E1000(obj) \
    OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)

/* Checked casts to E1000BaseClass, from a class pointer or from an object. */
#define E1000_DEVICE_CLASS(klass) \
     OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
#define E1000_DEVICE_GET_CLASS(obj) \
    OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
163 
/*
 * Register indices into E1000State.mac_reg[].  defreg(X) declares an
 * enumerator X whose value is the register's byte offset E1000_X divided
 * by four, i.e. its 32-bit word index.
 */
#define defreg(x)    x = (E1000_##x>>2)
enum {
    defreg(CTRL),    defreg(EECD),    defreg(EERD),    defreg(GPRC),
    defreg(GPTC),    defreg(ICR),     defreg(ICS),     defreg(IMC),
    defreg(IMS),     defreg(LEDCTL),  defreg(MANC),    defreg(MDIC),
    defreg(MPC),     defreg(PBA),     defreg(RCTL),    defreg(RDBAH),
    defreg(RDBAL),   defreg(RDH),     defreg(RDLEN),   defreg(RDT),
    defreg(STATUS),  defreg(SWSM),    defreg(TCTL),    defreg(TDBAH),
    defreg(TDBAL),   defreg(TDH),     defreg(TDLEN),   defreg(TDT),
    defreg(TORH),    defreg(TORL),    defreg(TOTH),    defreg(TOTL),
    defreg(TPR),     defreg(TPT),     defreg(TXDCTL),  defreg(WUFC),
    defreg(RA),      defreg(MTA),     defreg(CRCERRS), defreg(VFTA),
    defreg(VET),     defreg(RDTR),    defreg(RADV),    defreg(TADV),
    defreg(ITR),     defreg(FCRUC),   defreg(TDFH),    defreg(TDFT),
    defreg(TDFHS),   defreg(TDFTS),   defreg(TDFPC),   defreg(RDFH),
    defreg(RDFT),    defreg(RDFHS),   defreg(RDFTS),   defreg(RDFPC),
    defreg(IPAV),    defreg(WUC),     defreg(WUS),     defreg(AIT),
    defreg(IP6AT),   defreg(IP4AT),   defreg(FFLT),    defreg(FFMT),
    defreg(FFVT),    defreg(WUPM),    defreg(PBM),     defreg(SCC),
    defreg(ECOL),    defreg(MCC),     defreg(LATECOL), defreg(COLC),
    defreg(DC),      defreg(TNCRS),   defreg(SEC),     defreg(CEXTERR),
    defreg(RLEC),    defreg(XONRXC),  defreg(XONTXC),  defreg(XOFFRXC),
    defreg(XOFFTXC), defreg(RFC),     defreg(RJC),     defreg(RNBC),
    defreg(TSCTFC),  defreg(MGTPRC),  defreg(MGTPDC),  defreg(MGTPTC),
    defreg(RUC),     defreg(ROC),     defreg(GORCL),   defreg(GORCH),
    defreg(GOTCL),   defreg(GOTCH),   defreg(BPRC),    defreg(MPRC),
    defreg(TSCTC),   defreg(PRC64),   defreg(PRC127),  defreg(PRC255),
    defreg(PRC511),  defreg(PRC1023), defreg(PRC1522), defreg(PTC64),
    defreg(PTC127),  defreg(PTC255),  defreg(PTC511),  defreg(PTC1023),
    defreg(PTC1522), defreg(MPTC),    defreg(BPTC)
};
195 
196 static void
197 e1000_link_down(E1000State *s)
198 {
199     s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
200     s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
201     s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
202     s->phy_reg[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK;
203 }
204 
205 static void
206 e1000_link_up(E1000State *s)
207 {
208     s->mac_reg[STATUS] |= E1000_STATUS_LU;
209     s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
210 
211     /* E1000_STATUS_LU is tested by e1000_can_receive() */
212     qemu_flush_queued_packets(qemu_get_queue(s->nic));
213 }
214 
215 static bool
216 have_autoneg(E1000State *s)
217 {
218     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
219 }
220 
221 static void
222 set_phy_ctrl(E1000State *s, int index, uint16_t val)
223 {
224     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
225     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
226                                    MII_CR_RESET |
227                                    MII_CR_RESTART_AUTO_NEG);
228 
229     /*
230      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
231      * migrate during auto negotiation, after migration the link will be
232      * down.
233      */
234     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
235         e1000_link_down(s);
236         DBGOUT(PHY, "Start link auto negotiation\n");
237         timer_mod(s->autoneg_timer,
238                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
239     }
240 }
241 
/*
 * Per-register PHY write handlers, indexed by PHY register number.  A NULL
 * entry (or an index at or beyond NPHYWRITEOPS) means the written value is
 * stored directly into phy_reg[] by set_mdic().
 */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [PHY_CTRL] = set_phy_ctrl,
};

/* Number of entries in phyreg_writeops[]. */
enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
247 
/* Access capability bits for PHY registers. */
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
/*
 * Access capabilities per PHY register number.  Registers not listed are
 * zero, so set_mdic() flags any read or write of them as an MDIC error.
 */
static const char phy_regcap[0x20] = {
    [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
    [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
    [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
    [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
    [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
    [PHY_AUTONEG_EXP] = PHY_R,
};
258 
/*
 * Reset values of the PHY registers; e1000_reset() copies this table over
 * the start of phy_reg[] after zeroing it.
 *
 * PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250
 */
static const uint16_t phy_reg_init[] = {
    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
                   MII_CR_FULL_DUPLEX |
                   MII_CR_AUTO_NEG_EN,

    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
                   MII_SR_LINK_STATUS |   /* link initially up */
                   MII_SR_AUTONEG_CAPS |
                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
                   MII_SR_PREAMBLE_SUPPRESS |
                   MII_SR_EXTENDED_STATUS |
                   MII_SR_10T_HD_CAPS |
                   MII_SR_10T_FD_CAPS |
                   MII_SR_100X_HD_CAPS |
                   MII_SR_100X_FD_CAPS,

    [PHY_ID1] = 0x141,
    /* [PHY_ID2] configured per DevId, from e1000_reset() */
    [PHY_AUTONEG_ADV] = 0xde1,
    [PHY_LP_ABILITY] = 0x1e0,
    [PHY_1000T_CTRL] = 0x0e00,
    [PHY_1000T_STATUS] = 0x3c00,
    [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
};
286 
/*
 * Reset values of the MAC registers; e1000_reset() copies this table over
 * the start of mac_reg[] after zeroing it.  STATUS starts with the link-up
 * bit (E1000_STATUS_LU) set.
 */
static const uint32_t mac_reg_init[] = {
    [PBA]     = 0x00100030,
    [LEDCTL]  = 0x602,
    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};
300 
/*
 * Fold a candidate mitigation delay into *curr, keeping the smallest
 * non-zero value seen.  Zero means "not set", both for *curr and for the
 * candidate: a zero candidate never changes *curr.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;                 /* candidate not set */
    }
    if (*curr != 0 && *curr <= value) {
        return;                 /* existing delay is already at least as short */
    }
    *curr = value;
}
309 
310 static void
311 set_interrupt_cause(E1000State *s, int index, uint32_t val)
312 {
313     PCIDevice *d = PCI_DEVICE(s);
314     uint32_t pending_ints;
315     uint32_t mit_delay;
316 
317     s->mac_reg[ICR] = val;
318 
319     /*
320      * Make sure ICR and ICS registers have the same value.
321      * The spec says that the ICS register is write-only.  However in practice,
322      * on real hardware ICS is readable, and for reads it has the same value as
323      * ICR (except that ICS does not have the clear on read behaviour of ICR).
324      *
325      * The VxWorks PRO/1000 driver uses this behaviour.
326      */
327     s->mac_reg[ICS] = val;
328 
329     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
330     if (!s->mit_irq_level && pending_ints) {
331         /*
332          * Here we detect a potential raising edge. We postpone raising the
333          * interrupt line if we are inside the mitigation delay window
334          * (s->mit_timer_on == 1).
335          * We provide a partial implementation of interrupt mitigation,
336          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
337          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
338          * RADV; relative timers based on TIDV and RDTR are not implemented.
339          */
340         if (s->mit_timer_on) {
341             return;
342         }
343         if (chkflag(MIT)) {
344             /* Compute the next mitigation delay according to pending
345              * interrupts and the current values of RADV (provided
346              * RDTR!=0), TADV and ITR.
347              * Then rearm the timer.
348              */
349             mit_delay = 0;
350             if (s->mit_ide &&
351                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
352                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
353             }
354             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
355                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
356             }
357             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
358 
359             if (mit_delay) {
360                 s->mit_timer_on = 1;
361                 timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
362                           mit_delay * 256);
363             }
364             s->mit_ide = 0;
365         }
366     }
367 
368     s->mit_irq_level = (pending_ints != 0);
369     pci_set_irq(d, s->mit_irq_level);
370 }
371 
372 static void
373 e1000_mit_timer(void *opaque)
374 {
375     E1000State *s = opaque;
376 
377     s->mit_timer_on = 0;
378     /* Call set_interrupt_cause to update the irq level (if necessary). */
379     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
380 }
381 
382 static void
383 set_ics(E1000State *s, int index, uint32_t val)
384 {
385     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
386         s->mac_reg[IMS]);
387     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
388 }
389 
390 static void
391 e1000_autoneg_timer(void *opaque)
392 {
393     E1000State *s = opaque;
394     if (!qemu_get_queue(s->nic)->link_down) {
395         e1000_link_up(s);
396         s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK;
397         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
398         DBGOUT(PHY, "Auto negotiation is completed\n");
399         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
400     }
401 }
402 
403 static int
404 rxbufsize(uint32_t v)
405 {
406     v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
407          E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
408          E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
409     switch (v) {
410     case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
411         return 16384;
412     case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
413         return 8192;
414     case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
415         return 4096;
416     case E1000_RCTL_SZ_1024:
417         return 1024;
418     case E1000_RCTL_SZ_512:
419         return 512;
420     case E1000_RCTL_SZ_256:
421         return 256;
422     }
423     return 2048;
424 }
425 
426 static void e1000_reset(void *opaque)
427 {
428     E1000State *d = opaque;
429     E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
430     uint8_t *macaddr = d->conf.macaddr.a;
431     int i;
432 
433     timer_del(d->autoneg_timer);
434     timer_del(d->mit_timer);
435     d->mit_timer_on = 0;
436     d->mit_irq_level = 0;
437     d->mit_ide = 0;
438     memset(d->phy_reg, 0, sizeof d->phy_reg);
439     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
440     d->phy_reg[PHY_ID2] = edc->phy_id2;
441     memset(d->mac_reg, 0, sizeof d->mac_reg);
442     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
443     d->rxbuf_min_shift = 1;
444     memset(&d->tx, 0, sizeof d->tx);
445 
446     if (qemu_get_queue(d->nic)->link_down) {
447         e1000_link_down(d);
448     }
449 
450     /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
451     d->mac_reg[RA] = 0;
452     d->mac_reg[RA + 1] = E1000_RAH_AV;
453     for (i = 0; i < 4; i++) {
454         d->mac_reg[RA] |= macaddr[i] << (8 * i);
455         d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
456     }
457     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
458 }
459 
460 static void
461 set_ctrl(E1000State *s, int index, uint32_t val)
462 {
463     /* RST is self clearing */
464     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
465 }
466 
467 static void
468 set_rx_control(E1000State *s, int index, uint32_t val)
469 {
470     s->mac_reg[RCTL] = val;
471     s->rxbuf_size = rxbufsize(val);
472     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
473     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
474            s->mac_reg[RCTL]);
475     qemu_flush_queued_packets(qemu_get_queue(s->nic));
476 }
477 
478 static void
479 set_mdic(E1000State *s, int index, uint32_t val)
480 {
481     uint32_t data = val & E1000_MDIC_DATA_MASK;
482     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
483 
484     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
485         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
486     else if (val & E1000_MDIC_OP_READ) {
487         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
488         if (!(phy_regcap[addr] & PHY_R)) {
489             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
490             val |= E1000_MDIC_ERROR;
491         } else
492             val = (val ^ data) | s->phy_reg[addr];
493     } else if (val & E1000_MDIC_OP_WRITE) {
494         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
495         if (!(phy_regcap[addr] & PHY_W)) {
496             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
497             val |= E1000_MDIC_ERROR;
498         } else {
499             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
500                 phyreg_writeops[addr](s, index, data);
501             } else {
502                 s->phy_reg[addr] = data;
503             }
504         }
505     }
506     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
507 
508     if (val & E1000_MDIC_INT_EN) {
509         set_ics(s, 0, E1000_ICR_MDAC);
510     }
511 }
512 
513 static uint32_t
514 get_eecd(E1000State *s, int index)
515 {
516     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
517 
518     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
519            s->eecd_state.bitnum_out, s->eecd_state.reading);
520     if (!s->eecd_state.reading ||
521         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
522           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
523         ret |= E1000_EECD_DO;
524     return ret;
525 }
526 
527 static void
528 set_eecd(E1000State *s, int index, uint32_t val)
529 {
530     uint32_t oldval = s->eecd_state.old_eecd;
531 
532     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
533             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
534     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
535         return;
536     }
537     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
538         s->eecd_state.val_in = 0;
539         s->eecd_state.bitnum_in = 0;
540         s->eecd_state.bitnum_out = 0;
541         s->eecd_state.reading = 0;
542     }
543     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
544         return;
545     }
546     if (!(E1000_EECD_SK & val)) {               /* falling edge */
547         s->eecd_state.bitnum_out++;
548         return;
549     }
550     s->eecd_state.val_in <<= 1;
551     if (val & E1000_EECD_DI)
552         s->eecd_state.val_in |= 1;
553     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
554         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
555         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
556             EEPROM_READ_OPCODE_MICROWIRE);
557     }
558     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
559            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
560            s->eecd_state.reading);
561 }
562 
563 static uint32_t
564 flash_eerd_read(E1000State *s, int x)
565 {
566     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
567 
568     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
569         return (s->mac_reg[EERD]);
570 
571     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
572         return (E1000_EEPROM_RW_REG_DONE | r);
573 
574     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
575            E1000_EEPROM_RW_REG_DONE | r);
576 }
577 
/*
 * Compute an Internet checksum over part of a packet and store it,
 * big-endian, inside the packet.
 *
 * data: packet bytes
 * n:    packet length
 * sloc: offset at which the 16-bit checksum is stored
 * css:  offset of the first byte covered by the checksum
 * cse:  offset of the last byte covered; 0 (or a value >= n) means
 *       "through the end of the packet"
 *
 * Nothing is written unless sloc lies below the last covered byte.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t end = n;
    uint32_t sum;

    if (cse != 0 && cse < n) {
        end = cse + 1;
    }
    if (sloc >= end - 1) {
        return;
    }
    sum = net_checksum_add(end - css, data + css);
    stw_be_p(data + sloc, net_checksum_finish(sum));
}
590 
591 static inline void
592 inc_reg_if_not_full(E1000State *s, int index)
593 {
594     if (s->mac_reg[index] != 0xffffffff) {
595         s->mac_reg[index]++;
596     }
597 }
598 
599 static inline void
600 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
601 {
602     if (!memcmp(arr, bcast, sizeof bcast)) {
603         inc_reg_if_not_full(s, BPTC);
604     } else if (arr[0] & 1) {
605         inc_reg_if_not_full(s, MPTC);
606     }
607 }
608 
609 static void
610 grow_8reg_if_not_full(E1000State *s, int index, int size)
611 {
612     uint64_t sum = s->mac_reg[index] | (uint64_t)s->mac_reg[index+1] << 32;
613 
614     if (sum + size < sum) {
615         sum = ~0ULL;
616     } else {
617         sum += size;
618     }
619     s->mac_reg[index] = sum;
620     s->mac_reg[index+1] = sum >> 32;
621 }
622 
623 static void
624 increase_size_stats(E1000State *s, const int *size_regs, int size)
625 {
626     if (size > 1023) {
627         inc_reg_if_not_full(s, size_regs[5]);
628     } else if (size > 511) {
629         inc_reg_if_not_full(s, size_regs[4]);
630     } else if (size > 255) {
631         inc_reg_if_not_full(s, size_regs[3]);
632     } else if (size > 127) {
633         inc_reg_if_not_full(s, size_regs[2]);
634     } else if (size > 64) {
635         inc_reg_if_not_full(s, size_regs[1]);
636     } else if (size == 64) {
637         inc_reg_if_not_full(s, size_regs[0]);
638     }
639 }
640 
641 static inline int
642 vlan_enabled(E1000State *s)
643 {
644     return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
645 }
646 
647 static inline int
648 vlan_rx_filter_enabled(E1000State *s)
649 {
650     return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
651 }
652 
653 static inline int
654 is_vlan_packet(E1000State *s, const uint8_t *buf)
655 {
656     return (be16_to_cpup((uint16_t *)(buf + 12)) ==
657                 le16_to_cpu(s->mac_reg[VET]));
658 }
659 
660 static inline int
661 is_vlan_txd(uint32_t txd_lower)
662 {
663     return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
664 }
665 
666 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
667  * fill it in, just pad descriptor length by 4 bytes unless guest
668  * told us to strip it off the packet. */
669 static inline int
670 fcs_len(E1000State *s)
671 {
672     return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
673 }
674 
675 static void
676 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
677 {
678     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
679                                     PTC1023, PTC1522 };
680 
681     NetClientState *nc = qemu_get_queue(s->nic);
682     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
683         nc->info->receive(nc, buf, size);
684     } else {
685         qemu_send_packet(nc, buf, size);
686     }
687     inc_tx_bcast_or_mcast_count(s, buf);
688     increase_size_stats(s, PTCregs, size);
689 }
690 
/*
 * Transmit the frame currently assembled in s->tx.data.
 *
 * For a TCP-segmentation packet (context TSE and current-packet TSE both
 * set) the headers in the buffer are first patched for this segment:
 * length fields, the IPv4 field at offset 4 (advanced by the segment
 * count), the TCP sequence number and flags, and the pseudo-header part of
 * the checksum.  Requested checksums are then inserted, an optional VLAN
 * tag is spliced in, the frame is sent and the TX statistics registers are
 * updated.
 */
static void
xmit_seg(E1000State *s)
{
    uint16_t len, *sp;
    unsigned int frames = s->tx.tso_frames, css, sofar;
    struct e1000_tx *tp = &s->tx;

    if (tp->tse && tp->cptse) {
        css = tp->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (tp->ip) {    /* IPv4 */
            /* 16-bit length at offset 2 of the IP header */
            stw_be_p(tp->data+css+2, tp->size - css);
            /* 16-bit field at offset 4, advanced by the segment count */
            stw_be_p(tp->data+css+4,
                     be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
        } else {         /* IPv6 */
            /* 16-bit length at offset 4 of the IP header */
            stw_be_p(tp->data+css+4, tp->size - css);
        }
        css = tp->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
        if (tp->tcp) {
            /* payload bytes already sent in earlier segments */
            sofar = frames * tp->mss;
            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
            if (tp->paylen - sofar > tp->mss) {
                /* more segments follow: clear the flags on this one */
                tp->data[css + 13] &= ~9;    /* PSH, FIN */
            } else if (frames) {
                inc_reg_if_not_full(s, TSCTC);
            }
        } else    /* UDP */
            stw_be_p(tp->data+css+4, len);
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            // add pseudo-header length before checksum calculation
            sp = (uint16_t *)(tp->data + tp->tucso);
            phsum = be16_to_cpup(sp) + len;
            /* fold the carry back into the low 16 bits */
            phsum = (phsum >> 16) + (phsum & 0xffff);
            stw_be_p(sp, phsum);
        }
        tp->tso_frames++;
    }

    if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
        putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
        putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
    if (tp->vlan_needed) {
        /*
         * Insert the 4-byte tag after the 12 address bytes by sliding those
         * 12 bytes back by four, into tp->vlan (which sits directly before
         * tp->data), and writing the tag into the gap.  The frame then
         * starts at tp->vlan and is 4 bytes longer.
         */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else {
        e1000_send_packet(s, tp->data, tp->size);
    }

    inc_reg_if_not_full(s, TPT);
    grow_8reg_if_not_full(s, TOTL, s->tx.size);
    /* The "good packets/octets transmitted" counters mirror the totals. */
    s->mac_reg[GPTC] = s->mac_reg[TPT];
    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
}
752 
753 static void
754 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
755 {
756     PCIDevice *d = PCI_DEVICE(s);
757     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
758     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
759     unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
760     unsigned int msh = 0xfffff;
761     uint64_t addr;
762     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
763     struct e1000_tx *tp = &s->tx;
764 
765     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
766     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
767         op = le32_to_cpu(xp->cmd_and_length);
768         tp->ipcss = xp->lower_setup.ip_fields.ipcss;
769         tp->ipcso = xp->lower_setup.ip_fields.ipcso;
770         tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
771         tp->tucss = xp->upper_setup.tcp_fields.tucss;
772         tp->tucso = xp->upper_setup.tcp_fields.tucso;
773         tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
774         tp->paylen = op & 0xfffff;
775         tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
776         tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
777         tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
778         tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
779         tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
780         tp->tso_frames = 0;
781         if (tp->tucso == 0) {    /* this is probably wrong */
782             DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
783             tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
784         }
785         return;
786     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
787         // data descriptor
788         if (tp->size == 0) {
789             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
790         }
791         tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
792     } else {
793         // legacy descriptor
794         tp->cptse = 0;
795     }
796 
797     if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
798         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
799         tp->vlan_needed = 1;
800         stw_be_p(tp->vlan_header,
801                       le16_to_cpu(s->mac_reg[VET]));
802         stw_be_p(tp->vlan_header + 2,
803                       le16_to_cpu(dp->upper.fields.special));
804     }
805 
806     addr = le64_to_cpu(dp->buffer_addr);
807     if (tp->tse && tp->cptse) {
808         msh = tp->hdr_len + tp->mss;
809         do {
810             bytes = split_size;
811             if (tp->size + bytes > msh)
812                 bytes = msh - tp->size;
813 
814             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
815             pci_dma_read(d, addr, tp->data + tp->size, bytes);
816             sz = tp->size + bytes;
817             if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
818                 memmove(tp->header, tp->data, tp->hdr_len);
819             }
820             tp->size = sz;
821             addr += bytes;
822             if (sz == msh) {
823                 xmit_seg(s);
824                 memmove(tp->data, tp->header, tp->hdr_len);
825                 tp->size = tp->hdr_len;
826             }
827             split_size -= bytes;
828         } while (bytes && split_size);
829     } else if (!tp->tse && tp->cptse) {
830         // context descriptor TSE is not set, while data descriptor TSE is set
831         DBGOUT(TXERR, "TCP segmentation error\n");
832     } else {
833         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
834         pci_dma_read(d, addr, tp->data + tp->size, split_size);
835         tp->size += split_size;
836     }
837 
838     if (!(txd_lower & E1000_TXD_CMD_EOP))
839         return;
840     if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
841         xmit_seg(s);
842     }
843     tp->tso_frames = 0;
844     tp->sum_needed = 0;
845     tp->vlan_needed = 0;
846     tp->size = 0;
847     tp->cptse = 0;
848 }
849 
850 static uint32_t
851 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
852 {
853     PCIDevice *d = PCI_DEVICE(s);
854     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
855 
856     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
857         return 0;
858     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
859                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
860     dp->upper.data = cpu_to_le32(txd_upper);
861     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
862                   &dp->upper, sizeof(dp->upper));
863     return E1000_ICR_TXDW;
864 }
865 
866 static uint64_t tx_desc_base(E1000State *s)
867 {
868     uint64_t bah = s->mac_reg[TDBAH];
869     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
870 
871     return (bah << 32) + bal;
872 }
873 
874 static void
875 start_xmit(E1000State *s)
876 {
877     PCIDevice *d = PCI_DEVICE(s);
878     dma_addr_t base;
879     struct e1000_tx_desc desc;
880     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
881 
882     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
883         DBGOUT(TX, "tx disabled\n");
884         return;
885     }
886 
887     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
888         base = tx_desc_base(s) +
889                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
890         pci_dma_read(d, base, &desc, sizeof(desc));
891 
892         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
893                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
894                desc.upper.data);
895 
896         process_tx_desc(s, &desc);
897         cause |= txdesc_writeback(s, base, &desc);
898 
899         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
900             s->mac_reg[TDH] = 0;
901         /*
902          * the following could happen only if guest sw assigns
903          * bogus values to TDT/TDLEN.
904          * there's nothing too intelligent we could do about this.
905          */
906         if (s->mac_reg[TDH] == tdh_start) {
907             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
908                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
909             break;
910         }
911     }
912     set_ics(s, 0, cause);
913 }
914 
915 static int
916 receive_filter(E1000State *s, const uint8_t *buf, int size)
917 {
918     static const int mta_shift[] = {4, 3, 2, 0};
919     uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
920     int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
921 
922     if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
923         uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
924         uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
925                                      ((vid >> 5) & 0x7f));
926         if ((vfta & (1 << (vid & 0x1f))) == 0)
927             return 0;
928     }
929 
930     if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
931         return 1;
932     }
933 
934     if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
935         inc_reg_if_not_full(s, MPRC);
936         return 1;
937     }
938 
939     if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
940         inc_reg_if_not_full(s, BPRC);
941         return 1;
942     }
943 
944     for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
945         if (!(rp[1] & E1000_RAH_AV))
946             continue;
947         ra[0] = cpu_to_le32(rp[0]);
948         ra[1] = cpu_to_le32(rp[1]);
949         if (!memcmp(buf, (uint8_t *)ra, 6)) {
950             DBGOUT(RXFILTER,
951                    "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
952                    (int)(rp - s->mac_reg - RA)/2,
953                    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
954             return 1;
955         }
956     }
957     DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
958            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
959 
960     f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
961     f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
962     if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f))) {
963         inc_reg_if_not_full(s, MPRC);
964         return 1;
965     }
966     DBGOUT(RXFILTER,
967            "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
968            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
969            (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
970            s->mac_reg[MTA + (f >> 5)]);
971 
972     return 0;
973 }
974 
975 static void
976 e1000_set_link_status(NetClientState *nc)
977 {
978     E1000State *s = qemu_get_nic_opaque(nc);
979     uint32_t old_status = s->mac_reg[STATUS];
980 
981     if (nc->link_down) {
982         e1000_link_down(s);
983     } else {
984         if (have_autoneg(s) &&
985             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
986             /* emulate auto-negotiation if supported */
987             timer_mod(s->autoneg_timer,
988                       qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
989         } else {
990             e1000_link_up(s);
991         }
992     }
993 
994     if (s->mac_reg[STATUS] != old_status)
995         set_ics(s, 0, E1000_ICR_LSC);
996 }
997 
998 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
999 {
1000     int bufs;
1001     /* Fast-path short packets */
1002     if (total_size <= s->rxbuf_size) {
1003         return s->mac_reg[RDH] != s->mac_reg[RDT];
1004     }
1005     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
1006         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
1007     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
1008         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
1009             s->mac_reg[RDT] - s->mac_reg[RDH];
1010     } else {
1011         return false;
1012     }
1013     return total_size <= bufs * s->rxbuf_size;
1014 }
1015 
1016 static int
1017 e1000_can_receive(NetClientState *nc)
1018 {
1019     E1000State *s = qemu_get_nic_opaque(nc);
1020 
1021     return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
1022         (s->mac_reg[RCTL] & E1000_RCTL_EN) &&
1023         (s->parent_obj.config[PCI_COMMAND] & PCI_COMMAND_MASTER) &&
1024         e1000_has_rxbufs(s, 1);
1025 }
1026 
1027 static uint64_t rx_desc_base(E1000State *s)
1028 {
1029     uint64_t bah = s->mac_reg[RDBAH];
1030     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
1031 
1032     return (bah << 32) + bal;
1033 }
1034 
1035 static ssize_t
1036 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
1037 {
1038     E1000State *s = qemu_get_nic_opaque(nc);
1039     PCIDevice *d = PCI_DEVICE(s);
1040     struct e1000_rx_desc desc;
1041     dma_addr_t base;
1042     unsigned int n, rdt;
1043     uint32_t rdh_start;
1044     uint16_t vlan_special = 0;
1045     uint8_t vlan_status = 0;
1046     uint8_t min_buf[MIN_BUF_SIZE];
1047     struct iovec min_iov;
1048     uint8_t *filter_buf = iov->iov_base;
1049     size_t size = iov_size(iov, iovcnt);
1050     size_t iov_ofs = 0;
1051     size_t desc_offset;
1052     size_t desc_size;
1053     size_t total_size;
1054     static const int PRCregs[6] = { PRC64, PRC127, PRC255, PRC511,
1055                                     PRC1023, PRC1522 };
1056 
1057     if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
1058         return -1;
1059     }
1060 
1061     if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
1062         return -1;
1063     }
1064 
1065     /* Pad to minimum Ethernet frame length */
1066     if (size < sizeof(min_buf)) {
1067         iov_to_buf(iov, iovcnt, 0, min_buf, size);
1068         memset(&min_buf[size], 0, sizeof(min_buf) - size);
1069         inc_reg_if_not_full(s, RUC);
1070         min_iov.iov_base = filter_buf = min_buf;
1071         min_iov.iov_len = size = sizeof(min_buf);
1072         iovcnt = 1;
1073         iov = &min_iov;
1074     } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
1075         /* This is very unlikely, but may happen. */
1076         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
1077         filter_buf = min_buf;
1078     }
1079 
1080     /* Discard oversized packets if !LPE and !SBP. */
1081     if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
1082         (size > MAXIMUM_ETHERNET_VLAN_SIZE
1083         && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
1084         && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
1085         inc_reg_if_not_full(s, ROC);
1086         return size;
1087     }
1088 
1089     if (!receive_filter(s, filter_buf, size)) {
1090         return size;
1091     }
1092 
1093     if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
1094         vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
1095                                                                 + 14)));
1096         iov_ofs = 4;
1097         if (filter_buf == iov->iov_base) {
1098             memmove(filter_buf + 4, filter_buf, 12);
1099         } else {
1100             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
1101             while (iov->iov_len <= iov_ofs) {
1102                 iov_ofs -= iov->iov_len;
1103                 iov++;
1104             }
1105         }
1106         vlan_status = E1000_RXD_STAT_VP;
1107         size -= 4;
1108     }
1109 
1110     rdh_start = s->mac_reg[RDH];
1111     desc_offset = 0;
1112     total_size = size + fcs_len(s);
1113     if (!e1000_has_rxbufs(s, total_size)) {
1114             set_ics(s, 0, E1000_ICS_RXO);
1115             return -1;
1116     }
1117     do {
1118         desc_size = total_size - desc_offset;
1119         if (desc_size > s->rxbuf_size) {
1120             desc_size = s->rxbuf_size;
1121         }
1122         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
1123         pci_dma_read(d, base, &desc, sizeof(desc));
1124         desc.special = vlan_special;
1125         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
1126         if (desc.buffer_addr) {
1127             if (desc_offset < size) {
1128                 size_t iov_copy;
1129                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
1130                 size_t copy_size = size - desc_offset;
1131                 if (copy_size > s->rxbuf_size) {
1132                     copy_size = s->rxbuf_size;
1133                 }
1134                 do {
1135                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
1136                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
1137                     copy_size -= iov_copy;
1138                     ba += iov_copy;
1139                     iov_ofs += iov_copy;
1140                     if (iov_ofs == iov->iov_len) {
1141                         iov++;
1142                         iov_ofs = 0;
1143                     }
1144                 } while (copy_size);
1145             }
1146             desc_offset += desc_size;
1147             desc.length = cpu_to_le16(desc_size);
1148             if (desc_offset >= total_size) {
1149                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
1150             } else {
1151                 /* Guest zeroing out status is not a hardware requirement.
1152                    Clear EOP in case guest didn't do it. */
1153                 desc.status &= ~E1000_RXD_STAT_EOP;
1154             }
1155         } else { // as per intel docs; skip descriptors with null buf addr
1156             DBGOUT(RX, "Null RX descriptor!!\n");
1157         }
1158         pci_dma_write(d, base, &desc, sizeof(desc));
1159 
1160         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
1161             s->mac_reg[RDH] = 0;
1162         /* see comment in start_xmit; same here */
1163         if (s->mac_reg[RDH] == rdh_start) {
1164             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1165                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1166             set_ics(s, 0, E1000_ICS_RXO);
1167             return -1;
1168         }
1169     } while (desc_offset < total_size);
1170 
1171     increase_size_stats(s, PRCregs, total_size);
1172     inc_reg_if_not_full(s, TPR);
1173     s->mac_reg[GPRC] = s->mac_reg[TPR];
1174     /* TOR - Total Octets Received:
1175      * This register includes bytes received in a packet from the <Destination
1176      * Address> field through the <CRC> field, inclusively.
1177      * Always include FCS length (4) in size.
1178      */
1179     grow_8reg_if_not_full(s, TORL, size+4);
1180     s->mac_reg[GORCL] = s->mac_reg[TORL];
1181     s->mac_reg[GORCH] = s->mac_reg[TORH];
1182 
1183     n = E1000_ICS_RXT0;
1184     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1185         rdt += s->mac_reg[RDLEN] / sizeof(desc);
1186     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1187         s->rxbuf_min_shift)
1188         n |= E1000_ICS_RXDMT0;
1189 
1190     set_ics(s, 0, n);
1191 
1192     return size;
1193 }
1194 
1195 static ssize_t
1196 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1197 {
1198     const struct iovec iov = {
1199         .iov_base = (uint8_t *)buf,
1200         .iov_len = size
1201     };
1202 
1203     return e1000_receive_iov(nc, &iov, 1);
1204 }
1205 
1206 static uint32_t
1207 mac_readreg(E1000State *s, int index)
1208 {
1209     return s->mac_reg[index];
1210 }
1211 
1212 static uint32_t
1213 mac_low4_read(E1000State *s, int index)
1214 {
1215     return s->mac_reg[index] & 0xf;
1216 }
1217 
1218 static uint32_t
1219 mac_low11_read(E1000State *s, int index)
1220 {
1221     return s->mac_reg[index] & 0x7ff;
1222 }
1223 
1224 static uint32_t
1225 mac_low13_read(E1000State *s, int index)
1226 {
1227     return s->mac_reg[index] & 0x1fff;
1228 }
1229 
1230 static uint32_t
1231 mac_low16_read(E1000State *s, int index)
1232 {
1233     return s->mac_reg[index] & 0xffff;
1234 }
1235 
1236 static uint32_t
1237 mac_icr_read(E1000State *s, int index)
1238 {
1239     uint32_t ret = s->mac_reg[ICR];
1240 
1241     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1242     set_interrupt_cause(s, 0, 0);
1243     return ret;
1244 }
1245 
1246 static uint32_t
1247 mac_read_clr4(E1000State *s, int index)
1248 {
1249     uint32_t ret = s->mac_reg[index];
1250 
1251     s->mac_reg[index] = 0;
1252     return ret;
1253 }
1254 
1255 static uint32_t
1256 mac_read_clr8(E1000State *s, int index)
1257 {
1258     uint32_t ret = s->mac_reg[index];
1259 
1260     s->mac_reg[index] = 0;
1261     s->mac_reg[index-1] = 0;
1262     return ret;
1263 }
1264 
1265 static void
1266 mac_writereg(E1000State *s, int index, uint32_t val)
1267 {
1268     uint32_t macaddr[2];
1269 
1270     s->mac_reg[index] = val;
1271 
1272     if (index == RA + 1) {
1273         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1274         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1275         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1276     }
1277 }
1278 
1279 static void
1280 set_rdt(E1000State *s, int index, uint32_t val)
1281 {
1282     s->mac_reg[index] = val & 0xffff;
1283     if (e1000_has_rxbufs(s, 1)) {
1284         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1285     }
1286 }
1287 
1288 static void
1289 set_16bit(E1000State *s, int index, uint32_t val)
1290 {
1291     s->mac_reg[index] = val & 0xffff;
1292 }
1293 
1294 static void
1295 set_dlen(E1000State *s, int index, uint32_t val)
1296 {
1297     s->mac_reg[index] = val & 0xfff80;
1298 }
1299 
1300 static void
1301 set_tctl(E1000State *s, int index, uint32_t val)
1302 {
1303     s->mac_reg[index] = val;
1304     s->mac_reg[TDT] &= 0xffff;
1305     start_xmit(s);
1306 }
1307 
1308 static void
1309 set_icr(E1000State *s, int index, uint32_t val)
1310 {
1311     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1312     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1313 }
1314 
1315 static void
1316 set_imc(E1000State *s, int index, uint32_t val)
1317 {
1318     s->mac_reg[IMS] &= ~val;
1319     set_ics(s, 0, 0);
1320 }
1321 
1322 static void
1323 set_ims(E1000State *s, int index, uint32_t val)
1324 {
1325     s->mac_reg[IMS] |= val;
1326     set_ics(s, 0, 0);
1327 }
1328 
/*
 * Table of MMIO read handlers, indexed by register index (the value
 * e1000_mmio_read() derives from the access address).  Slots without an
 * initializer stay NULL, which e1000_mmio_read() reports as an unknown
 * register.
 *
 * getreg(x) is shorthand for "register x is read with mac_readreg".
 */
#define getreg(x)    [x] = mac_readreg
static uint32_t (*macreg_readops[])(E1000State *, int) = {
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEC),      getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),

    /* Read-to-clear registers: the *H entries use mac_read_clr8, which
     * also clears the register at index - 1. */
    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    /* Registers with dedicated read handlers. */
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,
    /* Registers of which only the low N bits are readable. */
    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
    [RDFPC]   = mac_low13_read,
    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
    [TDFPC]   = mac_low13_read,
    [AIT]     = mac_low16_read,

    /* Register ranges (GNU range designators). */
    [CRCERRS ... MPC]   = &mac_readreg,
    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
    [FFLT ... FFLT+6]   = &mac_low11_read,
    [RA ... RA+31]      = &mac_readreg,
    [WUPM ... WUPM+31]  = &mac_readreg,
    [MTA ... MTA+127]   = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
    [FFMT ... FFMT+254] = &mac_low4_read,
    [FFVT ... FFVT+254] = &mac_readreg,
    [PBM ... PBM+16383] = &mac_readreg,
};
/* Number of slots in macreg_readops; used to bounds-check the index. */
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1383 
/*
 * Table of MMIO write handlers, indexed by register index (the value
 * e1000_mmio_write() derives from the access address).  Slots without an
 * initializer stay NULL; e1000_mmio_write() then reports the register as
 * read-only (if it has a read handler) or unknown.
 *
 * putreg(x) is shorthand for "register x is written with mac_writereg".
 */
#define putreg(x)    [x] = mac_writereg
static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
    putreg(WUS),      putreg(AIT),

    /* Registers whose writes mask the value or trigger side effects. */
    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
    [ITR]    = set_16bit,

    /* Register ranges (GNU range designators). */
    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
    [FFLT ... FFLT+6]   = &mac_writereg,
    [RA ... RA+31]      = &mac_writereg,
    [WUPM ... WUPM+31]  = &mac_writereg,
    [MTA ... MTA+127]   = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
    [PBM ... PBM+16383] = &mac_writereg,
};

/* Number of slots in macreg_writeops; used to bounds-check the index. */
enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1413 
/* Bits 0 and 1 of each mac_reg_access[] entry. */
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

/* Build an entry that requires compat flag E1000_FLAG_<x>: the flag value
 * goes above the two low bits and "flag needed" is set. */
#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implemented
 *
 * The MMIO handlers consult this table: a register marked "flag needed" is
 * only accessible when the corresponding bit is set in s->compat_flags,
 * and an access to a "partially implemented" register is logged.
 * Only the listed index gets an entry; registers not listed are 0, i.e.
 * unrestricted. */
static const uint8_t mac_reg_access[0x8000] = {
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEC]     = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    /* Flag-gated and only partially implemented. */
    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1466 
1467 static void
1468 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1469                  unsigned size)
1470 {
1471     E1000State *s = opaque;
1472     unsigned int index = (addr & 0x1ffff) >> 2;
1473 
1474     if (index < NWRITEOPS && macreg_writeops[index]) {
1475         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1476             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1477             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1478                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1479                        "It is not fully implemented.\n", index<<2);
1480             }
1481             macreg_writeops[index](s, index, val);
1482         } else {    /* "flag needed" bit is set, but the flag is not active */
1483             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1484                    index<<2);
1485         }
1486     } else if (index < NREADOPS && macreg_readops[index]) {
1487         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1488                index<<2, val);
1489     } else {
1490         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1491                index<<2, val);
1492     }
1493 }
1494 
1495 static uint64_t
1496 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1497 {
1498     E1000State *s = opaque;
1499     unsigned int index = (addr & 0x1ffff) >> 2;
1500 
1501     if (index < NREADOPS && macreg_readops[index]) {
1502         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1503             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1504             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1505                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1506                        "It is not fully implemented.\n", index<<2);
1507             }
1508             return macreg_readops[index](s, index);
1509         } else {    /* "flag needed" bit is set, but the flag is not active */
1510             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1511                    index<<2);
1512         }
1513     } else {
1514         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1515     }
1516     return 0;
1517 }
1518 
1519 static const MemoryRegionOps e1000_mmio_ops = {
1520     .read = e1000_mmio_read,
1521     .write = e1000_mmio_write,
1522     .endianness = DEVICE_LITTLE_ENDIAN,
1523     .impl = {
1524         .min_access_size = 4,
1525         .max_access_size = 4,
1526     },
1527 };
1528 
1529 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1530                               unsigned size)
1531 {
1532     E1000State *s = opaque;
1533 
1534     (void)s;
1535     return 0;
1536 }
1537 
1538 static void e1000_io_write(void *opaque, hwaddr addr,
1539                            uint64_t val, unsigned size)
1540 {
1541     E1000State *s = opaque;
1542 
1543     (void)s;
1544 }
1545 
1546 static const MemoryRegionOps e1000_io_ops = {
1547     .read = e1000_io_read,
1548     .write = e1000_io_write,
1549     .endianness = DEVICE_LITTLE_ENDIAN,
1550 };
1551 
/* Field-test predicate: true only for version 1 of the saved state.
 * @opaque is not used. */
static bool is_version_1(void *opaque, int version_id)
{
    const int legacy_version = 1;

    return version_id == legacy_version;
}
1556 
1557 static void e1000_pre_save(void *opaque)
1558 {
1559     E1000State *s = opaque;
1560     NetClientState *nc = qemu_get_queue(s->nic);
1561 
1562     /* If the mitigation timer is active, emulate a timeout now. */
1563     if (s->mit_timer_on) {
1564         e1000_mit_timer(s);
1565     }
1566 
1567     /*
1568      * If link is down and auto-negotiation is supported and ongoing,
1569      * complete auto-negotiation immediately. This allows us to look
1570      * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1571      */
1572     if (nc->link_down && have_autoneg(s)) {
1573         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1574     }
1575 }
1576 
1577 static int e1000_post_load(void *opaque, int version_id)
1578 {
1579     E1000State *s = opaque;
1580     NetClientState *nc = qemu_get_queue(s->nic);
1581 
1582     if (!chkflag(MIT)) {
1583         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1584             s->mac_reg[TADV] = 0;
1585         s->mit_irq_level = false;
1586     }
1587     s->mit_ide = 0;
1588     s->mit_timer_on = false;
1589 
1590     /* nc.link_down can't be migrated, so infer link_down according
1591      * to link status bit in mac_reg[STATUS].
1592      * Alternatively, restart link negotiation if it was in progress. */
1593     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1594 
1595     if (have_autoneg(s) &&
1596         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1597         nc->link_down = false;
1598         timer_mod(s->autoneg_timer,
1599                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1600     }
1601 
1602     return 0;
1603 }
1604 
1605 static bool e1000_mit_state_needed(void *opaque)
1606 {
1607     E1000State *s = opaque;
1608 
1609     return chkflag(MIT);
1610 }
1611 
1612 static bool e1000_full_mac_needed(void *opaque)
1613 {
1614     E1000State *s = opaque;
1615 
1616     return chkflag(MAC);
1617 }
1618 
/*
 * Subsection carrying the interrupt-mitigation state: the RDTR, RADV, TADV
 * and ITR registers plus mit_irq_level.  Gated by e1000_mit_state_needed().
 * NOTE(review): field order presumably defines the stream layout -- do not
 * reorder without checking the vmstate documentation.
 */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1633 
/*
 * Subsection carrying the entire mac_reg[] array (0x8000 32-bit entries).
 * Gated by e1000_full_mac_needed().
 */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1644 
/*
 * Main saved-state description of the e1000 device (version 2, accepting
 * version 1 and later).  Hooks: e1000_pre_save() before saving,
 * e1000_post_load() after loading.
 * NOTE(review): field order presumably defines the stream layout -- do not
 * reorder or remove entries without checking the vmstate documentation.
 */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* EEPROM access state */
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* In-progress transmit state, including the partially assembled
         * header and data buffers */
        VMSTATE_UINT8(tx.ipcss, E1000State),
        VMSTATE_UINT8(tx.ipcso, E1000State),
        VMSTATE_UINT16(tx.ipcse, E1000State),
        VMSTATE_UINT8(tx.tucss, E1000State),
        VMSTATE_UINT8(tx.tucso, E1000State),
        VMSTATE_UINT16(tx.tucse, E1000State),
        VMSTATE_UINT32(tx.paylen, E1000State),
        VMSTATE_UINT8(tx.hdr_len, E1000State),
        VMSTATE_UINT16(tx.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(tx.ip, E1000State),
        VMSTATE_INT8(tx.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individually saved MAC registers */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Receive address, multicast and VLAN filter tables */
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    /* Optional subsections, each gated by its own .needed predicate. */
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        NULL
    }
};
1728 
/*
 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
 *
 * This is only a template: pci_e1000_realize() copies it into the
 * per-device eeprom_data[] and then overwrites
 *   - words 0..2 with the configured MAC address,
 *   - words 11 and 13 (the "DevId" slots below) with the PCI device id,
 *   - word EEPROM_CHECKSUM_REG with the checksum computed over the
 *     preceding words.
 */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1743 
1744 /* PCI interface */
1745 
1746 static void
1747 e1000_mmio_setup(E1000State *d)
1748 {
1749     int i;
1750     const uint32_t excluded_regs[] = {
1751         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1752         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1753     };
1754 
1755     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1756                           "e1000-mmio", PNPMMIO_SIZE);
1757     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1758     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1759         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1760                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1761     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1762 }
1763 
1764 static void
1765 pci_e1000_uninit(PCIDevice *dev)
1766 {
1767     E1000State *d = E1000(dev);
1768 
1769     timer_del(d->autoneg_timer);
1770     timer_free(d->autoneg_timer);
1771     timer_del(d->mit_timer);
1772     timer_free(d->mit_timer);
1773     qemu_del_nic(d->nic);
1774 }
1775 
1776 static NetClientInfo net_e1000_info = {
1777     .type = NET_CLIENT_OPTIONS_KIND_NIC,
1778     .size = sizeof(NICState),
1779     .can_receive = e1000_can_receive,
1780     .receive = e1000_receive,
1781     .receive_iov = e1000_receive_iov,
1782     .link_status_changed = e1000_set_link_status,
1783 };
1784 
1785 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1786                                 uint32_t val, int len)
1787 {
1788     E1000State *s = E1000(pci_dev);
1789 
1790     pci_default_write_config(pci_dev, address, val, len);
1791 
1792     if (range_covers_byte(address, len, PCI_COMMAND) &&
1793         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1794         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1795     }
1796 }
1797 
1798 
1799 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1800 {
1801     DeviceState *dev = DEVICE(pci_dev);
1802     E1000State *d = E1000(pci_dev);
1803     PCIDeviceClass *pdc = PCI_DEVICE_GET_CLASS(pci_dev);
1804     uint8_t *pci_conf;
1805     uint16_t checksum = 0;
1806     int i;
1807     uint8_t *macaddr;
1808 
1809     pci_dev->config_write = e1000_write_config;
1810 
1811     pci_conf = pci_dev->config;
1812 
1813     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1814     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1815 
1816     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1817 
1818     e1000_mmio_setup(d);
1819 
1820     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1821 
1822     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1823 
1824     memmove(d->eeprom_data, e1000_eeprom_template,
1825         sizeof e1000_eeprom_template);
1826     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1827     macaddr = d->conf.macaddr.a;
1828     for (i = 0; i < 3; i++)
1829         d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1830     d->eeprom_data[11] = d->eeprom_data[13] = pdc->device_id;
1831     for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1832         checksum += d->eeprom_data[i];
1833     checksum = (uint16_t) EEPROM_SUM - checksum;
1834     d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1835 
1836     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1837                           object_get_typename(OBJECT(d)), dev->id, d);
1838 
1839     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1840 
1841     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1842     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1843 }
1844 
1845 static void qdev_e1000_reset(DeviceState *dev)
1846 {
1847     E1000State *d = E1000(dev);
1848     e1000_reset(d);
1849 }
1850 
/*
 * qdev properties of the device (installed as dc->props in
 * e1000_class_init()).
 *
 * Besides the generic NIC properties, three boolean properties map onto
 * individual bits of E1000State.compat_flags; all three are declared
 * with a default of true.
 */
static Property e1000_properties[] = {
    DEFINE_NIC_PROPERTIES(E1000State, conf),
    DEFINE_PROP_BIT("autonegotiation", E1000State,
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
    DEFINE_PROP_BIT("mitigation", E1000State,
                    compat_flags, E1000_FLAG_MIT_BIT, true),
    DEFINE_PROP_BIT("extra_mac_registers", E1000State,
                    compat_flags, E1000_FLAG_MAC_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};
1861 
/*
 * Static description of one concrete e1000 model.  An entry is handed to
 * e1000_class_init() as class_data when its type is registered.
 */
typedef struct E1000Info {
    const char *name;       /* QOM type name of the model */
    uint16_t   device_id;   /* copied to PCIDeviceClass.device_id */
    uint8_t    revision;    /* copied to PCIDeviceClass.revision */
    uint16_t   phy_id2;     /* copied to E1000BaseClass.phy_id2 */
} E1000Info;
1868 
1869 static void e1000_class_init(ObjectClass *klass, void *data)
1870 {
1871     DeviceClass *dc = DEVICE_CLASS(klass);
1872     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1873     E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1874     const E1000Info *info = data;
1875 
1876     k->realize = pci_e1000_realize;
1877     k->exit = pci_e1000_uninit;
1878     k->romfile = "efi-e1000.rom";
1879     k->vendor_id = PCI_VENDOR_ID_INTEL;
1880     k->device_id = info->device_id;
1881     k->revision = info->revision;
1882     e->phy_id2 = info->phy_id2;
1883     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1884     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1885     dc->desc = "Intel Gigabit Ethernet";
1886     dc->reset = qdev_e1000_reset;
1887     dc->vmsd = &vmstate_e1000;
1888     dc->props = e1000_properties;
1889 }
1890 
1891 static void e1000_instance_init(Object *obj)
1892 {
1893     E1000State *n = E1000(obj);
1894     device_add_bootindex_property(obj, &n->conf.bootindex,
1895                                   "bootindex", "/ethernet-phy@0",
1896                                   DEVICE(n), NULL);
1897 }
1898 
1899 static const TypeInfo e1000_base_info = {
1900     .name          = TYPE_E1000_BASE,
1901     .parent        = TYPE_PCI_DEVICE,
1902     .instance_size = sizeof(E1000State),
1903     .instance_init = e1000_instance_init,
1904     .class_size    = sizeof(E1000BaseClass),
1905     .abstract      = true,
1906 };
1907 
/*
 * Concrete e1000 models.  e1000_register_types() registers one QOM type
 * per entry, in array order, each with TYPE_E1000_BASE as its parent.
 */
static const E1000Info e1000_devices[] = {
    {
        .name      = "e1000",
        .device_id = E1000_DEV_ID_82540EM,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
    {
        .name      = "e1000-82544gc",
        .device_id = E1000_DEV_ID_82544GC_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_82544x,
    },
    {
        .name      = "e1000-82545em",
        .device_id = E1000_DEV_ID_82545EM_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
};
1928 
1929 static void e1000_register_types(void)
1930 {
1931     int i;
1932 
1933     type_register_static(&e1000_base_info);
1934     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1935         const E1000Info *info = &e1000_devices[i];
1936         TypeInfo type_info = {};
1937 
1938         type_info.name = info->name;
1939         type_info.parent = TYPE_E1000_BASE;
1940         type_info.class_data = (void *)info;
1941         type_info.class_init = e1000_class_init;
1942         type_info.instance_init = e1000_instance_init;
1943 
1944         type_register(&type_info);
1945     }
1946 }
1947 
1948 type_init(e1000_register_types)
1949