xref: /openbmc/qemu/hw/net/e1000.c (revision 503bb0b9)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/hw.h"
30 #include "hw/pci/pci.h"
31 #include "net/net.h"
32 #include "net/checksum.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 #include "qemu/iov.h"
36 #include "qemu/range.h"
37 
38 #include "e1000x_common.h"
39 #include "trace.h"
40 
/* Destination MAC address that marks a frame as broadcast. */
static const uint8_t bcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

/* Uncomment the next line to compile in the DBGOUT() tracing. */
/* #define E1000_DEBUG */

#ifdef E1000_DEBUG
/* Debug categories; each enumerator is a bit position in debugflags. */
enum {
    DEBUG_GENERAL,
    DEBUG_IO,
    DEBUG_MMIO,
    DEBUG_INTERRUPT,
    DEBUG_RX,
    DEBUG_TX,
    DEBUG_MDIC,
    DEBUG_EEPROM,
    DEBUG_UNKNOWN,
    DEBUG_TXSUM,
    DEBUG_TXERR,
    DEBUG_RXERR,
    DEBUG_RXFILTER,
    DEBUG_PHY,
    DEBUG_NOTYET,
};
#define DBGBIT(x) (1 << DEBUG_##x)

/* Set of categories whose messages are actually printed. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/* Print an "e1000: "-prefixed message to stderr if 'what' is enabled. */
#define DBGOUT(what, fmt, ...)                                  \
    do {                                                        \
        if (debugflags & DBGBIT(what)) {                        \
            fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__);     \
        }                                                       \
    } while (0)
#else
/* Debugging compiled out: DBGOUT() is an empty statement. */
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif

#define IOPORT_SIZE   0x40
#define PNPMMIO_SIZE  0x20000
#define MIN_BUF_SIZE  60 /* Min. octets in an ethernet frame sans FCS */

/* NOTE(review): presumably 14-byte Ethernet header + 4-byte VLAN tag. */
#define MAXIMUM_ETHERNET_HDR_LEN (14 + 4)
68 
69 /*
70  * HW models:
71  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
72  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
73  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
74  *  Others never tested
75  */
76 
77 typedef struct E1000State_st {
78     /*< private >*/
79     PCIDevice parent_obj;
80     /*< public >*/
81 
82     NICState *nic;
83     NICConf conf;
84     MemoryRegion mmio;
85     MemoryRegion io;
86 
87     uint32_t mac_reg[0x8000];
88     uint16_t phy_reg[0x20];
89     uint16_t eeprom_data[64];
90 
91     uint32_t rxbuf_size;
92     uint32_t rxbuf_min_shift;
93     struct e1000_tx {
94         unsigned char header[256];
95         unsigned char vlan_header[4];
96         /* Fields vlan and data must not be reordered or separated. */
97         unsigned char vlan[4];
98         unsigned char data[0x10000];
99         uint16_t size;
100         unsigned char vlan_needed;
101         unsigned char sum_needed;
102         bool cptse;
103         e1000x_txd_props props;
104         e1000x_txd_props tso_props;
105         uint16_t tso_frames;
106     } tx;
107 
108     struct {
109         uint32_t val_in;    /* shifted in from guest driver */
110         uint16_t bitnum_in;
111         uint16_t bitnum_out;
112         uint16_t reading;
113         uint32_t old_eecd;
114     } eecd_state;
115 
116     QEMUTimer *autoneg_timer;
117 
118     QEMUTimer *mit_timer;      /* Mitigation timer. */
119     bool mit_timer_on;         /* Mitigation timer is running. */
120     bool mit_irq_level;        /* Tracks interrupt pin level. */
121     uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
122 
123 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
124 #define E1000_FLAG_AUTONEG_BIT 0
125 #define E1000_FLAG_MIT_BIT 1
126 #define E1000_FLAG_MAC_BIT 2
127 #define E1000_FLAG_TSO_BIT 3
128 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
129 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
130 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
131 #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
132     uint32_t compat_flags;
133     bool received_tx_tso;
134     bool use_tso_for_migration;
135     e1000x_txd_props mig_props;
136 } E1000State;
137 
138 #define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
139 
140 typedef struct E1000BaseClass {
141     PCIDeviceClass parent_class;
142     uint16_t phy_id2;
143 } E1000BaseClass;
144 
145 #define TYPE_E1000_BASE "e1000-base"
146 
147 #define E1000(obj) \
148     OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)
149 
150 #define E1000_DEVICE_CLASS(klass) \
151      OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
152 #define E1000_DEVICE_GET_CLASS(obj) \
153     OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
154 
155 static void
156 e1000_link_up(E1000State *s)
157 {
158     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
159 
160     /* E1000_STATUS_LU is tested by e1000_can_receive() */
161     qemu_flush_queued_packets(qemu_get_queue(s->nic));
162 }
163 
164 static void
165 e1000_autoneg_done(E1000State *s)
166 {
167     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
168 
169     /* E1000_STATUS_LU is tested by e1000_can_receive() */
170     qemu_flush_queued_packets(qemu_get_queue(s->nic));
171 }
172 
173 static bool
174 have_autoneg(E1000State *s)
175 {
176     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
177 }
178 
179 static void
180 set_phy_ctrl(E1000State *s, int index, uint16_t val)
181 {
182     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
183     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
184                                    MII_CR_RESET |
185                                    MII_CR_RESTART_AUTO_NEG);
186 
187     /*
188      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
189      * migrate during auto negotiation, after migration the link will be
190      * down.
191      */
192     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
193         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
194     }
195 }
196 
197 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
198     [PHY_CTRL] = set_phy_ctrl,
199 };
200 
201 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
202 
203 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
204 static const char phy_regcap[0x20] = {
205     [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
206     [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
207     [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
208     [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
209     [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
210     [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
211     [PHY_AUTONEG_EXP] = PHY_R,
212 };
213 
214 /* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
215 static const uint16_t phy_reg_init[] = {
216     [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
217                    MII_CR_FULL_DUPLEX |
218                    MII_CR_AUTO_NEG_EN,
219 
220     [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
221                    MII_SR_LINK_STATUS |   /* link initially up */
222                    MII_SR_AUTONEG_CAPS |
223                    /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
224                    MII_SR_PREAMBLE_SUPPRESS |
225                    MII_SR_EXTENDED_STATUS |
226                    MII_SR_10T_HD_CAPS |
227                    MII_SR_10T_FD_CAPS |
228                    MII_SR_100X_HD_CAPS |
229                    MII_SR_100X_FD_CAPS,
230 
231     [PHY_ID1] = 0x141,
232     /* [PHY_ID2] configured per DevId, from e1000_reset() */
233     [PHY_AUTONEG_ADV] = 0xde1,
234     [PHY_LP_ABILITY] = 0x1e0,
235     [PHY_1000T_CTRL] = 0x0e00,
236     [PHY_1000T_STATUS] = 0x3c00,
237     [M88E1000_PHY_SPEC_CTRL] = 0x360,
238     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
239     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
240 };
241 
242 static const uint32_t mac_reg_init[] = {
243     [PBA]     = 0x00100030,
244     [LEDCTL]  = 0x602,
245     [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
246                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
247     [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
248                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
249                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
250                 E1000_STATUS_LU,
251     [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
252                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
253                 E1000_MANC_RMCP_EN,
254 };
255 
/*
 * Lower *curr to value if value is a smaller, non-zero delay.
 *
 * A value of 0 means "no delay configured" and is ignored; *curr == 0
 * means no delay has been chosen yet, so any non-zero value is taken.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;
    }
    if (*curr != 0 && *curr <= value) {
        return;
    }
    *curr = value;
}
264 
265 static void
266 set_interrupt_cause(E1000State *s, int index, uint32_t val)
267 {
268     PCIDevice *d = PCI_DEVICE(s);
269     uint32_t pending_ints;
270     uint32_t mit_delay;
271 
272     s->mac_reg[ICR] = val;
273 
274     /*
275      * Make sure ICR and ICS registers have the same value.
276      * The spec says that the ICS register is write-only.  However in practice,
277      * on real hardware ICS is readable, and for reads it has the same value as
278      * ICR (except that ICS does not have the clear on read behaviour of ICR).
279      *
280      * The VxWorks PRO/1000 driver uses this behaviour.
281      */
282     s->mac_reg[ICS] = val;
283 
284     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
285     if (!s->mit_irq_level && pending_ints) {
286         /*
287          * Here we detect a potential raising edge. We postpone raising the
288          * interrupt line if we are inside the mitigation delay window
289          * (s->mit_timer_on == 1).
290          * We provide a partial implementation of interrupt mitigation,
291          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
292          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
293          * RADV; relative timers based on TIDV and RDTR are not implemented.
294          */
295         if (s->mit_timer_on) {
296             return;
297         }
298         if (chkflag(MIT)) {
299             /* Compute the next mitigation delay according to pending
300              * interrupts and the current values of RADV (provided
301              * RDTR!=0), TADV and ITR.
302              * Then rearm the timer.
303              */
304             mit_delay = 0;
305             if (s->mit_ide &&
306                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
307                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
308             }
309             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
310                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
311             }
312             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
313 
314             /*
315              * According to e1000 SPEC, the Ethernet controller guarantees
316              * a maximum observable interrupt rate of 7813 interrupts/sec.
317              * Thus if mit_delay < 500 then the delay should be set to the
318              * minimum delay possible which is 500.
319              */
320             mit_delay = (mit_delay < 500) ? 500 : mit_delay;
321 
322             s->mit_timer_on = 1;
323             timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
324                       mit_delay * 256);
325             s->mit_ide = 0;
326         }
327     }
328 
329     s->mit_irq_level = (pending_ints != 0);
330     pci_set_irq(d, s->mit_irq_level);
331 }
332 
333 static void
334 e1000_mit_timer(void *opaque)
335 {
336     E1000State *s = opaque;
337 
338     s->mit_timer_on = 0;
339     /* Call set_interrupt_cause to update the irq level (if necessary). */
340     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
341 }
342 
343 static void
344 set_ics(E1000State *s, int index, uint32_t val)
345 {
346     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
347         s->mac_reg[IMS]);
348     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
349 }
350 
351 static void
352 e1000_autoneg_timer(void *opaque)
353 {
354     E1000State *s = opaque;
355     if (!qemu_get_queue(s->nic)->link_down) {
356         e1000_autoneg_done(s);
357         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
358     }
359 }
360 
361 static void e1000_reset(void *opaque)
362 {
363     E1000State *d = opaque;
364     E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
365     uint8_t *macaddr = d->conf.macaddr.a;
366 
367     timer_del(d->autoneg_timer);
368     timer_del(d->mit_timer);
369     d->mit_timer_on = 0;
370     d->mit_irq_level = 0;
371     d->mit_ide = 0;
372     memset(d->phy_reg, 0, sizeof d->phy_reg);
373     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
374     d->phy_reg[PHY_ID2] = edc->phy_id2;
375     memset(d->mac_reg, 0, sizeof d->mac_reg);
376     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
377     d->rxbuf_min_shift = 1;
378     memset(&d->tx, 0, sizeof d->tx);
379 
380     if (qemu_get_queue(d->nic)->link_down) {
381         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
382     }
383 
384     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
385 }
386 
387 static void
388 set_ctrl(E1000State *s, int index, uint32_t val)
389 {
390     /* RST is self clearing */
391     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
392 }
393 
394 static void
395 set_rx_control(E1000State *s, int index, uint32_t val)
396 {
397     s->mac_reg[RCTL] = val;
398     s->rxbuf_size = e1000x_rxbufsize(val);
399     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
400     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
401            s->mac_reg[RCTL]);
402     qemu_flush_queued_packets(qemu_get_queue(s->nic));
403 }
404 
405 static void
406 set_mdic(E1000State *s, int index, uint32_t val)
407 {
408     uint32_t data = val & E1000_MDIC_DATA_MASK;
409     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
410 
411     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
412         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
413     else if (val & E1000_MDIC_OP_READ) {
414         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
415         if (!(phy_regcap[addr] & PHY_R)) {
416             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
417             val |= E1000_MDIC_ERROR;
418         } else
419             val = (val ^ data) | s->phy_reg[addr];
420     } else if (val & E1000_MDIC_OP_WRITE) {
421         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
422         if (!(phy_regcap[addr] & PHY_W)) {
423             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
424             val |= E1000_MDIC_ERROR;
425         } else {
426             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
427                 phyreg_writeops[addr](s, index, data);
428             } else {
429                 s->phy_reg[addr] = data;
430             }
431         }
432     }
433     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
434 
435     if (val & E1000_MDIC_INT_EN) {
436         set_ics(s, 0, E1000_ICR_MDAC);
437     }
438 }
439 
440 static uint32_t
441 get_eecd(E1000State *s, int index)
442 {
443     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
444 
445     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
446            s->eecd_state.bitnum_out, s->eecd_state.reading);
447     if (!s->eecd_state.reading ||
448         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
449           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
450         ret |= E1000_EECD_DO;
451     return ret;
452 }
453 
454 static void
455 set_eecd(E1000State *s, int index, uint32_t val)
456 {
457     uint32_t oldval = s->eecd_state.old_eecd;
458 
459     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
460             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
461     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
462         return;
463     }
464     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
465         s->eecd_state.val_in = 0;
466         s->eecd_state.bitnum_in = 0;
467         s->eecd_state.bitnum_out = 0;
468         s->eecd_state.reading = 0;
469     }
470     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
471         return;
472     }
473     if (!(E1000_EECD_SK & val)) {               /* falling edge */
474         s->eecd_state.bitnum_out++;
475         return;
476     }
477     s->eecd_state.val_in <<= 1;
478     if (val & E1000_EECD_DI)
479         s->eecd_state.val_in |= 1;
480     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
481         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
482         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
483             EEPROM_READ_OPCODE_MICROWIRE);
484     }
485     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
486            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
487            s->eecd_state.reading);
488 }
489 
490 static uint32_t
491 flash_eerd_read(E1000State *s, int x)
492 {
493     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
494 
495     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
496         return (s->mac_reg[EERD]);
497 
498     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
499         return (E1000_EEPROM_RW_REG_DONE | r);
500 
501     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
502            E1000_EEPROM_RW_REG_DONE | r);
503 }
504 
/*
 * Compute an Internet checksum over part of a packet and store it,
 * big-endian, inside the packet.
 *
 * data: start of the packet
 * n:    packet length in bytes
 * sloc: offset at which the 16-bit checksum is stored
 * css:  offset of the first byte covered by the checksum
 * cse:  offset of the last byte covered; 0 means "to the end of packet"
 *
 * Nothing is written unless sloc lies before the last covered byte.
 * Note that the comparison is unsigned, so an effective length of 0 wraps
 * around and does not suppress the write.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t end = n;
    uint32_t sum;

    if (cse != 0 && cse < end) {
        end = cse + 1;
    }
    if (sloc >= end - 1) {
        return;
    }

    sum = net_checksum_add(end - css, data + css);
    stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
}
517 
518 static inline void
519 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
520 {
521     if (!memcmp(arr, bcast, sizeof bcast)) {
522         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
523     } else if (arr[0] & 1) {
524         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
525     }
526 }
527 
528 static void
529 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
530 {
531     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
532                                     PTC1023, PTC1522 };
533 
534     NetClientState *nc = qemu_get_queue(s->nic);
535     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
536         nc->info->receive(nc, buf, size);
537     } else {
538         qemu_send_packet(nc, buf, size);
539     }
540     inc_tx_bcast_or_mcast_count(s, buf);
541     e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
542 }
543 
544 static void
545 xmit_seg(E1000State *s)
546 {
547     uint16_t len;
548     unsigned int frames = s->tx.tso_frames, css, sofar;
549     struct e1000_tx *tp = &s->tx;
550     struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
551 
552     if (tp->cptse) {
553         css = props->ipcss;
554         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
555                frames, tp->size, css);
556         if (props->ip) {    /* IPv4 */
557             stw_be_p(tp->data+css+2, tp->size - css);
558             stw_be_p(tp->data+css+4,
559                      lduw_be_p(tp->data + css + 4) + frames);
560         } else {         /* IPv6 */
561             stw_be_p(tp->data+css+4, tp->size - css);
562         }
563         css = props->tucss;
564         len = tp->size - css;
565         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
566         if (props->tcp) {
567             sofar = frames * props->mss;
568             stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
569             if (props->paylen - sofar > props->mss) {
570                 tp->data[css + 13] &= ~9;    /* PSH, FIN */
571             } else if (frames) {
572                 e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
573             }
574         } else {    /* UDP */
575             stw_be_p(tp->data+css+4, len);
576         }
577         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
578             unsigned int phsum;
579             // add pseudo-header length before checksum calculation
580             void *sp = tp->data + props->tucso;
581 
582             phsum = lduw_be_p(sp) + len;
583             phsum = (phsum >> 16) + (phsum & 0xffff);
584             stw_be_p(sp, phsum);
585         }
586         tp->tso_frames++;
587     }
588 
589     if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
590         putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
591     }
592     if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
593         putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
594     }
595     if (tp->vlan_needed) {
596         memmove(tp->vlan, tp->data, 4);
597         memmove(tp->data, tp->data + 4, 8);
598         memcpy(tp->data + 8, tp->vlan_header, 4);
599         e1000_send_packet(s, tp->vlan, tp->size + 4);
600     } else {
601         e1000_send_packet(s, tp->data, tp->size);
602     }
603 
604     e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
605     e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
606     s->mac_reg[GPTC] = s->mac_reg[TPT];
607     s->mac_reg[GOTCL] = s->mac_reg[TOTL];
608     s->mac_reg[GOTCH] = s->mac_reg[TOTH];
609 }
610 
611 static void
612 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
613 {
614     PCIDevice *d = PCI_DEVICE(s);
615     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
616     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
617     unsigned int split_size = txd_lower & 0xffff, bytes, sz;
618     unsigned int msh = 0xfffff;
619     uint64_t addr;
620     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
621     struct e1000_tx *tp = &s->tx;
622 
623     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
624     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
625         if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
626             e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
627             s->use_tso_for_migration = 1;
628             tp->tso_frames = 0;
629         } else {
630             e1000x_read_tx_ctx_descr(xp, &tp->props);
631             s->use_tso_for_migration = 0;
632         }
633         return;
634     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
635         // data descriptor
636         if (tp->size == 0) {
637             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
638         }
639         tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
640     } else {
641         // legacy descriptor
642         tp->cptse = 0;
643     }
644 
645     if (e1000x_vlan_enabled(s->mac_reg) &&
646         e1000x_is_vlan_txd(txd_lower) &&
647         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
648         tp->vlan_needed = 1;
649         stw_be_p(tp->vlan_header,
650                       le16_to_cpu(s->mac_reg[VET]));
651         stw_be_p(tp->vlan_header + 2,
652                       le16_to_cpu(dp->upper.fields.special));
653     }
654 
655     addr = le64_to_cpu(dp->buffer_addr);
656     if (tp->cptse) {
657         msh = tp->tso_props.hdr_len + tp->tso_props.mss;
658         do {
659             bytes = split_size;
660             if (tp->size + bytes > msh)
661                 bytes = msh - tp->size;
662 
663             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
664             pci_dma_read(d, addr, tp->data + tp->size, bytes);
665             sz = tp->size + bytes;
666             if (sz >= tp->tso_props.hdr_len
667                 && tp->size < tp->tso_props.hdr_len) {
668                 memmove(tp->header, tp->data, tp->tso_props.hdr_len);
669             }
670             tp->size = sz;
671             addr += bytes;
672             if (sz == msh) {
673                 xmit_seg(s);
674                 memmove(tp->data, tp->header, tp->tso_props.hdr_len);
675                 tp->size = tp->tso_props.hdr_len;
676             }
677             split_size -= bytes;
678         } while (bytes && split_size);
679     } else {
680         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
681         pci_dma_read(d, addr, tp->data + tp->size, split_size);
682         tp->size += split_size;
683     }
684 
685     if (!(txd_lower & E1000_TXD_CMD_EOP))
686         return;
687     if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
688         xmit_seg(s);
689     }
690     tp->tso_frames = 0;
691     tp->sum_needed = 0;
692     tp->vlan_needed = 0;
693     tp->size = 0;
694     tp->cptse = 0;
695 }
696 
697 static uint32_t
698 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
699 {
700     PCIDevice *d = PCI_DEVICE(s);
701     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
702 
703     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
704         return 0;
705     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
706                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
707     dp->upper.data = cpu_to_le32(txd_upper);
708     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
709                   &dp->upper, sizeof(dp->upper));
710     return E1000_ICR_TXDW;
711 }
712 
713 static uint64_t tx_desc_base(E1000State *s)
714 {
715     uint64_t bah = s->mac_reg[TDBAH];
716     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
717 
718     return (bah << 32) + bal;
719 }
720 
721 static void
722 start_xmit(E1000State *s)
723 {
724     PCIDevice *d = PCI_DEVICE(s);
725     dma_addr_t base;
726     struct e1000_tx_desc desc;
727     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
728 
729     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
730         DBGOUT(TX, "tx disabled\n");
731         return;
732     }
733 
734     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
735         base = tx_desc_base(s) +
736                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
737         pci_dma_read(d, base, &desc, sizeof(desc));
738 
739         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
740                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
741                desc.upper.data);
742 
743         process_tx_desc(s, &desc);
744         cause |= txdesc_writeback(s, base, &desc);
745 
746         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
747             s->mac_reg[TDH] = 0;
748         /*
749          * the following could happen only if guest sw assigns
750          * bogus values to TDT/TDLEN.
751          * there's nothing too intelligent we could do about this.
752          */
753         if (s->mac_reg[TDH] == tdh_start ||
754             tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
755             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
756                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
757             break;
758         }
759     }
760     set_ics(s, 0, cause);
761 }
762 
763 static int
764 receive_filter(E1000State *s, const uint8_t *buf, int size)
765 {
766     uint32_t rctl = s->mac_reg[RCTL];
767     int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
768 
769     if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
770         e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
771         uint16_t vid = lduw_be_p(buf + 14);
772         uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
773                                  ((vid >> 5) & 0x7f));
774         if ((vfta & (1 << (vid & 0x1f))) == 0)
775             return 0;
776     }
777 
778     if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
779         return 1;
780     }
781 
782     if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
783         e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
784         return 1;
785     }
786 
787     if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
788         e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
789         return 1;
790     }
791 
792     return e1000x_rx_group_filter(s->mac_reg, buf);
793 }
794 
795 static void
796 e1000_set_link_status(NetClientState *nc)
797 {
798     E1000State *s = qemu_get_nic_opaque(nc);
799     uint32_t old_status = s->mac_reg[STATUS];
800 
801     if (nc->link_down) {
802         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
803     } else {
804         if (have_autoneg(s) &&
805             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
806             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
807         } else {
808             e1000_link_up(s);
809         }
810     }
811 
812     if (s->mac_reg[STATUS] != old_status)
813         set_ics(s, 0, E1000_ICR_LSC);
814 }
815 
816 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
817 {
818     int bufs;
819     /* Fast-path short packets */
820     if (total_size <= s->rxbuf_size) {
821         return s->mac_reg[RDH] != s->mac_reg[RDT];
822     }
823     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
824         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
825     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
826         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
827             s->mac_reg[RDT] - s->mac_reg[RDH];
828     } else {
829         return false;
830     }
831     return total_size <= bufs * s->rxbuf_size;
832 }
833 
834 static int
835 e1000_can_receive(NetClientState *nc)
836 {
837     E1000State *s = qemu_get_nic_opaque(nc);
838 
839     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
840         e1000_has_rxbufs(s, 1);
841 }
842 
843 static uint64_t rx_desc_base(E1000State *s)
844 {
845     uint64_t bah = s->mac_reg[RDBAH];
846     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
847 
848     return (bah << 32) + bal;
849 }
850 
851 static void
852 e1000_receiver_overrun(E1000State *s, size_t size)
853 {
854     trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
855     e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
856     e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
857     set_ics(s, 0, E1000_ICS_RXO);
858 }
859 
/*
 * Deliver one incoming frame, described by the scatter/gather list @iov
 * (@iovcnt entries), into the guest's RX descriptor ring via DMA.
 *
 * Returns -1 when reception is disabled or when the ring cannot hold the
 * frame (the latter also goes through e1000_receiver_overrun()).  In all
 * other cases -- frame stored, frame dropped as oversized, or frame
 * rejected by receive_filter() -- the current value of 'size' is returned,
 * i.e. the frame length after padding and, if it was stripped, minus the
 * 4-byte VLAN tag.
 */
860 static ssize_t
861 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
862 {
863     E1000State *s = qemu_get_nic_opaque(nc);
864     PCIDevice *d = PCI_DEVICE(s);
865     struct e1000_rx_desc desc;
866     dma_addr_t base;
867     unsigned int n, rdt;
868     uint32_t rdh_start;
869     uint16_t vlan_special = 0;
870     uint8_t vlan_status = 0;
871     uint8_t min_buf[MIN_BUF_SIZE];
872     struct iovec min_iov;
873     uint8_t *filter_buf = iov->iov_base;
874     size_t size = iov_size(iov, iovcnt);
875     size_t iov_ofs = 0;
876     size_t desc_offset;
877     size_t desc_size;
878     size_t total_size;
879 
880     if (!e1000x_hw_rx_enabled(s->mac_reg)) {
881         return -1;
882     }
883 
884     /* Pad to minimum Ethernet frame length */
885     if (size < sizeof(min_buf)) {
    /* Short frame: linearize into min_buf, zero-fill the rest, count it
     * in RUC, and from here on treat min_buf as the single iov element. */
886         iov_to_buf(iov, iovcnt, 0, min_buf, size);
887         memset(&min_buf[size], 0, sizeof(min_buf) - size);
888         e1000x_inc_reg_if_not_full(s->mac_reg, RUC);
889         min_iov.iov_base = filter_buf = min_buf;
890         min_iov.iov_len = size = sizeof(min_buf);
891         iovcnt = 1;
892         iov = &min_iov;
893     } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
894         /* This is very unlikely, but may happen. */
    /* The first iov element is shorter than the header: copy the header
     * into min_buf so filtering sees it contiguously.  The payload is
     * still taken from the original iov. */
895         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
896         filter_buf = min_buf;
897     }
898 
899     /* Discard oversized packets if !LPE and !SBP. */
900     if (e1000x_is_oversized(s->mac_reg, size)) {
901         return size;
902     }
903 
904     if (!receive_filter(s, filter_buf, size)) {
905         return size;
906     }
907 
    /* VLAN stripping: remember the 16-bit tag found at offset 14, then
     * shift the first 12 header bytes forward by 4 so that the frame
     * effectively starts at offset 4 (iov_ofs) with the tag removed. */
908     if (e1000x_vlan_enabled(s->mac_reg) &&
909         e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
910         vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
911         iov_ofs = 4;
912         if (filter_buf == iov->iov_base) {
            /* Header lives in the first iov element: move it in place. */
913             memmove(filter_buf + 4, filter_buf, 12);
914         } else {
            /* Header was copied to min_buf: write the 12 bytes back into
             * the iov at offset 4, then skip iov elements that lie
             * entirely before the new start offset. */
915             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
916             while (iov->iov_len <= iov_ofs) {
917                 iov_ofs -= iov->iov_len;
918                 iov++;
919             }
920         }
921         vlan_status = E1000_RXD_STAT_VP;
922         size -= 4;
923     }
924 
    /* rdh_start is kept to detect a full wrap of the ring below.
     * total_size adds e1000x_fcs_len() to the payload; only 'size' bytes
     * are actually copied from the iov. */
925     rdh_start = s->mac_reg[RDH];
926     desc_offset = 0;
927     total_size = size + e1000x_fcs_len(s->mac_reg);
928     if (!e1000_has_rxbufs(s, total_size)) {
929         e1000_receiver_overrun(s, total_size);
930         return -1;
931     }
    /* One iteration per RX descriptor, until total_size bytes are
     * accounted for. */
932     do {
933         desc_size = total_size - desc_offset;
934         if (desc_size > s->rxbuf_size) {
935             desc_size = s->rxbuf_size;
936         }
937         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
938         pci_dma_read(d, base, &desc, sizeof(desc));
939         desc.special = vlan_special;
940         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
941         if (desc.buffer_addr) {
942             if (desc_offset < size) {
943                 size_t iov_copy;
944                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
945                 size_t copy_size = size - desc_offset;
946                 if (copy_size > s->rxbuf_size) {
947                     copy_size = s->rxbuf_size;
948                 }
                /* Copy up to one buffer's worth of data, walking across
                 * iov element boundaries as needed. */
949                 do {
950                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
951                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
952                     copy_size -= iov_copy;
953                     ba += iov_copy;
954                     iov_ofs += iov_copy;
955                     if (iov_ofs == iov->iov_len) {
956                         iov++;
957                         iov_ofs = 0;
958                     }
959                 } while (copy_size);
960             }
961             desc_offset += desc_size;
962             desc.length = cpu_to_le16(desc_size);
963             if (desc_offset >= total_size) {
964                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
965             } else {
966                 /* Guest zeroing out status is not a hardware requirement.
967                    Clear EOP in case guest didn't do it. */
968                 desc.status &= ~E1000_RXD_STAT_EOP;
969             }
970         } else { // as per intel docs; skip descriptors with null buf addr
971             DBGOUT(RX, "Null RX descriptor!!\n");
972         }
        /* The descriptor is written back even when it was skipped, so it
         * still carries the status bits set above. */
973         pci_dma_write(d, base, &desc, sizeof(desc));
974 
        /* Advance RDH, wrapping to 0 at the end of the ring. */
975         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
976             s->mac_reg[RDH] = 0;
977         /* see comment in start_xmit; same here */
        /* Bail out if RDH came back to where it started, or if the
         * starting RDH was already outside the ring. */
978         if (s->mac_reg[RDH] == rdh_start ||
979             rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
980             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
981                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
982             e1000_receiver_overrun(s, total_size);
983             return -1;
984         }
985     } while (desc_offset < total_size);
986 
987     e1000x_update_rx_total_stats(s->mac_reg, size, total_size);
988 
    /* Always raise RXT0.  Additionally raise RXDMT0 when the number of
     * descriptors between RDH and RDT (in bytes of descriptor space) has
     * dropped to RDLEN >> rxbuf_min_shift or below. */
989     n = E1000_ICS_RXT0;
990     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
991         rdt += s->mac_reg[RDLEN] / sizeof(desc);
992     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
993         s->rxbuf_min_shift)
994         n |= E1000_ICS_RXDMT0;
995 
996     set_ics(s, 0, n);
997 
998     return size;
999 }
1000 
1001 static ssize_t
1002 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1003 {
1004     const struct iovec iov = {
1005         .iov_base = (uint8_t *)buf,
1006         .iov_len = size
1007     };
1008 
1009     return e1000_receive_iov(nc, &iov, 1);
1010 }
1011 
1012 static uint32_t
1013 mac_readreg(E1000State *s, int index)
1014 {
1015     return s->mac_reg[index];
1016 }
1017 
1018 static uint32_t
1019 mac_low4_read(E1000State *s, int index)
1020 {
1021     return s->mac_reg[index] & 0xf;
1022 }
1023 
1024 static uint32_t
1025 mac_low11_read(E1000State *s, int index)
1026 {
1027     return s->mac_reg[index] & 0x7ff;
1028 }
1029 
1030 static uint32_t
1031 mac_low13_read(E1000State *s, int index)
1032 {
1033     return s->mac_reg[index] & 0x1fff;
1034 }
1035 
1036 static uint32_t
1037 mac_low16_read(E1000State *s, int index)
1038 {
1039     return s->mac_reg[index] & 0xffff;
1040 }
1041 
1042 static uint32_t
1043 mac_icr_read(E1000State *s, int index)
1044 {
1045     uint32_t ret = s->mac_reg[ICR];
1046 
1047     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1048     set_interrupt_cause(s, 0, 0);
1049     return ret;
1050 }
1051 
1052 static uint32_t
1053 mac_read_clr4(E1000State *s, int index)
1054 {
1055     uint32_t ret = s->mac_reg[index];
1056 
1057     s->mac_reg[index] = 0;
1058     return ret;
1059 }
1060 
1061 static uint32_t
1062 mac_read_clr8(E1000State *s, int index)
1063 {
1064     uint32_t ret = s->mac_reg[index];
1065 
1066     s->mac_reg[index] = 0;
1067     s->mac_reg[index-1] = 0;
1068     return ret;
1069 }
1070 
1071 static void
1072 mac_writereg(E1000State *s, int index, uint32_t val)
1073 {
1074     uint32_t macaddr[2];
1075 
1076     s->mac_reg[index] = val;
1077 
1078     if (index == RA + 1) {
1079         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1080         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1081         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1082     }
1083 }
1084 
1085 static void
1086 set_rdt(E1000State *s, int index, uint32_t val)
1087 {
1088     s->mac_reg[index] = val & 0xffff;
1089     if (e1000_has_rxbufs(s, 1)) {
1090         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1091     }
1092 }
1093 
1094 static void
1095 set_16bit(E1000State *s, int index, uint32_t val)
1096 {
1097     s->mac_reg[index] = val & 0xffff;
1098 }
1099 
1100 static void
1101 set_dlen(E1000State *s, int index, uint32_t val)
1102 {
1103     s->mac_reg[index] = val & 0xfff80;
1104 }
1105 
1106 static void
1107 set_tctl(E1000State *s, int index, uint32_t val)
1108 {
1109     s->mac_reg[index] = val;
1110     s->mac_reg[TDT] &= 0xffff;
1111     start_xmit(s);
1112 }
1113 
1114 static void
1115 set_icr(E1000State *s, int index, uint32_t val)
1116 {
1117     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1118     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1119 }
1120 
1121 static void
1122 set_imc(E1000State *s, int index, uint32_t val)
1123 {
1124     s->mac_reg[IMS] &= ~val;
1125     set_ics(s, 0, 0);
1126 }
1127 
1128 static void
1129 set_ims(E1000State *s, int index, uint32_t val)
1130 {
1131     s->mac_reg[IMS] |= val;
1132     set_ics(s, 0, 0);
1133 }
1134 
/*
 * MMIO read dispatch table, indexed by register index (the value
 * e1000_mmio_read() computes as (addr & 0x1ffff) >> 2).  A NULL slot means
 * the register has no read handler; e1000_mmio_read() then returns 0.
 *
 * getreg(x) is shorthand for "register x is read back verbatim".
 */
1135 #define getreg(x)    [x] = mac_readreg
1136 static uint32_t (*macreg_readops[])(E1000State *, int) = {
1137     getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
1138     getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
1139     getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
1140     getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
1141     getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
1142     getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
1143     getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
1144     getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
1145     getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
1146     getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
1147     getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
1148     getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
1149     getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
1150     getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
1151     getreg(GOTCL),
1152 
    /* Read-to-clear registers: clr8 also zeroes the register at index - 1,
     * clr4 zeroes only the register itself. */
1153     [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
1154     [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
1155     [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
1156     [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
1157     [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
1158     [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
1159     [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
1160     [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
1161     [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
1162     [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
1163     [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
1164     [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
1165     [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
1166     [MPTC]    = mac_read_clr4,
    /* Registers with dedicated handlers or a restricted readable width. */
1167     [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
1168     [EERD]    = flash_eerd_read,
1169     [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
1170     [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
1171     [RDFPC]   = mac_low13_read,
1172     [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
1173     [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
1174     [TDFPC]   = mac_low13_read,
1175     [AIT]     = mac_low16_read,
1176 
    /* Register arrays, filled with GNU C range designators.  A later
     * designator overrides an earlier one for the same index, so entry
     * order matters wherever ranges and single entries overlap. */
1177     [CRCERRS ... MPC]   = &mac_readreg,
1178     [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
1179     [FFLT ... FFLT+6]   = &mac_low11_read,
1180     [RA ... RA+31]      = &mac_readreg,
1181     [WUPM ... WUPM+31]  = &mac_readreg,
1182     [MTA ... MTA+127]   = &mac_readreg,
1183     [VFTA ... VFTA+127] = &mac_readreg,
1184     [FFMT ... FFMT+254] = &mac_low4_read,
1185     [FFVT ... FFVT+254] = &mac_readreg,
1186     [PBM ... PBM+16383] = &mac_readreg,
1187 };
/* Table length; used as the index bound before dispatching a read. */
1188 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1189 
/*
 * MMIO write dispatch table, indexed by register index (the value
 * e1000_mmio_write() computes as (addr & 0x1ffff) >> 2).  A NULL slot
 * means the register has no write handler; e1000_mmio_write() only logs
 * such writes and otherwise ignores them.
 *
 * putreg(x) is shorthand for "register x stores the written value as is".
 */
1190 #define putreg(x)    [x] = mac_writereg
1191 static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
1192     putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
1193     putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
1194     putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
1195     putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
1196     putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
1197     putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
1198     putreg(WUS),      putreg(AIT),
1199 
    /* Registers whose writes are masked or trigger side effects.  Note
     * that TDT shares set_tctl with TCTL. */
1200     [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
1201     [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
1202     [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
1203     [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
1204     [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
1205     [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
1206     [ITR]    = set_16bit,
1207 
    /* Register arrays, filled with GNU C range designators. */
1208     [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
1209     [FFLT ... FFLT+6]   = &mac_writereg,
1210     [RA ... RA+31]      = &mac_writereg,
1211     [WUPM ... WUPM+31]  = &mac_writereg,
1212     [MTA ... MTA+127]   = &mac_writereg,
1213     [VFTA ... VFTA+127] = &mac_writereg,
1214     [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
1215     [PBM ... PBM+16383] = &mac_writereg,
1216 };
1217 
/* Table length; used as the index bound before dispatching a write. */
1218 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1219 
/* Low two bits of each mac_reg_access[] entry (see layout below). */
1220 enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
1221 
/* Build an access byte that requires compat flag E1000_FLAG_<x>. */
1222 #define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
1223 /* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
1224  * f - flag bits (up to 6 possible flags)
1225  * n - flag needed
1226  * p - partially implemented */
/*
 * Per-register access control, indexed like the read/write dispatch
 * tables.  e1000_mmio_read()/e1000_mmio_write() allow an access when the
 * "flag needed" bit is clear, or when (entry >> 2) intersects
 * s->compat_flags.  Entries left at 0 are always accessible.
 *
 * NOTE(review): register arrays (e.g. IP6AT, IP4AT, WUPM, FFLT, FFMT,
 * FFVT, PBM) are marked at their first index only; the remaining indices
 * keep access byte 0 and are therefore not gated -- confirm this is
 * intended.
 */
1227 static const uint8_t mac_reg_access[0x8000] = {
1228     [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
1229     [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),
1230 
1231     [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
1232     [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
1233     [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
1234     [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
1235     [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
1236     [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
1237     [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
1238     [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
1239     [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
1240     [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
1241     [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
1242     [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
1243     [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
1244     [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
1245     [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
1246     [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
1247     [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
1248     [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
1249     [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
1250     [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
1251     [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
1252     [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
1253     [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
1254     [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
1255     [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
1256     [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
1257     [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
1258     [BPTC]    = markflag(MAC),
1259 
    /* Gated by the MAC flag and only partially implemented: an access
     * additionally produces a debug message. */
1260     [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1261     [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1262     [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1263     [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1264     [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1265     [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1266     [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1267     [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1268     [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1269     [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1270     [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
1271 };
1272 
1273 static void
1274 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1275                  unsigned size)
1276 {
1277     E1000State *s = opaque;
1278     unsigned int index = (addr & 0x1ffff) >> 2;
1279 
1280     if (index < NWRITEOPS && macreg_writeops[index]) {
1281         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1282             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1283             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1284                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1285                        "It is not fully implemented.\n", index<<2);
1286             }
1287             macreg_writeops[index](s, index, val);
1288         } else {    /* "flag needed" bit is set, but the flag is not active */
1289             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1290                    index<<2);
1291         }
1292     } else if (index < NREADOPS && macreg_readops[index]) {
1293         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1294                index<<2, val);
1295     } else {
1296         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1297                index<<2, val);
1298     }
1299 }
1300 
1301 static uint64_t
1302 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1303 {
1304     E1000State *s = opaque;
1305     unsigned int index = (addr & 0x1ffff) >> 2;
1306 
1307     if (index < NREADOPS && macreg_readops[index]) {
1308         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1309             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1310             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1311                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1312                        "It is not fully implemented.\n", index<<2);
1313             }
1314             return macreg_readops[index](s, index);
1315         } else {    /* "flag needed" bit is set, but the flag is not active */
1316             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1317                    index<<2);
1318         }
1319     } else {
1320         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1321     }
1322     return 0;
1323 }
1324 
1325 static const MemoryRegionOps e1000_mmio_ops = {
1326     .read = e1000_mmio_read,
1327     .write = e1000_mmio_write,
1328     .endianness = DEVICE_LITTLE_ENDIAN,
1329     .impl = {
1330         .min_access_size = 4,
1331         .max_access_size = 4,
1332     },
1333 };
1334 
1335 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1336                               unsigned size)
1337 {
1338     E1000State *s = opaque;
1339 
1340     (void)s;
1341     return 0;
1342 }
1343 
1344 static void e1000_io_write(void *opaque, hwaddr addr,
1345                            uint64_t val, unsigned size)
1346 {
1347     E1000State *s = opaque;
1348 
1349     (void)s;
1350 }
1351 
1352 static const MemoryRegionOps e1000_io_ops = {
1353     .read = e1000_io_read,
1354     .write = e1000_io_write,
1355     .endianness = DEVICE_LITTLE_ENDIAN,
1356 };
1357 
/*
 * VMState field test: true only when the incoming stream uses
 * version 1.  @opaque is unused.
 */
static bool is_version_1(void *opaque, int version_id)
{
    const bool v1 = (version_id == 1);

    (void)opaque;
    return v1;
}
1362 
1363 static int e1000_pre_save(void *opaque)
1364 {
1365     E1000State *s = opaque;
1366     NetClientState *nc = qemu_get_queue(s->nic);
1367 
1368     /* If the mitigation timer is active, emulate a timeout now. */
1369     if (s->mit_timer_on) {
1370         e1000_mit_timer(s);
1371     }
1372 
1373     /*
1374      * If link is down and auto-negotiation is supported and ongoing,
1375      * complete auto-negotiation immediately. This allows us to look
1376      * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1377      */
1378     if (nc->link_down && have_autoneg(s)) {
1379         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1380     }
1381 
1382     /* Decide which set of props to migrate in the main structure */
1383     if (chkflag(TSO) || !s->use_tso_for_migration) {
1384         /* Either we're migrating with the extra subsection, in which
1385          * case the mig_props is always 'props' OR
1386          * we've not got the subsection, but 'props' was the last
1387          * updated.
1388          */
1389         s->mig_props = s->tx.props;
1390     } else {
1391         /* We're not using the subsection, and 'tso_props' was
1392          * the last updated.
1393          */
1394         s->mig_props = s->tx.tso_props;
1395     }
1396     return 0;
1397 }
1398 
1399 static int e1000_post_load(void *opaque, int version_id)
1400 {
1401     E1000State *s = opaque;
1402     NetClientState *nc = qemu_get_queue(s->nic);
1403 
1404     if (!chkflag(MIT)) {
1405         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1406             s->mac_reg[TADV] = 0;
1407         s->mit_irq_level = false;
1408     }
1409     s->mit_ide = 0;
1410     s->mit_timer_on = false;
1411 
1412     /* nc.link_down can't be migrated, so infer link_down according
1413      * to link status bit in mac_reg[STATUS].
1414      * Alternatively, restart link negotiation if it was in progress. */
1415     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1416 
1417     if (have_autoneg(s) &&
1418         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1419         nc->link_down = false;
1420         timer_mod(s->autoneg_timer,
1421                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1422     }
1423 
1424     s->tx.props = s->mig_props;
1425     if (!s->received_tx_tso) {
1426         /* We received only one set of offload data (tx.props)
1427          * and haven't got tx.tso_props.  The best we can do
1428          * is dupe the data.
1429          */
1430         s->tx.tso_props = s->mig_props;
1431     }
1432     return 0;
1433 }
1434 
1435 static int e1000_tx_tso_post_load(void *opaque, int version_id)
1436 {
1437     E1000State *s = opaque;
1438     s->received_tx_tso = true;
1439     return 0;
1440 }
1441 
1442 static bool e1000_mit_state_needed(void *opaque)
1443 {
1444     E1000State *s = opaque;
1445 
1446     return chkflag(MIT);
1447 }
1448 
1449 static bool e1000_full_mac_needed(void *opaque)
1450 {
1451     E1000State *s = opaque;
1452 
1453     return chkflag(MAC);
1454 }
1455 
1456 static bool e1000_tso_state_needed(void *opaque)
1457 {
1458     E1000State *s = opaque;
1459 
1460     return chkflag(TSO);
1461 }
1462 
/*
 * Optional migration subsection carrying the interrupt-mitigation
 * registers and the mitigated IRQ level.  Sent only when
 * e1000_mit_state_needed() returns true.
 * NOTE: presumably the field order is part of the stream format -- do not
 * reorder without checking the VMState documentation.
 */
1463 static const VMStateDescription vmstate_e1000_mit_state = {
1464     .name = "e1000/mit_state",
1465     .version_id = 1,
1466     .minimum_version_id = 1,
1467     .needed = e1000_mit_state_needed,
1468     .fields = (VMStateField[]) {
1469         VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1470         VMSTATE_UINT32(mac_reg[RADV], E1000State),
1471         VMSTATE_UINT32(mac_reg[TADV], E1000State),
1472         VMSTATE_UINT32(mac_reg[ITR], E1000State),
1473         VMSTATE_BOOL(mit_irq_level, E1000State),
1474         VMSTATE_END_OF_LIST()
1475     }
1476 };
1477 
/*
 * Optional migration subsection carrying the complete mac_reg[] array
 * (0x8000 32-bit entries).  Sent only when e1000_full_mac_needed()
 * returns true.
 */
1478 static const VMStateDescription vmstate_e1000_full_mac_state = {
1479     .name = "e1000/full_mac_state",
1480     .version_id = 1,
1481     .minimum_version_id = 1,
1482     .needed = e1000_full_mac_needed,
1483     .fields = (VMStateField[]) {
1484         VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
1485         VMSTATE_END_OF_LIST()
1486     }
1487 };
1488 
/*
 * Optional migration subsection carrying tx.tso_props.  Sent only when
 * e1000_tso_state_needed() returns true; on load,
 * e1000_tx_tso_post_load() records that it was received.
 * NOTE: presumably the field order is part of the stream format -- do not
 * reorder without checking the VMState documentation.
 */
1489 static const VMStateDescription vmstate_e1000_tx_tso_state = {
1490     .name = "e1000/tx_tso_state",
1491     .version_id = 1,
1492     .minimum_version_id = 1,
1493     .needed = e1000_tso_state_needed,
1494     .post_load = e1000_tx_tso_post_load,
1495     .fields = (VMStateField[]) {
1496         VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
1497         VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
1498         VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
1499         VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
1500         VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
1501         VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
1502         VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
1503         VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
1504         VMSTATE_UINT16(tx.tso_props.mss, E1000State),
1505         VMSTATE_INT8(tx.tso_props.ip, E1000State),
1506         VMSTATE_INT8(tx.tso_props.tcp, E1000State),
1507         VMSTATE_END_OF_LIST()
1508     }
1509 };
1510 
/*
 * Main migration description of the device (current version 2, accepts
 * streams from version 1 on).  e1000_pre_save() prepares the state before
 * saving and e1000_post_load() rebuilds derived state after loading.
 * NOTE: presumably the field order is part of the stream format -- do not
 * reorder without checking the VMState documentation.
 */
1511 static const VMStateDescription vmstate_e1000 = {
1512     .name = "e1000",
1513     .version_id = 2,
1514     .minimum_version_id = 1,
1515     .pre_save = e1000_pre_save,
1516     .post_load = e1000_post_load,
1517     .fields = (VMStateField[]) {
1518         VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1519         VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1520         VMSTATE_UNUSED(4), /* Was mmio_base.  */
1521         VMSTATE_UINT32(rxbuf_size, E1000State),
1522         VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1523         VMSTATE_UINT32(eecd_state.val_in, E1000State),
1524         VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1525         VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1526         VMSTATE_UINT16(eecd_state.reading, E1000State),
1527         VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* Offload properties travel through mig_props, which
         * e1000_pre_save() fills from tx.props or tx.tso_props. */
1528         VMSTATE_UINT8(mig_props.ipcss, E1000State),
1529         VMSTATE_UINT8(mig_props.ipcso, E1000State),
1530         VMSTATE_UINT16(mig_props.ipcse, E1000State),
1531         VMSTATE_UINT8(mig_props.tucss, E1000State),
1532         VMSTATE_UINT8(mig_props.tucso, E1000State),
1533         VMSTATE_UINT16(mig_props.tucse, E1000State),
1534         VMSTATE_UINT32(mig_props.paylen, E1000State),
1535         VMSTATE_UINT8(mig_props.hdr_len, E1000State),
1536         VMSTATE_UINT16(mig_props.mss, E1000State),
1537         VMSTATE_UINT16(tx.size, E1000State),
1538         VMSTATE_UINT16(tx.tso_frames, E1000State),
1539         VMSTATE_UINT8(tx.sum_needed, E1000State),
1540         VMSTATE_INT8(mig_props.ip, E1000State),
1541         VMSTATE_INT8(mig_props.tcp, E1000State),
1542         VMSTATE_BUFFER(tx.header, E1000State),
1543         VMSTATE_BUFFER(tx.data, E1000State),
1544         VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1545         VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individually migrated MAC registers. */
1546         VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1547         VMSTATE_UINT32(mac_reg[EECD], E1000State),
1548         VMSTATE_UINT32(mac_reg[EERD], E1000State),
1549         VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1550         VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1551         VMSTATE_UINT32(mac_reg[ICR], E1000State),
1552         VMSTATE_UINT32(mac_reg[ICS], E1000State),
1553         VMSTATE_UINT32(mac_reg[IMC], E1000State),
1554         VMSTATE_UINT32(mac_reg[IMS], E1000State),
1555         VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1556         VMSTATE_UINT32(mac_reg[MANC], E1000State),
1557         VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1558         VMSTATE_UINT32(mac_reg[MPC], E1000State),
1559         VMSTATE_UINT32(mac_reg[PBA], E1000State),
1560         VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1561         VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1562         VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1563         VMSTATE_UINT32(mac_reg[RDH], E1000State),
1564         VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1565         VMSTATE_UINT32(mac_reg[RDT], E1000State),
1566         VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1567         VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1568         VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1569         VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1570         VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1571         VMSTATE_UINT32(mac_reg[TDH], E1000State),
1572         VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1573         VMSTATE_UINT32(mac_reg[TDT], E1000State),
1574         VMSTATE_UINT32(mac_reg[TORH], E1000State),
1575         VMSTATE_UINT32(mac_reg[TORL], E1000State),
1576         VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1577         VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1578         VMSTATE_UINT32(mac_reg[TPR], E1000State),
1579         VMSTATE_UINT32(mac_reg[TPT], E1000State),
1580         VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1581         VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1582         VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Register arrays: RA (32 entries), MTA and VFTA (128 each). */
1583         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1584         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1585         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1586         VMSTATE_END_OF_LIST()
1587     },
    /* Optional subsections; each has its own .needed predicate. */
1588     .subsections = (const VMStateDescription*[]) {
1589         &vmstate_e1000_mit_state,
1590         &vmstate_e1000_full_mac_state,
1591         &vmstate_e1000_tx_tso_state,
1592         NULL
1593     }
1594 };
1595 
1596 /*
1597  * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1598  * Note: A valid DevId will be inserted during pci_e1000_init().
1599  *
 * The template holds 64 16-bit words.  It is passed to
 * e1000x_core_prepare_eeprom() together with the MAC address and the
 * PCI device id when the device is realized.
 */
1600 static const uint16_t e1000_eeprom_template[64] = {
1601     0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1602     0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1603     0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1604     0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1605     0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1606     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1607     0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1608     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1609 };
1610 
1611 /* PCI interface */
1612 
1613 static void
1614 e1000_mmio_setup(E1000State *d)
1615 {
1616     int i;
1617     const uint32_t excluded_regs[] = {
1618         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1619         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1620     };
1621 
1622     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1623                           "e1000-mmio", PNPMMIO_SIZE);
1624     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1625     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1626         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1627                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1628     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1629 }
1630 
1631 static void
1632 pci_e1000_uninit(PCIDevice *dev)
1633 {
1634     E1000State *d = E1000(dev);
1635 
1636     timer_del(d->autoneg_timer);
1637     timer_free(d->autoneg_timer);
1638     timer_del(d->mit_timer);
1639     timer_free(d->mit_timer);
1640     qemu_del_nic(d->nic);
1641 }
1642 
1643 static NetClientInfo net_e1000_info = {
1644     .type = NET_CLIENT_DRIVER_NIC,
1645     .size = sizeof(NICState),
1646     .can_receive = e1000_can_receive,
1647     .receive = e1000_receive,
1648     .receive_iov = e1000_receive_iov,
1649     .link_status_changed = e1000_set_link_status,
1650 };
1651 
1652 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1653                                 uint32_t val, int len)
1654 {
1655     E1000State *s = E1000(pci_dev);
1656 
1657     pci_default_write_config(pci_dev, address, val, len);
1658 
1659     if (range_covers_byte(address, len, PCI_COMMAND) &&
1660         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1661         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1662     }
1663 }
1664 
1665 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1666 {
1667     DeviceState *dev = DEVICE(pci_dev);
1668     E1000State *d = E1000(pci_dev);
1669     uint8_t *pci_conf;
1670     uint8_t *macaddr;
1671 
1672     pci_dev->config_write = e1000_write_config;
1673 
1674     pci_conf = pci_dev->config;
1675 
1676     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1677     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1678 
1679     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1680 
1681     e1000_mmio_setup(d);
1682 
1683     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1684 
1685     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1686 
1687     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1688     macaddr = d->conf.macaddr.a;
1689 
1690     e1000x_core_prepare_eeprom(d->eeprom_data,
1691                                e1000_eeprom_template,
1692                                sizeof(e1000_eeprom_template),
1693                                PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1694                                macaddr);
1695 
1696     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1697                           object_get_typename(OBJECT(d)), dev->id, d);
1698 
1699     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1700 
1701     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1702     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1703 }
1704 
1705 static void qdev_e1000_reset(DeviceState *dev)
1706 {
1707     E1000State *d = E1000(dev);
1708     e1000_reset(d);
1709 }
1710 
1711 static Property e1000_properties[] = {
1712     DEFINE_NIC_PROPERTIES(E1000State, conf),
1713     DEFINE_PROP_BIT("autonegotiation", E1000State,
1714                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1715     DEFINE_PROP_BIT("mitigation", E1000State,
1716                     compat_flags, E1000_FLAG_MIT_BIT, true),
1717     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1718                     compat_flags, E1000_FLAG_MAC_BIT, true),
1719     DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1720                     compat_flags, E1000_FLAG_TSO_BIT, true),
1721     DEFINE_PROP_END_OF_LIST(),
1722 };
1723 
/*
 * Static description of one concrete e1000 model.  An entry is handed to
 * e1000_class_init() as class data and copied into the class structures
 * there.
 */
typedef struct E1000Info {
    const char *name;   /* QOM type name the model is registered under */
    uint16_t device_id; /* becomes PCIDeviceClass.device_id */
    uint8_t revision;   /* becomes PCIDeviceClass.revision */
    uint16_t phy_id2;   /* becomes E1000BaseClass.phy_id2 */
} E1000Info;
1730 
1731 static void e1000_class_init(ObjectClass *klass, void *data)
1732 {
1733     DeviceClass *dc = DEVICE_CLASS(klass);
1734     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1735     E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1736     const E1000Info *info = data;
1737 
1738     k->realize = pci_e1000_realize;
1739     k->exit = pci_e1000_uninit;
1740     k->romfile = "efi-e1000.rom";
1741     k->vendor_id = PCI_VENDOR_ID_INTEL;
1742     k->device_id = info->device_id;
1743     k->revision = info->revision;
1744     e->phy_id2 = info->phy_id2;
1745     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1746     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1747     dc->desc = "Intel Gigabit Ethernet";
1748     dc->reset = qdev_e1000_reset;
1749     dc->vmsd = &vmstate_e1000;
1750     dc->props = e1000_properties;
1751 }
1752 
1753 static void e1000_instance_init(Object *obj)
1754 {
1755     E1000State *n = E1000(obj);
1756     device_add_bootindex_property(obj, &n->conf.bootindex,
1757                                   "bootindex", "/ethernet-phy@0",
1758                                   DEVICE(n), NULL);
1759 }
1760 
1761 static const TypeInfo e1000_base_info = {
1762     .name          = TYPE_E1000_BASE,
1763     .parent        = TYPE_PCI_DEVICE,
1764     .instance_size = sizeof(E1000State),
1765     .instance_init = e1000_instance_init,
1766     .class_size    = sizeof(E1000BaseClass),
1767     .abstract      = true,
1768     .interfaces = (InterfaceInfo[]) {
1769         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1770         { },
1771     },
1772 };
1773 
1774 static const E1000Info e1000_devices[] = {
1775     {
1776         .name      = "e1000",
1777         .device_id = E1000_DEV_ID_82540EM,
1778         .revision  = 0x03,
1779         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1780     },
1781     {
1782         .name      = "e1000-82544gc",
1783         .device_id = E1000_DEV_ID_82544GC_COPPER,
1784         .revision  = 0x03,
1785         .phy_id2   = E1000_PHY_ID2_82544x,
1786     },
1787     {
1788         .name      = "e1000-82545em",
1789         .device_id = E1000_DEV_ID_82545EM_COPPER,
1790         .revision  = 0x03,
1791         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1792     },
1793 };
1794 
1795 static void e1000_register_types(void)
1796 {
1797     int i;
1798 
1799     type_register_static(&e1000_base_info);
1800     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1801         const E1000Info *info = &e1000_devices[i];
1802         TypeInfo type_info = {};
1803 
1804         type_info.name = info->name;
1805         type_info.parent = TYPE_E1000_BASE;
1806         type_info.class_data = (void *)info;
1807         type_info.class_init = e1000_class_init;
1808         type_info.instance_init = e1000_instance_init;
1809 
1810         type_register(&type_info);
1811     }
1812 }
1813 
1814 type_init(e1000_register_types)
1815