xref: /openbmc/qemu/hw/net/e1000.c (revision ed5abf46)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/pci/pci.h"
30 #include "hw/qdev-properties.h"
31 #include "migration/vmstate.h"
32 #include "net/net.h"
33 #include "net/checksum.h"
34 #include "sysemu/sysemu.h"
35 #include "sysemu/dma.h"
36 #include "qemu/iov.h"
37 #include "qemu/module.h"
38 #include "qemu/range.h"
39 
40 #include "e1000x_common.h"
41 #include "trace.h"
42 
/* Ethernet broadcast destination address (all bits set). */
static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};

/* Define E1000_DEBUG to compile in the DBGOUT() messages below. */
/* #define E1000_DEBUG */

#ifdef E1000_DEBUG
/* Debug categories; DBGBIT() turns each one into a bit of debugflags. */
enum {
    DEBUG_GENERAL,
    DEBUG_IO,
    DEBUG_MMIO,
    DEBUG_INTERRUPT,
    DEBUG_RX,
    DEBUG_TX,
    DEBUG_MDIC,
    DEBUG_EEPROM,
    DEBUG_UNKNOWN,
    DEBUG_TXSUM,
    DEBUG_TXERR,
    DEBUG_RXERR,
    DEBUG_RXFILTER,
    DEBUG_PHY,
    DEBUG_NOTYET,
};
#define DBGBIT(x)    (1 << DEBUG_##x)

/* Categories whose messages are actually printed. */
static int debugflags = DBGBIT(GENERAL) | DBGBIT(TXERR);

/* Print a message on stderr when category 'what' is enabled. */
#define DBGOUT(what, fmt, ...) \
    do { \
        if (debugflags & DBGBIT(what)) { \
            fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
        } \
    } while (0)
#else
/* Debugging compiled out: DBGOUT() is an empty statement. */
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif

#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/* 14-byte Ethernet header plus a 4-byte VLAN tag */
#define MAXIMUM_ETHERNET_HDR_LEN (14 + 4)
70 
71 /*
72  * HW models:
73  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
74  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
75  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
76  *  Others never tested
77  */
78 
79 typedef struct E1000State_st {
80     /*< private >*/
81     PCIDevice parent_obj;
82     /*< public >*/
83 
84     NICState *nic;
85     NICConf conf;
86     MemoryRegion mmio;
87     MemoryRegion io;
88 
89     uint32_t mac_reg[0x8000];
90     uint16_t phy_reg[0x20];
91     uint16_t eeprom_data[64];
92 
93     uint32_t rxbuf_size;
94     uint32_t rxbuf_min_shift;
95     struct e1000_tx {
96         unsigned char header[256];
97         unsigned char vlan_header[4];
98         /* Fields vlan and data must not be reordered or separated. */
99         unsigned char vlan[4];
100         unsigned char data[0x10000];
101         uint16_t size;
102         unsigned char vlan_needed;
103         unsigned char sum_needed;
104         bool cptse;
105         e1000x_txd_props props;
106         e1000x_txd_props tso_props;
107         uint16_t tso_frames;
108     } tx;
109 
110     struct {
111         uint32_t val_in;    /* shifted in from guest driver */
112         uint16_t bitnum_in;
113         uint16_t bitnum_out;
114         uint16_t reading;
115         uint32_t old_eecd;
116     } eecd_state;
117 
118     QEMUTimer *autoneg_timer;
119 
120     QEMUTimer *mit_timer;      /* Mitigation timer. */
121     bool mit_timer_on;         /* Mitigation timer is running. */
122     bool mit_irq_level;        /* Tracks interrupt pin level. */
123     uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
124 
125     QEMUTimer *flush_queue_timer;
126 
127 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
128 #define E1000_FLAG_AUTONEG_BIT 0
129 #define E1000_FLAG_MIT_BIT 1
130 #define E1000_FLAG_MAC_BIT 2
131 #define E1000_FLAG_TSO_BIT 3
132 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
133 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
134 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
135 #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
136     uint32_t compat_flags;
137     bool received_tx_tso;
138     bool use_tso_for_migration;
139     e1000x_txd_props mig_props;
140 } E1000State;
141 
142 #define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
143 
144 typedef struct E1000BaseClass {
145     PCIDeviceClass parent_class;
146     uint16_t phy_id2;
147 } E1000BaseClass;
148 
149 #define TYPE_E1000_BASE "e1000-base"
150 
151 #define E1000(obj) \
152     OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)
153 
154 #define E1000_DEVICE_CLASS(klass) \
155      OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
156 #define E1000_DEVICE_GET_CLASS(obj) \
157     OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
158 
159 static void
160 e1000_link_up(E1000State *s)
161 {
162     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
163 
164     /* E1000_STATUS_LU is tested by e1000_can_receive() */
165     qemu_flush_queued_packets(qemu_get_queue(s->nic));
166 }
167 
168 static void
169 e1000_autoneg_done(E1000State *s)
170 {
171     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
172 
173     /* E1000_STATUS_LU is tested by e1000_can_receive() */
174     qemu_flush_queued_packets(qemu_get_queue(s->nic));
175 }
176 
177 static bool
178 have_autoneg(E1000State *s)
179 {
180     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
181 }
182 
183 static void
184 set_phy_ctrl(E1000State *s, int index, uint16_t val)
185 {
186     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
187     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
188                                    MII_CR_RESET |
189                                    MII_CR_RESTART_AUTO_NEG);
190 
191     /*
192      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
193      * migrate during auto negotiation, after migration the link will be
194      * down.
195      */
196     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
197         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
198     }
199 }
200 
201 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
202     [PHY_CTRL] = set_phy_ctrl,
203 };
204 
205 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
206 
207 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
208 static const char phy_regcap[0x20] = {
209     [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
210     [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
211     [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
212     [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
213     [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
214     [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
215     [PHY_AUTONEG_EXP] = PHY_R,
216 };
217 
218 /* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
219 static const uint16_t phy_reg_init[] = {
220     [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
221                    MII_CR_FULL_DUPLEX |
222                    MII_CR_AUTO_NEG_EN,
223 
224     [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
225                    MII_SR_LINK_STATUS |   /* link initially up */
226                    MII_SR_AUTONEG_CAPS |
227                    /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
228                    MII_SR_PREAMBLE_SUPPRESS |
229                    MII_SR_EXTENDED_STATUS |
230                    MII_SR_10T_HD_CAPS |
231                    MII_SR_10T_FD_CAPS |
232                    MII_SR_100X_HD_CAPS |
233                    MII_SR_100X_FD_CAPS,
234 
235     [PHY_ID1] = 0x141,
236     /* [PHY_ID2] configured per DevId, from e1000_reset() */
237     [PHY_AUTONEG_ADV] = 0xde1,
238     [PHY_LP_ABILITY] = 0x1e0,
239     [PHY_1000T_CTRL] = 0x0e00,
240     [PHY_1000T_STATUS] = 0x3c00,
241     [M88E1000_PHY_SPEC_CTRL] = 0x360,
242     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
243     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
244 };
245 
246 static const uint32_t mac_reg_init[] = {
247     [PBA]     = 0x00100030,
248     [LEDCTL]  = 0x602,
249     [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
250                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
251     [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
252                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
253                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
254                 E1000_STATUS_LU,
255     [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
256                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
257                 E1000_MANC_RMCP_EN,
258 };
259 
/*
 * Lower *curr to value when value is a usable (non-zero) delay that is
 * shorter than the current one.  *curr == 0 means "no delay chosen yet",
 * so any non-zero value replaces it.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;                 /* zero means the source is not set */
    }
    if (*curr != 0 && *curr <= value) {
        return;                 /* already have an equal or shorter delay */
    }
    *curr = value;
}
268 
269 static void
270 set_interrupt_cause(E1000State *s, int index, uint32_t val)
271 {
272     PCIDevice *d = PCI_DEVICE(s);
273     uint32_t pending_ints;
274     uint32_t mit_delay;
275 
276     s->mac_reg[ICR] = val;
277 
278     /*
279      * Make sure ICR and ICS registers have the same value.
280      * The spec says that the ICS register is write-only.  However in practice,
281      * on real hardware ICS is readable, and for reads it has the same value as
282      * ICR (except that ICS does not have the clear on read behaviour of ICR).
283      *
284      * The VxWorks PRO/1000 driver uses this behaviour.
285      */
286     s->mac_reg[ICS] = val;
287 
288     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
289     if (!s->mit_irq_level && pending_ints) {
290         /*
291          * Here we detect a potential raising edge. We postpone raising the
292          * interrupt line if we are inside the mitigation delay window
293          * (s->mit_timer_on == 1).
294          * We provide a partial implementation of interrupt mitigation,
295          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
296          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
297          * RADV; relative timers based on TIDV and RDTR are not implemented.
298          */
299         if (s->mit_timer_on) {
300             return;
301         }
302         if (chkflag(MIT)) {
303             /* Compute the next mitigation delay according to pending
304              * interrupts and the current values of RADV (provided
305              * RDTR!=0), TADV and ITR.
306              * Then rearm the timer.
307              */
308             mit_delay = 0;
309             if (s->mit_ide &&
310                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
311                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
312             }
313             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
314                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
315             }
316             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
317 
318             /*
319              * According to e1000 SPEC, the Ethernet controller guarantees
320              * a maximum observable interrupt rate of 7813 interrupts/sec.
321              * Thus if mit_delay < 500 then the delay should be set to the
322              * minimum delay possible which is 500.
323              */
324             mit_delay = (mit_delay < 500) ? 500 : mit_delay;
325 
326             s->mit_timer_on = 1;
327             timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
328                       mit_delay * 256);
329             s->mit_ide = 0;
330         }
331     }
332 
333     s->mit_irq_level = (pending_ints != 0);
334     pci_set_irq(d, s->mit_irq_level);
335 }
336 
337 static void
338 e1000_mit_timer(void *opaque)
339 {
340     E1000State *s = opaque;
341 
342     s->mit_timer_on = 0;
343     /* Call set_interrupt_cause to update the irq level (if necessary). */
344     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
345 }
346 
347 static void
348 set_ics(E1000State *s, int index, uint32_t val)
349 {
350     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
351         s->mac_reg[IMS]);
352     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
353 }
354 
355 static void
356 e1000_autoneg_timer(void *opaque)
357 {
358     E1000State *s = opaque;
359     if (!qemu_get_queue(s->nic)->link_down) {
360         e1000_autoneg_done(s);
361         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
362     }
363 }
364 
365 static void e1000_reset(void *opaque)
366 {
367     E1000State *d = opaque;
368     E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
369     uint8_t *macaddr = d->conf.macaddr.a;
370 
371     timer_del(d->autoneg_timer);
372     timer_del(d->mit_timer);
373     timer_del(d->flush_queue_timer);
374     d->mit_timer_on = 0;
375     d->mit_irq_level = 0;
376     d->mit_ide = 0;
377     memset(d->phy_reg, 0, sizeof d->phy_reg);
378     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
379     d->phy_reg[PHY_ID2] = edc->phy_id2;
380     memset(d->mac_reg, 0, sizeof d->mac_reg);
381     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
382     d->rxbuf_min_shift = 1;
383     memset(&d->tx, 0, sizeof d->tx);
384 
385     if (qemu_get_queue(d->nic)->link_down) {
386         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
387     }
388 
389     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
390 }
391 
392 static void
393 set_ctrl(E1000State *s, int index, uint32_t val)
394 {
395     /* RST is self clearing */
396     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
397 }
398 
399 static void
400 e1000_flush_queue_timer(void *opaque)
401 {
402     E1000State *s = opaque;
403 
404     qemu_flush_queued_packets(qemu_get_queue(s->nic));
405 }
406 
407 static void
408 set_rx_control(E1000State *s, int index, uint32_t val)
409 {
410     s->mac_reg[RCTL] = val;
411     s->rxbuf_size = e1000x_rxbufsize(val);
412     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
413     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
414            s->mac_reg[RCTL]);
415     timer_mod(s->flush_queue_timer,
416               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
417 }
418 
419 static void
420 set_mdic(E1000State *s, int index, uint32_t val)
421 {
422     uint32_t data = val & E1000_MDIC_DATA_MASK;
423     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
424 
425     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
426         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
427     else if (val & E1000_MDIC_OP_READ) {
428         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
429         if (!(phy_regcap[addr] & PHY_R)) {
430             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
431             val |= E1000_MDIC_ERROR;
432         } else
433             val = (val ^ data) | s->phy_reg[addr];
434     } else if (val & E1000_MDIC_OP_WRITE) {
435         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
436         if (!(phy_regcap[addr] & PHY_W)) {
437             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
438             val |= E1000_MDIC_ERROR;
439         } else {
440             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
441                 phyreg_writeops[addr](s, index, data);
442             } else {
443                 s->phy_reg[addr] = data;
444             }
445         }
446     }
447     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
448 
449     if (val & E1000_MDIC_INT_EN) {
450         set_ics(s, 0, E1000_ICR_MDAC);
451     }
452 }
453 
454 static uint32_t
455 get_eecd(E1000State *s, int index)
456 {
457     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
458 
459     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
460            s->eecd_state.bitnum_out, s->eecd_state.reading);
461     if (!s->eecd_state.reading ||
462         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
463           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
464         ret |= E1000_EECD_DO;
465     return ret;
466 }
467 
468 static void
469 set_eecd(E1000State *s, int index, uint32_t val)
470 {
471     uint32_t oldval = s->eecd_state.old_eecd;
472 
473     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
474             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
475     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
476         return;
477     }
478     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
479         s->eecd_state.val_in = 0;
480         s->eecd_state.bitnum_in = 0;
481         s->eecd_state.bitnum_out = 0;
482         s->eecd_state.reading = 0;
483     }
484     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
485         return;
486     }
487     if (!(E1000_EECD_SK & val)) {               /* falling edge */
488         s->eecd_state.bitnum_out++;
489         return;
490     }
491     s->eecd_state.val_in <<= 1;
492     if (val & E1000_EECD_DI)
493         s->eecd_state.val_in |= 1;
494     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
495         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
496         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
497             EEPROM_READ_OPCODE_MICROWIRE);
498     }
499     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
500            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
501            s->eecd_state.reading);
502 }
503 
504 static uint32_t
505 flash_eerd_read(E1000State *s, int x)
506 {
507     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
508 
509     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
510         return (s->mac_reg[EERD]);
511 
512     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
513         return (E1000_EEPROM_RW_REG_DONE | r);
514 
515     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
516            E1000_EEPROM_RW_REG_DONE | r);
517 }
518 
/*
 * Compute an Internet checksum over part of a packet and store it,
 * big-endian, inside the packet.
 *
 * @data: packet buffer
 * @n:    number of valid bytes in the buffer
 * @sloc: offset at which the 16-bit checksum is stored
 * @css:  offset of the first byte covered by the checksum
 * @cse:  offset of the last byte covered, or 0 for "up to the end"
 *
 * Nothing is written unless sloc lies before the last covered byte.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t end = n;
    uint32_t sum;

    if (cse != 0 && cse < n) {
        end = cse + 1;
    }
    if (sloc >= end - 1) {
        return;
    }

    sum = net_checksum_add(end - css, data + css);
    stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
}
531 
532 static inline void
533 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
534 {
535     if (!memcmp(arr, bcast, sizeof bcast)) {
536         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
537     } else if (arr[0] & 1) {
538         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
539     }
540 }
541 
542 static void
543 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
544 {
545     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
546                                     PTC1023, PTC1522 };
547 
548     NetClientState *nc = qemu_get_queue(s->nic);
549     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
550         nc->info->receive(nc, buf, size);
551     } else {
552         qemu_send_packet(nc, buf, size);
553     }
554     inc_tx_bcast_or_mcast_count(s, buf);
555     e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
556 }
557 
558 static void
559 xmit_seg(E1000State *s)
560 {
561     uint16_t len;
562     unsigned int frames = s->tx.tso_frames, css, sofar;
563     struct e1000_tx *tp = &s->tx;
564     struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
565 
566     if (tp->cptse) {
567         css = props->ipcss;
568         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
569                frames, tp->size, css);
570         if (props->ip) {    /* IPv4 */
571             stw_be_p(tp->data+css+2, tp->size - css);
572             stw_be_p(tp->data+css+4,
573                      lduw_be_p(tp->data + css + 4) + frames);
574         } else {         /* IPv6 */
575             stw_be_p(tp->data+css+4, tp->size - css);
576         }
577         css = props->tucss;
578         len = tp->size - css;
579         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
580         if (props->tcp) {
581             sofar = frames * props->mss;
582             stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
583             if (props->paylen - sofar > props->mss) {
584                 tp->data[css + 13] &= ~9;    /* PSH, FIN */
585             } else if (frames) {
586                 e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
587             }
588         } else {    /* UDP */
589             stw_be_p(tp->data+css+4, len);
590         }
591         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
592             unsigned int phsum;
593             // add pseudo-header length before checksum calculation
594             void *sp = tp->data + props->tucso;
595 
596             phsum = lduw_be_p(sp) + len;
597             phsum = (phsum >> 16) + (phsum & 0xffff);
598             stw_be_p(sp, phsum);
599         }
600         tp->tso_frames++;
601     }
602 
603     if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
604         putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
605     }
606     if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
607         putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
608     }
609     if (tp->vlan_needed) {
610         memmove(tp->vlan, tp->data, 4);
611         memmove(tp->data, tp->data + 4, 8);
612         memcpy(tp->data + 8, tp->vlan_header, 4);
613         e1000_send_packet(s, tp->vlan, tp->size + 4);
614     } else {
615         e1000_send_packet(s, tp->data, tp->size);
616     }
617 
618     e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
619     e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
620     s->mac_reg[GPTC] = s->mac_reg[TPT];
621     s->mac_reg[GOTCL] = s->mac_reg[TOTL];
622     s->mac_reg[GOTCH] = s->mac_reg[TOTH];
623 }
624 
625 static void
626 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
627 {
628     PCIDevice *d = PCI_DEVICE(s);
629     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
630     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
631     unsigned int split_size = txd_lower & 0xffff, bytes, sz;
632     unsigned int msh = 0xfffff;
633     uint64_t addr;
634     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
635     struct e1000_tx *tp = &s->tx;
636 
637     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
638     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
639         if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
640             e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
641             s->use_tso_for_migration = 1;
642             tp->tso_frames = 0;
643         } else {
644             e1000x_read_tx_ctx_descr(xp, &tp->props);
645             s->use_tso_for_migration = 0;
646         }
647         return;
648     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
649         // data descriptor
650         if (tp->size == 0) {
651             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
652         }
653         tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
654     } else {
655         // legacy descriptor
656         tp->cptse = 0;
657     }
658 
659     if (e1000x_vlan_enabled(s->mac_reg) &&
660         e1000x_is_vlan_txd(txd_lower) &&
661         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
662         tp->vlan_needed = 1;
663         stw_be_p(tp->vlan_header,
664                       le16_to_cpu(s->mac_reg[VET]));
665         stw_be_p(tp->vlan_header + 2,
666                       le16_to_cpu(dp->upper.fields.special));
667     }
668 
669     addr = le64_to_cpu(dp->buffer_addr);
670     if (tp->cptse) {
671         msh = tp->tso_props.hdr_len + tp->tso_props.mss;
672         do {
673             bytes = split_size;
674             if (tp->size + bytes > msh)
675                 bytes = msh - tp->size;
676 
677             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
678             pci_dma_read(d, addr, tp->data + tp->size, bytes);
679             sz = tp->size + bytes;
680             if (sz >= tp->tso_props.hdr_len
681                 && tp->size < tp->tso_props.hdr_len) {
682                 memmove(tp->header, tp->data, tp->tso_props.hdr_len);
683             }
684             tp->size = sz;
685             addr += bytes;
686             if (sz == msh) {
687                 xmit_seg(s);
688                 memmove(tp->data, tp->header, tp->tso_props.hdr_len);
689                 tp->size = tp->tso_props.hdr_len;
690             }
691             split_size -= bytes;
692         } while (bytes && split_size);
693     } else {
694         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
695         pci_dma_read(d, addr, tp->data + tp->size, split_size);
696         tp->size += split_size;
697     }
698 
699     if (!(txd_lower & E1000_TXD_CMD_EOP))
700         return;
701     if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
702         xmit_seg(s);
703     }
704     tp->tso_frames = 0;
705     tp->sum_needed = 0;
706     tp->vlan_needed = 0;
707     tp->size = 0;
708     tp->cptse = 0;
709 }
710 
711 static uint32_t
712 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
713 {
714     PCIDevice *d = PCI_DEVICE(s);
715     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
716 
717     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
718         return 0;
719     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
720                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
721     dp->upper.data = cpu_to_le32(txd_upper);
722     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
723                   &dp->upper, sizeof(dp->upper));
724     return E1000_ICR_TXDW;
725 }
726 
727 static uint64_t tx_desc_base(E1000State *s)
728 {
729     uint64_t bah = s->mac_reg[TDBAH];
730     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
731 
732     return (bah << 32) + bal;
733 }
734 
735 static void
736 start_xmit(E1000State *s)
737 {
738     PCIDevice *d = PCI_DEVICE(s);
739     dma_addr_t base;
740     struct e1000_tx_desc desc;
741     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
742 
743     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
744         DBGOUT(TX, "tx disabled\n");
745         return;
746     }
747 
748     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
749         base = tx_desc_base(s) +
750                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
751         pci_dma_read(d, base, &desc, sizeof(desc));
752 
753         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
754                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
755                desc.upper.data);
756 
757         process_tx_desc(s, &desc);
758         cause |= txdesc_writeback(s, base, &desc);
759 
760         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
761             s->mac_reg[TDH] = 0;
762         /*
763          * the following could happen only if guest sw assigns
764          * bogus values to TDT/TDLEN.
765          * there's nothing too intelligent we could do about this.
766          */
767         if (s->mac_reg[TDH] == tdh_start ||
768             tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
769             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
770                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
771             break;
772         }
773     }
774     set_ics(s, 0, cause);
775 }
776 
777 static int
778 receive_filter(E1000State *s, const uint8_t *buf, int size)
779 {
780     uint32_t rctl = s->mac_reg[RCTL];
781     int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
782 
783     if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
784         e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
785         uint16_t vid = lduw_be_p(buf + 14);
786         uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
787                                  ((vid >> 5) & 0x7f));
788         if ((vfta & (1 << (vid & 0x1f))) == 0)
789             return 0;
790     }
791 
792     if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
793         return 1;
794     }
795 
796     if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
797         e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
798         return 1;
799     }
800 
801     if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
802         e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
803         return 1;
804     }
805 
806     return e1000x_rx_group_filter(s->mac_reg, buf);
807 }
808 
809 static void
810 e1000_set_link_status(NetClientState *nc)
811 {
812     E1000State *s = qemu_get_nic_opaque(nc);
813     uint32_t old_status = s->mac_reg[STATUS];
814 
815     if (nc->link_down) {
816         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
817     } else {
818         if (have_autoneg(s) &&
819             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
820             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
821         } else {
822             e1000_link_up(s);
823         }
824     }
825 
826     if (s->mac_reg[STATUS] != old_status)
827         set_ics(s, 0, E1000_ICR_LSC);
828 }
829 
830 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
831 {
832     int bufs;
833     /* Fast-path short packets */
834     if (total_size <= s->rxbuf_size) {
835         return s->mac_reg[RDH] != s->mac_reg[RDT];
836     }
837     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
838         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
839     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
840         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
841             s->mac_reg[RDT] - s->mac_reg[RDH];
842     } else {
843         return false;
844     }
845     return total_size <= bufs * s->rxbuf_size;
846 }
847 
848 static int
849 e1000_can_receive(NetClientState *nc)
850 {
851     E1000State *s = qemu_get_nic_opaque(nc);
852 
853     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
854         e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
855 }
856 
857 static uint64_t rx_desc_base(E1000State *s)
858 {
859     uint64_t bah = s->mac_reg[RDBAH];
860     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
861 
862     return (bah << 32) + bal;
863 }
864 
865 static void
866 e1000_receiver_overrun(E1000State *s, size_t size)
867 {
868     trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
869     e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
870     e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
871     set_ics(s, 0, E1000_ICS_RXO);
872 }
873 
874 static ssize_t
875 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
876 {
877     E1000State *s = qemu_get_nic_opaque(nc);
878     PCIDevice *d = PCI_DEVICE(s);
879     struct e1000_rx_desc desc;
880     dma_addr_t base;
881     unsigned int n, rdt;
882     uint32_t rdh_start;
883     uint16_t vlan_special = 0;
884     uint8_t vlan_status = 0;
885     uint8_t min_buf[MIN_BUF_SIZE];
886     struct iovec min_iov;
887     uint8_t *filter_buf = iov->iov_base;
888     size_t size = iov_size(iov, iovcnt);
889     size_t iov_ofs = 0;
890     size_t desc_offset;
891     size_t desc_size;
892     size_t total_size;
893 
894     if (!e1000x_hw_rx_enabled(s->mac_reg)) {
895         return -1;
896     }
897 
898     if (timer_pending(s->flush_queue_timer)) {
899         return 0;
900     }
901 
902     /* Pad to minimum Ethernet frame length */
903     if (size < sizeof(min_buf)) {
904         iov_to_buf(iov, iovcnt, 0, min_buf, size);
905         memset(&min_buf[size], 0, sizeof(min_buf) - size);
906         min_iov.iov_base = filter_buf = min_buf;
907         min_iov.iov_len = size = sizeof(min_buf);
908         iovcnt = 1;
909         iov = &min_iov;
910     } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
911         /* This is very unlikely, but may happen. */
912         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
913         filter_buf = min_buf;
914     }
915 
916     /* Discard oversized packets if !LPE and !SBP. */
917     if (e1000x_is_oversized(s->mac_reg, size)) {
918         return size;
919     }
920 
921     if (!receive_filter(s, filter_buf, size)) {
922         return size;
923     }
924 
925     if (e1000x_vlan_enabled(s->mac_reg) &&
926         e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
927         vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
928         iov_ofs = 4;
929         if (filter_buf == iov->iov_base) {
930             memmove(filter_buf + 4, filter_buf, 12);
931         } else {
932             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
933             while (iov->iov_len <= iov_ofs) {
934                 iov_ofs -= iov->iov_len;
935                 iov++;
936             }
937         }
938         vlan_status = E1000_RXD_STAT_VP;
939         size -= 4;
940     }
941 
942     rdh_start = s->mac_reg[RDH];
943     desc_offset = 0;
944     total_size = size + e1000x_fcs_len(s->mac_reg);
945     if (!e1000_has_rxbufs(s, total_size)) {
946         e1000_receiver_overrun(s, total_size);
947         return -1;
948     }
949     do {
950         desc_size = total_size - desc_offset;
951         if (desc_size > s->rxbuf_size) {
952             desc_size = s->rxbuf_size;
953         }
954         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
955         pci_dma_read(d, base, &desc, sizeof(desc));
956         desc.special = vlan_special;
957         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
958         if (desc.buffer_addr) {
959             if (desc_offset < size) {
960                 size_t iov_copy;
961                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
962                 size_t copy_size = size - desc_offset;
963                 if (copy_size > s->rxbuf_size) {
964                     copy_size = s->rxbuf_size;
965                 }
966                 do {
967                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
968                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
969                     copy_size -= iov_copy;
970                     ba += iov_copy;
971                     iov_ofs += iov_copy;
972                     if (iov_ofs == iov->iov_len) {
973                         iov++;
974                         iov_ofs = 0;
975                     }
976                 } while (copy_size);
977             }
978             desc_offset += desc_size;
979             desc.length = cpu_to_le16(desc_size);
980             if (desc_offset >= total_size) {
981                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
982             } else {
983                 /* Guest zeroing out status is not a hardware requirement.
984                    Clear EOP in case guest didn't do it. */
985                 desc.status &= ~E1000_RXD_STAT_EOP;
986             }
987         } else { // as per intel docs; skip descriptors with null buf addr
988             DBGOUT(RX, "Null RX descriptor!!\n");
989         }
990         pci_dma_write(d, base, &desc, sizeof(desc));
991 
992         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
993             s->mac_reg[RDH] = 0;
994         /* see comment in start_xmit; same here */
995         if (s->mac_reg[RDH] == rdh_start ||
996             rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
997             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
998                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
999             e1000_receiver_overrun(s, total_size);
1000             return -1;
1001         }
1002     } while (desc_offset < total_size);
1003 
1004     e1000x_update_rx_total_stats(s->mac_reg, size, total_size);
1005 
1006     n = E1000_ICS_RXT0;
1007     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1008         rdt += s->mac_reg[RDLEN] / sizeof(desc);
1009     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1010         s->rxbuf_min_shift)
1011         n |= E1000_ICS_RXDMT0;
1012 
1013     set_ics(s, 0, n);
1014 
1015     return size;
1016 }
1017 
1018 static ssize_t
1019 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1020 {
1021     const struct iovec iov = {
1022         .iov_base = (uint8_t *)buf,
1023         .iov_len = size
1024     };
1025 
1026     return e1000_receive_iov(nc, &iov, 1);
1027 }
1028 
1029 static uint32_t
1030 mac_readreg(E1000State *s, int index)
1031 {
1032     return s->mac_reg[index];
1033 }
1034 
1035 static uint32_t
1036 mac_low4_read(E1000State *s, int index)
1037 {
1038     return s->mac_reg[index] & 0xf;
1039 }
1040 
1041 static uint32_t
1042 mac_low11_read(E1000State *s, int index)
1043 {
1044     return s->mac_reg[index] & 0x7ff;
1045 }
1046 
1047 static uint32_t
1048 mac_low13_read(E1000State *s, int index)
1049 {
1050     return s->mac_reg[index] & 0x1fff;
1051 }
1052 
1053 static uint32_t
1054 mac_low16_read(E1000State *s, int index)
1055 {
1056     return s->mac_reg[index] & 0xffff;
1057 }
1058 
1059 static uint32_t
1060 mac_icr_read(E1000State *s, int index)
1061 {
1062     uint32_t ret = s->mac_reg[ICR];
1063 
1064     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1065     set_interrupt_cause(s, 0, 0);
1066     return ret;
1067 }
1068 
1069 static uint32_t
1070 mac_read_clr4(E1000State *s, int index)
1071 {
1072     uint32_t ret = s->mac_reg[index];
1073 
1074     s->mac_reg[index] = 0;
1075     return ret;
1076 }
1077 
1078 static uint32_t
1079 mac_read_clr8(E1000State *s, int index)
1080 {
1081     uint32_t ret = s->mac_reg[index];
1082 
1083     s->mac_reg[index] = 0;
1084     s->mac_reg[index-1] = 0;
1085     return ret;
1086 }
1087 
1088 static void
1089 mac_writereg(E1000State *s, int index, uint32_t val)
1090 {
1091     uint32_t macaddr[2];
1092 
1093     s->mac_reg[index] = val;
1094 
1095     if (index == RA + 1) {
1096         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1097         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1098         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1099     }
1100 }
1101 
1102 static void
1103 set_rdt(E1000State *s, int index, uint32_t val)
1104 {
1105     s->mac_reg[index] = val & 0xffff;
1106     if (e1000_has_rxbufs(s, 1)) {
1107         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1108     }
1109 }
1110 
1111 static void
1112 set_16bit(E1000State *s, int index, uint32_t val)
1113 {
1114     s->mac_reg[index] = val & 0xffff;
1115 }
1116 
1117 static void
1118 set_dlen(E1000State *s, int index, uint32_t val)
1119 {
1120     s->mac_reg[index] = val & 0xfff80;
1121 }
1122 
1123 static void
1124 set_tctl(E1000State *s, int index, uint32_t val)
1125 {
1126     s->mac_reg[index] = val;
1127     s->mac_reg[TDT] &= 0xffff;
1128     start_xmit(s);
1129 }
1130 
1131 static void
1132 set_icr(E1000State *s, int index, uint32_t val)
1133 {
1134     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1135     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1136 }
1137 
1138 static void
1139 set_imc(E1000State *s, int index, uint32_t val)
1140 {
1141     s->mac_reg[IMS] &= ~val;
1142     set_ics(s, 0, 0);
1143 }
1144 
1145 static void
1146 set_ims(E1000State *s, int index, uint32_t val)
1147 {
1148     s->mac_reg[IMS] |= val;
1149     set_ics(s, 0, 0);
1150 }
1151 
/*
 * Read dispatch table, indexed by register index (the MMIO offset >> 2, as
 * computed in e1000_mmio_read()).  A NULL slot means the register has no
 * read handler; e1000_mmio_read() returns 0 for those.
 *
 * getreg(x) installs the plain mac_readreg accessor for register x.
 * The [A ... B] range designators are a GNU C extension.
 * NREADOPS is the number of slots in the table.
 */
1152 #define getreg(x)    [x] = mac_readreg
1153 typedef uint32_t (*readops)(E1000State *, int);
1154 static const readops macreg_readops[] = {
1155     getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
1156     getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
1157     getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
1158     getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
1159     getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
1160     getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
1161     getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
1162     getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
1163     getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
1164     getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
1165     getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
1166     getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
1167     getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
1168     getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
1169     getreg(GOTCL),
1170 
    /*
     * Read-to-clear registers.  mac_read_clr8 also zeroes the register at
     * index - 1; mac_read_clr4 zeroes only the register being read.
     */
1171     [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
1172     [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
1173     [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
1174     [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
1175     [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
1176     [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
1177     [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
1178     [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
1179     [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
1180     [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
1181     [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
1182     [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
1183     [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
1184     [MPTC]    = mac_read_clr4,
    /* Registers with dedicated handlers or a reduced readable bit width. */
1185     [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
1186     [EERD]    = flash_eerd_read,
1187     [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
1188     [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
1189     [RDFPC]   = mac_low13_read,
1190     [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
1191     [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
1192     [TDFPC]   = mac_low13_read,
1193     [AIT]     = mac_low16_read,
1194 
    /* Register ranges / arrays. */
1195     [CRCERRS ... MPC]   = &mac_readreg,
1196     [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
1197     [FFLT ... FFLT+6]   = &mac_low11_read,
1198     [RA ... RA+31]      = &mac_readreg,
1199     [WUPM ... WUPM+31]  = &mac_readreg,
1200     [MTA ... MTA+127]   = &mac_readreg,
1201     [VFTA ... VFTA+127] = &mac_readreg,
1202     [FFMT ... FFMT+254] = &mac_low4_read,
1203     [FFVT ... FFVT+254] = &mac_readreg,
1204     [PBM ... PBM+16383] = &mac_readreg,
1205 };
1206 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1207 
/*
 * Write dispatch table, indexed by register index (the MMIO offset >> 2, as
 * computed in e1000_mmio_write()).  A NULL slot means the register has no
 * write handler; e1000_mmio_write() ignores such writes.
 *
 * putreg(x) installs the plain mac_writereg accessor for register x.
 * NWRITEOPS is the number of slots in the table.
 */
1208 #define putreg(x)    [x] = mac_writereg
1209 typedef void (*writeops)(E1000State *, int, uint32_t);
1210 static const writeops macreg_writeops[] = {
1211     putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
1212     putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
1213     putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
1214     putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
1215     putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
1216     putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
1217     putreg(WUS),      putreg(AIT),
1218 
    /* Registers whose writes mask the value or trigger side effects. */
1219     [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
1220     [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
1221     [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
1222     [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
1223     [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
1224     [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
1225     [ITR]    = set_16bit,
1226 
    /* Register ranges / arrays. */
1227     [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
1228     [FFLT ... FFLT+6]   = &mac_writereg,
1229     [RA ... RA+31]      = &mac_writereg,
1230     [WUPM ... WUPM+31]  = &mac_writereg,
1231     [MTA ... MTA+127]   = &mac_writereg,
1232     [VFTA ... VFTA+127] = &mac_writereg,
1233     [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
1234     [PBM ... PBM+16383] = &mac_writereg,
1235 };
1236 
1237 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1238 
/*
 * Per-register access attributes consulted by e1000_mmio_read() and
 * e1000_mmio_write() before dispatching to a handler.  Registers not listed
 * below have attribute 0, i.e. are always accessible.
 */
1239 enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
1240 
/* markflag(x): register is accessible only when compat flag x is set. */
1241 #define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
1242 /* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
1243  * f - flag bits (up to 6 possible flags)
1244  * n - flag needed
1245  * p - partially implemented */
1246 static const uint8_t mac_reg_access[0x8000] = {
1247     [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
1248     [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),
1249 
1250     [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
1251     [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
1252     [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
1253     [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
1254     [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
1255     [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
1256     [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
1257     [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
1258     [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
1259     [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
1260     [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
1261     [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
1262     [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
1263     [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
1264     [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
1265     [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
1266     [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
1267     [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
1268     [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
1269     [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
1270     [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
1271     [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
1272     [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
1273     [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
1274     [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
1275     [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
1276     [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
1277     [BPTC]    = markflag(MAC),
1278 
    /* Accessible with the MAC flag, but only partially implemented. */
1279     [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1280     [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1281     [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1282     [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1283     [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1284     [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1285     [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1286     [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1287     [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1288     [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1289     [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
1290 };
1291 
1292 static void
1293 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1294                  unsigned size)
1295 {
1296     E1000State *s = opaque;
1297     unsigned int index = (addr & 0x1ffff) >> 2;
1298 
1299     if (index < NWRITEOPS && macreg_writeops[index]) {
1300         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1301             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1302             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1303                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1304                        "It is not fully implemented.\n", index<<2);
1305             }
1306             macreg_writeops[index](s, index, val);
1307         } else {    /* "flag needed" bit is set, but the flag is not active */
1308             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1309                    index<<2);
1310         }
1311     } else if (index < NREADOPS && macreg_readops[index]) {
1312         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1313                index<<2, val);
1314     } else {
1315         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1316                index<<2, val);
1317     }
1318 }
1319 
1320 static uint64_t
1321 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1322 {
1323     E1000State *s = opaque;
1324     unsigned int index = (addr & 0x1ffff) >> 2;
1325 
1326     if (index < NREADOPS && macreg_readops[index]) {
1327         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1328             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1329             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1330                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1331                        "It is not fully implemented.\n", index<<2);
1332             }
1333             return macreg_readops[index](s, index);
1334         } else {    /* "flag needed" bit is set, but the flag is not active */
1335             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1336                    index<<2);
1337         }
1338     } else {
1339         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1340     }
1341     return 0;
1342 }
1343 
1344 static const MemoryRegionOps e1000_mmio_ops = {
1345     .read = e1000_mmio_read,
1346     .write = e1000_mmio_write,
1347     .endianness = DEVICE_LITTLE_ENDIAN,
1348     .impl = {
1349         .min_access_size = 4,
1350         .max_access_size = 4,
1351     },
1352 };
1353 
1354 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1355                               unsigned size)
1356 {
1357     E1000State *s = opaque;
1358 
1359     (void)s;
1360     return 0;
1361 }
1362 
1363 static void e1000_io_write(void *opaque, hwaddr addr,
1364                            uint64_t val, unsigned size)
1365 {
1366     E1000State *s = opaque;
1367 
1368     (void)s;
1369 }
1370 
1371 static const MemoryRegionOps e1000_io_ops = {
1372     .read = e1000_io_read,
1373     .write = e1000_io_write,
1374     .endianness = DEVICE_LITTLE_ENDIAN,
1375 };
1376 
/* VMState field test: true only for migration stream version 1. */
static bool is_version_1(void *opaque, int version_id)
{
    const bool matches = (1 == version_id);

    return matches;
}
1381 
1382 static int e1000_pre_save(void *opaque)
1383 {
1384     E1000State *s = opaque;
1385     NetClientState *nc = qemu_get_queue(s->nic);
1386 
1387     /*
1388      * If link is down and auto-negotiation is supported and ongoing,
1389      * complete auto-negotiation immediately. This allows us to look
1390      * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1391      */
1392     if (nc->link_down && have_autoneg(s)) {
1393         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1394     }
1395 
1396     /* Decide which set of props to migrate in the main structure */
1397     if (chkflag(TSO) || !s->use_tso_for_migration) {
1398         /* Either we're migrating with the extra subsection, in which
1399          * case the mig_props is always 'props' OR
1400          * we've not got the subsection, but 'props' was the last
1401          * updated.
1402          */
1403         s->mig_props = s->tx.props;
1404     } else {
1405         /* We're not using the subsection, and 'tso_props' was
1406          * the last updated.
1407          */
1408         s->mig_props = s->tx.tso_props;
1409     }
1410     return 0;
1411 }
1412 
1413 static int e1000_post_load(void *opaque, int version_id)
1414 {
1415     E1000State *s = opaque;
1416     NetClientState *nc = qemu_get_queue(s->nic);
1417 
1418     if (!chkflag(MIT)) {
1419         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1420             s->mac_reg[TADV] = 0;
1421         s->mit_irq_level = false;
1422     }
1423     s->mit_ide = 0;
1424     s->mit_timer_on = true;
1425     timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
1426 
1427     /* nc.link_down can't be migrated, so infer link_down according
1428      * to link status bit in mac_reg[STATUS].
1429      * Alternatively, restart link negotiation if it was in progress. */
1430     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1431 
1432     if (have_autoneg(s) &&
1433         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1434         nc->link_down = false;
1435         timer_mod(s->autoneg_timer,
1436                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1437     }
1438 
1439     s->tx.props = s->mig_props;
1440     if (!s->received_tx_tso) {
1441         /* We received only one set of offload data (tx.props)
1442          * and haven't got tx.tso_props.  The best we can do
1443          * is dupe the data.
1444          */
1445         s->tx.tso_props = s->mig_props;
1446     }
1447     return 0;
1448 }
1449 
1450 static int e1000_tx_tso_post_load(void *opaque, int version_id)
1451 {
1452     E1000State *s = opaque;
1453     s->received_tx_tso = true;
1454     return 0;
1455 }
1456 
1457 static bool e1000_mit_state_needed(void *opaque)
1458 {
1459     E1000State *s = opaque;
1460 
1461     return chkflag(MIT);
1462 }
1463 
1464 static bool e1000_full_mac_needed(void *opaque)
1465 {
1466     E1000State *s = opaque;
1467 
1468     return chkflag(MAC);
1469 }
1470 
1471 static bool e1000_tso_state_needed(void *opaque)
1472 {
1473     E1000State *s = opaque;
1474 
1475     return chkflag(TSO);
1476 }
1477 
/*
 * Migration subsection holding the interrupt-mitigation registers and the
 * mitigated IRQ level.  Gated by e1000_mit_state_needed(), i.e. the MIT
 * compat flag.
 */
1478 static const VMStateDescription vmstate_e1000_mit_state = {
1479     .name = "e1000/mit_state",
1480     .version_id = 1,
1481     .minimum_version_id = 1,
1482     .needed = e1000_mit_state_needed,
1483     .fields = (VMStateField[]) {
1484         VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1485         VMSTATE_UINT32(mac_reg[RADV], E1000State),
1486         VMSTATE_UINT32(mac_reg[TADV], E1000State),
1487         VMSTATE_UINT32(mac_reg[ITR], E1000State),
1488         VMSTATE_BOOL(mit_irq_level, E1000State),
1489         VMSTATE_END_OF_LIST()
1490     }
1491 };
1492 
/*
 * Migration subsection carrying the whole mac_reg[] array (0x8000 entries).
 * Gated by e1000_full_mac_needed(), i.e. the MAC compat flag.
 */
1493 static const VMStateDescription vmstate_e1000_full_mac_state = {
1494     .name = "e1000/full_mac_state",
1495     .version_id = 1,
1496     .minimum_version_id = 1,
1497     .needed = e1000_full_mac_needed,
1498     .fields = (VMStateField[]) {
1499         VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
1500         VMSTATE_END_OF_LIST()
1501     }
1502 };
1503 
/*
 * Migration subsection carrying tx.tso_props.  Gated by
 * e1000_tso_state_needed(), i.e. the TSO compat flag.  Its post_load hook
 * sets received_tx_tso, which e1000_post_load() checks before duplicating
 * mig_props into tso_props.
 */
1504 static const VMStateDescription vmstate_e1000_tx_tso_state = {
1505     .name = "e1000/tx_tso_state",
1506     .version_id = 1,
1507     .minimum_version_id = 1,
1508     .needed = e1000_tso_state_needed,
1509     .post_load = e1000_tx_tso_post_load,
1510     .fields = (VMStateField[]) {
1511         VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
1512         VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
1513         VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
1514         VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
1515         VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
1516         VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
1517         VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
1518         VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
1519         VMSTATE_UINT16(tx.tso_props.mss, E1000State),
1520         VMSTATE_INT8(tx.tso_props.ip, E1000State),
1521         VMSTATE_INT8(tx.tso_props.tcp, E1000State),
1522         VMSTATE_END_OF_LIST()
1523     }
1524 };
1525 
/*
 * Main migration description of the device (stream version 2, accepting
 * version 1 and later).
 *
 * The offload context is transferred through mig_props, which
 * e1000_pre_save() fills and e1000_post_load() copies back into tx.props.
 * The field order below defines the stream layout, so entries must not be
 * reordered.  The three subsections listed at the end are each gated by
 * their own .needed predicate.
 */
1526 static const VMStateDescription vmstate_e1000 = {
1527     .name = "e1000",
1528     .version_id = 2,
1529     .minimum_version_id = 1,
1530     .pre_save = e1000_pre_save,
1531     .post_load = e1000_post_load,
1532     .fields = (VMStateField[]) {
1533         VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1534         VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1535         VMSTATE_UNUSED(4), /* Was mmio_base.  */
1536         VMSTATE_UINT32(rxbuf_size, E1000State),
1537         VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1538         VMSTATE_UINT32(eecd_state.val_in, E1000State),
1539         VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1540         VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1541         VMSTATE_UINT16(eecd_state.reading, E1000State),
1542         VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1543         VMSTATE_UINT8(mig_props.ipcss, E1000State),
1544         VMSTATE_UINT8(mig_props.ipcso, E1000State),
1545         VMSTATE_UINT16(mig_props.ipcse, E1000State),
1546         VMSTATE_UINT8(mig_props.tucss, E1000State),
1547         VMSTATE_UINT8(mig_props.tucso, E1000State),
1548         VMSTATE_UINT16(mig_props.tucse, E1000State),
1549         VMSTATE_UINT32(mig_props.paylen, E1000State),
1550         VMSTATE_UINT8(mig_props.hdr_len, E1000State),
1551         VMSTATE_UINT16(mig_props.mss, E1000State),
1552         VMSTATE_UINT16(tx.size, E1000State),
1553         VMSTATE_UINT16(tx.tso_frames, E1000State),
1554         VMSTATE_UINT8(tx.sum_needed, E1000State),
1555         VMSTATE_INT8(mig_props.ip, E1000State),
1556         VMSTATE_INT8(mig_props.tcp, E1000State),
1557         VMSTATE_BUFFER(tx.header, E1000State),
1558         VMSTATE_BUFFER(tx.data, E1000State),
1559         VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1560         VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1561         VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1562         VMSTATE_UINT32(mac_reg[EECD], E1000State),
1563         VMSTATE_UINT32(mac_reg[EERD], E1000State),
1564         VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1565         VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1566         VMSTATE_UINT32(mac_reg[ICR], E1000State),
1567         VMSTATE_UINT32(mac_reg[ICS], E1000State),
1568         VMSTATE_UINT32(mac_reg[IMC], E1000State),
1569         VMSTATE_UINT32(mac_reg[IMS], E1000State),
1570         VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1571         VMSTATE_UINT32(mac_reg[MANC], E1000State),
1572         VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1573         VMSTATE_UINT32(mac_reg[MPC], E1000State),
1574         VMSTATE_UINT32(mac_reg[PBA], E1000State),
1575         VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1576         VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1577         VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1578         VMSTATE_UINT32(mac_reg[RDH], E1000State),
1579         VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1580         VMSTATE_UINT32(mac_reg[RDT], E1000State),
1581         VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1582         VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1583         VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1584         VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1585         VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1586         VMSTATE_UINT32(mac_reg[TDH], E1000State),
1587         VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1588         VMSTATE_UINT32(mac_reg[TDT], E1000State),
1589         VMSTATE_UINT32(mac_reg[TORH], E1000State),
1590         VMSTATE_UINT32(mac_reg[TORL], E1000State),
1591         VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1592         VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1593         VMSTATE_UINT32(mac_reg[TPR], E1000State),
1594         VMSTATE_UINT32(mac_reg[TPT], E1000State),
1595         VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1596         VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1597         VMSTATE_UINT32(mac_reg[VET], E1000State),
1598         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1599         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1600         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1601         VMSTATE_END_OF_LIST()
1602     },
1603     .subsections = (const VMStateDescription*[]) {
1604         &vmstate_e1000_mit_state,
1605         &vmstate_e1000_full_mac_state,
1606         &vmstate_e1000_tx_tso_state,
1607         NULL
1608     }
1609 };
1610 
1611 /*
1612  * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1613  * Note: A valid DevId will be inserted during pci_e1000_realize().
1614  */
/*
 * 64-word template handed to e1000x_core_prepare_eeprom() by
 * pci_e1000_realize(), together with the device id and the MAC address.
 * The two words marked DevId are placeholders (0) in the template.
 */
1615 static const uint16_t e1000_eeprom_template[64] = {
1616     0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1617     0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1618     0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1619     0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1620     0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1621     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1622     0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1623     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1624 };
1625 
1626 /* PCI interface */
1627 
1628 static void
1629 e1000_mmio_setup(E1000State *d)
1630 {
1631     int i;
1632     const uint32_t excluded_regs[] = {
1633         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1634         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1635     };
1636 
1637     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1638                           "e1000-mmio", PNPMMIO_SIZE);
1639     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1640     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1641         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1642                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1643     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1644 }
1645 
1646 static void
1647 pci_e1000_uninit(PCIDevice *dev)
1648 {
1649     E1000State *d = E1000(dev);
1650 
1651     timer_del(d->autoneg_timer);
1652     timer_free(d->autoneg_timer);
1653     timer_del(d->mit_timer);
1654     timer_free(d->mit_timer);
1655     timer_del(d->flush_queue_timer);
1656     timer_free(d->flush_queue_timer);
1657     qemu_del_nic(d->nic);
1658 }
1659 
1660 static NetClientInfo net_e1000_info = {
1661     .type = NET_CLIENT_DRIVER_NIC,
1662     .size = sizeof(NICState),
1663     .can_receive = e1000_can_receive,
1664     .receive = e1000_receive,
1665     .receive_iov = e1000_receive_iov,
1666     .link_status_changed = e1000_set_link_status,
1667 };
1668 
1669 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1670                                 uint32_t val, int len)
1671 {
1672     E1000State *s = E1000(pci_dev);
1673 
1674     pci_default_write_config(pci_dev, address, val, len);
1675 
1676     if (range_covers_byte(address, len, PCI_COMMAND) &&
1677         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1678         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1679     }
1680 }
1681 
1682 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1683 {
1684     DeviceState *dev = DEVICE(pci_dev);
1685     E1000State *d = E1000(pci_dev);
1686     uint8_t *pci_conf;
1687     uint8_t *macaddr;
1688 
1689     pci_dev->config_write = e1000_write_config;
1690 
1691     pci_conf = pci_dev->config;
1692 
1693     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1694     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1695 
1696     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1697 
1698     e1000_mmio_setup(d);
1699 
1700     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1701 
1702     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1703 
1704     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1705     macaddr = d->conf.macaddr.a;
1706 
1707     e1000x_core_prepare_eeprom(d->eeprom_data,
1708                                e1000_eeprom_template,
1709                                sizeof(e1000_eeprom_template),
1710                                PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1711                                macaddr);
1712 
1713     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1714                           object_get_typename(OBJECT(d)), dev->id, d);
1715 
1716     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1717 
1718     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1719     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1720     d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1721                                         e1000_flush_queue_timer, d);
1722 }
1723 
1724 static void qdev_e1000_reset(DeviceState *dev)
1725 {
1726     E1000State *d = E1000(dev);
1727     e1000_reset(d);
1728 }
1729 
1730 static Property e1000_properties[] = {
1731     DEFINE_NIC_PROPERTIES(E1000State, conf),
1732     DEFINE_PROP_BIT("autonegotiation", E1000State,
1733                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1734     DEFINE_PROP_BIT("mitigation", E1000State,
1735                     compat_flags, E1000_FLAG_MIT_BIT, true),
1736     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1737                     compat_flags, E1000_FLAG_MAC_BIT, true),
1738     DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1739                     compat_flags, E1000_FLAG_TSO_BIT, true),
1740     DEFINE_PROP_END_OF_LIST(),
1741 };
1742 
/*
 * Per-model parameters for one e1000 variant.  Entries live in
 * e1000_devices[] and reach e1000_class_init() as its class data.
 */
typedef struct E1000Info {
    const char *name;       /* QOM type name the model is registered as */
    uint16_t device_id;     /* copied into PCIDeviceClass.device_id */
    uint8_t revision;       /* copied into PCIDeviceClass.revision */
    uint16_t phy_id2;       /* copied into E1000BaseClass.phy_id2 */
} E1000Info;
1749 
1750 static void e1000_class_init(ObjectClass *klass, void *data)
1751 {
1752     DeviceClass *dc = DEVICE_CLASS(klass);
1753     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1754     E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1755     const E1000Info *info = data;
1756 
1757     k->realize = pci_e1000_realize;
1758     k->exit = pci_e1000_uninit;
1759     k->romfile = "efi-e1000.rom";
1760     k->vendor_id = PCI_VENDOR_ID_INTEL;
1761     k->device_id = info->device_id;
1762     k->revision = info->revision;
1763     e->phy_id2 = info->phy_id2;
1764     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1765     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1766     dc->desc = "Intel Gigabit Ethernet";
1767     dc->reset = qdev_e1000_reset;
1768     dc->vmsd = &vmstate_e1000;
1769     device_class_set_props(dc, e1000_properties);
1770 }
1771 
1772 static void e1000_instance_init(Object *obj)
1773 {
1774     E1000State *n = E1000(obj);
1775     device_add_bootindex_property(obj, &n->conf.bootindex,
1776                                   "bootindex", "/ethernet-phy@0",
1777                                   DEVICE(n), NULL);
1778 }
1779 
1780 static const TypeInfo e1000_base_info = {
1781     .name          = TYPE_E1000_BASE,
1782     .parent        = TYPE_PCI_DEVICE,
1783     .instance_size = sizeof(E1000State),
1784     .instance_init = e1000_instance_init,
1785     .class_size    = sizeof(E1000BaseClass),
1786     .abstract      = true,
1787     .interfaces = (InterfaceInfo[]) {
1788         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1789         { },
1790     },
1791 };
1792 
1793 static const E1000Info e1000_devices[] = {
1794     {
1795         .name      = "e1000",
1796         .device_id = E1000_DEV_ID_82540EM,
1797         .revision  = 0x03,
1798         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1799     },
1800     {
1801         .name      = "e1000-82544gc",
1802         .device_id = E1000_DEV_ID_82544GC_COPPER,
1803         .revision  = 0x03,
1804         .phy_id2   = E1000_PHY_ID2_82544x,
1805     },
1806     {
1807         .name      = "e1000-82545em",
1808         .device_id = E1000_DEV_ID_82545EM_COPPER,
1809         .revision  = 0x03,
1810         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1811     },
1812 };
1813 
1814 static void e1000_register_types(void)
1815 {
1816     int i;
1817 
1818     type_register_static(&e1000_base_info);
1819     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1820         const E1000Info *info = &e1000_devices[i];
1821         TypeInfo type_info = {};
1822 
1823         type_info.name = info->name;
1824         type_info.parent = TYPE_E1000_BASE;
1825         type_info.class_data = (void *)info;
1826         type_info.class_init = e1000_class_init;
1827         type_info.instance_init = e1000_instance_init;
1828 
1829         type_register(&type_info);
1830     }
1831 }
1832 
1833 type_init(e1000_register_types)
1834