xref: /openbmc/qemu/hw/net/e1000.c (revision 59a3a1c0)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/pci/pci.h"
30 #include "hw/qdev-properties.h"
31 #include "migration/vmstate.h"
32 #include "net/net.h"
33 #include "net/checksum.h"
34 #include "sysemu/sysemu.h"
35 #include "sysemu/dma.h"
36 #include "qemu/iov.h"
37 #include "qemu/module.h"
38 #include "qemu/range.h"
39 
40 #include "e1000x_common.h"
41 #include "trace.h"
42 
/*
 * Ethernet broadcast destination address (ff:ff:ff:ff:ff:ff).  Frame
 * destinations are compared against it by inc_tx_bcast_or_mcast_count()
 * and receive_filter().
 */
static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
44 
/*
 * Uncomment the define below to compile in debug output on stderr.
 * While it stays undefined, DBGOUT() expands to an empty statement.
 */
/* #define E1000_DEBUG */

#ifdef E1000_DEBUG
/* Debug categories; each enumerator is a bit position in debugflags. */
enum {
    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
};
/* Bit mask for debug category x (x is given without the DEBUG_ prefix). */
#define DBGBIT(x)    (1<<DEBUG_##x)
/* Categories printed by default: TX errors and general messages. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/*
 * printf-style debug message, prefixed with "e1000: ", emitted only when
 * category `what` is enabled in debugflags.
 */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
/* Debugging compiled out: evaluate nothing. */
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif
64 
/*
 * NOTE(review): presumably the sizes of the I/O-port and MMIO regions
 * (E1000State.io / E1000State.mmio); their users are outside this part of
 * the file - confirm.
 */
#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/*
 * Number of leading frame bytes e1000_receive_iov() copies into a linear
 * buffer for filtering when the first iovec element is shorter than this.
 * NOTE(review): 14 + 4 presumably stands for an Ethernet header plus one
 * VLAN tag - confirm.
 */
#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
70 
71 /*
72  * HW models:
73  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
74  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
75  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
76  *  Others never tested
77  */
78 
/*
 * Per-device state of one emulated e1000 NIC.
 *
 * Most functions in this file receive a pointer to this structure; the
 * chkflag() macro additionally expects that pointer to be named `s`.
 */
typedef struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;              /* backend handle; see qemu_get_queue(s->nic) */
    NICConf conf;               /* conf.macaddr is reloaded by e1000_reset() */
    MemoryRegion mmio;          /* NOTE(review): presumably the MMIO BAR - confirm */
    MemoryRegion io;            /* NOTE(review): presumably the I/O BAR - confirm */

    uint32_t mac_reg[0x8000];   /* MAC registers, indexed by name (ICR, RCTL, ...) */
    uint16_t phy_reg[0x20];     /* PHY registers, accessed through set_mdic() */
    uint16_t eeprom_data[64];   /* EEPROM words read by get_eecd()/flash_eerd_read() */

    uint32_t rxbuf_size;        /* e1000x_rxbufsize(RCTL), set by set_rx_control() */
    uint32_t rxbuf_min_shift;   /* RCTL threshold field + 1; 1 after reset */
    /* Transmit packet under assembly; filled in by process_tx_desc(). */
    struct e1000_tx {
        /* Copy of the first tso_props.hdr_len bytes, reused for every segment */
        unsigned char header[256];
        /* VLAN tag (VET, then the descriptor's "special" field), big endian */
        unsigned char vlan_header[4];
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        uint16_t size;              /* bytes currently held in data[] */
        unsigned char vlan_needed;  /* insert vlan_header when sending */
        unsigned char sum_needed;   /* E1000_TXD_POPTS_* bits of the data descriptor */
        bool cptse;                 /* current packet uses TCP segmentation */
        e1000x_txd_props props;     /* last non-TSE context descriptor */
        e1000x_txd_props tso_props; /* last TSE context descriptor */
        uint16_t tso_frames;        /* segments already sent for this packet */
    } tx;

    /* State of the bit-banged EEPROM interface; see set_eecd()/get_eecd(). */
    struct {
        uint32_t val_in;    /* shifted in from guest driver */
        uint16_t bitnum_in;     /* number of bits clocked in so far */
        uint16_t bitnum_out;    /* index of the EEPROM bit presented on DO */
        uint16_t reading;       /* non-zero once a read opcode was decoded */
        uint32_t old_eecd;      /* last written EECD value (masked) */
    } eecd_state;

    QEMUTimer *autoneg_timer;   /* handed to e1000x_restart_autoneg() */

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

    /* Armed by set_rx_control(); reception is refused while it is pending. */
    QEMUTimer *flush_queue_timer;

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_MAC_BIT 2
#define E1000_FLAG_TSO_BIT 3
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
    uint32_t compat_flags;      /* tested through chkflag() */
    /* NOTE(review): the three fields below look migration-related - confirm. */
    bool received_tx_tso;
    bool use_tso_for_migration; /* set according to the last context descriptor */
    e1000x_txd_props mig_props;
} E1000State;
141 
/*
 * Test one E1000_FLAG_* compatibility bit (x is given without the
 * E1000_FLAG_ prefix).  Relies on an E1000State pointer named `s` being in
 * scope at the point of use.
 */
#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
143 
/*
 * Class data of the e1000 device type.  phy_id2 is the value that
 * e1000_reset() loads into phy_reg[PHY_ID2].
 */
typedef struct E1000BaseClass {
    PCIDeviceClass parent_class;
    uint16_t phy_id2;
} E1000BaseClass;
148 
/* Type name used by the object/class cast macros below. */
#define TYPE_E1000_BASE "e1000-base"

/* Checked cast of an object pointer to E1000State. */
#define E1000(obj) \
    OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)

/* Checked cast of a class pointer to E1000BaseClass. */
#define E1000_DEVICE_CLASS(klass) \
     OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
/* Fetch the E1000BaseClass of an object (used by e1000_reset()). */
#define E1000_DEVICE_GET_CLASS(obj) \
    OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
158 
159 static void
160 e1000_link_up(E1000State *s)
161 {
162     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
163 
164     /* E1000_STATUS_LU is tested by e1000_can_receive() */
165     qemu_flush_queued_packets(qemu_get_queue(s->nic));
166 }
167 
168 static void
169 e1000_autoneg_done(E1000State *s)
170 {
171     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
172 
173     /* E1000_STATUS_LU is tested by e1000_can_receive() */
174     qemu_flush_queued_packets(qemu_get_queue(s->nic));
175 }
176 
177 static bool
178 have_autoneg(E1000State *s)
179 {
180     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
181 }
182 
183 static void
184 set_phy_ctrl(E1000State *s, int index, uint16_t val)
185 {
186     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
187     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
188                                    MII_CR_RESET |
189                                    MII_CR_RESTART_AUTO_NEG);
190 
191     /*
192      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
193      * migrate during auto negotiation, after migration the link will be
194      * down.
195      */
196     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
197         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
198     }
199 }
200 
/*
 * Per-register PHY write handlers, indexed by PHY register number.
 * set_mdic() calls the handler when one exists and otherwise stores the
 * value directly into phy_reg[].
 */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [PHY_CTRL] = set_phy_ctrl,
};

/* Number of entries in phyreg_writeops; bounds the lookup in set_mdic(). */
enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
206 
/*
 * Access rights of each PHY register, indexed by register number.
 * set_mdic() flags an MDIC error for a read without PHY_R or a write
 * without PHY_W; registers not listed here have no rights at all.
 */
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
static const char phy_regcap[0x20] = {
    [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
    [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
    [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
    [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
    [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
    [PHY_AUTONEG_EXP] = PHY_R,
};
217 
/*
 * Reset values of the PHY registers; e1000_reset() copies this table over
 * the zeroed phy_reg[] array.
 *
 * PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250
 */
static const uint16_t phy_reg_init[] = {
    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
                   MII_CR_FULL_DUPLEX |
                   MII_CR_AUTO_NEG_EN,

    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
                   MII_SR_LINK_STATUS |   /* link initially up */
                   MII_SR_AUTONEG_CAPS |
                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
                   MII_SR_PREAMBLE_SUPPRESS |
                   MII_SR_EXTENDED_STATUS |
                   MII_SR_10T_HD_CAPS |
                   MII_SR_10T_FD_CAPS |
                   MII_SR_100X_HD_CAPS |
                   MII_SR_100X_FD_CAPS,

    [PHY_ID1] = 0x141,
    /* [PHY_ID2] configured per DevId, from e1000_reset() */
    [PHY_AUTONEG_ADV] = 0xde1,
    [PHY_LP_ABILITY] = 0x1e0,
    [PHY_1000T_CTRL] = 0x0e00,
    [PHY_1000T_STATUS] = 0x3c00,
    [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
};
245 
/*
 * Reset values of the MAC registers; e1000_reset() copies this table over
 * the zeroed mac_reg[] array.  The initial STATUS value includes
 * E1000_STATUS_LU; e1000_reset() applies the link-down updates afterwards
 * when the backend link is down.
 */
static const uint32_t mac_reg_init[] = {
    [PBA]     = 0x00100030,
    [LEDCTL]  = 0x602,
    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};
259 
/*
 * Fold one candidate delay into the running minimum.
 *
 * @curr:  current delay; 0 means "not set yet"
 * @value: candidate delay; 0 means "no candidate" and is ignored
 *
 * *curr is replaced when it is unset or larger than the candidate.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;
    }
    if (*curr != 0 && *curr <= value) {
        return;
    }
    *curr = value;
}
268 
269 static void
270 set_interrupt_cause(E1000State *s, int index, uint32_t val)
271 {
272     PCIDevice *d = PCI_DEVICE(s);
273     uint32_t pending_ints;
274     uint32_t mit_delay;
275 
276     s->mac_reg[ICR] = val;
277 
278     /*
279      * Make sure ICR and ICS registers have the same value.
280      * The spec says that the ICS register is write-only.  However in practice,
281      * on real hardware ICS is readable, and for reads it has the same value as
282      * ICR (except that ICS does not have the clear on read behaviour of ICR).
283      *
284      * The VxWorks PRO/1000 driver uses this behaviour.
285      */
286     s->mac_reg[ICS] = val;
287 
288     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
289     if (!s->mit_irq_level && pending_ints) {
290         /*
291          * Here we detect a potential raising edge. We postpone raising the
292          * interrupt line if we are inside the mitigation delay window
293          * (s->mit_timer_on == 1).
294          * We provide a partial implementation of interrupt mitigation,
295          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
296          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
297          * RADV; relative timers based on TIDV and RDTR are not implemented.
298          */
299         if (s->mit_timer_on) {
300             return;
301         }
302         if (chkflag(MIT)) {
303             /* Compute the next mitigation delay according to pending
304              * interrupts and the current values of RADV (provided
305              * RDTR!=0), TADV and ITR.
306              * Then rearm the timer.
307              */
308             mit_delay = 0;
309             if (s->mit_ide &&
310                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
311                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
312             }
313             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
314                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
315             }
316             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
317 
318             /*
319              * According to e1000 SPEC, the Ethernet controller guarantees
320              * a maximum observable interrupt rate of 7813 interrupts/sec.
321              * Thus if mit_delay < 500 then the delay should be set to the
322              * minimum delay possible which is 500.
323              */
324             mit_delay = (mit_delay < 500) ? 500 : mit_delay;
325 
326             s->mit_timer_on = 1;
327             timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
328                       mit_delay * 256);
329             s->mit_ide = 0;
330         }
331     }
332 
333     s->mit_irq_level = (pending_ints != 0);
334     pci_set_irq(d, s->mit_irq_level);
335 }
336 
337 static void
338 e1000_mit_timer(void *opaque)
339 {
340     E1000State *s = opaque;
341 
342     s->mit_timer_on = 0;
343     /* Call set_interrupt_cause to update the irq level (if necessary). */
344     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
345 }
346 
347 static void
348 set_ics(E1000State *s, int index, uint32_t val)
349 {
350     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
351         s->mac_reg[IMS]);
352     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
353 }
354 
355 static void
356 e1000_autoneg_timer(void *opaque)
357 {
358     E1000State *s = opaque;
359     if (!qemu_get_queue(s->nic)->link_down) {
360         e1000_autoneg_done(s);
361         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
362     }
363 }
364 
365 static void e1000_reset(void *opaque)
366 {
367     E1000State *d = opaque;
368     E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
369     uint8_t *macaddr = d->conf.macaddr.a;
370 
371     timer_del(d->autoneg_timer);
372     timer_del(d->mit_timer);
373     timer_del(d->flush_queue_timer);
374     d->mit_timer_on = 0;
375     d->mit_irq_level = 0;
376     d->mit_ide = 0;
377     memset(d->phy_reg, 0, sizeof d->phy_reg);
378     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
379     d->phy_reg[PHY_ID2] = edc->phy_id2;
380     memset(d->mac_reg, 0, sizeof d->mac_reg);
381     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
382     d->rxbuf_min_shift = 1;
383     memset(&d->tx, 0, sizeof d->tx);
384 
385     if (qemu_get_queue(d->nic)->link_down) {
386         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
387     }
388 
389     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
390 }
391 
392 static void
393 set_ctrl(E1000State *s, int index, uint32_t val)
394 {
395     /* RST is self clearing */
396     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
397 }
398 
399 static void
400 e1000_flush_queue_timer(void *opaque)
401 {
402     E1000State *s = opaque;
403 
404     qemu_flush_queued_packets(qemu_get_queue(s->nic));
405 }
406 
407 static void
408 set_rx_control(E1000State *s, int index, uint32_t val)
409 {
410     s->mac_reg[RCTL] = val;
411     s->rxbuf_size = e1000x_rxbufsize(val);
412     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
413     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
414            s->mac_reg[RCTL]);
415     timer_mod(s->flush_queue_timer,
416               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
417 }
418 
419 static void
420 set_mdic(E1000State *s, int index, uint32_t val)
421 {
422     uint32_t data = val & E1000_MDIC_DATA_MASK;
423     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
424 
425     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
426         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
427     else if (val & E1000_MDIC_OP_READ) {
428         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
429         if (!(phy_regcap[addr] & PHY_R)) {
430             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
431             val |= E1000_MDIC_ERROR;
432         } else
433             val = (val ^ data) | s->phy_reg[addr];
434     } else if (val & E1000_MDIC_OP_WRITE) {
435         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
436         if (!(phy_regcap[addr] & PHY_W)) {
437             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
438             val |= E1000_MDIC_ERROR;
439         } else {
440             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
441                 phyreg_writeops[addr](s, index, data);
442             } else {
443                 s->phy_reg[addr] = data;
444             }
445         }
446     }
447     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
448 
449     if (val & E1000_MDIC_INT_EN) {
450         set_ics(s, 0, E1000_ICR_MDAC);
451     }
452 }
453 
454 static uint32_t
455 get_eecd(E1000State *s, int index)
456 {
457     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
458 
459     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
460            s->eecd_state.bitnum_out, s->eecd_state.reading);
461     if (!s->eecd_state.reading ||
462         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
463           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
464         ret |= E1000_EECD_DO;
465     return ret;
466 }
467 
468 static void
469 set_eecd(E1000State *s, int index, uint32_t val)
470 {
471     uint32_t oldval = s->eecd_state.old_eecd;
472 
473     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
474             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
475     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
476         return;
477     }
478     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
479         s->eecd_state.val_in = 0;
480         s->eecd_state.bitnum_in = 0;
481         s->eecd_state.bitnum_out = 0;
482         s->eecd_state.reading = 0;
483     }
484     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
485         return;
486     }
487     if (!(E1000_EECD_SK & val)) {               /* falling edge */
488         s->eecd_state.bitnum_out++;
489         return;
490     }
491     s->eecd_state.val_in <<= 1;
492     if (val & E1000_EECD_DI)
493         s->eecd_state.val_in |= 1;
494     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
495         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
496         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
497             EEPROM_READ_OPCODE_MICROWIRE);
498     }
499     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
500            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
501            s->eecd_state.reading);
502 }
503 
504 static uint32_t
505 flash_eerd_read(E1000State *s, int x)
506 {
507     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
508 
509     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
510         return (s->mac_reg[EERD]);
511 
512     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
513         return (E1000_EEPROM_RW_REG_DONE | r);
514 
515     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
516            E1000_EEPROM_RW_REG_DONE | r);
517 }
518 
/*
 * Compute an Internet checksum over part of a packet and store it in the
 * packet, big endian.
 *
 * @data: packet buffer
 * @n:    packet length
 * @sloc: offset at which the 16-bit checksum is stored
 * @css:  offset of the first byte covered by the checksum
 * @cse:  offset of the last byte covered; 0, or a value >= @n, means
 *        "up to the end of the packet"
 *
 * Nothing is written unless @sloc is below the end of the covered range
 * minus one.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t end = n;
    uint32_t sum;

    if (cse != 0 && cse < end) {
        end = cse + 1;
    }
    if (sloc >= end - 1) {
        return;
    }

    sum = net_checksum_add(end - css, data + css);
    stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
}
531 
532 static inline void
533 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
534 {
535     if (!memcmp(arr, bcast, sizeof bcast)) {
536         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
537     } else if (arr[0] & 1) {
538         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
539     }
540 }
541 
542 static void
543 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
544 {
545     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
546                                     PTC1023, PTC1522 };
547 
548     NetClientState *nc = qemu_get_queue(s->nic);
549     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
550         nc->info->receive(nc, buf, size);
551     } else {
552         qemu_send_packet(nc, buf, size);
553     }
554     inc_tx_bcast_or_mcast_count(s, buf);
555     e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
556 }
557 
/*
 * Transmit the frame currently assembled in s->tx.data.
 *
 * For a TCP-segmentation packet (tp->cptse) the IP and TCP/UDP headers are
 * first patched in place for this segment, using the offsets from the TSE
 * context descriptor (tso_props).  The requested checksums are then
 * inserted, an optional VLAN tag is added, the frame is sent, and the
 * transmit statistics registers are updated.
 */
static void
xmit_seg(E1000State *s)
{
    uint16_t len;
    unsigned int frames = s->tx.tso_frames, css, sofar;
    struct e1000_tx *tp = &s->tx;
    /* TSO packets use the TSE context; everything else the plain one */
    struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;

    if (tp->cptse) {
        css = props->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (props->ip) {    /* IPv4 */
            /* offset 2: total length; offset 4: identification, bumped per segment */
            stw_be_p(tp->data+css+2, tp->size - css);
            stw_be_p(tp->data+css+4,
                     lduw_be_p(tp->data + css + 4) + frames);
        } else {         /* IPv6 */
            /* offset 4 is where the IPv6 payload-length field lives */
            stw_be_p(tp->data+css+4, tp->size - css);
        }
        css = props->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
        if (props->tcp) {
            /* payload bytes already sent in earlier segments */
            sofar = frames * props->mss;
            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
            if (props->paylen - sofar > props->mss) {
                /* not the last segment: clear the flags below */
                tp->data[css + 13] &= ~9;    /* PSH, FIN */
            } else if (frames) {
                /* last segment of a packet that needed more than one */
                e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
            }
        } else {    /* UDP */
            stw_be_p(tp->data+css+4, len);
        }
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            // add pseudo-header length before checksum calculation
            void *sp = tp->data + props->tucso;

            phsum = lduw_be_p(sp) + len;
            /* fold the carry back into the low 16 bits */
            phsum = (phsum >> 16) + (phsum & 0xffff);
            stw_be_p(sp, phsum);
        }
        tp->tso_frames++;
    }

    /* TCP/UDP checksum first, then the IP header checksum */
    if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
        putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
    }
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
        putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
    }
    if (tp->vlan_needed) {
        /*
         * tp->vlan sits directly in front of tp->data: move the first 12
         * bytes (the two MAC addresses) 4 bytes towards the front and put
         * the tag into the gap, so the frame now starts at tp->vlan.
         */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else {
        e1000_send_packet(s, tp->data, tp->size);
    }

    /* the "good packets/octets transmitted" registers mirror the totals */
    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
    e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
    s->mac_reg[GPTC] = s->mac_reg[TPT];
    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
}
624 
625 static void
626 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
627 {
628     PCIDevice *d = PCI_DEVICE(s);
629     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
630     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
631     unsigned int split_size = txd_lower & 0xffff, bytes, sz;
632     unsigned int msh = 0xfffff;
633     uint64_t addr;
634     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
635     struct e1000_tx *tp = &s->tx;
636 
637     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
638     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
639         if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
640             e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
641             s->use_tso_for_migration = 1;
642             tp->tso_frames = 0;
643         } else {
644             e1000x_read_tx_ctx_descr(xp, &tp->props);
645             s->use_tso_for_migration = 0;
646         }
647         return;
648     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
649         // data descriptor
650         if (tp->size == 0) {
651             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
652         }
653         tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
654     } else {
655         // legacy descriptor
656         tp->cptse = 0;
657     }
658 
659     if (e1000x_vlan_enabled(s->mac_reg) &&
660         e1000x_is_vlan_txd(txd_lower) &&
661         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
662         tp->vlan_needed = 1;
663         stw_be_p(tp->vlan_header,
664                       le16_to_cpu(s->mac_reg[VET]));
665         stw_be_p(tp->vlan_header + 2,
666                       le16_to_cpu(dp->upper.fields.special));
667     }
668 
669     addr = le64_to_cpu(dp->buffer_addr);
670     if (tp->cptse) {
671         msh = tp->tso_props.hdr_len + tp->tso_props.mss;
672         do {
673             bytes = split_size;
674             if (tp->size + bytes > msh)
675                 bytes = msh - tp->size;
676 
677             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
678             pci_dma_read(d, addr, tp->data + tp->size, bytes);
679             sz = tp->size + bytes;
680             if (sz >= tp->tso_props.hdr_len
681                 && tp->size < tp->tso_props.hdr_len) {
682                 memmove(tp->header, tp->data, tp->tso_props.hdr_len);
683             }
684             tp->size = sz;
685             addr += bytes;
686             if (sz == msh) {
687                 xmit_seg(s);
688                 memmove(tp->data, tp->header, tp->tso_props.hdr_len);
689                 tp->size = tp->tso_props.hdr_len;
690             }
691             split_size -= bytes;
692         } while (bytes && split_size);
693     } else {
694         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
695         pci_dma_read(d, addr, tp->data + tp->size, split_size);
696         tp->size += split_size;
697     }
698 
699     if (!(txd_lower & E1000_TXD_CMD_EOP))
700         return;
701     if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
702         xmit_seg(s);
703     }
704     tp->tso_frames = 0;
705     tp->sum_needed = 0;
706     tp->vlan_needed = 0;
707     tp->size = 0;
708     tp->cptse = 0;
709 }
710 
711 static uint32_t
712 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
713 {
714     PCIDevice *d = PCI_DEVICE(s);
715     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
716 
717     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
718         return 0;
719     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
720                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
721     dp->upper.data = cpu_to_le32(txd_upper);
722     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
723                   &dp->upper, sizeof(dp->upper));
724     return E1000_ICR_TXDW;
725 }
726 
727 static uint64_t tx_desc_base(E1000State *s)
728 {
729     uint64_t bah = s->mac_reg[TDBAH];
730     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
731 
732     return (bah << 32) + bal;
733 }
734 
/*
 * Walk the TX descriptor ring from TDH up to TDT, processing and writing
 * back each descriptor, then raise the accumulated interrupt causes.
 *
 * Does nothing when transmission is disabled in TCTL.
 */
static void
start_xmit(E1000State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    dma_addr_t base;
    struct e1000_tx_desc desc;
    /* TXQE is always reported; TXDW is added by txdesc_writeback() */
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;

    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
        DBGOUT(TX, "tx disabled\n");
        return;
    }

    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
        /* fetch the descriptor at the current head index */
        base = tx_desc_base(s) +
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
        pci_dma_read(d, base, &desc, sizeof(desc));

        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
               desc.upper.data);

        process_tx_desc(s, &desc);
        cause |= txdesc_writeback(s, base, &desc);

        /* advance the head, wrapping at the end of the ring (TDLEN bytes) */
        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
            s->mac_reg[TDH] = 0;
        /*
         * the following could happen only if guest sw assigns
         * bogus values to TDT/TDLEN.
         * there's nothing too intelligent we could do about this.
         */
        if (s->mac_reg[TDH] == tdh_start ||
            tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
            break;
        }
    }
    set_ics(s, 0, cause);
}
776 
777 static int
778 receive_filter(E1000State *s, const uint8_t *buf, int size)
779 {
780     uint32_t rctl = s->mac_reg[RCTL];
781     int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
782 
783     if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
784         e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
785         uint16_t vid = lduw_be_p(buf + 14);
786         uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
787                                  ((vid >> 5) & 0x7f));
788         if ((vfta & (1 << (vid & 0x1f))) == 0)
789             return 0;
790     }
791 
792     if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
793         return 1;
794     }
795 
796     if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
797         e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
798         return 1;
799     }
800 
801     if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
802         e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
803         return 1;
804     }
805 
806     return e1000x_rx_group_filter(s->mac_reg, buf);
807 }
808 
809 static void
810 e1000_set_link_status(NetClientState *nc)
811 {
812     E1000State *s = qemu_get_nic_opaque(nc);
813     uint32_t old_status = s->mac_reg[STATUS];
814 
815     if (nc->link_down) {
816         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
817     } else {
818         if (have_autoneg(s) &&
819             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
820             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
821         } else {
822             e1000_link_up(s);
823         }
824     }
825 
826     if (s->mac_reg[STATUS] != old_status)
827         set_ics(s, 0, E1000_ICR_LSC);
828 }
829 
830 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
831 {
832     int bufs;
833     /* Fast-path short packets */
834     if (total_size <= s->rxbuf_size) {
835         return s->mac_reg[RDH] != s->mac_reg[RDT];
836     }
837     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
838         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
839     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
840         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
841             s->mac_reg[RDT] - s->mac_reg[RDH];
842     } else {
843         return false;
844     }
845     return total_size <= bufs * s->rxbuf_size;
846 }
847 
848 static int
849 e1000_can_receive(NetClientState *nc)
850 {
851     E1000State *s = qemu_get_nic_opaque(nc);
852 
853     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
854         e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
855 }
856 
857 static uint64_t rx_desc_base(E1000State *s)
858 {
859     uint64_t bah = s->mac_reg[RDBAH];
860     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
861 
862     return (bah << 32) + bal;
863 }
864 
865 static void
866 e1000_receiver_overrun(E1000State *s, size_t size)
867 {
868     trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
869     e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
870     e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
871     set_ics(s, 0, E1000_ICS_RXO);
872 }
873 
/*
 * Deliver one incoming frame, given as a scatter/gather list, to the guest
 * by filling RX descriptors starting at RDH.
 *
 * Return value:
 *   -1    when e1000x_hw_rx_enabled() fails, or on receiver overrun (not
 *         enough RX buffers, or RDH wrapped around to its starting value);
 *    0    while the flush-queue timer is pending;
 *   size  (the possibly padded frame size) when the frame is discarded as
 *         oversized or rejected by receive_filter(), and the frame size
 *         after optional VLAN tag removal when it was written to the ring.
 */
static ssize_t
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0;
    uint8_t min_buf[MIN_BUF_SIZE];
    struct iovec min_iov;
    /* Contiguous view of the frame header used for filtering decisions. */
    uint8_t *filter_buf = iov->iov_base;
    size_t size = iov_size(iov, iovcnt);
    /* Read offset into the current iov element. */
    size_t iov_ofs = 0;
    /* Bytes of total_size already assigned to descriptors. */
    size_t desc_offset;
    size_t desc_size;
    /* Frame size plus e1000x_fcs_len(). */
    size_t total_size;

    if (!e1000x_hw_rx_enabled(s->mac_reg)) {
        return -1;
    }

    if (timer_pending(s->flush_queue_timer)) {
        return 0;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        /* Copy the short frame into min_buf, zero-fill the rest and
         * continue with a single-element iov that points at min_buf. */
        iov_to_buf(iov, iovcnt, 0, min_buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        min_iov.iov_base = filter_buf = min_buf;
        min_iov.iov_len = size = sizeof(min_buf);
        iovcnt = 1;
        iov = &min_iov;
    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
        /* This is very unlikely, but may happen. */
        /* The header is split across iov elements: linearize it into
         * min_buf so that filter_buf is contiguous. */
        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
        filter_buf = min_buf;
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if (e1000x_is_oversized(s->mac_reg, size)) {
        return size;
    }

    if (!receive_filter(s, filter_buf, size)) {
        return size;
    }

    if (e1000x_vlan_enabled(s->mac_reg) &&
        e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
        /* Remember the 16-bit value at byte offset 14 for desc.special and
         * drop 4 bytes from the frame: the first 12 bytes are moved up by
         * 4 and reading then starts at offset 4. */
        vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
        iov_ofs = 4;
        if (filter_buf == iov->iov_base) {
            memmove(filter_buf + 4, filter_buf, 12);
        } else {
            /* Header was linearized: write the 12 bytes back into the iov
             * at offset 4, then skip iov elements wholly covered by the
             * 4-byte offset. */
            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
            while (iov->iov_len <= iov_ofs) {
                iov_ofs -= iov->iov_len;
                iov++;
            }
        }
        vlan_status = E1000_RXD_STAT_VP;
        size -= 4;
    }

    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    total_size = size + e1000x_fcs_len(s->mac_reg);
    if (!e1000_has_rxbufs(s, total_size)) {
        e1000_receiver_overrun(s, total_size);
        return -1;
    }
    /* One iteration per descriptor, until total_size bytes are placed. */
    do {
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        if (desc.buffer_addr) {
            /* Only the first 'size' bytes come from the iov; the rest of
             * total_size is counted in desc.length but not copied. */
            if (desc_offset < size) {
                size_t iov_copy;
                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                /* Copy into this buffer, advancing through iov elements. */
                do {
                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
                    copy_size -= iov_copy;
                    ba += iov_copy;
                    iov_ofs += iov_copy;
                    if (iov_ofs == iov->iov_len) {
                        iov++;
                        iov_ofs = 0;
                    }
                } while (copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { // as per intel docs; skip descriptors with null buf addr
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        /* Write the descriptor back (also for skipped null descriptors). */
        pci_dma_write(d, base, &desc, sizeof(desc));

        /* Advance RDH, wrapping to 0 at the end of the ring. */
        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        if (s->mac_reg[RDH] == rdh_start ||
            rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            e1000_receiver_overrun(s, total_size);
            return -1;
        }
    } while (desc_offset < total_size);

    e1000x_update_rx_total_stats(s->mac_reg, size, total_size);

    /* Always signal RXT0; add RXDMT0 when the bytes of descriptors left
     * between RDH and RDT are at most RDLEN >> rxbuf_min_shift. */
    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}
1017 
1018 static ssize_t
1019 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1020 {
1021     const struct iovec iov = {
1022         .iov_base = (uint8_t *)buf,
1023         .iov_len = size
1024     };
1025 
1026     return e1000_receive_iov(nc, &iov, 1);
1027 }
1028 
1029 static uint32_t
1030 mac_readreg(E1000State *s, int index)
1031 {
1032     return s->mac_reg[index];
1033 }
1034 
1035 static uint32_t
1036 mac_low4_read(E1000State *s, int index)
1037 {
1038     return s->mac_reg[index] & 0xf;
1039 }
1040 
1041 static uint32_t
1042 mac_low11_read(E1000State *s, int index)
1043 {
1044     return s->mac_reg[index] & 0x7ff;
1045 }
1046 
1047 static uint32_t
1048 mac_low13_read(E1000State *s, int index)
1049 {
1050     return s->mac_reg[index] & 0x1fff;
1051 }
1052 
1053 static uint32_t
1054 mac_low16_read(E1000State *s, int index)
1055 {
1056     return s->mac_reg[index] & 0xffff;
1057 }
1058 
1059 static uint32_t
1060 mac_icr_read(E1000State *s, int index)
1061 {
1062     uint32_t ret = s->mac_reg[ICR];
1063 
1064     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1065     set_interrupt_cause(s, 0, 0);
1066     return ret;
1067 }
1068 
1069 static uint32_t
1070 mac_read_clr4(E1000State *s, int index)
1071 {
1072     uint32_t ret = s->mac_reg[index];
1073 
1074     s->mac_reg[index] = 0;
1075     return ret;
1076 }
1077 
1078 static uint32_t
1079 mac_read_clr8(E1000State *s, int index)
1080 {
1081     uint32_t ret = s->mac_reg[index];
1082 
1083     s->mac_reg[index] = 0;
1084     s->mac_reg[index-1] = 0;
1085     return ret;
1086 }
1087 
1088 static void
1089 mac_writereg(E1000State *s, int index, uint32_t val)
1090 {
1091     uint32_t macaddr[2];
1092 
1093     s->mac_reg[index] = val;
1094 
1095     if (index == RA + 1) {
1096         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1097         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1098         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1099     }
1100 }
1101 
1102 static void
1103 set_rdt(E1000State *s, int index, uint32_t val)
1104 {
1105     s->mac_reg[index] = val & 0xffff;
1106     if (e1000_has_rxbufs(s, 1)) {
1107         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1108     }
1109 }
1110 
1111 static void
1112 set_16bit(E1000State *s, int index, uint32_t val)
1113 {
1114     s->mac_reg[index] = val & 0xffff;
1115 }
1116 
1117 static void
1118 set_dlen(E1000State *s, int index, uint32_t val)
1119 {
1120     s->mac_reg[index] = val & 0xfff80;
1121 }
1122 
1123 static void
1124 set_tctl(E1000State *s, int index, uint32_t val)
1125 {
1126     s->mac_reg[index] = val;
1127     s->mac_reg[TDT] &= 0xffff;
1128     start_xmit(s);
1129 }
1130 
1131 static void
1132 set_icr(E1000State *s, int index, uint32_t val)
1133 {
1134     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1135     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1136 }
1137 
1138 static void
1139 set_imc(E1000State *s, int index, uint32_t val)
1140 {
1141     s->mac_reg[IMS] &= ~val;
1142     set_ics(s, 0, 0);
1143 }
1144 
1145 static void
1146 set_ims(E1000State *s, int index, uint32_t val)
1147 {
1148     s->mac_reg[IMS] |= val;
1149     set_ics(s, 0, 0);
1150 }
1151 
/*
 * Read dispatch table, indexed by register index (MMIO offset >> 2, see
 * e1000_mmio_read()).  Each entry is the handler invoked on a read of that
 * register; entries left unset are NULL and are reported as unknown
 * registers by e1000_mmio_read().
 *
 * getreg(x) is shorthand for "[x] = mac_readreg" (plain read).
 */
#define getreg(x)    [x] = mac_readreg
static uint32_t (*macreg_readops[])(E1000State *, int) = {
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),

    /* Read-to-clear registers: mac_read_clr8 also zeroes the entry at
     * index - 1, mac_read_clr4 zeroes only the register itself. */
    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    /* Registers with dedicated read handlers. */
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,
    /* Registers whose reads are masked to their low N bits. */
    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
    [RDFPC]   = mac_low13_read,
    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
    [TDFPC]   = mac_low13_read,
    [AIT]     = mac_low16_read,

    /* Register ranges (GNU range designators). */
    [CRCERRS ... MPC]   = &mac_readreg,
    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
    [FFLT ... FFLT+6]   = &mac_low11_read,
    [RA ... RA+31]      = &mac_readreg,
    [WUPM ... WUPM+31]  = &mac_readreg,
    [MTA ... MTA+127]   = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
    [FFMT ... FFMT+254] = &mac_low4_read,
    [FFVT ... FFVT+254] = &mac_readreg,
    [PBM ... PBM+16383] = &mac_readreg,
};
/* Number of entries in macreg_readops; used as the bound for index checks. */
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1206 
/*
 * Write dispatch table, indexed by register index (MMIO offset >> 2, see
 * e1000_mmio_write()).  Entries left unset are NULL; e1000_mmio_write()
 * treats a register with a read handler but no write handler as read-only
 * and ignores the write.
 *
 * putreg(x) is shorthand for "[x] = mac_writereg" (plain store).
 */
#define putreg(x)    [x] = mac_writereg
static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
    putreg(WUS),      putreg(AIT),

    /* Registers with dedicated write handlers (masking or side effects). */
    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
    [ITR]    = set_16bit,

    /* Register ranges (GNU range designators). */
    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
    [FFLT ... FFLT+6]   = &mac_writereg,
    [RA ... RA+31]      = &mac_writereg,
    [WUPM ... WUPM+31]  = &mac_writereg,
    [MTA ... MTA+127]   = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
    [PBM ... PBM+16383] = &mac_writereg,
};

/* Number of entries in macreg_writeops; used as the bound for index checks. */
enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1236 
/* Low two bits of each mac_reg_access[] entry. */
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

/* Build an entry that requires compat flag E1000_FLAG_<x>: the flag value
 * goes into the bits above the low two, and "flag needed" is set. */
#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implemented
 *
 * The table is indexed by register index, like macreg_readops and
 * macreg_writeops.  A zero entry means the register is accessible
 * regardless of compat flags.  The MMIO handlers refuse access to a
 * register whose "flag needed" bit is set while none of its flag bits is
 * set in compat_flags. */
static const uint8_t mac_reg_access[0x8000] = {
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    /* Flag-gated registers that are additionally only partially
     * implemented (accesses are logged via DBGOUT when debugging). */
    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1289 
1290 static void
1291 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1292                  unsigned size)
1293 {
1294     E1000State *s = opaque;
1295     unsigned int index = (addr & 0x1ffff) >> 2;
1296 
1297     if (index < NWRITEOPS && macreg_writeops[index]) {
1298         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1299             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1300             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1301                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1302                        "It is not fully implemented.\n", index<<2);
1303             }
1304             macreg_writeops[index](s, index, val);
1305         } else {    /* "flag needed" bit is set, but the flag is not active */
1306             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1307                    index<<2);
1308         }
1309     } else if (index < NREADOPS && macreg_readops[index]) {
1310         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1311                index<<2, val);
1312     } else {
1313         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1314                index<<2, val);
1315     }
1316 }
1317 
1318 static uint64_t
1319 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1320 {
1321     E1000State *s = opaque;
1322     unsigned int index = (addr & 0x1ffff) >> 2;
1323 
1324     if (index < NREADOPS && macreg_readops[index]) {
1325         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1326             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1327             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1328                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1329                        "It is not fully implemented.\n", index<<2);
1330             }
1331             return macreg_readops[index](s, index);
1332         } else {    /* "flag needed" bit is set, but the flag is not active */
1333             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1334                    index<<2);
1335         }
1336     } else {
1337         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1338     }
1339     return 0;
1340 }
1341 
/*
 * Callbacks for the register MMIO region (installed on d->mmio by
 * e1000_mmio_setup()).  The region is little-endian and the callbacks are
 * declared as implementing 4-byte accesses only.
 */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1351 
1352 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1353                               unsigned size)
1354 {
1355     E1000State *s = opaque;
1356 
1357     (void)s;
1358     return 0;
1359 }
1360 
1361 static void e1000_io_write(void *opaque, hwaddr addr,
1362                            uint64_t val, unsigned size)
1363 {
1364     E1000State *s = opaque;
1365 
1366     (void)s;
1367 }
1368 
/*
 * Callbacks for the I/O-port region (installed on d->io by
 * e1000_mmio_setup()); both handlers are no-op stubs.
 */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1374 
/*
 * Field-test predicate for the vmstate description: true only when the
 * incoming stream's version_id is 1.  opaque is not used.
 */
static bool is_version_1(void *opaque, int version_id)
{
    const bool matches = (version_id == 1);

    return matches;
}
1379 
1380 static int e1000_pre_save(void *opaque)
1381 {
1382     E1000State *s = opaque;
1383     NetClientState *nc = qemu_get_queue(s->nic);
1384 
1385     /*
1386      * If link is down and auto-negotiation is supported and ongoing,
1387      * complete auto-negotiation immediately. This allows us to look
1388      * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1389      */
1390     if (nc->link_down && have_autoneg(s)) {
1391         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1392     }
1393 
1394     /* Decide which set of props to migrate in the main structure */
1395     if (chkflag(TSO) || !s->use_tso_for_migration) {
1396         /* Either we're migrating with the extra subsection, in which
1397          * case the mig_props is always 'props' OR
1398          * we've not got the subsection, but 'props' was the last
1399          * updated.
1400          */
1401         s->mig_props = s->tx.props;
1402     } else {
1403         /* We're not using the subsection, and 'tso_props' was
1404          * the last updated.
1405          */
1406         s->mig_props = s->tx.tso_props;
1407     }
1408     return 0;
1409 }
1410 
1411 static int e1000_post_load(void *opaque, int version_id)
1412 {
1413     E1000State *s = opaque;
1414     NetClientState *nc = qemu_get_queue(s->nic);
1415 
1416     if (!chkflag(MIT)) {
1417         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1418             s->mac_reg[TADV] = 0;
1419         s->mit_irq_level = false;
1420     }
1421     s->mit_ide = 0;
1422     s->mit_timer_on = true;
1423     timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
1424 
1425     /* nc.link_down can't be migrated, so infer link_down according
1426      * to link status bit in mac_reg[STATUS].
1427      * Alternatively, restart link negotiation if it was in progress. */
1428     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1429 
1430     if (have_autoneg(s) &&
1431         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1432         nc->link_down = false;
1433         timer_mod(s->autoneg_timer,
1434                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1435     }
1436 
1437     s->tx.props = s->mig_props;
1438     if (!s->received_tx_tso) {
1439         /* We received only one set of offload data (tx.props)
1440          * and haven't got tx.tso_props.  The best we can do
1441          * is dupe the data.
1442          */
1443         s->tx.tso_props = s->mig_props;
1444     }
1445     return 0;
1446 }
1447 
1448 static int e1000_tx_tso_post_load(void *opaque, int version_id)
1449 {
1450     E1000State *s = opaque;
1451     s->received_tx_tso = true;
1452     return 0;
1453 }
1454 
1455 static bool e1000_mit_state_needed(void *opaque)
1456 {
1457     E1000State *s = opaque;
1458 
1459     return chkflag(MIT);
1460 }
1461 
1462 static bool e1000_full_mac_needed(void *opaque)
1463 {
1464     E1000State *s = opaque;
1465 
1466     return chkflag(MAC);
1467 }
1468 
1469 static bool e1000_tso_state_needed(void *opaque)
1470 {
1471     E1000State *s = opaque;
1472 
1473     return chkflag(TSO);
1474 }
1475 
/*
 * Optional migration subsection holding the interrupt-mitigation registers
 * and mit_irq_level.  It is included only when e1000_mit_state_needed()
 * returns true.
 */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1490 
/*
 * Optional migration subsection carrying the whole mac_reg[] array (0x8000
 * entries).  It is included only when e1000_full_mac_needed() returns true.
 */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1501 
/*
 * Optional migration subsection carrying tx.tso_props.  It is included
 * only when e1000_tso_state_needed() returns true; on load,
 * e1000_tx_tso_post_load() sets received_tx_tso.
 */
static const VMStateDescription vmstate_e1000_tx_tso_state = {
    .name = "e1000/tx_tso_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_tso_state_needed,
    .post_load = e1000_tx_tso_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
        VMSTATE_INT8(tx.tso_props.ip, E1000State),
        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1523 
/*
 * Main migration description of the device (stream version 2, accepting
 * version 1 as minimum).
 *
 * The TX offload properties are transferred through mig_props, which
 * e1000_pre_save() fills in before saving and e1000_post_load() copies
 * back into tx.props (and into tx.tso_props when the TSO subsection was
 * not received).  Only the mac_reg[] entries listed here are part of the
 * main section; the optional subsections are listed at the end.
 */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        VMSTATE_UINT8(mig_props.ipcss, E1000State),
        VMSTATE_UINT8(mig_props.ipcso, E1000State),
        VMSTATE_UINT16(mig_props.ipcse, E1000State),
        VMSTATE_UINT8(mig_props.tucss, E1000State),
        VMSTATE_UINT8(mig_props.tucso, E1000State),
        VMSTATE_UINT16(mig_props.tucse, E1000State),
        VMSTATE_UINT32(mig_props.paylen, E1000State),
        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
        VMSTATE_UINT16(mig_props.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(mig_props.ip, E1000State),
        VMSTATE_INT8(mig_props.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    /* Optional subsections, each gated by its own .needed predicate. */
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        &vmstate_e1000_tx_tso_state,
        NULL
    }
};
1608 
/*
 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
 * Note: the entries marked DevId are placeholders (0) in this template.
 * pci_e1000_realize() passes the template, together with the PCI device
 * id and the MAC address, to e1000x_core_prepare_eeprom(), which fills in
 * the device's eeprom_data.
 */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1623 
1624 /* PCI interface */
1625 
1626 static void
1627 e1000_mmio_setup(E1000State *d)
1628 {
1629     int i;
1630     const uint32_t excluded_regs[] = {
1631         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1632         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1633     };
1634 
1635     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1636                           "e1000-mmio", PNPMMIO_SIZE);
1637     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1638     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1639         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1640                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1641     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1642 }
1643 
1644 static void
1645 pci_e1000_uninit(PCIDevice *dev)
1646 {
1647     E1000State *d = E1000(dev);
1648 
1649     timer_del(d->autoneg_timer);
1650     timer_free(d->autoneg_timer);
1651     timer_del(d->mit_timer);
1652     timer_free(d->mit_timer);
1653     timer_del(d->flush_queue_timer);
1654     timer_free(d->flush_queue_timer);
1655     qemu_del_nic(d->nic);
1656 }
1657 
/*
 * Net-layer callbacks of the device; handed to qemu_new_nic() in
 * pci_e1000_realize().
 */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .link_status_changed = e1000_set_link_status,
};
1666 
1667 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1668                                 uint32_t val, int len)
1669 {
1670     E1000State *s = E1000(pci_dev);
1671 
1672     pci_default_write_config(pci_dev, address, val, len);
1673 
1674     if (range_covers_byte(address, len, PCI_COMMAND) &&
1675         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1676         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1677     }
1678 }
1679 
1680 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1681 {
1682     DeviceState *dev = DEVICE(pci_dev);
1683     E1000State *d = E1000(pci_dev);
1684     uint8_t *pci_conf;
1685     uint8_t *macaddr;
1686 
1687     pci_dev->config_write = e1000_write_config;
1688 
1689     pci_conf = pci_dev->config;
1690 
1691     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1692     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1693 
1694     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1695 
1696     e1000_mmio_setup(d);
1697 
1698     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1699 
1700     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1701 
1702     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1703     macaddr = d->conf.macaddr.a;
1704 
1705     e1000x_core_prepare_eeprom(d->eeprom_data,
1706                                e1000_eeprom_template,
1707                                sizeof(e1000_eeprom_template),
1708                                PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1709                                macaddr);
1710 
1711     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1712                           object_get_typename(OBJECT(d)), dev->id, d);
1713 
1714     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1715 
1716     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1717     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1718     d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1719                                         e1000_flush_queue_timer, d);
1720 }
1721 
1722 static void qdev_e1000_reset(DeviceState *dev)
1723 {
1724     E1000State *d = E1000(dev);
1725     e1000_reset(d);
1726 }
1727 
1728 static Property e1000_properties[] = {
1729     DEFINE_NIC_PROPERTIES(E1000State, conf),
1730     DEFINE_PROP_BIT("autonegotiation", E1000State,
1731                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1732     DEFINE_PROP_BIT("mitigation", E1000State,
1733                     compat_flags, E1000_FLAG_MIT_BIT, true),
1734     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1735                     compat_flags, E1000_FLAG_MAC_BIT, true),
1736     DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1737                     compat_flags, E1000_FLAG_TSO_BIT, true),
1738     DEFINE_PROP_END_OF_LIST(),
1739 };
1740 
/*
 * Static description of one e1000 device variant.  Entries of this type
 * are handed to e1000_class_init() as class data.
 */
typedef struct E1000Info {
    const char *name;       /* QOM type name of the variant */
    uint16_t device_id;     /* copied to PCIDeviceClass.device_id */
    uint8_t revision;       /* copied to PCIDeviceClass.revision */
    uint16_t phy_id2;       /* copied to E1000BaseClass.phy_id2 */
} E1000Info;
1747 
1748 static void e1000_class_init(ObjectClass *klass, void *data)
1749 {
1750     DeviceClass *dc = DEVICE_CLASS(klass);
1751     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1752     E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1753     const E1000Info *info = data;
1754 
1755     k->realize = pci_e1000_realize;
1756     k->exit = pci_e1000_uninit;
1757     k->romfile = "efi-e1000.rom";
1758     k->vendor_id = PCI_VENDOR_ID_INTEL;
1759     k->device_id = info->device_id;
1760     k->revision = info->revision;
1761     e->phy_id2 = info->phy_id2;
1762     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1763     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1764     dc->desc = "Intel Gigabit Ethernet";
1765     dc->reset = qdev_e1000_reset;
1766     dc->vmsd = &vmstate_e1000;
1767     dc->props = e1000_properties;
1768 }
1769 
1770 static void e1000_instance_init(Object *obj)
1771 {
1772     E1000State *n = E1000(obj);
1773     device_add_bootindex_property(obj, &n->conf.bootindex,
1774                                   "bootindex", "/ethernet-phy@0",
1775                                   DEVICE(n), NULL);
1776 }
1777 
1778 static const TypeInfo e1000_base_info = {
1779     .name          = TYPE_E1000_BASE,
1780     .parent        = TYPE_PCI_DEVICE,
1781     .instance_size = sizeof(E1000State),
1782     .instance_init = e1000_instance_init,
1783     .class_size    = sizeof(E1000BaseClass),
1784     .abstract      = true,
1785     .interfaces = (InterfaceInfo[]) {
1786         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1787         { },
1788     },
1789 };
1790 
1791 static const E1000Info e1000_devices[] = {
1792     {
1793         .name      = "e1000",
1794         .device_id = E1000_DEV_ID_82540EM,
1795         .revision  = 0x03,
1796         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1797     },
1798     {
1799         .name      = "e1000-82544gc",
1800         .device_id = E1000_DEV_ID_82544GC_COPPER,
1801         .revision  = 0x03,
1802         .phy_id2   = E1000_PHY_ID2_82544x,
1803     },
1804     {
1805         .name      = "e1000-82545em",
1806         .device_id = E1000_DEV_ID_82545EM_COPPER,
1807         .revision  = 0x03,
1808         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1809     },
1810 };
1811 
1812 static void e1000_register_types(void)
1813 {
1814     int i;
1815 
1816     type_register_static(&e1000_base_info);
1817     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1818         const E1000Info *info = &e1000_devices[i];
1819         TypeInfo type_info = {};
1820 
1821         type_info.name = info->name;
1822         type_info.parent = TYPE_E1000_BASE;
1823         type_info.class_data = (void *)info;
1824         type_info.class_init = e1000_class_init;
1825         type_info.instance_init = e1000_instance_init;
1826 
1827         type_register(&type_info);
1828     }
1829 }
1830 
1831 type_init(e1000_register_types)
1832