xref: /openbmc/qemu/hw/net/e1000.c (revision 24496b8d)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/hw.h"
30 #include "hw/pci/pci.h"
31 #include "net/net.h"
32 #include "net/checksum.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 #include "qemu/iov.h"
36 #include "qemu/range.h"
37 
38 #include "e1000x_common.h"
39 #include "trace.h"
40 
/* Ethernet broadcast destination address; compared against frame headers. */
static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};

/* Uncomment the define below to compile in DBGOUT() tracing. */
/* #define E1000_DEBUG */

#ifdef E1000_DEBUG
/* Debug categories; each enumerator is a bit position in debugflags. */
enum {
    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
};
/* Bit mask for debug category x (x is the enumerator name sans DEBUG_). */
#define DBGBIT(x)    (1<<DEBUG_##x)
/* Categories that are traced when E1000_DEBUG is defined. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/*
 * Print a message to stderr, prefixed with "e1000: ", if category `what`
 * is enabled in debugflags.
 */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
/* Tracing disabled: DBGOUT() expands to nothing, arguments are not evaluated. */
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif

/*
 * NOTE(review): presumably the sizes of the I/O-port and MMIO regions;
 * their users are outside this part of the file -- confirm.
 */
#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/* 14-byte Ethernet header plus a 4-byte VLAN tag. */
#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
68 
69 /*
70  * HW models:
71  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
72  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
73  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
74  *  Others never tested
75  */
76 
/*
 * Per-device state of the emulated e1000 NIC: register files, the
 * in-progress transmit packet, EEPROM bit-bang state, timers and
 * migration compatibility flags.
 */
typedef struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;              /* network backend handle */
    NICConf conf;               /* NIC configuration (conf.macaddr is used on reset) */
    MemoryRegion mmio;          /* NOTE(review): presumably the MMIO BAR -- set up outside this view */
    MemoryRegion io;            /* NOTE(review): presumably the I/O BAR -- set up outside this view */

    uint32_t mac_reg[0x8000];   /* MAC register file, indexed by register constant */
    uint16_t phy_reg[0x20];     /* PHY registers, accessed through MDIC */
    uint16_t eeprom_data[64];   /* EEPROM contents, 16-bit words */

    uint32_t rxbuf_size;        /* derived from RCTL by set_rx_control() */
    uint32_t rxbuf_min_shift;   /* derived from RCTL by set_rx_control() */
    /* Packet currently being assembled from transmit descriptors. */
    struct e1000_tx {
        unsigned char header[256];      /* saved copy of the TSO header bytes */
        unsigned char vlan_header[4];   /* VLAN tag to insert (VET + descriptor tag) */
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];          /* 4 bytes of headroom used when inserting a tag */
        unsigned char data[0x10000];    /* accumulated frame bytes */
        uint16_t size;                  /* number of valid bytes in data */
        unsigned char vlan_needed;      /* non-zero: insert vlan_header on transmit */
        unsigned char sum_needed;       /* checksum-offload option bits (POPTS) */
        bool cptse;                     /* current descriptor requests TCP segmentation */
        e1000x_txd_props props;         /* last non-TSO context descriptor */
        e1000x_txd_props tso_props;     /* last TSO context descriptor */
        uint16_t tso_frames;            /* TSO segments already sent for this packet */
    } tx;

    /* State of the bit-banged EEPROM interface driven through EECD. */
    struct {
        uint32_t val_in;    /* shifted in from guest driver */
        uint16_t bitnum_in;     /* number of bits shifted in so far */
        uint16_t bitnum_out;    /* index of the EEPROM bit presented on DO */
        uint16_t reading;       /* non-zero once a read opcode was decoded */
        uint32_t old_eecd;      /* last guest-written EECD bits, for edge detection */
    } eecd_state;

    QEMUTimer *autoneg_timer;  /* fires e1000_autoneg_timer() */

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

    QEMUTimer *flush_queue_timer;  /* re-armed on RCTL writes; RX is held while pending */

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_MAC_BIT 2
#define E1000_FLAG_TSO_BIT 3
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
    uint32_t compat_flags;      /* bitwise OR of E1000_FLAG_*; tested via chkflag() */
    /* NOTE(review): the three fields below look migration-related; most users are outside this view. */
    bool received_tx_tso;
    bool use_tso_for_migration; /* set according to the type of the last context descriptor */
    e1000x_txd_props mig_props;
} E1000State;
139 
/*
 * Test compatibility flag E1000_FLAG_<x>; requires a variable named `s`
 * of type E1000State * in the calling scope.
 */
#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)

/* Class data shared by all e1000 device models. */
typedef struct E1000BaseClass {
    PCIDeviceClass parent_class;
    uint16_t phy_id2;           /* value loaded into PHY_ID2 on reset */
} E1000BaseClass;

/* QOM type name of the abstract base device. */
#define TYPE_E1000_BASE "e1000-base"

/* Checked cast from a QOM object to E1000State. */
#define E1000(obj) \
    OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)

/* Checked casts from a class pointer / object to E1000BaseClass. */
#define E1000_DEVICE_CLASS(klass) \
     OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
#define E1000_DEVICE_GET_CLASS(obj) \
    OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
156 
157 static void
158 e1000_link_up(E1000State *s)
159 {
160     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
161 
162     /* E1000_STATUS_LU is tested by e1000_can_receive() */
163     qemu_flush_queued_packets(qemu_get_queue(s->nic));
164 }
165 
166 static void
167 e1000_autoneg_done(E1000State *s)
168 {
169     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
170 
171     /* E1000_STATUS_LU is tested by e1000_can_receive() */
172     qemu_flush_queued_packets(qemu_get_queue(s->nic));
173 }
174 
175 static bool
176 have_autoneg(E1000State *s)
177 {
178     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
179 }
180 
181 static void
182 set_phy_ctrl(E1000State *s, int index, uint16_t val)
183 {
184     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
185     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
186                                    MII_CR_RESET |
187                                    MII_CR_RESTART_AUTO_NEG);
188 
189     /*
190      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
191      * migrate during auto negotiation, after migration the link will be
192      * down.
193      */
194     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
195         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
196     }
197 }
198 
/*
 * Write handlers for PHY registers that need side effects, indexed by PHY
 * register number.  Registers without an entry are stored directly by
 * set_mdic().
 */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [PHY_CTRL] = set_phy_ctrl,
};

/* Number of slots in phyreg_writeops[]; used as a bounds check. */
enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };

/* Access capabilities of a PHY register: readable, writable, or both. */
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
/*
 * Per-register access capabilities, indexed by PHY register number.
 * Registers not listed are 0, i.e. neither readable nor writable.
 */
static const char phy_regcap[0x20] = {
    [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
    [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
    [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
    [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
    [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
    [PHY_AUTONEG_EXP] = PHY_R,
};
215 
/*
 * Reset values of the PHY registers; copied into E1000State.phy_reg by
 * e1000_reset().  Registers not listed reset to 0.
 *
 * PHY_ID2 documented in 8254x_GBe_SDM.pdf, p. 250
 */
static const uint16_t phy_reg_init[] = {
    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
                   MII_CR_FULL_DUPLEX |
                   MII_CR_AUTO_NEG_EN,

    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
                   MII_SR_LINK_STATUS |   /* link initially up */
                   MII_SR_AUTONEG_CAPS |
                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
                   MII_SR_PREAMBLE_SUPPRESS |
                   MII_SR_EXTENDED_STATUS |
                   MII_SR_10T_HD_CAPS |
                   MII_SR_10T_FD_CAPS |
                   MII_SR_100X_HD_CAPS |
                   MII_SR_100X_FD_CAPS,

    [PHY_ID1] = 0x141,
    /* [PHY_ID2] configured per DevId, from e1000_reset() */
    [PHY_AUTONEG_ADV] = 0xde1,
    [PHY_LP_ABILITY] = 0x1e0,
    [PHY_1000T_CTRL] = 0x0e00,
    [PHY_1000T_STATUS] = 0x3c00,
    [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
};
243 
/*
 * Reset values of the MAC registers; copied into E1000State.mac_reg by
 * e1000_reset().  Registers not listed reset to 0.  STATUS includes
 * E1000_STATUS_LU, i.e. the link is reported up after reset unless
 * e1000_reset() subsequently marks it down.
 */
static const uint32_t mac_reg_init[] = {
    [PBA]     = 0x00100030,
    [LEDCTL]  = 0x602,
    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};
257 
/*
 * Lower *curr to @value when @value is a smaller non-zero delay.
 *
 * Zero means "not set": a zero @value is ignored, and a zero *curr is
 * always replaced by a non-zero @value.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;
    }
    if (*curr != 0 && value >= *curr) {
        return;
    }
    *curr = value;
}
266 
267 static void
268 set_interrupt_cause(E1000State *s, int index, uint32_t val)
269 {
270     PCIDevice *d = PCI_DEVICE(s);
271     uint32_t pending_ints;
272     uint32_t mit_delay;
273 
274     s->mac_reg[ICR] = val;
275 
276     /*
277      * Make sure ICR and ICS registers have the same value.
278      * The spec says that the ICS register is write-only.  However in practice,
279      * on real hardware ICS is readable, and for reads it has the same value as
280      * ICR (except that ICS does not have the clear on read behaviour of ICR).
281      *
282      * The VxWorks PRO/1000 driver uses this behaviour.
283      */
284     s->mac_reg[ICS] = val;
285 
286     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
287     if (!s->mit_irq_level && pending_ints) {
288         /*
289          * Here we detect a potential raising edge. We postpone raising the
290          * interrupt line if we are inside the mitigation delay window
291          * (s->mit_timer_on == 1).
292          * We provide a partial implementation of interrupt mitigation,
293          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
294          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
295          * RADV; relative timers based on TIDV and RDTR are not implemented.
296          */
297         if (s->mit_timer_on) {
298             return;
299         }
300         if (chkflag(MIT)) {
301             /* Compute the next mitigation delay according to pending
302              * interrupts and the current values of RADV (provided
303              * RDTR!=0), TADV and ITR.
304              * Then rearm the timer.
305              */
306             mit_delay = 0;
307             if (s->mit_ide &&
308                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
309                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
310             }
311             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
312                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
313             }
314             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
315 
316             /*
317              * According to e1000 SPEC, the Ethernet controller guarantees
318              * a maximum observable interrupt rate of 7813 interrupts/sec.
319              * Thus if mit_delay < 500 then the delay should be set to the
320              * minimum delay possible which is 500.
321              */
322             mit_delay = (mit_delay < 500) ? 500 : mit_delay;
323 
324             s->mit_timer_on = 1;
325             timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
326                       mit_delay * 256);
327             s->mit_ide = 0;
328         }
329     }
330 
331     s->mit_irq_level = (pending_ints != 0);
332     pci_set_irq(d, s->mit_irq_level);
333 }
334 
335 static void
336 e1000_mit_timer(void *opaque)
337 {
338     E1000State *s = opaque;
339 
340     s->mit_timer_on = 0;
341     /* Call set_interrupt_cause to update the irq level (if necessary). */
342     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
343 }
344 
345 static void
346 set_ics(E1000State *s, int index, uint32_t val)
347 {
348     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
349         s->mac_reg[IMS]);
350     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
351 }
352 
353 static void
354 e1000_autoneg_timer(void *opaque)
355 {
356     E1000State *s = opaque;
357     if (!qemu_get_queue(s->nic)->link_down) {
358         e1000_autoneg_done(s);
359         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
360     }
361 }
362 
363 static void e1000_reset(void *opaque)
364 {
365     E1000State *d = opaque;
366     E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
367     uint8_t *macaddr = d->conf.macaddr.a;
368 
369     timer_del(d->autoneg_timer);
370     timer_del(d->mit_timer);
371     timer_del(d->flush_queue_timer);
372     d->mit_timer_on = 0;
373     d->mit_irq_level = 0;
374     d->mit_ide = 0;
375     memset(d->phy_reg, 0, sizeof d->phy_reg);
376     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
377     d->phy_reg[PHY_ID2] = edc->phy_id2;
378     memset(d->mac_reg, 0, sizeof d->mac_reg);
379     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
380     d->rxbuf_min_shift = 1;
381     memset(&d->tx, 0, sizeof d->tx);
382 
383     if (qemu_get_queue(d->nic)->link_down) {
384         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
385     }
386 
387     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
388 }
389 
390 static void
391 set_ctrl(E1000State *s, int index, uint32_t val)
392 {
393     /* RST is self clearing */
394     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
395 }
396 
397 static void
398 e1000_flush_queue_timer(void *opaque)
399 {
400     E1000State *s = opaque;
401 
402     qemu_flush_queued_packets(qemu_get_queue(s->nic));
403 }
404 
405 static void
406 set_rx_control(E1000State *s, int index, uint32_t val)
407 {
408     s->mac_reg[RCTL] = val;
409     s->rxbuf_size = e1000x_rxbufsize(val);
410     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
411     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
412            s->mac_reg[RCTL]);
413     timer_mod(s->flush_queue_timer,
414               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
415 }
416 
417 static void
418 set_mdic(E1000State *s, int index, uint32_t val)
419 {
420     uint32_t data = val & E1000_MDIC_DATA_MASK;
421     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
422 
423     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
424         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
425     else if (val & E1000_MDIC_OP_READ) {
426         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
427         if (!(phy_regcap[addr] & PHY_R)) {
428             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
429             val |= E1000_MDIC_ERROR;
430         } else
431             val = (val ^ data) | s->phy_reg[addr];
432     } else if (val & E1000_MDIC_OP_WRITE) {
433         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
434         if (!(phy_regcap[addr] & PHY_W)) {
435             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
436             val |= E1000_MDIC_ERROR;
437         } else {
438             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
439                 phyreg_writeops[addr](s, index, data);
440             } else {
441                 s->phy_reg[addr] = data;
442             }
443         }
444     }
445     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
446 
447     if (val & E1000_MDIC_INT_EN) {
448         set_ics(s, 0, E1000_ICR_MDAC);
449     }
450 }
451 
452 static uint32_t
453 get_eecd(E1000State *s, int index)
454 {
455     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
456 
457     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
458            s->eecd_state.bitnum_out, s->eecd_state.reading);
459     if (!s->eecd_state.reading ||
460         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
461           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
462         ret |= E1000_EECD_DO;
463     return ret;
464 }
465 
466 static void
467 set_eecd(E1000State *s, int index, uint32_t val)
468 {
469     uint32_t oldval = s->eecd_state.old_eecd;
470 
471     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
472             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
473     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
474         return;
475     }
476     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
477         s->eecd_state.val_in = 0;
478         s->eecd_state.bitnum_in = 0;
479         s->eecd_state.bitnum_out = 0;
480         s->eecd_state.reading = 0;
481     }
482     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
483         return;
484     }
485     if (!(E1000_EECD_SK & val)) {               /* falling edge */
486         s->eecd_state.bitnum_out++;
487         return;
488     }
489     s->eecd_state.val_in <<= 1;
490     if (val & E1000_EECD_DI)
491         s->eecd_state.val_in |= 1;
492     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
493         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
494         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
495             EEPROM_READ_OPCODE_MICROWIRE);
496     }
497     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
498            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
499            s->eecd_state.reading);
500 }
501 
502 static uint32_t
503 flash_eerd_read(E1000State *s, int x)
504 {
505     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
506 
507     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
508         return (s->mac_reg[EERD]);
509 
510     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
511         return (E1000_EEPROM_RW_REG_DONE | r);
512 
513     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
514            E1000_EEPROM_RW_REG_DONE | r);
515 }
516 
/*
 * Compute an Internet checksum over part of a packet and store it.
 *
 * @data: packet bytes
 * @n:    packet length
 * @sloc: offset at which the 16-bit big-endian checksum is stored
 * @css:  offset where checksumming starts
 * @cse:  offset of the last byte to checksum; 0 means "to end of packet"
 *
 * Nothing is written unless @sloc lies before the last checksummed byte.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t end = n;
    uint32_t partial;

    if (cse != 0 && cse < end) {
        end = cse + 1;
    }
    if (sloc >= end - 1) {
        return;
    }

    partial = net_checksum_add(end - css, data + css);
    stw_be_p(data + sloc, net_checksum_finish_nozero(partial));
}
529 
530 static inline void
531 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
532 {
533     if (!memcmp(arr, bcast, sizeof bcast)) {
534         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
535     } else if (arr[0] & 1) {
536         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
537     }
538 }
539 
540 static void
541 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
542 {
543     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
544                                     PTC1023, PTC1522 };
545 
546     NetClientState *nc = qemu_get_queue(s->nic);
547     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
548         nc->info->receive(nc, buf, size);
549     } else {
550         qemu_send_packet(nc, buf, size);
551     }
552     inc_tx_bcast_or_mcast_count(s, buf);
553     e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
554 }
555 
/*
 * Transmit the frame currently assembled in s->tx.
 *
 * For a TSO segment (tp->cptse) the IP and TCP/UDP headers in tp->data are
 * patched in place for this segment before sending.  Requested checksums
 * are then inserted, a VLAN tag is added if needed, the frame is sent and
 * the transmit statistics registers are updated.
 */
static void
xmit_seg(E1000State *s)
{
    uint16_t len;
    /* frames: number of TSO segments already sent for this packet */
    unsigned int frames = s->tx.tso_frames, css, sofar;
    struct e1000_tx *tp = &s->tx;
    /* Use the TSO context for TSO segments, the plain context otherwise. */
    struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;

    if (tp->cptse) {
        css = props->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (props->ip) {    /* IPv4 */
            /* 16-bit length at offset 2; 16-bit field at offset 4 advances per segment */
            stw_be_p(tp->data+css+2, tp->size - css);
            stw_be_p(tp->data+css+4,
                     lduw_be_p(tp->data + css + 4) + frames);
        } else {         /* IPv6 */
            /* 16-bit length field at offset 4 of the IP header */
            stw_be_p(tp->data+css+4, tp->size - css);
        }
        css = props->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
        if (props->tcp) {
            /* payload bytes carried by the segments sent so far */
            sofar = frames * props->mss;
            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
            if (props->paylen - sofar > props->mss) {
                /* not the last segment */
                tp->data[css + 13] &= ~9;    /* PSH, FIN */
            } else if (frames) {
                /* last segment of a packet that was split into several */
                e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
            }
        } else {    /* UDP */
            stw_be_p(tp->data+css+4, len);
        }
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            /* add pseudo-header length before checksum calculation */
            void *sp = tp->data + props->tucso;

            phsum = lduw_be_p(sp) + len;
            /* fold the carry back into the low 16 bits */
            phsum = (phsum >> 16) + (phsum & 0xffff);
            stw_be_p(sp, phsum);
        }
        tp->tso_frames++;
    }

    /* Insert the TCP/UDP and IP checksums if the descriptor asked for them. */
    if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
        putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
    }
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
        putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
    }
    if (tp->vlan_needed) {
        /*
         * Insert the 4-byte tag after the first 12 bytes of the frame by
         * sliding those 12 bytes 4 bytes towards lower addresses, into
         * tp->vlan, which sits immediately before tp->data in the struct.
         * The tag then occupies data[8..11] and the frame starts at tp->vlan.
         */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else {
        e1000_send_packet(s, tp->data, tp->size);
    }

    /* Update packet/octet counters; the "good" counters mirror the totals. */
    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
    e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
    s->mac_reg[GPTC] = s->mac_reg[TPT];
    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
}
622 
623 static void
624 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
625 {
626     PCIDevice *d = PCI_DEVICE(s);
627     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
628     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
629     unsigned int split_size = txd_lower & 0xffff, bytes, sz;
630     unsigned int msh = 0xfffff;
631     uint64_t addr;
632     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
633     struct e1000_tx *tp = &s->tx;
634 
635     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
636     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
637         if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
638             e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
639             s->use_tso_for_migration = 1;
640             tp->tso_frames = 0;
641         } else {
642             e1000x_read_tx_ctx_descr(xp, &tp->props);
643             s->use_tso_for_migration = 0;
644         }
645         return;
646     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
647         // data descriptor
648         if (tp->size == 0) {
649             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
650         }
651         tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
652     } else {
653         // legacy descriptor
654         tp->cptse = 0;
655     }
656 
657     if (e1000x_vlan_enabled(s->mac_reg) &&
658         e1000x_is_vlan_txd(txd_lower) &&
659         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
660         tp->vlan_needed = 1;
661         stw_be_p(tp->vlan_header,
662                       le16_to_cpu(s->mac_reg[VET]));
663         stw_be_p(tp->vlan_header + 2,
664                       le16_to_cpu(dp->upper.fields.special));
665     }
666 
667     addr = le64_to_cpu(dp->buffer_addr);
668     if (tp->cptse) {
669         msh = tp->tso_props.hdr_len + tp->tso_props.mss;
670         do {
671             bytes = split_size;
672             if (tp->size + bytes > msh)
673                 bytes = msh - tp->size;
674 
675             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
676             pci_dma_read(d, addr, tp->data + tp->size, bytes);
677             sz = tp->size + bytes;
678             if (sz >= tp->tso_props.hdr_len
679                 && tp->size < tp->tso_props.hdr_len) {
680                 memmove(tp->header, tp->data, tp->tso_props.hdr_len);
681             }
682             tp->size = sz;
683             addr += bytes;
684             if (sz == msh) {
685                 xmit_seg(s);
686                 memmove(tp->data, tp->header, tp->tso_props.hdr_len);
687                 tp->size = tp->tso_props.hdr_len;
688             }
689             split_size -= bytes;
690         } while (bytes && split_size);
691     } else {
692         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
693         pci_dma_read(d, addr, tp->data + tp->size, split_size);
694         tp->size += split_size;
695     }
696 
697     if (!(txd_lower & E1000_TXD_CMD_EOP))
698         return;
699     if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
700         xmit_seg(s);
701     }
702     tp->tso_frames = 0;
703     tp->sum_needed = 0;
704     tp->vlan_needed = 0;
705     tp->size = 0;
706     tp->cptse = 0;
707 }
708 
709 static uint32_t
710 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
711 {
712     PCIDevice *d = PCI_DEVICE(s);
713     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
714 
715     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
716         return 0;
717     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
718                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
719     dp->upper.data = cpu_to_le32(txd_upper);
720     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
721                   &dp->upper, sizeof(dp->upper));
722     return E1000_ICR_TXDW;
723 }
724 
725 static uint64_t tx_desc_base(E1000State *s)
726 {
727     uint64_t bah = s->mac_reg[TDBAH];
728     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
729 
730     return (bah << 32) + bal;
731 }
732 
733 static void
734 start_xmit(E1000State *s)
735 {
736     PCIDevice *d = PCI_DEVICE(s);
737     dma_addr_t base;
738     struct e1000_tx_desc desc;
739     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
740 
741     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
742         DBGOUT(TX, "tx disabled\n");
743         return;
744     }
745 
746     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
747         base = tx_desc_base(s) +
748                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
749         pci_dma_read(d, base, &desc, sizeof(desc));
750 
751         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
752                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
753                desc.upper.data);
754 
755         process_tx_desc(s, &desc);
756         cause |= txdesc_writeback(s, base, &desc);
757 
758         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
759             s->mac_reg[TDH] = 0;
760         /*
761          * the following could happen only if guest sw assigns
762          * bogus values to TDT/TDLEN.
763          * there's nothing too intelligent we could do about this.
764          */
765         if (s->mac_reg[TDH] == tdh_start ||
766             tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
767             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
768                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
769             break;
770         }
771     }
772     set_ics(s, 0, cause);
773 }
774 
775 static int
776 receive_filter(E1000State *s, const uint8_t *buf, int size)
777 {
778     uint32_t rctl = s->mac_reg[RCTL];
779     int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
780 
781     if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
782         e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
783         uint16_t vid = lduw_be_p(buf + 14);
784         uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
785                                  ((vid >> 5) & 0x7f));
786         if ((vfta & (1 << (vid & 0x1f))) == 0)
787             return 0;
788     }
789 
790     if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
791         return 1;
792     }
793 
794     if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
795         e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
796         return 1;
797     }
798 
799     if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
800         e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
801         return 1;
802     }
803 
804     return e1000x_rx_group_filter(s->mac_reg, buf);
805 }
806 
807 static void
808 e1000_set_link_status(NetClientState *nc)
809 {
810     E1000State *s = qemu_get_nic_opaque(nc);
811     uint32_t old_status = s->mac_reg[STATUS];
812 
813     if (nc->link_down) {
814         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
815     } else {
816         if (have_autoneg(s) &&
817             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
818             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
819         } else {
820             e1000_link_up(s);
821         }
822     }
823 
824     if (s->mac_reg[STATUS] != old_status)
825         set_ics(s, 0, E1000_ICR_LSC);
826 }
827 
828 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
829 {
830     int bufs;
831     /* Fast-path short packets */
832     if (total_size <= s->rxbuf_size) {
833         return s->mac_reg[RDH] != s->mac_reg[RDT];
834     }
835     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
836         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
837     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
838         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
839             s->mac_reg[RDT] - s->mac_reg[RDH];
840     } else {
841         return false;
842     }
843     return total_size <= bufs * s->rxbuf_size;
844 }
845 
846 static int
847 e1000_can_receive(NetClientState *nc)
848 {
849     E1000State *s = qemu_get_nic_opaque(nc);
850 
851     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
852         e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
853 }
854 
855 static uint64_t rx_desc_base(E1000State *s)
856 {
857     uint64_t bah = s->mac_reg[RDBAH];
858     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
859 
860     return (bah << 32) + bal;
861 }
862 
863 static void
864 e1000_receiver_overrun(E1000State *s, size_t size)
865 {
866     trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
867     e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
868     e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
869     set_ics(s, 0, E1000_ICS_RXO);
870 }
871 
872 static ssize_t
873 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
874 {
875     E1000State *s = qemu_get_nic_opaque(nc);
876     PCIDevice *d = PCI_DEVICE(s);
877     struct e1000_rx_desc desc;
878     dma_addr_t base;
879     unsigned int n, rdt;
880     uint32_t rdh_start;
881     uint16_t vlan_special = 0;
882     uint8_t vlan_status = 0;
883     uint8_t min_buf[MIN_BUF_SIZE];
884     struct iovec min_iov;
885     uint8_t *filter_buf = iov->iov_base;
886     size_t size = iov_size(iov, iovcnt);
887     size_t iov_ofs = 0;
888     size_t desc_offset;
889     size_t desc_size;
890     size_t total_size;
891 
892     if (!e1000x_hw_rx_enabled(s->mac_reg)) {
893         return -1;
894     }
895 
896     if (timer_pending(s->flush_queue_timer)) {
897         return 0;
898     }
899 
900     /* Pad to minimum Ethernet frame length */
901     if (size < sizeof(min_buf)) {
902         iov_to_buf(iov, iovcnt, 0, min_buf, size);
903         memset(&min_buf[size], 0, sizeof(min_buf) - size);
904         e1000x_inc_reg_if_not_full(s->mac_reg, RUC);
905         min_iov.iov_base = filter_buf = min_buf;
906         min_iov.iov_len = size = sizeof(min_buf);
907         iovcnt = 1;
908         iov = &min_iov;
909     } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
910         /* This is very unlikely, but may happen. */
911         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
912         filter_buf = min_buf;
913     }
914 
915     /* Discard oversized packets if !LPE and !SBP. */
916     if (e1000x_is_oversized(s->mac_reg, size)) {
917         return size;
918     }
919 
920     if (!receive_filter(s, filter_buf, size)) {
921         return size;
922     }
923 
924     if (e1000x_vlan_enabled(s->mac_reg) &&
925         e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
926         vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
927         iov_ofs = 4;
928         if (filter_buf == iov->iov_base) {
929             memmove(filter_buf + 4, filter_buf, 12);
930         } else {
931             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
932             while (iov->iov_len <= iov_ofs) {
933                 iov_ofs -= iov->iov_len;
934                 iov++;
935             }
936         }
937         vlan_status = E1000_RXD_STAT_VP;
938         size -= 4;
939     }
940 
941     rdh_start = s->mac_reg[RDH];
942     desc_offset = 0;
943     total_size = size + e1000x_fcs_len(s->mac_reg);
944     if (!e1000_has_rxbufs(s, total_size)) {
945         e1000_receiver_overrun(s, total_size);
946         return -1;
947     }
948     do {
949         desc_size = total_size - desc_offset;
950         if (desc_size > s->rxbuf_size) {
951             desc_size = s->rxbuf_size;
952         }
953         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
954         pci_dma_read(d, base, &desc, sizeof(desc));
955         desc.special = vlan_special;
956         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
957         if (desc.buffer_addr) {
958             if (desc_offset < size) {
959                 size_t iov_copy;
960                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
961                 size_t copy_size = size - desc_offset;
962                 if (copy_size > s->rxbuf_size) {
963                     copy_size = s->rxbuf_size;
964                 }
965                 do {
966                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
967                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
968                     copy_size -= iov_copy;
969                     ba += iov_copy;
970                     iov_ofs += iov_copy;
971                     if (iov_ofs == iov->iov_len) {
972                         iov++;
973                         iov_ofs = 0;
974                     }
975                 } while (copy_size);
976             }
977             desc_offset += desc_size;
978             desc.length = cpu_to_le16(desc_size);
979             if (desc_offset >= total_size) {
980                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
981             } else {
982                 /* Guest zeroing out status is not a hardware requirement.
983                    Clear EOP in case guest didn't do it. */
984                 desc.status &= ~E1000_RXD_STAT_EOP;
985             }
986         } else { // as per intel docs; skip descriptors with null buf addr
987             DBGOUT(RX, "Null RX descriptor!!\n");
988         }
989         pci_dma_write(d, base, &desc, sizeof(desc));
990 
991         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
992             s->mac_reg[RDH] = 0;
993         /* see comment in start_xmit; same here */
994         if (s->mac_reg[RDH] == rdh_start ||
995             rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
996             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
997                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
998             e1000_receiver_overrun(s, total_size);
999             return -1;
1000         }
1001     } while (desc_offset < total_size);
1002 
1003     e1000x_update_rx_total_stats(s->mac_reg, size, total_size);
1004 
1005     n = E1000_ICS_RXT0;
1006     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1007         rdt += s->mac_reg[RDLEN] / sizeof(desc);
1008     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1009         s->rxbuf_min_shift)
1010         n |= E1000_ICS_RXDMT0;
1011 
1012     set_ics(s, 0, n);
1013 
1014     return size;
1015 }
1016 
1017 static ssize_t
1018 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1019 {
1020     const struct iovec iov = {
1021         .iov_base = (uint8_t *)buf,
1022         .iov_len = size
1023     };
1024 
1025     return e1000_receive_iov(nc, &iov, 1);
1026 }
1027 
1028 static uint32_t
1029 mac_readreg(E1000State *s, int index)
1030 {
1031     return s->mac_reg[index];
1032 }
1033 
1034 static uint32_t
1035 mac_low4_read(E1000State *s, int index)
1036 {
1037     return s->mac_reg[index] & 0xf;
1038 }
1039 
1040 static uint32_t
1041 mac_low11_read(E1000State *s, int index)
1042 {
1043     return s->mac_reg[index] & 0x7ff;
1044 }
1045 
1046 static uint32_t
1047 mac_low13_read(E1000State *s, int index)
1048 {
1049     return s->mac_reg[index] & 0x1fff;
1050 }
1051 
1052 static uint32_t
1053 mac_low16_read(E1000State *s, int index)
1054 {
1055     return s->mac_reg[index] & 0xffff;
1056 }
1057 
1058 static uint32_t
1059 mac_icr_read(E1000State *s, int index)
1060 {
1061     uint32_t ret = s->mac_reg[ICR];
1062 
1063     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1064     set_interrupt_cause(s, 0, 0);
1065     return ret;
1066 }
1067 
1068 static uint32_t
1069 mac_read_clr4(E1000State *s, int index)
1070 {
1071     uint32_t ret = s->mac_reg[index];
1072 
1073     s->mac_reg[index] = 0;
1074     return ret;
1075 }
1076 
1077 static uint32_t
1078 mac_read_clr8(E1000State *s, int index)
1079 {
1080     uint32_t ret = s->mac_reg[index];
1081 
1082     s->mac_reg[index] = 0;
1083     s->mac_reg[index-1] = 0;
1084     return ret;
1085 }
1086 
1087 static void
1088 mac_writereg(E1000State *s, int index, uint32_t val)
1089 {
1090     uint32_t macaddr[2];
1091 
1092     s->mac_reg[index] = val;
1093 
1094     if (index == RA + 1) {
1095         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1096         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1097         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1098     }
1099 }
1100 
1101 static void
1102 set_rdt(E1000State *s, int index, uint32_t val)
1103 {
1104     s->mac_reg[index] = val & 0xffff;
1105     if (e1000_has_rxbufs(s, 1)) {
1106         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1107     }
1108 }
1109 
1110 static void
1111 set_16bit(E1000State *s, int index, uint32_t val)
1112 {
1113     s->mac_reg[index] = val & 0xffff;
1114 }
1115 
1116 static void
1117 set_dlen(E1000State *s, int index, uint32_t val)
1118 {
1119     s->mac_reg[index] = val & 0xfff80;
1120 }
1121 
1122 static void
1123 set_tctl(E1000State *s, int index, uint32_t val)
1124 {
1125     s->mac_reg[index] = val;
1126     s->mac_reg[TDT] &= 0xffff;
1127     start_xmit(s);
1128 }
1129 
1130 static void
1131 set_icr(E1000State *s, int index, uint32_t val)
1132 {
1133     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1134     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1135 }
1136 
1137 static void
1138 set_imc(E1000State *s, int index, uint32_t val)
1139 {
1140     s->mac_reg[IMS] &= ~val;
1141     set_ics(s, 0, 0);
1142 }
1143 
1144 static void
1145 set_ims(E1000State *s, int index, uint32_t val)
1146 {
1147     s->mac_reg[IMS] |= val;
1148     set_ics(s, 0, 0);
1149 }
1150 
/*
 * Read dispatch table, indexed by register word index (the value
 * e1000_mmio_read() computes as (addr & 0x1ffff) >> 2).  Each entry is the
 * handler invoked for a guest read of that register; entries left NULL are
 * reported as unknown registers by e1000_mmio_read().
 *
 * getreg(x) is shorthand for a plain mac_readreg entry.
 */
#define getreg(x)    [x] = mac_readreg
static uint32_t (*macreg_readops[])(E1000State *, int) = {
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),

    /* Read-to-clear registers; the clr8 variant also zeroes [index - 1] */
    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    /* Registers with dedicated read handlers */
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,
    /* Registers of which only the low N bits are returned */
    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
    [RDFPC]   = mac_low13_read,
    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
    [TDFPC]   = mac_low13_read,
    [AIT]     = mac_low16_read,

    /* Register ranges (GNU range designators) */
    [CRCERRS ... MPC]   = &mac_readreg,
    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
    [FFLT ... FFLT+6]   = &mac_low11_read,
    [RA ... RA+31]      = &mac_readreg,
    [WUPM ... WUPM+31]  = &mac_readreg,
    [MTA ... MTA+127]   = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
    [FFMT ... FFMT+254] = &mac_low4_read,
    [FFVT ... FFVT+254] = &mac_readreg,
    [PBM ... PBM+16383] = &mac_readreg,
};
/* Number of entries in macreg_readops; used to bounds-check the index */
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1205 
/*
 * Write dispatch table, indexed by register word index (the value
 * e1000_mmio_write() computes as (addr & 0x1ffff) >> 2).  A register that
 * has no entry here but does have one in macreg_readops is logged as
 * read-only by e1000_mmio_write(); one with neither is logged as unknown.
 *
 * putreg(x) is shorthand for a plain mac_writereg entry.
 */
#define putreg(x)    [x] = mac_writereg
static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
    putreg(WUS),      putreg(AIT),

    /* Registers whose writes are masked or have side effects */
    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
    [ITR]    = set_16bit,

    /* Register ranges (GNU range designators), all plain stores */
    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
    [FFLT ... FFLT+6]   = &mac_writereg,
    [RA ... RA+31]      = &mac_writereg,
    [WUPM ... WUPM+31]  = &mac_writereg,
    [MTA ... MTA+127]   = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
    [PBM ... PBM+16383] = &mac_writereg,
};

/* Number of entries in macreg_writeops; used to bounds-check the index */
enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1235 
/* Low two bits of each mac_reg_access[] entry */
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

/* Entry meaning "accessible only while compat flag E1000_FLAG_<x> is set" */
#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implemented
 *
 * The MMIO handlers consult this table: when "n" is set, the access is
 * carried out only if (compat_flags & (entry >> 2)) is non-zero; "p" only
 * triggers a debug message.  Registers not listed default to 0, i.e.
 * unconditional access.
 *
 * NOTE(review): for registers that occupy a range in the read/write tables
 * (e.g. IP6AT, WUPM, FFVT, PBM) only the first word is listed here --
 * confirm whether the remaining words are meant to be unconditional. */
static const uint8_t mac_reg_access[0x8000] = {
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1288 
1289 static void
1290 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1291                  unsigned size)
1292 {
1293     E1000State *s = opaque;
1294     unsigned int index = (addr & 0x1ffff) >> 2;
1295 
1296     if (index < NWRITEOPS && macreg_writeops[index]) {
1297         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1298             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1299             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1300                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1301                        "It is not fully implemented.\n", index<<2);
1302             }
1303             macreg_writeops[index](s, index, val);
1304         } else {    /* "flag needed" bit is set, but the flag is not active */
1305             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1306                    index<<2);
1307         }
1308     } else if (index < NREADOPS && macreg_readops[index]) {
1309         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1310                index<<2, val);
1311     } else {
1312         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1313                index<<2, val);
1314     }
1315 }
1316 
1317 static uint64_t
1318 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1319 {
1320     E1000State *s = opaque;
1321     unsigned int index = (addr & 0x1ffff) >> 2;
1322 
1323     if (index < NREADOPS && macreg_readops[index]) {
1324         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1325             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1326             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1327                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1328                        "It is not fully implemented.\n", index<<2);
1329             }
1330             return macreg_readops[index](s, index);
1331         } else {    /* "flag needed" bit is set, but the flag is not active */
1332             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1333                    index<<2);
1334         }
1335     } else {
1336         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1337     }
1338     return 0;
1339 }
1340 
/*
 * Access callbacks for the "e1000-mmio" region (see e1000_mmio_setup()).
 * The registers are little-endian and the handlers are implemented for
 * 4-byte accesses only (.impl min/max access size of 4).
 */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1350 
1351 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1352                               unsigned size)
1353 {
1354     E1000State *s = opaque;
1355 
1356     (void)s;
1357     return 0;
1358 }
1359 
1360 static void e1000_io_write(void *opaque, hwaddr addr,
1361                            uint64_t val, unsigned size)
1362 {
1363     E1000State *s = opaque;
1364 
1365     (void)s;
1366 }
1367 
/*
 * Access callbacks for the "e1000-io" region (see e1000_mmio_setup()).
 * Both handlers are stubs: reads return 0 and writes are discarded.
 */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1373 
/*
 * VMState field test used by vmstate_e1000: true only for a version 1
 * stream.  @opaque is not used.
 */
static bool is_version_1(void *opaque, int version_id)
{
    const bool v1 = (version_id == 1);

    (void)opaque;
    return v1;
}
1378 
1379 static int e1000_pre_save(void *opaque)
1380 {
1381     E1000State *s = opaque;
1382     NetClientState *nc = qemu_get_queue(s->nic);
1383 
1384     /* If the mitigation timer is active, emulate a timeout now. */
1385     if (s->mit_timer_on) {
1386         e1000_mit_timer(s);
1387     }
1388 
1389     /*
1390      * If link is down and auto-negotiation is supported and ongoing,
1391      * complete auto-negotiation immediately. This allows us to look
1392      * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1393      */
1394     if (nc->link_down && have_autoneg(s)) {
1395         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1396     }
1397 
1398     /* Decide which set of props to migrate in the main structure */
1399     if (chkflag(TSO) || !s->use_tso_for_migration) {
1400         /* Either we're migrating with the extra subsection, in which
1401          * case the mig_props is always 'props' OR
1402          * we've not got the subsection, but 'props' was the last
1403          * updated.
1404          */
1405         s->mig_props = s->tx.props;
1406     } else {
1407         /* We're not using the subsection, and 'tso_props' was
1408          * the last updated.
1409          */
1410         s->mig_props = s->tx.tso_props;
1411     }
1412     return 0;
1413 }
1414 
1415 static int e1000_post_load(void *opaque, int version_id)
1416 {
1417     E1000State *s = opaque;
1418     NetClientState *nc = qemu_get_queue(s->nic);
1419 
1420     if (!chkflag(MIT)) {
1421         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1422             s->mac_reg[TADV] = 0;
1423         s->mit_irq_level = false;
1424     }
1425     s->mit_ide = 0;
1426     s->mit_timer_on = false;
1427 
1428     /* nc.link_down can't be migrated, so infer link_down according
1429      * to link status bit in mac_reg[STATUS].
1430      * Alternatively, restart link negotiation if it was in progress. */
1431     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1432 
1433     if (have_autoneg(s) &&
1434         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1435         nc->link_down = false;
1436         timer_mod(s->autoneg_timer,
1437                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1438     }
1439 
1440     s->tx.props = s->mig_props;
1441     if (!s->received_tx_tso) {
1442         /* We received only one set of offload data (tx.props)
1443          * and haven't got tx.tso_props.  The best we can do
1444          * is dupe the data.
1445          */
1446         s->tx.tso_props = s->mig_props;
1447     }
1448     return 0;
1449 }
1450 
1451 static int e1000_tx_tso_post_load(void *opaque, int version_id)
1452 {
1453     E1000State *s = opaque;
1454     s->received_tx_tso = true;
1455     return 0;
1456 }
1457 
1458 static bool e1000_mit_state_needed(void *opaque)
1459 {
1460     E1000State *s = opaque;
1461 
1462     return chkflag(MIT);
1463 }
1464 
1465 static bool e1000_full_mac_needed(void *opaque)
1466 {
1467     E1000State *s = opaque;
1468 
1469     return chkflag(MAC);
1470 }
1471 
1472 static bool e1000_tso_state_needed(void *opaque)
1473 {
1474     E1000State *s = opaque;
1475 
1476     return chkflag(TSO);
1477 }
1478 
/*
 * Optional migration subsection carrying the interrupt mitigation
 * registers and the mitigated IRQ level.  Present only when
 * e1000_mit_state_needed() returns true.  Field order defines the wire
 * format and must not change.
 */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1493 
/*
 * Optional migration subsection carrying the whole mac_reg array (0x8000
 * 32-bit words).  Present only when e1000_full_mac_needed() returns true.
 */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1504 
/*
 * Optional migration subsection carrying the TSO offload context
 * (tx.tso_props).  Present only when e1000_tso_state_needed() returns
 * true; on load, e1000_tx_tso_post_load() records that it was received.
 * Field order defines the wire format and must not change.
 */
static const VMStateDescription vmstate_e1000_tx_tso_state = {
    .name = "e1000/tx_tso_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_tso_state_needed,
    .post_load = e1000_tx_tso_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
        VMSTATE_INT8(tx.tso_props.ip, E1000State),
        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1526 
/*
 * Main migration description of the device (current version 2, oldest
 * accepted version 1).
 *
 * The offload context is transferred through the temporary mig_props copy,
 * which e1000_pre_save() fills and e1000_post_load() distributes.  Only a
 * fixed selection of MAC registers is part of this section; the complete
 * register file travels in the optional "e1000/full_mac_state" subsection.
 * Field order defines the wire format and must not change.
 */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        /* Placeholder gated on is_version_1, i.e. only for v1 streams */
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* EEPROM access state */
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* Offload context (via mig_props) interleaved with other TX state */
        VMSTATE_UINT8(mig_props.ipcss, E1000State),
        VMSTATE_UINT8(mig_props.ipcso, E1000State),
        VMSTATE_UINT16(mig_props.ipcse, E1000State),
        VMSTATE_UINT8(mig_props.tucss, E1000State),
        VMSTATE_UINT8(mig_props.tucso, E1000State),
        VMSTATE_UINT16(mig_props.tucse, E1000State),
        VMSTATE_UINT32(mig_props.paylen, E1000State),
        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
        VMSTATE_UINT16(mig_props.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(mig_props.ip, E1000State),
        VMSTATE_INT8(mig_props.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        /* EEPROM contents and PHY registers */
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individually migrated MAC registers */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Register arrays: 32 words at RA, 128 at MTA, 128 at VFTA */
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    /* Optional subsections, each gated by its own .needed callback */
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        &vmstate_e1000_tx_tso_state,
        NULL
    }
};
1611 
/*
 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
 *
 * This is a template of 64 16-bit words.  pci_e1000_realize() passes it,
 * together with the device id and the MAC address, to
 * e1000x_core_prepare_eeprom(), which presumably fills in the words marked
 * DevId below -- confirm against that helper.
 */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1626 
1627 /* PCI interface */
1628 
1629 static void
1630 e1000_mmio_setup(E1000State *d)
1631 {
1632     int i;
1633     const uint32_t excluded_regs[] = {
1634         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1635         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1636     };
1637 
1638     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1639                           "e1000-mmio", PNPMMIO_SIZE);
1640     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1641     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1642         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1643                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1644     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1645 }
1646 
1647 static void
1648 pci_e1000_uninit(PCIDevice *dev)
1649 {
1650     E1000State *d = E1000(dev);
1651 
1652     timer_del(d->autoneg_timer);
1653     timer_free(d->autoneg_timer);
1654     timer_del(d->mit_timer);
1655     timer_free(d->mit_timer);
1656     timer_del(d->flush_queue_timer);
1657     timer_free(d->flush_queue_timer);
1658     qemu_del_nic(d->nic);
1659 }
1660 
/*
 * Net client callbacks of the device; handed to qemu_new_nic() in
 * pci_e1000_realize().
 */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .link_status_changed = e1000_set_link_status,
};
1669 
1670 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1671                                 uint32_t val, int len)
1672 {
1673     E1000State *s = E1000(pci_dev);
1674 
1675     pci_default_write_config(pci_dev, address, val, len);
1676 
1677     if (range_covers_byte(address, len, PCI_COMMAND) &&
1678         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1679         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1680     }
1681 }
1682 
1683 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1684 {
1685     DeviceState *dev = DEVICE(pci_dev);
1686     E1000State *d = E1000(pci_dev);
1687     uint8_t *pci_conf;
1688     uint8_t *macaddr;
1689 
1690     pci_dev->config_write = e1000_write_config;
1691 
1692     pci_conf = pci_dev->config;
1693 
1694     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1695     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1696 
1697     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1698 
1699     e1000_mmio_setup(d);
1700 
1701     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1702 
1703     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1704 
1705     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1706     macaddr = d->conf.macaddr.a;
1707 
1708     e1000x_core_prepare_eeprom(d->eeprom_data,
1709                                e1000_eeprom_template,
1710                                sizeof(e1000_eeprom_template),
1711                                PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1712                                macaddr);
1713 
1714     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1715                           object_get_typename(OBJECT(d)), dev->id, d);
1716 
1717     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1718 
1719     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1720     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1721     d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1722                                         e1000_flush_queue_timer, d);
1723 }
1724 
1725 static void qdev_e1000_reset(DeviceState *dev)
1726 {
1727     E1000State *d = E1000(dev);
1728     e1000_reset(d);
1729 }
1730 
1731 static Property e1000_properties[] = {
1732     DEFINE_NIC_PROPERTIES(E1000State, conf),
1733     DEFINE_PROP_BIT("autonegotiation", E1000State,
1734                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1735     DEFINE_PROP_BIT("mitigation", E1000State,
1736                     compat_flags, E1000_FLAG_MIT_BIT, true),
1737     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1738                     compat_flags, E1000_FLAG_MAC_BIT, true),
1739     DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1740                     compat_flags, E1000_FLAG_TSO_BIT, true),
1741     DEFINE_PROP_END_OF_LIST(),
1742 };
1743 
/*
 * Per-model parameters.  One entry exists per concrete e1000 variant; it is
 * handed to e1000_class_init() as class_data.
 */
struct E1000Info {
    const char *name;       /* QOM type name the model is registered under */
    uint16_t   device_id;   /* copied into PCIDeviceClass.device_id */
    uint8_t    revision;    /* copied into PCIDeviceClass.revision */
    uint16_t   phy_id2;     /* copied into E1000BaseClass.phy_id2 */
};
typedef struct E1000Info E1000Info;
1750 
1751 static void e1000_class_init(ObjectClass *klass, void *data)
1752 {
1753     DeviceClass *dc = DEVICE_CLASS(klass);
1754     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1755     E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1756     const E1000Info *info = data;
1757 
1758     k->realize = pci_e1000_realize;
1759     k->exit = pci_e1000_uninit;
1760     k->romfile = "efi-e1000.rom";
1761     k->vendor_id = PCI_VENDOR_ID_INTEL;
1762     k->device_id = info->device_id;
1763     k->revision = info->revision;
1764     e->phy_id2 = info->phy_id2;
1765     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1766     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1767     dc->desc = "Intel Gigabit Ethernet";
1768     dc->reset = qdev_e1000_reset;
1769     dc->vmsd = &vmstate_e1000;
1770     dc->props = e1000_properties;
1771 }
1772 
1773 static void e1000_instance_init(Object *obj)
1774 {
1775     E1000State *n = E1000(obj);
1776     device_add_bootindex_property(obj, &n->conf.bootindex,
1777                                   "bootindex", "/ethernet-phy@0",
1778                                   DEVICE(n), NULL);
1779 }
1780 
1781 static const TypeInfo e1000_base_info = {
1782     .name          = TYPE_E1000_BASE,
1783     .parent        = TYPE_PCI_DEVICE,
1784     .instance_size = sizeof(E1000State),
1785     .instance_init = e1000_instance_init,
1786     .class_size    = sizeof(E1000BaseClass),
1787     .abstract      = true,
1788     .interfaces = (InterfaceInfo[]) {
1789         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1790         { },
1791     },
1792 };
1793 
1794 static const E1000Info e1000_devices[] = {
1795     {
1796         .name      = "e1000",
1797         .device_id = E1000_DEV_ID_82540EM,
1798         .revision  = 0x03,
1799         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1800     },
1801     {
1802         .name      = "e1000-82544gc",
1803         .device_id = E1000_DEV_ID_82544GC_COPPER,
1804         .revision  = 0x03,
1805         .phy_id2   = E1000_PHY_ID2_82544x,
1806     },
1807     {
1808         .name      = "e1000-82545em",
1809         .device_id = E1000_DEV_ID_82545EM_COPPER,
1810         .revision  = 0x03,
1811         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1812     },
1813 };
1814 
1815 static void e1000_register_types(void)
1816 {
1817     int i;
1818 
1819     type_register_static(&e1000_base_info);
1820     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1821         const E1000Info *info = &e1000_devices[i];
1822         TypeInfo type_info = {};
1823 
1824         type_info.name = info->name;
1825         type_info.parent = TYPE_E1000_BASE;
1826         type_info.class_data = (void *)info;
1827         type_info.class_init = e1000_class_init;
1828         type_info.instance_init = e1000_instance_init;
1829 
1830         type_register(&type_info);
1831     }
1832 }
1833 
1834 type_init(e1000_register_types)
1835