xref: /openbmc/qemu/hw/net/e1000.c (revision 32e70aad)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/hw.h"
30 #include "hw/pci/pci.h"
31 #include "net/net.h"
32 #include "net/checksum.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 #include "qemu/iov.h"
36 #include "qemu/module.h"
37 #include "qemu/range.h"
38 
39 #include "e1000x_common.h"
40 #include "trace.h"
41 
/* Ethernet broadcast destination address: six all-ones octets. */
static const uint8_t bcast[6] = {
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};
43 
44 /* #define E1000_DEBUG */
45 
#ifdef E1000_DEBUG
/* Debug categories; each enumerator is the bit position used by DBGBIT(). */
enum {
    DEBUG_GENERAL,
    DEBUG_IO,
    DEBUG_MMIO,
    DEBUG_INTERRUPT,
    DEBUG_RX,
    DEBUG_TX,
    DEBUG_MDIC,
    DEBUG_EEPROM,
    DEBUG_UNKNOWN,
    DEBUG_TXSUM,
    DEBUG_TXERR,
    DEBUG_RXERR,
    DEBUG_RXFILTER,
    DEBUG_PHY,
    DEBUG_NOTYET,
};
#define DBGBIT(x)    (1 << DEBUG_##x)

/* Categories that are printed; TX errors and general messages by default. */
static int debugflags = DBGBIT(GENERAL) | DBGBIT(TXERR);

/* Print a message to stderr when its category is enabled in debugflags. */
#define DBGOUT(what, fmt, ...) \
    do { \
        if (debugflags & DBGBIT(what)) { \
            fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
        } \
    } while (0)
#else
/* Debugging compiled out: DBGOUT expands to an empty statement. */
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif
63 
/* Sizes of the I/O-port and MMIO regions. */
#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000

/* Min. octets in an ethernet frame sans FCS */
#define MIN_BUF_SIZE      60

/* 14-byte Ethernet header plus a 4-byte tag. */
#define MAXIMUM_ETHERNET_HDR_LEN (14 + 4)
69 
70 /*
71  * HW models:
72  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
73  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
74  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
75  *  Others never tested
76  */
77 
78 typedef struct E1000State_st {
79     /*< private >*/
80     PCIDevice parent_obj;
81     /*< public >*/
82 
83     NICState *nic;
84     NICConf conf;
85     MemoryRegion mmio;
86     MemoryRegion io;
87 
88     uint32_t mac_reg[0x8000];
89     uint16_t phy_reg[0x20];
90     uint16_t eeprom_data[64];
91 
92     uint32_t rxbuf_size;
93     uint32_t rxbuf_min_shift;
94     struct e1000_tx {
95         unsigned char header[256];
96         unsigned char vlan_header[4];
97         /* Fields vlan and data must not be reordered or separated. */
98         unsigned char vlan[4];
99         unsigned char data[0x10000];
100         uint16_t size;
101         unsigned char vlan_needed;
102         unsigned char sum_needed;
103         bool cptse;
104         e1000x_txd_props props;
105         e1000x_txd_props tso_props;
106         uint16_t tso_frames;
107     } tx;
108 
109     struct {
110         uint32_t val_in;    /* shifted in from guest driver */
111         uint16_t bitnum_in;
112         uint16_t bitnum_out;
113         uint16_t reading;
114         uint32_t old_eecd;
115     } eecd_state;
116 
117     QEMUTimer *autoneg_timer;
118 
119     QEMUTimer *mit_timer;      /* Mitigation timer. */
120     bool mit_timer_on;         /* Mitigation timer is running. */
121     bool mit_irq_level;        /* Tracks interrupt pin level. */
122     uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
123 
124     QEMUTimer *flush_queue_timer;
125 
126 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
127 #define E1000_FLAG_AUTONEG_BIT 0
128 #define E1000_FLAG_MIT_BIT 1
129 #define E1000_FLAG_MAC_BIT 2
130 #define E1000_FLAG_TSO_BIT 3
131 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
132 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
133 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
134 #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
135     uint32_t compat_flags;
136     bool received_tx_tso;
137     bool use_tso_for_migration;
138     e1000x_txd_props mig_props;
139 } E1000State;
140 
141 #define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
142 
143 typedef struct E1000BaseClass {
144     PCIDeviceClass parent_class;
145     uint16_t phy_id2;
146 } E1000BaseClass;
147 
148 #define TYPE_E1000_BASE "e1000-base"
149 
150 #define E1000(obj) \
151     OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)
152 
153 #define E1000_DEVICE_CLASS(klass) \
154      OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
155 #define E1000_DEVICE_GET_CLASS(obj) \
156     OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
157 
158 static void
159 e1000_link_up(E1000State *s)
160 {
161     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
162 
163     /* E1000_STATUS_LU is tested by e1000_can_receive() */
164     qemu_flush_queued_packets(qemu_get_queue(s->nic));
165 }
166 
167 static void
168 e1000_autoneg_done(E1000State *s)
169 {
170     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
171 
172     /* E1000_STATUS_LU is tested by e1000_can_receive() */
173     qemu_flush_queued_packets(qemu_get_queue(s->nic));
174 }
175 
176 static bool
177 have_autoneg(E1000State *s)
178 {
179     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
180 }
181 
182 static void
183 set_phy_ctrl(E1000State *s, int index, uint16_t val)
184 {
185     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
186     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
187                                    MII_CR_RESET |
188                                    MII_CR_RESTART_AUTO_NEG);
189 
190     /*
191      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
192      * migrate during auto negotiation, after migration the link will be
193      * down.
194      */
195     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
196         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
197     }
198 }
199 
200 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
201     [PHY_CTRL] = set_phy_ctrl,
202 };
203 
204 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
205 
206 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
207 static const char phy_regcap[0x20] = {
208     [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
209     [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
210     [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
211     [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
212     [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
213     [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
214     [PHY_AUTONEG_EXP] = PHY_R,
215 };
216 
217 /* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
218 static const uint16_t phy_reg_init[] = {
219     [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
220                    MII_CR_FULL_DUPLEX |
221                    MII_CR_AUTO_NEG_EN,
222 
223     [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
224                    MII_SR_LINK_STATUS |   /* link initially up */
225                    MII_SR_AUTONEG_CAPS |
226                    /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
227                    MII_SR_PREAMBLE_SUPPRESS |
228                    MII_SR_EXTENDED_STATUS |
229                    MII_SR_10T_HD_CAPS |
230                    MII_SR_10T_FD_CAPS |
231                    MII_SR_100X_HD_CAPS |
232                    MII_SR_100X_FD_CAPS,
233 
234     [PHY_ID1] = 0x141,
235     /* [PHY_ID2] configured per DevId, from e1000_reset() */
236     [PHY_AUTONEG_ADV] = 0xde1,
237     [PHY_LP_ABILITY] = 0x1e0,
238     [PHY_1000T_CTRL] = 0x0e00,
239     [PHY_1000T_STATUS] = 0x3c00,
240     [M88E1000_PHY_SPEC_CTRL] = 0x360,
241     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
242     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
243 };
244 
245 static const uint32_t mac_reg_init[] = {
246     [PBA]     = 0x00100030,
247     [LEDCTL]  = 0x602,
248     [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
249                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
250     [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
251                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
252                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
253                 E1000_STATUS_LU,
254     [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
255                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
256                 E1000_MANC_RMCP_EN,
257 };
258 
/*
 * Lower *curr to value when value is a non-zero candidate that is smaller
 * than the current delay.  *curr == 0 means "not set yet", so any non-zero
 * value is accepted in that case.  A zero value is ignored.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;
    }
    if (*curr != 0 && *curr <= value) {
        return;
    }
    *curr = value;
}
267 
268 static void
269 set_interrupt_cause(E1000State *s, int index, uint32_t val)
270 {
271     PCIDevice *d = PCI_DEVICE(s);
272     uint32_t pending_ints;
273     uint32_t mit_delay;
274 
275     s->mac_reg[ICR] = val;
276 
277     /*
278      * Make sure ICR and ICS registers have the same value.
279      * The spec says that the ICS register is write-only.  However in practice,
280      * on real hardware ICS is readable, and for reads it has the same value as
281      * ICR (except that ICS does not have the clear on read behaviour of ICR).
282      *
283      * The VxWorks PRO/1000 driver uses this behaviour.
284      */
285     s->mac_reg[ICS] = val;
286 
287     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
288     if (!s->mit_irq_level && pending_ints) {
289         /*
290          * Here we detect a potential raising edge. We postpone raising the
291          * interrupt line if we are inside the mitigation delay window
292          * (s->mit_timer_on == 1).
293          * We provide a partial implementation of interrupt mitigation,
294          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
295          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
296          * RADV; relative timers based on TIDV and RDTR are not implemented.
297          */
298         if (s->mit_timer_on) {
299             return;
300         }
301         if (chkflag(MIT)) {
302             /* Compute the next mitigation delay according to pending
303              * interrupts and the current values of RADV (provided
304              * RDTR!=0), TADV and ITR.
305              * Then rearm the timer.
306              */
307             mit_delay = 0;
308             if (s->mit_ide &&
309                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
310                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
311             }
312             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
313                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
314             }
315             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
316 
317             /*
318              * According to e1000 SPEC, the Ethernet controller guarantees
319              * a maximum observable interrupt rate of 7813 interrupts/sec.
320              * Thus if mit_delay < 500 then the delay should be set to the
321              * minimum delay possible which is 500.
322              */
323             mit_delay = (mit_delay < 500) ? 500 : mit_delay;
324 
325             s->mit_timer_on = 1;
326             timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
327                       mit_delay * 256);
328             s->mit_ide = 0;
329         }
330     }
331 
332     s->mit_irq_level = (pending_ints != 0);
333     pci_set_irq(d, s->mit_irq_level);
334 }
335 
336 static void
337 e1000_mit_timer(void *opaque)
338 {
339     E1000State *s = opaque;
340 
341     s->mit_timer_on = 0;
342     /* Call set_interrupt_cause to update the irq level (if necessary). */
343     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
344 }
345 
346 static void
347 set_ics(E1000State *s, int index, uint32_t val)
348 {
349     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
350         s->mac_reg[IMS]);
351     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
352 }
353 
354 static void
355 e1000_autoneg_timer(void *opaque)
356 {
357     E1000State *s = opaque;
358     if (!qemu_get_queue(s->nic)->link_down) {
359         e1000_autoneg_done(s);
360         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
361     }
362 }
363 
364 static void e1000_reset(void *opaque)
365 {
366     E1000State *d = opaque;
367     E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
368     uint8_t *macaddr = d->conf.macaddr.a;
369 
370     timer_del(d->autoneg_timer);
371     timer_del(d->mit_timer);
372     timer_del(d->flush_queue_timer);
373     d->mit_timer_on = 0;
374     d->mit_irq_level = 0;
375     d->mit_ide = 0;
376     memset(d->phy_reg, 0, sizeof d->phy_reg);
377     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
378     d->phy_reg[PHY_ID2] = edc->phy_id2;
379     memset(d->mac_reg, 0, sizeof d->mac_reg);
380     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
381     d->rxbuf_min_shift = 1;
382     memset(&d->tx, 0, sizeof d->tx);
383 
384     if (qemu_get_queue(d->nic)->link_down) {
385         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
386     }
387 
388     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
389 }
390 
391 static void
392 set_ctrl(E1000State *s, int index, uint32_t val)
393 {
394     /* RST is self clearing */
395     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
396 }
397 
398 static void
399 e1000_flush_queue_timer(void *opaque)
400 {
401     E1000State *s = opaque;
402 
403     qemu_flush_queued_packets(qemu_get_queue(s->nic));
404 }
405 
406 static void
407 set_rx_control(E1000State *s, int index, uint32_t val)
408 {
409     s->mac_reg[RCTL] = val;
410     s->rxbuf_size = e1000x_rxbufsize(val);
411     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
412     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
413            s->mac_reg[RCTL]);
414     timer_mod(s->flush_queue_timer,
415               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
416 }
417 
418 static void
419 set_mdic(E1000State *s, int index, uint32_t val)
420 {
421     uint32_t data = val & E1000_MDIC_DATA_MASK;
422     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
423 
424     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
425         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
426     else if (val & E1000_MDIC_OP_READ) {
427         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
428         if (!(phy_regcap[addr] & PHY_R)) {
429             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
430             val |= E1000_MDIC_ERROR;
431         } else
432             val = (val ^ data) | s->phy_reg[addr];
433     } else if (val & E1000_MDIC_OP_WRITE) {
434         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
435         if (!(phy_regcap[addr] & PHY_W)) {
436             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
437             val |= E1000_MDIC_ERROR;
438         } else {
439             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
440                 phyreg_writeops[addr](s, index, data);
441             } else {
442                 s->phy_reg[addr] = data;
443             }
444         }
445     }
446     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
447 
448     if (val & E1000_MDIC_INT_EN) {
449         set_ics(s, 0, E1000_ICR_MDAC);
450     }
451 }
452 
453 static uint32_t
454 get_eecd(E1000State *s, int index)
455 {
456     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
457 
458     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
459            s->eecd_state.bitnum_out, s->eecd_state.reading);
460     if (!s->eecd_state.reading ||
461         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
462           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
463         ret |= E1000_EECD_DO;
464     return ret;
465 }
466 
467 static void
468 set_eecd(E1000State *s, int index, uint32_t val)
469 {
470     uint32_t oldval = s->eecd_state.old_eecd;
471 
472     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
473             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
474     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
475         return;
476     }
477     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
478         s->eecd_state.val_in = 0;
479         s->eecd_state.bitnum_in = 0;
480         s->eecd_state.bitnum_out = 0;
481         s->eecd_state.reading = 0;
482     }
483     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
484         return;
485     }
486     if (!(E1000_EECD_SK & val)) {               /* falling edge */
487         s->eecd_state.bitnum_out++;
488         return;
489     }
490     s->eecd_state.val_in <<= 1;
491     if (val & E1000_EECD_DI)
492         s->eecd_state.val_in |= 1;
493     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
494         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
495         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
496             EEPROM_READ_OPCODE_MICROWIRE);
497     }
498     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
499            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
500            s->eecd_state.reading);
501 }
502 
503 static uint32_t
504 flash_eerd_read(E1000State *s, int x)
505 {
506     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
507 
508     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
509         return (s->mac_reg[EERD]);
510 
511     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
512         return (E1000_EEPROM_RW_REG_DONE | r);
513 
514     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
515            E1000_EEPROM_RW_REG_DONE | r);
516 }
517 
/*
 * Checksum data[css .. n-1] and store the result big-endian at data + sloc.
 *
 * A non-zero cse smaller than n shortens the range so that it ends at
 * offset cse.  Nothing is written unless sloc is below n - 1.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;

    if (cse != 0 && cse < n) {
        n = cse + 1;
    }

    if (sloc >= n - 1) {
        return;
    }

    sum = net_checksum_add(n - css, data + css);
    stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
}
530 
531 static inline void
532 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
533 {
534     if (!memcmp(arr, bcast, sizeof bcast)) {
535         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
536     } else if (arr[0] & 1) {
537         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
538     }
539 }
540 
541 static void
542 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
543 {
544     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
545                                     PTC1023, PTC1522 };
546 
547     NetClientState *nc = qemu_get_queue(s->nic);
548     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
549         nc->info->receive(nc, buf, size);
550     } else {
551         qemu_send_packet(nc, buf, size);
552     }
553     inc_tx_bcast_or_mcast_count(s, buf);
554     e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
555 }
556 
557 static void
558 xmit_seg(E1000State *s)
559 {
560     uint16_t len;
561     unsigned int frames = s->tx.tso_frames, css, sofar;
562     struct e1000_tx *tp = &s->tx;
563     struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
564 
565     if (tp->cptse) {
566         css = props->ipcss;
567         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
568                frames, tp->size, css);
569         if (props->ip) {    /* IPv4 */
570             stw_be_p(tp->data+css+2, tp->size - css);
571             stw_be_p(tp->data+css+4,
572                      lduw_be_p(tp->data + css + 4) + frames);
573         } else {         /* IPv6 */
574             stw_be_p(tp->data+css+4, tp->size - css);
575         }
576         css = props->tucss;
577         len = tp->size - css;
578         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
579         if (props->tcp) {
580             sofar = frames * props->mss;
581             stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
582             if (props->paylen - sofar > props->mss) {
583                 tp->data[css + 13] &= ~9;    /* PSH, FIN */
584             } else if (frames) {
585                 e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
586             }
587         } else {    /* UDP */
588             stw_be_p(tp->data+css+4, len);
589         }
590         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
591             unsigned int phsum;
592             // add pseudo-header length before checksum calculation
593             void *sp = tp->data + props->tucso;
594 
595             phsum = lduw_be_p(sp) + len;
596             phsum = (phsum >> 16) + (phsum & 0xffff);
597             stw_be_p(sp, phsum);
598         }
599         tp->tso_frames++;
600     }
601 
602     if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
603         putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
604     }
605     if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
606         putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
607     }
608     if (tp->vlan_needed) {
609         memmove(tp->vlan, tp->data, 4);
610         memmove(tp->data, tp->data + 4, 8);
611         memcpy(tp->data + 8, tp->vlan_header, 4);
612         e1000_send_packet(s, tp->vlan, tp->size + 4);
613     } else {
614         e1000_send_packet(s, tp->data, tp->size);
615     }
616 
617     e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
618     e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
619     s->mac_reg[GPTC] = s->mac_reg[TPT];
620     s->mac_reg[GOTCL] = s->mac_reg[TOTL];
621     s->mac_reg[GOTCH] = s->mac_reg[TOTH];
622 }
623 
624 static void
625 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
626 {
627     PCIDevice *d = PCI_DEVICE(s);
628     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
629     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
630     unsigned int split_size = txd_lower & 0xffff, bytes, sz;
631     unsigned int msh = 0xfffff;
632     uint64_t addr;
633     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
634     struct e1000_tx *tp = &s->tx;
635 
636     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
637     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
638         if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
639             e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
640             s->use_tso_for_migration = 1;
641             tp->tso_frames = 0;
642         } else {
643             e1000x_read_tx_ctx_descr(xp, &tp->props);
644             s->use_tso_for_migration = 0;
645         }
646         return;
647     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
648         // data descriptor
649         if (tp->size == 0) {
650             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
651         }
652         tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
653     } else {
654         // legacy descriptor
655         tp->cptse = 0;
656     }
657 
658     if (e1000x_vlan_enabled(s->mac_reg) &&
659         e1000x_is_vlan_txd(txd_lower) &&
660         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
661         tp->vlan_needed = 1;
662         stw_be_p(tp->vlan_header,
663                       le16_to_cpu(s->mac_reg[VET]));
664         stw_be_p(tp->vlan_header + 2,
665                       le16_to_cpu(dp->upper.fields.special));
666     }
667 
668     addr = le64_to_cpu(dp->buffer_addr);
669     if (tp->cptse) {
670         msh = tp->tso_props.hdr_len + tp->tso_props.mss;
671         do {
672             bytes = split_size;
673             if (tp->size + bytes > msh)
674                 bytes = msh - tp->size;
675 
676             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
677             pci_dma_read(d, addr, tp->data + tp->size, bytes);
678             sz = tp->size + bytes;
679             if (sz >= tp->tso_props.hdr_len
680                 && tp->size < tp->tso_props.hdr_len) {
681                 memmove(tp->header, tp->data, tp->tso_props.hdr_len);
682             }
683             tp->size = sz;
684             addr += bytes;
685             if (sz == msh) {
686                 xmit_seg(s);
687                 memmove(tp->data, tp->header, tp->tso_props.hdr_len);
688                 tp->size = tp->tso_props.hdr_len;
689             }
690             split_size -= bytes;
691         } while (bytes && split_size);
692     } else {
693         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
694         pci_dma_read(d, addr, tp->data + tp->size, split_size);
695         tp->size += split_size;
696     }
697 
698     if (!(txd_lower & E1000_TXD_CMD_EOP))
699         return;
700     if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
701         xmit_seg(s);
702     }
703     tp->tso_frames = 0;
704     tp->sum_needed = 0;
705     tp->vlan_needed = 0;
706     tp->size = 0;
707     tp->cptse = 0;
708 }
709 
710 static uint32_t
711 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
712 {
713     PCIDevice *d = PCI_DEVICE(s);
714     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
715 
716     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
717         return 0;
718     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
719                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
720     dp->upper.data = cpu_to_le32(txd_upper);
721     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
722                   &dp->upper, sizeof(dp->upper));
723     return E1000_ICR_TXDW;
724 }
725 
726 static uint64_t tx_desc_base(E1000State *s)
727 {
728     uint64_t bah = s->mac_reg[TDBAH];
729     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
730 
731     return (bah << 32) + bal;
732 }
733 
734 static void
735 start_xmit(E1000State *s)
736 {
737     PCIDevice *d = PCI_DEVICE(s);
738     dma_addr_t base;
739     struct e1000_tx_desc desc;
740     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
741 
742     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
743         DBGOUT(TX, "tx disabled\n");
744         return;
745     }
746 
747     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
748         base = tx_desc_base(s) +
749                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
750         pci_dma_read(d, base, &desc, sizeof(desc));
751 
752         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
753                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
754                desc.upper.data);
755 
756         process_tx_desc(s, &desc);
757         cause |= txdesc_writeback(s, base, &desc);
758 
759         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
760             s->mac_reg[TDH] = 0;
761         /*
762          * the following could happen only if guest sw assigns
763          * bogus values to TDT/TDLEN.
764          * there's nothing too intelligent we could do about this.
765          */
766         if (s->mac_reg[TDH] == tdh_start ||
767             tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
768             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
769                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
770             break;
771         }
772     }
773     set_ics(s, 0, cause);
774 }
775 
776 static int
777 receive_filter(E1000State *s, const uint8_t *buf, int size)
778 {
779     uint32_t rctl = s->mac_reg[RCTL];
780     int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
781 
782     if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
783         e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
784         uint16_t vid = lduw_be_p(buf + 14);
785         uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
786                                  ((vid >> 5) & 0x7f));
787         if ((vfta & (1 << (vid & 0x1f))) == 0)
788             return 0;
789     }
790 
791     if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
792         return 1;
793     }
794 
795     if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
796         e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
797         return 1;
798     }
799 
800     if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
801         e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
802         return 1;
803     }
804 
805     return e1000x_rx_group_filter(s->mac_reg, buf);
806 }
807 
808 static void
809 e1000_set_link_status(NetClientState *nc)
810 {
811     E1000State *s = qemu_get_nic_opaque(nc);
812     uint32_t old_status = s->mac_reg[STATUS];
813 
814     if (nc->link_down) {
815         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
816     } else {
817         if (have_autoneg(s) &&
818             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
819             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
820         } else {
821             e1000_link_up(s);
822         }
823     }
824 
825     if (s->mac_reg[STATUS] != old_status)
826         set_ics(s, 0, E1000_ICR_LSC);
827 }
828 
829 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
830 {
831     int bufs;
832     /* Fast-path short packets */
833     if (total_size <= s->rxbuf_size) {
834         return s->mac_reg[RDH] != s->mac_reg[RDT];
835     }
836     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
837         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
838     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
839         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
840             s->mac_reg[RDT] - s->mac_reg[RDH];
841     } else {
842         return false;
843     }
844     return total_size <= bufs * s->rxbuf_size;
845 }
846 
847 static int
848 e1000_can_receive(NetClientState *nc)
849 {
850     E1000State *s = qemu_get_nic_opaque(nc);
851 
852     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
853         e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
854 }
855 
856 static uint64_t rx_desc_base(E1000State *s)
857 {
858     uint64_t bah = s->mac_reg[RDBAH];
859     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
860 
861     return (bah << 32) + bal;
862 }
863 
864 static void
865 e1000_receiver_overrun(E1000State *s, size_t size)
866 {
867     trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
868     e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
869     e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
870     set_ics(s, 0, E1000_ICS_RXO);
871 }
872 
873 static ssize_t
874 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
875 {
876     E1000State *s = qemu_get_nic_opaque(nc);
877     PCIDevice *d = PCI_DEVICE(s);
878     struct e1000_rx_desc desc;
879     dma_addr_t base;
880     unsigned int n, rdt;
881     uint32_t rdh_start;
882     uint16_t vlan_special = 0;
883     uint8_t vlan_status = 0;
884     uint8_t min_buf[MIN_BUF_SIZE];
885     struct iovec min_iov;
886     uint8_t *filter_buf = iov->iov_base;
887     size_t size = iov_size(iov, iovcnt);
888     size_t iov_ofs = 0;
889     size_t desc_offset;
890     size_t desc_size;
891     size_t total_size;
892 
893     if (!e1000x_hw_rx_enabled(s->mac_reg)) {
894         return -1;
895     }
896 
897     if (timer_pending(s->flush_queue_timer)) {
898         return 0;
899     }
900 
901     /* Pad to minimum Ethernet frame length */
902     if (size < sizeof(min_buf)) {
903         iov_to_buf(iov, iovcnt, 0, min_buf, size);
904         memset(&min_buf[size], 0, sizeof(min_buf) - size);
905         min_iov.iov_base = filter_buf = min_buf;
906         min_iov.iov_len = size = sizeof(min_buf);
907         iovcnt = 1;
908         iov = &min_iov;
909     } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
910         /* This is very unlikely, but may happen. */
911         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
912         filter_buf = min_buf;
913     }
914 
915     /* Discard oversized packets if !LPE and !SBP. */
916     if (e1000x_is_oversized(s->mac_reg, size)) {
917         return size;
918     }
919 
920     if (!receive_filter(s, filter_buf, size)) {
921         return size;
922     }
923 
924     if (e1000x_vlan_enabled(s->mac_reg) &&
925         e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
926         vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
927         iov_ofs = 4;
928         if (filter_buf == iov->iov_base) {
929             memmove(filter_buf + 4, filter_buf, 12);
930         } else {
931             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
932             while (iov->iov_len <= iov_ofs) {
933                 iov_ofs -= iov->iov_len;
934                 iov++;
935             }
936         }
937         vlan_status = E1000_RXD_STAT_VP;
938         size -= 4;
939     }
940 
941     rdh_start = s->mac_reg[RDH];
942     desc_offset = 0;
943     total_size = size + e1000x_fcs_len(s->mac_reg);
944     if (!e1000_has_rxbufs(s, total_size)) {
945         e1000_receiver_overrun(s, total_size);
946         return -1;
947     }
948     do {
949         desc_size = total_size - desc_offset;
950         if (desc_size > s->rxbuf_size) {
951             desc_size = s->rxbuf_size;
952         }
953         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
954         pci_dma_read(d, base, &desc, sizeof(desc));
955         desc.special = vlan_special;
956         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
957         if (desc.buffer_addr) {
958             if (desc_offset < size) {
959                 size_t iov_copy;
960                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
961                 size_t copy_size = size - desc_offset;
962                 if (copy_size > s->rxbuf_size) {
963                     copy_size = s->rxbuf_size;
964                 }
965                 do {
966                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
967                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
968                     copy_size -= iov_copy;
969                     ba += iov_copy;
970                     iov_ofs += iov_copy;
971                     if (iov_ofs == iov->iov_len) {
972                         iov++;
973                         iov_ofs = 0;
974                     }
975                 } while (copy_size);
976             }
977             desc_offset += desc_size;
978             desc.length = cpu_to_le16(desc_size);
979             if (desc_offset >= total_size) {
980                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
981             } else {
982                 /* Guest zeroing out status is not a hardware requirement.
983                    Clear EOP in case guest didn't do it. */
984                 desc.status &= ~E1000_RXD_STAT_EOP;
985             }
986         } else { // as per intel docs; skip descriptors with null buf addr
987             DBGOUT(RX, "Null RX descriptor!!\n");
988         }
989         pci_dma_write(d, base, &desc, sizeof(desc));
990 
991         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
992             s->mac_reg[RDH] = 0;
993         /* see comment in start_xmit; same here */
994         if (s->mac_reg[RDH] == rdh_start ||
995             rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
996             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
997                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
998             e1000_receiver_overrun(s, total_size);
999             return -1;
1000         }
1001     } while (desc_offset < total_size);
1002 
1003     e1000x_update_rx_total_stats(s->mac_reg, size, total_size);
1004 
1005     n = E1000_ICS_RXT0;
1006     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1007         rdt += s->mac_reg[RDLEN] / sizeof(desc);
1008     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1009         s->rxbuf_min_shift)
1010         n |= E1000_ICS_RXDMT0;
1011 
1012     set_ics(s, 0, n);
1013 
1014     return size;
1015 }
1016 
1017 static ssize_t
1018 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1019 {
1020     const struct iovec iov = {
1021         .iov_base = (uint8_t *)buf,
1022         .iov_len = size
1023     };
1024 
1025     return e1000_receive_iov(nc, &iov, 1);
1026 }
1027 
1028 static uint32_t
1029 mac_readreg(E1000State *s, int index)
1030 {
1031     return s->mac_reg[index];
1032 }
1033 
1034 static uint32_t
1035 mac_low4_read(E1000State *s, int index)
1036 {
1037     return s->mac_reg[index] & 0xf;
1038 }
1039 
1040 static uint32_t
1041 mac_low11_read(E1000State *s, int index)
1042 {
1043     return s->mac_reg[index] & 0x7ff;
1044 }
1045 
1046 static uint32_t
1047 mac_low13_read(E1000State *s, int index)
1048 {
1049     return s->mac_reg[index] & 0x1fff;
1050 }
1051 
1052 static uint32_t
1053 mac_low16_read(E1000State *s, int index)
1054 {
1055     return s->mac_reg[index] & 0xffff;
1056 }
1057 
1058 static uint32_t
1059 mac_icr_read(E1000State *s, int index)
1060 {
1061     uint32_t ret = s->mac_reg[ICR];
1062 
1063     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1064     set_interrupt_cause(s, 0, 0);
1065     return ret;
1066 }
1067 
1068 static uint32_t
1069 mac_read_clr4(E1000State *s, int index)
1070 {
1071     uint32_t ret = s->mac_reg[index];
1072 
1073     s->mac_reg[index] = 0;
1074     return ret;
1075 }
1076 
1077 static uint32_t
1078 mac_read_clr8(E1000State *s, int index)
1079 {
1080     uint32_t ret = s->mac_reg[index];
1081 
1082     s->mac_reg[index] = 0;
1083     s->mac_reg[index-1] = 0;
1084     return ret;
1085 }
1086 
1087 static void
1088 mac_writereg(E1000State *s, int index, uint32_t val)
1089 {
1090     uint32_t macaddr[2];
1091 
1092     s->mac_reg[index] = val;
1093 
1094     if (index == RA + 1) {
1095         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1096         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1097         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1098     }
1099 }
1100 
1101 static void
1102 set_rdt(E1000State *s, int index, uint32_t val)
1103 {
1104     s->mac_reg[index] = val & 0xffff;
1105     if (e1000_has_rxbufs(s, 1)) {
1106         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1107     }
1108 }
1109 
1110 static void
1111 set_16bit(E1000State *s, int index, uint32_t val)
1112 {
1113     s->mac_reg[index] = val & 0xffff;
1114 }
1115 
1116 static void
1117 set_dlen(E1000State *s, int index, uint32_t val)
1118 {
1119     s->mac_reg[index] = val & 0xfff80;
1120 }
1121 
1122 static void
1123 set_tctl(E1000State *s, int index, uint32_t val)
1124 {
1125     s->mac_reg[index] = val;
1126     s->mac_reg[TDT] &= 0xffff;
1127     start_xmit(s);
1128 }
1129 
1130 static void
1131 set_icr(E1000State *s, int index, uint32_t val)
1132 {
1133     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1134     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1135 }
1136 
1137 static void
1138 set_imc(E1000State *s, int index, uint32_t val)
1139 {
1140     s->mac_reg[IMS] &= ~val;
1141     set_ics(s, 0, 0);
1142 }
1143 
1144 static void
1145 set_ims(E1000State *s, int index, uint32_t val)
1146 {
1147     s->mac_reg[IMS] |= val;
1148     set_ics(s, 0, 0);
1149 }
1150 
/*
 * MMIO read dispatch table, indexed by register number (the value
 * e1000_mmio_read() computes as byte offset >> 2).  Slots that are not
 * named below stay NULL and are treated as unknown registers by
 * e1000_mmio_read().
 *
 * getreg(x) is shorthand for a slot served by the plain mac_readreg().
 */
#define getreg(x)    [x] = mac_readreg
static uint32_t (*macreg_readops[])(E1000State *, int) = {
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),

    /* Upper halves of counter pairs: reading clears this and index - 1 */
    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    /* Single-word counters that are cleared by reading */
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    /* Registers with dedicated read handlers */
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,
    /* Registers of which only the low-order bits are returned */
    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
    [RDFPC]   = mac_low13_read,
    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
    [TDFPC]   = mac_low13_read,
    [AIT]     = mac_low16_read,

    /* Register ranges (GNU range designators) */
    [CRCERRS ... MPC]   = &mac_readreg,
    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
    [FFLT ... FFLT+6]   = &mac_low11_read,
    [RA ... RA+31]      = &mac_readreg,
    [WUPM ... WUPM+31]  = &mac_readreg,
    [MTA ... MTA+127]   = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
    [FFMT ... FFMT+254] = &mac_low4_read,
    [FFVT ... FFVT+254] = &mac_readreg,
    [PBM ... PBM+16383] = &mac_readreg,
};
/* Number of slots in macreg_readops (highest designated index + 1) */
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1205 
/*
 * MMIO write dispatch table, indexed by register number (the value
 * e1000_mmio_write() computes as byte offset >> 2).  Slots that are not
 * named below stay NULL; e1000_mmio_write() then reports the register as
 * read-only (if it has a read handler) or unknown.
 *
 * putreg(x) is shorthand for a slot served by the plain mac_writereg().
 */
#define putreg(x)    [x] = mac_writereg
static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
    putreg(WUS),      putreg(AIT),

    /* Registers whose writes are masked or have side effects */
    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
    [ITR]    = set_16bit,

    /* Register ranges (GNU range designators), all plain stores */
    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
    [FFLT ... FFLT+6]   = &mac_writereg,
    [RA ... RA+31]      = &mac_writereg,
    [WUPM ... WUPM+31]  = &mac_writereg,
    [MTA ... MTA+127]   = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
    [PBM ... PBM+16383] = &mac_writereg,
};

/* Number of slots in macreg_writeops (highest designated index + 1) */
enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1235 
/* Low two bits of each mac_reg_access[] entry */
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

/*
 * Build an access entry that requires compat flag E1000_FLAG_<x>: the flag
 * value goes into the bits above the two low bits, and the "flag needed"
 * bit is set.
 */
#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implemented
 *
 * The table is indexed by register number, like macreg_readops and
 * macreg_writeops.  Entries left at zero impose no restriction.  The MMIO
 * handlers allow access to a "flag needed" register only when
 * s->compat_flags has one of the entry's flag bits set. */
static const uint8_t mac_reg_access[0x8000] = {
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    /* Flag-gated registers that are additionally only partly emulated */
    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1288 
1289 static void
1290 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1291                  unsigned size)
1292 {
1293     E1000State *s = opaque;
1294     unsigned int index = (addr & 0x1ffff) >> 2;
1295 
1296     if (index < NWRITEOPS && macreg_writeops[index]) {
1297         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1298             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1299             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1300                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1301                        "It is not fully implemented.\n", index<<2);
1302             }
1303             macreg_writeops[index](s, index, val);
1304         } else {    /* "flag needed" bit is set, but the flag is not active */
1305             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1306                    index<<2);
1307         }
1308     } else if (index < NREADOPS && macreg_readops[index]) {
1309         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1310                index<<2, val);
1311     } else {
1312         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1313                index<<2, val);
1314     }
1315 }
1316 
1317 static uint64_t
1318 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1319 {
1320     E1000State *s = opaque;
1321     unsigned int index = (addr & 0x1ffff) >> 2;
1322 
1323     if (index < NREADOPS && macreg_readops[index]) {
1324         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1325             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1326             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1327                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1328                        "It is not fully implemented.\n", index<<2);
1329             }
1330             return macreg_readops[index](s, index);
1331         } else {    /* "flag needed" bit is set, but the flag is not active */
1332             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1333                    index<<2);
1334         }
1335     } else {
1336         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1337     }
1338     return 0;
1339 }
1340 
1341 static const MemoryRegionOps e1000_mmio_ops = {
1342     .read = e1000_mmio_read,
1343     .write = e1000_mmio_write,
1344     .endianness = DEVICE_LITTLE_ENDIAN,
1345     .impl = {
1346         .min_access_size = 4,
1347         .max_access_size = 4,
1348     },
1349 };
1350 
1351 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1352                               unsigned size)
1353 {
1354     E1000State *s = opaque;
1355 
1356     (void)s;
1357     return 0;
1358 }
1359 
1360 static void e1000_io_write(void *opaque, hwaddr addr,
1361                            uint64_t val, unsigned size)
1362 {
1363     E1000State *s = opaque;
1364 
1365     (void)s;
1366 }
1367 
1368 static const MemoryRegionOps e1000_io_ops = {
1369     .read = e1000_io_read,
1370     .write = e1000_io_write,
1371     .endianness = DEVICE_LITTLE_ENDIAN,
1372 };
1373 
/*
 * VMState field test: true only when the incoming stream's version_id
 * is 1.  The opaque pointer is not used.
 */
static bool is_version_1(void *opaque, int version_id)
{
    if (version_id != 1) {
        return false;
    }
    return true;
}
1378 
1379 static int e1000_pre_save(void *opaque)
1380 {
1381     E1000State *s = opaque;
1382     NetClientState *nc = qemu_get_queue(s->nic);
1383 
1384     /*
1385      * If link is down and auto-negotiation is supported and ongoing,
1386      * complete auto-negotiation immediately. This allows us to look
1387      * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1388      */
1389     if (nc->link_down && have_autoneg(s)) {
1390         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1391     }
1392 
1393     /* Decide which set of props to migrate in the main structure */
1394     if (chkflag(TSO) || !s->use_tso_for_migration) {
1395         /* Either we're migrating with the extra subsection, in which
1396          * case the mig_props is always 'props' OR
1397          * we've not got the subsection, but 'props' was the last
1398          * updated.
1399          */
1400         s->mig_props = s->tx.props;
1401     } else {
1402         /* We're not using the subsection, and 'tso_props' was
1403          * the last updated.
1404          */
1405         s->mig_props = s->tx.tso_props;
1406     }
1407     return 0;
1408 }
1409 
1410 static int e1000_post_load(void *opaque, int version_id)
1411 {
1412     E1000State *s = opaque;
1413     NetClientState *nc = qemu_get_queue(s->nic);
1414 
1415     if (!chkflag(MIT)) {
1416         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1417             s->mac_reg[TADV] = 0;
1418         s->mit_irq_level = false;
1419     }
1420     s->mit_ide = 0;
1421     s->mit_timer_on = true;
1422     timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
1423 
1424     /* nc.link_down can't be migrated, so infer link_down according
1425      * to link status bit in mac_reg[STATUS].
1426      * Alternatively, restart link negotiation if it was in progress. */
1427     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1428 
1429     if (have_autoneg(s) &&
1430         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1431         nc->link_down = false;
1432         timer_mod(s->autoneg_timer,
1433                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1434     }
1435 
1436     s->tx.props = s->mig_props;
1437     if (!s->received_tx_tso) {
1438         /* We received only one set of offload data (tx.props)
1439          * and haven't got tx.tso_props.  The best we can do
1440          * is dupe the data.
1441          */
1442         s->tx.tso_props = s->mig_props;
1443     }
1444     return 0;
1445 }
1446 
1447 static int e1000_tx_tso_post_load(void *opaque, int version_id)
1448 {
1449     E1000State *s = opaque;
1450     s->received_tx_tso = true;
1451     return 0;
1452 }
1453 
1454 static bool e1000_mit_state_needed(void *opaque)
1455 {
1456     E1000State *s = opaque;
1457 
1458     return chkflag(MIT);
1459 }
1460 
1461 static bool e1000_full_mac_needed(void *opaque)
1462 {
1463     E1000State *s = opaque;
1464 
1465     return chkflag(MAC);
1466 }
1467 
1468 static bool e1000_tso_state_needed(void *opaque)
1469 {
1470     E1000State *s = opaque;
1471 
1472     return chkflag(TSO);
1473 }
1474 
/*
 * Optional migration subsection with the interrupt mitigation registers
 * and the mitigated IRQ level.  Emitted only when
 * e1000_mit_state_needed() returns true.  The order of the fields defines
 * the stream layout and must not change.
 */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1489 
/*
 * Optional migration subsection carrying the first 0x8000 entries of
 * mac_reg.  Emitted only when e1000_full_mac_needed() returns true.
 */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1500 
/*
 * Optional migration subsection carrying tx.tso_props.  Emitted only when
 * e1000_tso_state_needed() returns true; on load,
 * e1000_tx_tso_post_load() notes that it was received.  The order of the
 * fields defines the stream layout and must not change.
 */
static const VMStateDescription vmstate_e1000_tx_tso_state = {
    .name = "e1000/tx_tso_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_tso_state_needed,
    .post_load = e1000_tx_tso_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
        VMSTATE_INT8(tx.tso_props.ip, E1000State),
        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1522 
/*
 * Main migration description for the device (stream version 2, accepting
 * version 1 and later).
 *
 * e1000_pre_save() runs before the fields are written and
 * e1000_post_load() after they are read.  The order of the fields defines
 * the stream layout and must not change.  The offload properties are
 * saved through mig_props, which e1000_pre_save() fills from tx.props or
 * tx.tso_props.  Three optional subsections follow the main section.
 */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        VMSTATE_UINT8(mig_props.ipcss, E1000State),
        VMSTATE_UINT8(mig_props.ipcso, E1000State),
        VMSTATE_UINT16(mig_props.ipcse, E1000State),
        VMSTATE_UINT8(mig_props.tucss, E1000State),
        VMSTATE_UINT8(mig_props.tucso, E1000State),
        VMSTATE_UINT16(mig_props.tucse, E1000State),
        VMSTATE_UINT32(mig_props.paylen, E1000State),
        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
        VMSTATE_UINT16(mig_props.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(mig_props.ip, E1000State),
        VMSTATE_INT8(mig_props.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individually saved MAC registers */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Register arrays: 32 words at RA, 128 at MTA, 128 at VFTA */
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        &vmstate_e1000_tx_tso_state,
        NULL
    }
};
1607 
/*
 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
 *
 * This is a 64-word template.  The two words marked DevId are zero
 * placeholders here; pci_e1000_realize() passes this template together
 * with the PCI class's device_id and the MAC address to
 * e1000x_core_prepare_eeprom(), which presumably fills them in when
 * building the per-device eeprom_data (confirm against e1000x_common).
 */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1622 
1623 /* PCI interface */
1624 
1625 static void
1626 e1000_mmio_setup(E1000State *d)
1627 {
1628     int i;
1629     const uint32_t excluded_regs[] = {
1630         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1631         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1632     };
1633 
1634     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1635                           "e1000-mmio", PNPMMIO_SIZE);
1636     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1637     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1638         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1639                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1640     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1641 }
1642 
1643 static void
1644 pci_e1000_uninit(PCIDevice *dev)
1645 {
1646     E1000State *d = E1000(dev);
1647 
1648     timer_del(d->autoneg_timer);
1649     timer_free(d->autoneg_timer);
1650     timer_del(d->mit_timer);
1651     timer_free(d->mit_timer);
1652     timer_del(d->flush_queue_timer);
1653     timer_free(d->flush_queue_timer);
1654     qemu_del_nic(d->nic);
1655 }
1656 
1657 static NetClientInfo net_e1000_info = {
1658     .type = NET_CLIENT_DRIVER_NIC,
1659     .size = sizeof(NICState),
1660     .can_receive = e1000_can_receive,
1661     .receive = e1000_receive,
1662     .receive_iov = e1000_receive_iov,
1663     .link_status_changed = e1000_set_link_status,
1664 };
1665 
1666 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1667                                 uint32_t val, int len)
1668 {
1669     E1000State *s = E1000(pci_dev);
1670 
1671     pci_default_write_config(pci_dev, address, val, len);
1672 
1673     if (range_covers_byte(address, len, PCI_COMMAND) &&
1674         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1675         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1676     }
1677 }
1678 
1679 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1680 {
1681     DeviceState *dev = DEVICE(pci_dev);
1682     E1000State *d = E1000(pci_dev);
1683     uint8_t *pci_conf;
1684     uint8_t *macaddr;
1685 
1686     pci_dev->config_write = e1000_write_config;
1687 
1688     pci_conf = pci_dev->config;
1689 
1690     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1691     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1692 
1693     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1694 
1695     e1000_mmio_setup(d);
1696 
1697     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1698 
1699     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1700 
1701     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1702     macaddr = d->conf.macaddr.a;
1703 
1704     e1000x_core_prepare_eeprom(d->eeprom_data,
1705                                e1000_eeprom_template,
1706                                sizeof(e1000_eeprom_template),
1707                                PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1708                                macaddr);
1709 
1710     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1711                           object_get_typename(OBJECT(d)), dev->id, d);
1712 
1713     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1714 
1715     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1716     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1717     d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1718                                         e1000_flush_queue_timer, d);
1719 }
1720 
1721 static void qdev_e1000_reset(DeviceState *dev)
1722 {
1723     E1000State *d = E1000(dev);
1724     e1000_reset(d);
1725 }
1726 
1727 static Property e1000_properties[] = {
1728     DEFINE_NIC_PROPERTIES(E1000State, conf),
1729     DEFINE_PROP_BIT("autonegotiation", E1000State,
1730                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1731     DEFINE_PROP_BIT("mitigation", E1000State,
1732                     compat_flags, E1000_FLAG_MIT_BIT, true),
1733     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1734                     compat_flags, E1000_FLAG_MAC_BIT, true),
1735     DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1736                     compat_flags, E1000_FLAG_TSO_BIT, true),
1737     DEFINE_PROP_END_OF_LIST(),
1738 };
1739 
/*
 * Per-model parameters for one concrete e1000 QOM type.  An instance is
 * passed as class_data to e1000_class_init(), which copies the values
 * into the class structures.
 */
typedef struct E1000Info E1000Info;

struct E1000Info {
    const char *name;       /* QOM type name used when registering */
    uint16_t   device_id;   /* copied to PCIDeviceClass.device_id */
    uint8_t    revision;    /* copied to PCIDeviceClass.revision */
    uint16_t   phy_id2;     /* copied to E1000BaseClass.phy_id2 */
};
1746 
1747 static void e1000_class_init(ObjectClass *klass, void *data)
1748 {
1749     DeviceClass *dc = DEVICE_CLASS(klass);
1750     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1751     E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1752     const E1000Info *info = data;
1753 
1754     k->realize = pci_e1000_realize;
1755     k->exit = pci_e1000_uninit;
1756     k->romfile = "efi-e1000.rom";
1757     k->vendor_id = PCI_VENDOR_ID_INTEL;
1758     k->device_id = info->device_id;
1759     k->revision = info->revision;
1760     e->phy_id2 = info->phy_id2;
1761     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1762     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1763     dc->desc = "Intel Gigabit Ethernet";
1764     dc->reset = qdev_e1000_reset;
1765     dc->vmsd = &vmstate_e1000;
1766     dc->props = e1000_properties;
1767 }
1768 
1769 static void e1000_instance_init(Object *obj)
1770 {
1771     E1000State *n = E1000(obj);
1772     device_add_bootindex_property(obj, &n->conf.bootindex,
1773                                   "bootindex", "/ethernet-phy@0",
1774                                   DEVICE(n), NULL);
1775 }
1776 
1777 static const TypeInfo e1000_base_info = {
1778     .name          = TYPE_E1000_BASE,
1779     .parent        = TYPE_PCI_DEVICE,
1780     .instance_size = sizeof(E1000State),
1781     .instance_init = e1000_instance_init,
1782     .class_size    = sizeof(E1000BaseClass),
1783     .abstract      = true,
1784     .interfaces = (InterfaceInfo[]) {
1785         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1786         { },
1787     },
1788 };
1789 
1790 static const E1000Info e1000_devices[] = {
1791     {
1792         .name      = "e1000",
1793         .device_id = E1000_DEV_ID_82540EM,
1794         .revision  = 0x03,
1795         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1796     },
1797     {
1798         .name      = "e1000-82544gc",
1799         .device_id = E1000_DEV_ID_82544GC_COPPER,
1800         .revision  = 0x03,
1801         .phy_id2   = E1000_PHY_ID2_82544x,
1802     },
1803     {
1804         .name      = "e1000-82545em",
1805         .device_id = E1000_DEV_ID_82545EM_COPPER,
1806         .revision  = 0x03,
1807         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1808     },
1809 };
1810 
1811 static void e1000_register_types(void)
1812 {
1813     int i;
1814 
1815     type_register_static(&e1000_base_info);
1816     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1817         const E1000Info *info = &e1000_devices[i];
1818         TypeInfo type_info = {};
1819 
1820         type_info.name = info->name;
1821         type_info.parent = TYPE_E1000_BASE;
1822         type_info.class_data = (void *)info;
1823         type_info.class_init = e1000_class_init;
1824         type_info.instance_init = e1000_instance_init;
1825 
1826         type_register(&type_info);
1827     }
1828 }
1829 
1830 type_init(e1000_register_types)
1831