xref: /openbmc/qemu/hw/net/e1000.c (revision a675ca4c)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/net/mii.h"
30 #include "hw/pci/pci_device.h"
31 #include "hw/qdev-properties.h"
32 #include "migration/vmstate.h"
33 #include "net/eth.h"
34 #include "net/net.h"
35 #include "net/checksum.h"
36 #include "sysemu/sysemu.h"
37 #include "sysemu/dma.h"
38 #include "qemu/iov.h"
39 #include "qemu/module.h"
40 #include "qemu/range.h"
41 
42 #include "e1000_common.h"
43 #include "e1000x_common.h"
44 #include "trace.h"
45 #include "qom/object.h"
46 
47 /* #define E1000_DEBUG */
48 
#ifdef E1000_DEBUG
/* One enumerator per debug category; DBGBIT() turns a category into a mask. */
enum {
    DEBUG_GENERAL,
    DEBUG_IO,
    DEBUG_MMIO,
    DEBUG_INTERRUPT,
    DEBUG_RX,
    DEBUG_TX,
    DEBUG_MDIC,
    DEBUG_EEPROM,
    DEBUG_UNKNOWN,
    DEBUG_TXSUM,
    DEBUG_TXERR,
    DEBUG_RXERR,
    DEBUG_RXFILTER,
    DEBUG_PHY,
    DEBUG_NOTYET,
};

#define DBGBIT(x)    (1<<DEBUG_##x)

/* Mask of the categories that DBGOUT() actually prints. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/*
 * Print a message to stderr, prefixed with "e1000: ", when category 'what'
 * is enabled in debugflags.
 */
#define DBGOUT(what, fmt, ...)                                  \
    do {                                                        \
        if (debugflags & DBGBIT(what)) {                        \
            fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__);     \
        }                                                       \
    } while (0)
#else
/* Debugging compiled out: the arguments are discarded without evaluation. */
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif
66 
/*
 * NOTE(review): presumably the sizes of the MMIO and I/O-port regions
 * (the 'mmio' and 'io' members of the device state); the users of these
 * constants are outside this part of the file.
 */
#define PNPMMIO_SIZE      0x20000
#define IOPORT_SIZE       0x40

/*
 * Ethernet header length plus four more bytes (presumably room for one
 * VLAN tag); the receive path wants at least this much contiguous data
 * before it inspects the headers.
 */
#define MAXIMUM_ETHERNET_HDR_LEN (ETH_HLEN + 4)
71 
72 /*
73  * HW models:
74  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
75  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
76  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
77  *  Others never tested
78  */
79 
80 struct E1000State_st {
81     /*< private >*/
82     PCIDevice parent_obj;
83     /*< public >*/
84 
85     NICState *nic;
86     NICConf conf;
87     MemoryRegion mmio;
88     MemoryRegion io;
89 
90     uint32_t mac_reg[0x8000];
91     uint16_t phy_reg[0x20];
92     uint16_t eeprom_data[64];
93 
94     uint32_t rxbuf_size;
95     uint32_t rxbuf_min_shift;
96     struct e1000_tx {
97         unsigned char header[256];
98         unsigned char vlan_header[4];
99         /* Fields vlan and data must not be reordered or separated. */
100         unsigned char vlan[4];
101         unsigned char data[0x10000];
102         uint16_t size;
103         unsigned char vlan_needed;
104         unsigned char sum_needed;
105         bool cptse;
106         e1000x_txd_props props;
107         e1000x_txd_props tso_props;
108         uint16_t tso_frames;
109         bool busy;
110     } tx;
111 
112     struct {
113         uint32_t val_in;    /* shifted in from guest driver */
114         uint16_t bitnum_in;
115         uint16_t bitnum_out;
116         uint16_t reading;
117         uint32_t old_eecd;
118     } eecd_state;
119 
120     QEMUTimer *autoneg_timer;
121 
122     QEMUTimer *mit_timer;      /* Mitigation timer. */
123     bool mit_timer_on;         /* Mitigation timer is running. */
124     bool mit_irq_level;        /* Tracks interrupt pin level. */
125     uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
126 
127     QEMUTimer *flush_queue_timer;
128 
129 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
130 #define E1000_FLAG_AUTONEG_BIT 0
131 #define E1000_FLAG_MIT_BIT 1
132 #define E1000_FLAG_MAC_BIT 2
133 #define E1000_FLAG_TSO_BIT 3
134 #define E1000_FLAG_VET_BIT 4
135 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
136 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
137 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
138 #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
139 #define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)
140 
141     uint32_t compat_flags;
142     bool received_tx_tso;
143     bool use_tso_for_migration;
144     e1000x_txd_props mig_props;
145 };
146 typedef struct E1000State_st E1000State;
147 
148 #define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
149 
150 struct E1000BaseClass {
151     PCIDeviceClass parent_class;
152     uint16_t phy_id2;
153 };
154 typedef struct E1000BaseClass E1000BaseClass;
155 
156 #define TYPE_E1000_BASE "e1000-base"
157 
158 DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
159                      E1000, TYPE_E1000_BASE)
160 
161 
162 static void
163 e1000_link_up(E1000State *s)
164 {
165     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
166 
167     /* E1000_STATUS_LU is tested by e1000_can_receive() */
168     qemu_flush_queued_packets(qemu_get_queue(s->nic));
169 }
170 
171 static void
172 e1000_autoneg_done(E1000State *s)
173 {
174     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
175 
176     /* E1000_STATUS_LU is tested by e1000_can_receive() */
177     qemu_flush_queued_packets(qemu_get_queue(s->nic));
178 }
179 
180 static bool
181 have_autoneg(E1000State *s)
182 {
183     return chkflag(AUTONEG) && (s->phy_reg[MII_BMCR] & MII_BMCR_AUTOEN);
184 }
185 
186 static void
187 set_phy_ctrl(E1000State *s, int index, uint16_t val)
188 {
189     /* bits 0-5 reserved; MII_BMCR_[ANRESTART,RESET] are self clearing */
190     s->phy_reg[MII_BMCR] = val & ~(0x3f |
191                                    MII_BMCR_RESET |
192                                    MII_BMCR_ANRESTART);
193 
194     /*
195      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
196      * migrate during auto negotiation, after migration the link will be
197      * down.
198      */
199     if (have_autoneg(s) && (val & MII_BMCR_ANRESTART)) {
200         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
201     }
202 }
203 
204 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
205     [MII_BMCR] = set_phy_ctrl,
206 };
207 
208 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
209 
210 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
211 static const char phy_regcap[0x20] = {
212     [MII_BMSR]   = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
213     [MII_PHYID1] = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
214     [MII_BMCR]   = PHY_RW,    [MII_CTRL1000]               = PHY_RW,
215     [MII_ANLPAR] = PHY_R,     [MII_STAT1000]               = PHY_R,
216     [MII_ANAR]   = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
217     [MII_PHYID2] = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
218     [MII_ANER]   = PHY_R,
219 };
220 
221 /* MII_PHYID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
222 static const uint16_t phy_reg_init[] = {
223     [MII_BMCR] = MII_BMCR_SPEED1000 |
224                  MII_BMCR_FD |
225                  MII_BMCR_AUTOEN,
226 
227     [MII_BMSR] = MII_BMSR_EXTCAP |
228                  MII_BMSR_LINK_ST |   /* link initially up */
229                  MII_BMSR_AUTONEG |
230                  /* MII_BMSR_AN_COMP: initially NOT completed */
231                  MII_BMSR_MFPS |
232                  MII_BMSR_EXTSTAT |
233                  MII_BMSR_10T_HD |
234                  MII_BMSR_10T_FD |
235                  MII_BMSR_100TX_HD |
236                  MII_BMSR_100TX_FD,
237 
238     [MII_PHYID1] = 0x141,
239     /* [MII_PHYID2] configured per DevId, from e1000_reset() */
240     [MII_ANAR] = MII_ANAR_CSMACD | MII_ANAR_10 |
241                  MII_ANAR_10FD | MII_ANAR_TX |
242                  MII_ANAR_TXFD | MII_ANAR_PAUSE |
243                  MII_ANAR_PAUSE_ASYM,
244     [MII_ANLPAR] = MII_ANLPAR_10 | MII_ANLPAR_10FD |
245                    MII_ANLPAR_TX | MII_ANLPAR_TXFD,
246     [MII_CTRL1000] = MII_CTRL1000_FULL | MII_CTRL1000_PORT |
247                      MII_CTRL1000_MASTER,
248     [MII_STAT1000] = MII_STAT1000_HALF | MII_STAT1000_FULL |
249                      MII_STAT1000_ROK | MII_STAT1000_LOK,
250     [M88E1000_PHY_SPEC_CTRL] = 0x360,
251     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
252     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
253 };
254 
255 static const uint32_t mac_reg_init[] = {
256     [PBA]     = 0x00100030,
257     [LEDCTL]  = 0x602,
258     [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
259                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
260     [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
261                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
262                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
263                 E1000_STATUS_LU,
264     [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
265                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
266                 E1000_MANC_RMCP_EN,
267 };
268 
/*
 * Lower *curr to 'value' when 'value' is a usable (non-zero) delay that is
 * shorter than the one recorded so far.  *curr == 0 means "not set yet",
 * so the first non-zero value is always taken.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;                 /* zero means this source has no delay */
    }
    if (*curr != 0 && *curr <= value) {
        return;                 /* the recorded delay is already shorter */
    }
    *curr = value;
}
277 
278 static void
279 set_interrupt_cause(E1000State *s, int index, uint32_t val)
280 {
281     PCIDevice *d = PCI_DEVICE(s);
282     uint32_t pending_ints;
283     uint32_t mit_delay;
284 
285     s->mac_reg[ICR] = val;
286 
287     /*
288      * Make sure ICR and ICS registers have the same value.
289      * The spec says that the ICS register is write-only.  However in practice,
290      * on real hardware ICS is readable, and for reads it has the same value as
291      * ICR (except that ICS does not have the clear on read behaviour of ICR).
292      *
293      * The VxWorks PRO/1000 driver uses this behaviour.
294      */
295     s->mac_reg[ICS] = val;
296 
297     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
298     if (!s->mit_irq_level && pending_ints) {
299         /*
300          * Here we detect a potential raising edge. We postpone raising the
301          * interrupt line if we are inside the mitigation delay window
302          * (s->mit_timer_on == 1).
303          * We provide a partial implementation of interrupt mitigation,
304          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
305          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
306          * RADV; relative timers based on TIDV and RDTR are not implemented.
307          */
308         if (s->mit_timer_on) {
309             return;
310         }
311         if (chkflag(MIT)) {
312             /* Compute the next mitigation delay according to pending
313              * interrupts and the current values of RADV (provided
314              * RDTR!=0), TADV and ITR.
315              * Then rearm the timer.
316              */
317             mit_delay = 0;
318             if (s->mit_ide &&
319                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
320                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
321             }
322             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
323                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
324             }
325             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
326 
327             /*
328              * According to e1000 SPEC, the Ethernet controller guarantees
329              * a maximum observable interrupt rate of 7813 interrupts/sec.
330              * Thus if mit_delay < 500 then the delay should be set to the
331              * minimum delay possible which is 500.
332              */
333             mit_delay = (mit_delay < 500) ? 500 : mit_delay;
334 
335             s->mit_timer_on = 1;
336             timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
337                       mit_delay * 256);
338             s->mit_ide = 0;
339         }
340     }
341 
342     s->mit_irq_level = (pending_ints != 0);
343     pci_set_irq(d, s->mit_irq_level);
344 }
345 
346 static void
347 e1000_mit_timer(void *opaque)
348 {
349     E1000State *s = opaque;
350 
351     s->mit_timer_on = 0;
352     /* Call set_interrupt_cause to update the irq level (if necessary). */
353     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
354 }
355 
356 static void
357 set_ics(E1000State *s, int index, uint32_t val)
358 {
359     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
360         s->mac_reg[IMS]);
361     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
362 }
363 
364 static void
365 e1000_autoneg_timer(void *opaque)
366 {
367     E1000State *s = opaque;
368     if (!qemu_get_queue(s->nic)->link_down) {
369         e1000_autoneg_done(s);
370         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
371     }
372 }
373 
374 static bool e1000_vet_init_need(void *opaque)
375 {
376     E1000State *s = opaque;
377 
378     return chkflag(VET);
379 }
380 
381 static void e1000_reset_hold(Object *obj)
382 {
383     E1000State *d = E1000(obj);
384     E1000BaseClass *edc = E1000_GET_CLASS(d);
385     uint8_t *macaddr = d->conf.macaddr.a;
386 
387     timer_del(d->autoneg_timer);
388     timer_del(d->mit_timer);
389     timer_del(d->flush_queue_timer);
390     d->mit_timer_on = 0;
391     d->mit_irq_level = 0;
392     d->mit_ide = 0;
393     memset(d->phy_reg, 0, sizeof d->phy_reg);
394     memcpy(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
395     d->phy_reg[MII_PHYID2] = edc->phy_id2;
396     memset(d->mac_reg, 0, sizeof d->mac_reg);
397     memcpy(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
398     d->rxbuf_min_shift = 1;
399     memset(&d->tx, 0, sizeof d->tx);
400 
401     if (qemu_get_queue(d->nic)->link_down) {
402         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
403     }
404 
405     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
406 
407     if (e1000_vet_init_need(d)) {
408         d->mac_reg[VET] = ETH_P_VLAN;
409     }
410 }
411 
412 static void
413 set_ctrl(E1000State *s, int index, uint32_t val)
414 {
415     /* RST is self clearing */
416     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
417 }
418 
419 static void
420 e1000_flush_queue_timer(void *opaque)
421 {
422     E1000State *s = opaque;
423 
424     qemu_flush_queued_packets(qemu_get_queue(s->nic));
425 }
426 
427 static void
428 set_rx_control(E1000State *s, int index, uint32_t val)
429 {
430     s->mac_reg[RCTL] = val;
431     s->rxbuf_size = e1000x_rxbufsize(val);
432     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
433     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
434            s->mac_reg[RCTL]);
435     timer_mod(s->flush_queue_timer,
436               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
437 }
438 
439 static void
440 set_mdic(E1000State *s, int index, uint32_t val)
441 {
442     uint32_t data = val & E1000_MDIC_DATA_MASK;
443     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
444 
445     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
446         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
447     else if (val & E1000_MDIC_OP_READ) {
448         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
449         if (!(phy_regcap[addr] & PHY_R)) {
450             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
451             val |= E1000_MDIC_ERROR;
452         } else
453             val = (val ^ data) | s->phy_reg[addr];
454     } else if (val & E1000_MDIC_OP_WRITE) {
455         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
456         if (!(phy_regcap[addr] & PHY_W)) {
457             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
458             val |= E1000_MDIC_ERROR;
459         } else {
460             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
461                 phyreg_writeops[addr](s, index, data);
462             } else {
463                 s->phy_reg[addr] = data;
464             }
465         }
466     }
467     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
468 
469     if (val & E1000_MDIC_INT_EN) {
470         set_ics(s, 0, E1000_ICR_MDAC);
471     }
472 }
473 
474 static uint32_t
475 get_eecd(E1000State *s, int index)
476 {
477     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
478 
479     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
480            s->eecd_state.bitnum_out, s->eecd_state.reading);
481     if (!s->eecd_state.reading ||
482         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
483           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
484         ret |= E1000_EECD_DO;
485     return ret;
486 }
487 
488 static void
489 set_eecd(E1000State *s, int index, uint32_t val)
490 {
491     uint32_t oldval = s->eecd_state.old_eecd;
492 
493     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
494             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
495     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
496         return;
497     }
498     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
499         s->eecd_state.val_in = 0;
500         s->eecd_state.bitnum_in = 0;
501         s->eecd_state.bitnum_out = 0;
502         s->eecd_state.reading = 0;
503     }
504     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
505         return;
506     }
507     if (!(E1000_EECD_SK & val)) {               /* falling edge */
508         s->eecd_state.bitnum_out++;
509         return;
510     }
511     s->eecd_state.val_in <<= 1;
512     if (val & E1000_EECD_DI)
513         s->eecd_state.val_in |= 1;
514     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
515         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
516         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
517             EEPROM_READ_OPCODE_MICROWIRE);
518     }
519     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
520            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
521            s->eecd_state.reading);
522 }
523 
524 static uint32_t
525 flash_eerd_read(E1000State *s, int x)
526 {
527     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
528 
529     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
530         return (s->mac_reg[EERD]);
531 
532     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
533         return (E1000_EEPROM_RW_REG_DONE | r);
534 
535     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
536            E1000_EEPROM_RW_REG_DONE | r);
537 }
538 
/*
 * Compute an Internet checksum over part of a frame and store it in the
 * frame, big-endian.
 *
 * @data: start of the frame
 * @n:    number of valid bytes in the frame
 * @sloc: offset at which the 16-bit checksum is stored
 * @css:  offset of the first byte covered by the checksum
 * @cse:  offset of the last byte covered; 0 (or a value not below @n)
 *        means "up to the end of the frame"
 *
 * Nothing is written unless the checksum location lies before the last
 * covered byte.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;

    if (cse && cse < n) {
        n = cse + 1;
    }

    /*
     * Test n == 0 explicitly: 'n - 1' would wrap around to UINT32_MAX and
     * let the store below happen for an empty frame.
     */
    if (n == 0 || sloc >= n - 1) {
        return;
    }

    sum = net_checksum_add(n - css, data + css);
    stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
}
551 
552 static inline void
553 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
554 {
555     if (is_broadcast_ether_addr(arr)) {
556         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
557     } else if (is_multicast_ether_addr(arr)) {
558         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
559     }
560 }
561 
562 static void
563 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
564 {
565     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
566                                     PTC1023, PTC1522 };
567 
568     NetClientState *nc = qemu_get_queue(s->nic);
569     if (s->phy_reg[MII_BMCR] & MII_BMCR_LOOPBACK) {
570         qemu_receive_packet(nc, buf, size);
571     } else {
572         qemu_send_packet(nc, buf, size);
573     }
574     inc_tx_bcast_or_mcast_count(s, buf);
575     e1000x_increase_size_stats(s->mac_reg, PTCregs, size + 4);
576 }
577 
578 static void
579 xmit_seg(E1000State *s)
580 {
581     uint16_t len;
582     unsigned int frames = s->tx.tso_frames, css, sofar;
583     struct e1000_tx *tp = &s->tx;
584     struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
585 
586     if (tp->cptse) {
587         css = props->ipcss;
588         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
589                frames, tp->size, css);
590         if (props->ip) {    /* IPv4 */
591             stw_be_p(tp->data+css+2, tp->size - css);
592             stw_be_p(tp->data+css+4,
593                      lduw_be_p(tp->data + css + 4) + frames);
594         } else {         /* IPv6 */
595             stw_be_p(tp->data+css+4, tp->size - css);
596         }
597         css = props->tucss;
598         len = tp->size - css;
599         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
600         if (props->tcp) {
601             sofar = frames * props->mss;
602             stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
603             if (props->paylen - sofar > props->mss) {
604                 tp->data[css + 13] &= ~9;    /* PSH, FIN */
605             } else if (frames) {
606                 e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
607             }
608         } else {    /* UDP */
609             stw_be_p(tp->data+css+4, len);
610         }
611         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
612             unsigned int phsum;
613             // add pseudo-header length before checksum calculation
614             void *sp = tp->data + props->tucso;
615 
616             phsum = lduw_be_p(sp) + len;
617             phsum = (phsum >> 16) + (phsum & 0xffff);
618             stw_be_p(sp, phsum);
619         }
620         tp->tso_frames++;
621     }
622 
623     if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
624         putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
625     }
626     if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
627         putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
628     }
629     if (tp->vlan_needed) {
630         memmove(tp->vlan, tp->data, 4);
631         memmove(tp->data, tp->data + 4, 8);
632         memcpy(tp->data + 8, tp->vlan_header, 4);
633         e1000_send_packet(s, tp->vlan, tp->size + 4);
634     } else {
635         e1000_send_packet(s, tp->data, tp->size);
636     }
637 
638     e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
639     e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size + 4);
640     e1000x_inc_reg_if_not_full(s->mac_reg, GPTC);
641     e1000x_grow_8reg_if_not_full(s->mac_reg, GOTCL, s->tx.size + 4);
642 }
643 
644 static void
645 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
646 {
647     PCIDevice *d = PCI_DEVICE(s);
648     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
649     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
650     unsigned int split_size = txd_lower & 0xffff, bytes, sz;
651     unsigned int msh = 0xfffff;
652     uint64_t addr;
653     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
654     struct e1000_tx *tp = &s->tx;
655 
656     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
657     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
658         if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
659             e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
660             s->use_tso_for_migration = 1;
661             tp->tso_frames = 0;
662         } else {
663             e1000x_read_tx_ctx_descr(xp, &tp->props);
664             s->use_tso_for_migration = 0;
665         }
666         return;
667     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
668         // data descriptor
669         if (tp->size == 0) {
670             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
671         }
672         tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
673     } else {
674         // legacy descriptor
675         tp->cptse = 0;
676     }
677 
678     if (e1000x_vlan_enabled(s->mac_reg) &&
679         e1000x_is_vlan_txd(txd_lower) &&
680         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
681         tp->vlan_needed = 1;
682         stw_be_p(tp->vlan_header,
683                       le16_to_cpu(s->mac_reg[VET]));
684         stw_be_p(tp->vlan_header + 2,
685                       le16_to_cpu(dp->upper.fields.special));
686     }
687 
688     addr = le64_to_cpu(dp->buffer_addr);
689     if (tp->cptse) {
690         msh = tp->tso_props.hdr_len + tp->tso_props.mss;
691         do {
692             bytes = split_size;
693             if (tp->size >= msh) {
694                 goto eop;
695             }
696             if (tp->size + bytes > msh)
697                 bytes = msh - tp->size;
698 
699             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
700             pci_dma_read(d, addr, tp->data + tp->size, bytes);
701             sz = tp->size + bytes;
702             if (sz >= tp->tso_props.hdr_len
703                 && tp->size < tp->tso_props.hdr_len) {
704                 memmove(tp->header, tp->data, tp->tso_props.hdr_len);
705             }
706             tp->size = sz;
707             addr += bytes;
708             if (sz == msh) {
709                 xmit_seg(s);
710                 memmove(tp->data, tp->header, tp->tso_props.hdr_len);
711                 tp->size = tp->tso_props.hdr_len;
712             }
713             split_size -= bytes;
714         } while (bytes && split_size);
715     } else {
716         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
717         pci_dma_read(d, addr, tp->data + tp->size, split_size);
718         tp->size += split_size;
719     }
720 
721 eop:
722     if (!(txd_lower & E1000_TXD_CMD_EOP))
723         return;
724     if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
725         xmit_seg(s);
726     }
727     tp->tso_frames = 0;
728     tp->sum_needed = 0;
729     tp->vlan_needed = 0;
730     tp->size = 0;
731     tp->cptse = 0;
732 }
733 
734 static uint32_t
735 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
736 {
737     PCIDevice *d = PCI_DEVICE(s);
738     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
739 
740     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
741         return 0;
742     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
743                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
744     dp->upper.data = cpu_to_le32(txd_upper);
745     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
746                   &dp->upper, sizeof(dp->upper));
747     return E1000_ICR_TXDW;
748 }
749 
750 static uint64_t tx_desc_base(E1000State *s)
751 {
752     uint64_t bah = s->mac_reg[TDBAH];
753     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
754 
755     return (bah << 32) + bal;
756 }
757 
758 static void
759 start_xmit(E1000State *s)
760 {
761     PCIDevice *d = PCI_DEVICE(s);
762     dma_addr_t base;
763     struct e1000_tx_desc desc;
764     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
765 
766     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
767         DBGOUT(TX, "tx disabled\n");
768         return;
769     }
770 
771     if (s->tx.busy) {
772         return;
773     }
774     s->tx.busy = true;
775 
776     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
777         base = tx_desc_base(s) +
778                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
779         pci_dma_read(d, base, &desc, sizeof(desc));
780 
781         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
782                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
783                desc.upper.data);
784 
785         process_tx_desc(s, &desc);
786         cause |= txdesc_writeback(s, base, &desc);
787 
788         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
789             s->mac_reg[TDH] = 0;
790         /*
791          * the following could happen only if guest sw assigns
792          * bogus values to TDT/TDLEN.
793          * there's nothing too intelligent we could do about this.
794          */
795         if (s->mac_reg[TDH] == tdh_start ||
796             tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
797             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
798                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
799             break;
800         }
801     }
802     s->tx.busy = false;
803     set_ics(s, 0, cause);
804 }
805 
806 static int
807 receive_filter(E1000State *s, const void *buf)
808 {
809     return (!e1000x_is_vlan_packet(buf, s->mac_reg[VET]) ||
810             e1000x_rx_vlan_filter(s->mac_reg, PKT_GET_VLAN_HDR(buf))) &&
811            e1000x_rx_group_filter(s->mac_reg, buf);
812 }
813 
814 static void
815 e1000_set_link_status(NetClientState *nc)
816 {
817     E1000State *s = qemu_get_nic_opaque(nc);
818     uint32_t old_status = s->mac_reg[STATUS];
819 
820     if (nc->link_down) {
821         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
822     } else {
823         if (have_autoneg(s) &&
824             !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
825             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
826         } else {
827             e1000_link_up(s);
828         }
829     }
830 
831     if (s->mac_reg[STATUS] != old_status)
832         set_ics(s, 0, E1000_ICR_LSC);
833 }
834 
835 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
836 {
837     int bufs;
838     /* Fast-path short packets */
839     if (total_size <= s->rxbuf_size) {
840         return s->mac_reg[RDH] != s->mac_reg[RDT];
841     }
842     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
843         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
844     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
845         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
846             s->mac_reg[RDT] - s->mac_reg[RDH];
847     } else {
848         return false;
849     }
850     return total_size <= bufs * s->rxbuf_size;
851 }
852 
853 static bool
854 e1000_can_receive(NetClientState *nc)
855 {
856     E1000State *s = qemu_get_nic_opaque(nc);
857 
858     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
859         e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
860 }
861 
862 static uint64_t rx_desc_base(E1000State *s)
863 {
864     uint64_t bah = s->mac_reg[RDBAH];
865     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
866 
867     return (bah << 32) + bal;
868 }
869 
870 static void
871 e1000_receiver_overrun(E1000State *s, size_t size)
872 {
873     trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
874     e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
875     e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
876     set_ics(s, 0, E1000_ICS_RXO);
877 }
878 
/*
 * Deliver one incoming frame, described by an iovec array, into the guest's
 * RX descriptor ring.
 *
 * Returns -1 when hardware RX is disabled or the ring cannot hold the frame,
 * 0 while the flush-queue timer is pending, and otherwise the frame size
 * (after VLAN tag removal, if any).  Frames dropped by the oversize check or
 * by the receive filter also return their size.
 */
879 static ssize_t
880 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
881 {
882     E1000State *s = qemu_get_nic_opaque(nc);
883     PCIDevice *d = PCI_DEVICE(s);
884     struct e1000_rx_desc desc;
885     dma_addr_t base;
886     unsigned int n, rdt;
887     uint32_t rdh_start;
888     uint16_t vlan_special = 0;
889     uint8_t vlan_status = 0;
890     uint8_t min_buf[ETH_ZLEN];
891     uint8_t *filter_buf = iov->iov_base;
892     size_t size = iov_size(iov, iovcnt);
893     size_t iov_ofs = 0;
894     size_t desc_offset;
895     size_t desc_size;
896     size_t total_size;
897     eth_pkt_types_e pkt_type;
898 
899     if (!e1000x_hw_rx_enabled(s->mac_reg)) {
900         return -1;
901     }
902 
903     if (timer_pending(s->flush_queue_timer)) {
904         return 0;
905     }
906 
    /*
     * The filter code reads the header through filter_buf.  If the first
     * iovec element is shorter than a maximal Ethernet header, gather the
     * header bytes into min_buf and filter on that copy instead.
     */
907     if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
908         /* This is very unlikely, but may happen. */
909         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
910         filter_buf = min_buf;
911     }
912 
913     /* Discard oversized packets if !LPE and !SBP. */
914     if (e1000x_is_oversized(s->mac_reg, size)) {
915         return size;
916     }
917 
918     if (!receive_filter(s, filter_buf)) {
919         return size;
920     }
921 
    /*
     * VLAN stripping: save the 16-bit value at header offset 14 for the
     * descriptor's "special" field, move the first 12 header bytes forward
     * by 4 so they sit directly in front of the remaining data, and start
     * copying from offset 4.  The reported size shrinks by 4 bytes.
     */
922     if (e1000x_vlan_enabled(s->mac_reg) &&
923         e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
924         vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
925         iov_ofs = 4;
926         if (filter_buf == iov->iov_base) {
927             memmove(filter_buf + 4, filter_buf, 12);
928         } else {
            /*
             * The header was gathered into min_buf: write the 12 bytes back
             * into the iovec at offset 4, then step over any iovec elements
             * that lie entirely inside the skipped 4 bytes.
             */
929             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
930             while (iov->iov_len <= iov_ofs) {
931                 iov_ofs -= iov->iov_len;
932                 iov++;
933             }
934         }
935         vlan_status = E1000_RXD_STAT_VP;
936         size -= 4;
937     }
938 
939     pkt_type = get_eth_packet_type(PKT_GET_ETH_HDR(filter_buf));
940     rdh_start = s->mac_reg[RDH];
941     desc_offset = 0;
    /*
     * total_size adds e1000x_fcs_len() bytes on top of the payload; these
     * are counted in descriptor lengths but never copied from the iovec
     * (copying below is bounded by `size`).
     */
942     total_size = size + e1000x_fcs_len(s->mac_reg);
943     if (!e1000_has_rxbufs(s, total_size)) {
944         e1000_receiver_overrun(s, total_size);
945         return -1;
946     }
    /* Each iteration fills the descriptor at RDH with up to rxbuf_size bytes. */
947     do {
948         desc_size = total_size - desc_offset;
949         if (desc_size > s->rxbuf_size) {
950             desc_size = s->rxbuf_size;
951         }
952         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
953         pci_dma_read(d, base, &desc, sizeof(desc));
954         desc.special = vlan_special;
955         desc.status &= ~E1000_RXD_STAT_DD;
956         if (desc.buffer_addr) {
957             if (desc_offset < size) {
958                 size_t iov_copy;
959                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
960                 size_t copy_size = size - desc_offset;
961                 if (copy_size > s->rxbuf_size) {
962                     copy_size = s->rxbuf_size;
963                 }
                /* Copy from consecutive iovec elements into the guest buffer. */
964                 do {
965                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
966                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
967                     copy_size -= iov_copy;
968                     ba += iov_copy;
969                     iov_ofs += iov_copy;
970                     if (iov_ofs == iov->iov_len) {
971                         iov++;
972                         iov_ofs = 0;
973                     }
974                 } while (copy_size);
975             }
976             desc_offset += desc_size;
977             desc.length = cpu_to_le16(desc_size);
978             if (desc_offset >= total_size) {
979                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
980             } else {
981                 /* Guest zeroing out status is not a hardware requirement.
982                    Clear EOP in case guest didn't do it. */
983                 desc.status &= ~E1000_RXD_STAT_EOP;
984             }
985         } else { // as per intel docs; skip descriptors with null buf addr
986             DBGOUT(RX, "Null RX descriptor!!\n");
987         }
        /*
         * Two-step write-back: first the whole descriptor with DD clear,
         * then a status-only write that sets DD (plus VP for VLAN frames).
         */
988         pci_dma_write(d, base, &desc, sizeof(desc));
989         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
990         pci_dma_write(d, base + offsetof(struct e1000_rx_desc, status),
991                       &desc.status, sizeof(desc.status));
992 
        /* Advance RDH, wrapping to 0 once it reaches the end of the ring. */
993         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
994             s->mac_reg[RDH] = 0;
995         /* see comment in start_xmit; same here */
996         if (s->mac_reg[RDH] == rdh_start ||
997             rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
998             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
999                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1000             e1000_receiver_overrun(s, total_size);
1001             return -1;
1002         }
1003     } while (desc_offset < total_size);
1004 
1005     e1000x_update_rx_total_stats(s->mac_reg, pkt_type, size, total_size);
1006 
    /*
     * Always raise RXT0; additionally raise RXDMT0 when the descriptors
     * between RDH and RDT occupy no more than RDLEN >> rxbuf_min_shift bytes.
     */
1007     n = E1000_ICS_RXT0;
1008     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1009         rdt += s->mac_reg[RDLEN] / sizeof(desc);
1010     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1011         s->rxbuf_min_shift)
1012         n |= E1000_ICS_RXDMT0;
1013 
1014     set_ics(s, 0, n);
1015 
1016     return size;
1017 }
1018 
1019 static ssize_t
1020 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1021 {
1022     const struct iovec iov = {
1023         .iov_base = (uint8_t *)buf,
1024         .iov_len = size
1025     };
1026 
1027     return e1000_receive_iov(nc, &iov, 1);
1028 }
1029 
1030 static uint32_t
1031 mac_readreg(E1000State *s, int index)
1032 {
1033     return s->mac_reg[index];
1034 }
1035 
1036 static uint32_t
1037 mac_icr_read(E1000State *s, int index)
1038 {
1039     uint32_t ret = s->mac_reg[ICR];
1040 
1041     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1042     set_interrupt_cause(s, 0, 0);
1043     return ret;
1044 }
1045 
1046 static uint32_t
1047 mac_read_clr4(E1000State *s, int index)
1048 {
1049     uint32_t ret = s->mac_reg[index];
1050 
1051     s->mac_reg[index] = 0;
1052     return ret;
1053 }
1054 
1055 static uint32_t
1056 mac_read_clr8(E1000State *s, int index)
1057 {
1058     uint32_t ret = s->mac_reg[index];
1059 
1060     s->mac_reg[index] = 0;
1061     s->mac_reg[index-1] = 0;
1062     return ret;
1063 }
1064 
1065 static void
1066 mac_writereg(E1000State *s, int index, uint32_t val)
1067 {
1068     uint32_t macaddr[2];
1069 
1070     s->mac_reg[index] = val;
1071 
1072     if (index == RA + 1) {
1073         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1074         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1075         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1076     }
1077 }
1078 
1079 static void
1080 set_rdt(E1000State *s, int index, uint32_t val)
1081 {
1082     s->mac_reg[index] = val & 0xffff;
1083     if (e1000_has_rxbufs(s, 1)) {
1084         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1085     }
1086 }
1087 
1088 #define LOW_BITS_SET_FUNC(num)                             \
1089     static void                                            \
1090     set_##num##bit(E1000State *s, int index, uint32_t val) \
1091     {                                                      \
1092         s->mac_reg[index] = val & (BIT(num) - 1);          \
1093     }
1094 
1095 LOW_BITS_SET_FUNC(4)
1096 LOW_BITS_SET_FUNC(11)
1097 LOW_BITS_SET_FUNC(13)
1098 LOW_BITS_SET_FUNC(16)
1099 
1100 static void
1101 set_dlen(E1000State *s, int index, uint32_t val)
1102 {
1103     s->mac_reg[index] = val & 0xfff80;
1104 }
1105 
1106 static void
1107 set_tctl(E1000State *s, int index, uint32_t val)
1108 {
1109     s->mac_reg[index] = val;
1110     s->mac_reg[TDT] &= 0xffff;
1111     start_xmit(s);
1112 }
1113 
1114 static void
1115 set_icr(E1000State *s, int index, uint32_t val)
1116 {
1117     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1118     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1119 }
1120 
1121 static void
1122 set_imc(E1000State *s, int index, uint32_t val)
1123 {
1124     s->mac_reg[IMS] &= ~val;
1125     set_ics(s, 0, 0);
1126 }
1127 
1128 static void
1129 set_ims(E1000State *s, int index, uint32_t val)
1130 {
1131     s->mac_reg[IMS] |= val;
1132     set_ics(s, 0, 0);
1133 }
1134 
/*
 * Read dispatch table for MAC registers, indexed by register number (the
 * same index e1000_mmio_read() derives from the MMIO address).  A NULL
 * entry means no read handler; e1000_mmio_read() returns 0 for those.
 *
 * getreg(x) is shorthand for a plain mac_readreg entry.
 */
1135 #define getreg(x)    [x] = mac_readreg
1136 typedef uint32_t (*readops)(E1000State *, int);
1137 static const readops macreg_readops[] = {
1138     getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
1139     getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
1140     getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
1141     getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
1142     getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
1143     getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
1144     getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
1145     getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
1146     getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
1147     getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
1148     getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
1149     getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
1150     getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
1151     getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
1152     getreg(GOTCL),    getreg(RDFH),     getreg(RDFT),     getreg(RDFHS),
1153     getreg(RDFTS),    getreg(RDFPC),    getreg(TDFH),     getreg(TDFT),
1154     getreg(TDFHS),    getreg(TDFTS),    getreg(TDFPC),    getreg(AIT),
1155 
    /* Read-to-clear registers: the *_clr8 ones also clear the preceding register. */
1156     [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
1157     [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
1158     [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
1159     [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
1160     [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
1161     [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
1162     [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
1163     [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
1164     [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
1165     [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
1166     [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
1167     [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
1168     [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
1169     [MPTC]    = mac_read_clr4,
    /* Registers with dedicated read handlers. */
1170     [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
1171     [EERD]    = flash_eerd_read,
1172 
    /* Register ranges read back verbatim. */
1173     [CRCERRS ... MPC]     = &mac_readreg,
1174     [IP6AT ... IP6AT + 3] = &mac_readreg,    [IP4AT ... IP4AT + 6] = &mac_readreg,
1175     [FFLT ... FFLT + 6]   = &mac_readreg,
1176     [RA ... RA + 31]      = &mac_readreg,
1177     [WUPM ... WUPM + 31]  = &mac_readreg,
1178     [MTA ... MTA + E1000_MC_TBL_SIZE - 1]   = &mac_readreg,
1179     [VFTA ... VFTA + E1000_VLAN_FILTER_TBL_SIZE - 1] = &mac_readreg,
1180     [FFMT ... FFMT + 254] = &mac_readreg,
1181     [FFVT ... FFVT + 254] = &mac_readreg,
1182     [PBM ... PBM + 16383] = &mac_readreg,
1183 };
/* Number of table slots; used as the bounds check before indexing the table. */
1184 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1185 
/*
 * Write dispatch table for MAC registers, indexed by register number (the
 * same index e1000_mmio_write() derives from the MMIO address).  A NULL
 * entry means no write handler; e1000_mmio_write() ignores such writes.
 *
 * putreg(x) is shorthand for a plain mac_writereg entry.
 */
1186 #define putreg(x)    [x] = mac_writereg
1187 typedef void (*writeops)(E1000State *, int, uint32_t);
1188 static const writeops macreg_writeops[] = {
1189     putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
1190     putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
1191     putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
1192     putreg(IPAV),     putreg(WUC),
1193     putreg(WUS),
1194 
    /* Registers whose writes are masked or have side effects. */
1195     [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL]  = set_tctl,
1196     [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]   = set_ics,
1197     [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]   = set_rdt,
1198     [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]   = set_icr,
1199     [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL]  = set_ctrl,
1200     [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV]  = set_16bit,
1201     [ITR]    = set_16bit,  [TDFH]   = set_11bit,      [TDFT]  = set_11bit,
1202     [TDFHS]  = set_13bit,  [TDFTS]  = set_13bit,      [TDFPC] = set_13bit,
1203     [RDFH]   = set_13bit,  [RDFT]   = set_13bit,      [RDFHS] = set_13bit,
1204     [RDFTS]  = set_13bit,  [RDFPC]  = set_13bit,      [AIT]   = set_16bit,
1205 
    /* Register ranges; most are stored verbatim, FFLT and FFMT are masked. */
1206     [IP6AT ... IP6AT + 3] = &mac_writereg, [IP4AT ... IP4AT + 6] = &mac_writereg,
1207     [FFLT ... FFLT + 6]   = &set_11bit,
1208     [RA ... RA + 31]      = &mac_writereg,
1209     [WUPM ... WUPM + 31]  = &mac_writereg,
1210     [MTA ... MTA + E1000_MC_TBL_SIZE - 1] = &mac_writereg,
1211     [VFTA ... VFTA + E1000_VLAN_FILTER_TBL_SIZE - 1] = &mac_writereg,
1212     [FFMT ... FFMT + 254] = &set_4bit,     [FFVT ... FFVT + 254] = &mac_writereg,
1213     [PBM ... PBM + 16383] = &mac_writereg,
1214 };
1215 
/* Number of table slots; used as the bounds check before indexing the table. */
1216 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1217 
/*
 * Per-register access attributes consulted by e1000_mmio_read() and
 * e1000_mmio_write() before dispatching to a handler.
 */
1218 enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
1219 
/* Encode "register is usable only when compat flag x is set". */
1220 #define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
1221 /* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
1222  * f - flag bits (up to 6 possible flags)
1223  * n - flag needed
1224  * p - partially implemented */
1225 static const uint8_t mac_reg_access[0x8000] = {
    /* Registers gated by the MIT flag. */
1226     [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
1227     [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),
1228 
    /* Registers gated by the MAC flag. */
1229     [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
1230     [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
1231     [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
1232     [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
1233     [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
1234     [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
1235     [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
1236     [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
1237     [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
1238     [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
1239     [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
1240     [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
1241     [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
1242     [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
1243     [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
1244     [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
1245     [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
1246     [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
1247     [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
1248     [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
1249     [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
1250     [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
1251     [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
1252     [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
1253     [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
1254     [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
1255     [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
1256     [BPTC]    = markflag(MAC),
1257 
    /* Gated by the MAC flag and additionally marked as partially implemented. */
1258     [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1259     [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1260     [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1261     [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1262     [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1263     [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1264     [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1265     [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1266     [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1267     [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1268     [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
1269 };
1270 
1271 static void
1272 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1273                  unsigned size)
1274 {
1275     E1000State *s = opaque;
1276     unsigned int index = (addr & 0x1ffff) >> 2;
1277 
1278     if (index < NWRITEOPS && macreg_writeops[index]) {
1279         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1280             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1281             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1282                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1283                        "It is not fully implemented.\n", index<<2);
1284             }
1285             macreg_writeops[index](s, index, val);
1286         } else {    /* "flag needed" bit is set, but the flag is not active */
1287             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1288                    index<<2);
1289         }
1290     } else if (index < NREADOPS && macreg_readops[index]) {
1291         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1292                index<<2, val);
1293     } else {
1294         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1295                index<<2, val);
1296     }
1297 }
1298 
1299 static uint64_t
1300 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1301 {
1302     E1000State *s = opaque;
1303     unsigned int index = (addr & 0x1ffff) >> 2;
1304 
1305     if (index < NREADOPS && macreg_readops[index]) {
1306         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1307             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1308             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1309                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1310                        "It is not fully implemented.\n", index<<2);
1311             }
1312             return macreg_readops[index](s, index);
1313         } else {    /* "flag needed" bit is set, but the flag is not active */
1314             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1315                    index<<2);
1316         }
1317     } else {
1318         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1319     }
1320     return 0;
1321 }
1322 
/*
 * Callbacks for the register MMIO region: little-endian, with the
 * implementation access size fixed at exactly 4 bytes.
 */
1323 static const MemoryRegionOps e1000_mmio_ops = {
1324     .read = e1000_mmio_read,
1325     .write = e1000_mmio_write,
1326     .endianness = DEVICE_LITTLE_ENDIAN,
1327     .impl = {
1328         .min_access_size = 4,
1329         .max_access_size = 4,
1330     },
1331 };
1332 
1333 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1334                               unsigned size)
1335 {
1336     E1000State *s = opaque;
1337 
1338     (void)s;
1339     return 0;
1340 }
1341 
1342 static void e1000_io_write(void *opaque, hwaddr addr,
1343                            uint64_t val, unsigned size)
1344 {
1345     E1000State *s = opaque;
1346 
1347     (void)s;
1348 }
1349 
/* Callbacks for the I/O-port region; both handlers are stubs. */
1350 static const MemoryRegionOps e1000_io_ops = {
1351     .read = e1000_io_read,
1352     .write = e1000_io_write,
1353     .endianness = DEVICE_LITTLE_ENDIAN,
1354 };
1355 
/*
 * VMState field test: true only for version-1 streams.  Used to skip the
 * 4 bytes that were formerly the instance id.
 */
static bool is_version_1(void *opaque, int version_id)
{
    const int wanted_version = 1;

    return version_id == wanted_version;
}
1360 
1361 static int e1000_pre_save(void *opaque)
1362 {
1363     E1000State *s = opaque;
1364     NetClientState *nc = qemu_get_queue(s->nic);
1365 
1366     /*
1367      * If link is down and auto-negotiation is supported and ongoing,
1368      * complete auto-negotiation immediately. This allows us to look
1369      * at MII_BMSR_AN_COMP to infer link status on load.
1370      */
1371     if (nc->link_down && have_autoneg(s)) {
1372         s->phy_reg[MII_BMSR] |= MII_BMSR_AN_COMP;
1373     }
1374 
1375     /* Decide which set of props to migrate in the main structure */
1376     if (chkflag(TSO) || !s->use_tso_for_migration) {
1377         /* Either we're migrating with the extra subsection, in which
1378          * case the mig_props is always 'props' OR
1379          * we've not got the subsection, but 'props' was the last
1380          * updated.
1381          */
1382         s->mig_props = s->tx.props;
1383     } else {
1384         /* We're not using the subsection, and 'tso_props' was
1385          * the last updated.
1386          */
1387         s->mig_props = s->tx.tso_props;
1388     }
1389     return 0;
1390 }
1391 
1392 static int e1000_post_load(void *opaque, int version_id)
1393 {
1394     E1000State *s = opaque;
1395     NetClientState *nc = qemu_get_queue(s->nic);
1396 
1397     if (!chkflag(MIT)) {
1398         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1399             s->mac_reg[TADV] = 0;
1400         s->mit_irq_level = false;
1401     }
1402     s->mit_ide = 0;
1403     s->mit_timer_on = true;
1404     timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
1405 
1406     /* nc.link_down can't be migrated, so infer link_down according
1407      * to link status bit in mac_reg[STATUS].
1408      * Alternatively, restart link negotiation if it was in progress. */
1409     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1410 
1411     if (have_autoneg(s) && !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
1412         nc->link_down = false;
1413         timer_mod(s->autoneg_timer,
1414                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1415     }
1416 
1417     s->tx.props = s->mig_props;
1418     if (!s->received_tx_tso) {
1419         /* We received only one set of offload data (tx.props)
1420          * and haven't got tx.tso_props.  The best we can do
1421          * is dupe the data.
1422          */
1423         s->tx.tso_props = s->mig_props;
1424     }
1425     return 0;
1426 }
1427 
1428 static int e1000_tx_tso_post_load(void *opaque, int version_id)
1429 {
1430     E1000State *s = opaque;
1431     s->received_tx_tso = true;
1432     return 0;
1433 }
1434 
1435 static bool e1000_mit_state_needed(void *opaque)
1436 {
1437     E1000State *s = opaque;
1438 
1439     return chkflag(MIT);
1440 }
1441 
1442 static bool e1000_full_mac_needed(void *opaque)
1443 {
1444     E1000State *s = opaque;
1445 
1446     return chkflag(MAC);
1447 }
1448 
1449 static bool e1000_tso_state_needed(void *opaque)
1450 {
1451     E1000State *s = opaque;
1452 
1453     return chkflag(TSO);
1454 }
1455 
/*
 * Migration subsection "e1000/mit_state": the four mitigation-related
 * registers plus mit_irq_level.  Gated by e1000_mit_state_needed().
 */
1456 static const VMStateDescription vmstate_e1000_mit_state = {
1457     .name = "e1000/mit_state",
1458     .version_id = 1,
1459     .minimum_version_id = 1,
1460     .needed = e1000_mit_state_needed,
1461     .fields = (VMStateField[]) {
1462         VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1463         VMSTATE_UINT32(mac_reg[RADV], E1000State),
1464         VMSTATE_UINT32(mac_reg[TADV], E1000State),
1465         VMSTATE_UINT32(mac_reg[ITR], E1000State),
1466         VMSTATE_BOOL(mit_irq_level, E1000State),
1467         VMSTATE_END_OF_LIST()
1468     }
1469 };
1470 
/*
 * Migration subsection "e1000/full_mac_state": the whole mac_reg array
 * (0x8000 entries).  Gated by e1000_full_mac_needed().
 */
1471 static const VMStateDescription vmstate_e1000_full_mac_state = {
1472     .name = "e1000/full_mac_state",
1473     .version_id = 1,
1474     .minimum_version_id = 1,
1475     .needed = e1000_full_mac_needed,
1476     .fields = (VMStateField[]) {
1477         VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
1478         VMSTATE_END_OF_LIST()
1479     }
1480 };
1481 
/*
 * Migration subsection "e1000/tx_tso_state": the tx.tso_props offload
 * properties.  Gated by e1000_tso_state_needed(); its post_load hook marks
 * the properties as received.
 */
1482 static const VMStateDescription vmstate_e1000_tx_tso_state = {
1483     .name = "e1000/tx_tso_state",
1484     .version_id = 1,
1485     .minimum_version_id = 1,
1486     .needed = e1000_tso_state_needed,
1487     .post_load = e1000_tx_tso_post_load,
1488     .fields = (VMStateField[]) {
1489         VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
1490         VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
1491         VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
1492         VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
1493         VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
1494         VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
1495         VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
1496         VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
1497         VMSTATE_UINT16(tx.tso_props.mss, E1000State),
1498         VMSTATE_INT8(tx.tso_props.ip, E1000State),
1499         VMSTATE_INT8(tx.tso_props.tcp, E1000State),
1500         VMSTATE_END_OF_LIST()
1501     }
1502 };
1503 
/*
 * Top-level migration description for the device (version 2, accepting
 * streams from version 1).  The field list order defines the stream
 * layout, so entries must not be reordered.  The offload properties are
 * carried in mig_props, which e1000_pre_save() fills and
 * e1000_post_load() copies back into tx.props.
 */
1504 static const VMStateDescription vmstate_e1000 = {
1505     .name = "e1000",
1506     .version_id = 2,
1507     .minimum_version_id = 1,
1508     .pre_save = e1000_pre_save,
1509     .post_load = e1000_post_load,
1510     .fields = (VMStateField[]) {
1511         VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1512         VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1513         VMSTATE_UNUSED(4), /* Was mmio_base.  */
1514         VMSTATE_UINT32(rxbuf_size, E1000State),
1515         VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* EEPROM access state */
1516         VMSTATE_UINT32(eecd_state.val_in, E1000State),
1517         VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1518         VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1519         VMSTATE_UINT16(eecd_state.reading, E1000State),
1520         VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* Offload properties (via mig_props) interleaved with TX state */
1521         VMSTATE_UINT8(mig_props.ipcss, E1000State),
1522         VMSTATE_UINT8(mig_props.ipcso, E1000State),
1523         VMSTATE_UINT16(mig_props.ipcse, E1000State),
1524         VMSTATE_UINT8(mig_props.tucss, E1000State),
1525         VMSTATE_UINT8(mig_props.tucso, E1000State),
1526         VMSTATE_UINT16(mig_props.tucse, E1000State),
1527         VMSTATE_UINT32(mig_props.paylen, E1000State),
1528         VMSTATE_UINT8(mig_props.hdr_len, E1000State),
1529         VMSTATE_UINT16(mig_props.mss, E1000State),
1530         VMSTATE_UINT16(tx.size, E1000State),
1531         VMSTATE_UINT16(tx.tso_frames, E1000State),
1532         VMSTATE_UINT8(tx.sum_needed, E1000State),
1533         VMSTATE_INT8(mig_props.ip, E1000State),
1534         VMSTATE_INT8(mig_props.tcp, E1000State),
1535         VMSTATE_BUFFER(tx.header, E1000State),
1536         VMSTATE_BUFFER(tx.data, E1000State),
        /* EEPROM contents and PHY registers */
1537         VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1538         VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individually migrated MAC registers */
1539         VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1540         VMSTATE_UINT32(mac_reg[EECD], E1000State),
1541         VMSTATE_UINT32(mac_reg[EERD], E1000State),
1542         VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1543         VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1544         VMSTATE_UINT32(mac_reg[ICR], E1000State),
1545         VMSTATE_UINT32(mac_reg[ICS], E1000State),
1546         VMSTATE_UINT32(mac_reg[IMC], E1000State),
1547         VMSTATE_UINT32(mac_reg[IMS], E1000State),
1548         VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1549         VMSTATE_UINT32(mac_reg[MANC], E1000State),
1550         VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1551         VMSTATE_UINT32(mac_reg[MPC], E1000State),
1552         VMSTATE_UINT32(mac_reg[PBA], E1000State),
1553         VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1554         VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1555         VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1556         VMSTATE_UINT32(mac_reg[RDH], E1000State),
1557         VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1558         VMSTATE_UINT32(mac_reg[RDT], E1000State),
1559         VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1560         VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1561         VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1562         VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1563         VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1564         VMSTATE_UINT32(mac_reg[TDH], E1000State),
1565         VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1566         VMSTATE_UINT32(mac_reg[TDT], E1000State),
1567         VMSTATE_UINT32(mac_reg[TORH], E1000State),
1568         VMSTATE_UINT32(mac_reg[TORL], E1000State),
1569         VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1570         VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1571         VMSTATE_UINT32(mac_reg[TPR], E1000State),
1572         VMSTATE_UINT32(mac_reg[TPT], E1000State),
1573         VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1574         VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1575         VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Register tables: RA, MTA and VFTA */
1576         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1577         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, E1000_MC_TBL_SIZE),
1578         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA,
1579                                  E1000_VLAN_FILTER_TBL_SIZE),
1580         VMSTATE_END_OF_LIST()
1581     },
    /* Optional subsections, each gated by its own .needed callback. */
1582     .subsections = (const VMStateDescription*[]) {
1583         &vmstate_e1000_mit_state,
1584         &vmstate_e1000_full_mac_state,
1585         &vmstate_e1000_tx_tso_state,
1586         NULL
1587     }
1588 };
1589 
1590 /*
1591  * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1592  * Note: A valid DevId will be inserted during pci_e1000_realize().
1593  */
/*
 * Template of 64 16-bit words.  pci_e1000_realize() passes it to
 * e1000x_core_prepare_eeprom() together with the device id and MAC address
 * to produce the per-device eeprom_data; the two DevId slots are 0 here.
 */
1594 static const uint16_t e1000_eeprom_template[64] = {
1595     0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1596     0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1597     0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1598     0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1599     0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1600     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1601     0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1602     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1603 };
1604 
1605 /* PCI interface */
1606 
1607 static void
1608 e1000_mmio_setup(E1000State *d)
1609 {
1610     int i;
1611     const uint32_t excluded_regs[] = {
1612         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1613         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1614     };
1615 
1616     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1617                           "e1000-mmio", PNPMMIO_SIZE);
1618     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1619     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1620         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1621                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1622     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1623 }
1624 
1625 static void
1626 pci_e1000_uninit(PCIDevice *dev)
1627 {
1628     E1000State *d = E1000(dev);
1629 
1630     timer_free(d->autoneg_timer);
1631     timer_free(d->mit_timer);
1632     timer_free(d->flush_queue_timer);
1633     qemu_del_nic(d->nic);
1634 }
1635 
/*
 * Net-client callbacks for this NIC; passed to qemu_new_nic() in
 * pci_e1000_realize().
 */
1636 static NetClientInfo net_e1000_info = {
1637     .type = NET_CLIENT_DRIVER_NIC,
1638     .size = sizeof(NICState),
1639     .can_receive = e1000_can_receive,
1640     .receive = e1000_receive,
1641     .receive_iov = e1000_receive_iov,
1642     .link_status_changed = e1000_set_link_status,
1643 };
1644 
1645 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1646                                 uint32_t val, int len)
1647 {
1648     E1000State *s = E1000(pci_dev);
1649 
1650     pci_default_write_config(pci_dev, address, val, len);
1651 
1652     if (range_covers_byte(address, len, PCI_COMMAND) &&
1653         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1654         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1655     }
1656 }
1657 
1658 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1659 {
1660     DeviceState *dev = DEVICE(pci_dev);
1661     E1000State *d = E1000(pci_dev);
1662     uint8_t *pci_conf;
1663     uint8_t *macaddr;
1664 
1665     pci_dev->config_write = e1000_write_config;
1666 
1667     pci_conf = pci_dev->config;
1668 
1669     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1670     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1671 
1672     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1673 
1674     e1000_mmio_setup(d);
1675 
1676     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1677 
1678     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1679 
1680     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1681     macaddr = d->conf.macaddr.a;
1682 
1683     e1000x_core_prepare_eeprom(d->eeprom_data,
1684                                e1000_eeprom_template,
1685                                sizeof(e1000_eeprom_template),
1686                                PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1687                                macaddr);
1688 
1689     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1690                           object_get_typename(OBJECT(d)), dev->id, d);
1691 
1692     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1693 
1694     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1695     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1696     d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1697                                         e1000_flush_queue_timer, d);
1698 }
1699 
1700 static Property e1000_properties[] = {
1701     DEFINE_NIC_PROPERTIES(E1000State, conf),
1702     DEFINE_PROP_BIT("autonegotiation", E1000State,
1703                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1704     DEFINE_PROP_BIT("mitigation", E1000State,
1705                     compat_flags, E1000_FLAG_MIT_BIT, true),
1706     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1707                     compat_flags, E1000_FLAG_MAC_BIT, true),
1708     DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1709                     compat_flags, E1000_FLAG_TSO_BIT, true),
1710     DEFINE_PROP_BIT("init-vet", E1000State,
1711                     compat_flags, E1000_FLAG_VET_BIT, true),
1712     DEFINE_PROP_END_OF_LIST(),
1713 };
1714 
1715 typedef struct E1000Info {
1716     const char *name;
1717     uint16_t   device_id;
1718     uint8_t    revision;
1719     uint16_t   phy_id2;
1720 } E1000Info;
1721 
1722 static void e1000_class_init(ObjectClass *klass, void *data)
1723 {
1724     DeviceClass *dc = DEVICE_CLASS(klass);
1725     ResettableClass *rc = RESETTABLE_CLASS(klass);
1726     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1727     E1000BaseClass *e = E1000_CLASS(klass);
1728     const E1000Info *info = data;
1729 
1730     k->realize = pci_e1000_realize;
1731     k->exit = pci_e1000_uninit;
1732     k->romfile = "efi-e1000.rom";
1733     k->vendor_id = PCI_VENDOR_ID_INTEL;
1734     k->device_id = info->device_id;
1735     k->revision = info->revision;
1736     e->phy_id2 = info->phy_id2;
1737     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1738     rc->phases.hold = e1000_reset_hold;
1739     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1740     dc->desc = "Intel Gigabit Ethernet";
1741     dc->vmsd = &vmstate_e1000;
1742     device_class_set_props(dc, e1000_properties);
1743 }
1744 
1745 static void e1000_instance_init(Object *obj)
1746 {
1747     E1000State *n = E1000(obj);
1748     device_add_bootindex_property(obj, &n->conf.bootindex,
1749                                   "bootindex", "/ethernet-phy@0",
1750                                   DEVICE(n));
1751 }
1752 
1753 static const TypeInfo e1000_base_info = {
1754     .name          = TYPE_E1000_BASE,
1755     .parent        = TYPE_PCI_DEVICE,
1756     .instance_size = sizeof(E1000State),
1757     .instance_init = e1000_instance_init,
1758     .class_size    = sizeof(E1000BaseClass),
1759     .abstract      = true,
1760     .interfaces = (InterfaceInfo[]) {
1761         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1762         { },
1763     },
1764 };
1765 
1766 static const E1000Info e1000_devices[] = {
1767     {
1768         .name      = "e1000",
1769         .device_id = E1000_DEV_ID_82540EM,
1770         .revision  = 0x03,
1771         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1772     },
1773     {
1774         .name      = "e1000-82544gc",
1775         .device_id = E1000_DEV_ID_82544GC_COPPER,
1776         .revision  = 0x03,
1777         .phy_id2   = E1000_PHY_ID2_82544x,
1778     },
1779     {
1780         .name      = "e1000-82545em",
1781         .device_id = E1000_DEV_ID_82545EM_COPPER,
1782         .revision  = 0x03,
1783         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1784     },
1785 };
1786 
1787 static void e1000_register_types(void)
1788 {
1789     int i;
1790 
1791     type_register_static(&e1000_base_info);
1792     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1793         const E1000Info *info = &e1000_devices[i];
1794         TypeInfo type_info = {};
1795 
1796         type_info.name = info->name;
1797         type_info.parent = TYPE_E1000_BASE;
1798         type_info.class_data = (void *)info;
1799         type_info.class_init = e1000_class_init;
1800 
1801         type_register(&type_info);
1802     }
1803 }
1804 
1805 type_init(e1000_register_types)
1806