xref: /openbmc/qemu/hw/net/e1000.c (revision 89aafcf2)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/net/mii.h"
30 #include "hw/pci/pci_device.h"
31 #include "hw/qdev-properties.h"
32 #include "migration/vmstate.h"
33 #include "net/eth.h"
34 #include "net/net.h"
35 #include "net/checksum.h"
36 #include "sysemu/sysemu.h"
37 #include "sysemu/dma.h"
38 #include "qemu/iov.h"
39 #include "qemu/module.h"
40 #include "qemu/range.h"
41 
42 #include "e1000_common.h"
43 #include "e1000x_common.h"
44 #include "trace.h"
45 #include "qom/object.h"
46 
47 /* #define E1000_DEBUG */
48 
/*
 * Optional debug tracing.
 *
 * With E1000_DEBUG defined, DBGOUT(what, fmt, ...) prints "e1000: " followed
 * by the formatted message to stderr, provided the DEBUG_<what> category bit
 * is set in debugflags.  Without it, DBGOUT() expands to an empty statement
 * and its arguments are never evaluated.
 */
#ifndef E1000_DEBUG

#define DBGOUT(what, fmt, ...) do {} while (0)

#else /* E1000_DEBUG */

/* Debug categories; each enumerator is a bit position inside debugflags. */
enum {
    DEBUG_GENERAL,
    DEBUG_IO,
    DEBUG_MMIO,
    DEBUG_INTERRUPT,
    DEBUG_RX,
    DEBUG_TX,
    DEBUG_MDIC,
    DEBUG_EEPROM,
    DEBUG_UNKNOWN,
    DEBUG_TXSUM,
    DEBUG_TXERR,
    DEBUG_RXERR,
    DEBUG_RXFILTER,
    DEBUG_PHY,
    DEBUG_NOTYET,
};

#define DBGBIT(x)    (1 << DEBUG_##x)

/* Categories that are printed by default. */
static int debugflags = DBGBIT(GENERAL) | DBGBIT(TXERR);

#define DBGOUT(what, fmt, ...)                                  \
    do {                                                        \
        if (debugflags & DBGBIT(what)) {                        \
            fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__);     \
        }                                                       \
    } while (0)

#endif /* E1000_DEBUG */
66 
/*
 * Region sizes in bytes.
 * NOTE(review): presumably the sizes of the "io" and "mmio" MemoryRegions of
 * E1000State; the code registering them is not in this part of the file.
 * 0x20000 equals 0x8000 32-bit registers, i.e. sizeof(mac_reg).
 */
#define IOPORT_SIZE                 0x40
#define PNPMMIO_SIZE                0x20000

/*
 * Number of leading frame bytes e1000_receive_iov() linearizes for the
 * receive filter: ETH_HLEN plus 4 more bytes (presumably one VLAN tag).
 */
#define MAXIMUM_ETHERNET_HDR_LEN    (ETH_HLEN + 4)
71 
72 /*
73  * HW models:
74  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
75  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
76  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
77  *  Others never tested
78  */
79 
80 struct E1000State_st {
81     /*< private >*/
82     PCIDevice parent_obj;
83     /*< public >*/
84 
85     NICState *nic;
86     NICConf conf;
87     MemoryRegion mmio;
88     MemoryRegion io;
89 
90     uint32_t mac_reg[0x8000];
91     uint16_t phy_reg[0x20];
92     uint16_t eeprom_data[64];
93 
94     uint32_t rxbuf_size;
95     uint32_t rxbuf_min_shift;
96     struct e1000_tx {
97         unsigned char header[256];
98         unsigned char vlan_header[4];
99         /* Fields vlan and data must not be reordered or separated. */
100         unsigned char vlan[4];
101         unsigned char data[0x10000];
102         uint16_t size;
103         unsigned char vlan_needed;
104         unsigned char sum_needed;
105         bool cptse;
106         e1000x_txd_props props;
107         e1000x_txd_props tso_props;
108         uint16_t tso_frames;
109         bool busy;
110     } tx;
111 
112     struct {
113         uint32_t val_in;    /* shifted in from guest driver */
114         uint16_t bitnum_in;
115         uint16_t bitnum_out;
116         uint16_t reading;
117         uint32_t old_eecd;
118     } eecd_state;
119 
120     QEMUTimer *autoneg_timer;
121 
122     QEMUTimer *mit_timer;      /* Mitigation timer. */
123     bool mit_timer_on;         /* Mitigation timer is running. */
124     bool mit_irq_level;        /* Tracks interrupt pin level. */
125     uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
126 
127     QEMUTimer *flush_queue_timer;
128 
129 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
130 #define E1000_FLAG_AUTONEG_BIT 0
131 #define E1000_FLAG_MIT_BIT 1
132 #define E1000_FLAG_MAC_BIT 2
133 #define E1000_FLAG_TSO_BIT 3
134 #define E1000_FLAG_VET_BIT 4
135 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
136 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
137 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
138 #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
139 #define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)
140 
141     uint32_t compat_flags;
142     bool received_tx_tso;
143     bool use_tso_for_migration;
144     e1000x_txd_props mig_props;
145 };
146 typedef struct E1000State_st E1000State;
147 
/*
 * Test compatibility flag E1000_FLAG_<flag> in the compat_flags of the
 * state pointer named "s", which must be in scope wherever this is used.
 * Evaluates to non-zero when the flag is set.
 */
#define chkflag(flag)     (s->compat_flags & E1000_FLAG_##flag)
149 
150 struct E1000BaseClass {
151     PCIDeviceClass parent_class;
152     uint16_t phy_id2;
153 };
154 typedef struct E1000BaseClass E1000BaseClass;
155 
156 #define TYPE_E1000_BASE "e1000-base"
157 
158 DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
159                      E1000, TYPE_E1000_BASE)
160 
161 
162 static void
163 e1000_link_up(E1000State *s)
164 {
165     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
166 
167     /* E1000_STATUS_LU is tested by e1000_can_receive() */
168     qemu_flush_queued_packets(qemu_get_queue(s->nic));
169 }
170 
171 static void
172 e1000_autoneg_done(E1000State *s)
173 {
174     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
175 
176     /* E1000_STATUS_LU is tested by e1000_can_receive() */
177     qemu_flush_queued_packets(qemu_get_queue(s->nic));
178 }
179 
180 static bool
181 have_autoneg(E1000State *s)
182 {
183     return chkflag(AUTONEG) && (s->phy_reg[MII_BMCR] & MII_BMCR_AUTOEN);
184 }
185 
186 static void
187 set_phy_ctrl(E1000State *s, int index, uint16_t val)
188 {
189     /* bits 0-5 reserved; MII_BMCR_[ANRESTART,RESET] are self clearing */
190     s->phy_reg[MII_BMCR] = val & ~(0x3f |
191                                    MII_BMCR_RESET |
192                                    MII_BMCR_ANRESTART);
193 
194     /*
195      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
196      * migrate during auto negotiation, after migration the link will be
197      * down.
198      */
199     if (have_autoneg(s) && (val & MII_BMCR_ANRESTART)) {
200         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
201     }
202 }
203 
204 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
205     [MII_BMCR] = set_phy_ctrl,
206 };
207 
208 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
209 
210 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
211 static const char phy_regcap[0x20] = {
212     [MII_BMSR]   = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
213     [MII_PHYID1] = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
214     [MII_BMCR]   = PHY_RW,    [MII_CTRL1000]               = PHY_RW,
215     [MII_ANLPAR] = PHY_R,     [MII_STAT1000]               = PHY_R,
216     [MII_ANAR]   = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
217     [MII_PHYID2] = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
218     [MII_ANER]   = PHY_R,
219 };
220 
221 /* MII_PHYID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
222 static const uint16_t phy_reg_init[] = {
223     [MII_BMCR] = MII_BMCR_SPEED1000 |
224                  MII_BMCR_FD |
225                  MII_BMCR_AUTOEN,
226 
227     [MII_BMSR] = MII_BMSR_EXTCAP |
228                  MII_BMSR_LINK_ST |   /* link initially up */
229                  MII_BMSR_AUTONEG |
230                  /* MII_BMSR_AN_COMP: initially NOT completed */
231                  MII_BMSR_MFPS |
232                  MII_BMSR_EXTSTAT |
233                  MII_BMSR_10T_HD |
234                  MII_BMSR_10T_FD |
235                  MII_BMSR_100TX_HD |
236                  MII_BMSR_100TX_FD,
237 
238     [MII_PHYID1] = 0x141,
239     /* [MII_PHYID2] configured per DevId, from e1000_reset() */
240     [MII_ANAR] = MII_ANAR_CSMACD | MII_ANAR_10 |
241                  MII_ANAR_10FD | MII_ANAR_TX |
242                  MII_ANAR_TXFD | MII_ANAR_PAUSE |
243                  MII_ANAR_PAUSE_ASYM,
244     [MII_ANLPAR] = MII_ANLPAR_10 | MII_ANLPAR_10FD |
245                    MII_ANLPAR_TX | MII_ANLPAR_TXFD,
246     [MII_CTRL1000] = MII_CTRL1000_FULL | MII_CTRL1000_PORT |
247                      MII_CTRL1000_MASTER,
248     [MII_STAT1000] = MII_STAT1000_HALF | MII_STAT1000_FULL |
249                      MII_STAT1000_ROK | MII_STAT1000_LOK,
250     [M88E1000_PHY_SPEC_CTRL] = 0x360,
251     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
252     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
253 };
254 
255 static const uint32_t mac_reg_init[] = {
256     [PBA]     = 0x00100030,
257     [LEDCTL]  = 0x602,
258     [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
259                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
260     [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
261                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
262                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
263                 E1000_STATUS_LU,
264     [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
265                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
266                 E1000_MANC_RMCP_EN,
267 };
268 
/*
 * Lower *curr to @value when @value is a shorter, non-zero delay.
 *
 * A stored 0 means "not set yet" and is replaced by any non-zero @value;
 * a @value of 0 is ignored.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;
    }
    if (*curr != 0 && *curr <= value) {
        return;
    }
    *curr = value;
}
277 
278 static void
279 set_interrupt_cause(E1000State *s, int index, uint32_t val)
280 {
281     PCIDevice *d = PCI_DEVICE(s);
282     uint32_t pending_ints;
283     uint32_t mit_delay;
284 
285     s->mac_reg[ICR] = val;
286 
287     /*
288      * Make sure ICR and ICS registers have the same value.
289      * The spec says that the ICS register is write-only.  However in practice,
290      * on real hardware ICS is readable, and for reads it has the same value as
291      * ICR (except that ICS does not have the clear on read behaviour of ICR).
292      *
293      * The VxWorks PRO/1000 driver uses this behaviour.
294      */
295     s->mac_reg[ICS] = val;
296 
297     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
298     if (!s->mit_irq_level && pending_ints) {
299         /*
300          * Here we detect a potential raising edge. We postpone raising the
301          * interrupt line if we are inside the mitigation delay window
302          * (s->mit_timer_on == 1).
303          * We provide a partial implementation of interrupt mitigation,
304          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
305          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
306          * RADV; relative timers based on TIDV and RDTR are not implemented.
307          */
308         if (s->mit_timer_on) {
309             return;
310         }
311         if (chkflag(MIT)) {
312             /* Compute the next mitigation delay according to pending
313              * interrupts and the current values of RADV (provided
314              * RDTR!=0), TADV and ITR.
315              * Then rearm the timer.
316              */
317             mit_delay = 0;
318             if (s->mit_ide &&
319                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
320                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
321             }
322             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
323                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
324             }
325             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
326 
327             /*
328              * According to e1000 SPEC, the Ethernet controller guarantees
329              * a maximum observable interrupt rate of 7813 interrupts/sec.
330              * Thus if mit_delay < 500 then the delay should be set to the
331              * minimum delay possible which is 500.
332              */
333             mit_delay = (mit_delay < 500) ? 500 : mit_delay;
334 
335             s->mit_timer_on = 1;
336             timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
337                       mit_delay * 256);
338             s->mit_ide = 0;
339         }
340     }
341 
342     s->mit_irq_level = (pending_ints != 0);
343     pci_set_irq(d, s->mit_irq_level);
344 }
345 
346 static void
347 e1000_mit_timer(void *opaque)
348 {
349     E1000State *s = opaque;
350 
351     s->mit_timer_on = 0;
352     /* Call set_interrupt_cause to update the irq level (if necessary). */
353     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
354 }
355 
356 static void
357 set_ics(E1000State *s, int index, uint32_t val)
358 {
359     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
360         s->mac_reg[IMS]);
361     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
362 }
363 
364 static void
365 e1000_autoneg_timer(void *opaque)
366 {
367     E1000State *s = opaque;
368     if (!qemu_get_queue(s->nic)->link_down) {
369         e1000_autoneg_done(s);
370         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
371     }
372 }
373 
374 static bool e1000_vet_init_need(void *opaque)
375 {
376     E1000State *s = opaque;
377 
378     return chkflag(VET);
379 }
380 
381 static void e1000_reset_hold(Object *obj)
382 {
383     E1000State *d = E1000(obj);
384     E1000BaseClass *edc = E1000_GET_CLASS(d);
385     uint8_t *macaddr = d->conf.macaddr.a;
386 
387     timer_del(d->autoneg_timer);
388     timer_del(d->mit_timer);
389     timer_del(d->flush_queue_timer);
390     d->mit_timer_on = 0;
391     d->mit_irq_level = 0;
392     d->mit_ide = 0;
393     memset(d->phy_reg, 0, sizeof d->phy_reg);
394     memcpy(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
395     d->phy_reg[MII_PHYID2] = edc->phy_id2;
396     memset(d->mac_reg, 0, sizeof d->mac_reg);
397     memcpy(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
398     d->rxbuf_min_shift = 1;
399     memset(&d->tx, 0, sizeof d->tx);
400 
401     if (qemu_get_queue(d->nic)->link_down) {
402         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
403     }
404 
405     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
406 
407     if (e1000_vet_init_need(d)) {
408         d->mac_reg[VET] = ETH_P_VLAN;
409     }
410 }
411 
412 static void
413 set_ctrl(E1000State *s, int index, uint32_t val)
414 {
415     /* RST is self clearing */
416     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
417 }
418 
419 static void
420 e1000_flush_queue_timer(void *opaque)
421 {
422     E1000State *s = opaque;
423 
424     qemu_flush_queued_packets(qemu_get_queue(s->nic));
425 }
426 
427 static void
428 set_rx_control(E1000State *s, int index, uint32_t val)
429 {
430     s->mac_reg[RCTL] = val;
431     s->rxbuf_size = e1000x_rxbufsize(val);
432     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
433     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
434            s->mac_reg[RCTL]);
435     timer_mod(s->flush_queue_timer,
436               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
437 }
438 
439 static void
440 set_mdic(E1000State *s, int index, uint32_t val)
441 {
442     uint32_t data = val & E1000_MDIC_DATA_MASK;
443     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
444 
445     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
446         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
447     else if (val & E1000_MDIC_OP_READ) {
448         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
449         if (!(phy_regcap[addr] & PHY_R)) {
450             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
451             val |= E1000_MDIC_ERROR;
452         } else
453             val = (val ^ data) | s->phy_reg[addr];
454     } else if (val & E1000_MDIC_OP_WRITE) {
455         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
456         if (!(phy_regcap[addr] & PHY_W)) {
457             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
458             val |= E1000_MDIC_ERROR;
459         } else {
460             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
461                 phyreg_writeops[addr](s, index, data);
462             } else {
463                 s->phy_reg[addr] = data;
464             }
465         }
466     }
467     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
468 
469     if (val & E1000_MDIC_INT_EN) {
470         set_ics(s, 0, E1000_ICR_MDAC);
471     }
472 }
473 
474 static uint32_t
475 get_eecd(E1000State *s, int index)
476 {
477     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
478 
479     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
480            s->eecd_state.bitnum_out, s->eecd_state.reading);
481     if (!s->eecd_state.reading ||
482         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
483           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
484         ret |= E1000_EECD_DO;
485     return ret;
486 }
487 
488 static void
489 set_eecd(E1000State *s, int index, uint32_t val)
490 {
491     uint32_t oldval = s->eecd_state.old_eecd;
492 
493     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
494             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
495     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
496         return;
497     }
498     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
499         s->eecd_state.val_in = 0;
500         s->eecd_state.bitnum_in = 0;
501         s->eecd_state.bitnum_out = 0;
502         s->eecd_state.reading = 0;
503     }
504     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
505         return;
506     }
507     if (!(E1000_EECD_SK & val)) {               /* falling edge */
508         s->eecd_state.bitnum_out++;
509         return;
510     }
511     s->eecd_state.val_in <<= 1;
512     if (val & E1000_EECD_DI)
513         s->eecd_state.val_in |= 1;
514     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
515         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
516         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
517             EEPROM_READ_OPCODE_MICROWIRE);
518     }
519     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
520            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
521            s->eecd_state.reading);
522 }
523 
524 static uint32_t
525 flash_eerd_read(E1000State *s, int x)
526 {
527     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
528 
529     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
530         return (s->mac_reg[EERD]);
531 
532     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
533         return (E1000_EEPROM_RW_REG_DONE | r);
534 
535     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
536            E1000_EEPROM_RW_REG_DONE | r);
537 }
538 
/*
 * Compute a checksum over part of @data and store it big-endian at @sloc.
 *
 * @data: packet buffer
 * @n:    number of valid bytes in @data
 * @sloc: offset at which the 16-bit checksum is stored
 * @css:  offset of the first byte covered by the checksum
 * @cse:  offset of the last byte covered; 0, or a value not below @n,
 *        means "up to the end of the packet"
 *
 * Nothing is written unless both checksum bytes lie inside the covered
 * length, i.e. @sloc + 1 is below it.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;
    uint32_t len;

    if (cse && cse < n) {
        n = cse + 1;
    }

    /*
     * Test n first: for n == 0 the unsigned expression n - 1 wraps to
     * UINT32_MAX and would let every sloc through, writing a checksum
     * into a packet that has no bytes.
     */
    if (n == 0 || sloc >= n - 1) {
        return;
    }

    /*
     * Clamp rather than let n - css wrap when the start offset lies at or
     * past the end; the wrapped value would otherwise be handed to
     * net_checksum_add() as a length.
     */
    len = css < n ? n - css : 0;

    sum = net_checksum_add(len, data + css);
    stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
}
551 
552 static inline void
553 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
554 {
555     if (is_broadcast_ether_addr(arr)) {
556         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
557     } else if (is_multicast_ether_addr(arr)) {
558         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
559     }
560 }
561 
562 static void
563 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
564 {
565     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
566                                     PTC1023, PTC1522 };
567 
568     NetClientState *nc = qemu_get_queue(s->nic);
569     if (s->phy_reg[MII_BMCR] & MII_BMCR_LOOPBACK) {
570         qemu_receive_packet(nc, buf, size);
571     } else {
572         qemu_send_packet(nc, buf, size);
573     }
574     inc_tx_bcast_or_mcast_count(s, buf);
575     e1000x_increase_size_stats(s->mac_reg, PTCregs, size + 4);
576 }
577 
578 static void
579 xmit_seg(E1000State *s)
580 {
581     uint16_t len;
582     unsigned int frames = s->tx.tso_frames, css, sofar;
583     struct e1000_tx *tp = &s->tx;
584     struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
585 
586     if (tp->cptse) {
587         css = props->ipcss;
588         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
589                frames, tp->size, css);
590         if (props->ip) {    /* IPv4 */
591             stw_be_p(tp->data+css+2, tp->size - css);
592             stw_be_p(tp->data+css+4,
593                      lduw_be_p(tp->data + css + 4) + frames);
594         } else {         /* IPv6 */
595             stw_be_p(tp->data+css+4, tp->size - css);
596         }
597         css = props->tucss;
598         len = tp->size - css;
599         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
600         if (props->tcp) {
601             sofar = frames * props->mss;
602             stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
603             if (props->paylen - sofar > props->mss) {
604                 tp->data[css + 13] &= ~9;    /* PSH, FIN */
605             } else if (frames) {
606                 e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
607             }
608         } else {    /* UDP */
609             stw_be_p(tp->data+css+4, len);
610         }
611         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
612             unsigned int phsum;
613             // add pseudo-header length before checksum calculation
614             void *sp = tp->data + props->tucso;
615 
616             phsum = lduw_be_p(sp) + len;
617             phsum = (phsum >> 16) + (phsum & 0xffff);
618             stw_be_p(sp, phsum);
619         }
620         tp->tso_frames++;
621     }
622 
623     if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
624         putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
625     }
626     if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
627         putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
628     }
629     if (tp->vlan_needed) {
630         memmove(tp->vlan, tp->data, 4);
631         memmove(tp->data, tp->data + 4, 8);
632         memcpy(tp->data + 8, tp->vlan_header, 4);
633         e1000_send_packet(s, tp->vlan, tp->size + 4);
634     } else {
635         e1000_send_packet(s, tp->data, tp->size);
636     }
637 
638     e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
639     e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size + 4);
640     e1000x_inc_reg_if_not_full(s->mac_reg, GPTC);
641     e1000x_grow_8reg_if_not_full(s->mac_reg, GOTCL, s->tx.size + 4);
642 }
643 
644 static void
645 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
646 {
647     PCIDevice *d = PCI_DEVICE(s);
648     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
649     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
650     unsigned int split_size = txd_lower & 0xffff, bytes, sz;
651     unsigned int msh = 0xfffff;
652     uint64_t addr;
653     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
654     struct e1000_tx *tp = &s->tx;
655 
656     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
657     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
658         if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
659             e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
660             s->use_tso_for_migration = 1;
661             tp->tso_frames = 0;
662         } else {
663             e1000x_read_tx_ctx_descr(xp, &tp->props);
664             s->use_tso_for_migration = 0;
665         }
666         return;
667     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
668         // data descriptor
669         if (tp->size == 0) {
670             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
671         }
672         tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
673     } else {
674         // legacy descriptor
675         tp->cptse = 0;
676     }
677 
678     if (e1000x_vlan_enabled(s->mac_reg) &&
679         e1000x_is_vlan_txd(txd_lower) &&
680         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
681         tp->vlan_needed = 1;
682         stw_be_p(tp->vlan_header,
683                       le16_to_cpu(s->mac_reg[VET]));
684         stw_be_p(tp->vlan_header + 2,
685                       le16_to_cpu(dp->upper.fields.special));
686     }
687 
688     addr = le64_to_cpu(dp->buffer_addr);
689     if (tp->cptse) {
690         msh = tp->tso_props.hdr_len + tp->tso_props.mss;
691         do {
692             bytes = split_size;
693             if (tp->size >= msh) {
694                 goto eop;
695             }
696             if (tp->size + bytes > msh)
697                 bytes = msh - tp->size;
698 
699             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
700             pci_dma_read(d, addr, tp->data + tp->size, bytes);
701             sz = tp->size + bytes;
702             if (sz >= tp->tso_props.hdr_len
703                 && tp->size < tp->tso_props.hdr_len) {
704                 memmove(tp->header, tp->data, tp->tso_props.hdr_len);
705             }
706             tp->size = sz;
707             addr += bytes;
708             if (sz == msh) {
709                 xmit_seg(s);
710                 memmove(tp->data, tp->header, tp->tso_props.hdr_len);
711                 tp->size = tp->tso_props.hdr_len;
712             }
713             split_size -= bytes;
714         } while (bytes && split_size);
715     } else {
716         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
717         pci_dma_read(d, addr, tp->data + tp->size, split_size);
718         tp->size += split_size;
719     }
720 
721 eop:
722     if (!(txd_lower & E1000_TXD_CMD_EOP))
723         return;
724     if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
725         xmit_seg(s);
726     }
727     tp->tso_frames = 0;
728     tp->sum_needed = 0;
729     tp->vlan_needed = 0;
730     tp->size = 0;
731     tp->cptse = 0;
732 }
733 
734 static uint32_t
735 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
736 {
737     PCIDevice *d = PCI_DEVICE(s);
738     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
739 
740     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
741         return 0;
742     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
743                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
744     dp->upper.data = cpu_to_le32(txd_upper);
745     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
746                   &dp->upper, sizeof(dp->upper));
747     return E1000_ICR_TXDW;
748 }
749 
750 static uint64_t tx_desc_base(E1000State *s)
751 {
752     uint64_t bah = s->mac_reg[TDBAH];
753     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
754 
755     return (bah << 32) + bal;
756 }
757 
758 static void
759 start_xmit(E1000State *s)
760 {
761     PCIDevice *d = PCI_DEVICE(s);
762     dma_addr_t base;
763     struct e1000_tx_desc desc;
764     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
765 
766     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
767         DBGOUT(TX, "tx disabled\n");
768         return;
769     }
770 
771     if (s->tx.busy) {
772         return;
773     }
774     s->tx.busy = true;
775 
776     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
777         base = tx_desc_base(s) +
778                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
779         pci_dma_read(d, base, &desc, sizeof(desc));
780 
781         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
782                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
783                desc.upper.data);
784 
785         process_tx_desc(s, &desc);
786         cause |= txdesc_writeback(s, base, &desc);
787 
788         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
789             s->mac_reg[TDH] = 0;
790         /*
791          * the following could happen only if guest sw assigns
792          * bogus values to TDT/TDLEN.
793          * there's nothing too intelligent we could do about this.
794          */
795         if (s->mac_reg[TDH] == tdh_start ||
796             tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
797             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
798                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
799             break;
800         }
801     }
802     s->tx.busy = false;
803     set_ics(s, 0, cause);
804 }
805 
806 static int
807 receive_filter(E1000State *s, const void *buf)
808 {
809     return (!e1000x_is_vlan_packet(buf, s->mac_reg[VET]) ||
810             e1000x_rx_vlan_filter(s->mac_reg, PKT_GET_VLAN_HDR(buf))) &&
811            e1000x_rx_group_filter(s->mac_reg, buf);
812 }
813 
814 static void
815 e1000_set_link_status(NetClientState *nc)
816 {
817     E1000State *s = qemu_get_nic_opaque(nc);
818     uint32_t old_status = s->mac_reg[STATUS];
819 
820     if (nc->link_down) {
821         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
822     } else {
823         if (have_autoneg(s) &&
824             !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
825             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
826         } else {
827             e1000_link_up(s);
828         }
829     }
830 
831     if (s->mac_reg[STATUS] != old_status)
832         set_ics(s, 0, E1000_ICR_LSC);
833 }
834 
835 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
836 {
837     int bufs;
838     /* Fast-path short packets */
839     if (total_size <= s->rxbuf_size) {
840         return s->mac_reg[RDH] != s->mac_reg[RDT];
841     }
842     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
843         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
844     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
845         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
846             s->mac_reg[RDT] - s->mac_reg[RDH];
847     } else {
848         return false;
849     }
850     return total_size <= bufs * s->rxbuf_size;
851 }
852 
853 static bool
854 e1000_can_receive(NetClientState *nc)
855 {
856     E1000State *s = qemu_get_nic_opaque(nc);
857 
858     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
859         e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
860 }
861 
862 static uint64_t rx_desc_base(E1000State *s)
863 {
864     uint64_t bah = s->mac_reg[RDBAH];
865     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
866 
867     return (bah << 32) + bal;
868 }
869 
870 static void
871 e1000_receiver_overrun(E1000State *s, size_t size)
872 {
873     trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
874     e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
875     e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
876     set_ics(s, 0, E1000_ICS_RXO);
877 }
878 
/*
 * Deliver one incoming frame, described by an iovec array, into the
 * guest's receive descriptor ring.
 *
 * Return value:
 *   -1    receive is not enabled, or the ring could not hold the frame
 *         (the overrun is recorded through e1000_receiver_overrun());
 *    0    the flush-queue timer is pending;
 *   size  the frame was written to the ring, or was dropped because it is
 *         oversized or was rejected by receive_filter().  The returned
 *         value is the padded length for short frames and is reduced by 4
 *         when a VLAN tag was stripped.
 */
static ssize_t
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0;
    uint8_t min_buf[ETH_ZLEN];
    struct iovec min_iov;
    /* Contiguous view of the frame header used for filtering/VLAN checks */
    uint8_t *filter_buf = iov->iov_base;
    size_t size = iov_size(iov, iovcnt);
    /* Read offset inside the current iovec element */
    size_t iov_ofs = 0;
    size_t desc_offset;
    size_t desc_size;
    size_t total_size;
    eth_pkt_types_e pkt_type;

    if (!e1000x_hw_rx_enabled(s->mac_reg)) {
        return -1;
    }

    if (timer_pending(s->flush_queue_timer)) {
        return 0;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        /* Copy the short frame into min_buf, zero-fill the rest and
         * continue with a single-element iovec pointing at min_buf. */
        iov_to_buf(iov, iovcnt, 0, min_buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        min_iov.iov_base = filter_buf = min_buf;
        min_iov.iov_len = size = sizeof(min_buf);
        iovcnt = 1;
        iov = &min_iov;
    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
        /* This is very unlikely, but may happen. */
        /* The first fragment is too short to hold the header: gather the
         * header bytes into min_buf so they can be inspected linearly. */
        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
        filter_buf = min_buf;
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if (e1000x_is_oversized(s->mac_reg, size)) {
        return size;
    }

    if (!receive_filter(s, filter_buf)) {
        return size;
    }

    if (e1000x_vlan_enabled(s->mac_reg) &&
        e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
        /* Strip the VLAN tag: keep the 16-bit big-endian value at offset 14
         * for the descriptor's `special` field, move the first 12 bytes
         * forward by 4 and start copying the frame from offset 4. */
        vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
        iov_ofs = 4;
        if (filter_buf == iov->iov_base) {
            memmove(filter_buf + 4, filter_buf, 12);
        } else {
            /* Header was gathered into min_buf: write the 12 bytes back
             * into the iovec at offset 4, then step over any leading
             * fragments that end at or before the new start offset. */
            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
            while (iov->iov_len <= iov_ofs) {
                iov_ofs -= iov->iov_len;
                iov++;
            }
        }
        vlan_status = E1000_RXD_STAT_VP;
        size -= 4;
    }

    pkt_type = get_eth_packet_type(PKT_GET_ETH_HDR(filter_buf));
    /* Remember where RDH started so a full wrap of the ring is detected. */
    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    /* total_size also counts e1000x_fcs_len(); only `size` bytes are
     * actually copied from the iovec below. */
    total_size = size + e1000x_fcs_len(s->mac_reg);
    if (!e1000_has_rxbufs(s, total_size)) {
        e1000_receiver_overrun(s, total_size);
        return -1;
    }
    /* One iteration per receive descriptor consumed. */
    do {
        /* Portion of total_size accounted to this descriptor, capped at
         * the configured receive buffer size. */
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        desc.status &= ~E1000_RXD_STAT_DD;
        if (desc.buffer_addr) {
            if (desc_offset < size) {
                size_t iov_copy;
                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                /* Copy fragment by fragment into the guest buffer,
                 * advancing to the next iovec element when the current
                 * one is exhausted. */
                do {
                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
                    copy_size -= iov_copy;
                    ba += iov_copy;
                    iov_ofs += iov_copy;
                    if (iov_ofs == iov->iov_len) {
                        iov++;
                        iov_ofs = 0;
                    }
                } while (copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                /* Last descriptor of this frame. */
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { // as per intel docs; skip descriptors with null buf addr
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        /* Write the descriptor back without DD first, then set DD (and the
         * VLAN status bit) with a separate write of the status field. */
        pci_dma_write(d, base, &desc, sizeof(desc));
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        pci_dma_write(d, base + offsetof(struct e1000_rx_desc, status),
                      &desc.status, sizeof(desc.status));

        /* Advance RDH, wrapping to 0 at the end of the ring (RDLEN bytes). */
        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        if (s->mac_reg[RDH] == rdh_start ||
            rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            e1000_receiver_overrun(s, total_size);
            return -1;
        }
    } while (desc_offset < total_size);

    e1000x_update_rx_total_stats(s->mac_reg, pkt_type, size, total_size);

    /* Always raise RXT0; add RXDMT0 when the distance from RDH to RDT
     * (in descriptor bytes) is at most RDLEN >> rxbuf_min_shift. */
    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}
1027 
1028 static ssize_t
1029 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1030 {
1031     const struct iovec iov = {
1032         .iov_base = (uint8_t *)buf,
1033         .iov_len = size
1034     };
1035 
1036     return e1000_receive_iov(nc, &iov, 1);
1037 }
1038 
1039 static uint32_t
1040 mac_readreg(E1000State *s, int index)
1041 {
1042     return s->mac_reg[index];
1043 }
1044 
1045 static uint32_t
1046 mac_icr_read(E1000State *s, int index)
1047 {
1048     uint32_t ret = s->mac_reg[ICR];
1049 
1050     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1051     set_interrupt_cause(s, 0, 0);
1052     return ret;
1053 }
1054 
1055 static uint32_t
1056 mac_read_clr4(E1000State *s, int index)
1057 {
1058     uint32_t ret = s->mac_reg[index];
1059 
1060     s->mac_reg[index] = 0;
1061     return ret;
1062 }
1063 
1064 static uint32_t
1065 mac_read_clr8(E1000State *s, int index)
1066 {
1067     uint32_t ret = s->mac_reg[index];
1068 
1069     s->mac_reg[index] = 0;
1070     s->mac_reg[index-1] = 0;
1071     return ret;
1072 }
1073 
1074 static void
1075 mac_writereg(E1000State *s, int index, uint32_t val)
1076 {
1077     uint32_t macaddr[2];
1078 
1079     s->mac_reg[index] = val;
1080 
1081     if (index == RA + 1) {
1082         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1083         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1084         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1085     }
1086 }
1087 
1088 static void
1089 set_rdt(E1000State *s, int index, uint32_t val)
1090 {
1091     s->mac_reg[index] = val & 0xffff;
1092     if (e1000_has_rxbufs(s, 1)) {
1093         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1094     }
1095 }
1096 
1097 #define LOW_BITS_SET_FUNC(num)                             \
1098     static void                                            \
1099     set_##num##bit(E1000State *s, int index, uint32_t val) \
1100     {                                                      \
1101         s->mac_reg[index] = val & (BIT(num) - 1);          \
1102     }
1103 
1104 LOW_BITS_SET_FUNC(4)
1105 LOW_BITS_SET_FUNC(11)
1106 LOW_BITS_SET_FUNC(13)
1107 LOW_BITS_SET_FUNC(16)
1108 
1109 static void
1110 set_dlen(E1000State *s, int index, uint32_t val)
1111 {
1112     s->mac_reg[index] = val & 0xfff80;
1113 }
1114 
1115 static void
1116 set_tctl(E1000State *s, int index, uint32_t val)
1117 {
1118     s->mac_reg[index] = val;
1119     s->mac_reg[TDT] &= 0xffff;
1120     start_xmit(s);
1121 }
1122 
1123 static void
1124 set_icr(E1000State *s, int index, uint32_t val)
1125 {
1126     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1127     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1128 }
1129 
1130 static void
1131 set_imc(E1000State *s, int index, uint32_t val)
1132 {
1133     s->mac_reg[IMS] &= ~val;
1134     set_ics(s, 0, 0);
1135 }
1136 
1137 static void
1138 set_ims(E1000State *s, int index, uint32_t val)
1139 {
1140     s->mac_reg[IMS] |= val;
1141     set_ics(s, 0, 0);
1142 }
1143 
/* Shorthand for a table entry whose read handler is the plain mac_readreg. */
#define getreg(x)    [x] = mac_readreg
/* Read handler signature: (device state, register index) -> value. */
typedef uint32_t (*readops)(E1000State *, int);
/*
 * Read dispatch table, indexed by register index (the index computed in
 * e1000_mmio_read()).  Entries left unset are NULL, which
 * e1000_mmio_read() treats as an unknown register.
 */
static const readops macreg_readops[] = {
    /* Registers read back as stored */
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),    getreg(RDFH),     getreg(RDFT),     getreg(RDFHS),
    getreg(RDFTS),    getreg(RDFPC),    getreg(TDFH),     getreg(TDFT),
    getreg(TDFHS),    getreg(TDFTS),    getreg(TDFPC),    getreg(AIT),

    /* Read-to-clear registers: clr8 also zeroes the preceding word */
    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    /* Registers with dedicated read handlers */
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,

    /* Register ranges read back as stored */
    [CRCERRS ... MPC]     = &mac_readreg,
    [IP6AT ... IP6AT + 3] = &mac_readreg,    [IP4AT ... IP4AT + 6] = &mac_readreg,
    [FFLT ... FFLT + 6]   = &mac_readreg,
    [RA ... RA + 31]      = &mac_readreg,
    [WUPM ... WUPM + 31]  = &mac_readreg,
    [MTA ... MTA + E1000_MC_TBL_SIZE - 1]   = &mac_readreg,
    [VFTA ... VFTA + E1000_VLAN_FILTER_TBL_SIZE - 1] = &mac_readreg,
    [FFMT ... FFMT + 254] = &mac_readreg,
    [FFVT ... FFVT + 254] = &mac_readreg,
    [PBM ... PBM + 16383] = &mac_readreg,
};
/* Number of entries in macreg_readops; used as the bound for index checks. */
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1194 
/* Shorthand for a table entry whose write handler is the plain mac_writereg. */
#define putreg(x)    [x] = mac_writereg
/* Write handler signature: (device state, register index, value written). */
typedef void (*writeops)(E1000State *, int, uint32_t);
/*
 * Write dispatch table, indexed by register index (the index computed in
 * e1000_mmio_write()).  Entries left unset are NULL; e1000_mmio_write()
 * then reports the register as read-only if it has a read handler, or as
 * unknown otherwise.
 */
static const writeops macreg_writeops[] = {
    /* Registers stored as written */
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(IPAV),     putreg(WUC),
    putreg(WUS),

    /* Registers with masking or side-effect handlers */
    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL]  = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]   = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]   = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]   = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL]  = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV]  = set_16bit,
    [ITR]    = set_16bit,  [TDFH]   = set_11bit,      [TDFT]  = set_11bit,
    [TDFHS]  = set_13bit,  [TDFTS]  = set_13bit,      [TDFPC] = set_13bit,
    [RDFH]   = set_13bit,  [RDFT]   = set_13bit,      [RDFHS] = set_13bit,
    [RDFTS]  = set_13bit,  [RDFPC]  = set_13bit,      [AIT]   = set_16bit,

    /* Register ranges */
    [IP6AT ... IP6AT + 3] = &mac_writereg, [IP4AT ... IP4AT + 6] = &mac_writereg,
    [FFLT ... FFLT + 6]   = &set_11bit,
    [RA ... RA + 31]      = &mac_writereg,
    [WUPM ... WUPM + 31]  = &mac_writereg,
    [MTA ... MTA + E1000_MC_TBL_SIZE - 1] = &mac_writereg,
    [VFTA ... VFTA + E1000_VLAN_FILTER_TBL_SIZE - 1] = &mac_writereg,
    [FFMT ... FFMT + 254] = &set_4bit,     [FFVT ... FFVT + 254] = &mac_writereg,
    [PBM ... PBM + 16383] = &mac_writereg,
};

/* Number of entries in macreg_writeops; used as the bound for index checks. */
enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1226 
/* Bits 0 and 1 of a mac_reg_access[] entry. */
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

/* Build an access entry that requires compat flag E1000_FLAG_<x>. */
#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implemented
 *
 * The MMIO read/write handlers consult this table: when "flag needed" is
 * set, the access is performed only if (entry >> 2) intersects
 * s->compat_flags.  Entries not listed are 0 (no restriction). */
static const uint8_t mac_reg_access[0x8000] = {
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    /* Flag-gated and additionally marked as partially implemented */
    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1279 
1280 static void
1281 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1282                  unsigned size)
1283 {
1284     E1000State *s = opaque;
1285     unsigned int index = (addr & 0x1ffff) >> 2;
1286 
1287     if (index < NWRITEOPS && macreg_writeops[index]) {
1288         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1289             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1290             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1291                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1292                        "It is not fully implemented.\n", index<<2);
1293             }
1294             macreg_writeops[index](s, index, val);
1295         } else {    /* "flag needed" bit is set, but the flag is not active */
1296             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1297                    index<<2);
1298         }
1299     } else if (index < NREADOPS && macreg_readops[index]) {
1300         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1301                index<<2, val);
1302     } else {
1303         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1304                index<<2, val);
1305     }
1306 }
1307 
1308 static uint64_t
1309 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1310 {
1311     E1000State *s = opaque;
1312     unsigned int index = (addr & 0x1ffff) >> 2;
1313 
1314     if (index < NREADOPS && macreg_readops[index]) {
1315         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1316             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1317             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1318                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1319                        "It is not fully implemented.\n", index<<2);
1320             }
1321             return macreg_readops[index](s, index);
1322         } else {    /* "flag needed" bit is set, but the flag is not active */
1323             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1324                    index<<2);
1325         }
1326     } else {
1327         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1328     }
1329     return 0;
1330 }
1331 
1332 static const MemoryRegionOps e1000_mmio_ops = {
1333     .read = e1000_mmio_read,
1334     .write = e1000_mmio_write,
1335     .endianness = DEVICE_LITTLE_ENDIAN,
1336     .impl = {
1337         .min_access_size = 4,
1338         .max_access_size = 4,
1339     },
1340 };
1341 
1342 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1343                               unsigned size)
1344 {
1345     E1000State *s = opaque;
1346 
1347     (void)s;
1348     return 0;
1349 }
1350 
1351 static void e1000_io_write(void *opaque, hwaddr addr,
1352                            uint64_t val, unsigned size)
1353 {
1354     E1000State *s = opaque;
1355 
1356     (void)s;
1357 }
1358 
1359 static const MemoryRegionOps e1000_io_ops = {
1360     .read = e1000_io_read,
1361     .write = e1000_io_write,
1362     .endianness = DEVICE_LITTLE_ENDIAN,
1363 };
1364 
/* Field test for vmstate: true only when the stream version is exactly 1. */
static bool is_version_1(void *opaque, int version_id)
{
    const bool is_v1 = (version_id == 1);

    return is_v1;
}
1369 
1370 static int e1000_pre_save(void *opaque)
1371 {
1372     E1000State *s = opaque;
1373     NetClientState *nc = qemu_get_queue(s->nic);
1374 
1375     /*
1376      * If link is down and auto-negotiation is supported and ongoing,
1377      * complete auto-negotiation immediately. This allows us to look
1378      * at MII_BMSR_AN_COMP to infer link status on load.
1379      */
1380     if (nc->link_down && have_autoneg(s)) {
1381         s->phy_reg[MII_BMSR] |= MII_BMSR_AN_COMP;
1382     }
1383 
1384     /* Decide which set of props to migrate in the main structure */
1385     if (chkflag(TSO) || !s->use_tso_for_migration) {
1386         /* Either we're migrating with the extra subsection, in which
1387          * case the mig_props is always 'props' OR
1388          * we've not got the subsection, but 'props' was the last
1389          * updated.
1390          */
1391         s->mig_props = s->tx.props;
1392     } else {
1393         /* We're not using the subsection, and 'tso_props' was
1394          * the last updated.
1395          */
1396         s->mig_props = s->tx.tso_props;
1397     }
1398     return 0;
1399 }
1400 
1401 static int e1000_post_load(void *opaque, int version_id)
1402 {
1403     E1000State *s = opaque;
1404     NetClientState *nc = qemu_get_queue(s->nic);
1405 
1406     if (!chkflag(MIT)) {
1407         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1408             s->mac_reg[TADV] = 0;
1409         s->mit_irq_level = false;
1410     }
1411     s->mit_ide = 0;
1412     s->mit_timer_on = true;
1413     timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
1414 
1415     /* nc.link_down can't be migrated, so infer link_down according
1416      * to link status bit in mac_reg[STATUS].
1417      * Alternatively, restart link negotiation if it was in progress. */
1418     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1419 
1420     if (have_autoneg(s) && !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
1421         nc->link_down = false;
1422         timer_mod(s->autoneg_timer,
1423                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1424     }
1425 
1426     s->tx.props = s->mig_props;
1427     if (!s->received_tx_tso) {
1428         /* We received only one set of offload data (tx.props)
1429          * and haven't got tx.tso_props.  The best we can do
1430          * is dupe the data.
1431          */
1432         s->tx.tso_props = s->mig_props;
1433     }
1434     return 0;
1435 }
1436 
1437 static int e1000_tx_tso_post_load(void *opaque, int version_id)
1438 {
1439     E1000State *s = opaque;
1440     s->received_tx_tso = true;
1441     return 0;
1442 }
1443 
1444 static bool e1000_mit_state_needed(void *opaque)
1445 {
1446     E1000State *s = opaque;
1447 
1448     return chkflag(MIT);
1449 }
1450 
1451 static bool e1000_full_mac_needed(void *opaque)
1452 {
1453     E1000State *s = opaque;
1454 
1455     return chkflag(MAC);
1456 }
1457 
1458 static bool e1000_tso_state_needed(void *opaque)
1459 {
1460     E1000State *s = opaque;
1461 
1462     return chkflag(TSO);
1463 }
1464 
/*
 * Subsection carrying the interrupt-mitigation registers (RDTR, RADV,
 * TADV, ITR) and mit_irq_level.  Its .needed predicate checks the MIT
 * compat flag.  Field order defines the stream layout; do not reorder.
 */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1479 
/*
 * Subsection carrying the whole mac_reg array (0x8000 32-bit words).
 * Its .needed predicate checks the MAC compat flag.
 */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1490 
/*
 * Subsection carrying tx.tso_props.  Its .needed predicate checks the TSO
 * compat flag, and its post_load hook sets received_tx_tso so that
 * e1000_post_load() knows tso_props arrived.  Field order defines the
 * stream layout; do not reorder.
 */
static const VMStateDescription vmstate_e1000_tx_tso_state = {
    .name = "e1000/tx_tso_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_tso_state_needed,
    .post_load = e1000_tx_tso_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
        VMSTATE_INT8(tx.tso_props.ip, E1000State),
        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1512 
/*
 * Main migration description for the device (version 2, accepts version 1).
 * The offload properties are sent through mig_props, which
 * e1000_pre_save() fills and e1000_post_load() copies back into tx.
 * Field order defines the stream layout; do not reorder.
 */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* EEPROM access state */
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* Offload properties selected in e1000_pre_save(), plus tx state */
        VMSTATE_UINT8(mig_props.ipcss, E1000State),
        VMSTATE_UINT8(mig_props.ipcso, E1000State),
        VMSTATE_UINT16(mig_props.ipcse, E1000State),
        VMSTATE_UINT8(mig_props.tucss, E1000State),
        VMSTATE_UINT8(mig_props.tucso, E1000State),
        VMSTATE_UINT16(mig_props.tucse, E1000State),
        VMSTATE_UINT32(mig_props.paylen, E1000State),
        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
        VMSTATE_UINT16(mig_props.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(mig_props.ip, E1000State),
        VMSTATE_INT8(mig_props.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        /* EEPROM contents and PHY registers */
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individual MAC registers */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Register tables: RA, MTA and VFTA */
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, E1000_MC_TBL_SIZE),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA,
                                 E1000_VLAN_FILTER_TBL_SIZE),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        &vmstate_e1000_tx_tso_state,
        NULL
    }
};
1598 
/*
 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
 * Note: A valid DevId will be inserted during pci_e1000_realize().
 *
 * The template holds 64 16-bit words.  The two DevId slots (words 11 and
 * 13) are left as 0 here; pci_e1000_realize() passes this template, the
 * PCI device id and the MAC address to e1000x_core_prepare_eeprom().
 */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1613 
1614 /* PCI interface */
1615 
1616 static void
1617 e1000_mmio_setup(E1000State *d)
1618 {
1619     int i;
1620     const uint32_t excluded_regs[] = {
1621         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1622         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1623     };
1624 
1625     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1626                           "e1000-mmio", PNPMMIO_SIZE);
1627     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1628     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1629         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1630                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1631     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1632 }
1633 
1634 static void
1635 pci_e1000_uninit(PCIDevice *dev)
1636 {
1637     E1000State *d = E1000(dev);
1638 
1639     timer_free(d->autoneg_timer);
1640     timer_free(d->mit_timer);
1641     timer_free(d->flush_queue_timer);
1642     qemu_del_nic(d->nic);
1643 }
1644 
1645 static NetClientInfo net_e1000_info = {
1646     .type = NET_CLIENT_DRIVER_NIC,
1647     .size = sizeof(NICState),
1648     .can_receive = e1000_can_receive,
1649     .receive = e1000_receive,
1650     .receive_iov = e1000_receive_iov,
1651     .link_status_changed = e1000_set_link_status,
1652 };
1653 
1654 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1655                                 uint32_t val, int len)
1656 {
1657     E1000State *s = E1000(pci_dev);
1658 
1659     pci_default_write_config(pci_dev, address, val, len);
1660 
1661     if (range_covers_byte(address, len, PCI_COMMAND) &&
1662         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1663         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1664     }
1665 }
1666 
1667 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1668 {
1669     DeviceState *dev = DEVICE(pci_dev);
1670     E1000State *d = E1000(pci_dev);
1671     uint8_t *pci_conf;
1672     uint8_t *macaddr;
1673 
1674     pci_dev->config_write = e1000_write_config;
1675 
1676     pci_conf = pci_dev->config;
1677 
1678     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1679     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1680 
1681     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1682 
1683     e1000_mmio_setup(d);
1684 
1685     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1686 
1687     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1688 
1689     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1690     macaddr = d->conf.macaddr.a;
1691 
1692     e1000x_core_prepare_eeprom(d->eeprom_data,
1693                                e1000_eeprom_template,
1694                                sizeof(e1000_eeprom_template),
1695                                PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1696                                macaddr);
1697 
1698     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1699                           object_get_typename(OBJECT(d)), dev->id, d);
1700 
1701     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1702 
1703     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1704     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1705     d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1706                                         e1000_flush_queue_timer, d);
1707 }
1708 
1709 static Property e1000_properties[] = {
1710     DEFINE_NIC_PROPERTIES(E1000State, conf),
1711     DEFINE_PROP_BIT("autonegotiation", E1000State,
1712                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1713     DEFINE_PROP_BIT("mitigation", E1000State,
1714                     compat_flags, E1000_FLAG_MIT_BIT, true),
1715     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1716                     compat_flags, E1000_FLAG_MAC_BIT, true),
1717     DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1718                     compat_flags, E1000_FLAG_TSO_BIT, true),
1719     DEFINE_PROP_BIT("init-vet", E1000State,
1720                     compat_flags, E1000_FLAG_VET_BIT, true),
1721     DEFINE_PROP_END_OF_LIST(),
1722 };
1723 
/*
 * Static description of one e1000 device variant.  A pointer to one of
 * these is passed as class data to e1000_class_init().
 */
typedef struct E1000Info {
    const char *name;      /* used as the registered type name */
    uint16_t device_id;    /* copied into PCIDeviceClass.device_id */
    uint8_t revision;      /* copied into PCIDeviceClass.revision */
    uint16_t phy_id2;      /* copied into E1000BaseClass.phy_id2 */
} E1000Info;
1730 
1731 static void e1000_class_init(ObjectClass *klass, void *data)
1732 {
1733     DeviceClass *dc = DEVICE_CLASS(klass);
1734     ResettableClass *rc = RESETTABLE_CLASS(klass);
1735     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1736     E1000BaseClass *e = E1000_CLASS(klass);
1737     const E1000Info *info = data;
1738 
1739     k->realize = pci_e1000_realize;
1740     k->exit = pci_e1000_uninit;
1741     k->romfile = "efi-e1000.rom";
1742     k->vendor_id = PCI_VENDOR_ID_INTEL;
1743     k->device_id = info->device_id;
1744     k->revision = info->revision;
1745     e->phy_id2 = info->phy_id2;
1746     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1747     rc->phases.hold = e1000_reset_hold;
1748     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1749     dc->desc = "Intel Gigabit Ethernet";
1750     dc->vmsd = &vmstate_e1000;
1751     device_class_set_props(dc, e1000_properties);
1752 }
1753 
1754 static void e1000_instance_init(Object *obj)
1755 {
1756     E1000State *n = E1000(obj);
1757     device_add_bootindex_property(obj, &n->conf.bootindex,
1758                                   "bootindex", "/ethernet-phy@0",
1759                                   DEVICE(n));
1760 }
1761 
1762 static const TypeInfo e1000_base_info = {
1763     .name          = TYPE_E1000_BASE,
1764     .parent        = TYPE_PCI_DEVICE,
1765     .instance_size = sizeof(E1000State),
1766     .instance_init = e1000_instance_init,
1767     .class_size    = sizeof(E1000BaseClass),
1768     .abstract      = true,
1769     .interfaces = (InterfaceInfo[]) {
1770         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1771         { },
1772     },
1773 };
1774 
1775 static const E1000Info e1000_devices[] = {
1776     {
1777         .name      = "e1000",
1778         .device_id = E1000_DEV_ID_82540EM,
1779         .revision  = 0x03,
1780         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1781     },
1782     {
1783         .name      = "e1000-82544gc",
1784         .device_id = E1000_DEV_ID_82544GC_COPPER,
1785         .revision  = 0x03,
1786         .phy_id2   = E1000_PHY_ID2_82544x,
1787     },
1788     {
1789         .name      = "e1000-82545em",
1790         .device_id = E1000_DEV_ID_82545EM_COPPER,
1791         .revision  = 0x03,
1792         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1793     },
1794 };
1795 
1796 static void e1000_register_types(void)
1797 {
1798     int i;
1799 
1800     type_register_static(&e1000_base_info);
1801     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1802         const E1000Info *info = &e1000_devices[i];
1803         TypeInfo type_info = {};
1804 
1805         type_info.name = info->name;
1806         type_info.parent = TYPE_E1000_BASE;
1807         type_info.class_data = (void *)info;
1808         type_info.class_init = e1000_class_init;
1809 
1810         type_register(&type_info);
1811     }
1812 }
1813 
1814 type_init(e1000_register_types)
1815