xref: /openbmc/qemu/hw/net/e1000.c (revision 7e10ce2706e2dbed6a59825dc0286b3810395afa)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/net/mii.h"
30 #include "hw/pci/pci_device.h"
31 #include "hw/qdev-properties.h"
32 #include "migration/vmstate.h"
33 #include "net/eth.h"
34 #include "net/net.h"
35 #include "net/checksum.h"
36 #include "sysemu/sysemu.h"
37 #include "sysemu/dma.h"
38 #include "qemu/iov.h"
39 #include "qemu/module.h"
40 #include "qemu/range.h"
41 
42 #include "e1000_common.h"
43 #include "e1000x_common.h"
44 #include "trace.h"
45 #include "qom/object.h"
46 
47 /* #define E1000_DEBUG */
48 
#ifdef E1000_DEBUG
/*
 * Debug categories.  The ordinal of each enumerator is the bit position
 * that DBGBIT() selects in debugflags, so the order is significant.
 */
enum {
    DEBUG_GENERAL,
    DEBUG_IO,
    DEBUG_MMIO,
    DEBUG_INTERRUPT,
    DEBUG_RX,
    DEBUG_TX,
    DEBUG_MDIC,
    DEBUG_EEPROM,
    DEBUG_UNKNOWN,
    DEBUG_TXSUM,
    DEBUG_TXERR,
    DEBUG_RXERR,
    DEBUG_RXFILTER,
    DEBUG_PHY,
    DEBUG_NOTYET,
};

/* Mask bit for debug category x (x is the name without the DEBUG_ prefix). */
#define DBGBIT(x)    (1<<DEBUG_##x)

/* Categories that are printed when E1000_DEBUG is defined. */
static int debugflags = DBGBIT(GENERAL) | DBGBIT(TXERR);

/* Print a message on stderr, prefixed "e1000: ", if category 'what' is on. */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
/* Debug output compiled out: DBGOUT() is an empty statement. */
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif
66 
/* Region sizes in bytes (presumably for the io and mmio regions -- confirm). */
#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000

/*
 * ETH_HLEN plus four extra bytes (presumably room for one VLAN tag --
 * confirm).  The receive path needs this many contiguous header bytes.
 */
#define MAXIMUM_ETHERNET_HDR_LEN (ETH_HLEN + 4)
71 
72 /*
73  * HW models:
74  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
75  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
76  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
77  *  Others never tested
78  */
79 
80 struct E1000State_st {
81     /*< private >*/
82     PCIDevice parent_obj;
83     /*< public >*/
84 
85     NICState *nic;
86     NICConf conf;
87     MemoryRegion mmio;
88     MemoryRegion io;
89 
90     uint32_t mac_reg[0x8000];
91     uint16_t phy_reg[0x20];
92     uint16_t eeprom_data[64];
93 
94     uint32_t rxbuf_size;
95     uint32_t rxbuf_min_shift;
96     struct e1000_tx {
97         unsigned char header[256];
98         unsigned char vlan_header[4];
99         /* Fields vlan and data must not be reordered or separated. */
100         unsigned char vlan[4];
101         unsigned char data[0x10000];
102         uint16_t size;
103         unsigned char vlan_needed;
104         unsigned char sum_needed;
105         bool cptse;
106         e1000x_txd_props props;
107         e1000x_txd_props tso_props;
108         uint16_t tso_frames;
109         bool busy;
110     } tx;
111 
112     struct {
113         uint32_t val_in;    /* shifted in from guest driver */
114         uint16_t bitnum_in;
115         uint16_t bitnum_out;
116         uint16_t reading;
117         uint32_t old_eecd;
118     } eecd_state;
119 
120     QEMUTimer *autoneg_timer;
121 
122     QEMUTimer *mit_timer;      /* Mitigation timer. */
123     bool mit_timer_on;         /* Mitigation timer is running. */
124     bool mit_irq_level;        /* Tracks interrupt pin level. */
125     uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
126 
127     QEMUTimer *flush_queue_timer;
128 
129 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
130 #define E1000_FLAG_MAC_BIT 2
131 #define E1000_FLAG_TSO_BIT 3
132 #define E1000_FLAG_VET_BIT 4
133 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
134 #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
135 #define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)
136 
137     uint32_t compat_flags;
138     bool received_tx_tso;
139     bool use_tso_for_migration;
140     e1000x_txd_props mig_props;
141 };
142 typedef struct E1000State_st E1000State;
143 
/*
 * Test compat flag E1000_FLAG_<x>; non-zero when set.  Expects a local
 * variable named 's' pointing to the E1000State at the point of use.
 */
#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
145 
146 struct E1000BaseClass {
147     PCIDeviceClass parent_class;
148     uint16_t phy_id2;
149 };
150 typedef struct E1000BaseClass E1000BaseClass;
151 
152 #define TYPE_E1000_BASE "e1000-base"
153 
154 DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
155                      E1000, TYPE_E1000_BASE)
156 
157 
158 static void
159 e1000_link_up(E1000State *s)
160 {
161     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
162 
163     /* E1000_STATUS_LU is tested by e1000_can_receive() */
164     qemu_flush_queued_packets(qemu_get_queue(s->nic));
165 }
166 
167 static void
168 e1000_autoneg_done(E1000State *s)
169 {
170     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
171 
172     /* E1000_STATUS_LU is tested by e1000_can_receive() */
173     qemu_flush_queued_packets(qemu_get_queue(s->nic));
174 }
175 
176 static bool
177 have_autoneg(E1000State *s)
178 {
179     return (s->phy_reg[MII_BMCR] & MII_BMCR_AUTOEN);
180 }
181 
182 static void
183 set_phy_ctrl(E1000State *s, int index, uint16_t val)
184 {
185     /* bits 0-5 reserved; MII_BMCR_[ANRESTART,RESET] are self clearing */
186     s->phy_reg[MII_BMCR] = val & ~(0x3f |
187                                    MII_BMCR_RESET |
188                                    MII_BMCR_ANRESTART);
189 
190     /*
191      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
192      * migrate during auto negotiation, after migration the link will be
193      * down.
194      */
195     if (have_autoneg(s) && (val & MII_BMCR_ANRESTART)) {
196         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
197     }
198 }
199 
200 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
201     [MII_BMCR] = set_phy_ctrl,
202 };
203 
204 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
205 
206 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
207 static const char phy_regcap[0x20] = {
208     [MII_BMSR]   = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
209     [MII_PHYID1] = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
210     [MII_BMCR]   = PHY_RW,    [MII_CTRL1000]               = PHY_RW,
211     [MII_ANLPAR] = PHY_R,     [MII_STAT1000]               = PHY_R,
212     [MII_ANAR]   = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
213     [MII_PHYID2] = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
214     [MII_ANER]   = PHY_R,
215 };
216 
217 /* MII_PHYID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
218 static const uint16_t phy_reg_init[] = {
219     [MII_BMCR] = MII_BMCR_SPEED1000 |
220                  MII_BMCR_FD |
221                  MII_BMCR_AUTOEN,
222 
223     [MII_BMSR] = MII_BMSR_EXTCAP |
224                  MII_BMSR_LINK_ST |   /* link initially up */
225                  MII_BMSR_AUTONEG |
226                  /* MII_BMSR_AN_COMP: initially NOT completed */
227                  MII_BMSR_MFPS |
228                  MII_BMSR_EXTSTAT |
229                  MII_BMSR_10T_HD |
230                  MII_BMSR_10T_FD |
231                  MII_BMSR_100TX_HD |
232                  MII_BMSR_100TX_FD,
233 
234     [MII_PHYID1] = 0x141,
235     /* [MII_PHYID2] configured per DevId, from e1000_reset() */
236     [MII_ANAR] = MII_ANAR_CSMACD | MII_ANAR_10 |
237                  MII_ANAR_10FD | MII_ANAR_TX |
238                  MII_ANAR_TXFD | MII_ANAR_PAUSE |
239                  MII_ANAR_PAUSE_ASYM,
240     [MII_ANLPAR] = MII_ANLPAR_10 | MII_ANLPAR_10FD |
241                    MII_ANLPAR_TX | MII_ANLPAR_TXFD,
242     [MII_CTRL1000] = MII_CTRL1000_FULL | MII_CTRL1000_PORT |
243                      MII_CTRL1000_MASTER,
244     [MII_STAT1000] = MII_STAT1000_HALF | MII_STAT1000_FULL |
245                      MII_STAT1000_ROK | MII_STAT1000_LOK,
246     [M88E1000_PHY_SPEC_CTRL] = 0x360,
247     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
248     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
249 };
250 
251 static const uint32_t mac_reg_init[] = {
252     [PBA]     = 0x00100030,
253     [LEDCTL]  = 0x602,
254     [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
255                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
256     [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
257                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
258                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
259                 E1000_STATUS_LU,
260     [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
261                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
262                 E1000_MANC_RMCP_EN,
263 };
264 
/*
 * Lower *curr to value when value is non-zero and either *curr is still
 * unset (zero) or value is smaller.  A zero value never changes *curr.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;
    }
    if (*curr == 0 || value < *curr) {
        *curr = value;
    }
}
273 
274 static void
275 set_interrupt_cause(E1000State *s, int index, uint32_t val)
276 {
277     PCIDevice *d = PCI_DEVICE(s);
278     uint32_t pending_ints;
279     uint32_t mit_delay;
280 
281     s->mac_reg[ICR] = val;
282 
283     /*
284      * Make sure ICR and ICS registers have the same value.
285      * The spec says that the ICS register is write-only.  However in practice,
286      * on real hardware ICS is readable, and for reads it has the same value as
287      * ICR (except that ICS does not have the clear on read behaviour of ICR).
288      *
289      * The VxWorks PRO/1000 driver uses this behaviour.
290      */
291     s->mac_reg[ICS] = val;
292 
293     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
294     if (!s->mit_irq_level && pending_ints) {
295         /*
296          * Here we detect a potential raising edge. We postpone raising the
297          * interrupt line if we are inside the mitigation delay window
298          * (s->mit_timer_on == 1).
299          * We provide a partial implementation of interrupt mitigation,
300          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
301          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
302          * RADV; relative timers based on TIDV and RDTR are not implemented.
303          */
304         if (s->mit_timer_on) {
305             return;
306         }
307 
308         /* Compute the next mitigation delay according to pending
309          * interrupts and the current values of RADV (provided
310          * RDTR!=0), TADV and ITR.
311          * Then rearm the timer.
312          */
313         mit_delay = 0;
314         if (s->mit_ide &&
315                 (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
316             mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
317         }
318         if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
319             mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
320         }
321         mit_update_delay(&mit_delay, s->mac_reg[ITR]);
322 
323         /*
324          * According to e1000 SPEC, the Ethernet controller guarantees
325          * a maximum observable interrupt rate of 7813 interrupts/sec.
326          * Thus if mit_delay < 500 then the delay should be set to the
327          * minimum delay possible which is 500.
328          */
329         mit_delay = (mit_delay < 500) ? 500 : mit_delay;
330 
331         s->mit_timer_on = 1;
332         timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
333                   mit_delay * 256);
334         s->mit_ide = 0;
335     }
336 
337     s->mit_irq_level = (pending_ints != 0);
338     pci_set_irq(d, s->mit_irq_level);
339 }
340 
341 static void
342 e1000_mit_timer(void *opaque)
343 {
344     E1000State *s = opaque;
345 
346     s->mit_timer_on = 0;
347     /* Call set_interrupt_cause to update the irq level (if necessary). */
348     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
349 }
350 
351 static void
352 set_ics(E1000State *s, int index, uint32_t val)
353 {
354     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
355         s->mac_reg[IMS]);
356     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
357 }
358 
359 static void
360 e1000_autoneg_timer(void *opaque)
361 {
362     E1000State *s = opaque;
363     if (!qemu_get_queue(s->nic)->link_down) {
364         e1000_autoneg_done(s);
365         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
366     }
367 }
368 
369 static bool e1000_vet_init_need(void *opaque)
370 {
371     E1000State *s = opaque;
372 
373     return chkflag(VET);
374 }
375 
376 static void e1000_reset_hold(Object *obj, ResetType type)
377 {
378     E1000State *d = E1000(obj);
379     E1000BaseClass *edc = E1000_GET_CLASS(d);
380     uint8_t *macaddr = d->conf.macaddr.a;
381 
382     timer_del(d->autoneg_timer);
383     timer_del(d->mit_timer);
384     timer_del(d->flush_queue_timer);
385     d->mit_timer_on = 0;
386     d->mit_irq_level = 0;
387     d->mit_ide = 0;
388     memset(d->phy_reg, 0, sizeof d->phy_reg);
389     memcpy(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
390     d->phy_reg[MII_PHYID2] = edc->phy_id2;
391     memset(d->mac_reg, 0, sizeof d->mac_reg);
392     memcpy(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
393     d->rxbuf_min_shift = 1;
394     memset(&d->tx, 0, sizeof d->tx);
395 
396     if (qemu_get_queue(d->nic)->link_down) {
397         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
398     }
399 
400     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
401 
402     if (e1000_vet_init_need(d)) {
403         d->mac_reg[VET] = ETH_P_VLAN;
404     }
405 }
406 
407 static void
408 set_ctrl(E1000State *s, int index, uint32_t val)
409 {
410     /* RST is self clearing */
411     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
412 }
413 
414 static void
415 e1000_flush_queue_timer(void *opaque)
416 {
417     E1000State *s = opaque;
418 
419     qemu_flush_queued_packets(qemu_get_queue(s->nic));
420 }
421 
422 static void
423 set_rx_control(E1000State *s, int index, uint32_t val)
424 {
425     s->mac_reg[RCTL] = val;
426     s->rxbuf_size = e1000x_rxbufsize(val);
427     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
428     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
429            s->mac_reg[RCTL]);
430     timer_mod(s->flush_queue_timer,
431               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
432 }
433 
434 static void
435 set_mdic(E1000State *s, int index, uint32_t val)
436 {
437     uint32_t data = val & E1000_MDIC_DATA_MASK;
438     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
439 
440     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
441         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
442     else if (val & E1000_MDIC_OP_READ) {
443         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
444         if (!(phy_regcap[addr] & PHY_R)) {
445             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
446             val |= E1000_MDIC_ERROR;
447         } else
448             val = (val ^ data) | s->phy_reg[addr];
449     } else if (val & E1000_MDIC_OP_WRITE) {
450         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
451         if (!(phy_regcap[addr] & PHY_W)) {
452             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
453             val |= E1000_MDIC_ERROR;
454         } else {
455             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
456                 phyreg_writeops[addr](s, index, data);
457             } else {
458                 s->phy_reg[addr] = data;
459             }
460         }
461     }
462     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
463 
464     if (val & E1000_MDIC_INT_EN) {
465         set_ics(s, 0, E1000_ICR_MDAC);
466     }
467 }
468 
469 static uint32_t
470 get_eecd(E1000State *s, int index)
471 {
472     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
473 
474     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
475            s->eecd_state.bitnum_out, s->eecd_state.reading);
476     if (!s->eecd_state.reading ||
477         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
478           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
479         ret |= E1000_EECD_DO;
480     return ret;
481 }
482 
483 static void
484 set_eecd(E1000State *s, int index, uint32_t val)
485 {
486     uint32_t oldval = s->eecd_state.old_eecd;
487 
488     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
489             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
490     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
491         return;
492     }
493     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
494         s->eecd_state.val_in = 0;
495         s->eecd_state.bitnum_in = 0;
496         s->eecd_state.bitnum_out = 0;
497         s->eecd_state.reading = 0;
498     }
499     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
500         return;
501     }
502     if (!(E1000_EECD_SK & val)) {               /* falling edge */
503         s->eecd_state.bitnum_out++;
504         return;
505     }
506     s->eecd_state.val_in <<= 1;
507     if (val & E1000_EECD_DI)
508         s->eecd_state.val_in |= 1;
509     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
510         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
511         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
512             EEPROM_READ_OPCODE_MICROWIRE);
513     }
514     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
515            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
516            s->eecd_state.reading);
517 }
518 
519 static uint32_t
520 flash_eerd_read(E1000State *s, int x)
521 {
522     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
523 
524     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
525         return (s->mac_reg[EERD]);
526 
527     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
528         return (E1000_EEPROM_RW_REG_DONE | r);
529 
530     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
531            E1000_EEPROM_RW_REG_DONE | r);
532 }
533 
/*
 * Compute an Internet checksum and store it, big-endian, at data[sloc].
 *
 * @data: packet bytes
 * @n:    packet length
 * @sloc: offset at which the checksum is stored
 * @css:  offset of the first byte that is summed
 * @cse:  offset of the last byte that is summed; 0 means up to the end
 *
 * Nothing is written unless sloc is below the end of the summed range
 * minus one.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t end = n;
    uint32_t sum;

    if (cse && cse < n) {
        end = cse + 1;
    }
    if (sloc >= end - 1) {
        return;
    }

    sum = net_checksum_add(end - css, data + css);
    stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
}
546 
547 static inline void
548 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
549 {
550     if (is_broadcast_ether_addr(arr)) {
551         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
552     } else if (is_multicast_ether_addr(arr)) {
553         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
554     }
555 }
556 
557 static void
558 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
559 {
560     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
561                                     PTC1023, PTC1522 };
562 
563     NetClientState *nc = qemu_get_queue(s->nic);
564     if (s->phy_reg[MII_BMCR] & MII_BMCR_LOOPBACK) {
565         qemu_receive_packet(nc, buf, size);
566     } else {
567         qemu_send_packet(nc, buf, size);
568     }
569     inc_tx_bcast_or_mcast_count(s, buf);
570     e1000x_increase_size_stats(s->mac_reg, PTCregs, size + 4);
571 }
572 
573 static void
574 xmit_seg(E1000State *s)
575 {
576     uint16_t len;
577     unsigned int frames = s->tx.tso_frames, css, sofar;
578     struct e1000_tx *tp = &s->tx;
579     struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
580 
581     if (tp->cptse) {
582         css = props->ipcss;
583         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
584                frames, tp->size, css);
585         if (props->ip) {    /* IPv4 */
586             stw_be_p(tp->data+css+2, tp->size - css);
587             stw_be_p(tp->data+css+4,
588                      lduw_be_p(tp->data + css + 4) + frames);
589         } else {         /* IPv6 */
590             stw_be_p(tp->data+css+4, tp->size - css);
591         }
592         css = props->tucss;
593         len = tp->size - css;
594         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
595         if (props->tcp) {
596             sofar = frames * props->mss;
597             stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
598             if (props->paylen - sofar > props->mss) {
599                 tp->data[css + 13] &= ~9;    /* PSH, FIN */
600             } else if (frames) {
601                 e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
602             }
603         } else {    /* UDP */
604             stw_be_p(tp->data+css+4, len);
605         }
606         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
607             unsigned int phsum;
608             // add pseudo-header length before checksum calculation
609             void *sp = tp->data + props->tucso;
610 
611             phsum = lduw_be_p(sp) + len;
612             phsum = (phsum >> 16) + (phsum & 0xffff);
613             stw_be_p(sp, phsum);
614         }
615         tp->tso_frames++;
616     }
617 
618     if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
619         putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
620     }
621     if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
622         putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
623     }
624     if (tp->vlan_needed) {
625         memmove(tp->vlan, tp->data, 4);
626         memmove(tp->data, tp->data + 4, 8);
627         memcpy(tp->data + 8, tp->vlan_header, 4);
628         e1000_send_packet(s, tp->vlan, tp->size + 4);
629     } else {
630         e1000_send_packet(s, tp->data, tp->size);
631     }
632 
633     e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
634     e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size + 4);
635     e1000x_inc_reg_if_not_full(s->mac_reg, GPTC);
636     e1000x_grow_8reg_if_not_full(s->mac_reg, GOTCL, s->tx.size + 4);
637 }
638 
639 static void
640 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
641 {
642     PCIDevice *d = PCI_DEVICE(s);
643     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
644     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
645     unsigned int split_size = txd_lower & 0xffff, bytes, sz;
646     unsigned int msh = 0xfffff;
647     uint64_t addr;
648     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
649     struct e1000_tx *tp = &s->tx;
650 
651     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
652     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
653         if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
654             e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
655             s->use_tso_for_migration = 1;
656             tp->tso_frames = 0;
657         } else {
658             e1000x_read_tx_ctx_descr(xp, &tp->props);
659             s->use_tso_for_migration = 0;
660         }
661         return;
662     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
663         // data descriptor
664         if (tp->size == 0) {
665             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
666         }
667         tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
668     } else {
669         // legacy descriptor
670         tp->cptse = 0;
671     }
672 
673     if (e1000x_vlan_enabled(s->mac_reg) &&
674         e1000x_is_vlan_txd(txd_lower) &&
675         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
676         tp->vlan_needed = 1;
677         stw_be_p(tp->vlan_header,
678                       le16_to_cpu(s->mac_reg[VET]));
679         stw_be_p(tp->vlan_header + 2,
680                       le16_to_cpu(dp->upper.fields.special));
681     }
682 
683     addr = le64_to_cpu(dp->buffer_addr);
684     if (tp->cptse) {
685         msh = tp->tso_props.hdr_len + tp->tso_props.mss;
686         do {
687             bytes = split_size;
688             if (tp->size >= msh) {
689                 goto eop;
690             }
691             if (tp->size + bytes > msh)
692                 bytes = msh - tp->size;
693 
694             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
695             pci_dma_read(d, addr, tp->data + tp->size, bytes);
696             sz = tp->size + bytes;
697             if (sz >= tp->tso_props.hdr_len
698                 && tp->size < tp->tso_props.hdr_len) {
699                 memmove(tp->header, tp->data, tp->tso_props.hdr_len);
700             }
701             tp->size = sz;
702             addr += bytes;
703             if (sz == msh) {
704                 xmit_seg(s);
705                 memmove(tp->data, tp->header, tp->tso_props.hdr_len);
706                 tp->size = tp->tso_props.hdr_len;
707             }
708             split_size -= bytes;
709         } while (bytes && split_size);
710     } else {
711         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
712         pci_dma_read(d, addr, tp->data + tp->size, split_size);
713         tp->size += split_size;
714     }
715 
716 eop:
717     if (!(txd_lower & E1000_TXD_CMD_EOP))
718         return;
719     if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
720         xmit_seg(s);
721     }
722     tp->tso_frames = 0;
723     tp->sum_needed = 0;
724     tp->vlan_needed = 0;
725     tp->size = 0;
726     tp->cptse = 0;
727 }
728 
729 static uint32_t
730 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
731 {
732     PCIDevice *d = PCI_DEVICE(s);
733     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
734 
735     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
736         return 0;
737     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
738                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
739     dp->upper.data = cpu_to_le32(txd_upper);
740     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
741                   &dp->upper, sizeof(dp->upper));
742     return E1000_ICR_TXDW;
743 }
744 
745 static uint64_t tx_desc_base(E1000State *s)
746 {
747     uint64_t bah = s->mac_reg[TDBAH];
748     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
749 
750     return (bah << 32) + bal;
751 }
752 
753 static void
754 start_xmit(E1000State *s)
755 {
756     PCIDevice *d = PCI_DEVICE(s);
757     dma_addr_t base;
758     struct e1000_tx_desc desc;
759     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
760 
761     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
762         DBGOUT(TX, "tx disabled\n");
763         return;
764     }
765 
766     if (s->tx.busy) {
767         return;
768     }
769     s->tx.busy = true;
770 
771     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
772         base = tx_desc_base(s) +
773                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
774         pci_dma_read(d, base, &desc, sizeof(desc));
775 
776         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
777                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
778                desc.upper.data);
779 
780         process_tx_desc(s, &desc);
781         cause |= txdesc_writeback(s, base, &desc);
782 
783         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
784             s->mac_reg[TDH] = 0;
785         /*
786          * the following could happen only if guest sw assigns
787          * bogus values to TDT/TDLEN.
788          * there's nothing too intelligent we could do about this.
789          */
790         if (s->mac_reg[TDH] == tdh_start ||
791             tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
792             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
793                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
794             break;
795         }
796     }
797     s->tx.busy = false;
798     set_ics(s, 0, cause);
799 }
800 
801 static int
802 receive_filter(E1000State *s, const void *buf)
803 {
804     return (!e1000x_is_vlan_packet(buf, s->mac_reg[VET]) ||
805             e1000x_rx_vlan_filter(s->mac_reg, PKT_GET_VLAN_HDR(buf))) &&
806            e1000x_rx_group_filter(s->mac_reg, buf);
807 }
808 
809 static void
810 e1000_set_link_status(NetClientState *nc)
811 {
812     E1000State *s = qemu_get_nic_opaque(nc);
813     uint32_t old_status = s->mac_reg[STATUS];
814 
815     if (nc->link_down) {
816         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
817     } else {
818         if (have_autoneg(s) &&
819             !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
820             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
821         } else {
822             e1000_link_up(s);
823         }
824     }
825 
826     if (s->mac_reg[STATUS] != old_status)
827         set_ics(s, 0, E1000_ICR_LSC);
828 }
829 
830 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
831 {
832     int bufs;
833     /* Fast-path short packets */
834     if (total_size <= s->rxbuf_size) {
835         return s->mac_reg[RDH] != s->mac_reg[RDT];
836     }
837     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
838         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
839     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
840         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
841             s->mac_reg[RDT] - s->mac_reg[RDH];
842     } else {
843         return false;
844     }
845     return total_size <= bufs * s->rxbuf_size;
846 }
847 
848 static bool
849 e1000_can_receive(NetClientState *nc)
850 {
851     E1000State *s = qemu_get_nic_opaque(nc);
852 
853     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
854         e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
855 }
856 
857 static uint64_t rx_desc_base(E1000State *s)
858 {
859     uint64_t bah = s->mac_reg[RDBAH];
860     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
861 
862     return (bah << 32) + bal;
863 }
864 
865 static void
866 e1000_receiver_overrun(E1000State *s, size_t size)
867 {
868     trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
869     e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
870     e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
871     set_ics(s, 0, E1000_ICS_RXO);
872 }
873 
/*
 * Receive one packet, given as a scatter/gather list, into the guest's
 * RX descriptor ring.
 *
 * Return values, as implemented below:
 *   -1    the receiver is not enabled, or the ring cannot hold the packet
 *         (the overrun is accounted via e1000_receiver_overrun());
 *    0    the flush-queue timer is pending, nothing was consumed;
 *   size  the packet was stored, or it was dropped because it is oversized
 *         or rejected by receive_filter().  On the VLAN path the returned
 *         size has already been reduced by the 4 stripped tag bytes.
 *
 * NOTE(review): on the VLAN path the first 12 bytes of the caller's buffer
 * are shifted in place even though the iov is passed as const.
 */
874 static ssize_t
875 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
876 {
877     E1000State *s = qemu_get_nic_opaque(nc);
878     PCIDevice *d = PCI_DEVICE(s);
879     struct e1000_rx_desc desc;
880     dma_addr_t base;
881     unsigned int n, rdt;
882     uint32_t rdh_start;
883     uint16_t vlan_special = 0;
884     uint8_t vlan_status = 0;
885     uint8_t min_buf[ETH_ZLEN];
886     uint8_t *filter_buf = iov->iov_base;
887     size_t size = iov_size(iov, iovcnt);
888     size_t iov_ofs = 0;
889     size_t desc_offset;
890     size_t desc_size;
891     size_t total_size;
892     eth_pkt_types_e pkt_type;
893 
894     if (!e1000x_hw_rx_enabled(s->mac_reg)) {
895         return -1;
896     }
897 
898     if (timer_pending(s->flush_queue_timer)) {
899         return 0;
900     }
901 
    /*
     * The filters below inspect the header through filter_buf.  If the
     * first iov element is too short to hold it, gather the header bytes
     * into the local min_buf and filter on that copy instead.
     */
902     if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
903         /* This is very unlikely, but may happen. */
904         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
905         filter_buf = min_buf;
906     }
907 
908     /* Discard oversized packets if !LPE and !SBP. */
909     if (e1000x_is_oversized(s->mac_reg, size)) {
910         return size;
911     }
912 
913     if (!receive_filter(s, filter_buf)) {
914         return size;
915     }
916 
    /*
     * VLAN stripping: remember the two bytes at offset 14 for the
     * descriptor's "special" field, then move the 12 address bytes forward
     * by 4 so that the data starting at offset 4 is the frame without its
     * tag.  iov_ofs (and, on the min_buf path, iov itself) is advanced so
     * the copy loop below starts at that offset.
     */
917     if (e1000x_vlan_enabled(s->mac_reg) &&
918         e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
919         vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
920         iov_ofs = 4;
921         if (filter_buf == iov->iov_base) {
922             memmove(filter_buf + 4, filter_buf, 12);
923         } else {
924             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
925             while (iov->iov_len <= iov_ofs) {
926                 iov_ofs -= iov->iov_len;
927                 iov++;
928             }
929         }
930         vlan_status = E1000_RXD_STAT_VP;
931         size -= 4;
932     }
933 
934     pkt_type = get_eth_packet_type(PKT_GET_ETH_HDR(filter_buf));
935     rdh_start = s->mac_reg[RDH];
936     desc_offset = 0;
    /* total_size is the payload plus whatever e1000x_fcs_len() adds. */
937     total_size = size + e1000x_fcs_len(s->mac_reg);
938     if (!e1000_has_rxbufs(s, total_size)) {
939         e1000_receiver_overrun(s, total_size);
940         return -1;
941     }
    /*
     * Fill descriptors starting at RDH, at most rxbuf_size bytes each,
     * until total_size bytes have been accounted for.
     */
942     do {
943         desc_size = total_size - desc_offset;
944         if (desc_size > s->rxbuf_size) {
945             desc_size = s->rxbuf_size;
946         }
947         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
948         pci_dma_read(d, base, &desc, sizeof(desc));
949         desc.special = vlan_special;
950         desc.status &= ~E1000_RXD_STAT_DD;
951         if (desc.buffer_addr) {
            /*
             * Only the first "size" bytes come from the iov; the bytes
             * between size and total_size are counted in desc.length but
             * nothing is written for them.
             */
952             if (desc_offset < size) {
953                 size_t iov_copy;
954                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
955                 size_t copy_size = size - desc_offset;
956                 if (copy_size > s->rxbuf_size) {
957                     copy_size = s->rxbuf_size;
958                 }
959                 do {
960                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
961                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
962                     copy_size -= iov_copy;
963                     ba += iov_copy;
964                     iov_ofs += iov_copy;
965                     if (iov_ofs == iov->iov_len) {
966                         iov++;
967                         iov_ofs = 0;
968                     }
969                 } while (copy_size);
970             }
971             desc_offset += desc_size;
972             desc.length = cpu_to_le16(desc_size);
973             if (desc_offset >= total_size) {
974                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
975             } else {
976                 /* Guest zeroing out status is not a hardware requirement.
977                    Clear EOP in case guest didn't do it. */
978                 desc.status &= ~E1000_RXD_STAT_EOP;
979             }
980         } else { // as per intel docs; skip descriptors with null buf addr
981             DBGOUT(RX, "Null RX descriptor!!\n");
982         }
        /*
         * Two-step write-back: the whole descriptor goes out first with DD
         * still clear, then the status byte alone is rewritten with DD (and
         * the VLAN status bit) set.
         */
983         pci_dma_write(d, base, &desc, sizeof(desc));
984         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
985         pci_dma_write(d, base + offsetof(struct e1000_rx_desc, status),
986                       &desc.status, sizeof(desc.status));
987 
        /* Advance RDH, wrapping to 0 at the end of the ring. */
988         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
989             s->mac_reg[RDH] = 0;
990         /* see comment in start_xmit; same here */
991         if (s->mac_reg[RDH] == rdh_start ||
992             rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
993             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
994                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
995             e1000_receiver_overrun(s, total_size);
996             return -1;
997         }
998     } while (desc_offset < total_size);
999 
1000     e1000x_update_rx_total_stats(s->mac_reg, pkt_type, size, total_size);
1001 
    /*
     * Always raise RXT0.  Additionally raise RXDMT0 when the distance from
     * RDH to RDT (unwrapped), in descriptor bytes, is at most
     * RDLEN >> rxbuf_min_shift.
     */
1002     n = E1000_ICS_RXT0;
1003     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1004         rdt += s->mac_reg[RDLEN] / sizeof(desc);
1005     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1006         s->rxbuf_min_shift)
1007         n |= E1000_ICS_RXDMT0;
1008 
1009     set_ics(s, 0, n);
1010 
1011     return size;
1012 }
1013 
1014 static ssize_t
1015 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1016 {
1017     const struct iovec iov = {
1018         .iov_base = (uint8_t *)buf,
1019         .iov_len = size
1020     };
1021 
1022     return e1000_receive_iov(nc, &iov, 1);
1023 }
1024 
1025 static uint32_t
1026 mac_readreg(E1000State *s, int index)
1027 {
1028     return s->mac_reg[index];
1029 }
1030 
1031 static uint32_t
1032 mac_icr_read(E1000State *s, int index)
1033 {
1034     uint32_t ret = s->mac_reg[ICR];
1035 
1036     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1037     set_interrupt_cause(s, 0, 0);
1038     return ret;
1039 }
1040 
1041 static uint32_t
1042 mac_read_clr4(E1000State *s, int index)
1043 {
1044     uint32_t ret = s->mac_reg[index];
1045 
1046     s->mac_reg[index] = 0;
1047     return ret;
1048 }
1049 
1050 static uint32_t
1051 mac_read_clr8(E1000State *s, int index)
1052 {
1053     uint32_t ret = s->mac_reg[index];
1054 
1055     s->mac_reg[index] = 0;
1056     s->mac_reg[index-1] = 0;
1057     return ret;
1058 }
1059 
1060 static void
1061 mac_writereg(E1000State *s, int index, uint32_t val)
1062 {
1063     uint32_t macaddr[2];
1064 
1065     s->mac_reg[index] = val;
1066 
1067     if (index == RA + 1) {
1068         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1069         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1070         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1071     }
1072 }
1073 
1074 static void
1075 set_rdt(E1000State *s, int index, uint32_t val)
1076 {
1077     s->mac_reg[index] = val & 0xffff;
1078     if (e1000_has_rxbufs(s, 1)) {
1079         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1080     }
1081 }
1082 
1083 #define LOW_BITS_SET_FUNC(num)                             \
1084     static void                                            \
1085     set_##num##bit(E1000State *s, int index, uint32_t val) \
1086     {                                                      \
1087         s->mac_reg[index] = val & (BIT(num) - 1);          \
1088     }
1089 
1090 LOW_BITS_SET_FUNC(4)
1091 LOW_BITS_SET_FUNC(11)
1092 LOW_BITS_SET_FUNC(13)
1093 LOW_BITS_SET_FUNC(16)
1094 
1095 static void
1096 set_dlen(E1000State *s, int index, uint32_t val)
1097 {
1098     s->mac_reg[index] = val & 0xfff80;
1099 }
1100 
1101 static void
1102 set_tctl(E1000State *s, int index, uint32_t val)
1103 {
1104     s->mac_reg[index] = val;
1105     s->mac_reg[TDT] &= 0xffff;
1106     start_xmit(s);
1107 }
1108 
1109 static void
1110 set_icr(E1000State *s, int index, uint32_t val)
1111 {
1112     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1113     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1114 }
1115 
1116 static void
1117 set_imc(E1000State *s, int index, uint32_t val)
1118 {
1119     s->mac_reg[IMS] &= ~val;
1120     set_ics(s, 0, 0);
1121 }
1122 
1123 static void
1124 set_ims(E1000State *s, int index, uint32_t val)
1125 {
1126     s->mac_reg[IMS] |= val;
1127     set_ics(s, 0, 0);
1128 }
1129 
/*
 * MMIO read dispatch table, indexed by register index (the same index that
 * e1000_mmio_read() derives from the access address).
 *
 *  - getreg(X) and the "[A ... B] = &mac_readreg" ranges install the plain
 *    reader, which returns the stored register value.
 *  - mac_read_clr4 / mac_read_clr8 entries are read-to-clear registers.
 *  - ICR, EECD and EERD have dedicated handlers.
 *
 * Slots that are not listed stay NULL; e1000_mmio_read() checks for that
 * before dispatching.  NREADOPS is the number of slots in the table.
 */
1130 #define getreg(x)    [x] = mac_readreg
1131 typedef uint32_t (*readops)(E1000State *, int);
1132 static const readops macreg_readops[] = {
1133     getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
1134     getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
1135     getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
1136     getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
1137     getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
1138     getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
1139     getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
1140     getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
1141     getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
1142     getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
1143     getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
1144     getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
1145     getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
1146     getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
1147     getreg(GOTCL),    getreg(RDFH),     getreg(RDFT),     getreg(RDFHS),
1148     getreg(RDFTS),    getreg(RDFPC),    getreg(TDFH),     getreg(TDFT),
1149     getreg(TDFHS),    getreg(TDFTS),    getreg(TDFPC),    getreg(AIT),
1150 
    /* Read-to-clear registers; the clr8 handler also clears index - 1. */
1151     [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
1152     [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
1153     [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
1154     [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
1155     [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
1156     [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
1157     [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
1158     [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
1159     [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
1160     [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
1161     [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
1162     [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
1163     [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
1164     [MPTC]    = mac_read_clr4,
1165     [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
1166     [EERD]    = flash_eerd_read,
1167 
    /* Register ranges and tables served by the plain reader. */
1168     [CRCERRS ... MPC]     = &mac_readreg,
1169     [IP6AT ... IP6AT + 3] = &mac_readreg,    [IP4AT ... IP4AT + 6] = &mac_readreg,
1170     [FFLT ... FFLT + 6]   = &mac_readreg,
1171     [RA ... RA + 31]      = &mac_readreg,
1172     [WUPM ... WUPM + 31]  = &mac_readreg,
1173     [MTA ... MTA + E1000_MC_TBL_SIZE - 1]   = &mac_readreg,
1174     [VFTA ... VFTA + E1000_VLAN_FILTER_TBL_SIZE - 1] = &mac_readreg,
1175     [FFMT ... FFMT + 254] = &mac_readreg,
1176     [FFVT ... FFVT + 254] = &mac_readreg,
1177     [PBM ... PBM + 16383] = &mac_readreg,
1178 };
1179 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1180 
/*
 * MMIO write dispatch table, indexed by register index (the same index
 * that e1000_mmio_write() derives from the access address).
 *
 *  - putreg(X) and the "= &mac_writereg" ranges install the plain writer.
 *  - The remaining entries name a dedicated handler; note that set_tctl
 *    serves both TCTL and TDT, and set_dlen both TDLEN and RDLEN.
 *
 * Slots that are not listed stay NULL.  e1000_mmio_write() then consults
 * macreg_readops to tell a read-only register from an unknown one.
 * NWRITEOPS is the number of slots in the table.
 */
1181 #define putreg(x)    [x] = mac_writereg
1182 typedef void (*writeops)(E1000State *, int, uint32_t);
1183 static const writeops macreg_writeops[] = {
1184     putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
1185     putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
1186     putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
1187     putreg(IPAV),     putreg(WUC),
1188     putreg(WUS),
1189 
    /* Registers with side effects or masked storage. */
1190     [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL]  = set_tctl,
1191     [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]   = set_ics,
1192     [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]   = set_rdt,
1193     [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]   = set_icr,
1194     [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL]  = set_ctrl,
1195     [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV]  = set_16bit,
1196     [ITR]    = set_16bit,  [TDFH]   = set_11bit,      [TDFT]  = set_11bit,
1197     [TDFHS]  = set_13bit,  [TDFTS]  = set_13bit,      [TDFPC] = set_13bit,
1198     [RDFH]   = set_13bit,  [RDFT]   = set_13bit,      [RDFHS] = set_13bit,
1199     [RDFTS]  = set_13bit,  [RDFPC]  = set_13bit,      [AIT]   = set_16bit,
1200 
    /* Register ranges and tables. */
1201     [IP6AT ... IP6AT + 3] = &mac_writereg, [IP4AT ... IP4AT + 6] = &mac_writereg,
1202     [FFLT ... FFLT + 6]   = &set_11bit,
1203     [RA ... RA + 31]      = &mac_writereg,
1204     [WUPM ... WUPM + 31]  = &mac_writereg,
1205     [MTA ... MTA + E1000_MC_TBL_SIZE - 1] = &mac_writereg,
1206     [VFTA ... VFTA + E1000_VLAN_FILTER_TBL_SIZE - 1] = &mac_writereg,
1207     [FFMT ... FFMT + 254] = &set_4bit,     [FFVT ... FFVT + 254] = &mac_writereg,
1208     [PBM ... PBM + 16383] = &mac_writereg,
1209 };
1210 
1211 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1212 
/*
 * Per-register access attributes consulted by e1000_mmio_read() and
 * e1000_mmio_write() before dispatching.
 *
 * MAC_ACCESS_PARTIAL marks a register that is only partially implemented
 * (the MMIO handlers emit a debug message when it is accessed).
 * MAC_ACCESS_FLAG_NEEDED marks a register that is only accessible when the
 * compat flag stored in the upper bits of the entry is set in
 * s->compat_flags.
 */
1213 enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
1214 
/* Build a table entry: compat flag E1000_FLAG_<x> in bits 2 and up, plus
 * the "flag needed" bit. */
1215 #define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
1216 /* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
1217  * f - flag bits (up to 6 possible flags)
1218  * n - flag needed
1219  * p - partially implemented */
1220 static const uint8_t mac_reg_access[0x8000] = {
1221     [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
1222     [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
1223     [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
1224     [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
1225     [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
1226     [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
1227     [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
1228     [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
1229     [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
1230     [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
1231     [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
1232     [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
1233     [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
1234     [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
1235     [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
1236     [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
1237     [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
1238     [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
1239     [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
1240     [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
1241     [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
1242     [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
1243     [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
1244     [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
1245     [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
1246     [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
1247     [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
1248     [BPTC]    = markflag(MAC),
1249 
    /* Gated on the MAC flag and additionally only partially implemented. */
1250     [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1251     [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1252     [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1253     [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1254     [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1255     [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1256     [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1257     [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1258     [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1259     [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1260     [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
1261 };
1262 
1263 static void
1264 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1265                  unsigned size)
1266 {
1267     E1000State *s = opaque;
1268     unsigned int index = (addr & 0x1ffff) >> 2;
1269 
1270     if (index < NWRITEOPS && macreg_writeops[index]) {
1271         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1272             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1273             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1274                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1275                        "It is not fully implemented.\n", index<<2);
1276             }
1277             macreg_writeops[index](s, index, val);
1278         } else {    /* "flag needed" bit is set, but the flag is not active */
1279             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1280                    index<<2);
1281         }
1282     } else if (index < NREADOPS && macreg_readops[index]) {
1283         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1284                index<<2, val);
1285     } else {
1286         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1287                index<<2, val);
1288     }
1289 }
1290 
1291 static uint64_t
1292 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1293 {
1294     E1000State *s = opaque;
1295     unsigned int index = (addr & 0x1ffff) >> 2;
1296 
1297     if (index < NREADOPS && macreg_readops[index]) {
1298         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1299             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1300             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1301                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1302                        "It is not fully implemented.\n", index<<2);
1303             }
1304             return macreg_readops[index](s, index);
1305         } else {    /* "flag needed" bit is set, but the flag is not active */
1306             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1307                    index<<2);
1308         }
1309     } else {
1310         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1311     }
1312     return 0;
1313 }
1314 
1315 static const MemoryRegionOps e1000_mmio_ops = {
1316     .read = e1000_mmio_read,
1317     .write = e1000_mmio_write,
1318     .endianness = DEVICE_LITTLE_ENDIAN,
1319     .impl = {
1320         .min_access_size = 4,
1321         .max_access_size = 4,
1322     },
1323 };
1324 
1325 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1326                               unsigned size)
1327 {
1328     E1000State *s = opaque;
1329 
1330     (void)s;
1331     return 0;
1332 }
1333 
1334 static void e1000_io_write(void *opaque, hwaddr addr,
1335                            uint64_t val, unsigned size)
1336 {
1337     E1000State *s = opaque;
1338 
1339     (void)s;
1340 }
1341 
1342 static const MemoryRegionOps e1000_io_ops = {
1343     .read = e1000_io_read,
1344     .write = e1000_io_write,
1345     .endianness = DEVICE_LITTLE_ENDIAN,
1346 };
1347 
/*
 * Field-test predicate for the migration description: true only when the
 * incoming stream has version_id 1.  The opaque argument is unused.
 */
static bool is_version_1(void *opaque, int version_id)
{
    if (version_id != 1) {
        return false;
    }
    return true;
}
1352 
1353 static int e1000_pre_save(void *opaque)
1354 {
1355     E1000State *s = opaque;
1356     NetClientState *nc = qemu_get_queue(s->nic);
1357 
1358     /*
1359      * If link is down and auto-negotiation is supported and ongoing,
1360      * complete auto-negotiation immediately. This allows us to look
1361      * at MII_BMSR_AN_COMP to infer link status on load.
1362      */
1363     if (nc->link_down && have_autoneg(s)) {
1364         s->phy_reg[MII_BMSR] |= MII_BMSR_AN_COMP;
1365     }
1366 
1367     /* Decide which set of props to migrate in the main structure */
1368     if (chkflag(TSO) || !s->use_tso_for_migration) {
1369         /* Either we're migrating with the extra subsection, in which
1370          * case the mig_props is always 'props' OR
1371          * we've not got the subsection, but 'props' was the last
1372          * updated.
1373          */
1374         s->mig_props = s->tx.props;
1375     } else {
1376         /* We're not using the subsection, and 'tso_props' was
1377          * the last updated.
1378          */
1379         s->mig_props = s->tx.tso_props;
1380     }
1381     return 0;
1382 }
1383 
1384 static int e1000_post_load(void *opaque, int version_id)
1385 {
1386     E1000State *s = opaque;
1387     NetClientState *nc = qemu_get_queue(s->nic);
1388 
1389     s->mit_ide = 0;
1390     s->mit_timer_on = true;
1391     timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
1392 
1393     /* nc.link_down can't be migrated, so infer link_down according
1394      * to link status bit in mac_reg[STATUS].
1395      * Alternatively, restart link negotiation if it was in progress. */
1396     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1397 
1398     if (have_autoneg(s) && !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
1399         nc->link_down = false;
1400         timer_mod(s->autoneg_timer,
1401                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1402     }
1403 
1404     s->tx.props = s->mig_props;
1405     if (!s->received_tx_tso) {
1406         /* We received only one set of offload data (tx.props)
1407          * and haven't got tx.tso_props.  The best we can do
1408          * is dupe the data.
1409          */
1410         s->tx.tso_props = s->mig_props;
1411     }
1412     return 0;
1413 }
1414 
1415 static int e1000_tx_tso_post_load(void *opaque, int version_id)
1416 {
1417     E1000State *s = opaque;
1418     s->received_tx_tso = true;
1419     return 0;
1420 }
1421 
1422 static bool e1000_full_mac_needed(void *opaque)
1423 {
1424     E1000State *s = opaque;
1425 
1426     return chkflag(MAC);
1427 }
1428 
1429 static bool e1000_tso_state_needed(void *opaque)
1430 {
1431     E1000State *s = opaque;
1432 
1433     return chkflag(TSO);
1434 }
1435 
/*
 * Migration subsection carrying the interrupt mitigation state: the RDTR,
 * RADV, TADV and ITR registers plus the mit_irq_level flag.  No .needed
 * callback is set for this subsection.
 * NOTE(review): the field list presumably defines the on-the-wire layout;
 * do not reorder it without checking migration compatibility.
 */
1436 static const VMStateDescription vmstate_e1000_mit_state = {
1437     .name = "e1000/mit_state",
1438     .version_id = 1,
1439     .minimum_version_id = 1,
1440     .fields = (const VMStateField[]) {
1441         VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1442         VMSTATE_UINT32(mac_reg[RADV], E1000State),
1443         VMSTATE_UINT32(mac_reg[TADV], E1000State),
1444         VMSTATE_UINT32(mac_reg[ITR], E1000State),
1445         VMSTATE_BOOL(mit_irq_level, E1000State),
1446         VMSTATE_END_OF_LIST()
1447     }
1448 };
1449 
/*
 * Migration subsection carrying the complete mac_reg array (0x8000
 * entries).  It is gated by e1000_full_mac_needed(), i.e. by the MAC
 * compat flag.
 */
1450 static const VMStateDescription vmstate_e1000_full_mac_state = {
1451     .name = "e1000/full_mac_state",
1452     .version_id = 1,
1453     .minimum_version_id = 1,
1454     .needed = e1000_full_mac_needed,
1455     .fields = (const VMStateField[]) {
1456         VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
1457         VMSTATE_END_OF_LIST()
1458     }
1459 };
1460 
/*
 * Migration subsection carrying tx.tso_props, the second set of TX offload
 * properties.  It is gated by e1000_tso_state_needed() (the TSO compat
 * flag); on load, e1000_tx_tso_post_load() records that it was received.
 * NOTE(review): the field list presumably defines the on-the-wire layout;
 * do not reorder it without checking migration compatibility.
 */
1461 static const VMStateDescription vmstate_e1000_tx_tso_state = {
1462     .name = "e1000/tx_tso_state",
1463     .version_id = 1,
1464     .minimum_version_id = 1,
1465     .needed = e1000_tso_state_needed,
1466     .post_load = e1000_tx_tso_post_load,
1467     .fields = (const VMStateField[]) {
1468         VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
1469         VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
1470         VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
1471         VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
1472         VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
1473         VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
1474         VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
1475         VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
1476         VMSTATE_UINT16(tx.tso_props.mss, E1000State),
1477         VMSTATE_INT8(tx.tso_props.ip, E1000State),
1478         VMSTATE_INT8(tx.tso_props.tcp, E1000State),
1479         VMSTATE_END_OF_LIST()
1480     }
1481 };
1482 
/*
 * Main migration description of the device (version 2, accepting streams
 * from version 1 on).
 *
 * e1000_pre_save() fills mig_props before saving; e1000_post_load() copies
 * it back into the tx state and reconstructs the link state afterwards.
 * The three subsections listed at the end carry the interrupt mitigation
 * state, the full MAC register file and the TSO property set.
 * NOTE(review): the field list presumably defines the on-the-wire layout;
 * do not reorder it without checking migration compatibility.
 */
1483 static const VMStateDescription vmstate_e1000 = {
1484     .name = "e1000",
1485     .version_id = 2,
1486     .minimum_version_id = 1,
1487     .pre_save = e1000_pre_save,
1488     .post_load = e1000_post_load,
1489     .fields = (const VMStateField[]) {
1490         VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        /* This 4-byte placeholder applies only when is_version_1() holds. */
1491         VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1492         VMSTATE_UNUSED(4), /* Was mmio_base.  */
1493         VMSTATE_UINT32(rxbuf_size, E1000State),
1494         VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1495         VMSTATE_UINT32(eecd_state.val_in, E1000State),
1496         VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1497         VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1498         VMSTATE_UINT16(eecd_state.reading, E1000State),
1499         VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* TX offload properties chosen by e1000_pre_save(). */
1500         VMSTATE_UINT8(mig_props.ipcss, E1000State),
1501         VMSTATE_UINT8(mig_props.ipcso, E1000State),
1502         VMSTATE_UINT16(mig_props.ipcse, E1000State),
1503         VMSTATE_UINT8(mig_props.tucss, E1000State),
1504         VMSTATE_UINT8(mig_props.tucso, E1000State),
1505         VMSTATE_UINT16(mig_props.tucse, E1000State),
1506         VMSTATE_UINT32(mig_props.paylen, E1000State),
1507         VMSTATE_UINT8(mig_props.hdr_len, E1000State),
1508         VMSTATE_UINT16(mig_props.mss, E1000State),
1509         VMSTATE_UINT16(tx.size, E1000State),
1510         VMSTATE_UINT16(tx.tso_frames, E1000State),
1511         VMSTATE_UINT8(tx.sum_needed, E1000State),
1512         VMSTATE_INT8(mig_props.ip, E1000State),
1513         VMSTATE_INT8(mig_props.tcp, E1000State),
1514         VMSTATE_BUFFER(tx.header, E1000State),
1515         VMSTATE_BUFFER(tx.data, E1000State),
1516         VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1517         VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individually migrated MAC registers. */
1518         VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1519         VMSTATE_UINT32(mac_reg[EECD], E1000State),
1520         VMSTATE_UINT32(mac_reg[EERD], E1000State),
1521         VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1522         VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1523         VMSTATE_UINT32(mac_reg[ICR], E1000State),
1524         VMSTATE_UINT32(mac_reg[ICS], E1000State),
1525         VMSTATE_UINT32(mac_reg[IMC], E1000State),
1526         VMSTATE_UINT32(mac_reg[IMS], E1000State),
1527         VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1528         VMSTATE_UINT32(mac_reg[MANC], E1000State),
1529         VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1530         VMSTATE_UINT32(mac_reg[MPC], E1000State),
1531         VMSTATE_UINT32(mac_reg[PBA], E1000State),
1532         VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1533         VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1534         VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1535         VMSTATE_UINT32(mac_reg[RDH], E1000State),
1536         VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1537         VMSTATE_UINT32(mac_reg[RDT], E1000State),
1538         VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1539         VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1540         VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1541         VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1542         VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1543         VMSTATE_UINT32(mac_reg[TDH], E1000State),
1544         VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1545         VMSTATE_UINT32(mac_reg[TDT], E1000State),
1546         VMSTATE_UINT32(mac_reg[TORH], E1000State),
1547         VMSTATE_UINT32(mac_reg[TORL], E1000State),
1548         VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1549         VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1550         VMSTATE_UINT32(mac_reg[TPR], E1000State),
1551         VMSTATE_UINT32(mac_reg[TPT], E1000State),
1552         VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1553         VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1554         VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Register tables: receive addresses, multicast and VLAN filters. */
1555         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1556         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, E1000_MC_TBL_SIZE),
1557         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA,
1558                                  E1000_VLAN_FILTER_TBL_SIZE),
1559         VMSTATE_END_OF_LIST()
1560     },
1561     .subsections = (const VMStateDescription * const []) {
1562         &vmstate_e1000_mit_state,
1563         &vmstate_e1000_full_mac_state,
1564         &vmstate_e1000_tx_tso_state,
1565         NULL
1566     }
1567 };
1568 
1569 /*
1570  * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1571  * Note: A valid DevId will be inserted during pci_e1000_realize().
1572  */
/*
 * The template holds 64 16-bit words.  The two words marked DevId (word
 * indices 11 and 13) are 0 here and are placeholders.  pci_e1000_realize()
 * passes this template, the class device id and the MAC address to
 * e1000x_core_prepare_eeprom() to build the per-device eeprom_data.
 */
1573 static const uint16_t e1000_eeprom_template[64] = {
1574     0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1575     0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1576     0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1577     0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1578     0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1579     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1580     0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1581     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1582 };
1583 
1584 /* PCI interface */
1585 
1586 static void
1587 e1000_mmio_setup(E1000State *d)
1588 {
1589     int i;
1590     const uint32_t excluded_regs[] = {
1591         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1592         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1593     };
1594 
1595     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1596                           "e1000-mmio", PNPMMIO_SIZE);
1597     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1598     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1599         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1600                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1601     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1602 }
1603 
1604 static void
1605 pci_e1000_uninit(PCIDevice *dev)
1606 {
1607     E1000State *d = E1000(dev);
1608 
1609     timer_free(d->autoneg_timer);
1610     timer_free(d->mit_timer);
1611     timer_free(d->flush_queue_timer);
1612     qemu_del_nic(d->nic);
1613 }
1614 
1615 static NetClientInfo net_e1000_info = {
1616     .type = NET_CLIENT_DRIVER_NIC,
1617     .size = sizeof(NICState),
1618     .can_receive = e1000_can_receive,
1619     .receive = e1000_receive,
1620     .receive_iov = e1000_receive_iov,
1621     .link_status_changed = e1000_set_link_status,
1622 };
1623 
1624 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1625                                 uint32_t val, int len)
1626 {
1627     E1000State *s = E1000(pci_dev);
1628 
1629     pci_default_write_config(pci_dev, address, val, len);
1630 
1631     if (range_covers_byte(address, len, PCI_COMMAND) &&
1632         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1633         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1634     }
1635 }
1636 
1637 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1638 {
1639     DeviceState *dev = DEVICE(pci_dev);
1640     E1000State *d = E1000(pci_dev);
1641     uint8_t *pci_conf;
1642     uint8_t *macaddr;
1643 
1644     pci_dev->config_write = e1000_write_config;
1645 
1646     pci_conf = pci_dev->config;
1647 
1648     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1649     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1650 
1651     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1652 
1653     e1000_mmio_setup(d);
1654 
1655     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1656 
1657     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1658 
1659     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1660     macaddr = d->conf.macaddr.a;
1661 
1662     e1000x_core_prepare_eeprom(d->eeprom_data,
1663                                e1000_eeprom_template,
1664                                sizeof(e1000_eeprom_template),
1665                                PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1666                                macaddr);
1667 
1668     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1669                           object_get_typename(OBJECT(d)), dev->id,
1670                           &dev->mem_reentrancy_guard, d);
1671 
1672     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1673 
1674     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1675     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1676     d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1677                                         e1000_flush_queue_timer, d);
1678 }
1679 
1680 static Property e1000_properties[] = {
1681     DEFINE_NIC_PROPERTIES(E1000State, conf),
1682     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1683                     compat_flags, E1000_FLAG_MAC_BIT, true),
1684     DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1685                     compat_flags, E1000_FLAG_TSO_BIT, true),
1686     DEFINE_PROP_BIT("init-vet", E1000State,
1687                     compat_flags, E1000_FLAG_VET_BIT, true),
1688     DEFINE_PROP_END_OF_LIST(),
1689 };
1690 
/*
 * Static description of one concrete e1000 model.  One entry is handed to
 * e1000_class_init() as class data for every registered type.
 */
typedef struct E1000Info {
    /* QOM type name the model is registered under */
    const char *name;
    /* PCI device ID copied into the PCI device class */
    uint16_t device_id;
    /* PCI revision copied into the PCI device class */
    uint8_t revision;
    /* value copied into E1000BaseClass.phy_id2 */
    uint16_t phy_id2;
} E1000Info;
1697 
1698 static void e1000_class_init(ObjectClass *klass, void *data)
1699 {
1700     DeviceClass *dc = DEVICE_CLASS(klass);
1701     ResettableClass *rc = RESETTABLE_CLASS(klass);
1702     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1703     E1000BaseClass *e = E1000_CLASS(klass);
1704     const E1000Info *info = data;
1705 
1706     k->realize = pci_e1000_realize;
1707     k->exit = pci_e1000_uninit;
1708     k->romfile = "efi-e1000.rom";
1709     k->vendor_id = PCI_VENDOR_ID_INTEL;
1710     k->device_id = info->device_id;
1711     k->revision = info->revision;
1712     e->phy_id2 = info->phy_id2;
1713     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1714     rc->phases.hold = e1000_reset_hold;
1715     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1716     dc->desc = "Intel Gigabit Ethernet";
1717     dc->vmsd = &vmstate_e1000;
1718     device_class_set_props(dc, e1000_properties);
1719 }
1720 
1721 static void e1000_instance_init(Object *obj)
1722 {
1723     E1000State *n = E1000(obj);
1724     device_add_bootindex_property(obj, &n->conf.bootindex,
1725                                   "bootindex", "/ethernet-phy@0",
1726                                   DEVICE(n));
1727 }
1728 
1729 static const TypeInfo e1000_base_info = {
1730     .name          = TYPE_E1000_BASE,
1731     .parent        = TYPE_PCI_DEVICE,
1732     .instance_size = sizeof(E1000State),
1733     .instance_init = e1000_instance_init,
1734     .class_size    = sizeof(E1000BaseClass),
1735     .abstract      = true,
1736     .interfaces = (InterfaceInfo[]) {
1737         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1738         { },
1739     },
1740 };
1741 
1742 static const E1000Info e1000_devices[] = {
1743     {
1744         .name      = "e1000",
1745         .device_id = E1000_DEV_ID_82540EM,
1746         .revision  = 0x03,
1747         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1748     },
1749     {
1750         .name      = "e1000-82544gc",
1751         .device_id = E1000_DEV_ID_82544GC_COPPER,
1752         .revision  = 0x03,
1753         .phy_id2   = E1000_PHY_ID2_82544x,
1754     },
1755     {
1756         .name      = "e1000-82545em",
1757         .device_id = E1000_DEV_ID_82545EM_COPPER,
1758         .revision  = 0x03,
1759         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1760     },
1761 };
1762 
1763 static void e1000_register_types(void)
1764 {
1765     int i;
1766 
1767     type_register_static(&e1000_base_info);
1768     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1769         const E1000Info *info = &e1000_devices[i];
1770         TypeInfo type_info = {};
1771 
1772         type_info.name = info->name;
1773         type_info.parent = TYPE_E1000_BASE;
1774         type_info.class_data = (void *)info;
1775         type_info.class_init = e1000_class_init;
1776 
1777         type_register(&type_info);
1778     }
1779 }
1780 
1781 type_init(e1000_register_types)
1782