xref: /openbmc/qemu/hw/net/e1000.c (revision 30b6852c)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/pci/pci.h"
30 #include "hw/qdev-properties.h"
31 #include "migration/vmstate.h"
32 #include "net/eth.h"
33 #include "net/net.h"
34 #include "net/checksum.h"
35 #include "sysemu/sysemu.h"
36 #include "sysemu/dma.h"
37 #include "qemu/iov.h"
38 #include "qemu/module.h"
39 #include "qemu/range.h"
40 
41 #include "e1000x_common.h"
42 #include "trace.h"
43 #include "qom/object.h"
44 
/*
 * Ethernet broadcast destination address (six 0xff octets).  Compared against
 * the first six bytes of a frame by the TX statistics and RX filter code.
 */
static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
46 
/* #define E1000_DEBUG */

#ifdef E1000_DEBUG
/* Debug categories; each enumerator is a bit position within debugflags. */
enum {
    DEBUG_GENERAL,
    DEBUG_IO,
    DEBUG_MMIO,
    DEBUG_INTERRUPT,
    DEBUG_RX,
    DEBUG_TX,
    DEBUG_MDIC,
    DEBUG_EEPROM,
    DEBUG_UNKNOWN,
    DEBUG_TXSUM,
    DEBUG_TXERR,
    DEBUG_RXERR,
    DEBUG_RXFILTER,
    DEBUG_PHY,
    DEBUG_NOTYET,
};
#define DBGBIT(x)    (1 << DEBUG_##x)

/* Categories that are printed; TXERR and GENERAL are on by default. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/* Print a message to stderr when category "what" is enabled in debugflags. */
#define DBGOUT(what, fmt, ...)                                  \
    do {                                                        \
        if (debugflags & DBGBIT(what)) {                        \
            fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__);     \
        }                                                       \
    } while (0)
#else
/* Debugging disabled: DBGOUT() expands to an empty statement. */
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif
66 
/*
 * NOTE(review): IOPORT_SIZE and PNPMMIO_SIZE are not used in the visible part
 * of this file; presumably they size the "io" and "mmio" memory regions of
 * E1000State -- confirm against the realize code.
 */
#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/*
 * Number of leading frame bytes gathered into a linear buffer for filtering
 * when the first iovec element is shorter than this (see e1000_receive_iov).
 * Presumably 14 bytes of Ethernet header plus a 4-byte VLAN tag.
 */
#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
72 
73 /*
74  * HW models:
75  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
76  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
77  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
78  *  Others never tested
79  */
80 
81 struct E1000State_st {
82     /*< private >*/
83     PCIDevice parent_obj;
84     /*< public >*/
85 
86     NICState *nic;
87     NICConf conf;
88     MemoryRegion mmio;
89     MemoryRegion io;
90 
91     uint32_t mac_reg[0x8000];
92     uint16_t phy_reg[0x20];
93     uint16_t eeprom_data[64];
94 
95     uint32_t rxbuf_size;
96     uint32_t rxbuf_min_shift;
97     struct e1000_tx {
98         unsigned char header[256];
99         unsigned char vlan_header[4];
100         /* Fields vlan and data must not be reordered or separated. */
101         unsigned char vlan[4];
102         unsigned char data[0x10000];
103         uint16_t size;
104         unsigned char vlan_needed;
105         unsigned char sum_needed;
106         bool cptse;
107         e1000x_txd_props props;
108         e1000x_txd_props tso_props;
109         uint16_t tso_frames;
110     } tx;
111 
112     struct {
113         uint32_t val_in;    /* shifted in from guest driver */
114         uint16_t bitnum_in;
115         uint16_t bitnum_out;
116         uint16_t reading;
117         uint32_t old_eecd;
118     } eecd_state;
119 
120     QEMUTimer *autoneg_timer;
121 
122     QEMUTimer *mit_timer;      /* Mitigation timer. */
123     bool mit_timer_on;         /* Mitigation timer is running. */
124     bool mit_irq_level;        /* Tracks interrupt pin level. */
125     uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
126 
127     QEMUTimer *flush_queue_timer;
128 
129 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
130 #define E1000_FLAG_AUTONEG_BIT 0
131 #define E1000_FLAG_MIT_BIT 1
132 #define E1000_FLAG_MAC_BIT 2
133 #define E1000_FLAG_TSO_BIT 3
134 #define E1000_FLAG_VET_BIT 4
135 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
136 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
137 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
138 #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
139 #define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)
140 
141     uint32_t compat_flags;
142     bool received_tx_tso;
143     bool use_tso_for_migration;
144     e1000x_txd_props mig_props;
145 };
146 typedef struct E1000State_st E1000State;
147 
/*
 * Test compatibility flag E1000_FLAG_<x>.  Relies on a variable named "s"
 * (an E1000State pointer) being in scope at the point of use; yields a
 * non-zero value when the flag is set.
 */
#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
149 
150 struct E1000BaseClass {
151     PCIDeviceClass parent_class;
152     uint16_t phy_id2;
153 };
154 typedef struct E1000BaseClass E1000BaseClass;
155 
/* QOM type name of the abstract e1000 base device. */
#define TYPE_E1000_BASE "e1000-base"

/*
 * Declares the QOM cast helpers for E1000State / E1000BaseClass; this file
 * uses E1000_GET_CLASS() from e1000_reset().  NOTE(review): the full set of
 * generated helpers is defined by DECLARE_OBJ_CHECKERS in qom/object.h.
 */
DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
                     E1000, TYPE_E1000_BASE)
160 
161 
162 static void
163 e1000_link_up(E1000State *s)
164 {
165     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
166 
167     /* E1000_STATUS_LU is tested by e1000_can_receive() */
168     qemu_flush_queued_packets(qemu_get_queue(s->nic));
169 }
170 
171 static void
172 e1000_autoneg_done(E1000State *s)
173 {
174     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
175 
176     /* E1000_STATUS_LU is tested by e1000_can_receive() */
177     qemu_flush_queued_packets(qemu_get_queue(s->nic));
178 }
179 
180 static bool
181 have_autoneg(E1000State *s)
182 {
183     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
184 }
185 
186 static void
187 set_phy_ctrl(E1000State *s, int index, uint16_t val)
188 {
189     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
190     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
191                                    MII_CR_RESET |
192                                    MII_CR_RESTART_AUTO_NEG);
193 
194     /*
195      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
196      * migrate during auto negotiation, after migration the link will be
197      * down.
198      */
199     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
200         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
201     }
202 }
203 
/*
 * Per-register PHY write handlers, indexed by PHY register number.  set_mdic()
 * calls the handler when one is present and otherwise stores the written
 * value directly into phy_reg[].
 */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [PHY_CTRL] = set_phy_ctrl,
};

/* Number of slots in phyreg_writeops[]; set_mdic() bounds-checks against it. */
enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
209 
210 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
211 static const char phy_regcap[0x20] = {
212     [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
213     [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
214     [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
215     [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
216     [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
217     [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
218     [PHY_AUTONEG_EXP] = PHY_R,
219 };
220 
/*
 * Reset values of the PHY registers; e1000_reset() copies this table into
 * phy_reg[] after zeroing it, then fills in PHY_ID2 from the device class.
 *
 * PHY_ID2 documented in 8254x_GBe_SDM.pdf, p. 250
 */
static const uint16_t phy_reg_init[] = {
    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
                   MII_CR_FULL_DUPLEX |
                   MII_CR_AUTO_NEG_EN,

    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
                   MII_SR_LINK_STATUS |   /* link initially up */
                   MII_SR_AUTONEG_CAPS |
                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
                   MII_SR_PREAMBLE_SUPPRESS |
                   MII_SR_EXTENDED_STATUS |
                   MII_SR_10T_HD_CAPS |
                   MII_SR_10T_FD_CAPS |
                   MII_SR_100X_HD_CAPS |
                   MII_SR_100X_FD_CAPS,

    [PHY_ID1] = 0x141,
    /* [PHY_ID2] configured per DevId, from e1000_reset() */
    [PHY_AUTONEG_ADV] = 0xde1,
    [PHY_LP_ABILITY] = 0x1e0,
    [PHY_1000T_CTRL] = 0x0e00,
    [PHY_1000T_STATUS] = 0x3c00,
    [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
};
248 
/*
 * Reset values of the MAC registers; e1000_reset() copies this table into
 * mac_reg[] after zeroing it.  The STATUS value has the link-up bit set; the
 * reset code applies the link-down register update afterwards when the
 * backend link is down.
 */
static const uint32_t mac_reg_init[] = {
    [PBA]     = 0x00100030,
    [LEDCTL]  = 0x602,
    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};
262 
/*
 * Lower *curr to value when value is a usable (non-zero) delay that is
 * smaller than the current one.  *curr == 0 means "no delay chosen yet", in
 * which case any non-zero value is taken.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;                 /* zero is never a candidate */
    }
    if (*curr != 0 && *curr <= value) {
        return;                 /* existing delay is already at least as short */
    }
    *curr = value;
}
271 
272 static void
273 set_interrupt_cause(E1000State *s, int index, uint32_t val)
274 {
275     PCIDevice *d = PCI_DEVICE(s);
276     uint32_t pending_ints;
277     uint32_t mit_delay;
278 
279     s->mac_reg[ICR] = val;
280 
281     /*
282      * Make sure ICR and ICS registers have the same value.
283      * The spec says that the ICS register is write-only.  However in practice,
284      * on real hardware ICS is readable, and for reads it has the same value as
285      * ICR (except that ICS does not have the clear on read behaviour of ICR).
286      *
287      * The VxWorks PRO/1000 driver uses this behaviour.
288      */
289     s->mac_reg[ICS] = val;
290 
291     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
292     if (!s->mit_irq_level && pending_ints) {
293         /*
294          * Here we detect a potential raising edge. We postpone raising the
295          * interrupt line if we are inside the mitigation delay window
296          * (s->mit_timer_on == 1).
297          * We provide a partial implementation of interrupt mitigation,
298          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
299          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
300          * RADV; relative timers based on TIDV and RDTR are not implemented.
301          */
302         if (s->mit_timer_on) {
303             return;
304         }
305         if (chkflag(MIT)) {
306             /* Compute the next mitigation delay according to pending
307              * interrupts and the current values of RADV (provided
308              * RDTR!=0), TADV and ITR.
309              * Then rearm the timer.
310              */
311             mit_delay = 0;
312             if (s->mit_ide &&
313                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
314                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
315             }
316             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
317                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
318             }
319             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
320 
321             /*
322              * According to e1000 SPEC, the Ethernet controller guarantees
323              * a maximum observable interrupt rate of 7813 interrupts/sec.
324              * Thus if mit_delay < 500 then the delay should be set to the
325              * minimum delay possible which is 500.
326              */
327             mit_delay = (mit_delay < 500) ? 500 : mit_delay;
328 
329             s->mit_timer_on = 1;
330             timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
331                       mit_delay * 256);
332             s->mit_ide = 0;
333         }
334     }
335 
336     s->mit_irq_level = (pending_ints != 0);
337     pci_set_irq(d, s->mit_irq_level);
338 }
339 
340 static void
341 e1000_mit_timer(void *opaque)
342 {
343     E1000State *s = opaque;
344 
345     s->mit_timer_on = 0;
346     /* Call set_interrupt_cause to update the irq level (if necessary). */
347     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
348 }
349 
350 static void
351 set_ics(E1000State *s, int index, uint32_t val)
352 {
353     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
354         s->mac_reg[IMS]);
355     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
356 }
357 
358 static void
359 e1000_autoneg_timer(void *opaque)
360 {
361     E1000State *s = opaque;
362     if (!qemu_get_queue(s->nic)->link_down) {
363         e1000_autoneg_done(s);
364         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
365     }
366 }
367 
368 static bool e1000_vet_init_need(void *opaque)
369 {
370     E1000State *s = opaque;
371 
372     return chkflag(VET);
373 }
374 
375 static void e1000_reset(void *opaque)
376 {
377     E1000State *d = opaque;
378     E1000BaseClass *edc = E1000_GET_CLASS(d);
379     uint8_t *macaddr = d->conf.macaddr.a;
380 
381     timer_del(d->autoneg_timer);
382     timer_del(d->mit_timer);
383     timer_del(d->flush_queue_timer);
384     d->mit_timer_on = 0;
385     d->mit_irq_level = 0;
386     d->mit_ide = 0;
387     memset(d->phy_reg, 0, sizeof d->phy_reg);
388     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
389     d->phy_reg[PHY_ID2] = edc->phy_id2;
390     memset(d->mac_reg, 0, sizeof d->mac_reg);
391     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
392     d->rxbuf_min_shift = 1;
393     memset(&d->tx, 0, sizeof d->tx);
394 
395     if (qemu_get_queue(d->nic)->link_down) {
396         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
397     }
398 
399     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
400 
401     if (e1000_vet_init_need(d)) {
402         d->mac_reg[VET] = ETH_P_VLAN;
403     }
404 }
405 
406 static void
407 set_ctrl(E1000State *s, int index, uint32_t val)
408 {
409     /* RST is self clearing */
410     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
411 }
412 
413 static void
414 e1000_flush_queue_timer(void *opaque)
415 {
416     E1000State *s = opaque;
417 
418     qemu_flush_queued_packets(qemu_get_queue(s->nic));
419 }
420 
421 static void
422 set_rx_control(E1000State *s, int index, uint32_t val)
423 {
424     s->mac_reg[RCTL] = val;
425     s->rxbuf_size = e1000x_rxbufsize(val);
426     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
427     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
428            s->mac_reg[RCTL]);
429     timer_mod(s->flush_queue_timer,
430               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
431 }
432 
433 static void
434 set_mdic(E1000State *s, int index, uint32_t val)
435 {
436     uint32_t data = val & E1000_MDIC_DATA_MASK;
437     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
438 
439     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
440         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
441     else if (val & E1000_MDIC_OP_READ) {
442         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
443         if (!(phy_regcap[addr] & PHY_R)) {
444             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
445             val |= E1000_MDIC_ERROR;
446         } else
447             val = (val ^ data) | s->phy_reg[addr];
448     } else if (val & E1000_MDIC_OP_WRITE) {
449         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
450         if (!(phy_regcap[addr] & PHY_W)) {
451             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
452             val |= E1000_MDIC_ERROR;
453         } else {
454             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
455                 phyreg_writeops[addr](s, index, data);
456             } else {
457                 s->phy_reg[addr] = data;
458             }
459         }
460     }
461     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
462 
463     if (val & E1000_MDIC_INT_EN) {
464         set_ics(s, 0, E1000_ICR_MDAC);
465     }
466 }
467 
468 static uint32_t
469 get_eecd(E1000State *s, int index)
470 {
471     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
472 
473     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
474            s->eecd_state.bitnum_out, s->eecd_state.reading);
475     if (!s->eecd_state.reading ||
476         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
477           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
478         ret |= E1000_EECD_DO;
479     return ret;
480 }
481 
482 static void
483 set_eecd(E1000State *s, int index, uint32_t val)
484 {
485     uint32_t oldval = s->eecd_state.old_eecd;
486 
487     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
488             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
489     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
490         return;
491     }
492     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
493         s->eecd_state.val_in = 0;
494         s->eecd_state.bitnum_in = 0;
495         s->eecd_state.bitnum_out = 0;
496         s->eecd_state.reading = 0;
497     }
498     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
499         return;
500     }
501     if (!(E1000_EECD_SK & val)) {               /* falling edge */
502         s->eecd_state.bitnum_out++;
503         return;
504     }
505     s->eecd_state.val_in <<= 1;
506     if (val & E1000_EECD_DI)
507         s->eecd_state.val_in |= 1;
508     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
509         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
510         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
511             EEPROM_READ_OPCODE_MICROWIRE);
512     }
513     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
514            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
515            s->eecd_state.reading);
516 }
517 
518 static uint32_t
519 flash_eerd_read(E1000State *s, int x)
520 {
521     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
522 
523     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
524         return (s->mac_reg[EERD]);
525 
526     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
527         return (E1000_EEPROM_RW_REG_DONE | r);
528 
529     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
530            E1000_EEPROM_RW_REG_DONE | r);
531 }
532 
/*
 * Compute an Internet checksum over part of a packet and store it in place.
 *
 * @data: start of the packet
 * @n:    packet length in bytes
 * @sloc: offset at which the 16-bit checksum is stored (big-endian)
 * @css:  offset of the first byte covered by the checksum
 * @cse:  offset of the last byte covered, or 0 for "up to the end"
 *
 * Nothing is written unless both checksum bytes fit inside the (possibly
 * cse-shortened) packet.  A zero-length packet is rejected explicitly so
 * that "n - 1" cannot wrap around, and a start offset at or beyond the end
 * is treated as an empty range instead of relying on a wrapped length.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;
    uint32_t covered;

    if (cse && cse < n) {
        n = cse + 1;
    }
    if (n < 2 || sloc >= n - 1) {
        return;
    }

    covered = (css < n) ? n - css : 0;
    sum = net_checksum_add(covered, data + css);
    stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
}
545 
546 static inline void
547 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
548 {
549     if (!memcmp(arr, bcast, sizeof bcast)) {
550         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
551     } else if (arr[0] & 1) {
552         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
553     }
554 }
555 
556 static void
557 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
558 {
559     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
560                                     PTC1023, PTC1522 };
561 
562     NetClientState *nc = qemu_get_queue(s->nic);
563     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
564         qemu_receive_packet(nc, buf, size);
565     } else {
566         qemu_send_packet(nc, buf, size);
567     }
568     inc_tx_bcast_or_mcast_count(s, buf);
569     e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
570 }
571 
572 static void
573 xmit_seg(E1000State *s)
574 {
575     uint16_t len;
576     unsigned int frames = s->tx.tso_frames, css, sofar;
577     struct e1000_tx *tp = &s->tx;
578     struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
579 
580     if (tp->cptse) {
581         css = props->ipcss;
582         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
583                frames, tp->size, css);
584         if (props->ip) {    /* IPv4 */
585             stw_be_p(tp->data+css+2, tp->size - css);
586             stw_be_p(tp->data+css+4,
587                      lduw_be_p(tp->data + css + 4) + frames);
588         } else {         /* IPv6 */
589             stw_be_p(tp->data+css+4, tp->size - css);
590         }
591         css = props->tucss;
592         len = tp->size - css;
593         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
594         if (props->tcp) {
595             sofar = frames * props->mss;
596             stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
597             if (props->paylen - sofar > props->mss) {
598                 tp->data[css + 13] &= ~9;    /* PSH, FIN */
599             } else if (frames) {
600                 e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
601             }
602         } else {    /* UDP */
603             stw_be_p(tp->data+css+4, len);
604         }
605         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
606             unsigned int phsum;
607             // add pseudo-header length before checksum calculation
608             void *sp = tp->data + props->tucso;
609 
610             phsum = lduw_be_p(sp) + len;
611             phsum = (phsum >> 16) + (phsum & 0xffff);
612             stw_be_p(sp, phsum);
613         }
614         tp->tso_frames++;
615     }
616 
617     if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
618         putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
619     }
620     if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
621         putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
622     }
623     if (tp->vlan_needed) {
624         memmove(tp->vlan, tp->data, 4);
625         memmove(tp->data, tp->data + 4, 8);
626         memcpy(tp->data + 8, tp->vlan_header, 4);
627         e1000_send_packet(s, tp->vlan, tp->size + 4);
628     } else {
629         e1000_send_packet(s, tp->data, tp->size);
630     }
631 
632     e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
633     e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
634     s->mac_reg[GPTC] = s->mac_reg[TPT];
635     s->mac_reg[GOTCL] = s->mac_reg[TOTL];
636     s->mac_reg[GOTCH] = s->mac_reg[TOTH];
637 }
638 
639 static void
640 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
641 {
642     PCIDevice *d = PCI_DEVICE(s);
643     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
644     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
645     unsigned int split_size = txd_lower & 0xffff, bytes, sz;
646     unsigned int msh = 0xfffff;
647     uint64_t addr;
648     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
649     struct e1000_tx *tp = &s->tx;
650 
651     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
652     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
653         if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
654             e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
655             s->use_tso_for_migration = 1;
656             tp->tso_frames = 0;
657         } else {
658             e1000x_read_tx_ctx_descr(xp, &tp->props);
659             s->use_tso_for_migration = 0;
660         }
661         return;
662     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
663         // data descriptor
664         if (tp->size == 0) {
665             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
666         }
667         tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
668     } else {
669         // legacy descriptor
670         tp->cptse = 0;
671     }
672 
673     if (e1000x_vlan_enabled(s->mac_reg) &&
674         e1000x_is_vlan_txd(txd_lower) &&
675         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
676         tp->vlan_needed = 1;
677         stw_be_p(tp->vlan_header,
678                       le16_to_cpu(s->mac_reg[VET]));
679         stw_be_p(tp->vlan_header + 2,
680                       le16_to_cpu(dp->upper.fields.special));
681     }
682 
683     addr = le64_to_cpu(dp->buffer_addr);
684     if (tp->cptse) {
685         msh = tp->tso_props.hdr_len + tp->tso_props.mss;
686         do {
687             bytes = split_size;
688             if (tp->size >= msh) {
689                 goto eop;
690             }
691             if (tp->size + bytes > msh)
692                 bytes = msh - tp->size;
693 
694             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
695             pci_dma_read(d, addr, tp->data + tp->size, bytes);
696             sz = tp->size + bytes;
697             if (sz >= tp->tso_props.hdr_len
698                 && tp->size < tp->tso_props.hdr_len) {
699                 memmove(tp->header, tp->data, tp->tso_props.hdr_len);
700             }
701             tp->size = sz;
702             addr += bytes;
703             if (sz == msh) {
704                 xmit_seg(s);
705                 memmove(tp->data, tp->header, tp->tso_props.hdr_len);
706                 tp->size = tp->tso_props.hdr_len;
707             }
708             split_size -= bytes;
709         } while (bytes && split_size);
710     } else {
711         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
712         pci_dma_read(d, addr, tp->data + tp->size, split_size);
713         tp->size += split_size;
714     }
715 
716 eop:
717     if (!(txd_lower & E1000_TXD_CMD_EOP))
718         return;
719     if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
720         xmit_seg(s);
721     }
722     tp->tso_frames = 0;
723     tp->sum_needed = 0;
724     tp->vlan_needed = 0;
725     tp->size = 0;
726     tp->cptse = 0;
727 }
728 
729 static uint32_t
730 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
731 {
732     PCIDevice *d = PCI_DEVICE(s);
733     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
734 
735     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
736         return 0;
737     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
738                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
739     dp->upper.data = cpu_to_le32(txd_upper);
740     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
741                   &dp->upper, sizeof(dp->upper));
742     return E1000_ICR_TXDW;
743 }
744 
745 static uint64_t tx_desc_base(E1000State *s)
746 {
747     uint64_t bah = s->mac_reg[TDBAH];
748     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
749 
750     return (bah << 32) + bal;
751 }
752 
753 static void
754 start_xmit(E1000State *s)
755 {
756     PCIDevice *d = PCI_DEVICE(s);
757     dma_addr_t base;
758     struct e1000_tx_desc desc;
759     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
760 
761     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
762         DBGOUT(TX, "tx disabled\n");
763         return;
764     }
765 
766     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
767         base = tx_desc_base(s) +
768                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
769         pci_dma_read(d, base, &desc, sizeof(desc));
770 
771         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
772                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
773                desc.upper.data);
774 
775         process_tx_desc(s, &desc);
776         cause |= txdesc_writeback(s, base, &desc);
777 
778         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
779             s->mac_reg[TDH] = 0;
780         /*
781          * the following could happen only if guest sw assigns
782          * bogus values to TDT/TDLEN.
783          * there's nothing too intelligent we could do about this.
784          */
785         if (s->mac_reg[TDH] == tdh_start ||
786             tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
787             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
788                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
789             break;
790         }
791     }
792     set_ics(s, 0, cause);
793 }
794 
795 static int
796 receive_filter(E1000State *s, const uint8_t *buf, int size)
797 {
798     uint32_t rctl = s->mac_reg[RCTL];
799     int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
800 
801     if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
802         e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
803         uint16_t vid = lduw_be_p(buf + 14);
804         uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
805                                  ((vid >> 5) & 0x7f));
806         if ((vfta & (1 << (vid & 0x1f))) == 0)
807             return 0;
808     }
809 
810     if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
811         return 1;
812     }
813 
814     if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
815         e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
816         return 1;
817     }
818 
819     if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
820         e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
821         return 1;
822     }
823 
824     return e1000x_rx_group_filter(s->mac_reg, buf);
825 }
826 
827 static void
828 e1000_set_link_status(NetClientState *nc)
829 {
830     E1000State *s = qemu_get_nic_opaque(nc);
831     uint32_t old_status = s->mac_reg[STATUS];
832 
833     if (nc->link_down) {
834         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
835     } else {
836         if (have_autoneg(s) &&
837             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
838             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
839         } else {
840             e1000_link_up(s);
841         }
842     }
843 
844     if (s->mac_reg[STATUS] != old_status)
845         set_ics(s, 0, E1000_ICR_LSC);
846 }
847 
848 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
849 {
850     int bufs;
851     /* Fast-path short packets */
852     if (total_size <= s->rxbuf_size) {
853         return s->mac_reg[RDH] != s->mac_reg[RDT];
854     }
855     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
856         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
857     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
858         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
859             s->mac_reg[RDT] - s->mac_reg[RDH];
860     } else {
861         return false;
862     }
863     return total_size <= bufs * s->rxbuf_size;
864 }
865 
866 static bool
867 e1000_can_receive(NetClientState *nc)
868 {
869     E1000State *s = qemu_get_nic_opaque(nc);
870 
871     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
872         e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
873 }
874 
875 static uint64_t rx_desc_base(E1000State *s)
876 {
877     uint64_t bah = s->mac_reg[RDBAH];
878     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
879 
880     return (bah << 32) + bal;
881 }
882 
883 static void
884 e1000_receiver_overrun(E1000State *s, size_t size)
885 {
886     trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
887     e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
888     e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
889     set_ics(s, 0, E1000_ICS_RXO);
890 }
891 
892 static ssize_t
893 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
894 {
895     E1000State *s = qemu_get_nic_opaque(nc);
896     PCIDevice *d = PCI_DEVICE(s);
897     struct e1000_rx_desc desc;
898     dma_addr_t base;
899     unsigned int n, rdt;
900     uint32_t rdh_start;
901     uint16_t vlan_special = 0;
902     uint8_t vlan_status = 0;
903     uint8_t min_buf[MIN_BUF_SIZE];
904     struct iovec min_iov;
905     uint8_t *filter_buf = iov->iov_base;
906     size_t size = iov_size(iov, iovcnt);
907     size_t iov_ofs = 0;
908     size_t desc_offset;
909     size_t desc_size;
910     size_t total_size;
911 
912     if (!e1000x_hw_rx_enabled(s->mac_reg)) {
913         return -1;
914     }
915 
916     if (timer_pending(s->flush_queue_timer)) {
917         return 0;
918     }
919 
920     /* Pad to minimum Ethernet frame length */
921     if (size < sizeof(min_buf)) {
922         iov_to_buf(iov, iovcnt, 0, min_buf, size);
923         memset(&min_buf[size], 0, sizeof(min_buf) - size);
924         min_iov.iov_base = filter_buf = min_buf;
925         min_iov.iov_len = size = sizeof(min_buf);
926         iovcnt = 1;
927         iov = &min_iov;
928     } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
929         /* This is very unlikely, but may happen. */
930         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
931         filter_buf = min_buf;
932     }
933 
934     /* Discard oversized packets if !LPE and !SBP. */
935     if (e1000x_is_oversized(s->mac_reg, size)) {
936         return size;
937     }
938 
939     if (!receive_filter(s, filter_buf, size)) {
940         return size;
941     }
942 
943     if (e1000x_vlan_enabled(s->mac_reg) &&
944         e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
945         vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
946         iov_ofs = 4;
947         if (filter_buf == iov->iov_base) {
948             memmove(filter_buf + 4, filter_buf, 12);
949         } else {
950             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
951             while (iov->iov_len <= iov_ofs) {
952                 iov_ofs -= iov->iov_len;
953                 iov++;
954             }
955         }
956         vlan_status = E1000_RXD_STAT_VP;
957         size -= 4;
958     }
959 
960     rdh_start = s->mac_reg[RDH];
961     desc_offset = 0;
962     total_size = size + e1000x_fcs_len(s->mac_reg);
963     if (!e1000_has_rxbufs(s, total_size)) {
964         e1000_receiver_overrun(s, total_size);
965         return -1;
966     }
967     do {
968         desc_size = total_size - desc_offset;
969         if (desc_size > s->rxbuf_size) {
970             desc_size = s->rxbuf_size;
971         }
972         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
973         pci_dma_read(d, base, &desc, sizeof(desc));
974         desc.special = vlan_special;
975         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
976         if (desc.buffer_addr) {
977             if (desc_offset < size) {
978                 size_t iov_copy;
979                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
980                 size_t copy_size = size - desc_offset;
981                 if (copy_size > s->rxbuf_size) {
982                     copy_size = s->rxbuf_size;
983                 }
984                 do {
985                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
986                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
987                     copy_size -= iov_copy;
988                     ba += iov_copy;
989                     iov_ofs += iov_copy;
990                     if (iov_ofs == iov->iov_len) {
991                         iov++;
992                         iov_ofs = 0;
993                     }
994                 } while (copy_size);
995             }
996             desc_offset += desc_size;
997             desc.length = cpu_to_le16(desc_size);
998             if (desc_offset >= total_size) {
999                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
1000             } else {
1001                 /* Guest zeroing out status is not a hardware requirement.
1002                    Clear EOP in case guest didn't do it. */
1003                 desc.status &= ~E1000_RXD_STAT_EOP;
1004             }
1005         } else { // as per intel docs; skip descriptors with null buf addr
1006             DBGOUT(RX, "Null RX descriptor!!\n");
1007         }
1008         pci_dma_write(d, base, &desc, sizeof(desc));
1009 
1010         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
1011             s->mac_reg[RDH] = 0;
1012         /* see comment in start_xmit; same here */
1013         if (s->mac_reg[RDH] == rdh_start ||
1014             rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
1015             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1016                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1017             e1000_receiver_overrun(s, total_size);
1018             return -1;
1019         }
1020     } while (desc_offset < total_size);
1021 
1022     e1000x_update_rx_total_stats(s->mac_reg, size, total_size);
1023 
1024     n = E1000_ICS_RXT0;
1025     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1026         rdt += s->mac_reg[RDLEN] / sizeof(desc);
1027     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1028         s->rxbuf_min_shift)
1029         n |= E1000_ICS_RXDMT0;
1030 
1031     set_ics(s, 0, n);
1032 
1033     return size;
1034 }
1035 
1036 static ssize_t
1037 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1038 {
1039     const struct iovec iov = {
1040         .iov_base = (uint8_t *)buf,
1041         .iov_len = size
1042     };
1043 
1044     return e1000_receive_iov(nc, &iov, 1);
1045 }
1046 
1047 static uint32_t
1048 mac_readreg(E1000State *s, int index)
1049 {
1050     return s->mac_reg[index];
1051 }
1052 
1053 static uint32_t
1054 mac_low4_read(E1000State *s, int index)
1055 {
1056     return s->mac_reg[index] & 0xf;
1057 }
1058 
1059 static uint32_t
1060 mac_low11_read(E1000State *s, int index)
1061 {
1062     return s->mac_reg[index] & 0x7ff;
1063 }
1064 
1065 static uint32_t
1066 mac_low13_read(E1000State *s, int index)
1067 {
1068     return s->mac_reg[index] & 0x1fff;
1069 }
1070 
1071 static uint32_t
1072 mac_low16_read(E1000State *s, int index)
1073 {
1074     return s->mac_reg[index] & 0xffff;
1075 }
1076 
1077 static uint32_t
1078 mac_icr_read(E1000State *s, int index)
1079 {
1080     uint32_t ret = s->mac_reg[ICR];
1081 
1082     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1083     set_interrupt_cause(s, 0, 0);
1084     return ret;
1085 }
1086 
1087 static uint32_t
1088 mac_read_clr4(E1000State *s, int index)
1089 {
1090     uint32_t ret = s->mac_reg[index];
1091 
1092     s->mac_reg[index] = 0;
1093     return ret;
1094 }
1095 
1096 static uint32_t
1097 mac_read_clr8(E1000State *s, int index)
1098 {
1099     uint32_t ret = s->mac_reg[index];
1100 
1101     s->mac_reg[index] = 0;
1102     s->mac_reg[index-1] = 0;
1103     return ret;
1104 }
1105 
1106 static void
1107 mac_writereg(E1000State *s, int index, uint32_t val)
1108 {
1109     uint32_t macaddr[2];
1110 
1111     s->mac_reg[index] = val;
1112 
1113     if (index == RA + 1) {
1114         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1115         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1116         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1117     }
1118 }
1119 
1120 static void
1121 set_rdt(E1000State *s, int index, uint32_t val)
1122 {
1123     s->mac_reg[index] = val & 0xffff;
1124     if (e1000_has_rxbufs(s, 1)) {
1125         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1126     }
1127 }
1128 
1129 static void
1130 set_16bit(E1000State *s, int index, uint32_t val)
1131 {
1132     s->mac_reg[index] = val & 0xffff;
1133 }
1134 
1135 static void
1136 set_dlen(E1000State *s, int index, uint32_t val)
1137 {
1138     s->mac_reg[index] = val & 0xfff80;
1139 }
1140 
1141 static void
1142 set_tctl(E1000State *s, int index, uint32_t val)
1143 {
1144     s->mac_reg[index] = val;
1145     s->mac_reg[TDT] &= 0xffff;
1146     start_xmit(s);
1147 }
1148 
1149 static void
1150 set_icr(E1000State *s, int index, uint32_t val)
1151 {
1152     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1153     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1154 }
1155 
1156 static void
1157 set_imc(E1000State *s, int index, uint32_t val)
1158 {
1159     s->mac_reg[IMS] &= ~val;
1160     set_ics(s, 0, 0);
1161 }
1162 
1163 static void
1164 set_ims(E1000State *s, int index, uint32_t val)
1165 {
1166     s->mac_reg[IMS] |= val;
1167     set_ics(s, 0, 0);
1168 }
1169 
/* getreg(x): table entry routing reads of register x to mac_readreg(). */
#define getreg(x)    [x] = mac_readreg
/* Read handler: takes the device state and the register index. */
typedef uint32_t (*readops)(E1000State *, int);
/*
 * MMIO read dispatch table, indexed by register index (the index that
 * e1000_mmio_read() derives from the access address).  Entries not listed
 * here stay NULL and are reported as unknown reads by e1000_mmio_read().
 */
static const readops macreg_readops[] = {
    /* Plain reads: the stored value is returned unchanged */
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),

    /* Read-to-clear: clr8 also zeroes the register at index - 1 */
    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    /* Registers with dedicated read handlers */
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,
    /* Masked reads: only the low 13, 11 or 16 bits are returned */
    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
    [RDFPC]   = mac_low13_read,
    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
    [TDFPC]   = mac_low13_read,
    [AIT]     = mac_low16_read,

    /* Register ranges (GNU range designators) */
    [CRCERRS ... MPC]   = &mac_readreg,
    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
    [FFLT ... FFLT+6]   = &mac_low11_read,
    [RA ... RA+31]      = &mac_readreg,
    [WUPM ... WUPM+31]  = &mac_readreg,
    [MTA ... MTA+127]   = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
    [FFMT ... FFMT+254] = &mac_low4_read,
    [FFVT ... FFVT+254] = &mac_readreg,
    [PBM ... PBM+16383] = &mac_readreg,
};
/* Number of slots in macreg_readops; used to bounds-check the index */
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1225 
/* putreg(x): table entry routing writes of register x to mac_writereg(). */
#define putreg(x)    [x] = mac_writereg
/* Write handler: takes the device state, the register index and the value. */
typedef void (*writeops)(E1000State *, int, uint32_t);
/*
 * MMIO write dispatch table, indexed by register index.  Registers with no
 * entry here are either reported as read-only (when a read handler exists)
 * or as unknown by e1000_mmio_write().
 */
static const writeops macreg_writeops[] = {
    /* Plain writes: the value is stored unchanged */
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
    putreg(WUS),      putreg(AIT),

    /* Registers whose writes mask the value and/or trigger side effects */
    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
    [ITR]    = set_16bit,

    /* Register ranges (GNU range designators) */
    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
    [FFLT ... FFLT+6]   = &mac_writereg,
    [RA ... RA+31]      = &mac_writereg,
    [WUPM ... WUPM+31]  = &mac_writereg,
    [MTA ... MTA+127]   = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
    [PBM ... PBM+16383] = &mac_writereg,
};

/* Number of slots in macreg_writeops; used to bounds-check the index */
enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1256 
/* Low two bits of each mac_reg_access[] entry */
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

/* markflag(x): entry that requires compat flag E1000_FLAG_x to be set */
#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implemented
 *
 * The MMIO handlers allow an access when 'n' is clear, or when the flag
 * bits (entry >> 2) intersect s->compat_flags.  Registers without an entry
 * are zero, i.e. always accessible and not marked partial. */
static const uint8_t mac_reg_access[0x8000] = {
    /* Gated by the MIT flag */
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    /* Gated by the MAC flag */
    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    /* Gated by the MAC flag and only partially implemented */
    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1309 
1310 static void
1311 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1312                  unsigned size)
1313 {
1314     E1000State *s = opaque;
1315     unsigned int index = (addr & 0x1ffff) >> 2;
1316 
1317     if (index < NWRITEOPS && macreg_writeops[index]) {
1318         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1319             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1320             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1321                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1322                        "It is not fully implemented.\n", index<<2);
1323             }
1324             macreg_writeops[index](s, index, val);
1325         } else {    /* "flag needed" bit is set, but the flag is not active */
1326             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1327                    index<<2);
1328         }
1329     } else if (index < NREADOPS && macreg_readops[index]) {
1330         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1331                index<<2, val);
1332     } else {
1333         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1334                index<<2, val);
1335     }
1336 }
1337 
1338 static uint64_t
1339 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1340 {
1341     E1000State *s = opaque;
1342     unsigned int index = (addr & 0x1ffff) >> 2;
1343 
1344     if (index < NREADOPS && macreg_readops[index]) {
1345         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1346             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1347             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1348                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1349                        "It is not fully implemented.\n", index<<2);
1350             }
1351             return macreg_readops[index](s, index);
1352         } else {    /* "flag needed" bit is set, but the flag is not active */
1353             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1354                    index<<2);
1355         }
1356     } else {
1357         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1358     }
1359     return 0;
1360 }
1361 
/*
 * Callbacks for the register MMIO region (installed by e1000_mmio_setup()).
 * The device is little-endian and the handlers are implemented for 4-byte
 * accesses only.
 */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1371 
1372 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1373                               unsigned size)
1374 {
1375     E1000State *s = opaque;
1376 
1377     (void)s;
1378     return 0;
1379 }
1380 
1381 static void e1000_io_write(void *opaque, hwaddr addr,
1382                            uint64_t val, unsigned size)
1383 {
1384     E1000State *s = opaque;
1385 
1386     (void)s;
1387 }
1388 
/*
 * Callbacks for the I/O port region (installed by e1000_mmio_setup()).
 * Both handlers are no-op stubs.
 */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1394 
/*
 * Field test used by vmstate_e1000: true only when the incoming stream
 * is version 1.  The opaque device pointer is not consulted.
 */
static bool is_version_1(void *opaque, int version_id)
{
    if (version_id != 1) {
        return false;
    }
    return true;
}
1399 
1400 static int e1000_pre_save(void *opaque)
1401 {
1402     E1000State *s = opaque;
1403     NetClientState *nc = qemu_get_queue(s->nic);
1404 
1405     /*
1406      * If link is down and auto-negotiation is supported and ongoing,
1407      * complete auto-negotiation immediately. This allows us to look
1408      * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1409      */
1410     if (nc->link_down && have_autoneg(s)) {
1411         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1412     }
1413 
1414     /* Decide which set of props to migrate in the main structure */
1415     if (chkflag(TSO) || !s->use_tso_for_migration) {
1416         /* Either we're migrating with the extra subsection, in which
1417          * case the mig_props is always 'props' OR
1418          * we've not got the subsection, but 'props' was the last
1419          * updated.
1420          */
1421         s->mig_props = s->tx.props;
1422     } else {
1423         /* We're not using the subsection, and 'tso_props' was
1424          * the last updated.
1425          */
1426         s->mig_props = s->tx.tso_props;
1427     }
1428     return 0;
1429 }
1430 
1431 static int e1000_post_load(void *opaque, int version_id)
1432 {
1433     E1000State *s = opaque;
1434     NetClientState *nc = qemu_get_queue(s->nic);
1435 
1436     if (!chkflag(MIT)) {
1437         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1438             s->mac_reg[TADV] = 0;
1439         s->mit_irq_level = false;
1440     }
1441     s->mit_ide = 0;
1442     s->mit_timer_on = true;
1443     timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
1444 
1445     /* nc.link_down can't be migrated, so infer link_down according
1446      * to link status bit in mac_reg[STATUS].
1447      * Alternatively, restart link negotiation if it was in progress. */
1448     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1449 
1450     if (have_autoneg(s) &&
1451         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1452         nc->link_down = false;
1453         timer_mod(s->autoneg_timer,
1454                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1455     }
1456 
1457     s->tx.props = s->mig_props;
1458     if (!s->received_tx_tso) {
1459         /* We received only one set of offload data (tx.props)
1460          * and haven't got tx.tso_props.  The best we can do
1461          * is dupe the data.
1462          */
1463         s->tx.tso_props = s->mig_props;
1464     }
1465     return 0;
1466 }
1467 
1468 static int e1000_tx_tso_post_load(void *opaque, int version_id)
1469 {
1470     E1000State *s = opaque;
1471     s->received_tx_tso = true;
1472     return 0;
1473 }
1474 
1475 static bool e1000_mit_state_needed(void *opaque)
1476 {
1477     E1000State *s = opaque;
1478 
1479     return chkflag(MIT);
1480 }
1481 
1482 static bool e1000_full_mac_needed(void *opaque)
1483 {
1484     E1000State *s = opaque;
1485 
1486     return chkflag(MAC);
1487 }
1488 
1489 static bool e1000_tso_state_needed(void *opaque)
1490 {
1491     E1000State *s = opaque;
1492 
1493     return chkflag(TSO);
1494 }
1495 
/*
 * Optional subsection carrying the interrupt mitigation registers and the
 * mitigation IRQ level; included only when e1000_mit_state_needed()
 * returns true.
 */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1510 
/*
 * Optional subsection carrying the entire mac_reg[] array (0x8000 words);
 * included only when e1000_full_mac_needed() returns true.
 */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1521 
/*
 * Optional subsection carrying tx.tso_props; included only when
 * e1000_tso_state_needed() returns true.  On load,
 * e1000_tx_tso_post_load() records that the subsection was received.
 */
static const VMStateDescription vmstate_e1000_tx_tso_state = {
    .name = "e1000/tx_tso_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_tso_state_needed,
    .post_load = e1000_tx_tso_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
        VMSTATE_INT8(tx.tso_props.ip, E1000State),
        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1543 
/*
 * Main migration description of the device (stream versions 1 and 2 are
 * accepted).  e1000_pre_save() fills mig_props before saving and
 * e1000_post_load() copies it back into tx.props / tx.tso_props.
 *
 * NOTE(review): the order of the fields below is presumably part of the
 * migration stream layout -- confirm before reordering or inserting.
 */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* EEPROM access state */
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* TX offload properties (via mig_props) and in-flight TX state */
        VMSTATE_UINT8(mig_props.ipcss, E1000State),
        VMSTATE_UINT8(mig_props.ipcso, E1000State),
        VMSTATE_UINT16(mig_props.ipcse, E1000State),
        VMSTATE_UINT8(mig_props.tucss, E1000State),
        VMSTATE_UINT8(mig_props.tucso, E1000State),
        VMSTATE_UINT16(mig_props.tucse, E1000State),
        VMSTATE_UINT32(mig_props.paylen, E1000State),
        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
        VMSTATE_UINT16(mig_props.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(mig_props.ip, E1000State),
        VMSTATE_INT8(mig_props.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        /* EEPROM contents and PHY registers */
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individually migrated MAC registers */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Register ranges: RA (32 words), MTA and VFTA (128 words each) */
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    /* Optional subsections; each has its own .needed predicate */
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        &vmstate_e1000_tx_tso_state,
        NULL
    }
};
1628 
/*
 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
 * Note: A valid DevId will be inserted during pci_e1000_realize().
 *
 * The template holds 64 16-bit words.  pci_e1000_realize() passes it to
 * e1000x_core_prepare_eeprom() together with the device id and the MAC
 * address to build the per-device eeprom_data.
 */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1643 
1644 /* PCI interface */
1645 
1646 static void
1647 e1000_mmio_setup(E1000State *d)
1648 {
1649     int i;
1650     const uint32_t excluded_regs[] = {
1651         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1652         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1653     };
1654 
1655     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1656                           "e1000-mmio", PNPMMIO_SIZE);
1657     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1658     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1659         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1660                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1661     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1662 }
1663 
1664 static void
1665 pci_e1000_uninit(PCIDevice *dev)
1666 {
1667     E1000State *d = E1000(dev);
1668 
1669     timer_free(d->autoneg_timer);
1670     timer_free(d->mit_timer);
1671     timer_free(d->flush_queue_timer);
1672     qemu_del_nic(d->nic);
1673 }
1674 
/*
 * Net client callbacks of the device; handed to qemu_new_nic() in
 * pci_e1000_realize().
 */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .link_status_changed = e1000_set_link_status,
};
1683 
1684 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1685                                 uint32_t val, int len)
1686 {
1687     E1000State *s = E1000(pci_dev);
1688 
1689     pci_default_write_config(pci_dev, address, val, len);
1690 
1691     if (range_covers_byte(address, len, PCI_COMMAND) &&
1692         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1693         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1694     }
1695 }
1696 
1697 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1698 {
1699     DeviceState *dev = DEVICE(pci_dev);
1700     E1000State *d = E1000(pci_dev);
1701     uint8_t *pci_conf;
1702     uint8_t *macaddr;
1703 
1704     pci_dev->config_write = e1000_write_config;
1705 
1706     pci_conf = pci_dev->config;
1707 
1708     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1709     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1710 
1711     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1712 
1713     e1000_mmio_setup(d);
1714 
1715     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1716 
1717     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1718 
1719     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1720     macaddr = d->conf.macaddr.a;
1721 
1722     e1000x_core_prepare_eeprom(d->eeprom_data,
1723                                e1000_eeprom_template,
1724                                sizeof(e1000_eeprom_template),
1725                                PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1726                                macaddr);
1727 
1728     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1729                           object_get_typename(OBJECT(d)), dev->id, d);
1730 
1731     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1732 
1733     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1734     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1735     d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1736                                         e1000_flush_queue_timer, d);
1737 }
1738 
1739 static void qdev_e1000_reset(DeviceState *dev)
1740 {
1741     E1000State *d = E1000(dev);
1742     e1000_reset(d);
1743 }
1744 
1745 static Property e1000_properties[] = {
1746     DEFINE_NIC_PROPERTIES(E1000State, conf),
1747     DEFINE_PROP_BIT("autonegotiation", E1000State,
1748                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1749     DEFINE_PROP_BIT("mitigation", E1000State,
1750                     compat_flags, E1000_FLAG_MIT_BIT, true),
1751     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1752                     compat_flags, E1000_FLAG_MAC_BIT, true),
1753     DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1754                     compat_flags, E1000_FLAG_TSO_BIT, true),
1755     DEFINE_PROP_BIT("init-vet", E1000State,
1756                     compat_flags, E1000_FLAG_VET_BIT, true),
1757     DEFINE_PROP_END_OF_LIST(),
1758 };
1759 
/*
 * Constants that distinguish one emulated model from another.  One entry
 * per model is passed to e1000_class_init() as class data.
 */
typedef struct E1000Info {
    /* Name the concrete type is registered under. */
    const char *name;
    /* Copied into the PCI class device_id. */
    uint16_t   device_id;
    /* Copied into the PCI class revision. */
    uint8_t    revision;
    /* Copied into E1000BaseClass.phy_id2. */
    uint16_t   phy_id2;
} E1000Info;
1766 
1767 static void e1000_class_init(ObjectClass *klass, void *data)
1768 {
1769     DeviceClass *dc = DEVICE_CLASS(klass);
1770     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1771     E1000BaseClass *e = E1000_CLASS(klass);
1772     const E1000Info *info = data;
1773 
1774     k->realize = pci_e1000_realize;
1775     k->exit = pci_e1000_uninit;
1776     k->romfile = "efi-e1000.rom";
1777     k->vendor_id = PCI_VENDOR_ID_INTEL;
1778     k->device_id = info->device_id;
1779     k->revision = info->revision;
1780     e->phy_id2 = info->phy_id2;
1781     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1782     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1783     dc->desc = "Intel Gigabit Ethernet";
1784     dc->reset = qdev_e1000_reset;
1785     dc->vmsd = &vmstate_e1000;
1786     device_class_set_props(dc, e1000_properties);
1787 }
1788 
1789 static void e1000_instance_init(Object *obj)
1790 {
1791     E1000State *n = E1000(obj);
1792     device_add_bootindex_property(obj, &n->conf.bootindex,
1793                                   "bootindex", "/ethernet-phy@0",
1794                                   DEVICE(n));
1795 }
1796 
1797 static const TypeInfo e1000_base_info = {
1798     .name          = TYPE_E1000_BASE,
1799     .parent        = TYPE_PCI_DEVICE,
1800     .instance_size = sizeof(E1000State),
1801     .instance_init = e1000_instance_init,
1802     .class_size    = sizeof(E1000BaseClass),
1803     .abstract      = true,
1804     .interfaces = (InterfaceInfo[]) {
1805         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1806         { },
1807     },
1808 };
1809 
1810 static const E1000Info e1000_devices[] = {
1811     {
1812         .name      = "e1000",
1813         .device_id = E1000_DEV_ID_82540EM,
1814         .revision  = 0x03,
1815         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1816     },
1817     {
1818         .name      = "e1000-82544gc",
1819         .device_id = E1000_DEV_ID_82544GC_COPPER,
1820         .revision  = 0x03,
1821         .phy_id2   = E1000_PHY_ID2_82544x,
1822     },
1823     {
1824         .name      = "e1000-82545em",
1825         .device_id = E1000_DEV_ID_82545EM_COPPER,
1826         .revision  = 0x03,
1827         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1828     },
1829 };
1830 
1831 static void e1000_register_types(void)
1832 {
1833     int i;
1834 
1835     type_register_static(&e1000_base_info);
1836     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1837         const E1000Info *info = &e1000_devices[i];
1838         TypeInfo type_info = {};
1839 
1840         type_info.name = info->name;
1841         type_info.parent = TYPE_E1000_BASE;
1842         type_info.class_data = (void *)info;
1843         type_info.class_init = e1000_class_init;
1844 
1845         type_register(&type_info);
1846     }
1847 }
1848 
1849 type_init(e1000_register_types)
1850