xref: /openbmc/qemu/hw/net/e1000.c (revision 44602af8)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/pci/pci.h"
30 #include "hw/qdev-properties.h"
31 #include "migration/vmstate.h"
32 #include "net/eth.h"
33 #include "net/net.h"
34 #include "net/checksum.h"
35 #include "sysemu/sysemu.h"
36 #include "sysemu/dma.h"
37 #include "qemu/iov.h"
38 #include "qemu/module.h"
39 #include "qemu/range.h"
40 
41 #include "e1000x_common.h"
42 #include "trace.h"
43 #include "qom/object.h"
44 
/* Ethernet broadcast destination address: six octets of 0xff. */
45 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
46 
47 /* #define E1000_DEBUG */
48 
/*
 * Optional debug tracing.  With E1000_DEBUG defined, DBGOUT(what, fmt, ...)
 * prints an "e1000: "-prefixed message to stderr when the DEBUG_<what> bit is
 * set in debugflags (TXERR and GENERAL by default).  Without E1000_DEBUG,
 * DBGOUT expands to an empty statement and its arguments are not evaluated.
 */
49 #ifdef E1000_DEBUG
50 enum {
51     DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
52     DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
53     DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
54     DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
55 };
/* Bit mask for one debug category, e.g. DBGBIT(TX) == 1 << DEBUG_TX. */
56 #define DBGBIT(x)    (1<<DEBUG_##x)
57 static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
58 
59 #define DBGOUT(what, fmt, ...) do { \
60     if (debugflags & DBGBIT(what)) \
61         fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
62     } while (0)
63 #else
64 #define DBGOUT(what, fmt, ...) do {} while (0)
65 #endif
66 
/*
 * Size constants.  IOPORT_SIZE and PNPMMIO_SIZE are not referenced in the
 * visible part of this file; presumably they size the I/O and MMIO regions
 * (TODO confirm against the realize code).
 */
67 #define IOPORT_SIZE       0x40
68 #define PNPMMIO_SIZE      0x20000
69 #define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
70 
/*
 * Number of leading bytes the receive path needs contiguously for filtering;
 * presumably a 14-byte Ethernet header plus a 4-byte VLAN tag.
 */
71 #define MAXIMUM_ETHERNET_HDR_LEN (14+4)
72 
73 /*
74  * HW models:
75  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
76  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
77  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
78  *  Others never tested
79  */
80 
/*
 * Per-device state of one emulated e1000 NIC: register files, in-flight
 * transmit state, EEPROM bit-bang state, timers and migration compatibility
 * flags.
 */
81 struct E1000State_st {
82     /*< private >*/
83     PCIDevice parent_obj;
84     /*< public >*/
85 
86     NICState *nic;
87     NICConf conf;
88     MemoryRegion mmio;
89     MemoryRegion io;
90 
    /* Register files, indexed by register constants (ICR, RCTL, PHY_CTRL...) */
91     uint32_t mac_reg[0x8000];
92     uint16_t phy_reg[0x20];
93     uint16_t eeprom_data[64];
94 
    /* Both derived from RCTL whenever the guest writes it (set_rx_control). */
95     uint32_t rxbuf_size;
96     uint32_t rxbuf_min_shift;
    /* Packet currently being assembled from TX descriptors. */
97     struct e1000_tx {
        /* Copy of the first hdr_len bytes, replayed for each TSO segment. */
98         unsigned char header[256];
        /* VLAN tag (VET + descriptor "special" field) to insert on send. */
99         unsigned char vlan_header[4];
100         /* Fields vlan and data must not be reordered or separated. */
101         unsigned char vlan[4];
102         unsigned char data[0x10000];
        /* Number of valid bytes accumulated in data[]. */
103         uint16_t size;
104         unsigned char vlan_needed;
        /* Checksum-offload option bits taken from the data descriptor. */
105         unsigned char sum_needed;
        /* True while the current descriptor requests TCP segmentation. */
106         bool cptse;
107         e1000x_txd_props props;
108         e1000x_txd_props tso_props;
        /* Segments already emitted for the current TSO packet. */
109         uint16_t tso_frames;
        /* Set while start_xmit() is running; blocks re-entry. */
110         bool busy;
111     } tx;
112 
    /* State of the bit-banged EEPROM interface driven through EECD. */
113     struct {
114         uint32_t val_in;    /* shifted in from guest driver */
115         uint16_t bitnum_in;
116         uint16_t bitnum_out;
117         uint16_t reading;
118         uint32_t old_eecd;
119     } eecd_state;
120 
121     QEMUTimer *autoneg_timer;
122 
123     QEMUTimer *mit_timer;      /* Mitigation timer. */
124     bool mit_timer_on;         /* Mitigation timer is running. */
125     bool mit_irq_level;        /* Tracks interrupt pin level. */
126     uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
127 
    /*
     * Armed for 1000 ms on every RCTL write; while it is pending the
     * receive path refuses packets.
     */
128     QEMUTimer *flush_queue_timer;
129 
130 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
131 #define E1000_FLAG_AUTONEG_BIT 0
132 #define E1000_FLAG_MIT_BIT 1
133 #define E1000_FLAG_MAC_BIT 2
134 #define E1000_FLAG_TSO_BIT 3
135 #define E1000_FLAG_VET_BIT 4
136 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
137 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
138 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
139 #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
140 #define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)
141 
    /* Bitwise OR of the E1000_FLAG_* values above; tested via chkflag(). */
142     uint32_t compat_flags;
143     bool received_tx_tso;
    /* Records whether the last context descriptor seen had TSE set. */
144     bool use_tso_for_migration;
145     e1000x_txd_props mig_props;
146 };
147 typedef struct E1000State_st E1000State;
148 
/*
 * Test compatibility flag E1000_FLAG_<x>.  Expands to a non-zero value when
 * the flag is set; requires an E1000State pointer named 's' in scope.
 */
149 #define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
150 
/*
 * Class data shared by the e1000 device variants.  phy_id2 is the value
 * e1000_reset() loads into the PHY_ID2 register.
 */
151 struct E1000BaseClass {
152     PCIDeviceClass parent_class;
153     uint16_t phy_id2;
154 };
155 typedef struct E1000BaseClass E1000BaseClass;
156 
/* QOM type name of the abstract base type for the e1000 variants. */
157 #define TYPE_E1000_BASE "e1000-base"
158 
/*
 * Presumably generates the QOM cast helpers for this type; of these,
 * E1000_GET_CLASS() is used by e1000_reset() below (TODO confirm the full
 * set against the DECLARE_OBJ_CHECKERS definition).
 */
159 DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
160                      E1000, TYPE_E1000_BASE)
161 
162 
163 static void
164 e1000_link_up(E1000State *s)
165 {
166     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
167 
168     /* E1000_STATUS_LU is tested by e1000_can_receive() */
169     qemu_flush_queued_packets(qemu_get_queue(s->nic));
170 }
171 
172 static void
173 e1000_autoneg_done(E1000State *s)
174 {
175     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
176 
177     /* E1000_STATUS_LU is tested by e1000_can_receive() */
178     qemu_flush_queued_packets(qemu_get_queue(s->nic));
179 }
180 
181 static bool
182 have_autoneg(E1000State *s)
183 {
184     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
185 }
186 
187 static void
188 set_phy_ctrl(E1000State *s, int index, uint16_t val)
189 {
190     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
191     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
192                                    MII_CR_RESET |
193                                    MII_CR_RESTART_AUTO_NEG);
194 
195     /*
196      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
197      * migrate during auto negotiation, after migration the link will be
198      * down.
199      */
200     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
201         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
202     }
203 }
204 
/*
 * Write handlers for PHY registers that need side effects, indexed by PHY
 * register number.  Registers without an entry are stored directly by
 * set_mdic().
 */
205 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
206     [PHY_CTRL] = set_phy_ctrl,
207 };
208 
/* Number of slots in phyreg_writeops; set_mdic() bounds-checks against it. */
209 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
210 
/*
 * Access capabilities of each PHY register, indexed by register number.
 * Entries left at zero are neither readable nor writable; set_mdic() reports
 * E1000_MDIC_ERROR for accesses the table does not allow.
 */
211 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
212 static const char phy_regcap[0x20] = {
213     [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
214     [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
215     [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
216     [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
217     [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
218     [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
219     [PHY_AUTONEG_EXP] = PHY_R,
220 };
221 
/*
 * Reset values of the PHY registers.  e1000_reset() zeroes phy_reg, copies
 * this table over its start and then sets PHY_ID2 from the device class.
 */
222 /* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
223 static const uint16_t phy_reg_init[] = {
224     [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
225                    MII_CR_FULL_DUPLEX |
226                    MII_CR_AUTO_NEG_EN,
227 
228     [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
229                    MII_SR_LINK_STATUS |   /* link initially up */
230                    MII_SR_AUTONEG_CAPS |
231                    /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
232                    MII_SR_PREAMBLE_SUPPRESS |
233                    MII_SR_EXTENDED_STATUS |
234                    MII_SR_10T_HD_CAPS |
235                    MII_SR_10T_FD_CAPS |
236                    MII_SR_100X_HD_CAPS |
237                    MII_SR_100X_FD_CAPS,
238 
239     [PHY_ID1] = 0x141,
240     /* [PHY_ID2] configured per DevId, from e1000_reset() */
241     [PHY_AUTONEG_ADV] = 0xde1,
242     [PHY_LP_ABILITY] = 0x1e0,
243     [PHY_1000T_CTRL] = 0x0e00,
244     [PHY_1000T_STATUS] = 0x3c00,
245     [M88E1000_PHY_SPEC_CTRL] = 0x360,
246     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
247     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
248 };
249 
/*
 * Non-zero reset values of the MAC registers.  e1000_reset() zeroes mac_reg
 * and then copies this table over its start; registers not listed here
 * therefore reset to zero.
 */
250 static const uint32_t mac_reg_init[] = {
251     [PBA]     = 0x00100030,
252     [LEDCTL]  = 0x602,
253     [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
254                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
255     [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
256                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
257                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
258                 E1000_STATUS_LU,
259     [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
260                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
261                 E1000_MANC_RMCP_EN,
262 };
263 
/*
 * Keep the smallest non-zero delay seen so far.
 *
 * *curr == 0 means "no delay recorded yet".  A zero value is ignored;
 * otherwise value replaces *curr when *curr is unset or larger than value.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;
    }
    if (*curr != 0 && *curr <= value) {
        return;
    }
    *curr = value;
}
272 
273 static void
274 set_interrupt_cause(E1000State *s, int index, uint32_t val)
275 {
276     PCIDevice *d = PCI_DEVICE(s);
277     uint32_t pending_ints;
278     uint32_t mit_delay;
279 
280     s->mac_reg[ICR] = val;
281 
282     /*
283      * Make sure ICR and ICS registers have the same value.
284      * The spec says that the ICS register is write-only.  However in practice,
285      * on real hardware ICS is readable, and for reads it has the same value as
286      * ICR (except that ICS does not have the clear on read behaviour of ICR).
287      *
288      * The VxWorks PRO/1000 driver uses this behaviour.
289      */
290     s->mac_reg[ICS] = val;
291 
292     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
293     if (!s->mit_irq_level && pending_ints) {
294         /*
295          * Here we detect a potential raising edge. We postpone raising the
296          * interrupt line if we are inside the mitigation delay window
297          * (s->mit_timer_on == 1).
298          * We provide a partial implementation of interrupt mitigation,
299          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
300          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
301          * RADV; relative timers based on TIDV and RDTR are not implemented.
302          */
303         if (s->mit_timer_on) {
304             return;
305         }
306         if (chkflag(MIT)) {
307             /* Compute the next mitigation delay according to pending
308              * interrupts and the current values of RADV (provided
309              * RDTR!=0), TADV and ITR.
310              * Then rearm the timer.
311              */
312             mit_delay = 0;
313             if (s->mit_ide &&
314                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
315                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
316             }
317             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
318                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
319             }
320             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
321 
322             /*
323              * According to e1000 SPEC, the Ethernet controller guarantees
324              * a maximum observable interrupt rate of 7813 interrupts/sec.
325              * Thus if mit_delay < 500 then the delay should be set to the
326              * minimum delay possible which is 500.
327              */
328             mit_delay = (mit_delay < 500) ? 500 : mit_delay;
329 
330             s->mit_timer_on = 1;
331             timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
332                       mit_delay * 256);
333             s->mit_ide = 0;
334         }
335     }
336 
337     s->mit_irq_level = (pending_ints != 0);
338     pci_set_irq(d, s->mit_irq_level);
339 }
340 
341 static void
342 e1000_mit_timer(void *opaque)
343 {
344     E1000State *s = opaque;
345 
346     s->mit_timer_on = 0;
347     /* Call set_interrupt_cause to update the irq level (if necessary). */
348     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
349 }
350 
351 static void
352 set_ics(E1000State *s, int index, uint32_t val)
353 {
354     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
355         s->mac_reg[IMS]);
356     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
357 }
358 
359 static void
360 e1000_autoneg_timer(void *opaque)
361 {
362     E1000State *s = opaque;
363     if (!qemu_get_queue(s->nic)->link_down) {
364         e1000_autoneg_done(s);
365         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
366     }
367 }
368 
369 static bool e1000_vet_init_need(void *opaque)
370 {
371     E1000State *s = opaque;
372 
373     return chkflag(VET);
374 }
375 
376 static void e1000_reset(void *opaque)
377 {
378     E1000State *d = opaque;
379     E1000BaseClass *edc = E1000_GET_CLASS(d);
380     uint8_t *macaddr = d->conf.macaddr.a;
381 
382     timer_del(d->autoneg_timer);
383     timer_del(d->mit_timer);
384     timer_del(d->flush_queue_timer);
385     d->mit_timer_on = 0;
386     d->mit_irq_level = 0;
387     d->mit_ide = 0;
388     memset(d->phy_reg, 0, sizeof d->phy_reg);
389     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
390     d->phy_reg[PHY_ID2] = edc->phy_id2;
391     memset(d->mac_reg, 0, sizeof d->mac_reg);
392     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
393     d->rxbuf_min_shift = 1;
394     memset(&d->tx, 0, sizeof d->tx);
395 
396     if (qemu_get_queue(d->nic)->link_down) {
397         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
398     }
399 
400     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
401 
402     if (e1000_vet_init_need(d)) {
403         d->mac_reg[VET] = ETH_P_VLAN;
404     }
405 }
406 
407 static void
408 set_ctrl(E1000State *s, int index, uint32_t val)
409 {
410     /* RST is self clearing */
411     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
412 }
413 
414 static void
415 e1000_flush_queue_timer(void *opaque)
416 {
417     E1000State *s = opaque;
418 
419     qemu_flush_queued_packets(qemu_get_queue(s->nic));
420 }
421 
422 static void
423 set_rx_control(E1000State *s, int index, uint32_t val)
424 {
425     s->mac_reg[RCTL] = val;
426     s->rxbuf_size = e1000x_rxbufsize(val);
427     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
428     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
429            s->mac_reg[RCTL]);
430     timer_mod(s->flush_queue_timer,
431               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
432 }
433 
434 static void
435 set_mdic(E1000State *s, int index, uint32_t val)
436 {
437     uint32_t data = val & E1000_MDIC_DATA_MASK;
438     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
439 
440     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
441         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
442     else if (val & E1000_MDIC_OP_READ) {
443         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
444         if (!(phy_regcap[addr] & PHY_R)) {
445             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
446             val |= E1000_MDIC_ERROR;
447         } else
448             val = (val ^ data) | s->phy_reg[addr];
449     } else if (val & E1000_MDIC_OP_WRITE) {
450         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
451         if (!(phy_regcap[addr] & PHY_W)) {
452             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
453             val |= E1000_MDIC_ERROR;
454         } else {
455             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
456                 phyreg_writeops[addr](s, index, data);
457             } else {
458                 s->phy_reg[addr] = data;
459             }
460         }
461     }
462     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
463 
464     if (val & E1000_MDIC_INT_EN) {
465         set_ics(s, 0, E1000_ICR_MDAC);
466     }
467 }
468 
469 static uint32_t
470 get_eecd(E1000State *s, int index)
471 {
472     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
473 
474     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
475            s->eecd_state.bitnum_out, s->eecd_state.reading);
476     if (!s->eecd_state.reading ||
477         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
478           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
479         ret |= E1000_EECD_DO;
480     return ret;
481 }
482 
483 static void
484 set_eecd(E1000State *s, int index, uint32_t val)
485 {
486     uint32_t oldval = s->eecd_state.old_eecd;
487 
488     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
489             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
490     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
491         return;
492     }
493     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
494         s->eecd_state.val_in = 0;
495         s->eecd_state.bitnum_in = 0;
496         s->eecd_state.bitnum_out = 0;
497         s->eecd_state.reading = 0;
498     }
499     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
500         return;
501     }
502     if (!(E1000_EECD_SK & val)) {               /* falling edge */
503         s->eecd_state.bitnum_out++;
504         return;
505     }
506     s->eecd_state.val_in <<= 1;
507     if (val & E1000_EECD_DI)
508         s->eecd_state.val_in |= 1;
509     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
510         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
511         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
512             EEPROM_READ_OPCODE_MICROWIRE);
513     }
514     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
515            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
516            s->eecd_state.reading);
517 }
518 
519 static uint32_t
520 flash_eerd_read(E1000State *s, int x)
521 {
522     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
523 
524     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
525         return (s->mac_reg[EERD]);
526 
527     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
528         return (E1000_EEPROM_RW_REG_DONE | r);
529 
530     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
531            E1000_EEPROM_RW_REG_DONE | r);
532 }
533 
/*
 * Compute an Internet checksum over part of a packet and store it in place.
 *
 * data: packet buffer
 * n:    number of valid bytes in data
 * sloc: byte offset at which the 16-bit big-endian checksum is stored
 * css:  offset of the first byte covered by the checksum
 * cse:  offset of the last byte covered; 0 means "to the end of the packet"
 *
 * Nothing is written unless both checksum bytes lie inside the first n
 * bytes.  Packets shorter than two bytes are rejected explicitly: with
 * n == 0 the unsigned expression n - 1 would wrap around and let every sloc
 * pass the bounds check.  The covered length is clamped to zero when css
 * lies at or beyond n, so a wrapped unsigned difference is never handed to
 * net_checksum_add().
 * NOTE(review): assumes net_checksum_add() returns 0 for a non-positive
 * length, which is what the unclamped call relied on -- confirm.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;
    uint32_t len;

    if (cse && cse < n) {
        n = cse + 1;
    }
    if (n < 2 || sloc >= n - 1) {
        return;
    }

    len = (css < n) ? n - css : 0;
    sum = net_checksum_add(len, data + css);
    stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
}
546 
547 static inline void
548 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
549 {
550     if (!memcmp(arr, bcast, sizeof bcast)) {
551         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
552     } else if (arr[0] & 1) {
553         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
554     }
555 }
556 
557 static void
558 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
559 {
560     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
561                                     PTC1023, PTC1522 };
562 
563     NetClientState *nc = qemu_get_queue(s->nic);
564     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
565         qemu_receive_packet(nc, buf, size);
566     } else {
567         qemu_send_packet(nc, buf, size);
568     }
569     inc_tx_bcast_or_mcast_count(s, buf);
570     e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
571 }
572 
/*
 * Finalize and send the frame currently assembled in s->tx.data.
 *
 * For a TSO segment (tp->cptse) the copied headers are patched first: the
 * 16-bit length field of the IP header (and, for IPv4, the 16-bit field at
 * offset 4, advanced by the segment count), then either the TCP sequence
 * number and flags or the UDP length.  If TCP/UDP checksum insertion is
 * requested, the segment length is folded into the value already stored at
 * the checksum offset.  Afterwards the requested checksums are inserted, an
 * optional VLAN tag is spliced in, the frame is sent and the transmit
 * statistics registers are updated.
 */
573 static void
574 xmit_seg(E1000State *s)
575 {
576     uint16_t len;
577     unsigned int frames = s->tx.tso_frames, css, sofar;
578     struct e1000_tx *tp = &s->tx;
    /* TSO segments use the TSO context; everything else the normal one. */
579     struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
580 
581     if (tp->cptse) {
582         css = props->ipcss;
583         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
584                frames, tp->size, css);
585         if (props->ip) {    /* IPv4 */
586             stw_be_p(tp->data+css+2, tp->size - css);
587             stw_be_p(tp->data+css+4,
588                      lduw_be_p(tp->data + css + 4) + frames);
589         } else {         /* IPv6 */
590             stw_be_p(tp->data+css+4, tp->size - css);
591         }
592         css = props->tucss;
593         len = tp->size - css;
594         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
595         if (props->tcp) {
            /* payload bytes already sent in earlier segments */
596             sofar = frames * props->mss;
597             stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
598             if (props->paylen - sofar > props->mss) {
                /* not the last segment: clear the two flag bits */
599                 tp->data[css + 13] &= ~9;    /* PSH, FIN */
600             } else if (frames) {
601                 e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
602             }
603         } else {    /* UDP */
604             stw_be_p(tp->data+css+4, len);
605         }
606         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
607             unsigned int phsum;
608             // add pseudo-header length before checksum calculation
609             void *sp = tp->data + props->tucso;
610 
611             phsum = lduw_be_p(sp) + len;
            /* fold the carry back into the low 16 bits */
612             phsum = (phsum >> 16) + (phsum & 0xffff);
613             stw_be_p(sp, phsum);
614         }
615         tp->tso_frames++;
616     }
617 
618     if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
619         putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
620     }
621     if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
622         putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
623     }
624     if (tp->vlan_needed) {
        /*
         * Insert the 4-byte tag after the first 12 bytes by sliding those
         * 12 bytes four bytes towards the front: the first four land in
         * tp->vlan, which must sit directly before tp->data, and the frame
         * is then sent starting at tp->vlan.
         */
625         memmove(tp->vlan, tp->data, 4);
626         memmove(tp->data, tp->data + 4, 8);
627         memcpy(tp->data + 8, tp->vlan_header, 4);
628         e1000_send_packet(s, tp->vlan, tp->size + 4);
629     } else {
630         e1000_send_packet(s, tp->data, tp->size);
631     }
632 
    /* Transmit statistics; the "good" counters mirror the totals. */
633     e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
634     e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
635     s->mac_reg[GPTC] = s->mac_reg[TPT];
636     s->mac_reg[GOTCL] = s->mac_reg[TOTL];
637     s->mac_reg[GOTCH] = s->mac_reg[TOTH];
638 }
639 
/*
 * Process one transmit descriptor fetched from the guest's TX ring.
 *
 * A context descriptor only updates the stored offload parameters (TSO or
 * normal) and returns.  A data or legacy descriptor has its buffer read by
 * DMA and appended to s->tx.data.  With TSO active, a segment is sent every
 * time header plus MSS bytes have accumulated, and the saved header is
 * restored for the next segment.  When the descriptor has EOP set, whatever
 * remains is sent and the per-packet transmit state is cleared.
 */
640 static void
641 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
642 {
643     PCIDevice *d = PCI_DEVICE(s);
644     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
645     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    /* split_size: bytes of this descriptor's buffer still to be consumed */
646     unsigned int split_size = txd_lower & 0xffff, bytes, sz;
647     unsigned int msh = 0xfffff;
648     uint64_t addr;
649     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
650     struct e1000_tx *tp = &s->tx;
651 
    /* Remember a requested interrupt delay for the mitigation logic. */
652     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
653     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
654         if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
655             e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
656             s->use_tso_for_migration = 1;
657             tp->tso_frames = 0;
658         } else {
659             e1000x_read_tx_ctx_descr(xp, &tp->props);
660             s->use_tso_for_migration = 0;
661         }
662         return;
663     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
664         // data descriptor
        /* Offload options are taken from the first descriptor of a packet. */
665         if (tp->size == 0) {
666             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
667         }
668         tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
669     } else {
670         // legacy descriptor
671         tp->cptse = 0;
672     }
673 
    /*
     * NOTE(review): mac_reg[VET] is stored as a host-order value elsewhere
     * in this file, so le16_to_cpu() would byte-swap it on a big-endian
     * host -- confirm this is intended.
     */
674     if (e1000x_vlan_enabled(s->mac_reg) &&
675         e1000x_is_vlan_txd(txd_lower) &&
676         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
677         tp->vlan_needed = 1;
678         stw_be_p(tp->vlan_header,
679                       le16_to_cpu(s->mac_reg[VET]));
680         stw_be_p(tp->vlan_header + 2,
681                       le16_to_cpu(dp->upper.fields.special));
682     }
683 
684     addr = le64_to_cpu(dp->buffer_addr);
685     if (tp->cptse) {
        /* msh: header length plus MSS, i.e. the size of one full segment */
686         msh = tp->tso_props.hdr_len + tp->tso_props.mss;
687         do {
688             bytes = split_size;
689             if (tp->size >= msh) {
690                 goto eop;
691             }
692             if (tp->size + bytes > msh)
693                 bytes = msh - tp->size;
694 
            /* never read past the end of tp->data */
695             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
696             pci_dma_read(d, addr, tp->data + tp->size, bytes);
697             sz = tp->size + bytes;
            /* the header just became complete: save it for later segments */
698             if (sz >= tp->tso_props.hdr_len
699                 && tp->size < tp->tso_props.hdr_len) {
700                 memmove(tp->header, tp->data, tp->tso_props.hdr_len);
701             }
702             tp->size = sz;
703             addr += bytes;
704             if (sz == msh) {
                /* full segment: send it, then restart from the saved header */
705                 xmit_seg(s);
706                 memmove(tp->data, tp->header, tp->tso_props.hdr_len);
707                 tp->size = tp->tso_props.hdr_len;
708             }
709             split_size -= bytes;
710         } while (bytes && split_size);
711     } else {
        /* no segmentation: append the buffer, clamped to the space left */
712         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
713         pci_dma_read(d, addr, tp->data + tp->size, split_size);
714         tp->size += split_size;
715     }
716 
717 eop:
718     if (!(txd_lower & E1000_TXD_CMD_EOP))
719         return;
    /* A TSO packet that holds less than a full header is dropped unsent. */
720     if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
721         xmit_seg(s);
722     }
    /* End of packet: clear the per-packet state. */
723     tp->tso_frames = 0;
724     tp->sum_needed = 0;
725     tp->vlan_needed = 0;
726     tp->size = 0;
727     tp->cptse = 0;
728 }
729 
730 static uint32_t
731 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
732 {
733     PCIDevice *d = PCI_DEVICE(s);
734     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
735 
736     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
737         return 0;
738     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
739                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
740     dp->upper.data = cpu_to_le32(txd_upper);
741     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
742                   &dp->upper, sizeof(dp->upper));
743     return E1000_ICR_TXDW;
744 }
745 
746 static uint64_t tx_desc_base(E1000State *s)
747 {
748     uint64_t bah = s->mac_reg[TDBAH];
749     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
750 
751     return (bah << 32) + bal;
752 }
753 
754 static void
755 start_xmit(E1000State *s)
756 {
757     PCIDevice *d = PCI_DEVICE(s);
758     dma_addr_t base;
759     struct e1000_tx_desc desc;
760     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
761 
762     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
763         DBGOUT(TX, "tx disabled\n");
764         return;
765     }
766 
767     if (s->tx.busy) {
768         return;
769     }
770     s->tx.busy = true;
771 
772     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
773         base = tx_desc_base(s) +
774                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
775         pci_dma_read(d, base, &desc, sizeof(desc));
776 
777         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
778                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
779                desc.upper.data);
780 
781         process_tx_desc(s, &desc);
782         cause |= txdesc_writeback(s, base, &desc);
783 
784         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
785             s->mac_reg[TDH] = 0;
786         /*
787          * the following could happen only if guest sw assigns
788          * bogus values to TDT/TDLEN.
789          * there's nothing too intelligent we could do about this.
790          */
791         if (s->mac_reg[TDH] == tdh_start ||
792             tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
793             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
794                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
795             break;
796         }
797     }
798     s->tx.busy = false;
799     set_ics(s, 0, cause);
800 }
801 
802 static int
803 receive_filter(E1000State *s, const uint8_t *buf, int size)
804 {
805     uint32_t rctl = s->mac_reg[RCTL];
806     int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
807 
808     if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
809         e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
810         uint16_t vid = lduw_be_p(buf + 14);
811         uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
812                                  ((vid >> 5) & 0x7f));
813         if ((vfta & (1 << (vid & 0x1f))) == 0)
814             return 0;
815     }
816 
817     if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
818         return 1;
819     }
820 
821     if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
822         e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
823         return 1;
824     }
825 
826     if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
827         e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
828         return 1;
829     }
830 
831     return e1000x_rx_group_filter(s->mac_reg, buf);
832 }
833 
834 static void
835 e1000_set_link_status(NetClientState *nc)
836 {
837     E1000State *s = qemu_get_nic_opaque(nc);
838     uint32_t old_status = s->mac_reg[STATUS];
839 
840     if (nc->link_down) {
841         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
842     } else {
843         if (have_autoneg(s) &&
844             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
845             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
846         } else {
847             e1000_link_up(s);
848         }
849     }
850 
851     if (s->mac_reg[STATUS] != old_status)
852         set_ics(s, 0, E1000_ICR_LSC);
853 }
854 
855 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
856 {
857     int bufs;
858     /* Fast-path short packets */
859     if (total_size <= s->rxbuf_size) {
860         return s->mac_reg[RDH] != s->mac_reg[RDT];
861     }
862     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
863         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
864     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
865         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
866             s->mac_reg[RDT] - s->mac_reg[RDH];
867     } else {
868         return false;
869     }
870     return total_size <= bufs * s->rxbuf_size;
871 }
872 
873 static bool
874 e1000_can_receive(NetClientState *nc)
875 {
876     E1000State *s = qemu_get_nic_opaque(nc);
877 
878     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
879         e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
880 }
881 
882 static uint64_t rx_desc_base(E1000State *s)
883 {
884     uint64_t bah = s->mac_reg[RDBAH];
885     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
886 
887     return (bah << 32) + bal;
888 }
889 
890 static void
891 e1000_receiver_overrun(E1000State *s, size_t size)
892 {
893     trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
894     e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
895     e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
896     set_ics(s, 0, E1000_ICS_RXO);
897 }
898 
/*
 * Deliver one incoming frame, described by the iov/iovcnt scatter list, into
 * the guest's receive descriptor ring.
 *
 * Returns:
 *   -1    when e1000x_hw_rx_enabled() reports the receiver as disabled, or
 *         when the ring cannot take the frame (receiver overrun);
 *    0    while flush_queue_timer is pending;
 *   size  (the frame length after padding and, where applicable, VLAN tag
 *         removal) when the frame was written to the ring, and also when it
 *         was dropped as oversized or rejected by receive_filter().
 */
static ssize_t
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0;
    uint8_t min_buf[MIN_BUF_SIZE];
    struct iovec min_iov;
    /* Contiguous view of the frame header used for filtering/VLAN checks */
    uint8_t *filter_buf = iov->iov_base;
    size_t size = iov_size(iov, iovcnt);
    /* Read cursor inside the current iov element */
    size_t iov_ofs = 0;
    /* Bytes of total_size already accounted for in descriptors */
    size_t desc_offset;
    size_t desc_size;
    /* Frame size plus the FCS length reported by e1000x_fcs_len() */
    size_t total_size;

    if (!e1000x_hw_rx_enabled(s->mac_reg)) {
        return -1;
    }

    if (timer_pending(s->flush_queue_timer)) {
        return 0;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        /* Linearize the short frame into min_buf and zero-fill the rest */
        iov_to_buf(iov, iovcnt, 0, min_buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        min_iov.iov_base = filter_buf = min_buf;
        min_iov.iov_len = size = sizeof(min_buf);
        iovcnt = 1;
        iov = &min_iov;
    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
        /* This is very unlikely, but may happen. */
        /* The header spans several iov elements: copy it into min_buf so
         * the filtering code below can look at it contiguously. */
        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
        filter_buf = min_buf;
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if (e1000x_is_oversized(s->mac_reg, size)) {
        return size;
    }

    /* Frames rejected by the receive filter are dropped but reported as
     * consumed. */
    if (!receive_filter(s, filter_buf, size)) {
        return size;
    }

    if (e1000x_vlan_enabled(s->mac_reg) &&
        e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
        /* Remember the 16-bit big-endian value found at byte offset 14; it
         * is stored in the descriptor's "special" field below. */
        vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
        /* Drop 4 bytes from the frame: the first 12 header bytes are moved
         * up by 4 and copying to the guest starts at offset 4. */
        iov_ofs = 4;
        if (filter_buf == iov->iov_base) {
            memmove(filter_buf + 4, filter_buf, 12);
        } else {
            /* Header was linearized into min_buf: write the 12 bytes back
             * into the iov at offset 4, then advance iov/iov_ofs past any
             * elements that lie entirely before that offset. */
            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
            while (iov->iov_len <= iov_ofs) {
                iov_ofs -= iov->iov_len;
                iov++;
            }
        }
        vlan_status = E1000_RXD_STAT_VP;
        size -= 4;
    }

    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    total_size = size + e1000x_fcs_len(s->mac_reg);
    if (!e1000_has_rxbufs(s, total_size)) {
        e1000_receiver_overrun(s, total_size);
        return -1;
    }
    /* Fill descriptors, starting at RDH, until total_size bytes have been
     * accounted for. */
    do {
        /* Portion of the frame assigned to this descriptor */
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        /* DD is cleared here and only set again by the separate status
         * write after the descriptor body has been written back. */
        desc.status &= ~E1000_RXD_STAT_DD;
        if (desc.buffer_addr) {
            /* Only the first "size" bytes are copied from the iov; the
             * remaining total_size - size bytes are counted in the
             * descriptor lengths without any data being written. */
            if (desc_offset < size) {
                size_t iov_copy;
                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                /* Copy from consecutive iov elements into guest memory */
                do {
                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
                    copy_size -= iov_copy;
                    ba += iov_copy;
                    iov_ofs += iov_copy;
                    if (iov_ofs == iov->iov_len) {
                        iov++;
                        iov_ofs = 0;
                    }
                } while (copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                /* Last descriptor of the frame */
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { /* as per intel docs; skip descriptors with null buf addr */
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        /* Write the descriptor back first, then set DD (and the VLAN status
         * bit, if any) with a second write covering only the status field. */
        pci_dma_write(d, base, &desc, sizeof(desc));
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        pci_dma_write(d, base + offsetof(struct e1000_rx_desc, status),
                      &desc.status, sizeof(desc.status));

        /* Advance RDH, wrapping to 0 at the end of the ring */
        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        /* Stop if RDH came back to where it started, or if the starting
         * RDH was outside the ring described by RDLEN. */
        if (s->mac_reg[RDH] == rdh_start ||
            rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            e1000_receiver_overrun(s, total_size);
            return -1;
        }
    } while (desc_offset < total_size);

    e1000x_update_rx_total_stats(s->mac_reg, size, total_size);

    /* RXT0 is always raised; RXDMT0 is added when the space between RDH and
     * RDT (in bytes of descriptors) is at most RDLEN >> rxbuf_min_shift. */
    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}
1045 
1046 static ssize_t
1047 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1048 {
1049     const struct iovec iov = {
1050         .iov_base = (uint8_t *)buf,
1051         .iov_len = size
1052     };
1053 
1054     return e1000_receive_iov(nc, &iov, 1);
1055 }
1056 
1057 static uint32_t
1058 mac_readreg(E1000State *s, int index)
1059 {
1060     return s->mac_reg[index];
1061 }
1062 
1063 static uint32_t
1064 mac_low4_read(E1000State *s, int index)
1065 {
1066     return s->mac_reg[index] & 0xf;
1067 }
1068 
1069 static uint32_t
1070 mac_low11_read(E1000State *s, int index)
1071 {
1072     return s->mac_reg[index] & 0x7ff;
1073 }
1074 
1075 static uint32_t
1076 mac_low13_read(E1000State *s, int index)
1077 {
1078     return s->mac_reg[index] & 0x1fff;
1079 }
1080 
1081 static uint32_t
1082 mac_low16_read(E1000State *s, int index)
1083 {
1084     return s->mac_reg[index] & 0xffff;
1085 }
1086 
1087 static uint32_t
1088 mac_icr_read(E1000State *s, int index)
1089 {
1090     uint32_t ret = s->mac_reg[ICR];
1091 
1092     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1093     set_interrupt_cause(s, 0, 0);
1094     return ret;
1095 }
1096 
1097 static uint32_t
1098 mac_read_clr4(E1000State *s, int index)
1099 {
1100     uint32_t ret = s->mac_reg[index];
1101 
1102     s->mac_reg[index] = 0;
1103     return ret;
1104 }
1105 
1106 static uint32_t
1107 mac_read_clr8(E1000State *s, int index)
1108 {
1109     uint32_t ret = s->mac_reg[index];
1110 
1111     s->mac_reg[index] = 0;
1112     s->mac_reg[index-1] = 0;
1113     return ret;
1114 }
1115 
1116 static void
1117 mac_writereg(E1000State *s, int index, uint32_t val)
1118 {
1119     uint32_t macaddr[2];
1120 
1121     s->mac_reg[index] = val;
1122 
1123     if (index == RA + 1) {
1124         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1125         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1126         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1127     }
1128 }
1129 
1130 static void
1131 set_rdt(E1000State *s, int index, uint32_t val)
1132 {
1133     s->mac_reg[index] = val & 0xffff;
1134     if (e1000_has_rxbufs(s, 1)) {
1135         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1136     }
1137 }
1138 
1139 static void
1140 set_16bit(E1000State *s, int index, uint32_t val)
1141 {
1142     s->mac_reg[index] = val & 0xffff;
1143 }
1144 
1145 static void
1146 set_dlen(E1000State *s, int index, uint32_t val)
1147 {
1148     s->mac_reg[index] = val & 0xfff80;
1149 }
1150 
1151 static void
1152 set_tctl(E1000State *s, int index, uint32_t val)
1153 {
1154     s->mac_reg[index] = val;
1155     s->mac_reg[TDT] &= 0xffff;
1156     start_xmit(s);
1157 }
1158 
1159 static void
1160 set_icr(E1000State *s, int index, uint32_t val)
1161 {
1162     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1163     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1164 }
1165 
1166 static void
1167 set_imc(E1000State *s, int index, uint32_t val)
1168 {
1169     s->mac_reg[IMS] &= ~val;
1170     set_ics(s, 0, 0);
1171 }
1172 
1173 static void
1174 set_ims(E1000State *s, int index, uint32_t val)
1175 {
1176     s->mac_reg[IMS] |= val;
1177     set_ics(s, 0, 0);
1178 }
1179 
/*
 * Read dispatch table.  It is indexed by the register index that
 * e1000_mmio_read() derives from the MMIO address (byte offset >> 2).
 * Entries left unset are NULL and are treated by the MMIO handlers as
 * registers without a read handler.
 *
 * getreg(x) is shorthand for "register x is read with mac_readreg".
 */
#define getreg(x)    [x] = mac_readreg
typedef uint32_t (*readops)(E1000State *, int);
static const readops macreg_readops[] = {
    /* Registers returned verbatim */
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),

    /* mac_read_clr8: reading clears the register and the one before it */
    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    /* mac_read_clr4: reading clears the register itself */
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    /* Registers with dedicated read handlers */
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,
    /* Registers read through a bit mask (see the mac_lowN_read helpers) */
    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
    [RDFPC]   = mac_low13_read,
    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
    [TDFPC]   = mac_low13_read,
    [AIT]     = mac_low16_read,

    /* Register ranges (GNU range designators) */
    [CRCERRS ... MPC]   = &mac_readreg,
    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
    [FFLT ... FFLT+6]   = &mac_low11_read,
    [RA ... RA+31]      = &mac_readreg,
    [WUPM ... WUPM+31]  = &mac_readreg,
    [MTA ... MTA+127]   = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
    [FFMT ... FFMT+254] = &mac_low4_read,
    [FFVT ... FFVT+254] = &mac_readreg,
    [PBM ... PBM+16383] = &mac_readreg,
};
/* Number of entries in macreg_readops; used to bounds-check the index */
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1235 
/*
 * Write dispatch table.  It is indexed by the register index that
 * e1000_mmio_write() derives from the MMIO address (byte offset >> 2).
 * Entries left unset are NULL; e1000_mmio_write() only logs writes to them.
 *
 * putreg(x) is shorthand for "register x is written with mac_writereg".
 */
#define putreg(x)    [x] = mac_writereg
typedef void (*writeops)(E1000State *, int, uint32_t);
static const writeops macreg_writeops[] = {
    /* Registers stored verbatim */
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
    putreg(WUS),      putreg(AIT),

    /* Registers with dedicated write handlers (masking or side effects) */
    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
    [ITR]    = set_16bit,

    /* Register ranges (GNU range designators) */
    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
    [FFLT ... FFLT+6]   = &mac_writereg,
    [RA ... RA+31]      = &mac_writereg,
    [WUPM ... WUPM+31]  = &mac_writereg,
    [MTA ... MTA+127]   = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
    [PBM ... PBM+16383] = &mac_writereg,
};

/* Number of entries in macreg_writeops; used to bounds-check the index */
enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1266 
/* Bit values used in the low two bits of each mac_reg_access[] entry */
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

/* Encode "access requires compat flag E1000_FLAG_x" for mac_reg_access[] */
#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implemented
 *
 * The MMIO handlers allow an access when the "n" bit is clear or when
 * s->compat_flags has one of the "f" bits set; the "p" bit only triggers a
 * debug message.  Registers not listed here have a zero entry. */
static const uint8_t mac_reg_access[0x8000] = {
    /* Registers gated by the MIT compat flag */
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    /* Registers gated by the MAC compat flag */
    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    /* Gated by the MAC compat flag and marked as partially implemented */
    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1319 
1320 static void
1321 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1322                  unsigned size)
1323 {
1324     E1000State *s = opaque;
1325     unsigned int index = (addr & 0x1ffff) >> 2;
1326 
1327     if (index < NWRITEOPS && macreg_writeops[index]) {
1328         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1329             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1330             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1331                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1332                        "It is not fully implemented.\n", index<<2);
1333             }
1334             macreg_writeops[index](s, index, val);
1335         } else {    /* "flag needed" bit is set, but the flag is not active */
1336             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1337                    index<<2);
1338         }
1339     } else if (index < NREADOPS && macreg_readops[index]) {
1340         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1341                index<<2, val);
1342     } else {
1343         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1344                index<<2, val);
1345     }
1346 }
1347 
1348 static uint64_t
1349 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1350 {
1351     E1000State *s = opaque;
1352     unsigned int index = (addr & 0x1ffff) >> 2;
1353 
1354     if (index < NREADOPS && macreg_readops[index]) {
1355         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1356             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1357             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1358                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1359                        "It is not fully implemented.\n", index<<2);
1360             }
1361             return macreg_readops[index](s, index);
1362         } else {    /* "flag needed" bit is set, but the flag is not active */
1363             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1364                    index<<2);
1365         }
1366     } else {
1367         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1368     }
1369     return 0;
1370 }
1371 
1372 static const MemoryRegionOps e1000_mmio_ops = {
1373     .read = e1000_mmio_read,
1374     .write = e1000_mmio_write,
1375     .endianness = DEVICE_LITTLE_ENDIAN,
1376     .impl = {
1377         .min_access_size = 4,
1378         .max_access_size = 4,
1379     },
1380 };
1381 
1382 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1383                               unsigned size)
1384 {
1385     E1000State *s = opaque;
1386 
1387     (void)s;
1388     return 0;
1389 }
1390 
1391 static void e1000_io_write(void *opaque, hwaddr addr,
1392                            uint64_t val, unsigned size)
1393 {
1394     E1000State *s = opaque;
1395 
1396     (void)s;
1397 }
1398 
1399 static const MemoryRegionOps e1000_io_ops = {
1400     .read = e1000_io_read,
1401     .write = e1000_io_write,
1402     .endianness = DEVICE_LITTLE_ENDIAN,
1403 };
1404 
/*
 * VMState field test: true only for migration stream version 1.  The
 * opaque pointer is not used.
 */
static bool is_version_1(void *opaque, int version_id)
{
    const int wanted_version = 1;

    (void)opaque;
    return version_id == wanted_version;
}
1409 
1410 static int e1000_pre_save(void *opaque)
1411 {
1412     E1000State *s = opaque;
1413     NetClientState *nc = qemu_get_queue(s->nic);
1414 
1415     /*
1416      * If link is down and auto-negotiation is supported and ongoing,
1417      * complete auto-negotiation immediately. This allows us to look
1418      * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1419      */
1420     if (nc->link_down && have_autoneg(s)) {
1421         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1422     }
1423 
1424     /* Decide which set of props to migrate in the main structure */
1425     if (chkflag(TSO) || !s->use_tso_for_migration) {
1426         /* Either we're migrating with the extra subsection, in which
1427          * case the mig_props is always 'props' OR
1428          * we've not got the subsection, but 'props' was the last
1429          * updated.
1430          */
1431         s->mig_props = s->tx.props;
1432     } else {
1433         /* We're not using the subsection, and 'tso_props' was
1434          * the last updated.
1435          */
1436         s->mig_props = s->tx.tso_props;
1437     }
1438     return 0;
1439 }
1440 
1441 static int e1000_post_load(void *opaque, int version_id)
1442 {
1443     E1000State *s = opaque;
1444     NetClientState *nc = qemu_get_queue(s->nic);
1445 
1446     if (!chkflag(MIT)) {
1447         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1448             s->mac_reg[TADV] = 0;
1449         s->mit_irq_level = false;
1450     }
1451     s->mit_ide = 0;
1452     s->mit_timer_on = true;
1453     timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
1454 
1455     /* nc.link_down can't be migrated, so infer link_down according
1456      * to link status bit in mac_reg[STATUS].
1457      * Alternatively, restart link negotiation if it was in progress. */
1458     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1459 
1460     if (have_autoneg(s) &&
1461         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1462         nc->link_down = false;
1463         timer_mod(s->autoneg_timer,
1464                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1465     }
1466 
1467     s->tx.props = s->mig_props;
1468     if (!s->received_tx_tso) {
1469         /* We received only one set of offload data (tx.props)
1470          * and haven't got tx.tso_props.  The best we can do
1471          * is dupe the data.
1472          */
1473         s->tx.tso_props = s->mig_props;
1474     }
1475     return 0;
1476 }
1477 
1478 static int e1000_tx_tso_post_load(void *opaque, int version_id)
1479 {
1480     E1000State *s = opaque;
1481     s->received_tx_tso = true;
1482     return 0;
1483 }
1484 
1485 static bool e1000_mit_state_needed(void *opaque)
1486 {
1487     E1000State *s = opaque;
1488 
1489     return chkflag(MIT);
1490 }
1491 
1492 static bool e1000_full_mac_needed(void *opaque)
1493 {
1494     E1000State *s = opaque;
1495 
1496     return chkflag(MAC);
1497 }
1498 
1499 static bool e1000_tso_state_needed(void *opaque)
1500 {
1501     E1000State *s = opaque;
1502 
1503     return chkflag(TSO);
1504 }
1505 
/*
 * Migration subsection "e1000/mit_state": the four mitigation registers
 * plus mit_irq_level.  Its .needed callback is e1000_mit_state_needed().
 * The field order below defines the stream layout; do not reorder.
 */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1520 
/*
 * Migration subsection "e1000/full_mac_state": the whole mac_reg[] array
 * (0x8000 entries).  Its .needed callback is e1000_full_mac_needed().
 */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1531 
/*
 * Migration subsection "e1000/tx_tso_state": the tx.tso_props offload
 * context.  Its .needed callback is e1000_tso_state_needed(), and its
 * post_load hook sets received_tx_tso so that e1000_post_load() knows
 * tso_props came from the stream.
 * The field order below defines the stream layout; do not reorder.
 */
static const VMStateDescription vmstate_e1000_tx_tso_state = {
    .name = "e1000/tx_tso_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_tso_state_needed,
    .post_load = e1000_tx_tso_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
        VMSTATE_INT8(tx.tso_props.ip, E1000State),
        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1553 
/*
 * Main migration description for the device (section "e1000", version 2,
 * minimum accepted version 1).
 *
 * e1000_pre_save() fills mig_props before saving and e1000_post_load()
 * copies it back into tx.props (and tx.tso_props when the TSO subsection
 * was not received).  The field order below defines the stream layout; do
 * not reorder.
 */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* EEPROM access state */
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* Offload context chosen by e1000_pre_save(), interleaved with
         * other transmit state */
        VMSTATE_UINT8(mig_props.ipcss, E1000State),
        VMSTATE_UINT8(mig_props.ipcso, E1000State),
        VMSTATE_UINT16(mig_props.ipcse, E1000State),
        VMSTATE_UINT8(mig_props.tucss, E1000State),
        VMSTATE_UINT8(mig_props.tucso, E1000State),
        VMSTATE_UINT16(mig_props.tucse, E1000State),
        VMSTATE_UINT32(mig_props.paylen, E1000State),
        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
        VMSTATE_UINT16(mig_props.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(mig_props.ip, E1000State),
        VMSTATE_INT8(mig_props.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        /* EEPROM contents and PHY registers */
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individually migrated MAC registers */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Register blocks starting at RA, MTA and VFTA */
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    /* Optional subsections; each one has its own .needed callback */
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        &vmstate_e1000_tx_tso_state,
        NULL
    }
};
1638 
/*
 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
 * Note: A valid DevId will be inserted during pci_e1000_realize().
 *
 * The template holds 64 16-bit words.  pci_e1000_realize() passes it to
 * e1000x_core_prepare_eeprom() together with the per-device eeprom_data
 * buffer, the PCI device id and the MAC address.
 */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1653 
1654 /* PCI interface */
1655 
1656 static void
1657 e1000_mmio_setup(E1000State *d)
1658 {
1659     int i;
1660     const uint32_t excluded_regs[] = {
1661         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1662         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1663     };
1664 
1665     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1666                           "e1000-mmio", PNPMMIO_SIZE);
1667     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1668     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1669         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1670                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1671     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1672 }
1673 
1674 static void
1675 pci_e1000_uninit(PCIDevice *dev)
1676 {
1677     E1000State *d = E1000(dev);
1678 
1679     timer_free(d->autoneg_timer);
1680     timer_free(d->mit_timer);
1681     timer_free(d->flush_queue_timer);
1682     qemu_del_nic(d->nic);
1683 }
1684 
1685 static NetClientInfo net_e1000_info = {
1686     .type = NET_CLIENT_DRIVER_NIC,
1687     .size = sizeof(NICState),
1688     .can_receive = e1000_can_receive,
1689     .receive = e1000_receive,
1690     .receive_iov = e1000_receive_iov,
1691     .link_status_changed = e1000_set_link_status,
1692 };
1693 
1694 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1695                                 uint32_t val, int len)
1696 {
1697     E1000State *s = E1000(pci_dev);
1698 
1699     pci_default_write_config(pci_dev, address, val, len);
1700 
1701     if (range_covers_byte(address, len, PCI_COMMAND) &&
1702         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1703         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1704     }
1705 }
1706 
1707 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1708 {
1709     DeviceState *dev = DEVICE(pci_dev);
1710     E1000State *d = E1000(pci_dev);
1711     uint8_t *pci_conf;
1712     uint8_t *macaddr;
1713 
1714     pci_dev->config_write = e1000_write_config;
1715 
1716     pci_conf = pci_dev->config;
1717 
1718     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1719     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1720 
1721     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1722 
1723     e1000_mmio_setup(d);
1724 
1725     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1726 
1727     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1728 
1729     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1730     macaddr = d->conf.macaddr.a;
1731 
1732     e1000x_core_prepare_eeprom(d->eeprom_data,
1733                                e1000_eeprom_template,
1734                                sizeof(e1000_eeprom_template),
1735                                PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1736                                macaddr);
1737 
1738     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1739                           object_get_typename(OBJECT(d)), dev->id, d);
1740 
1741     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1742 
1743     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1744     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1745     d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1746                                         e1000_flush_queue_timer, d);
1747 }
1748 
1749 static void qdev_e1000_reset(DeviceState *dev)
1750 {
1751     E1000State *d = E1000(dev);
1752     e1000_reset(d);
1753 }
1754 
1755 static Property e1000_properties[] = {
1756     DEFINE_NIC_PROPERTIES(E1000State, conf),
1757     DEFINE_PROP_BIT("autonegotiation", E1000State,
1758                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1759     DEFINE_PROP_BIT("mitigation", E1000State,
1760                     compat_flags, E1000_FLAG_MIT_BIT, true),
1761     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1762                     compat_flags, E1000_FLAG_MAC_BIT, true),
1763     DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1764                     compat_flags, E1000_FLAG_TSO_BIT, true),
1765     DEFINE_PROP_BIT("init-vet", E1000State,
1766                     compat_flags, E1000_FLAG_VET_BIT, true),
1767     DEFINE_PROP_END_OF_LIST(),
1768 };
1769 
/*
 * Per-model constants for one concrete e1000 variant.  Instances live in
 * e1000_devices[] and are consumed by e1000_class_init().
 */
typedef struct E1000Info {
    const char *name;     /* QOM type name the model is registered under */
    uint16_t device_id;   /* copied to PCIDeviceClass.device_id */
    uint8_t revision;     /* copied to PCIDeviceClass.revision */
    uint16_t phy_id2;     /* copied to E1000BaseClass.phy_id2 */
} E1000Info;
1776 
1777 static void e1000_class_init(ObjectClass *klass, void *data)
1778 {
1779     DeviceClass *dc = DEVICE_CLASS(klass);
1780     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1781     E1000BaseClass *e = E1000_CLASS(klass);
1782     const E1000Info *info = data;
1783 
1784     k->realize = pci_e1000_realize;
1785     k->exit = pci_e1000_uninit;
1786     k->romfile = "efi-e1000.rom";
1787     k->vendor_id = PCI_VENDOR_ID_INTEL;
1788     k->device_id = info->device_id;
1789     k->revision = info->revision;
1790     e->phy_id2 = info->phy_id2;
1791     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1792     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1793     dc->desc = "Intel Gigabit Ethernet";
1794     dc->reset = qdev_e1000_reset;
1795     dc->vmsd = &vmstate_e1000;
1796     device_class_set_props(dc, e1000_properties);
1797 }
1798 
1799 static void e1000_instance_init(Object *obj)
1800 {
1801     E1000State *n = E1000(obj);
1802     device_add_bootindex_property(obj, &n->conf.bootindex,
1803                                   "bootindex", "/ethernet-phy@0",
1804                                   DEVICE(n));
1805 }
1806 
1807 static const TypeInfo e1000_base_info = {
1808     .name          = TYPE_E1000_BASE,
1809     .parent        = TYPE_PCI_DEVICE,
1810     .instance_size = sizeof(E1000State),
1811     .instance_init = e1000_instance_init,
1812     .class_size    = sizeof(E1000BaseClass),
1813     .abstract      = true,
1814     .interfaces = (InterfaceInfo[]) {
1815         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1816         { },
1817     },
1818 };
1819 
1820 static const E1000Info e1000_devices[] = {
1821     {
1822         .name      = "e1000",
1823         .device_id = E1000_DEV_ID_82540EM,
1824         .revision  = 0x03,
1825         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1826     },
1827     {
1828         .name      = "e1000-82544gc",
1829         .device_id = E1000_DEV_ID_82544GC_COPPER,
1830         .revision  = 0x03,
1831         .phy_id2   = E1000_PHY_ID2_82544x,
1832     },
1833     {
1834         .name      = "e1000-82545em",
1835         .device_id = E1000_DEV_ID_82545EM_COPPER,
1836         .revision  = 0x03,
1837         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1838     },
1839 };
1840 
1841 static void e1000_register_types(void)
1842 {
1843     int i;
1844 
1845     type_register_static(&e1000_base_info);
1846     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1847         const E1000Info *info = &e1000_devices[i];
1848         TypeInfo type_info = {};
1849 
1850         type_info.name = info->name;
1851         type_info.parent = TYPE_E1000_BASE;
1852         type_info.class_data = (void *)info;
1853         type_info.class_init = e1000_class_init;
1854 
1855         type_register(&type_info);
1856     }
1857 }
1858 
1859 type_init(e1000_register_types)
1860