xref: /openbmc/qemu/hw/net/e1000.c (revision a9484b8a417246b5ebb1d3b8c41b58a7e0862a72)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/net/mii.h"
30 #include "hw/pci/pci_device.h"
31 #include "hw/qdev-properties.h"
32 #include "migration/vmstate.h"
33 #include "net/eth.h"
34 #include "net/net.h"
35 #include "net/checksum.h"
36 #include "sysemu/sysemu.h"
37 #include "sysemu/dma.h"
38 #include "qemu/iov.h"
39 #include "qemu/module.h"
40 #include "qemu/range.h"
41 
42 #include "e1000x_common.h"
43 #include "trace.h"
44 #include "qom/object.h"
45 
/* Ethernet broadcast destination address; used both for RX filtering and
 * for classifying transmitted frames in the statistics counters. */
static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};

/* #define E1000_DEBUG */

#ifdef E1000_DEBUG
/* One bit per debug category; see DBGBIT()/DBGOUT() below. */
enum {
    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
};
#define DBGBIT(x)    (1<<DEBUG_##x)
/* Categories enabled by default when E1000_DEBUG is defined. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/* Print to stderr only when the category's bit is set in debugflags. */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif

#define IOPORT_SIZE       0x40      /* size of the PIO BAR */
#define PNPMMIO_SIZE      0x20000   /* size of the MMIO BAR */
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/* 14-byte Ethernet header plus an optional 4-byte VLAN tag. */
#define MAXIMUM_ETHERNET_HDR_LEN (14+4)

/*
 * HW models:
 *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
 *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
 *  Others never tested
 */
81 
/* Per-device emulation state.  NOTE: several of these fields are migrated
 * (see the vmstate/compat flags below), so their layout and meaning must
 * stay stable across QEMU versions. */
struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;
    MemoryRegion mmio;          /* register window (MMIO BAR) */
    MemoryRegion io;            /* register window (PIO BAR) */

    uint32_t mac_reg[0x8000];   /* MAC registers, indexed by dword offset */
    uint16_t phy_reg[0x20];     /* MII PHY registers */
    uint16_t eeprom_data[64];   /* emulated EEPROM contents (16-bit words) */

    uint32_t rxbuf_size;        /* current RX buffer size, from RCTL */
    uint32_t rxbuf_min_shift;   /* RDMTS-derived free-descriptor threshold */
    /* Transmit assembly state, accumulated across TX descriptors. */
    struct e1000_tx {
        unsigned char header[256];      /* saved TSO header prototype */
        unsigned char vlan_header[4];   /* tag inserted on VLAN TX */
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        uint16_t size;                  /* bytes currently in data[] */
        unsigned char vlan_needed;      /* insert VLAN tag on send */
        unsigned char sum_needed;       /* TXSM/IXSM checksum requests */
        bool cptse;                     /* current packet uses TSO */
        e1000x_txd_props props;         /* non-TSO context descriptor */
        e1000x_txd_props tso_props;     /* TSO context descriptor */
        uint16_t tso_frames;            /* segments emitted so far */
        bool busy;                      /* guards against reentrant xmit */
    } tx;

    /* EEPROM microwire bit-banging state (EECD register). */
    struct {
        uint32_t val_in;    /* shifted in from guest driver */
        uint16_t bitnum_in;
        uint16_t bitnum_out;
        uint16_t reading;
        uint32_t old_eecd;
    } eecd_state;

    QEMUTimer *autoneg_timer;   /* delays autoneg completion */

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

    QEMUTimer *flush_queue_timer;   /* defers RX after RCTL writes */

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_MAC_BIT 2
#define E1000_FLAG_TSO_BIT 3
#define E1000_FLAG_VET_BIT 4
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
#define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)

    uint32_t compat_flags;
    bool received_tx_tso;
    bool use_tso_for_migration;
    e1000x_txd_props mig_props;
};
typedef struct E1000State_st E1000State;

/* Test an E1000_FLAG_* compat flag on the local variable 's'. */
#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)

/* Class data: concrete device models differ only in their PHY id. */
struct E1000BaseClass {
    PCIDeviceClass parent_class;
    uint16_t phy_id2;
};
typedef struct E1000BaseClass E1000BaseClass;

#define TYPE_E1000_BASE "e1000-base"

DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
                     E1000, TYPE_E1000_BASE)
162 
163 
/* Mark the link up in the MAC/PHY registers and kick the net layer so
 * packets queued while the link was down get delivered. */
static void
e1000_link_up(E1000State *s)
{
    e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);

    /* E1000_STATUS_LU is tested by e1000_can_receive() */
    qemu_flush_queued_packets(qemu_get_queue(s->nic));
}
172 
/* Complete an emulated auto-negotiation cycle: update the MAC/PHY
 * registers accordingly and flush any packets queued meanwhile. */
static void
e1000_autoneg_done(E1000State *s)
{
    e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);

    /* E1000_STATUS_LU is tested by e1000_can_receive() */
    qemu_flush_queued_packets(qemu_get_queue(s->nic));
}
181 
182 static bool
183 have_autoneg(E1000State *s)
184 {
185     return chkflag(AUTONEG) && (s->phy_reg[MII_BMCR] & MII_BMCR_AUTOEN);
186 }
187 
188 static void
189 set_phy_ctrl(E1000State *s, int index, uint16_t val)
190 {
191     /* bits 0-5 reserved; MII_BMCR_[ANRESTART,RESET] are self clearing */
192     s->phy_reg[MII_BMCR] = val & ~(0x3f |
193                                    MII_BMCR_RESET |
194                                    MII_BMCR_ANRESTART);
195 
196     /*
197      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
198      * migrate during auto negotiation, after migration the link will be
199      * down.
200      */
201     if (have_autoneg(s) && (val & MII_BMCR_ANRESTART)) {
202         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
203     }
204 }
205 
/* Dispatch table for PHY register writes that need side effects; registers
 * without an entry are written directly in set_mdic(). */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [MII_BMCR] = set_phy_ctrl,
};

enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
211 
/* Per-register access capabilities for the emulated PHY; registers not
 * listed here are inaccessible and MDIC accesses to them report an error. */
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
static const char phy_regcap[0x20] = {
    [MII_BMSR]   = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [MII_PHYID1] = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
    [MII_BMCR]   = PHY_RW,    [MII_CTRL1000]               = PHY_RW,
    [MII_ANLPAR] = PHY_R,     [MII_STAT1000]               = PHY_R,
    [MII_ANAR]   = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
    [MII_PHYID2] = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
    [MII_ANER]   = PHY_R,
};
222 
/* PHY register reset values, applied by e1000_reset().
 * MII_PHYID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
static const uint16_t phy_reg_init[] = {
    [MII_BMCR] = MII_BMCR_SPEED1000 |
                 MII_BMCR_FD |
                 MII_BMCR_AUTOEN,

    [MII_BMSR] = MII_BMSR_EXTCAP |
                 MII_BMSR_LINK_ST |   /* link initially up */
                 MII_BMSR_AUTONEG |
                 /* MII_BMSR_AN_COMP: initially NOT completed */
                 MII_BMSR_MFPS |
                 MII_BMSR_EXTSTAT |
                 MII_BMSR_10T_HD |
                 MII_BMSR_10T_FD |
                 MII_BMSR_100TX_HD |
                 MII_BMSR_100TX_FD,

    [MII_PHYID1] = 0x141,
    /* [MII_PHYID2] configured per DevId, from e1000_reset() */
    [MII_ANAR] = 0xde1,
    [MII_ANLPAR] = 0x1e0,
    [MII_CTRL1000] = 0x0e00,
    [MII_STAT1000] = 0x3c00,
    [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
};
250 
/* MAC register reset values, applied by e1000_reset(); registers not
 * listed here reset to zero. */
static const uint32_t mac_reg_init[] = {
    [PBA]     = 0x00100030,
    [LEDCTL]  = 0x602,
    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};
264 
/* Fold @value into *@curr, keeping the smallest non-zero delay.
 * A *curr of 0 means "no delay chosen yet" and is always replaced by a
 * non-zero @value; a @value of 0 contributes nothing. */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;                 /* nothing to contribute */
    }
    if (*curr == 0 || value < *curr) {
        *curr = value;          /* adopt the tighter delay */
    }
}
273 
/* Central interrupt-cause update: store the new cause set in ICR/ICS,
 * apply interrupt mitigation if enabled, and drive the INTx pin level. */
static void
set_interrupt_cause(E1000State *s, int index, uint32_t val)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t pending_ints;
    uint32_t mit_delay;

    s->mac_reg[ICR] = val;

    /*
     * Make sure ICR and ICS registers have the same value.
     * The spec says that the ICS register is write-only.  However in practice,
     * on real hardware ICS is readable, and for reads it has the same value as
     * ICR (except that ICS does not have the clear on read behaviour of ICR).
     *
     * The VxWorks PRO/1000 driver uses this behaviour.
     */
    s->mac_reg[ICS] = val;

    /* Only causes that are both raised and unmasked can assert the pin. */
    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
    if (!s->mit_irq_level && pending_ints) {
        /*
         * Here we detect a potential raising edge. We postpone raising the
         * interrupt line if we are inside the mitigation delay window
         * (s->mit_timer_on == 1).
         * We provide a partial implementation of interrupt mitigation,
         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
         * RADV; relative timers based on TIDV and RDTR are not implemented.
         */
        if (s->mit_timer_on) {
            return;
        }
        if (chkflag(MIT)) {
            /* Compute the next mitigation delay according to pending
             * interrupts and the current values of RADV (provided
             * RDTR!=0), TADV and ITR.
             * Then rearm the timer.
             */
            mit_delay = 0;
            if (s->mit_ide &&
                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
                /* *4 converts RADV/TADV 1024ns units to ITR 256ns units. */
                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
            }
            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
            }
            mit_update_delay(&mit_delay, s->mac_reg[ITR]);

            /*
             * According to e1000 SPEC, the Ethernet controller guarantees
             * a maximum observable interrupt rate of 7813 interrupts/sec.
             * Thus if mit_delay < 500 then the delay should be set to the
             * minimum delay possible which is 500.
             */
            mit_delay = (mit_delay < 500) ? 500 : mit_delay;

            s->mit_timer_on = 1;
            timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                      mit_delay * 256);
            s->mit_ide = 0;
        }
    }

    /* Mirror the computed pending state onto the PCI interrupt pin. */
    s->mit_irq_level = (pending_ints != 0);
    pci_set_irq(d, s->mit_irq_level);
}
341 
/* Mitigation timer callback: the delay window is over, so re-evaluate the
 * interrupt state that may have been postponed. */
static void
e1000_mit_timer(void *opaque)
{
    E1000State *s = opaque;

    s->mit_timer_on = 0;
    /* Call set_interrupt_cause to update the irq level (if necessary). */
    set_interrupt_cause(s, 0, s->mac_reg[ICR]);
}
351 
/* ICS write handler: OR new cause bits into the current set and let
 * set_interrupt_cause() decide whether to raise the interrupt. */
static void
set_ics(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
        s->mac_reg[IMS]);
    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
}
359 
360 static void
361 e1000_autoneg_timer(void *opaque)
362 {
363     E1000State *s = opaque;
364     if (!qemu_get_queue(s->nic)->link_down) {
365         e1000_autoneg_done(s);
366         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
367     }
368 }
369 
370 static bool e1000_vet_init_need(void *opaque)
371 {
372     E1000State *s = opaque;
373 
374     return chkflag(VET);
375 }
376 
377 static void e1000_reset(void *opaque)
378 {
379     E1000State *d = opaque;
380     E1000BaseClass *edc = E1000_GET_CLASS(d);
381     uint8_t *macaddr = d->conf.macaddr.a;
382 
383     timer_del(d->autoneg_timer);
384     timer_del(d->mit_timer);
385     timer_del(d->flush_queue_timer);
386     d->mit_timer_on = 0;
387     d->mit_irq_level = 0;
388     d->mit_ide = 0;
389     memset(d->phy_reg, 0, sizeof d->phy_reg);
390     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
391     d->phy_reg[MII_PHYID2] = edc->phy_id2;
392     memset(d->mac_reg, 0, sizeof d->mac_reg);
393     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
394     d->rxbuf_min_shift = 1;
395     memset(&d->tx, 0, sizeof d->tx);
396 
397     if (qemu_get_queue(d->nic)->link_down) {
398         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
399     }
400 
401     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
402 
403     if (e1000_vet_init_need(d)) {
404         d->mac_reg[VET] = ETH_P_VLAN;
405     }
406 }
407 
408 static void
409 set_ctrl(E1000State *s, int index, uint32_t val)
410 {
411     /* RST is self clearing */
412     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
413 }
414 
/* Timer callback armed by set_rx_control(): deliver packets that were
 * queued while reception was being (re)configured. */
static void
e1000_flush_queue_timer(void *opaque)
{
    E1000State *s = opaque;

    qemu_flush_queued_packets(qemu_get_queue(s->nic));
}
422 
423 static void
424 set_rx_control(E1000State *s, int index, uint32_t val)
425 {
426     s->mac_reg[RCTL] = val;
427     s->rxbuf_size = e1000x_rxbufsize(val);
428     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
429     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
430            s->mac_reg[RCTL]);
431     timer_mod(s->flush_queue_timer,
432               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
433 }
434 
/* MDIC write handler: perform the requested MDIO read/write against the
 * emulated PHY and report completion (READY) or failure (ERROR) back in
 * the register, raising MDAC if interrupt-on-completion was requested. */
static void
set_mdic(E1000State *s, int index, uint32_t val)
{
    uint32_t data = val & E1000_MDIC_DATA_MASK;
    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);

    /* Only PHY address 1 is emulated; anything else is an error. */
    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
    else if (val & E1000_MDIC_OP_READ) {
        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
        if (!(phy_regcap[addr] & PHY_R)) {
            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else
            /* (val ^ data) clears the data field, then the PHY register
             * value is merged into it. */
            val = (val ^ data) | s->phy_reg[addr];
    } else if (val & E1000_MDIC_OP_WRITE) {
        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
        if (!(phy_regcap[addr] & PHY_W)) {
            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else {
            /* Registers with side effects go through the dispatch table;
             * all others are stored directly. */
            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
                phyreg_writeops[addr](s, index, data);
            } else {
                s->phy_reg[addr] = data;
            }
        }
    }
    s->mac_reg[MDIC] = val | E1000_MDIC_READY;

    if (val & E1000_MDIC_INT_EN) {
        set_ics(s, 0, E1000_ICR_MDAC);
    }
}
469 
/* EECD read handler: report EEPROM present/grant plus the last written
 * control bits, and drive the DO (data out) line from the word/bit the
 * microwire state machine is currently shifting out (MSB first). */
static uint32_t
get_eecd(E1000State *s, int index)
{
    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;

    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
           s->eecd_state.bitnum_out, s->eecd_state.reading);
    /* bitnum_out >> 4 selects the 16-bit word, bitnum_out & 0xf the bit;
     * the ^ 0xf flips the bit index so bit 0 maps to the MSB.  DO idles
     * high when no read is in progress. */
    if (!s->eecd_state.reading ||
        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
        ret |= E1000_EECD_DO;
    return ret;
}
483 
/* EECD write handler: emulate the microwire EEPROM serial protocol by
 * tracking chip-select and clock edges bit-banged by the guest driver. */
static void
set_eecd(E1000State *s, int index, uint32_t val)
{
    uint32_t oldval = s->eecd_state.old_eecd;

    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
        return;
    }
    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
        s->eecd_state.val_in = 0;
        s->eecd_state.bitnum_in = 0;
        s->eecd_state.bitnum_out = 0;
        s->eecd_state.reading = 0;
    }
    if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
        return;
    }
    if (!(E1000_EECD_SK & val)) {               /* falling edge */
        s->eecd_state.bitnum_out++;
        return;
    }
    /* Rising clock edge: shift the DI line into the input accumulator. */
    s->eecd_state.val_in <<= 1;
    if (val & E1000_EECD_DI)
        s->eecd_state.val_in |= 1;
    /* After 9 bits (start bit + 2-bit opcode + 6-bit address) decide
     * whether a read was requested and where output should start. */
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
            EEPROM_READ_OPCODE_MICROWIRE);
    }
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
           s->eecd_state.reading);
}
519 
520 static uint32_t
521 flash_eerd_read(E1000State *s, int x)
522 {
523     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
524 
525     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
526         return (s->mac_reg[EERD]);
527 
528     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
529         return (E1000_EEPROM_RW_REG_DONE | r);
530 
531     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
532            E1000_EEPROM_RW_REG_DONE | r);
533 }
534 
/* Compute an Internet checksum over data[css..n) and store it big-endian
 * at data[sloc].  A non-zero @cse bounds the summed region so that @cse
 * is the last byte included. */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    if (cse && cse < n) {
        n = cse + 1;
    }
    if (sloc < n - 1) {
        uint32_t sum = net_checksum_add(n - css, data + css);
        stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
    }
}
547 
548 static inline void
549 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
550 {
551     if (!memcmp(arr, bcast, sizeof bcast)) {
552         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
553     } else if (arr[0] & 1) {
554         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
555     }
556 }
557 
/* Hand a fully assembled frame to the net layer (or loop it back to our
 * own receive path when PHY loopback is enabled) and update the transmit
 * statistics counters. */
static void
e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
{
    /* Size-bucket counters for transmitted packets. */
    static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
                                    PTC1023, PTC1522 };

    NetClientState *nc = qemu_get_queue(s->nic);
    if (s->phy_reg[MII_BMCR] & MII_BMCR_LOOPBACK) {
        qemu_receive_packet(nc, buf, size);
    } else {
        qemu_send_packet(nc, buf, size);
    }
    inc_tx_bcast_or_mcast_count(s, buf);
    e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
}
573 
/* Emit one segment of the frame assembled in s->tx: patch IP/TCP/UDP
 * headers for TSO, apply requested checksums, optionally insert the VLAN
 * tag, send the result and update the transmit statistics. */
static void
xmit_seg(E1000State *s)
{
    uint16_t len;
    unsigned int frames = s->tx.tso_frames, css, sofar;
    struct e1000_tx *tp = &s->tx;
    struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;

    if (tp->cptse) {
        /* TSO: fix up per-segment L3/L4 header fields. */
        css = props->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (props->ip) {    /* IPv4 */
            /* Total length at offset css+2; IP id at css+4 is advanced by
             * the number of segments already sent. */
            stw_be_p(tp->data+css+2, tp->size - css);
            stw_be_p(tp->data+css+4,
                     lduw_be_p(tp->data + css + 4) + frames);
        } else {         /* IPv6 */
            stw_be_p(tp->data+css+4, tp->size - css);
        }
        css = props->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
        if (props->tcp) {
            /* Advance the TCP sequence number by the payload already sent. */
            sofar = frames * props->mss;
            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
            if (props->paylen - sofar > props->mss) {
                /* Not the last segment: clear PSH and FIN in the flags. */
                tp->data[css + 13] &= ~9;    /* PSH, FIN */
            } else if (frames) {
                e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
            }
        } else {    /* UDP */
            stw_be_p(tp->data+css+4, len);
        }
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            // add pseudo-header length before checksum calculation
            void *sp = tp->data + props->tucso;

            phsum = lduw_be_p(sp) + len;
            phsum = (phsum >> 16) + (phsum & 0xffff);
            stw_be_p(sp, phsum);
        }
        tp->tso_frames++;
    }

    /* Apply the checksums requested in the descriptor's POPTS field. */
    if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
        putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
    }
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
        putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
    }
    if (tp->vlan_needed) {
        /* Rebuild the frame with the 4-byte VLAN tag inserted after the
         * MAC addresses; relies on vlan[] and data[] being adjacent. */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else {
        e1000_send_packet(s, tp->data, tp->size);
    }

    /* Update packet/octet counters; GPTC/GOTCL/GOTCH mirror TPT/TOTL/TOTH. */
    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
    e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
    s->mac_reg[GPTC] = s->mac_reg[TPT];
    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
}
640 
/* Consume one transmit descriptor: record context descriptors, accumulate
 * data-descriptor payload into s->tx.data, segment via xmit_seg() when
 * doing TSO, and send the packet once the EOP descriptor is seen. */
static void
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    unsigned int split_size = txd_lower & 0xffff, bytes, sz;
    unsigned int msh = 0xfffff;
    uint64_t addr;
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    struct e1000_tx *tp = &s->tx;

    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
        /* TSE selects which of the two cached contexts is updated. */
        if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
            e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
            s->use_tso_for_migration = 1;
            tp->tso_frames = 0;
        } else {
            e1000x_read_tx_ctx_descr(xp, &tp->props);
            s->use_tso_for_migration = 0;
        }
        return;
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
        // data descriptor
        if (tp->size == 0) {
            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
        }
        tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
    } else {
        // legacy descriptor
        tp->cptse = 0;
    }

    /* Latch the VLAN tag to insert when the frame is eventually sent. */
    if (e1000x_vlan_enabled(s->mac_reg) &&
        e1000x_is_vlan_txd(txd_lower) &&
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
        tp->vlan_needed = 1;
        stw_be_p(tp->vlan_header,
                      le16_to_cpu(s->mac_reg[VET]));
        stw_be_p(tp->vlan_header + 2,
                      le16_to_cpu(dp->upper.fields.special));
    }

    addr = le64_to_cpu(dp->buffer_addr);
    if (tp->cptse) {
        /* TSO path: copy at most header+mss bytes at a time, emitting a
         * segment (and re-seeding the saved header) each time the buffer
         * reaches msh bytes. */
        msh = tp->tso_props.hdr_len + tp->tso_props.mss;
        do {
            bytes = split_size;
            if (tp->size >= msh) {
                goto eop;       /* segment boundary already reached */
            }
            if (tp->size + bytes > msh)
                bytes = msh - tp->size;

            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
            pci_dma_read(d, addr, tp->data + tp->size, bytes);
            sz = tp->size + bytes;
            /* Save the protocol headers once they are fully buffered. */
            if (sz >= tp->tso_props.hdr_len
                && tp->size < tp->tso_props.hdr_len) {
                memmove(tp->header, tp->data, tp->tso_props.hdr_len);
            }
            tp->size = sz;
            addr += bytes;
            if (sz == msh) {
                xmit_seg(s);
                memmove(tp->data, tp->header, tp->tso_props.hdr_len);
                tp->size = tp->tso_props.hdr_len;
            }
            split_size -= bytes;
        } while (bytes && split_size);
    } else {
        /* Non-TSO: append the whole buffer, clamped to the staging area. */
        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
        pci_dma_read(d, addr, tp->data + tp->size, split_size);
        tp->size += split_size;
    }

eop:
    /* Nothing further until the end-of-packet descriptor arrives. */
    if (!(txd_lower & E1000_TXD_CMD_EOP))
        return;
    if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
        xmit_seg(s);
    }
    /* Reset per-packet assembly state for the next frame. */
    tp->tso_frames = 0;
    tp->sum_needed = 0;
    tp->vlan_needed = 0;
    tp->size = 0;
    tp->cptse = 0;
}
730 
/* If the descriptor requested status reporting (RS/RPS), write the DD
 * (descriptor done) status back to guest memory.  Returns the interrupt
 * cause to accumulate (TXDW) or 0 if no writeback was requested. */
static uint32_t
txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);

    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
        return 0;
    /* Set DD and clear the error bits (EC/LC/TU) in the status field. */
    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
    dp->upper.data = cpu_to_le32(txd_upper);
    /* Only the upper dword of the descriptor is written back. */
    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
                  &dp->upper, sizeof(dp->upper));
    return E1000_ICR_TXDW;
}
746 
747 static uint64_t tx_desc_base(E1000State *s)
748 {
749     uint64_t bah = s->mac_reg[TDBAH];
750     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
751 
752     return (bah << 32) + bal;
753 }
754 
/* Drain the transmit ring: walk descriptors from TDH to TDT, process and
 * write back each one, then raise the accumulated interrupt causes.
 * The tx.busy flag prevents reentry (e.g. via loopback receive paths). */
static void
start_xmit(E1000State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    dma_addr_t base;
    struct e1000_tx_desc desc;
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;

    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
        DBGOUT(TX, "tx disabled\n");
        return;
    }

    if (s->tx.busy) {
        return;
    }
    s->tx.busy = true;

    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
        base = tx_desc_base(s) +
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
        pci_dma_read(d, base, &desc, sizeof(desc));

        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
               desc.upper.data);

        process_tx_desc(s, &desc);
        cause |= txdesc_writeback(s, base, &desc);

        /* Advance TDH, wrapping at the end of the ring (TDLEN bytes). */
        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
            s->mac_reg[TDH] = 0;
        /*
         * the following could happen only if guest sw assigns
         * bogus values to TDT/TDLEN.
         * there's nothing too intelligent we could do about this.
         */
        if (s->mac_reg[TDH] == tdh_start ||
            tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
            break;
        }
    }
    s->tx.busy = false;
    set_ics(s, 0, cause);
}
802 
/* Decide whether an incoming frame should be accepted, applying the VLAN
 * filter table, promiscuous/broadcast RCTL modes and finally the unicast/
 * multicast group filters.  Returns non-zero to accept the frame. */
static int
receive_filter(E1000State *s, const uint8_t *buf, int size)
{
    uint32_t rctl = s->mac_reg[RCTL];
    int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);

    /* VLAN filtering: look the VID up in the 4096-bit VFTA bitmap. */
    if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
        e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
        uint16_t vid = lduw_be_p(buf + 14);
        uint32_t vfta = ldl_le_p((uint32_t *)(s->mac_reg + VFTA) +
                                 ((vid >> 5) & 0x7f));
        if ((vfta & (1 << (vid & 0x1f))) == 0) {
            return 0;
        }
    }

    if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
        return 1;
    }

    if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
        e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
        return 1;
    }

    if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
        e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
        return 1;
    }

    /* Fall back to the exact/group address match in the shared helper. */
    return e1000x_rx_group_filter(s->mac_reg, buf);
}
835 
836 static void
837 e1000_set_link_status(NetClientState *nc)
838 {
839     E1000State *s = qemu_get_nic_opaque(nc);
840     uint32_t old_status = s->mac_reg[STATUS];
841 
842     if (nc->link_down) {
843         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
844     } else {
845         if (have_autoneg(s) &&
846             !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
847             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
848         } else {
849             e1000_link_up(s);
850         }
851     }
852 
853     if (s->mac_reg[STATUS] != old_status)
854         set_ics(s, 0, E1000_ICR_LSC);
855 }
856 
/* Return true when the receive ring has enough free descriptors to hold
 * total_size bytes at the current RX buffer size. */
static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
{
    int bufs;
    /* Fast-path short packets */
    if (total_size <= s->rxbuf_size) {
        return s->mac_reg[RDH] != s->mac_reg[RDT];
    }
    /* Count descriptors between head and tail, handling ring wraparound;
     * RDH == RDT means the ring is empty of free buffers here. */
    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
        bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
            s->mac_reg[RDT] - s->mac_reg[RDH];
    } else {
        return false;
    }
    return total_size <= bufs * s->rxbuf_size;
}
874 
/* Net-layer callback: reception is possible only when RX is enabled, at
 * least one buffer is free and no post-RCTL flush timer is pending. */
static bool
e1000_can_receive(NetClientState *nc)
{
    E1000State *s = qemu_get_nic_opaque(nc);

    return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
        e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
}
883 
884 static uint64_t rx_desc_base(E1000State *s)
885 {
886     uint64_t bah = s->mac_reg[RDBAH];
887     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
888 
889     return (bah << 32) + bal;
890 }
891 
/* Record a receive overrun: bump the no-buffer and missed-packet
 * counters and raise the RXO interrupt cause. */
static void
e1000_receiver_overrun(E1000State *s, size_t size)
{
    trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
    e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
    e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
    set_ics(s, 0, E1000_ICS_RXO);
}
900 
/*
 * Main receive path: deliver a scatter-gather packet (@iov/@iovcnt)
 * from the net core into the guest's RX descriptor ring via DMA.
 *
 * Returns the number of bytes consumed, 0 to have the net core queue
 * the packet for a later retry, or -1 on drop (receiver disabled or
 * ring overrun).
 */
static ssize_t
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0;
    uint8_t min_buf[MIN_BUF_SIZE];
    struct iovec min_iov;
    uint8_t *filter_buf = iov->iov_base;
    size_t size = iov_size(iov, iovcnt);
    size_t iov_ofs = 0;
    size_t desc_offset;
    size_t desc_size;
    size_t total_size;

    if (!e1000x_hw_rx_enabled(s->mac_reg)) {
        return -1;
    }

    /* A flush is scheduled; returning 0 makes the net core queue the
     * packet and retry later. */
    if (timer_pending(s->flush_queue_timer)) {
        return 0;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        iov_to_buf(iov, iovcnt, 0, min_buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        min_iov.iov_base = filter_buf = min_buf;
        min_iov.iov_len = size = sizeof(min_buf);
        iovcnt = 1;
        iov = &min_iov;
    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
        /* This is very unlikely, but may happen. */
        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
        filter_buf = min_buf;
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if (e1000x_is_oversized(s->mac_reg, size)) {
        return size;
    }

    /* Dropped by the MAC/VLAN filters: report the packet as consumed. */
    if (!receive_filter(s, filter_buf, size)) {
        return size;
    }

    /* VLAN stripping: save the tag for the descriptor's "special"
     * field, then move the 12-byte MAC header forward over the tag. */
    if (e1000x_vlan_enabled(s->mac_reg) &&
        e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
        vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
        iov_ofs = 4;
        if (filter_buf == iov->iov_base) {
            memmove(filter_buf + 4, filter_buf, 12);
        } else {
            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
            /* Skip leading iov elements fully covered by the 4-byte
             * offset. */
            while (iov->iov_len <= iov_ofs) {
                iov_ofs -= iov->iov_len;
                iov++;
            }
        }
        vlan_status = E1000_RXD_STAT_VP;
        size -= 4;
    }

    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    /* total_size includes the (possibly zero-length) trailing FCS. */
    total_size = size + e1000x_fcs_len(s->mac_reg);
    if (!e1000_has_rxbufs(s, total_size)) {
        e1000_receiver_overrun(s, total_size);
        return -1;
    }
    /* Walk the descriptor ring, filling one buffer per descriptor. */
    do {
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        desc.status &= ~E1000_RXD_STAT_DD;
        if (desc.buffer_addr) {
            if (desc_offset < size) {
                size_t iov_copy;
                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                /* DMA-copy this buffer's worth of data, advancing
                 * through the iovec as elements are exhausted. */
                do {
                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
                    copy_size -= iov_copy;
                    ba += iov_copy;
                    iov_ofs += iov_copy;
                    if (iov_ofs == iov->iov_len) {
                        iov++;
                        iov_ofs = 0;
                    }
                } while (copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { // as per intel docs; skip descriptors with null buf addr
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        /* Write back the descriptor first with DD clear, then set DD in
         * a second, status-only write so the guest never observes a
         * "done" descriptor with stale fields. */
        pci_dma_write(d, base, &desc, sizeof(desc));
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        pci_dma_write(d, base + offsetof(struct e1000_rx_desc, status),
                      &desc.status, sizeof(desc.status));

        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        if (s->mac_reg[RDH] == rdh_start ||
            rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            e1000_receiver_overrun(s, total_size);
            return -1;
        }
    } while (desc_offset < total_size);

    e1000x_update_rx_total_stats(s->mac_reg, size, total_size);

    /* Raise RXT0, plus RXDMT0 if the ring is below the minimum
     * threshold configured via rxbuf_min_shift. */
    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}
1047 
1048 static ssize_t
1049 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1050 {
1051     const struct iovec iov = {
1052         .iov_base = (uint8_t *)buf,
1053         .iov_len = size
1054     };
1055 
1056     return e1000_receive_iov(nc, &iov, 1);
1057 }
1058 
/* Default read handler: plain read of the backing register array. */
static uint32_t
mac_readreg(E1000State *s, int index)
{
    return s->mac_reg[index];
}
1064 
1065 static uint32_t
1066 mac_icr_read(E1000State *s, int index)
1067 {
1068     uint32_t ret = s->mac_reg[ICR];
1069 
1070     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1071     set_interrupt_cause(s, 0, 0);
1072     return ret;
1073 }
1074 
1075 static uint32_t
1076 mac_read_clr4(E1000State *s, int index)
1077 {
1078     uint32_t ret = s->mac_reg[index];
1079 
1080     s->mac_reg[index] = 0;
1081     return ret;
1082 }
1083 
1084 static uint32_t
1085 mac_read_clr8(E1000State *s, int index)
1086 {
1087     uint32_t ret = s->mac_reg[index];
1088 
1089     s->mac_reg[index] = 0;
1090     s->mac_reg[index-1] = 0;
1091     return ret;
1092 }
1093 
1094 static void
1095 mac_writereg(E1000State *s, int index, uint32_t val)
1096 {
1097     uint32_t macaddr[2];
1098 
1099     s->mac_reg[index] = val;
1100 
1101     if (index == RA + 1) {
1102         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1103         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1104         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1105     }
1106 }
1107 
1108 static void
1109 set_rdt(E1000State *s, int index, uint32_t val)
1110 {
1111     s->mac_reg[index] = val & 0xffff;
1112     if (e1000_has_rxbufs(s, 1)) {
1113         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1114     }
1115 }
1116 
/*
 * Generate write handlers that keep only the low `num` bits of the
 * written value; used for registers whose upper bits are reserved.
 */
#define LOW_BITS_SET_FUNC(num)                             \
    static void                                            \
    set_##num##bit(E1000State *s, int index, uint32_t val) \
    {                                                      \
        s->mac_reg[index] = val & (BIT(num) - 1);          \
    }

LOW_BITS_SET_FUNC(4)
LOW_BITS_SET_FUNC(11)
LOW_BITS_SET_FUNC(13)
LOW_BITS_SET_FUNC(16)
1128 
/*
 * TDLEN/RDLEN write handler: descriptor ring length.  Only bits 19:7
 * of the written value are kept, forcing 128-byte alignment.
 */
static void
set_dlen(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xfff80;
}
1134 
/*
 * Write handler shared by TCTL and TDT (see macreg_writeops): store
 * the value, keep TDT within its 16-bit index range, and kick the
 * transmit path to process any newly queued descriptors.
 */
static void
set_tctl(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val;
    s->mac_reg[TDT] &= 0xffff;
    start_xmit(s);
}
1142 
/*
 * ICR write handler.  ICR is write-1-to-clear: drop the acknowledged
 * cause bits and re-evaluate the interrupt line.
 */
static void
set_icr(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_icr %x\n", val);
    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
}
1149 
/*
 * IMC write handler: mask off (disable) the written interrupt causes
 * in IMS, then re-evaluate pending causes against the new mask.
 */
static void
set_imc(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] &= ~val;
    set_ics(s, 0, 0);
}
1156 
/*
 * IMS write handler: unmask (enable) the written interrupt causes,
 * then re-evaluate pending causes against the new mask.
 */
static void
set_ims(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] |= val;
    set_ics(s, 0, 0);
}
1163 
#define getreg(x)    [x] = mac_readreg
typedef uint32_t (*readops)(E1000State *, int);
/*
 * Per-register read dispatch table, indexed by register offset / 4.
 * Most registers are plain reads (mac_readreg); statistics counters
 * are read-to-clear (mac_read_clr4/mac_read_clr8), and ICR/EECD/EERD
 * have dedicated handlers.  A NULL slot means the register is not
 * readable.
 */
static const readops macreg_readops[] = {
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),    getreg(RDFH),     getreg(RDFT),     getreg(RDFHS),
    getreg(RDFTS),    getreg(RDFPC),    getreg(TDFH),     getreg(TDFT),
    getreg(TDFHS),    getreg(TDFTS),    getreg(TDFPC),    getreg(AIT),

    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,

    [CRCERRS ... MPC]     = &mac_readreg,
    [IP6AT ... IP6AT + 3] = &mac_readreg,    [IP4AT ... IP4AT + 6] = &mac_readreg,
    [FFLT ... FFLT + 6]   = &mac_readreg,
    [RA ... RA + 31]      = &mac_readreg,
    [WUPM ... WUPM + 31]  = &mac_readreg,
    [MTA ... MTA + 127]   = &mac_readreg,
    [VFTA ... VFTA + 127] = &mac_readreg,
    [FFMT ... FFMT + 254] = &mac_readreg,
    [FFVT ... FFVT + 254] = &mac_readreg,
    [PBM ... PBM + 16383] = &mac_readreg,
};
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1214 
#define putreg(x)    [x] = mac_writereg
typedef void (*writeops)(E1000State *, int, uint32_t);
/*
 * Per-register write dispatch table, indexed by register offset / 4.
 * A NULL slot means the register is not writable (writes are logged
 * and dropped in e1000_mmio_write).
 */
static const writeops macreg_writeops[] = {
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(IPAV),     putreg(WUC),
    putreg(WUS),

    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL]  = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]   = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]   = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]   = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL]  = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV]  = set_16bit,
    [ITR]    = set_16bit,  [TDFH]   = set_11bit,      [TDFT]  = set_11bit,
    [TDFHS]  = set_13bit,  [TDFTS]  = set_13bit,      [TDFPC] = set_13bit,
    [RDFH]   = set_13bit,  [RDFT]   = set_13bit,      [RDFHS] = set_13bit,
    [RDFTS]  = set_13bit,  [RDFPC]  = set_13bit,      [AIT]   = set_16bit,

    [IP6AT ... IP6AT + 3] = &mac_writereg, [IP4AT ... IP4AT + 6] = &mac_writereg,
    [FFLT ... FFLT + 6]   = &set_11bit,
    [RA ... RA + 31]      = &mac_writereg,
    [WUPM ... WUPM + 31]  = &mac_writereg,
    [MTA ... MTA + 127]   = &mac_writereg,
    [VFTA ... VFTA + 127] = &mac_writereg,
    [FFMT ... FFMT + 254] = &set_4bit,     [FFVT ... FFVT + 254] = &mac_writereg,
    [PBM ... PBM + 16383] = &mac_writereg,
};

enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1246 
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags); the register is only
 *     accessible when the matching compat flag is enabled
 * n - flag needed
 * p - partially implemented */
static const uint8_t mac_reg_access[0x8000] = {
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1299 
1300 static void
1301 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1302                  unsigned size)
1303 {
1304     E1000State *s = opaque;
1305     unsigned int index = (addr & 0x1ffff) >> 2;
1306 
1307     if (index < NWRITEOPS && macreg_writeops[index]) {
1308         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1309             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1310             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1311                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1312                        "It is not fully implemented.\n", index<<2);
1313             }
1314             macreg_writeops[index](s, index, val);
1315         } else {    /* "flag needed" bit is set, but the flag is not active */
1316             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1317                    index<<2);
1318         }
1319     } else if (index < NREADOPS && macreg_readops[index]) {
1320         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1321                index<<2, val);
1322     } else {
1323         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1324                index<<2, val);
1325     }
1326 }
1327 
1328 static uint64_t
1329 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1330 {
1331     E1000State *s = opaque;
1332     unsigned int index = (addr & 0x1ffff) >> 2;
1333 
1334     if (index < NREADOPS && macreg_readops[index]) {
1335         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1336             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1337             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1338                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1339                        "It is not fully implemented.\n", index<<2);
1340             }
1341             return macreg_readops[index](s, index);
1342         } else {    /* "flag needed" bit is set, but the flag is not active */
1343             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1344                    index<<2);
1345         }
1346     } else {
1347         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1348     }
1349     return 0;
1350 }
1351 
/* MMIO BAR ops: all guest accesses are presented to the handlers as
 * 4-byte accesses (.impl min/max access size). */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1361 
1362 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1363                               unsigned size)
1364 {
1365     E1000State *s = opaque;
1366 
1367     (void)s;
1368     return 0;
1369 }
1370 
1371 static void e1000_io_write(void *opaque, hwaddr addr,
1372                            uint64_t val, unsigned size)
1373 {
1374     E1000State *s = opaque;
1375 
1376     (void)s;
1377 }
1378 
/* I/O BAR ops (stub handlers; see e1000_io_read/e1000_io_write). */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1384 
/* VMState field test: true only for version 1 of the "e1000" stream,
 * used to skip fields that existed solely in that old format. */
static bool is_version_1(void *opaque, int version_id)
{
    return version_id == 1;
}
1389 
/*
 * Migration pre-save hook: normalise PHY autonegotiation state and
 * pick which offload-property set travels in the main vmstate
 * structure.  Always returns 0 (success).
 */
static int e1000_pre_save(void *opaque)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /*
     * If link is down and auto-negotiation is supported and ongoing,
     * complete auto-negotiation immediately. This allows us to look
     * at MII_BMSR_AN_COMP to infer link status on load.
     */
    if (nc->link_down && have_autoneg(s)) {
        s->phy_reg[MII_BMSR] |= MII_BMSR_AN_COMP;
    }

    /* Decide which set of props to migrate in the main structure */
    if (chkflag(TSO) || !s->use_tso_for_migration) {
        /* Either we're migrating with the extra subsection, in which
         * case the mig_props is always 'props' OR
         * we've not got the subsection, but 'props' was the last
         * updated.
         */
        s->mig_props = s->tx.props;
    } else {
        /* We're not using the subsection, and 'tso_props' was
         * the last updated.
         */
        s->mig_props = s->tx.tso_props;
    }
    return 0;
}
1420 
/*
 * Migration post-load hook: re-derive state that is not carried on the
 * wire (link status, mitigation timer, offload property sets).
 * Always returns 0 (success).
 */
static int e1000_post_load(void *opaque, int version_id)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /* Without the mitigation compat flag, force the mitigation
     * registers and IRQ level back to their neutral state. */
    if (!chkflag(MIT)) {
        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
            s->mac_reg[TADV] = 0;
        s->mit_irq_level = false;
    }
    s->mit_ide = 0;
    s->mit_timer_on = true;
    /* Arm the mitigation timer to fire almost immediately so any
     * pending causes are re-evaluated after load. */
    timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in mac_reg[STATUS].
     * Alternatively, restart link negotiation if it was in progress. */
    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;

    if (have_autoneg(s) && !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
        nc->link_down = false;
        timer_mod(s->autoneg_timer,
                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
    }

    s->tx.props = s->mig_props;
    if (!s->received_tx_tso) {
        /* We received only one set of offload data (tx.props)
         * and haven't got tx.tso_props.  The best we can do
         * is dupe the data.
         */
        s->tx.tso_props = s->mig_props;
    }
    return 0;
}
1456 
/* Post-load for the tx_tso_state subsection: note that tx.tso_props
 * arrived explicitly, so e1000_post_load need not duplicate it. */
static int e1000_tx_tso_post_load(void *opaque, int version_id)
{
    E1000State *s = opaque;
    s->received_tx_tso = true;
    return 0;
}
1463 
/* Subsection predicate: migrate mitigation state only when the
 * "mitigation" compat flag is enabled. */
static bool e1000_mit_state_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(MIT);
}
1470 
/* Subsection predicate: migrate the full MAC register file only when
 * the MAC compat flag is enabled. */
static bool e1000_full_mac_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(MAC);
}
1477 
/* Subsection predicate: migrate separate TSO offload state only when
 * the TSO compat flag is enabled. */
static bool e1000_tso_state_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(TSO);
}
1484 
/* Optional subsection: interrupt-mitigation registers and IRQ level
 * (gated by e1000_mit_state_needed). */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1499 
/* Optional subsection: the entire 0x8000-entry MAC register array
 * (gated by e1000_full_mac_needed). */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1510 
/* Optional subsection: TSO offload context, carried separately from
 * mig_props (gated by e1000_tso_state_needed). */
static const VMStateDescription vmstate_e1000_tx_tso_state = {
    .name = "e1000/tx_tso_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_tso_state_needed,
    .post_load = e1000_tx_tso_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
        VMSTATE_INT8(tx.tso_props.ip, E1000State),
        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1532 
/* Main migration description for the e1000 device.  Field order and
 * types define the wire format and must not change incompatibly. */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        VMSTATE_UINT8(mig_props.ipcss, E1000State),
        VMSTATE_UINT8(mig_props.ipcso, E1000State),
        VMSTATE_UINT16(mig_props.ipcse, E1000State),
        VMSTATE_UINT8(mig_props.tucss, E1000State),
        VMSTATE_UINT8(mig_props.tucso, E1000State),
        VMSTATE_UINT16(mig_props.tucse, E1000State),
        VMSTATE_UINT32(mig_props.paylen, E1000State),
        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
        VMSTATE_UINT16(mig_props.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(mig_props.ip, E1000State),
        VMSTATE_INT8(mig_props.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        &vmstate_e1000_tx_tso_state,
        NULL
    }
};
1617 
1618 /*
1619  * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1620  * Note: A valid DevId will be inserted during pci_e1000_realize().
1621  */
1622 static const uint16_t e1000_eeprom_template[64] = {
1623     0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1624     0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1625     0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1626     0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1627     0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1628     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1629     0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1630     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1631 };
1632 
1633 /* PCI interface */
1634 
/*
 * Set up the MMIO and I/O memory regions.  MMIO writes are coalesced
 * for performance, except for registers with immediate side effects
 * (interrupt and tx-kick registers) listed in excluded_regs.
 */
static void
e1000_mmio_setup(E1000State *d)
{
    int i;
    /* Registers that must not be write-coalesced; PNPMMIO_SIZE acts as
     * the terminating sentinel. */
    const uint32_t excluded_regs[] = {
        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
    };

    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
                          "e1000-mmio", PNPMMIO_SIZE);
    /* Coalesce each gap between consecutive excluded registers. */
    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
                                     excluded_regs[i+1] - excluded_regs[i] - 4);
    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
}
1652 
/* PCI unrealize: free the device timers, then unregister the NIC. */
static void
pci_e1000_uninit(PCIDevice *dev)
{
    E1000State *d = E1000(dev);

    timer_free(d->autoneg_timer);
    timer_free(d->mit_timer);
    timer_free(d->flush_queue_timer);
    qemu_del_nic(d->nic);
}
1663 
/* Callbacks hooking this device into QEMU's net core. */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .link_status_changed = e1000_set_link_status,
};
1672 
1673 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1674                                 uint32_t val, int len)
1675 {
1676     E1000State *s = E1000(pci_dev);
1677 
1678     pci_default_write_config(pci_dev, address, val, len);
1679 
1680     if (range_covers_byte(address, len, PCI_COMMAND) &&
1681         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1682         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1683     }
1684 }
1685 
/*
 * PCI realize: set up config space, BARs, EEPROM contents, the NIC
 * backend and the device timers.
 */
static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
{
    DeviceState *dev = DEVICE(pci_dev);
    E1000State *d = E1000(pci_dev);
    uint8_t *pci_conf;
    uint8_t *macaddr;

    /* Intercept config writes to catch bus-master enable. */
    pci_dev->config_write = e1000_write_config;

    pci_conf = pci_dev->config;

    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;

    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */

    e1000_mmio_setup(d);

    /* BAR 0: register MMIO window; BAR 1: (stubbed) I/O port window. */
    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);

    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);

    qemu_macaddr_default_if_unset(&d->conf.macaddr);
    macaddr = d->conf.macaddr.a;

    /* Build the EEPROM image from the template, inserting the real
     * device ID and MAC address. */
    e1000x_core_prepare_eeprom(d->eeprom_data,
                               e1000_eeprom_template,
                               sizeof(e1000_eeprom_template),
                               PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
                               macaddr);

    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
                          object_get_typename(OBJECT(d)), dev->id, d);

    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);

    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
    d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                        e1000_flush_queue_timer, d);
}
1727 
1728 static void qdev_e1000_reset(DeviceState *dev)
1729 {
1730     E1000State *d = E1000(dev);
1731     e1000_reset(d);
1732 }
1733 
/*
 * User-visible properties.  The compat_flags bits let machine types
 * disable newer behavior to keep migration compatible with older QEMU.
 */
static Property e1000_properties[] = {
    DEFINE_NIC_PROPERTIES(E1000State, conf),
    DEFINE_PROP_BIT("autonegotiation", E1000State,
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
    DEFINE_PROP_BIT("mitigation", E1000State,
                    compat_flags, E1000_FLAG_MIT_BIT, true),
    DEFINE_PROP_BIT("extra_mac_registers", E1000State,
                    compat_flags, E1000_FLAG_MAC_BIT, true),
    DEFINE_PROP_BIT("migrate_tso_props", E1000State,
                    compat_flags, E1000_FLAG_TSO_BIT, true),
    DEFINE_PROP_BIT("init-vet", E1000State,
                    compat_flags, E1000_FLAG_VET_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};
1748 
/* Per-variant identification data, passed as class_data to class_init. */
typedef struct E1000Info {
    const char *name;       /* QOM type name of the concrete device */
    uint16_t   device_id;   /* PCI device id */
    uint8_t    revision;    /* PCI revision id */
    uint16_t   phy_id2;     /* PHY identifier register 2 value */
} E1000Info;
1755 
1756 static void e1000_class_init(ObjectClass *klass, void *data)
1757 {
1758     DeviceClass *dc = DEVICE_CLASS(klass);
1759     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1760     E1000BaseClass *e = E1000_CLASS(klass);
1761     const E1000Info *info = data;
1762 
1763     k->realize = pci_e1000_realize;
1764     k->exit = pci_e1000_uninit;
1765     k->romfile = "efi-e1000.rom";
1766     k->vendor_id = PCI_VENDOR_ID_INTEL;
1767     k->device_id = info->device_id;
1768     k->revision = info->revision;
1769     e->phy_id2 = info->phy_id2;
1770     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1771     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1772     dc->desc = "Intel Gigabit Ethernet";
1773     dc->reset = qdev_e1000_reset;
1774     dc->vmsd = &vmstate_e1000;
1775     device_class_set_props(dc, e1000_properties);
1776 }
1777 
1778 static void e1000_instance_init(Object *obj)
1779 {
1780     E1000State *n = E1000(obj);
1781     device_add_bootindex_property(obj, &n->conf.bootindex,
1782                                   "bootindex", "/ethernet-phy@0",
1783                                   DEVICE(n));
1784 }
1785 
/* Abstract base type; concrete variants are registered from e1000_devices. */
static const TypeInfo e1000_base_info = {
    .name          = TYPE_E1000_BASE,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(E1000State),
    .instance_init = e1000_instance_init,
    .class_size    = sizeof(E1000BaseClass),
    .abstract      = true,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};
1798 
/* The chip variants this model can emulate; "e1000" is the 82540EM. */
static const E1000Info e1000_devices[] = {
    {
        .name      = "e1000",
        .device_id = E1000_DEV_ID_82540EM,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
    {
        .name      = "e1000-82544gc",
        .device_id = E1000_DEV_ID_82544GC_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_82544x,
    },
    {
        .name      = "e1000-82545em",
        .device_id = E1000_DEV_ID_82545EM_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
};
1819 
1820 static void e1000_register_types(void)
1821 {
1822     int i;
1823 
1824     type_register_static(&e1000_base_info);
1825     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1826         const E1000Info *info = &e1000_devices[i];
1827         TypeInfo type_info = {};
1828 
1829         type_info.name = info->name;
1830         type_info.parent = TYPE_E1000_BASE;
1831         type_info.class_data = (void *)info;
1832         type_info.class_init = e1000_class_init;
1833 
1834         type_register(&type_info);
1835     }
1836 }
1837 
1838 type_init(e1000_register_types)
1839