xref: /openbmc/qemu/hw/net/e1000.c (revision 63e7af2035242dda6e2460f4eadbbe6f58c67614)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/net/mii.h"
30 #include "hw/pci/pci_device.h"
31 #include "hw/qdev-properties.h"
32 #include "migration/vmstate.h"
33 #include "net/eth.h"
34 #include "net/net.h"
35 #include "net/checksum.h"
36 #include "system/system.h"
37 #include "system/dma.h"
38 #include "qemu/iov.h"
39 #include "qemu/module.h"
40 #include "qemu/range.h"
41 
42 #include "e1000_common.h"
43 #include "e1000x_common.h"
44 #include "trace.h"
45 #include "qom/object.h"
46 
/*
 * Uncomment the define below to compile in the DBGOUT() debug output.
 */
/* #define E1000_DEBUG */

#ifdef E1000_DEBUG
/* Debug categories; category X owns bit (1 << DEBUG_X) of debugflags. */
enum {
    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
};
/* Bit mask of debug category x (written without the DEBUG_ prefix). */
#define DBGBIT(x)    (1<<DEBUG_##x)
/* Set of categories that get printed; TXERR and GENERAL by default. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/*
 * Print a printf-style message to stderr, prefixed with "e1000: ", when
 * the category `what` is enabled in debugflags.
 */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
/* Debug output compiled out: DBGOUT() expands to an empty statement. */
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif
66 
/*
 * Region sizes.  NOTE(review): presumably the sizes of the I/O port and
 * MMIO regions (E1000State.io / E1000State.mmio); their use is not visible
 * in this part of the file -- confirm.
 */
#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000

/*
 * Number of leading bytes the receive path needs in one contiguous buffer
 * for filtering: ETH_HLEN plus 4 more bytes (presumably room for one VLAN
 * tag -- confirm).
 */
#define MAXIMUM_ETHERNET_HDR_LEN (ETH_HLEN + 4)
71 
72 /*
73  * HW models:
74  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
75  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
76  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
77  *  Others never tested
78  */
79 
/*
 * Per-device state of one emulated e1000 NIC.
 */
struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;
    MemoryRegion mmio;
    MemoryRegion io;

    /* Register files and EEPROM contents, indexed by register/word number. */
    uint32_t mac_reg[0x8000];
    uint16_t phy_reg[0x20];
    uint16_t eeprom_data[64];

    /* Both derived from RCTL by set_rx_control(). */
    uint32_t rxbuf_size;
    uint32_t rxbuf_min_shift;
    /* State of the packet currently being assembled for transmission. */
    struct e1000_tx {
        /* Saved copy of the TSO header, restored before each new segment. */
        unsigned char header[256];
        /* VLAN tag (VET value + descriptor "special" field) to insert. */
        unsigned char vlan_header[4];
        /* Fields vlan and data must not be reordered or separated. */
        /*
         * xmit_seg() sends a tagged frame starting at vlan, i.e. it relies
         * on vlan being the 4 bytes immediately in front of data.
         */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        uint16_t size;              /* Bytes currently held in data. */
        unsigned char vlan_needed;  /* Insert vlan_header when sending. */
        unsigned char sum_needed;   /* E1000_TXD_POPTS_* bits of the packet. */
        bool cptse;                 /* Current data descriptor has TSE set. */
        e1000x_txd_props props;     /* Last non-TSE context descriptor. */
        e1000x_txd_props tso_props; /* Last TSE context descriptor. */
        uint16_t tso_frames;        /* TSO segments sent for this packet. */
        bool busy;                  /* start_xmit() is already running. */
    } tx;

    /* State of the bit-banged EEPROM interface behind the EECD register. */
    struct {
        uint32_t val_in;    /* shifted in from guest driver */
        uint16_t bitnum_in;     /* Bits clocked in since CS was raised. */
        uint16_t bitnum_out;    /* Index of the EEPROM bit presented on DO. */
        uint16_t reading;       /* Non-zero once a read opcode was decoded. */
        uint32_t old_eecd;      /* Last written EECD value (masked). */
    } eecd_state;

    QEMUTimer *autoneg_timer;

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

    /* While this timer is pending the device does not accept packets. */
    QEMUTimer *flush_queue_timer;

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_TSO_BIT 3
#define E1000_FLAG_VET_BIT 4
#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
#define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)

    uint32_t compat_flags;      /* E1000_FLAG_* bits, tested with chkflag(). */
    /*
     * NOTE(review): the three fields below are presumably migration
     * bookkeeping; apart from use_tso_for_migration (set when a context
     * descriptor is processed) their users are not visible in this part
     * of the file.
     */
    bool received_tx_tso;
    bool use_tso_for_migration;
    e1000x_txd_props mig_props;
};
typedef struct E1000State_st E1000State;
141 
/*
 * Test compat flag E1000_FLAG_<x>.  Relies on a variable named `s` that
 * points to the E1000State being in scope at the point of use.
 */
#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
143 
/*
 * Class data shared by all e1000 variants; phy_id2 is the per-model value
 * loaded into the MII_PHYID2 PHY register on reset.
 */
struct E1000BaseClass {
    PCIDeviceClass parent_class;
    uint16_t phy_id2;
};
typedef struct E1000BaseClass E1000BaseClass;
149 
/* QOM type name of the e1000 base type. */
#define TYPE_E1000_BASE "e1000-base"

/*
 * Declares the QOM cast helpers for this type; this file uses E1000() and
 * E1000_GET_CLASS().
 */
DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
                     E1000, TYPE_E1000_BASE)
154 
155 
156 static void
157 e1000_link_up(E1000State *s)
158 {
159     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
160 
161     /* E1000_STATUS_LU is tested by e1000_can_receive() */
162     qemu_flush_queued_packets(qemu_get_queue(s->nic));
163 }
164 
165 static void
166 e1000_autoneg_done(E1000State *s)
167 {
168     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
169 
170     /* E1000_STATUS_LU is tested by e1000_can_receive() */
171     qemu_flush_queued_packets(qemu_get_queue(s->nic));
172 }
173 
174 static bool
175 have_autoneg(E1000State *s)
176 {
177     return (s->phy_reg[MII_BMCR] & MII_BMCR_AUTOEN);
178 }
179 
180 static void
181 set_phy_ctrl(E1000State *s, int index, uint16_t val)
182 {
183     /* bits 0-5 reserved; MII_BMCR_[ANRESTART,RESET] are self clearing */
184     s->phy_reg[MII_BMCR] = val & ~(0x3f |
185                                    MII_BMCR_RESET |
186                                    MII_BMCR_ANRESTART);
187 
188     /*
189      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
190      * migrate during auto negotiation, after migration the link will be
191      * down.
192      */
193     if (have_autoneg(s) && (val & MII_BMCR_ANRESTART)) {
194         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
195     }
196 }
197 
/*
 * Write handlers for PHY registers that need more than a plain store,
 * indexed by PHY register number.  set_mdic() stores the value directly
 * for registers without an entry here.
 */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [MII_BMCR] = set_phy_ctrl,
};

/* Number of slots in phyreg_writeops[]; higher indexes have no handler. */
enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
203 
204 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
205 static const char phy_regcap[0x20] = {
206     [MII_BMSR]   = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
207     [MII_PHYID1] = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
208     [MII_BMCR]   = PHY_RW,    [MII_CTRL1000]               = PHY_RW,
209     [MII_ANLPAR] = PHY_R,     [MII_STAT1000]               = PHY_R,
210     [MII_ANAR]   = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
211     [MII_PHYID2] = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
212     [MII_ANER]   = PHY_R,
213 };
214 
215 /* MII_PHYID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
216 static const uint16_t phy_reg_init[] = {
217     [MII_BMCR] = MII_BMCR_SPEED1000 |
218                  MII_BMCR_FD |
219                  MII_BMCR_AUTOEN,
220 
221     [MII_BMSR] = MII_BMSR_EXTCAP |
222                  MII_BMSR_LINK_ST |   /* link initially up */
223                  MII_BMSR_AUTONEG |
224                  /* MII_BMSR_AN_COMP: initially NOT completed */
225                  MII_BMSR_MFPS |
226                  MII_BMSR_EXTSTAT |
227                  MII_BMSR_10T_HD |
228                  MII_BMSR_10T_FD |
229                  MII_BMSR_100TX_HD |
230                  MII_BMSR_100TX_FD,
231 
232     [MII_PHYID1] = 0x141,
233     /* [MII_PHYID2] configured per DevId, from e1000_reset() */
234     [MII_ANAR] = MII_ANAR_CSMACD | MII_ANAR_10 |
235                  MII_ANAR_10FD | MII_ANAR_TX |
236                  MII_ANAR_TXFD | MII_ANAR_PAUSE |
237                  MII_ANAR_PAUSE_ASYM,
238     [MII_ANLPAR] = MII_ANLPAR_10 | MII_ANLPAR_10FD |
239                    MII_ANLPAR_TX | MII_ANLPAR_TXFD,
240     [MII_CTRL1000] = MII_CTRL1000_FULL | MII_CTRL1000_PORT |
241                      MII_CTRL1000_MASTER,
242     [MII_STAT1000] = MII_STAT1000_HALF | MII_STAT1000_FULL |
243                      MII_STAT1000_ROK | MII_STAT1000_LOK,
244     [M88E1000_PHY_SPEC_CTRL] = 0x360,
245     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
246     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
247 };
248 
249 static const uint32_t mac_reg_init[] = {
250     [PBA]     = 0x00100030,
251     [LEDCTL]  = 0x602,
252     [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
253                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
254     [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
255                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
256                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
257                 E1000_STATUS_LU,
258     [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
259                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
260                 E1000_MANC_RMCP_EN,
261 };
262 
/*
 * Lower *curr to value when value is a valid, smaller delay.
 *
 * A value of 0 means "not set": a zero @value is ignored, and a zero
 * *curr is replaced by any non-zero @value.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;
    }
    if (*curr != 0 && *curr <= value) {
        return;
    }
    *curr = value;
}
271 
272 static void
273 set_interrupt_cause(E1000State *s, int index, uint32_t val)
274 {
275     PCIDevice *d = PCI_DEVICE(s);
276     uint32_t pending_ints;
277     uint32_t mit_delay;
278 
279     s->mac_reg[ICR] = val;
280 
281     /*
282      * Make sure ICR and ICS registers have the same value.
283      * The spec says that the ICS register is write-only.  However in practice,
284      * on real hardware ICS is readable, and for reads it has the same value as
285      * ICR (except that ICS does not have the clear on read behaviour of ICR).
286      *
287      * The VxWorks PRO/1000 driver uses this behaviour.
288      */
289     s->mac_reg[ICS] = val;
290 
291     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
292     if (!s->mit_irq_level && pending_ints) {
293         /*
294          * Here we detect a potential raising edge. We postpone raising the
295          * interrupt line if we are inside the mitigation delay window
296          * (s->mit_timer_on == 1).
297          * We provide a partial implementation of interrupt mitigation,
298          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
299          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
300          * RADV; relative timers based on TIDV and RDTR are not implemented.
301          */
302         if (s->mit_timer_on) {
303             return;
304         }
305 
306         /* Compute the next mitigation delay according to pending
307          * interrupts and the current values of RADV (provided
308          * RDTR!=0), TADV and ITR.
309          * Then rearm the timer.
310          */
311         mit_delay = 0;
312         if (s->mit_ide &&
313                 (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
314             mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
315         }
316         if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
317             mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
318         }
319         mit_update_delay(&mit_delay, s->mac_reg[ITR]);
320 
321         /*
322          * According to e1000 SPEC, the Ethernet controller guarantees
323          * a maximum observable interrupt rate of 7813 interrupts/sec.
324          * Thus if mit_delay < 500 then the delay should be set to the
325          * minimum delay possible which is 500.
326          */
327         mit_delay = (mit_delay < 500) ? 500 : mit_delay;
328 
329         s->mit_timer_on = 1;
330         timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
331                   mit_delay * 256);
332         s->mit_ide = 0;
333     }
334 
335     s->mit_irq_level = (pending_ints != 0);
336     pci_set_irq(d, s->mit_irq_level);
337 }
338 
339 static void
340 e1000_mit_timer(void *opaque)
341 {
342     E1000State *s = opaque;
343 
344     s->mit_timer_on = 0;
345     /* Call set_interrupt_cause to update the irq level (if necessary). */
346     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
347 }
348 
349 static void
350 set_ics(E1000State *s, int index, uint32_t val)
351 {
352     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
353         s->mac_reg[IMS]);
354     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
355 }
356 
357 static void
358 e1000_autoneg_timer(void *opaque)
359 {
360     E1000State *s = opaque;
361     if (!qemu_get_queue(s->nic)->link_down) {
362         e1000_autoneg_done(s);
363         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
364     }
365 }
366 
367 static bool e1000_vet_init_need(void *opaque)
368 {
369     E1000State *s = opaque;
370 
371     return chkflag(VET);
372 }
373 
374 static void e1000_reset_hold(Object *obj, ResetType type)
375 {
376     E1000State *d = E1000(obj);
377     E1000BaseClass *edc = E1000_GET_CLASS(d);
378     uint8_t *macaddr = d->conf.macaddr.a;
379 
380     timer_del(d->autoneg_timer);
381     timer_del(d->mit_timer);
382     timer_del(d->flush_queue_timer);
383     d->mit_timer_on = 0;
384     d->mit_irq_level = 0;
385     d->mit_ide = 0;
386     memset(d->phy_reg, 0, sizeof d->phy_reg);
387     memcpy(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
388     d->phy_reg[MII_PHYID2] = edc->phy_id2;
389     memset(d->mac_reg, 0, sizeof d->mac_reg);
390     memcpy(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
391     d->rxbuf_min_shift = 1;
392     memset(&d->tx, 0, sizeof d->tx);
393 
394     if (qemu_get_queue(d->nic)->link_down) {
395         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
396     }
397 
398     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
399 
400     if (e1000_vet_init_need(d)) {
401         d->mac_reg[VET] = ETH_P_VLAN;
402     }
403 }
404 
405 static void
406 set_ctrl(E1000State *s, int index, uint32_t val)
407 {
408     /* RST is self clearing */
409     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
410 }
411 
412 static void
413 e1000_flush_queue_timer(void *opaque)
414 {
415     E1000State *s = opaque;
416 
417     qemu_flush_queued_packets(qemu_get_queue(s->nic));
418 }
419 
420 static void
421 set_rx_control(E1000State *s, int index, uint32_t val)
422 {
423     s->mac_reg[RCTL] = val;
424     s->rxbuf_size = e1000x_rxbufsize(val);
425     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
426     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
427            s->mac_reg[RCTL]);
428     timer_mod(s->flush_queue_timer,
429               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
430 }
431 
432 static void
433 set_mdic(E1000State *s, int index, uint32_t val)
434 {
435     uint32_t data = val & E1000_MDIC_DATA_MASK;
436     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
437 
438     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
439         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
440     else if (val & E1000_MDIC_OP_READ) {
441         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
442         if (!(phy_regcap[addr] & PHY_R)) {
443             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
444             val |= E1000_MDIC_ERROR;
445         } else
446             val = (val ^ data) | s->phy_reg[addr];
447     } else if (val & E1000_MDIC_OP_WRITE) {
448         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
449         if (!(phy_regcap[addr] & PHY_W)) {
450             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
451             val |= E1000_MDIC_ERROR;
452         } else {
453             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
454                 phyreg_writeops[addr](s, index, data);
455             } else {
456                 s->phy_reg[addr] = data;
457             }
458         }
459     }
460     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
461 
462     if (val & E1000_MDIC_INT_EN) {
463         set_ics(s, 0, E1000_ICR_MDAC);
464     }
465 }
466 
467 static uint32_t
468 get_eecd(E1000State *s, int index)
469 {
470     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
471 
472     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
473            s->eecd_state.bitnum_out, s->eecd_state.reading);
474     if (!s->eecd_state.reading ||
475         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
476           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
477         ret |= E1000_EECD_DO;
478     return ret;
479 }
480 
481 static void
482 set_eecd(E1000State *s, int index, uint32_t val)
483 {
484     uint32_t oldval = s->eecd_state.old_eecd;
485 
486     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
487             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
488     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
489         return;
490     }
491     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
492         s->eecd_state.val_in = 0;
493         s->eecd_state.bitnum_in = 0;
494         s->eecd_state.bitnum_out = 0;
495         s->eecd_state.reading = 0;
496     }
497     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
498         return;
499     }
500     if (!(E1000_EECD_SK & val)) {               /* falling edge */
501         s->eecd_state.bitnum_out++;
502         return;
503     }
504     s->eecd_state.val_in <<= 1;
505     if (val & E1000_EECD_DI)
506         s->eecd_state.val_in |= 1;
507     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
508         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
509         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
510             EEPROM_READ_OPCODE_MICROWIRE);
511     }
512     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
513            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
514            s->eecd_state.reading);
515 }
516 
517 static uint32_t
518 flash_eerd_read(E1000State *s, int x)
519 {
520     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
521 
522     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
523         return (s->mac_reg[EERD]);
524 
525     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
526         return (E1000_EEPROM_RW_REG_DONE | r);
527 
528     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
529            E1000_EEPROM_RW_REG_DONE | r);
530 }
531 
/*
 * Compute an Internet checksum over part of a packet and store it,
 * big-endian, inside the packet.
 *
 * @data: start of the packet
 * @n:    packet length in bytes
 * @sloc: offset at which the 16-bit checksum is stored
 * @css:  offset of the first byte covered by the checksum
 * @cse:  offset of the last byte covered by the checksum; 0, or a value
 *        not below @n, means "up to the end of the packet"
 *
 * Nothing is written unless both checksum bytes lie within the
 * (possibly shortened) length.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;

    if (cse && cse < n) {
        n = cse + 1;
    }

    /*
     * Test n >= 2 before computing n - 1: with n == 0 the unsigned
     * subtraction would wrap to UINT32_MAX, the range check would pass for
     * any sloc and a checksum would be written outside the empty packet.
     */
    if (n >= 2 && sloc < n - 1) {
        /*
         * NOTE(review): when css >= n the length n - css wraps; this
         * relies on net_checksum_add() treating a non-positive length as
         * empty -- confirm.
         */
        sum = net_checksum_add(n - css, data + css);
        stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
    }
}
544 
545 static inline void
546 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
547 {
548     if (is_broadcast_ether_addr(arr)) {
549         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
550     } else if (is_multicast_ether_addr(arr)) {
551         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
552     }
553 }
554 
555 static void
556 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
557 {
558     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
559                                     PTC1023, PTC1522 };
560 
561     NetClientState *nc = qemu_get_queue(s->nic);
562     if (s->phy_reg[MII_BMCR] & MII_BMCR_LOOPBACK) {
563         qemu_receive_packet(nc, buf, size);
564     } else {
565         qemu_send_packet(nc, buf, size);
566     }
567     inc_tx_bcast_or_mcast_count(s, buf);
568     e1000x_increase_size_stats(s->mac_reg, PTCregs, size + 4);
569 }
570 
/*
 * Transmit the frame currently assembled in s->tx.data (s->tx.size bytes).
 *
 * For a TSO segment (tx.cptse) the headers are first patched in place for
 * this segment.  Then the requested checksums are inserted, an optional
 * VLAN tag is added, the frame is sent and the statistics are updated.
 */
static void
xmit_seg(E1000State *s)
{
    uint16_t len;
    unsigned int frames = s->tx.tso_frames, css, sofar;
    struct e1000_tx *tp = &s->tx;
    /* TSO segments use the TSE context, everything else the plain one. */
    struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;

    if (tp->cptse) {
        /* IP header: fix up the length, and for IPv4 the 16-bit id too. */
        css = props->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (props->ip) {    /* IPv4 */
            stw_be_p(tp->data+css+2, tp->size - css);
            /* Add the number of segments already sent to the field at +4. */
            stw_be_p(tp->data+css+4,
                     lduw_be_p(tp->data + css + 4) + frames);
        } else {         /* IPv6 */
            stw_be_p(tp->data+css+4, tp->size - css);
        }
        /* TCP/UDP header */
        css = props->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
        if (props->tcp) {
            /* Payload bytes carried by the segments sent so far. */
            sofar = frames * props->mss;
            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
            if (props->paylen - sofar > props->mss) {
                /* More segments follow this one. */
                tp->data[css + 13] &= ~9;    /* PSH, FIN */
            } else if (frames) {
                /* Last segment of a packet that needed more than one. */
                e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
            }
        } else {    /* UDP */
            stw_be_p(tp->data+css+4, len);
        }
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            /* add pseudo-header length before checksum calculation */
            void *sp = tp->data + props->tucso;

            phsum = lduw_be_p(sp) + len;
            /* Fold the carry back into the low 16 bits (once). */
            phsum = (phsum >> 16) + (phsum & 0xffff);
            stw_be_p(sp, phsum);
        }
        tp->tso_frames++;
    }

    if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
        putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
    }
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
        putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
    }
    if (tp->vlan_needed) {
        /*
         * Insert the 4-byte tag after the 12 address bytes without moving
         * the payload: shift the addresses 4 bytes towards the front, into
         * tp->vlan (which directly precedes tp->data), and write the tag
         * into the gap.  The frame then starts at tp->vlan.
         */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else {
        e1000_send_packet(s, tp->data, tp->size);
    }

    /* Packet and octet counters; octets are counted as size + 4. */
    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
    e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size + 4);
    e1000x_inc_reg_if_not_full(s->mac_reg, GPTC);
    e1000x_grow_8reg_if_not_full(s->mac_reg, GOTCL, s->tx.size + 4);
}
636 
/*
 * Process one transmit descriptor @dp (already copied from guest memory).
 *
 * A context descriptor only updates the stored offload parameters.  A data
 * or legacy descriptor has its buffer appended to s->tx.data.  With TSO
 * active a segment is sent every time header + MSS bytes have been
 * collected.  On the descriptor carrying EOP the remaining frame is sent
 * and the per-packet state is cleared.
 */
static void
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    /* split_size: bytes of this descriptor's buffer not yet consumed. */
    unsigned int split_size = txd_lower & 0xffff, bytes, sz;
    /* msh: full size of one TSO segment (headers + MSS), set below. */
    unsigned int msh = 0xfffff;
    uint64_t addr;
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    struct e1000_tx *tp = &s->tx;

    /* Remember a set IDE bit; the interrupt mitigation code consumes it. */
    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
        /* TSE and non-TSE contexts are stored separately. */
        if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
            e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
            s->use_tso_for_migration = 1;
            tp->tso_frames = 0;
        } else {
            e1000x_read_tx_ctx_descr(xp, &tp->props);
            s->use_tso_for_migration = 0;
        }
        /* A context descriptor carries no packet data. */
        return;
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
        /* data descriptor */
        if (tp->size == 0) {
            /* First descriptor of a packet: latch its POPTS bits. */
            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
        }
        tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
    } else {
        /* legacy descriptor */
        tp->cptse = 0;
    }

    /*
     * Prepare the VLAN tag (VET followed by the descriptor's "special"
     * field) when VLAN handling is enabled and e1000x_is_vlan_txd() accepts
     * the descriptor; without TSO only on the EOP descriptor.
     */
    if (e1000x_vlan_enabled(s->mac_reg) &&
        e1000x_is_vlan_txd(txd_lower) &&
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
        tp->vlan_needed = 1;
        stw_be_p(tp->vlan_header,
                      le16_to_cpu(s->mac_reg[VET]));
        stw_be_p(tp->vlan_header + 2,
                      le16_to_cpu(dp->upper.fields.special));
    }

    addr = le64_to_cpu(dp->buffer_addr);
    if (tp->cptse) {
        msh = tp->tso_props.hdr_len + tp->tso_props.mss;
        do {
            bytes = split_size;
            /* Already at or past a full segment: stop reading. */
            if (tp->size >= msh) {
                goto eop;
            }
            /* Read at most up to the end of the current segment ... */
            if (tp->size + bytes > msh)
                bytes = msh - tp->size;

            /* ... and never beyond the end of tp->data. */
            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
            pci_dma_read(d, addr, tp->data + tp->size, bytes);
            sz = tp->size + bytes;
            /* The header just became complete: keep a copy of it. */
            if (sz >= tp->tso_props.hdr_len
                && tp->size < tp->tso_props.hdr_len) {
                memmove(tp->header, tp->data, tp->tso_props.hdr_len);
            }
            tp->size = sz;
            addr += bytes;
            if (sz == msh) {
                /*
                 * Segment full: send it, then start the next one with a
                 * fresh copy of the saved header.
                 */
                xmit_seg(s);
                memmove(tp->data, tp->header, tp->tso_props.hdr_len);
                tp->size = tp->tso_props.hdr_len;
            }
            split_size -= bytes;
        } while (bytes && split_size);
    } else {
        /* No TSO: append the buffer, clamped to the space left. */
        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
        pci_dma_read(d, addr, tp->data + tp->size, split_size);
        tp->size += split_size;
    }

eop:
    /* The packet continues in the next descriptor. */
    if (!(txd_lower & E1000_TXD_CMD_EOP))
        return;
    /* Send what is left, unless it is a TSO packet with a partial header. */
    if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
        xmit_seg(s);
    }
    /* End of packet: clear the per-packet state. */
    tp->tso_frames = 0;
    tp->sum_needed = 0;
    tp->vlan_needed = 0;
    tp->size = 0;
    tp->cptse = 0;
}
726 
727 static uint32_t
728 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
729 {
730     PCIDevice *d = PCI_DEVICE(s);
731     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
732 
733     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
734         return 0;
735     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
736                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
737     dp->upper.data = cpu_to_le32(txd_upper);
738     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
739                   &dp->upper, sizeof(dp->upper));
740     return E1000_ICR_TXDW;
741 }
742 
743 static uint64_t tx_desc_base(E1000State *s)
744 {
745     uint64_t bah = s->mac_reg[TDBAH];
746     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
747 
748     return (bah << 32) + bal;
749 }
750 
751 static void
752 start_xmit(E1000State *s)
753 {
754     PCIDevice *d = PCI_DEVICE(s);
755     dma_addr_t base;
756     struct e1000_tx_desc desc;
757     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
758 
759     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
760         DBGOUT(TX, "tx disabled\n");
761         return;
762     }
763 
764     if (s->tx.busy) {
765         return;
766     }
767     s->tx.busy = true;
768 
769     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
770         base = tx_desc_base(s) +
771                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
772         pci_dma_read(d, base, &desc, sizeof(desc));
773 
774         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
775                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
776                desc.upper.data);
777 
778         process_tx_desc(s, &desc);
779         cause |= txdesc_writeback(s, base, &desc);
780 
781         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
782             s->mac_reg[TDH] = 0;
783         /*
784          * the following could happen only if guest sw assigns
785          * bogus values to TDT/TDLEN.
786          * there's nothing too intelligent we could do about this.
787          */
788         if (s->mac_reg[TDH] == tdh_start ||
789             tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
790             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
791                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
792             break;
793         }
794     }
795     s->tx.busy = false;
796     set_ics(s, 0, cause);
797 }
798 
799 static int
800 receive_filter(E1000State *s, const void *buf)
801 {
802     return (!e1000x_is_vlan_packet(buf, s->mac_reg[VET]) ||
803             e1000x_rx_vlan_filter(s->mac_reg, PKT_GET_VLAN_HDR(buf))) &&
804            e1000x_rx_group_filter(s->mac_reg, buf);
805 }
806 
807 static void
808 e1000_set_link_status(NetClientState *nc)
809 {
810     E1000State *s = qemu_get_nic_opaque(nc);
811     uint32_t old_status = s->mac_reg[STATUS];
812 
813     if (nc->link_down) {
814         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
815     } else {
816         if (have_autoneg(s) &&
817             !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
818             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
819         } else {
820             e1000_link_up(s);
821         }
822     }
823 
824     if (s->mac_reg[STATUS] != old_status)
825         set_ics(s, 0, E1000_ICR_LSC);
826 }
827 
828 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
829 {
830     int bufs;
831     /* Fast-path short packets */
832     if (total_size <= s->rxbuf_size) {
833         return s->mac_reg[RDH] != s->mac_reg[RDT];
834     }
835     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
836         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
837     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
838         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
839             s->mac_reg[RDT] - s->mac_reg[RDH];
840     } else {
841         return false;
842     }
843     return total_size <= bufs * s->rxbuf_size;
844 }
845 
846 static bool
847 e1000_can_receive(NetClientState *nc)
848 {
849     E1000State *s = qemu_get_nic_opaque(nc);
850 
851     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
852         e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
853 }
854 
855 static uint64_t rx_desc_base(E1000State *s)
856 {
857     uint64_t bah = s->mac_reg[RDBAH];
858     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
859 
860     return (bah << 32) + bal;
861 }
862 
863 static void
864 e1000_receiver_overrun(E1000State *s, size_t size)
865 {
866     trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
867     e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
868     e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
869     set_ics(s, 0, E1000_ICS_RXO);
870 }
871 
/*
 * Deliver one incoming frame, described by @iov/@iovcnt, into the guest's
 * receive descriptor ring.
 *
 * Return value:
 *   -1  the receiver is disabled, or the ring cannot hold the frame (in
 *       the latter case e1000_receiver_overrun() is called first);
 *    0  the flush-queue timer is pending;
 *   >0  a byte count: the full frame size when the frame is discarded
 *       (oversized or rejected by receive_filter()), otherwise the frame
 *       size minus the 4-byte VLAN tag if one was stripped.
 */
872 static ssize_t
873 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
874 {
875     E1000State *s = qemu_get_nic_opaque(nc);
876     PCIDevice *d = PCI_DEVICE(s);
877     struct e1000_rx_desc desc;
878     dma_addr_t base;
879     unsigned int n, rdt;
880     uint32_t rdh_start;
881     uint16_t vlan_special = 0;
882     uint8_t vlan_status = 0;
883     uint8_t min_buf[ETH_ZLEN];
884     uint8_t *filter_buf = iov->iov_base;
885     size_t size = iov_size(iov, iovcnt);
886     size_t iov_ofs = 0;
887     size_t desc_offset;
888     size_t desc_size;
889     size_t total_size;
890     eth_pkt_types_e pkt_type;
891 
892     if (!e1000x_hw_rx_enabled(s->mac_reg)) {
893         return -1;
894     }
895 
896     if (timer_pending(s->flush_queue_timer)) {
897         return 0;
898     }
899 
    /*
     * filter_buf must expose MAXIMUM_ETHERNET_HDR_LEN contiguous bytes; if
     * the first iov element is shorter, gather the header into min_buf.
     */
900     if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
901         /* This is very unlikely, but may happen. */
902         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
903         filter_buf = min_buf;
904     }
905 
906     /* Discard oversized packets if !LPE and !SBP. */
907     if (e1000x_is_oversized(s->mac_reg, size)) {
908         return size;
909     }
910 
    /* Frames rejected by the filter are reported as consumed. */
911     if (!receive_filter(s, filter_buf)) {
912         return size;
913     }
914 
    /*
     * VLAN stripping: remember the 16-bit value at offset 14 for the
     * descriptor's "special" field, then move the first 12 header bytes
     * forward by 4 so that the data starting at offset 4 no longer
     * contains the tag.  iov_ofs = 4 makes the copy loop below start
     * there, and size shrinks by the 4 removed bytes.
     */
915     if (e1000x_vlan_enabled(s->mac_reg) &&
916         e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
917         vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
918         iov_ofs = 4;
919         if (filter_buf == iov->iov_base) {
            /* Header lives in the first iov element: shift it in place. */
920             memmove(filter_buf + 4, filter_buf, 12);
921         } else {
            /*
             * Header was gathered into min_buf: write the 12 bytes back
             * into the iov at offset 4, then advance iov/iov_ofs to the
             * element that contains offset 4.
             */
922             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
923             while (iov->iov_len <= iov_ofs) {
924                 iov_ofs -= iov->iov_len;
925                 iov++;
926             }
927         }
928         vlan_status = E1000_RXD_STAT_VP;
929         size -= 4;
930     }
931 
932     pkt_type = get_eth_packet_type(PKT_GET_ETH_HDR(filter_buf));
933     rdh_start = s->mac_reg[RDH];
934     desc_offset = 0;
    /* total_size is the payload plus e1000x_fcs_len() extra bytes. */
935     total_size = size + e1000x_fcs_len(s->mac_reg);
936     if (!e1000_has_rxbufs(s, total_size)) {
937         e1000_receiver_overrun(s, total_size);
938         return -1;
939     }
    /* Fill descriptors starting at RDH until total_size bytes are covered. */
940     do {
941         desc_size = total_size - desc_offset;
942         if (desc_size > s->rxbuf_size) {
943             desc_size = s->rxbuf_size;
944         }
945         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
946         pci_dma_read(d, base, &desc, sizeof(desc));
947         desc.special = vlan_special;
948         desc.status &= ~E1000_RXD_STAT_DD;
949         if (desc.buffer_addr) {
            /*
             * Only the first "size" bytes come from the iov; the rest of
             * total_size is counted in desc.length but not written.
             */
950             if (desc_offset < size) {
951                 size_t iov_copy;
952                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
953                 size_t copy_size = size - desc_offset;
954                 if (copy_size > s->rxbuf_size) {
955                     copy_size = s->rxbuf_size;
956                 }
                /* Copy across iov element boundaries as needed. */
957                 do {
958                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
959                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
960                     copy_size -= iov_copy;
961                     ba += iov_copy;
962                     iov_ofs += iov_copy;
963                     if (iov_ofs == iov->iov_len) {
964                         iov++;
965                         iov_ofs = 0;
966                     }
967                 } while (copy_size);
968             }
969             desc_offset += desc_size;
970             desc.length = cpu_to_le16(desc_size);
971             if (desc_offset >= total_size) {
972                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
973             } else {
974                 /* Guest zeroing out status is not a hardware requirement.
975                    Clear EOP in case guest didn't do it. */
976                 desc.status &= ~E1000_RXD_STAT_EOP;
977             }
978         } else { // as per intel docs; skip descriptors with null buf addr
979             DBGOUT(RX, "Null RX descriptor!!\n");
980         }
        /*
         * Two-step write-back: the whole descriptor goes out first with DD
         * clear, then the status byte alone is rewritten with DD (and VP
         * when a VLAN tag was stripped) set.
         */
981         pci_dma_write(d, base, &desc, sizeof(desc));
982         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
983         pci_dma_write(d, base + offsetof(struct e1000_rx_desc, status),
984                       &desc.status, sizeof(desc.status));
985 
        /* Advance RDH, wrapping to 0 at the end of the ring (RDLEN bytes). */
986         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
987             s->mac_reg[RDH] = 0;
988         /* see comment in start_xmit; same here */
        /*
         * Bail out if RDH came back to where it started, or if the
         * starting index was outside the ring to begin with.
         */
989         if (s->mac_reg[RDH] == rdh_start ||
990             rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
991             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
992                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
993             e1000_receiver_overrun(s, total_size);
994             return -1;
995         }
996     } while (desc_offset < total_size);
997 
998     e1000x_update_rx_total_stats(s->mac_reg, pkt_type, size, total_size);
999 
    /*
     * Always raise RXT0.  Also raise RXDMT0 when the byte size of the
     * descriptors between RDH and RDT (RDT unwrapped past the ring end if
     * it is behind RDH) is at most RDLEN >> rxbuf_min_shift.
     */
1000     n = E1000_ICS_RXT0;
1001     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1002         rdt += s->mac_reg[RDLEN] / sizeof(desc);
1003     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1004         s->rxbuf_min_shift)
1005         n |= E1000_ICS_RXDMT0;
1006 
1007     set_ics(s, 0, n);
1008 
1009     return size;
1010 }
1011 
1012 static ssize_t
1013 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1014 {
1015     const struct iovec iov = {
1016         .iov_base = (uint8_t *)buf,
1017         .iov_len = size
1018     };
1019 
1020     return e1000_receive_iov(nc, &iov, 1);
1021 }
1022 
1023 static uint32_t
1024 mac_readreg(E1000State *s, int index)
1025 {
1026     return s->mac_reg[index];
1027 }
1028 
1029 static uint32_t
1030 mac_icr_read(E1000State *s, int index)
1031 {
1032     uint32_t ret = s->mac_reg[ICR];
1033 
1034     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1035     set_interrupt_cause(s, 0, 0);
1036     return ret;
1037 }
1038 
1039 static uint32_t
1040 mac_read_clr4(E1000State *s, int index)
1041 {
1042     uint32_t ret = s->mac_reg[index];
1043 
1044     s->mac_reg[index] = 0;
1045     return ret;
1046 }
1047 
1048 static uint32_t
1049 mac_read_clr8(E1000State *s, int index)
1050 {
1051     uint32_t ret = s->mac_reg[index];
1052 
1053     s->mac_reg[index] = 0;
1054     s->mac_reg[index-1] = 0;
1055     return ret;
1056 }
1057 
1058 static void
1059 mac_writereg(E1000State *s, int index, uint32_t val)
1060 {
1061     uint32_t macaddr[2];
1062 
1063     s->mac_reg[index] = val;
1064 
1065     if (index == RA + 1) {
1066         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1067         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1068         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1069     }
1070 }
1071 
1072 static void
1073 set_rdt(E1000State *s, int index, uint32_t val)
1074 {
1075     s->mac_reg[index] = val & 0xffff;
1076     if (e1000_has_rxbufs(s, 1)) {
1077         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1078     }
1079 }
1080 
1081 #define LOW_BITS_SET_FUNC(num)                             \
1082     static void                                            \
1083     set_##num##bit(E1000State *s, int index, uint32_t val) \
1084     {                                                      \
1085         s->mac_reg[index] = val & (BIT(num) - 1);          \
1086     }
1087 
1088 LOW_BITS_SET_FUNC(4)
1089 LOW_BITS_SET_FUNC(11)
1090 LOW_BITS_SET_FUNC(13)
1091 LOW_BITS_SET_FUNC(16)
1092 
1093 static void
1094 set_dlen(E1000State *s, int index, uint32_t val)
1095 {
1096     s->mac_reg[index] = val & 0xfff80;
1097 }
1098 
1099 static void
1100 set_tctl(E1000State *s, int index, uint32_t val)
1101 {
1102     s->mac_reg[index] = val;
1103     s->mac_reg[TDT] &= 0xffff;
1104     start_xmit(s);
1105 }
1106 
1107 static void
1108 set_icr(E1000State *s, int index, uint32_t val)
1109 {
1110     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1111     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1112 }
1113 
1114 static void
1115 set_imc(E1000State *s, int index, uint32_t val)
1116 {
1117     s->mac_reg[IMS] &= ~val;
1118     set_ics(s, 0, 0);
1119 }
1120 
1121 static void
1122 set_ims(E1000State *s, int index, uint32_t val)
1123 {
1124     s->mac_reg[IMS] |= val;
1125     set_ics(s, 0, 0);
1126 }
1127 
/*
 * MMIO read dispatch table, indexed by register index (the value
 * e1000_mmio_read() derives as byte offset >> 2).  A NULL slot means the
 * register has no read handler.
 *
 * getreg(X) installs the plain mac_readreg handler for register X.
 */
1128 #define getreg(x)    [x] = mac_readreg
1129 typedef uint32_t (*readops)(E1000State *, int);
1130 static const readops macreg_readops[] = {
1131     getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
1132     getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
1133     getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
1134     getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
1135     getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
1136     getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
1137     getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
1138     getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
1139     getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
1140     getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
1141     getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
1142     getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
1143     getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
1144     getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
1145     getreg(GOTCL),    getreg(RDFH),     getreg(RDFT),     getreg(RDFHS),
1146     getreg(RDFTS),    getreg(RDFPC),    getreg(TDFH),     getreg(TDFT),
1147     getreg(TDFHS),    getreg(TDFTS),    getreg(TDFPC),    getreg(AIT),
1148 
    /*
     * Read-to-clear registers: mac_read_clr8 also zeroes the register at
     * index - 1, mac_read_clr4 zeroes only the register that was read.
     */
1149     [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
1150     [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
1151     [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
1152     [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
1153     [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
1154     [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
1155     [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
1156     [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
1157     [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
1158     [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
1159     [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
1160     [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
1161     [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
1162     [MPTC]    = mac_read_clr4,
    /* Registers with dedicated read handlers. */
1163     [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
1164     [EERD]    = flash_eerd_read,
1165 
    /* Register ranges (GNU range designators), all plain reads. */
1166     [CRCERRS ... MPC]     = &mac_readreg,
1167     [IP6AT ... IP6AT + 3] = &mac_readreg,    [IP4AT ... IP4AT + 6] = &mac_readreg,
1168     [FFLT ... FFLT + 6]   = &mac_readreg,
1169     [RA ... RA + 31]      = &mac_readreg,
1170     [WUPM ... WUPM + 31]  = &mac_readreg,
1171     [MTA ... MTA + E1000_MC_TBL_SIZE - 1]   = &mac_readreg,
1172     [VFTA ... VFTA + E1000_VLAN_FILTER_TBL_SIZE - 1] = &mac_readreg,
1173     [FFMT ... FFMT + 254] = &mac_readreg,
1174     [FFVT ... FFVT + 254] = &mac_readreg,
1175     [PBM ... PBM + 16383] = &mac_readreg,
1176 };
/* Number of slots in macreg_readops; used as the bounds check on reads. */
1177 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1178 
/*
 * MMIO write dispatch table, indexed by register index (the value
 * e1000_mmio_write() derives as byte offset >> 2).  A NULL slot means the
 * register has no write handler; e1000_mmio_write() ignores such writes.
 *
 * putreg(X) installs the plain mac_writereg handler for register X.
 */
1179 #define putreg(x)    [x] = mac_writereg
1180 typedef void (*writeops)(E1000State *, int, uint32_t);
1181 static const writeops macreg_writeops[] = {
1182     putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
1183     putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
1184     putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
1185     putreg(IPAV),     putreg(WUC),
1186     putreg(WUS),
1187 
    /* Registers whose writes are masked or have side effects. */
1188     [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL]  = set_tctl,
1189     [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]   = set_ics,
1190     [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]   = set_rdt,
1191     [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]   = set_icr,
1192     [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL]  = set_ctrl,
1193     [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV]  = set_16bit,
1194     [ITR]    = set_16bit,  [TDFH]   = set_11bit,      [TDFT]  = set_11bit,
1195     [TDFHS]  = set_13bit,  [TDFTS]  = set_13bit,      [TDFPC] = set_13bit,
1196     [RDFH]   = set_13bit,  [RDFT]   = set_13bit,      [RDFHS] = set_13bit,
1197     [RDFTS]  = set_13bit,  [RDFPC]  = set_13bit,      [AIT]   = set_16bit,
1198 
    /* Register ranges (GNU range designators). */
1199     [IP6AT ... IP6AT + 3] = &mac_writereg, [IP4AT ... IP4AT + 6] = &mac_writereg,
1200     [FFLT ... FFLT + 6]   = &set_11bit,
1201     [RA ... RA + 31]      = &mac_writereg,
1202     [WUPM ... WUPM + 31]  = &mac_writereg,
1203     [MTA ... MTA + E1000_MC_TBL_SIZE - 1] = &mac_writereg,
1204     [VFTA ... VFTA + E1000_VLAN_FILTER_TBL_SIZE - 1] = &mac_writereg,
1205     [FFMT ... FFMT + 254] = &set_4bit,     [FFVT ... FFVT + 254] = &mac_writereg,
1206     [PBM ... PBM + 16383] = &mac_writereg,
1207 };
1208 
/* Number of slots in macreg_writeops; used as the bounds check on writes. */
1209 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1210 
1211 enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
1212 
1213 /* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
1214  * f - flag bits (up to 6 possible flags)
1215  * n - flag needed
1216  * p - partially implemented */
1217 static const uint8_t mac_reg_access[0x8000] = {
1218     [IPAV]    = MAC_ACCESS_FLAG_NEEDED,    [WUC]     = MAC_ACCESS_FLAG_NEEDED,
1219     [IP6AT]   = MAC_ACCESS_FLAG_NEEDED,    [IP4AT]   = MAC_ACCESS_FLAG_NEEDED,
1220     [FFVT]    = MAC_ACCESS_FLAG_NEEDED,    [WUPM]    = MAC_ACCESS_FLAG_NEEDED,
1221     [ECOL]    = MAC_ACCESS_FLAG_NEEDED,    [MCC]     = MAC_ACCESS_FLAG_NEEDED,
1222     [DC]      = MAC_ACCESS_FLAG_NEEDED,    [TNCRS]   = MAC_ACCESS_FLAG_NEEDED,
1223     [RLEC]    = MAC_ACCESS_FLAG_NEEDED,    [XONRXC]  = MAC_ACCESS_FLAG_NEEDED,
1224     [XOFFTXC] = MAC_ACCESS_FLAG_NEEDED,    [RFC]     = MAC_ACCESS_FLAG_NEEDED,
1225     [TSCTFC]  = MAC_ACCESS_FLAG_NEEDED,    [MGTPRC]  = MAC_ACCESS_FLAG_NEEDED,
1226     [WUS]     = MAC_ACCESS_FLAG_NEEDED,    [AIT]     = MAC_ACCESS_FLAG_NEEDED,
1227     [FFLT]    = MAC_ACCESS_FLAG_NEEDED,    [FFMT]    = MAC_ACCESS_FLAG_NEEDED,
1228     [SCC]     = MAC_ACCESS_FLAG_NEEDED,    [FCRUC]   = MAC_ACCESS_FLAG_NEEDED,
1229     [LATECOL] = MAC_ACCESS_FLAG_NEEDED,    [COLC]    = MAC_ACCESS_FLAG_NEEDED,
1230     [SEQEC]   = MAC_ACCESS_FLAG_NEEDED,    [CEXTERR] = MAC_ACCESS_FLAG_NEEDED,
1231     [XONTXC]  = MAC_ACCESS_FLAG_NEEDED,    [XOFFRXC] = MAC_ACCESS_FLAG_NEEDED,
1232     [RJC]     = MAC_ACCESS_FLAG_NEEDED,    [RNBC]    = MAC_ACCESS_FLAG_NEEDED,
1233     [MGTPDC]  = MAC_ACCESS_FLAG_NEEDED,    [MGTPTC]  = MAC_ACCESS_FLAG_NEEDED,
1234     [RUC]     = MAC_ACCESS_FLAG_NEEDED,    [ROC]     = MAC_ACCESS_FLAG_NEEDED,
1235     [GORCL]   = MAC_ACCESS_FLAG_NEEDED,    [GORCH]   = MAC_ACCESS_FLAG_NEEDED,
1236     [GOTCL]   = MAC_ACCESS_FLAG_NEEDED,    [GOTCH]   = MAC_ACCESS_FLAG_NEEDED,
1237     [BPRC]    = MAC_ACCESS_FLAG_NEEDED,    [MPRC]    = MAC_ACCESS_FLAG_NEEDED,
1238     [TSCTC]   = MAC_ACCESS_FLAG_NEEDED,    [PRC64]   = MAC_ACCESS_FLAG_NEEDED,
1239     [PRC127]  = MAC_ACCESS_FLAG_NEEDED,    [PRC255]  = MAC_ACCESS_FLAG_NEEDED,
1240     [PRC511]  = MAC_ACCESS_FLAG_NEEDED,    [PRC1023] = MAC_ACCESS_FLAG_NEEDED,
1241     [PRC1522] = MAC_ACCESS_FLAG_NEEDED,    [PTC64]   = MAC_ACCESS_FLAG_NEEDED,
1242     [PTC127]  = MAC_ACCESS_FLAG_NEEDED,    [PTC255]  = MAC_ACCESS_FLAG_NEEDED,
1243     [PTC511]  = MAC_ACCESS_FLAG_NEEDED,    [PTC1023] = MAC_ACCESS_FLAG_NEEDED,
1244     [PTC1522] = MAC_ACCESS_FLAG_NEEDED,    [MPTC]    = MAC_ACCESS_FLAG_NEEDED,
1245     [BPTC]    = MAC_ACCESS_FLAG_NEEDED,
1246 
1247     [TDFH]    = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
1248     [TDFT]    = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
1249     [TDFHS]   = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
1250     [TDFTS]   = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
1251     [TDFPC]   = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
1252     [RDFH]    = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
1253     [RDFT]    = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
1254     [RDFHS]   = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
1255     [RDFTS]   = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
1256     [RDFPC]   = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
1257     [PBM]     = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
1258 };
1259 
1260 static void
1261 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1262                  unsigned size)
1263 {
1264     E1000State *s = opaque;
1265     unsigned int index = (addr & 0x1ffff) >> 2;
1266 
1267     if (index < NWRITEOPS && macreg_writeops[index]) {
1268         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1269             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1270             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1271                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1272                        "It is not fully implemented.\n", index<<2);
1273             }
1274             macreg_writeops[index](s, index, val);
1275         } else {    /* "flag needed" bit is set, but the flag is not active */
1276             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1277                    index<<2);
1278         }
1279     } else if (index < NREADOPS && macreg_readops[index]) {
1280         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1281                index<<2, val);
1282     } else {
1283         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1284                index<<2, val);
1285     }
1286 }
1287 
1288 static uint64_t
1289 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1290 {
1291     E1000State *s = opaque;
1292     unsigned int index = (addr & 0x1ffff) >> 2;
1293 
1294     if (index < NREADOPS && macreg_readops[index]) {
1295         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1296             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1297             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1298                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1299                        "It is not fully implemented.\n", index<<2);
1300             }
1301             return macreg_readops[index](s, index);
1302         } else {    /* "flag needed" bit is set, but the flag is not active */
1303             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1304                    index<<2);
1305         }
1306     } else {
1307         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1308     }
1309     return 0;
1310 }
1311 
/*
 * Memory region callbacks for the register BAR: little-endian, with the
 * handlers implemented for 4-byte accesses only (.impl min = max = 4).
 */
1312 static const MemoryRegionOps e1000_mmio_ops = {
1313     .read = e1000_mmio_read,
1314     .write = e1000_mmio_write,
1315     .endianness = DEVICE_LITTLE_ENDIAN,
1316     .impl = {
1317         .min_access_size = 4,
1318         .max_access_size = 4,
1319     },
1320 };
1321 
1322 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1323                               unsigned size)
1324 {
1325     E1000State *s = opaque;
1326 
1327     (void)s;
1328     return 0;
1329 }
1330 
1331 static void e1000_io_write(void *opaque, hwaddr addr,
1332                            uint64_t val, unsigned size)
1333 {
1334     E1000State *s = opaque;
1335 
1336     (void)s;
1337 }
1338 
/* Memory region callbacks for the I/O BAR (both handlers are stubs). */
1339 static const MemoryRegionOps e1000_io_ops = {
1340     .read = e1000_io_read,
1341     .write = e1000_io_write,
1342     .endianness = DEVICE_LITTLE_ENDIAN,
1343 };
1344 
/*
 * vmstate field test: true only when the incoming stream's @version_id is
 * 1.  @opaque is unused.
 */
static bool is_version_1(void *opaque, int version_id)
{
    (void)opaque;

    if (version_id != 1) {
        return false;
    }
    return true;
}
1349 
1350 static int e1000_pre_save(void *opaque)
1351 {
1352     E1000State *s = opaque;
1353     NetClientState *nc = qemu_get_queue(s->nic);
1354 
1355     /*
1356      * If link is down and auto-negotiation is supported and ongoing,
1357      * complete auto-negotiation immediately. This allows us to look
1358      * at MII_BMSR_AN_COMP to infer link status on load.
1359      */
1360     if (nc->link_down && have_autoneg(s)) {
1361         s->phy_reg[MII_BMSR] |= MII_BMSR_AN_COMP;
1362     }
1363 
1364     /* Decide which set of props to migrate in the main structure */
1365     if (chkflag(TSO) || !s->use_tso_for_migration) {
1366         /* Either we're migrating with the extra subsection, in which
1367          * case the mig_props is always 'props' OR
1368          * we've not got the subsection, but 'props' was the last
1369          * updated.
1370          */
1371         s->mig_props = s->tx.props;
1372     } else {
1373         /* We're not using the subsection, and 'tso_props' was
1374          * the last updated.
1375          */
1376         s->mig_props = s->tx.tso_props;
1377     }
1378     return 0;
1379 }
1380 
1381 static int e1000_post_load(void *opaque, int version_id)
1382 {
1383     E1000State *s = opaque;
1384     NetClientState *nc = qemu_get_queue(s->nic);
1385 
1386     s->mit_ide = 0;
1387     s->mit_timer_on = true;
1388     timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
1389 
1390     /* nc.link_down can't be migrated, so infer link_down according
1391      * to link status bit in mac_reg[STATUS].
1392      * Alternatively, restart link negotiation if it was in progress. */
1393     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1394 
1395     if (have_autoneg(s) && !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
1396         nc->link_down = false;
1397         timer_mod(s->autoneg_timer,
1398                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1399     }
1400 
1401     s->tx.props = s->mig_props;
1402     if (!s->received_tx_tso) {
1403         /* We received only one set of offload data (tx.props)
1404          * and haven't got tx.tso_props.  The best we can do
1405          * is dupe the data.
1406          */
1407         s->tx.tso_props = s->mig_props;
1408     }
1409     return 0;
1410 }
1411 
1412 static int e1000_tx_tso_post_load(void *opaque, int version_id)
1413 {
1414     E1000State *s = opaque;
1415     s->received_tx_tso = true;
1416     return 0;
1417 }
1418 
1419 static bool e1000_tso_state_needed(void *opaque)
1420 {
1421     E1000State *s = opaque;
1422 
1423     return chkflag(TSO);
1424 }
1425 
/*
 * Migration subsection "e1000/mit_state": the RDTR, RADV, TADV and ITR
 * registers plus the mit_irq_level flag.  It has no .needed callback.
 */
1426 static const VMStateDescription vmstate_e1000_mit_state = {
1427     .name = "e1000/mit_state",
1428     .version_id = 1,
1429     .minimum_version_id = 1,
1430     .fields = (const VMStateField[]) {
1431         VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1432         VMSTATE_UINT32(mac_reg[RADV], E1000State),
1433         VMSTATE_UINT32(mac_reg[TADV], E1000State),
1434         VMSTATE_UINT32(mac_reg[ITR], E1000State),
1435         VMSTATE_BOOL(mit_irq_level, E1000State),
1436         VMSTATE_END_OF_LIST()
1437     }
1438 };
1439 
/*
 * Migration subsection "e1000/full_mac_state": the complete mac_reg array
 * (0x8000 32-bit entries).  It has no .needed callback.
 */
1440 static const VMStateDescription vmstate_e1000_full_mac_state = {
1441     .name = "e1000/full_mac_state",
1442     .version_id = 1,
1443     .minimum_version_id = 1,
1444     .fields = (const VMStateField[]) {
1445         VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
1446         VMSTATE_END_OF_LIST()
1447     }
1448 };
1449 
/*
 * Migration subsection "e1000/tx_tso_state": the tx.tso_props offload
 * context.  Sent only when e1000_tso_state_needed() returns true; on load,
 * e1000_tx_tso_post_load() records that it was received.
 */
1450 static const VMStateDescription vmstate_e1000_tx_tso_state = {
1451     .name = "e1000/tx_tso_state",
1452     .version_id = 1,
1453     .minimum_version_id = 1,
1454     .needed = e1000_tso_state_needed,
1455     .post_load = e1000_tx_tso_post_load,
1456     .fields = (const VMStateField[]) {
1457         VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
1458         VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
1459         VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
1460         VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
1461         VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
1462         VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
1463         VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
1464         VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
1465         VMSTATE_UINT16(tx.tso_props.mss, E1000State),
1466         VMSTATE_INT8(tx.tso_props.ip, E1000State),
1467         VMSTATE_INT8(tx.tso_props.tcp, E1000State),
1468         VMSTATE_END_OF_LIST()
1469     }
1470 };
1471 
/*
 * Main migration description of the device (stream version 2, accepting
 * version 1 and later).
 *
 * The order of the entries in .fields is the on-the-wire layout and must
 * not be changed.  The offload context is carried through mig_props, which
 * e1000_pre_save() fills in and e1000_post_load() copies back.
 */
1472 static const VMStateDescription vmstate_e1000 = {
1473     .name = "e1000",
1474     .version_id = 2,
1475     .minimum_version_id = 1,
1476     .pre_save = e1000_pre_save,
1477     .post_load = e1000_post_load,
1478     .fields = (const VMStateField[]) {
1479         VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        /* Placeholders for fields that no longer exist in the device. */
1480         VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1481         VMSTATE_UNUSED(4), /* Was mmio_base.  */
1482         VMSTATE_UINT32(rxbuf_size, E1000State),
1483         VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* EEPROM access state. */
1484         VMSTATE_UINT32(eecd_state.val_in, E1000State),
1485         VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1486         VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1487         VMSTATE_UINT16(eecd_state.reading, E1000State),
1488         VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* Transmit offload context (mig_props) and in-flight TX state. */
1489         VMSTATE_UINT8(mig_props.ipcss, E1000State),
1490         VMSTATE_UINT8(mig_props.ipcso, E1000State),
1491         VMSTATE_UINT16(mig_props.ipcse, E1000State),
1492         VMSTATE_UINT8(mig_props.tucss, E1000State),
1493         VMSTATE_UINT8(mig_props.tucso, E1000State),
1494         VMSTATE_UINT16(mig_props.tucse, E1000State),
1495         VMSTATE_UINT32(mig_props.paylen, E1000State),
1496         VMSTATE_UINT8(mig_props.hdr_len, E1000State),
1497         VMSTATE_UINT16(mig_props.mss, E1000State),
1498         VMSTATE_UINT16(tx.size, E1000State),
1499         VMSTATE_UINT16(tx.tso_frames, E1000State),
1500         VMSTATE_UINT8(tx.sum_needed, E1000State),
1501         VMSTATE_INT8(mig_props.ip, E1000State),
1502         VMSTATE_INT8(mig_props.tcp, E1000State),
1503         VMSTATE_BUFFER(tx.header, E1000State),
1504         VMSTATE_BUFFER(tx.data, E1000State),
        /* EEPROM contents and PHY registers. */
1505         VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1506         VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individually migrated MAC registers. */
1507         VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1508         VMSTATE_UINT32(mac_reg[EECD], E1000State),
1509         VMSTATE_UINT32(mac_reg[EERD], E1000State),
1510         VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1511         VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1512         VMSTATE_UINT32(mac_reg[ICR], E1000State),
1513         VMSTATE_UINT32(mac_reg[ICS], E1000State),
1514         VMSTATE_UINT32(mac_reg[IMC], E1000State),
1515         VMSTATE_UINT32(mac_reg[IMS], E1000State),
1516         VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1517         VMSTATE_UINT32(mac_reg[MANC], E1000State),
1518         VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1519         VMSTATE_UINT32(mac_reg[MPC], E1000State),
1520         VMSTATE_UINT32(mac_reg[PBA], E1000State),
1521         VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1522         VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1523         VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1524         VMSTATE_UINT32(mac_reg[RDH], E1000State),
1525         VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1526         VMSTATE_UINT32(mac_reg[RDT], E1000State),
1527         VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1528         VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1529         VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1530         VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1531         VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1532         VMSTATE_UINT32(mac_reg[TDH], E1000State),
1533         VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1534         VMSTATE_UINT32(mac_reg[TDT], E1000State),
1535         VMSTATE_UINT32(mac_reg[TORH], E1000State),
1536         VMSTATE_UINT32(mac_reg[TORL], E1000State),
1537         VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1538         VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1539         VMSTATE_UINT32(mac_reg[TPR], E1000State),
1540         VMSTATE_UINT32(mac_reg[TPT], E1000State),
1541         VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1542         VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1543         VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Register tables: RA (32 entries), MTA and VFTA. */
1544         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1545         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, E1000_MC_TBL_SIZE),
1546         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA,
1547                                  E1000_VLAN_FILTER_TBL_SIZE),
1548         VMSTATE_END_OF_LIST()
1549     },
1550     .subsections = (const VMStateDescription * const []) {
1551         &vmstate_e1000_mit_state,
1552         &vmstate_e1000_full_mac_state,
1553         &vmstate_e1000_tx_tso_state,
1554         NULL
1555     }
1556 };
1557 
1558 /*
1559  * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1560  * Note: A valid DevId will be inserted during pci_e1000_realize().
1561  */
/*
 * 64 16-bit EEPROM words, eight per row.  Words 11 and 13 (marked DevId)
 * are zero placeholders here.
 */
1562 static const uint16_t e1000_eeprom_template[64] = {
1563     0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1564     0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1565     0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1566     0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1567     0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1568     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1569     0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1570     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1571 };
1572 
1573 /* PCI interface */
1574 
1575 static void
1576 e1000_mmio_setup(E1000State *d)
1577 {
1578     int i;
1579     const uint32_t excluded_regs[] = {
1580         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1581         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1582     };
1583 
1584     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1585                           "e1000-mmio", PNPMMIO_SIZE);
1586     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1587     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1588         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1589                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1590     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1591 }
1592 
1593 static void
1594 pci_e1000_uninit(PCIDevice *dev)
1595 {
1596     E1000State *d = E1000(dev);
1597 
1598     timer_free(d->autoneg_timer);
1599     timer_free(d->mit_timer);
1600     timer_free(d->flush_queue_timer);
1601     qemu_del_nic(d->nic);
1602 }
1603 
/* Net client callbacks registered for this NIC via qemu_new_nic(). */
1604 static NetClientInfo net_e1000_info = {
1605     .type = NET_CLIENT_DRIVER_NIC,
1606     .size = sizeof(NICState),
1607     .can_receive = e1000_can_receive,
1608     .receive = e1000_receive,
1609     .receive_iov = e1000_receive_iov,
1610     .link_status_changed = e1000_set_link_status,
1611 };
1612 
1613 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1614                                 uint32_t val, int len)
1615 {
1616     E1000State *s = E1000(pci_dev);
1617 
1618     pci_default_write_config(pci_dev, address, val, len);
1619 
1620     if (range_covers_byte(address, len, PCI_COMMAND) &&
1621         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1622         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1623     }
1624 }
1625 
1626 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1627 {
1628     DeviceState *dev = DEVICE(pci_dev);
1629     E1000State *d = E1000(pci_dev);
1630     uint8_t *pci_conf;
1631     uint8_t *macaddr;
1632 
1633     pci_dev->config_write = e1000_write_config;
1634 
1635     pci_conf = pci_dev->config;
1636 
1637     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1638     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1639 
1640     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1641 
1642     e1000_mmio_setup(d);
1643 
1644     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1645 
1646     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1647 
1648     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1649     macaddr = d->conf.macaddr.a;
1650 
1651     e1000x_core_prepare_eeprom(d->eeprom_data,
1652                                e1000_eeprom_template,
1653                                sizeof(e1000_eeprom_template),
1654                                PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1655                                macaddr);
1656 
1657     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1658                           object_get_typename(OBJECT(d)), dev->id,
1659                           &dev->mem_reentrancy_guard, d);
1660 
1661     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1662 
1663     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1664     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1665     d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1666                                         e1000_flush_queue_timer, d);
1667 }
1668 
1669 static const Property e1000_properties[] = {
1670     DEFINE_NIC_PROPERTIES(E1000State, conf),
1671     DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1672                     compat_flags, E1000_FLAG_TSO_BIT, true),
1673     DEFINE_PROP_BIT("init-vet", E1000State,
1674                     compat_flags, E1000_FLAG_VET_BIT, true),
1675 };
1676 
/*
 * Static description of one concrete e1000 device type.  An entry is
 * handed to e1000_class_init() as class data.
 */
typedef struct E1000Info {
    const char *name;   /* QOM type name used when registering the type */
    uint16_t device_id; /* copied into PCIDeviceClass.device_id */
    uint8_t revision;   /* copied into PCIDeviceClass.revision */
    uint16_t phy_id2;   /* copied into E1000BaseClass.phy_id2 */
} E1000Info;
1683 
1684 static void e1000_class_init(ObjectClass *klass, const void *data)
1685 {
1686     DeviceClass *dc = DEVICE_CLASS(klass);
1687     ResettableClass *rc = RESETTABLE_CLASS(klass);
1688     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1689     E1000BaseClass *e = E1000_CLASS(klass);
1690     const E1000Info *info = data;
1691 
1692     k->realize = pci_e1000_realize;
1693     k->exit = pci_e1000_uninit;
1694     k->romfile = "efi-e1000.rom";
1695     k->vendor_id = PCI_VENDOR_ID_INTEL;
1696     k->device_id = info->device_id;
1697     k->revision = info->revision;
1698     e->phy_id2 = info->phy_id2;
1699     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1700     rc->phases.hold = e1000_reset_hold;
1701     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1702     dc->desc = "Intel Gigabit Ethernet";
1703     dc->vmsd = &vmstate_e1000;
1704     device_class_set_props(dc, e1000_properties);
1705 }
1706 
1707 static void e1000_instance_init(Object *obj)
1708 {
1709     E1000State *n = E1000(obj);
1710     device_add_bootindex_property(obj, &n->conf.bootindex,
1711                                   "bootindex", "/ethernet-phy@0",
1712                                   DEVICE(n));
1713 }
1714 
1715 static const TypeInfo e1000_base_info = {
1716     .name          = TYPE_E1000_BASE,
1717     .parent        = TYPE_PCI_DEVICE,
1718     .instance_size = sizeof(E1000State),
1719     .instance_init = e1000_instance_init,
1720     .class_size    = sizeof(E1000BaseClass),
1721     .abstract      = true,
1722     .interfaces = (const InterfaceInfo[]) {
1723         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1724         { },
1725     },
1726 };
1727 
1728 static const E1000Info e1000_devices[] = {
1729     {
1730         .name      = "e1000",
1731         .device_id = E1000_DEV_ID_82540EM,
1732         .revision  = 0x03,
1733         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1734     },
1735     {
1736         .name      = "e1000-82544gc",
1737         .device_id = E1000_DEV_ID_82544GC_COPPER,
1738         .revision  = 0x03,
1739         .phy_id2   = E1000_PHY_ID2_82544x,
1740     },
1741     {
1742         .name      = "e1000-82545em",
1743         .device_id = E1000_DEV_ID_82545EM_COPPER,
1744         .revision  = 0x03,
1745         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1746     },
1747 };
1748 
1749 static void e1000_register_types(void)
1750 {
1751     int i;
1752 
1753     type_register_static(&e1000_base_info);
1754     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1755         const E1000Info *info = &e1000_devices[i];
1756         TypeInfo type_info = {};
1757 
1758         type_info.name = info->name;
1759         type_info.parent = TYPE_E1000_BASE;
1760         type_info.class_data = info;
1761         type_info.class_init = e1000_class_init;
1762 
1763         type_register_static(&type_info);
1764     }
1765 }
1766 
1767 type_init(e1000_register_types)
1768