xref: /openbmc/qemu/hw/net/e1000.c (revision 8e6c718a)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/net/mii.h"
30 #include "hw/pci/pci_device.h"
31 #include "hw/qdev-properties.h"
32 #include "migration/vmstate.h"
33 #include "net/eth.h"
34 #include "net/net.h"
35 #include "net/checksum.h"
36 #include "sysemu/sysemu.h"
37 #include "sysemu/dma.h"
38 #include "qemu/iov.h"
39 #include "qemu/module.h"
40 #include "qemu/range.h"
41 
42 #include "e1000_common.h"
43 #include "e1000x_common.h"
44 #include "trace.h"
45 #include "qom/object.h"
46 
47 /* #define E1000_DEBUG */
48 
/*
 * Debug tracing.  When E1000_DEBUG is not defined DBGOUT() compiles to an
 * empty statement and its arguments are never evaluated.  When it is
 * defined, each message belongs to one category and is printed to stderr
 * only if that category's bit is set in debugflags.
 */
#ifndef E1000_DEBUG
#define DBGOUT(what, fmt, ...) do {} while (0)
#else
/* Debug categories; the enumerator value is the bit index used by DBGBIT(). */
enum {
    DEBUG_GENERAL,
    DEBUG_IO,
    DEBUG_MMIO,
    DEBUG_INTERRUPT,
    DEBUG_RX,
    DEBUG_TX,
    DEBUG_MDIC,
    DEBUG_EEPROM,
    DEBUG_UNKNOWN,
    DEBUG_TXSUM,
    DEBUG_TXERR,
    DEBUG_RXERR,
    DEBUG_RXFILTER,
    DEBUG_PHY,
    DEBUG_NOTYET,
};
#define DBGBIT(x)    (1<<DEBUG_##x)
/* Categories that are printed by default. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#endif
66 
/*
 * Region sizes, in bytes.  NOTE(review): presumably the sizes of the `io`
 * and `mmio` MemoryRegions of the device state; their setup is not visible
 * in this part of the file.
 */
#define IOPORT_SIZE 0x40
#define PNPMMIO_SIZE 0x20000

/*
 * Ethernet header length plus four extra bytes (presumably room for one
 * VLAN tag - confirm against the receive path).
 */
#define MAXIMUM_ETHERNET_HDR_LEN (ETH_HLEN + 4)
71 
72 /*
73  * HW models:
74  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
75  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
76  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
77  *  Others never tested
78  */
79 
80 struct E1000State_st {
81     /*< private >*/
82     PCIDevice parent_obj;
83     /*< public >*/
84 
85     NICState *nic;
86     NICConf conf;
87     MemoryRegion mmio;
88     MemoryRegion io;
89 
90     uint32_t mac_reg[0x8000];
91     uint16_t phy_reg[0x20];
92     uint16_t eeprom_data[64];
93 
94     uint32_t rxbuf_size;
95     uint32_t rxbuf_min_shift;
96     struct e1000_tx {
97         unsigned char header[256];
98         unsigned char vlan_header[4];
99         /* Fields vlan and data must not be reordered or separated. */
100         unsigned char vlan[4];
101         unsigned char data[0x10000];
102         uint16_t size;
103         unsigned char vlan_needed;
104         unsigned char sum_needed;
105         bool cptse;
106         e1000x_txd_props props;
107         e1000x_txd_props tso_props;
108         uint16_t tso_frames;
109         bool busy;
110     } tx;
111 
112     struct {
113         uint32_t val_in;    /* shifted in from guest driver */
114         uint16_t bitnum_in;
115         uint16_t bitnum_out;
116         uint16_t reading;
117         uint32_t old_eecd;
118     } eecd_state;
119 
120     QEMUTimer *autoneg_timer;
121 
122     QEMUTimer *mit_timer;      /* Mitigation timer. */
123     bool mit_timer_on;         /* Mitigation timer is running. */
124     bool mit_irq_level;        /* Tracks interrupt pin level. */
125     uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
126 
127     QEMUTimer *flush_queue_timer;
128 
129 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
130 #define E1000_FLAG_AUTONEG_BIT 0
131 #define E1000_FLAG_MIT_BIT 1
132 #define E1000_FLAG_MAC_BIT 2
133 #define E1000_FLAG_TSO_BIT 3
134 #define E1000_FLAG_VET_BIT 4
135 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
136 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
137 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
138 #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
139 #define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)
140 
141     uint32_t compat_flags;
142     bool received_tx_tso;
143     bool use_tso_for_migration;
144     e1000x_txd_props mig_props;
145 };
146 typedef struct E1000State_st E1000State;
147 
148 #define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
149 
150 struct E1000BaseClass {
151     PCIDeviceClass parent_class;
152     uint16_t phy_id2;
153 };
154 typedef struct E1000BaseClass E1000BaseClass;
155 
156 #define TYPE_E1000_BASE "e1000-base"
157 
158 DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
159                      E1000, TYPE_E1000_BASE)
160 
161 
162 static void
163 e1000_link_up(E1000State *s)
164 {
165     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
166 
167     /* E1000_STATUS_LU is tested by e1000_can_receive() */
168     qemu_flush_queued_packets(qemu_get_queue(s->nic));
169 }
170 
171 static void
172 e1000_autoneg_done(E1000State *s)
173 {
174     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
175 
176     /* E1000_STATUS_LU is tested by e1000_can_receive() */
177     qemu_flush_queued_packets(qemu_get_queue(s->nic));
178 }
179 
180 static bool
181 have_autoneg(E1000State *s)
182 {
183     return chkflag(AUTONEG) && (s->phy_reg[MII_BMCR] & MII_BMCR_AUTOEN);
184 }
185 
186 static void
187 set_phy_ctrl(E1000State *s, int index, uint16_t val)
188 {
189     /* bits 0-5 reserved; MII_BMCR_[ANRESTART,RESET] are self clearing */
190     s->phy_reg[MII_BMCR] = val & ~(0x3f |
191                                    MII_BMCR_RESET |
192                                    MII_BMCR_ANRESTART);
193 
194     /*
195      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
196      * migrate during auto negotiation, after migration the link will be
197      * down.
198      */
199     if (have_autoneg(s) && (val & MII_BMCR_ANRESTART)) {
200         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
201     }
202 }
203 
204 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
205     [MII_BMCR] = set_phy_ctrl,
206 };
207 
208 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
209 
210 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
211 static const char phy_regcap[0x20] = {
212     [MII_BMSR]   = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
213     [MII_PHYID1] = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
214     [MII_BMCR]   = PHY_RW,    [MII_CTRL1000]               = PHY_RW,
215     [MII_ANLPAR] = PHY_R,     [MII_STAT1000]               = PHY_R,
216     [MII_ANAR]   = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
217     [MII_PHYID2] = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
218     [MII_ANER]   = PHY_R,
219 };
220 
221 /* MII_PHYID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
222 static const uint16_t phy_reg_init[] = {
223     [MII_BMCR] = MII_BMCR_SPEED1000 |
224                  MII_BMCR_FD |
225                  MII_BMCR_AUTOEN,
226 
227     [MII_BMSR] = MII_BMSR_EXTCAP |
228                  MII_BMSR_LINK_ST |   /* link initially up */
229                  MII_BMSR_AUTONEG |
230                  /* MII_BMSR_AN_COMP: initially NOT completed */
231                  MII_BMSR_MFPS |
232                  MII_BMSR_EXTSTAT |
233                  MII_BMSR_10T_HD |
234                  MII_BMSR_10T_FD |
235                  MII_BMSR_100TX_HD |
236                  MII_BMSR_100TX_FD,
237 
238     [MII_PHYID1] = 0x141,
239     /* [MII_PHYID2] configured per DevId, from e1000_reset() */
240     [MII_ANAR] = MII_ANAR_CSMACD | MII_ANAR_10 |
241                  MII_ANAR_10FD | MII_ANAR_TX |
242                  MII_ANAR_TXFD | MII_ANAR_PAUSE |
243                  MII_ANAR_PAUSE_ASYM,
244     [MII_ANLPAR] = MII_ANLPAR_10 | MII_ANLPAR_10FD |
245                    MII_ANLPAR_TX | MII_ANLPAR_TXFD,
246     [MII_CTRL1000] = MII_CTRL1000_FULL | MII_CTRL1000_PORT |
247                      MII_CTRL1000_MASTER,
248     [MII_STAT1000] = MII_STAT1000_HALF | MII_STAT1000_FULL |
249                      MII_STAT1000_ROK | MII_STAT1000_LOK,
250     [M88E1000_PHY_SPEC_CTRL] = 0x360,
251     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
252     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
253 };
254 
255 static const uint32_t mac_reg_init[] = {
256     [PBA]     = 0x00100030,
257     [LEDCTL]  = 0x602,
258     [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
259                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
260     [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
261                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
262                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
263                 E1000_STATUS_LU,
264     [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
265                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
266                 E1000_MANC_RMCP_EN,
267 };
268 
/*
 * Fold one candidate delay into the running minimum.
 *
 * @curr:  running minimum; 0 means "no delay chosen yet"
 * @value: candidate delay; 0 means "not set" and is ignored
 *
 * After the call *curr holds the smallest non-zero delay seen so far.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;
    }
    if (*curr != 0 && *curr <= value) {
        /* the delay already chosen is at least as short */
        return;
    }
    *curr = value;
}
277 
278 static void
279 set_interrupt_cause(E1000State *s, int index, uint32_t val)
280 {
281     PCIDevice *d = PCI_DEVICE(s);
282     uint32_t pending_ints;
283     uint32_t mit_delay;
284 
285     s->mac_reg[ICR] = val;
286 
287     /*
288      * Make sure ICR and ICS registers have the same value.
289      * The spec says that the ICS register is write-only.  However in practice,
290      * on real hardware ICS is readable, and for reads it has the same value as
291      * ICR (except that ICS does not have the clear on read behaviour of ICR).
292      *
293      * The VxWorks PRO/1000 driver uses this behaviour.
294      */
295     s->mac_reg[ICS] = val;
296 
297     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
298     if (!s->mit_irq_level && pending_ints) {
299         /*
300          * Here we detect a potential raising edge. We postpone raising the
301          * interrupt line if we are inside the mitigation delay window
302          * (s->mit_timer_on == 1).
303          * We provide a partial implementation of interrupt mitigation,
304          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
305          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
306          * RADV; relative timers based on TIDV and RDTR are not implemented.
307          */
308         if (s->mit_timer_on) {
309             return;
310         }
311         if (chkflag(MIT)) {
312             /* Compute the next mitigation delay according to pending
313              * interrupts and the current values of RADV (provided
314              * RDTR!=0), TADV and ITR.
315              * Then rearm the timer.
316              */
317             mit_delay = 0;
318             if (s->mit_ide &&
319                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
320                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
321             }
322             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
323                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
324             }
325             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
326 
327             /*
328              * According to e1000 SPEC, the Ethernet controller guarantees
329              * a maximum observable interrupt rate of 7813 interrupts/sec.
330              * Thus if mit_delay < 500 then the delay should be set to the
331              * minimum delay possible which is 500.
332              */
333             mit_delay = (mit_delay < 500) ? 500 : mit_delay;
334 
335             s->mit_timer_on = 1;
336             timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
337                       mit_delay * 256);
338             s->mit_ide = 0;
339         }
340     }
341 
342     s->mit_irq_level = (pending_ints != 0);
343     pci_set_irq(d, s->mit_irq_level);
344 }
345 
346 static void
347 e1000_mit_timer(void *opaque)
348 {
349     E1000State *s = opaque;
350 
351     s->mit_timer_on = 0;
352     /* Call set_interrupt_cause to update the irq level (if necessary). */
353     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
354 }
355 
356 static void
357 set_ics(E1000State *s, int index, uint32_t val)
358 {
359     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
360         s->mac_reg[IMS]);
361     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
362 }
363 
364 static void
365 e1000_autoneg_timer(void *opaque)
366 {
367     E1000State *s = opaque;
368     if (!qemu_get_queue(s->nic)->link_down) {
369         e1000_autoneg_done(s);
370         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
371     }
372 }
373 
374 static bool e1000_vet_init_need(void *opaque)
375 {
376     E1000State *s = opaque;
377 
378     return chkflag(VET);
379 }
380 
381 static void e1000_reset_hold(Object *obj)
382 {
383     E1000State *d = E1000(obj);
384     E1000BaseClass *edc = E1000_GET_CLASS(d);
385     uint8_t *macaddr = d->conf.macaddr.a;
386 
387     timer_del(d->autoneg_timer);
388     timer_del(d->mit_timer);
389     timer_del(d->flush_queue_timer);
390     d->mit_timer_on = 0;
391     d->mit_irq_level = 0;
392     d->mit_ide = 0;
393     memset(d->phy_reg, 0, sizeof d->phy_reg);
394     memcpy(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
395     d->phy_reg[MII_PHYID2] = edc->phy_id2;
396     memset(d->mac_reg, 0, sizeof d->mac_reg);
397     memcpy(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
398     d->rxbuf_min_shift = 1;
399     memset(&d->tx, 0, sizeof d->tx);
400 
401     if (qemu_get_queue(d->nic)->link_down) {
402         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
403     }
404 
405     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
406 
407     if (e1000_vet_init_need(d)) {
408         d->mac_reg[VET] = ETH_P_VLAN;
409     }
410 }
411 
412 static void
413 set_ctrl(E1000State *s, int index, uint32_t val)
414 {
415     /* RST is self clearing */
416     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
417 }
418 
419 static void
420 e1000_flush_queue_timer(void *opaque)
421 {
422     E1000State *s = opaque;
423 
424     qemu_flush_queued_packets(qemu_get_queue(s->nic));
425 }
426 
427 static void
428 set_rx_control(E1000State *s, int index, uint32_t val)
429 {
430     s->mac_reg[RCTL] = val;
431     s->rxbuf_size = e1000x_rxbufsize(val);
432     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
433     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
434            s->mac_reg[RCTL]);
435     timer_mod(s->flush_queue_timer,
436               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
437 }
438 
439 static void
440 set_mdic(E1000State *s, int index, uint32_t val)
441 {
442     uint32_t data = val & E1000_MDIC_DATA_MASK;
443     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
444 
445     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
446         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
447     else if (val & E1000_MDIC_OP_READ) {
448         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
449         if (!(phy_regcap[addr] & PHY_R)) {
450             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
451             val |= E1000_MDIC_ERROR;
452         } else
453             val = (val ^ data) | s->phy_reg[addr];
454     } else if (val & E1000_MDIC_OP_WRITE) {
455         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
456         if (!(phy_regcap[addr] & PHY_W)) {
457             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
458             val |= E1000_MDIC_ERROR;
459         } else {
460             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
461                 phyreg_writeops[addr](s, index, data);
462             } else {
463                 s->phy_reg[addr] = data;
464             }
465         }
466     }
467     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
468 
469     if (val & E1000_MDIC_INT_EN) {
470         set_ics(s, 0, E1000_ICR_MDAC);
471     }
472 }
473 
474 static uint32_t
475 get_eecd(E1000State *s, int index)
476 {
477     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
478 
479     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
480            s->eecd_state.bitnum_out, s->eecd_state.reading);
481     if (!s->eecd_state.reading ||
482         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
483           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
484         ret |= E1000_EECD_DO;
485     return ret;
486 }
487 
488 static void
489 set_eecd(E1000State *s, int index, uint32_t val)
490 {
491     uint32_t oldval = s->eecd_state.old_eecd;
492 
493     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
494             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
495     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
496         return;
497     }
498     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
499         s->eecd_state.val_in = 0;
500         s->eecd_state.bitnum_in = 0;
501         s->eecd_state.bitnum_out = 0;
502         s->eecd_state.reading = 0;
503     }
504     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
505         return;
506     }
507     if (!(E1000_EECD_SK & val)) {               /* falling edge */
508         s->eecd_state.bitnum_out++;
509         return;
510     }
511     s->eecd_state.val_in <<= 1;
512     if (val & E1000_EECD_DI)
513         s->eecd_state.val_in |= 1;
514     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
515         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
516         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
517             EEPROM_READ_OPCODE_MICROWIRE);
518     }
519     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
520            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
521            s->eecd_state.reading);
522 }
523 
524 static uint32_t
525 flash_eerd_read(E1000State *s, int x)
526 {
527     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
528 
529     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
530         return (s->mac_reg[EERD]);
531 
532     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
533         return (E1000_EEPROM_RW_REG_DONE | r);
534 
535     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
536            E1000_EEPROM_RW_REG_DONE | r);
537 }
538 
/*
 * Compute an Internet checksum over part of a packet and store it in the
 * packet, big-endian.
 *
 * @data: start of the packet
 * @n:    packet length in bytes
 * @sloc: offset at which the 16-bit checksum is stored
 * @css:  offset of the first byte to sum
 * @cse:  offset of the last byte to sum; 0 means "up to the end of the
 *        packet"
 *
 * Nothing is written unless both checksum bytes fit inside the (possibly
 * @cse-limited) length.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;

    if (cse && cse < n) {
        n = cse + 1;
    }

    /*
     * n is unsigned, so for an empty packet "n - 1" would wrap around to
     * UINT32_MAX and the bounds check below would always pass.  Reject
     * n == 0 explicitly.
     */
    if (n == 0) {
        return;
    }

    if (sloc < n - 1) {
        sum = net_checksum_add(n - css, data + css);
        stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
    }
}
551 
552 static inline void
553 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
554 {
555     if (is_broadcast_ether_addr(arr)) {
556         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
557     } else if (is_multicast_ether_addr(arr)) {
558         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
559     }
560 }
561 
562 static void
563 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
564 {
565     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
566                                     PTC1023, PTC1522 };
567 
568     NetClientState *nc = qemu_get_queue(s->nic);
569     if (s->phy_reg[MII_BMCR] & MII_BMCR_LOOPBACK) {
570         qemu_receive_packet(nc, buf, size);
571     } else {
572         qemu_send_packet(nc, buf, size);
573     }
574     inc_tx_bcast_or_mcast_count(s, buf);
575     e1000x_increase_size_stats(s->mac_reg, PTCregs, size + 4);
576 }
577 
578 static void
579 xmit_seg(E1000State *s)
580 {
581     uint16_t len;
582     unsigned int frames = s->tx.tso_frames, css, sofar;
583     struct e1000_tx *tp = &s->tx;
584     struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
585 
586     if (tp->cptse) {
587         css = props->ipcss;
588         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
589                frames, tp->size, css);
590         if (props->ip) {    /* IPv4 */
591             stw_be_p(tp->data+css+2, tp->size - css);
592             stw_be_p(tp->data+css+4,
593                      lduw_be_p(tp->data + css + 4) + frames);
594         } else {         /* IPv6 */
595             stw_be_p(tp->data+css+4, tp->size - css);
596         }
597         css = props->tucss;
598         len = tp->size - css;
599         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
600         if (props->tcp) {
601             sofar = frames * props->mss;
602             stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
603             if (props->paylen - sofar > props->mss) {
604                 tp->data[css + 13] &= ~9;    /* PSH, FIN */
605             } else if (frames) {
606                 e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
607             }
608         } else {    /* UDP */
609             stw_be_p(tp->data+css+4, len);
610         }
611         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
612             unsigned int phsum;
613             // add pseudo-header length before checksum calculation
614             void *sp = tp->data + props->tucso;
615 
616             phsum = lduw_be_p(sp) + len;
617             phsum = (phsum >> 16) + (phsum & 0xffff);
618             stw_be_p(sp, phsum);
619         }
620         tp->tso_frames++;
621     }
622 
623     if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
624         putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
625     }
626     if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
627         putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
628     }
629     if (tp->vlan_needed) {
630         memmove(tp->vlan, tp->data, 4);
631         memmove(tp->data, tp->data + 4, 8);
632         memcpy(tp->data + 8, tp->vlan_header, 4);
633         e1000_send_packet(s, tp->vlan, tp->size + 4);
634     } else {
635         e1000_send_packet(s, tp->data, tp->size);
636     }
637 
638     e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
639     e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size + 4);
640     e1000x_inc_reg_if_not_full(s->mac_reg, GPTC);
641     e1000x_grow_8reg_if_not_full(s->mac_reg, GOTCL, s->tx.size + 4);
642 }
643 
644 static void
645 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
646 {
647     PCIDevice *d = PCI_DEVICE(s);
648     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
649     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
650     unsigned int split_size = txd_lower & 0xffff, bytes, sz;
651     unsigned int msh = 0xfffff;
652     uint64_t addr;
653     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
654     struct e1000_tx *tp = &s->tx;
655 
656     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
657     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
658         if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
659             e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
660             s->use_tso_for_migration = 1;
661             tp->tso_frames = 0;
662         } else {
663             e1000x_read_tx_ctx_descr(xp, &tp->props);
664             s->use_tso_for_migration = 0;
665         }
666         return;
667     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
668         // data descriptor
669         if (tp->size == 0) {
670             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
671         }
672         tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
673     } else {
674         // legacy descriptor
675         tp->cptse = 0;
676     }
677 
678     if (e1000x_vlan_enabled(s->mac_reg) &&
679         e1000x_is_vlan_txd(txd_lower) &&
680         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
681         tp->vlan_needed = 1;
682         stw_be_p(tp->vlan_header,
683                       le16_to_cpu(s->mac_reg[VET]));
684         stw_be_p(tp->vlan_header + 2,
685                       le16_to_cpu(dp->upper.fields.special));
686     }
687 
688     addr = le64_to_cpu(dp->buffer_addr);
689     if (tp->cptse) {
690         msh = tp->tso_props.hdr_len + tp->tso_props.mss;
691         do {
692             bytes = split_size;
693             if (tp->size >= msh) {
694                 goto eop;
695             }
696             if (tp->size + bytes > msh)
697                 bytes = msh - tp->size;
698 
699             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
700             pci_dma_read(d, addr, tp->data + tp->size, bytes);
701             sz = tp->size + bytes;
702             if (sz >= tp->tso_props.hdr_len
703                 && tp->size < tp->tso_props.hdr_len) {
704                 memmove(tp->header, tp->data, tp->tso_props.hdr_len);
705             }
706             tp->size = sz;
707             addr += bytes;
708             if (sz == msh) {
709                 xmit_seg(s);
710                 memmove(tp->data, tp->header, tp->tso_props.hdr_len);
711                 tp->size = tp->tso_props.hdr_len;
712             }
713             split_size -= bytes;
714         } while (bytes && split_size);
715     } else {
716         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
717         pci_dma_read(d, addr, tp->data + tp->size, split_size);
718         tp->size += split_size;
719     }
720 
721 eop:
722     if (!(txd_lower & E1000_TXD_CMD_EOP))
723         return;
724     if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
725         xmit_seg(s);
726     }
727     tp->tso_frames = 0;
728     tp->sum_needed = 0;
729     tp->vlan_needed = 0;
730     tp->size = 0;
731     tp->cptse = 0;
732 }
733 
734 static uint32_t
735 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
736 {
737     PCIDevice *d = PCI_DEVICE(s);
738     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
739 
740     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
741         return 0;
742     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
743                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
744     dp->upper.data = cpu_to_le32(txd_upper);
745     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
746                   &dp->upper, sizeof(dp->upper));
747     return E1000_ICR_TXDW;
748 }
749 
750 static uint64_t tx_desc_base(E1000State *s)
751 {
752     uint64_t bah = s->mac_reg[TDBAH];
753     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
754 
755     return (bah << 32) + bal;
756 }
757 
758 static void
759 start_xmit(E1000State *s)
760 {
761     PCIDevice *d = PCI_DEVICE(s);
762     dma_addr_t base;
763     struct e1000_tx_desc desc;
764     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
765 
766     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
767         DBGOUT(TX, "tx disabled\n");
768         return;
769     }
770 
771     if (s->tx.busy) {
772         return;
773     }
774     s->tx.busy = true;
775 
776     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
777         base = tx_desc_base(s) +
778                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
779         pci_dma_read(d, base, &desc, sizeof(desc));
780 
781         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
782                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
783                desc.upper.data);
784 
785         process_tx_desc(s, &desc);
786         cause |= txdesc_writeback(s, base, &desc);
787 
788         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
789             s->mac_reg[TDH] = 0;
790         /*
791          * the following could happen only if guest sw assigns
792          * bogus values to TDT/TDLEN.
793          * there's nothing too intelligent we could do about this.
794          */
795         if (s->mac_reg[TDH] == tdh_start ||
796             tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
797             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
798                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
799             break;
800         }
801     }
802     s->tx.busy = false;
803     set_ics(s, 0, cause);
804 }
805 
806 static int
807 receive_filter(E1000State *s, const uint8_t *buf, int size)
808 {
809     uint32_t rctl = s->mac_reg[RCTL];
810     int isbcast = is_broadcast_ether_addr(buf);
811     int ismcast = is_multicast_ether_addr(buf);
812 
813     if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
814         e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
815         uint16_t vid = lduw_be_p(&PKT_GET_VLAN_HDR(buf)->h_tci);
816         uint32_t vfta =
817             ldl_le_p((uint32_t *)(s->mac_reg + VFTA) +
818                      ((vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK));
819         if ((vfta & (1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK))) == 0) {
820             return 0;
821         }
822     }
823 
824     if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
825         return 1;
826     }
827 
828     if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
829         return 1;
830     }
831 
832     if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
833         return 1;
834     }
835 
836     return e1000x_rx_group_filter(s->mac_reg, buf);
837 }
838 
839 static void
840 e1000_set_link_status(NetClientState *nc)
841 {
842     E1000State *s = qemu_get_nic_opaque(nc);
843     uint32_t old_status = s->mac_reg[STATUS];
844 
845     if (nc->link_down) {
846         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
847     } else {
848         if (have_autoneg(s) &&
849             !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
850             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
851         } else {
852             e1000_link_up(s);
853         }
854     }
855 
856     if (s->mac_reg[STATUS] != old_status)
857         set_ics(s, 0, E1000_ICR_LSC);
858 }
859 
860 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
861 {
862     int bufs;
863     /* Fast-path short packets */
864     if (total_size <= s->rxbuf_size) {
865         return s->mac_reg[RDH] != s->mac_reg[RDT];
866     }
867     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
868         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
869     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
870         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
871             s->mac_reg[RDT] - s->mac_reg[RDH];
872     } else {
873         return false;
874     }
875     return total_size <= bufs * s->rxbuf_size;
876 }
877 
878 static bool
879 e1000_can_receive(NetClientState *nc)
880 {
881     E1000State *s = qemu_get_nic_opaque(nc);
882 
883     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
884         e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
885 }
886 
887 static uint64_t rx_desc_base(E1000State *s)
888 {
889     uint64_t bah = s->mac_reg[RDBAH];
890     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
891 
892     return (bah << 32) + bal;
893 }
894 
895 static void
896 e1000_receiver_overrun(E1000State *s, size_t size)
897 {
898     trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
899     e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
900     e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
901     set_ics(s, 0, E1000_ICS_RXO);
902 }
903 
904 static ssize_t
905 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
906 {
907     E1000State *s = qemu_get_nic_opaque(nc);
908     PCIDevice *d = PCI_DEVICE(s);
909     struct e1000_rx_desc desc;
910     dma_addr_t base;
911     unsigned int n, rdt;
912     uint32_t rdh_start;
913     uint16_t vlan_special = 0;
914     uint8_t vlan_status = 0;
915     uint8_t min_buf[ETH_ZLEN];
916     struct iovec min_iov;
917     uint8_t *filter_buf = iov->iov_base;
918     size_t size = iov_size(iov, iovcnt);
919     size_t iov_ofs = 0;
920     size_t desc_offset;
921     size_t desc_size;
922     size_t total_size;
923     eth_pkt_types_e pkt_type;
924 
925     if (!e1000x_hw_rx_enabled(s->mac_reg)) {
926         return -1;
927     }
928 
929     if (timer_pending(s->flush_queue_timer)) {
930         return 0;
931     }
932 
933     /* Pad to minimum Ethernet frame length */
934     if (size < sizeof(min_buf)) {
935         iov_to_buf(iov, iovcnt, 0, min_buf, size);
936         memset(&min_buf[size], 0, sizeof(min_buf) - size);
937         min_iov.iov_base = filter_buf = min_buf;
938         min_iov.iov_len = size = sizeof(min_buf);
939         iovcnt = 1;
940         iov = &min_iov;
941     } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
942         /* This is very unlikely, but may happen. */
943         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
944         filter_buf = min_buf;
945     }
946 
947     /* Discard oversized packets if !LPE and !SBP. */
948     if (e1000x_is_oversized(s->mac_reg, size)) {
949         return size;
950     }
951 
952     if (!receive_filter(s, filter_buf, size)) {
953         return size;
954     }
955 
956     if (e1000x_vlan_enabled(s->mac_reg) &&
957         e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
958         vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
959         iov_ofs = 4;
960         if (filter_buf == iov->iov_base) {
961             memmove(filter_buf + 4, filter_buf, 12);
962         } else {
963             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
964             while (iov->iov_len <= iov_ofs) {
965                 iov_ofs -= iov->iov_len;
966                 iov++;
967             }
968         }
969         vlan_status = E1000_RXD_STAT_VP;
970         size -= 4;
971     }
972 
973     pkt_type = get_eth_packet_type(PKT_GET_ETH_HDR(filter_buf));
974     rdh_start = s->mac_reg[RDH];
975     desc_offset = 0;
976     total_size = size + e1000x_fcs_len(s->mac_reg);
977     if (!e1000_has_rxbufs(s, total_size)) {
978         e1000_receiver_overrun(s, total_size);
979         return -1;
980     }
981     do {
982         desc_size = total_size - desc_offset;
983         if (desc_size > s->rxbuf_size) {
984             desc_size = s->rxbuf_size;
985         }
986         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
987         pci_dma_read(d, base, &desc, sizeof(desc));
988         desc.special = vlan_special;
989         desc.status &= ~E1000_RXD_STAT_DD;
990         if (desc.buffer_addr) {
991             if (desc_offset < size) {
992                 size_t iov_copy;
993                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
994                 size_t copy_size = size - desc_offset;
995                 if (copy_size > s->rxbuf_size) {
996                     copy_size = s->rxbuf_size;
997                 }
998                 do {
999                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
1000                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
1001                     copy_size -= iov_copy;
1002                     ba += iov_copy;
1003                     iov_ofs += iov_copy;
1004                     if (iov_ofs == iov->iov_len) {
1005                         iov++;
1006                         iov_ofs = 0;
1007                     }
1008                 } while (copy_size);
1009             }
1010             desc_offset += desc_size;
1011             desc.length = cpu_to_le16(desc_size);
1012             if (desc_offset >= total_size) {
1013                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
1014             } else {
1015                 /* Guest zeroing out status is not a hardware requirement.
1016                    Clear EOP in case guest didn't do it. */
1017                 desc.status &= ~E1000_RXD_STAT_EOP;
1018             }
1019         } else { // as per intel docs; skip descriptors with null buf addr
1020             DBGOUT(RX, "Null RX descriptor!!\n");
1021         }
1022         pci_dma_write(d, base, &desc, sizeof(desc));
1023         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
1024         pci_dma_write(d, base + offsetof(struct e1000_rx_desc, status),
1025                       &desc.status, sizeof(desc.status));
1026 
1027         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
1028             s->mac_reg[RDH] = 0;
1029         /* see comment in start_xmit; same here */
1030         if (s->mac_reg[RDH] == rdh_start ||
1031             rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
1032             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1033                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1034             e1000_receiver_overrun(s, total_size);
1035             return -1;
1036         }
1037     } while (desc_offset < total_size);
1038 
1039     e1000x_update_rx_total_stats(s->mac_reg, pkt_type, size, total_size);
1040 
1041     n = E1000_ICS_RXT0;
1042     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1043         rdt += s->mac_reg[RDLEN] / sizeof(desc);
1044     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1045         s->rxbuf_min_shift)
1046         n |= E1000_ICS_RXDMT0;
1047 
1048     set_ics(s, 0, n);
1049 
1050     return size;
1051 }
1052 
1053 static ssize_t
1054 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1055 {
1056     const struct iovec iov = {
1057         .iov_base = (uint8_t *)buf,
1058         .iov_len = size
1059     };
1060 
1061     return e1000_receive_iov(nc, &iov, 1);
1062 }
1063 
1064 static uint32_t
1065 mac_readreg(E1000State *s, int index)
1066 {
1067     return s->mac_reg[index];
1068 }
1069 
1070 static uint32_t
1071 mac_icr_read(E1000State *s, int index)
1072 {
1073     uint32_t ret = s->mac_reg[ICR];
1074 
1075     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1076     set_interrupt_cause(s, 0, 0);
1077     return ret;
1078 }
1079 
1080 static uint32_t
1081 mac_read_clr4(E1000State *s, int index)
1082 {
1083     uint32_t ret = s->mac_reg[index];
1084 
1085     s->mac_reg[index] = 0;
1086     return ret;
1087 }
1088 
1089 static uint32_t
1090 mac_read_clr8(E1000State *s, int index)
1091 {
1092     uint32_t ret = s->mac_reg[index];
1093 
1094     s->mac_reg[index] = 0;
1095     s->mac_reg[index-1] = 0;
1096     return ret;
1097 }
1098 
1099 static void
1100 mac_writereg(E1000State *s, int index, uint32_t val)
1101 {
1102     uint32_t macaddr[2];
1103 
1104     s->mac_reg[index] = val;
1105 
1106     if (index == RA + 1) {
1107         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1108         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1109         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1110     }
1111 }
1112 
1113 static void
1114 set_rdt(E1000State *s, int index, uint32_t val)
1115 {
1116     s->mac_reg[index] = val & 0xffff;
1117     if (e1000_has_rxbufs(s, 1)) {
1118         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1119     }
1120 }
1121 
1122 #define LOW_BITS_SET_FUNC(num)                             \
1123     static void                                            \
1124     set_##num##bit(E1000State *s, int index, uint32_t val) \
1125     {                                                      \
1126         s->mac_reg[index] = val & (BIT(num) - 1);          \
1127     }
1128 
1129 LOW_BITS_SET_FUNC(4)
1130 LOW_BITS_SET_FUNC(11)
1131 LOW_BITS_SET_FUNC(13)
1132 LOW_BITS_SET_FUNC(16)
1133 
1134 static void
1135 set_dlen(E1000State *s, int index, uint32_t val)
1136 {
1137     s->mac_reg[index] = val & 0xfff80;
1138 }
1139 
1140 static void
1141 set_tctl(E1000State *s, int index, uint32_t val)
1142 {
1143     s->mac_reg[index] = val;
1144     s->mac_reg[TDT] &= 0xffff;
1145     start_xmit(s);
1146 }
1147 
1148 static void
1149 set_icr(E1000State *s, int index, uint32_t val)
1150 {
1151     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1152     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1153 }
1154 
1155 static void
1156 set_imc(E1000State *s, int index, uint32_t val)
1157 {
1158     s->mac_reg[IMS] &= ~val;
1159     set_ics(s, 0, 0);
1160 }
1161 
1162 static void
1163 set_ims(E1000State *s, int index, uint32_t val)
1164 {
1165     s->mac_reg[IMS] |= val;
1166     set_ics(s, 0, 0);
1167 }
1168 
/* Shorthand for a table entry whose read handler is the plain mac_readreg */
#define getreg(x)    [x] = mac_readreg
/* Read handler signature: (device state, register index) -> value read */
typedef uint32_t (*readops)(E1000State *, int);
/*
 * MMIO read dispatch table, indexed by register index (the value that
 * e1000_mmio_read() computes from the access address).  Entries left
 * unset are NULL, and e1000_mmio_read() treats them as unknown registers.
 */
static const readops macreg_readops[] = {
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),    getreg(RDFH),     getreg(RDFT),     getreg(RDFHS),
    getreg(RDFTS),    getreg(RDFPC),    getreg(TDFH),     getreg(TDFT),
    getreg(TDFHS),    getreg(TDFTS),    getreg(TDFPC),    getreg(AIT),

    /* Registers that are cleared by reading (see mac_read_clr4/8) */
    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    /* Registers with dedicated read handlers */
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,

    /* Register ranges (GNU range designators), all plainly readable */
    [CRCERRS ... MPC]     = &mac_readreg,
    [IP6AT ... IP6AT + 3] = &mac_readreg,    [IP4AT ... IP4AT + 6] = &mac_readreg,
    [FFLT ... FFLT + 6]   = &mac_readreg,
    [RA ... RA + 31]      = &mac_readreg,
    [WUPM ... WUPM + 31]  = &mac_readreg,
    [MTA ... MTA + E1000_MC_TBL_SIZE - 1]   = &mac_readreg,
    [VFTA ... VFTA + E1000_VLAN_FILTER_TBL_SIZE - 1] = &mac_readreg,
    [FFMT ... FFMT + 254] = &mac_readreg,
    [FFVT ... FFVT + 254] = &mac_readreg,
    [PBM ... PBM + 16383] = &mac_readreg,
};
/* Number of slots in macreg_readops; used as the bound check on index */
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1219 
/* Shorthand for a table entry whose write handler is the plain mac_writereg */
#define putreg(x)    [x] = mac_writereg
/* Write handler signature: (device state, register index, value written) */
typedef void (*writeops)(E1000State *, int, uint32_t);
/*
 * MMIO write dispatch table, indexed by register index (the value that
 * e1000_mmio_write() computes from the access address).  Entries left
 * unset are NULL; e1000_mmio_write() then reports the register as
 * read-only if it has a read handler, or as unknown otherwise.
 */
static const writeops macreg_writeops[] = {
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(IPAV),     putreg(WUC),
    putreg(WUS),

    /* Registers with dedicated or masking write handlers */
    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL]  = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]   = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]   = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]   = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL]  = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV]  = set_16bit,
    [ITR]    = set_16bit,  [TDFH]   = set_11bit,      [TDFT]  = set_11bit,
    [TDFHS]  = set_13bit,  [TDFTS]  = set_13bit,      [TDFPC] = set_13bit,
    [RDFH]   = set_13bit,  [RDFT]   = set_13bit,      [RDFHS] = set_13bit,
    [RDFTS]  = set_13bit,  [RDFPC]  = set_13bit,      [AIT]   = set_16bit,

    /* Register ranges (GNU range designators) */
    [IP6AT ... IP6AT + 3] = &mac_writereg, [IP4AT ... IP4AT + 6] = &mac_writereg,
    [FFLT ... FFLT + 6]   = &set_11bit,
    [RA ... RA + 31]      = &mac_writereg,
    [WUPM ... WUPM + 31]  = &mac_writereg,
    [MTA ... MTA + E1000_MC_TBL_SIZE - 1] = &mac_writereg,
    [VFTA ... VFTA + E1000_VLAN_FILTER_TBL_SIZE - 1] = &mac_writereg,
    [FFMT ... FFMT + 254] = &set_4bit,     [FFVT ... FFVT + 254] = &mac_writereg,
    [PBM ... PBM + 16383] = &mac_writereg,
};

/* Number of slots in macreg_writeops; used as the bound check on index */
enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1251 
/* Low two bits of each mac_reg_access[] entry (the 'p' and 'n' bits below) */
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

/* Build an entry that requires compat flag E1000_FLAG_<x> to be active */
#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implemented
 *
 * The MMIO handlers consult this table by register index: when 'n' is
 * set, the access is performed only if s->compat_flags contains one of
 * the 'f' bits; when 'p' is set, a debug message is emitted on access.
 * Registers without an entry are 0, i.e. unconditionally accessible. */
static const uint8_t mac_reg_access[0x8000] = {
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1304 
1305 static void
1306 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1307                  unsigned size)
1308 {
1309     E1000State *s = opaque;
1310     unsigned int index = (addr & 0x1ffff) >> 2;
1311 
1312     if (index < NWRITEOPS && macreg_writeops[index]) {
1313         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1314             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1315             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1316                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1317                        "It is not fully implemented.\n", index<<2);
1318             }
1319             macreg_writeops[index](s, index, val);
1320         } else {    /* "flag needed" bit is set, but the flag is not active */
1321             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1322                    index<<2);
1323         }
1324     } else if (index < NREADOPS && macreg_readops[index]) {
1325         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1326                index<<2, val);
1327     } else {
1328         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1329                index<<2, val);
1330     }
1331 }
1332 
1333 static uint64_t
1334 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1335 {
1336     E1000State *s = opaque;
1337     unsigned int index = (addr & 0x1ffff) >> 2;
1338 
1339     if (index < NREADOPS && macreg_readops[index]) {
1340         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1341             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1342             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1343                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1344                        "It is not fully implemented.\n", index<<2);
1345             }
1346             return macreg_readops[index](s, index);
1347         } else {    /* "flag needed" bit is set, but the flag is not active */
1348             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1349                    index<<2);
1350         }
1351     } else {
1352         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1353     }
1354     return 0;
1355 }
1356 
/*
 * Access callbacks for the register MMIO region.  The region is declared
 * little-endian and the implementation handles 4-byte accesses only
 * (.impl min and max access size are both 4).
 */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1366 
1367 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1368                               unsigned size)
1369 {
1370     E1000State *s = opaque;
1371 
1372     (void)s;
1373     return 0;
1374 }
1375 
1376 static void e1000_io_write(void *opaque, hwaddr addr,
1377                            uint64_t val, unsigned size)
1378 {
1379     E1000State *s = opaque;
1380 
1381     (void)s;
1382 }
1383 
/*
 * Access callbacks for the I/O port region (little-endian).  Both
 * callbacks are stubs: reads return 0 and writes are ignored.
 */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1389 
/*
 * Field test for VMSTATE_UNUSED_TEST: true only when the incoming
 * stream's version_id is exactly 1.  'opaque' is unused.
 */
static bool is_version_1(void *opaque, int version_id)
{
    if (version_id == 1) {
        return true;
    }
    return false;
}
1394 
1395 static int e1000_pre_save(void *opaque)
1396 {
1397     E1000State *s = opaque;
1398     NetClientState *nc = qemu_get_queue(s->nic);
1399 
1400     /*
1401      * If link is down and auto-negotiation is supported and ongoing,
1402      * complete auto-negotiation immediately. This allows us to look
1403      * at MII_BMSR_AN_COMP to infer link status on load.
1404      */
1405     if (nc->link_down && have_autoneg(s)) {
1406         s->phy_reg[MII_BMSR] |= MII_BMSR_AN_COMP;
1407     }
1408 
1409     /* Decide which set of props to migrate in the main structure */
1410     if (chkflag(TSO) || !s->use_tso_for_migration) {
1411         /* Either we're migrating with the extra subsection, in which
1412          * case the mig_props is always 'props' OR
1413          * we've not got the subsection, but 'props' was the last
1414          * updated.
1415          */
1416         s->mig_props = s->tx.props;
1417     } else {
1418         /* We're not using the subsection, and 'tso_props' was
1419          * the last updated.
1420          */
1421         s->mig_props = s->tx.tso_props;
1422     }
1423     return 0;
1424 }
1425 
1426 static int e1000_post_load(void *opaque, int version_id)
1427 {
1428     E1000State *s = opaque;
1429     NetClientState *nc = qemu_get_queue(s->nic);
1430 
1431     if (!chkflag(MIT)) {
1432         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1433             s->mac_reg[TADV] = 0;
1434         s->mit_irq_level = false;
1435     }
1436     s->mit_ide = 0;
1437     s->mit_timer_on = true;
1438     timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
1439 
1440     /* nc.link_down can't be migrated, so infer link_down according
1441      * to link status bit in mac_reg[STATUS].
1442      * Alternatively, restart link negotiation if it was in progress. */
1443     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1444 
1445     if (have_autoneg(s) && !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
1446         nc->link_down = false;
1447         timer_mod(s->autoneg_timer,
1448                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1449     }
1450 
1451     s->tx.props = s->mig_props;
1452     if (!s->received_tx_tso) {
1453         /* We received only one set of offload data (tx.props)
1454          * and haven't got tx.tso_props.  The best we can do
1455          * is dupe the data.
1456          */
1457         s->tx.tso_props = s->mig_props;
1458     }
1459     return 0;
1460 }
1461 
1462 static int e1000_tx_tso_post_load(void *opaque, int version_id)
1463 {
1464     E1000State *s = opaque;
1465     s->received_tx_tso = true;
1466     return 0;
1467 }
1468 
1469 static bool e1000_mit_state_needed(void *opaque)
1470 {
1471     E1000State *s = opaque;
1472 
1473     return chkflag(MIT);
1474 }
1475 
1476 static bool e1000_full_mac_needed(void *opaque)
1477 {
1478     E1000State *s = opaque;
1479 
1480     return chkflag(MAC);
1481 }
1482 
1483 static bool e1000_tso_state_needed(void *opaque)
1484 {
1485     E1000State *s = opaque;
1486 
1487     return chkflag(TSO);
1488 }
1489 
/*
 * Migration subsection carrying the interrupt mitigation registers
 * (RDTR, RADV, TADV, ITR) and mit_irq_level.  Its .needed callback is
 * e1000_mit_state_needed(), i.e. chkflag(MIT).
 */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1504 
/*
 * Migration subsection carrying the entire mac_reg[] array (0x8000
 * 32-bit entries).  Its .needed callback is e1000_full_mac_needed(),
 * i.e. chkflag(MAC).
 */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1515 
/*
 * Migration subsection carrying tx.tso_props.  Its .needed callback is
 * e1000_tso_state_needed(), i.e. chkflag(TSO); on load,
 * e1000_tx_tso_post_load() sets received_tx_tso.
 */
static const VMStateDescription vmstate_e1000_tx_tso_state = {
    .name = "e1000/tx_tso_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_tso_state_needed,
    .post_load = e1000_tx_tso_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
        VMSTATE_INT8(tx.tso_props.ip, E1000State),
        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1537 
/*
 * Main migration description of the device (version 2, accepting
 * streams from version 1 on).  e1000_pre_save() fills mig_props before
 * saving and e1000_post_load() redistributes it after loading.
 *
 * NOTE(review): the field list presumably defines the on-the-wire
 * layout, so entries should not be reordered or removed -- confirm
 * against the QEMU migration documentation before changing it.
 */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* EEPROM access state */
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* TX offload properties selected by e1000_pre_save(), and TX state */
        VMSTATE_UINT8(mig_props.ipcss, E1000State),
        VMSTATE_UINT8(mig_props.ipcso, E1000State),
        VMSTATE_UINT16(mig_props.ipcse, E1000State),
        VMSTATE_UINT8(mig_props.tucss, E1000State),
        VMSTATE_UINT8(mig_props.tucso, E1000State),
        VMSTATE_UINT16(mig_props.tucse, E1000State),
        VMSTATE_UINT32(mig_props.paylen, E1000State),
        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
        VMSTATE_UINT16(mig_props.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(mig_props.ip, E1000State),
        VMSTATE_INT8(mig_props.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        /* EEPROM contents and PHY registers */
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individual MAC registers */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Register tables: RA (32 entries), MTA and VFTA */
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, E1000_MC_TBL_SIZE),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA,
                                 E1000_VLAN_FILTER_TBL_SIZE),
        VMSTATE_END_OF_LIST()
    },
    /* Optional subsections; each one has its own .needed callback */
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        &vmstate_e1000_tx_tso_state,
        NULL
    }
};
1623 
/*
 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
 * Note: A valid DevId will be inserted during pci_e1000_realize().
 *
 * Template of 64 16-bit words; the two words marked DevId are left 0
 * here as placeholders.
 */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1638 
1639 /* PCI interface */
1640 
1641 static void
1642 e1000_mmio_setup(E1000State *d)
1643 {
1644     int i;
1645     const uint32_t excluded_regs[] = {
1646         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1647         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1648     };
1649 
1650     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1651                           "e1000-mmio", PNPMMIO_SIZE);
1652     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1653     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1654         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1655                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1656     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1657 }
1658 
1659 static void
1660 pci_e1000_uninit(PCIDevice *dev)
1661 {
1662     E1000State *d = E1000(dev);
1663 
1664     timer_free(d->autoneg_timer);
1665     timer_free(d->mit_timer);
1666     timer_free(d->flush_queue_timer);
1667     qemu_del_nic(d->nic);
1668 }
1669 
/*
 * Net client description for this device: a NIC-type client whose
 * receive path and link status notifications are handled by the
 * e1000_* callbacks listed here.
 */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .link_status_changed = e1000_set_link_status,
};
1678 
1679 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1680                                 uint32_t val, int len)
1681 {
1682     E1000State *s = E1000(pci_dev);
1683 
1684     pci_default_write_config(pci_dev, address, val, len);
1685 
1686     if (range_covers_byte(address, len, PCI_COMMAND) &&
1687         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1688         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1689     }
1690 }
1691 
1692 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1693 {
1694     DeviceState *dev = DEVICE(pci_dev);
1695     E1000State *d = E1000(pci_dev);
1696     uint8_t *pci_conf;
1697     uint8_t *macaddr;
1698 
1699     pci_dev->config_write = e1000_write_config;
1700 
1701     pci_conf = pci_dev->config;
1702 
1703     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1704     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1705 
1706     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1707 
1708     e1000_mmio_setup(d);
1709 
1710     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1711 
1712     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1713 
1714     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1715     macaddr = d->conf.macaddr.a;
1716 
1717     e1000x_core_prepare_eeprom(d->eeprom_data,
1718                                e1000_eeprom_template,
1719                                sizeof(e1000_eeprom_template),
1720                                PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1721                                macaddr);
1722 
1723     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1724                           object_get_typename(OBJECT(d)), dev->id, d);
1725 
1726     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1727 
1728     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1729     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1730     d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1731                                         e1000_flush_queue_timer, d);
1732 }
1733 
1734 static Property e1000_properties[] = {
1735     DEFINE_NIC_PROPERTIES(E1000State, conf),
1736     DEFINE_PROP_BIT("autonegotiation", E1000State,
1737                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1738     DEFINE_PROP_BIT("mitigation", E1000State,
1739                     compat_flags, E1000_FLAG_MIT_BIT, true),
1740     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1741                     compat_flags, E1000_FLAG_MAC_BIT, true),
1742     DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1743                     compat_flags, E1000_FLAG_TSO_BIT, true),
1744     DEFINE_PROP_BIT("init-vet", E1000State,
1745                     compat_flags, E1000_FLAG_VET_BIT, true),
1746     DEFINE_PROP_END_OF_LIST(),
1747 };
1748 
/*
 * Per-model parameters of one concrete e1000 device type.  Entries of
 * this type are passed to e1000_class_init() as class data.
 */
typedef struct E1000Info {
    const char *name;       /* QOM type name */
    uint16_t device_id;     /* copied to the PCI class's device_id */
    uint8_t revision;       /* copied to the PCI class's revision */
    uint16_t phy_id2;       /* copied to E1000BaseClass.phy_id2 */
} E1000Info;
1755 
1756 static void e1000_class_init(ObjectClass *klass, void *data)
1757 {
1758     DeviceClass *dc = DEVICE_CLASS(klass);
1759     ResettableClass *rc = RESETTABLE_CLASS(klass);
1760     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1761     E1000BaseClass *e = E1000_CLASS(klass);
1762     const E1000Info *info = data;
1763 
1764     k->realize = pci_e1000_realize;
1765     k->exit = pci_e1000_uninit;
1766     k->romfile = "efi-e1000.rom";
1767     k->vendor_id = PCI_VENDOR_ID_INTEL;
1768     k->device_id = info->device_id;
1769     k->revision = info->revision;
1770     e->phy_id2 = info->phy_id2;
1771     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1772     rc->phases.hold = e1000_reset_hold;
1773     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1774     dc->desc = "Intel Gigabit Ethernet";
1775     dc->vmsd = &vmstate_e1000;
1776     device_class_set_props(dc, e1000_properties);
1777 }
1778 
1779 static void e1000_instance_init(Object *obj)
1780 {
1781     E1000State *n = E1000(obj);
1782     device_add_bootindex_property(obj, &n->conf.bootindex,
1783                                   "bootindex", "/ethernet-phy@0",
1784                                   DEVICE(n));
1785 }
1786 
1787 static const TypeInfo e1000_base_info = {
1788     .name          = TYPE_E1000_BASE,
1789     .parent        = TYPE_PCI_DEVICE,
1790     .instance_size = sizeof(E1000State),
1791     .instance_init = e1000_instance_init,
1792     .class_size    = sizeof(E1000BaseClass),
1793     .abstract      = true,
1794     .interfaces = (InterfaceInfo[]) {
1795         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1796         { },
1797     },
1798 };
1799 
1800 static const E1000Info e1000_devices[] = {
1801     {
1802         .name      = "e1000",
1803         .device_id = E1000_DEV_ID_82540EM,
1804         .revision  = 0x03,
1805         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1806     },
1807     {
1808         .name      = "e1000-82544gc",
1809         .device_id = E1000_DEV_ID_82544GC_COPPER,
1810         .revision  = 0x03,
1811         .phy_id2   = E1000_PHY_ID2_82544x,
1812     },
1813     {
1814         .name      = "e1000-82545em",
1815         .device_id = E1000_DEV_ID_82545EM_COPPER,
1816         .revision  = 0x03,
1817         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1818     },
1819 };
1820 
1821 static void e1000_register_types(void)
1822 {
1823     int i;
1824 
1825     type_register_static(&e1000_base_info);
1826     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1827         const E1000Info *info = &e1000_devices[i];
1828         TypeInfo type_info = {};
1829 
1830         type_info.name = info->name;
1831         type_info.parent = TYPE_E1000_BASE;
1832         type_info.class_data = (void *)info;
1833         type_info.class_init = e1000_class_init;
1834 
1835         type_register(&type_info);
1836     }
1837 }
1838 
1839 type_init(e1000_register_types)
1840