xref: /openbmc/qemu/hw/net/e1000.c (revision b7728c9f62d8ee5c4772a08ebe2f21bd789c73f7)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/net/mii.h"
30 #include "hw/pci/pci_device.h"
31 #include "hw/qdev-properties.h"
32 #include "migration/vmstate.h"
33 #include "net/eth.h"
34 #include "net/net.h"
35 #include "net/checksum.h"
36 #include "sysemu/sysemu.h"
37 #include "sysemu/dma.h"
38 #include "qemu/iov.h"
39 #include "qemu/module.h"
40 #include "qemu/range.h"
41 
42 #include "e1000x_common.h"
43 #include "trace.h"
44 #include "qom/object.h"
45 
/* Ethernet broadcast destination address, used by the RX filter and the
 * TX broadcast/multicast statistics helpers. */
static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};

/* #define E1000_DEBUG */

#ifdef E1000_DEBUG
/* Debug categories; each selects one bit in debugflags below. */
enum {
    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
};
#define DBGBIT(x)    (1<<DEBUG_##x)
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/* Category-filtered debug printf; compiled out unless E1000_DEBUG is set. */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif

#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/* Ethernet header (14 bytes) plus one 802.1Q VLAN tag (4 bytes). */
#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
73 
74 /*
75  * HW models:
76  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
77  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
78  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
79  *  Others never tested
80  */
81 
/* Per-device state of the emulated Intel 8254x (e1000) NIC. */
struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;
    MemoryRegion mmio;          /* memory-mapped register BAR */
    MemoryRegion io;            /* I/O port BAR */

    uint32_t mac_reg[0x8000];   /* MAC registers, indexed by dword offset */
    uint16_t phy_reg[0x20];     /* MII PHY registers */
    uint16_t eeprom_data[64];   /* EEPROM contents as 16-bit words */

    uint32_t rxbuf_size;        /* per-descriptor RX buffer size (from RCTL) */
    uint32_t rxbuf_min_shift;   /* RXDMT0 low-descriptor threshold shift */
    /* TX state accumulated across descriptors until end-of-packet. */
    struct e1000_tx {
        unsigned char header[256];   /* saved TSO header, replayed per segment */
        unsigned char vlan_header[4];
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        uint16_t size;               /* bytes accumulated in data[] so far */
        unsigned char vlan_needed;
        unsigned char sum_needed;    /* E1000_TXD_POPTS_* checksum requests */
        bool cptse;                  /* current packet uses TCP segmentation */
        e1000x_txd_props props;
        e1000x_txd_props tso_props;
        uint16_t tso_frames;         /* TSO segments emitted for this packet */
        bool busy;                   /* guards against re-entering start_xmit() */
    } tx;

    /* Microwire EEPROM bit-bang state machine, driven via the EECD register. */
    struct {
        uint32_t val_in;    /* shifted in from guest driver */
        uint16_t bitnum_in;
        uint16_t bitnum_out;
        uint16_t reading;
        uint32_t old_eecd;
    } eecd_state;

    QEMUTimer *autoneg_timer;

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

    QEMUTimer *flush_queue_timer;

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_MAC_BIT 2
#define E1000_FLAG_TSO_BIT 3
#define E1000_FLAG_VET_BIT 4
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
#define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)

    uint32_t compat_flags;
    bool received_tx_tso;
    bool use_tso_for_migration;
    e1000x_txd_props mig_props;
};
typedef struct E1000State_st E1000State;
149 
/* Test a compat flag; expects an E1000State pointer named 's' in scope. */
#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)

/* Class-level data: subclasses set the PHY ID2 value per device model. */
struct E1000BaseClass {
    PCIDeviceClass parent_class;
    uint16_t phy_id2;
};
typedef struct E1000BaseClass E1000BaseClass;

#define TYPE_E1000_BASE "e1000-base"

DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
                     E1000, TYPE_E1000_BASE)
162 
163 
164 static void
165 e1000_link_up(E1000State *s)
166 {
167     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
168 
169     /* E1000_STATUS_LU is tested by e1000_can_receive() */
170     qemu_flush_queued_packets(qemu_get_queue(s->nic));
171 }
172 
173 static void
174 e1000_autoneg_done(E1000State *s)
175 {
176     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
177 
178     /* E1000_STATUS_LU is tested by e1000_can_receive() */
179     qemu_flush_queued_packets(qemu_get_queue(s->nic));
180 }
181 
/*
 * True when link auto-negotiation should be emulated: the compat flag
 * must allow it (disabled when migrating to/from QEMU <= 1.3) and the
 * guest must have set the autoneg-enable bit in the PHY control register.
 */
static bool
have_autoneg(E1000State *s)
{
    return chkflag(AUTONEG) && (s->phy_reg[MII_BMCR] & MII_BMCR_AUTOEN);
}
187 
/*
 * Guest write handler for the PHY control register (MII_BMCR).
 * Reserved and self-clearing bits are masked out before the value is
 * stored; a requested autoneg restart starts the emulated negotiation.
 */
static void
set_phy_ctrl(E1000State *s, int index, uint16_t val)
{
    /* bits 0-5 reserved; MII_BMCR_[ANRESTART,RESET] are self clearing */
    s->phy_reg[MII_BMCR] = val & ~(0x3f |
                                   MII_BMCR_RESET |
                                   MII_BMCR_ANRESTART);

    /*
     * QEMU 1.3 does not support link auto-negotiation emulation, so if we
     * migrate during auto negotiation, after migration the link will be
     * down.
     */
    if (have_autoneg(s) && (val & MII_BMCR_ANRESTART)) {
        e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
    }
}
205 
/*
 * Optional per-register PHY write hooks, indexed by PHY register number.
 * Registers without an entry are stored directly by set_mdic().
 */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [MII_BMCR] = set_phy_ctrl,
};

enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
211 
/* Guest access rights per PHY register; unlisted registers are inaccessible
 * and cause set_mdic() to report E1000_MDIC_ERROR. */
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
static const char phy_regcap[0x20] = {
    [MII_BMSR]   = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [MII_PHYID1] = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
    [MII_BMCR]   = PHY_RW,    [MII_CTRL1000]               = PHY_RW,
    [MII_ANLPAR] = PHY_R,     [MII_STAT1000]               = PHY_R,
    [MII_ANAR]   = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
    [MII_PHYID2] = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
    [MII_ANER]   = PHY_R,
};
222 
/* PHY register defaults applied by e1000_reset().
 * MII_PHYID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
static const uint16_t phy_reg_init[] = {
    [MII_BMCR] = MII_BMCR_SPEED1000 |
                 MII_BMCR_FD |
                 MII_BMCR_AUTOEN,

    [MII_BMSR] = MII_BMSR_EXTCAP |
                 MII_BMSR_LINK_ST |   /* link initially up */
                 MII_BMSR_AUTONEG |
                 /* MII_BMSR_AN_COMP: initially NOT completed */
                 MII_BMSR_MFPS |
                 MII_BMSR_EXTSTAT |
                 MII_BMSR_10T_HD |
                 MII_BMSR_10T_FD |
                 MII_BMSR_100TX_HD |
                 MII_BMSR_100TX_FD,

    [MII_PHYID1] = 0x141,
    /* [MII_PHYID2] configured per DevId, from e1000_reset() */
    [MII_ANAR] = 0xde1,
    [MII_ANLPAR] = 0x1e0,
    [MII_CTRL1000] = 0x0e00,
    [MII_STAT1000] = 0x3c00,
    [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
};
250 
/* MAC register defaults applied by e1000_reset(); registers not listed
 * here start out as zero. */
static const uint32_t mac_reg_init[] = {
    [PBA]     = 0x00100030,
    [LEDCTL]  = 0x602,
    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};
264 
/*
 * Record the smallest non-zero mitigation delay seen so far.
 * *curr == 0 means "no delay recorded yet"; a zero 'value' is ignored.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (!value) {
        return;
    }
    if (*curr == 0 || value < *curr) {
        *curr = value;
    }
}
273 
/*
 * Latch a new interrupt cause value into ICR/ICS and update the INTx pin
 * level, applying the optional interrupt mitigation delay before a rising
 * edge.  'index' is unused; the register written is always ICR.
 */
static void
set_interrupt_cause(E1000State *s, int index, uint32_t val)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t pending_ints;
    uint32_t mit_delay;

    s->mac_reg[ICR] = val;

    /*
     * Make sure ICR and ICS registers have the same value.
     * The spec says that the ICS register is write-only.  However in practice,
     * on real hardware ICS is readable, and for reads it has the same value as
     * ICR (except that ICS does not have the clear on read behaviour of ICR).
     *
     * The VxWorks PRO/1000 driver uses this behaviour.
     */
    s->mac_reg[ICS] = val;

    /* Only causes that are also unmasked (IMS) can assert the pin */
    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
    if (!s->mit_irq_level && pending_ints) {
        /*
         * Here we detect a potential raising edge. We postpone raising the
         * interrupt line if we are inside the mitigation delay window
         * (s->mit_timer_on == 1).
         * We provide a partial implementation of interrupt mitigation,
         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
         * RADV; relative timers based on TIDV and RDTR are not implemented.
         */
        if (s->mit_timer_on) {
            return;
        }
        if (chkflag(MIT)) {
            /* Compute the next mitigation delay according to pending
             * interrupts and the current values of RADV (provided
             * RDTR!=0), TADV and ITR.
             * Then rearm the timer.
             */
            mit_delay = 0;
            if (s->mit_ide &&
                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
            }
            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
            }
            mit_update_delay(&mit_delay, s->mac_reg[ITR]);

            /*
             * According to e1000 SPEC, the Ethernet controller guarantees
             * a maximum observable interrupt rate of 7813 interrupts/sec.
             * Thus if mit_delay < 500 then the delay should be set to the
             * minimum delay possible which is 500.
             */
            mit_delay = (mit_delay < 500) ? 500 : mit_delay;

            s->mit_timer_on = 1;
            timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                      mit_delay * 256);
            s->mit_ide = 0;
        }
    }

    s->mit_irq_level = (pending_ints != 0);
    pci_set_irq(d, s->mit_irq_level);
}
341 
342 static void
343 e1000_mit_timer(void *opaque)
344 {
345     E1000State *s = opaque;
346 
347     s->mit_timer_on = 0;
348     /* Call set_interrupt_cause to update the irq level (if necessary). */
349     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
350 }
351 
/*
 * Interrupt Cause Set: OR the new cause bits into those already pending
 * and let set_interrupt_cause() decide whether to assert the IRQ line.
 */
static void
set_ics(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
        s->mac_reg[IMS]);
    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
}
359 
360 static void
361 e1000_autoneg_timer(void *opaque)
362 {
363     E1000State *s = opaque;
364     if (!qemu_get_queue(s->nic)->link_down) {
365         e1000_autoneg_done(s);
366         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
367     }
368 }
369 
/*
 * True when the VET compat flag is enabled; used by e1000_reset() to
 * decide whether to preset the VLAN Ether Type register to ETH_P_VLAN.
 * NOTE(review): the name suggests it also serves as a vmstate subsection
 * .needed callback — confirm against the vmstate definitions in this file.
 */
static bool e1000_vet_init_need(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(VET);
}
376 
/*
 * Full device reset: stop all timers, clear TX/mitigation state, restore
 * PHY and MAC registers to their power-on defaults, reload the MAC
 * address, and (depending on compat flags) preset VET.
 */
static void e1000_reset(void *opaque)
{
    E1000State *d = opaque;
    E1000BaseClass *edc = E1000_GET_CLASS(d);
    uint8_t *macaddr = d->conf.macaddr.a;

    timer_del(d->autoneg_timer);
    timer_del(d->mit_timer);
    timer_del(d->flush_queue_timer);
    d->mit_timer_on = 0;
    d->mit_irq_level = 0;
    d->mit_ide = 0;
    memset(d->phy_reg, 0, sizeof d->phy_reg);
    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
    /* PHY ID2 differs between the emulated 8254x variants */
    d->phy_reg[MII_PHYID2] = edc->phy_id2;
    memset(d->mac_reg, 0, sizeof d->mac_reg);
    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
    d->rxbuf_min_shift = 1;
    memset(&d->tx, 0, sizeof d->tx);

    /* Reflect the current backend link state in STATUS/PHY registers */
    if (qemu_get_queue(d->nic)->link_down) {
        e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
    }

    e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);

    if (e1000_vet_init_need(d)) {
        d->mac_reg[VET] = ETH_P_VLAN;
    }
}
407 
408 static void
409 set_ctrl(E1000State *s, int index, uint32_t val)
410 {
411     /* RST is self clearing */
412     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
413 }
414 
415 static void
416 e1000_flush_queue_timer(void *opaque)
417 {
418     E1000State *s = opaque;
419 
420     qemu_flush_queued_packets(qemu_get_queue(s->nic));
421 }
422 
/*
 * Guest write handler for RCTL: cache the per-descriptor buffer size and
 * the free-descriptor threshold shift, then delay any queued-packet flush
 * by 1s so the guest has time to post RX descriptors first.
 */
static void
set_rx_control(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[RCTL] = val;
    s->rxbuf_size = e1000x_rxbufsize(val);
    /* RDMTS field selects the "descriptors low" (RXDMT0) threshold */
    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
    /* NOTE(review): the "RCTL:" label below actually prints RDT's value */
    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
           s->mac_reg[RCTL]);
    timer_mod(s->flush_queue_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
}
434 
/*
 * Guest write to the MDI Control register: perform the requested PHY
 * register read or write, honouring the phy_regcap access rights, then
 * set the READY bit and optionally raise the MDI-access-done interrupt.
 * Only PHY address 1 exists; other addresses report E1000_MDIC_ERROR.
 */
static void
set_mdic(E1000State *s, int index, uint32_t val)
{
    uint32_t data = val & E1000_MDIC_DATA_MASK;
    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);

    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
    else if (val & E1000_MDIC_OP_READ) {
        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
        if (!(phy_regcap[addr] & PHY_R)) {
            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else
            /* replace the data field with the PHY register contents */
            val = (val ^ data) | s->phy_reg[addr];
    } else if (val & E1000_MDIC_OP_WRITE) {
        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
        if (!(phy_regcap[addr] & PHY_W)) {
            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else {
            /* dispatch through the per-register hook when one exists */
            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
                phyreg_writeops[addr](s, index, data);
            } else {
                s->phy_reg[addr] = data;
            }
        }
    }
    s->mac_reg[MDIC] = val | E1000_MDIC_READY;

    if (val & E1000_MDIC_INT_EN) {
        set_ics(s, 0, E1000_ICR_MDAC);
    }
}
469 
/*
 * Read of the EEPROM/Flash Control register: report presence/grant plus
 * the last written control bits, and drive the DO (data out) line from
 * the EEPROM word currently being shifted out by set_eecd().
 */
static uint32_t
get_eecd(E1000State *s, int index)
{
    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;

    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
           s->eecd_state.bitnum_out, s->eecd_state.reading);
    /* DO idles high; during a read it carries the addressed EEPROM bit,
     * MSB first within each 16-bit word */
    if (!s->eecd_state.reading ||
        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
        ret |= E1000_EECD_DO;
    return ret;
}
483 
/*
 * Guest write to EECD: emulate the Microwire serial EEPROM protocol by
 * tracking chip-select and clock edges and shifting command bits in via
 * DI.  After a 9-bit READ command (start bit + opcode + address) has been
 * received, bitnum_out addresses the data bits that get_eecd() shifts
 * back out on DO.
 */
static void
set_eecd(E1000State *s, int index, uint32_t val)
{
    uint32_t oldval = s->eecd_state.old_eecd;

    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
        return;
    }
    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
        s->eecd_state.val_in = 0;
        s->eecd_state.bitnum_in = 0;
        s->eecd_state.bitnum_out = 0;
        s->eecd_state.reading = 0;
    }
    if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
        return;
    }
    if (!(E1000_EECD_SK & val)) {               /* falling edge */
        s->eecd_state.bitnum_out++;
        return;
    }
    /* rising clock edge: latch the DI bit into the input shift register */
    s->eecd_state.val_in <<= 1;
    if (val & E1000_EECD_DI)
        s->eecd_state.val_in |= 1;
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
        /* full command received: compute output position, check opcode */
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
            EEPROM_READ_OPCODE_MICROWIRE);
    }
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
           s->eecd_state.reading);
}
519 
/*
 * Read of the EEPROM Read register (EERD): emulate an instantly
 * completing EEPROM read by returning the addressed word together with
 * the DONE flag.  Out-of-range addresses report DONE with no data.
 */
static uint32_t
flash_eerd_read(E1000State *s, int x)
{
    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;

    /* no read has been started: return the register unchanged */
    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
        return (s->mac_reg[EERD]);

    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
        return (E1000_EEPROM_RW_REG_DONE | r);

    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
           E1000_EEPROM_RW_REG_DONE | r);
}
534 
/*
 * Insert a 16-bit Internet checksum into a packet buffer.
 * n: total packet length; sloc: offset to store the checksum at;
 * css: checksum start offset; cse: inclusive end offset (0 means
 * "checksum to the end of the packet").
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;

    if (cse && cse < n)
        n = cse + 1;
    /* only write if the 2-byte checksum field fits inside the packet */
    if (sloc < n-1) {
        sum = net_checksum_add(n-css, data+css);
        stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
    }
}
547 
548 static inline void
549 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
550 {
551     if (!memcmp(arr, bcast, sizeof bcast)) {
552         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
553     } else if (arr[0] & 1) {
554         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
555     }
556 }
557 
558 static void
559 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
560 {
561     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
562                                     PTC1023, PTC1522 };
563 
564     NetClientState *nc = qemu_get_queue(s->nic);
565     if (s->phy_reg[MII_BMCR] & MII_BMCR_LOOPBACK) {
566         qemu_receive_packet(nc, buf, size);
567     } else {
568         qemu_send_packet(nc, buf, size);
569     }
570     inc_tx_bcast_or_mcast_count(s, buf);
571     e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
572 }
573 
/*
 * Transmit one segment (a whole packet, or one TSO segment of it).
 * For TSO segments, patch the IP total length / identification and the
 * TCP sequence number and flags for the current segment, fold the
 * payload length into the TCP/UDP pseudo-header checksum, then apply
 * any requested L3/L4 checksum insertion, splice in the VLAN tag if
 * needed, send the frame and update the TX statistics registers.
 */
static void
xmit_seg(E1000State *s)
{
    uint16_t len;
    unsigned int frames = s->tx.tso_frames, css, sofar;
    struct e1000_tx *tp = &s->tx;
    struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;

    if (tp->cptse) {
        css = props->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (props->ip) {    /* IPv4 */
            stw_be_p(tp->data+css+2, tp->size - css);
            /* per-segment IP identification: base + segment index */
            stw_be_p(tp->data+css+4,
                     lduw_be_p(tp->data + css + 4) + frames);
        } else {         /* IPv6 */
            stw_be_p(tp->data+css+4, tp->size - css);
        }
        css = props->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
        if (props->tcp) {
            sofar = frames * props->mss;
            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
            if (props->paylen - sofar > props->mss) {
                /* not the last segment: clear PSH and FIN */
                tp->data[css + 13] &= ~9;    /* PSH, FIN */
            } else if (frames) {
                e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
            }
        } else {    /* UDP */
            stw_be_p(tp->data+css+4, len);
        }
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            // add pseudo-header length before checksum calculation
            void *sp = tp->data + props->tucso;

            phsum = lduw_be_p(sp) + len;
            phsum = (phsum >> 16) + (phsum & 0xffff);
            stw_be_p(sp, phsum);
        }
        tp->tso_frames++;
    }

    if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
        putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
    }
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
        putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
    }
    if (tp->vlan_needed) {
        /* shift the MACs into vlan[]/data[] and splice in the 4-byte tag */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else {
        e1000_send_packet(s, tp->data, tp->size);
    }

    /* TX statistics: totals are mirrored into the "good" TX registers */
    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
    e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
    s->mac_reg[GPTC] = s->mac_reg[TPT];
    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
}
640 
/*
 * Process a single TX descriptor.  Context descriptors update the TSO or
 * legacy offload parameters; data/legacy descriptors append their buffer
 * to tx.data, emitting a segment each time a full TSO segment (header +
 * mss bytes) has accumulated, and a final (possibly short) segment when
 * the end-of-packet bit is seen.
 */
static void
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    unsigned int split_size = txd_lower & 0xffff, bytes, sz;
    unsigned int msh = 0xfffff;
    uint64_t addr;
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    struct e1000_tx *tp = &s->tx;

    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
        if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
            e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
            s->use_tso_for_migration = 1;
            tp->tso_frames = 0;
        } else {
            e1000x_read_tx_ctx_descr(xp, &tp->props);
            s->use_tso_for_migration = 0;
        }
        return;
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
        // data descriptor
        if (tp->size == 0) {
            /* first descriptor of the packet carries the POPTS flags */
            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
        }
        tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
    } else {
        // legacy descriptor
        tp->cptse = 0;
    }

    if (e1000x_vlan_enabled(s->mac_reg) &&
        e1000x_is_vlan_txd(txd_lower) &&
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
        tp->vlan_needed = 1;
        stw_be_p(tp->vlan_header,
                      le16_to_cpu(s->mac_reg[VET]));
        stw_be_p(tp->vlan_header + 2,
                      le16_to_cpu(dp->upper.fields.special));
    }

    addr = le64_to_cpu(dp->buffer_addr);
    if (tp->cptse) {
        /* TSO: accumulate at most header + mss bytes, emitting a segment
         * and replaying the saved header each time that limit is hit */
        msh = tp->tso_props.hdr_len + tp->tso_props.mss;
        do {
            bytes = split_size;
            if (tp->size >= msh) {
                goto eop;
            }
            if (tp->size + bytes > msh)
                bytes = msh - tp->size;

            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
            pci_dma_read(d, addr, tp->data + tp->size, bytes);
            sz = tp->size + bytes;
            if (sz >= tp->tso_props.hdr_len
                && tp->size < tp->tso_props.hdr_len) {
                /* the L2-L4 header is now complete: save it for replay */
                memmove(tp->header, tp->data, tp->tso_props.hdr_len);
            }
            tp->size = sz;
            addr += bytes;
            if (sz == msh) {
                xmit_seg(s);
                memmove(tp->data, tp->header, tp->tso_props.hdr_len);
                tp->size = tp->tso_props.hdr_len;
            }
            split_size -= bytes;
        } while (bytes && split_size);
    } else {
        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
        pci_dma_read(d, addr, tp->data + tp->size, split_size);
        tp->size += split_size;
    }

eop:
    if (!(txd_lower & E1000_TXD_CMD_EOP))
        return;
    /* skip the final segment if only a partial TSO header accumulated */
    if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
        xmit_seg(s);
    }
    tp->tso_frames = 0;
    tp->sum_needed = 0;
    tp->vlan_needed = 0;
    tp->size = 0;
    tp->cptse = 0;
}
730 
/*
 * Write completion status back to a TX descriptor when the guest asked
 * for it (RS or RPS set): set the DD bit, clear the error bits, and DMA
 * only the upper dword back.  Returns E1000_ICR_TXDW to accumulate into
 * the interrupt cause, or 0 when no write-back was requested.
 */
static uint32_t
txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);

    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
        return 0;
    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
    dp->upper.data = cpu_to_le32(txd_upper);
    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
                  &dp->upper, sizeof(dp->upper));
    return E1000_ICR_TXDW;
}
746 
747 static uint64_t tx_desc_base(E1000State *s)
748 {
749     uint64_t bah = s->mac_reg[TDBAH];
750     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
751 
752     return (bah << 32) + bal;
753 }
754 
/*
 * Drain the TX descriptor ring: process descriptors from TDH up to TDT,
 * write back completion status for each, then raise the accumulated
 * interrupt causes (TXQE plus any TXDW from write-backs).  tx.busy
 * guards against re-entry when descriptor processing itself triggers
 * another transmit (e.g. through loopback reception).
 */
static void
start_xmit(E1000State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    dma_addr_t base;
    struct e1000_tx_desc desc;
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;

    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
        DBGOUT(TX, "tx disabled\n");
        return;
    }

    if (s->tx.busy) {
        return;
    }
    s->tx.busy = true;

    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
        base = tx_desc_base(s) +
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
        pci_dma_read(d, base, &desc, sizeof(desc));

        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
               desc.upper.data);

        process_tx_desc(s, &desc);
        cause |= txdesc_writeback(s, base, &desc);

        /* advance TDH, wrapping at the end of the ring */
        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
            s->mac_reg[TDH] = 0;
        /*
         * the following could happen only if guest sw assigns
         * bogus values to TDT/TDLEN.
         * there's nothing too intelligent we could do about this.
         */
        if (s->mac_reg[TDH] == tdh_start ||
            tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
            break;
        }
    }
    s->tx.busy = false;
    set_ics(s, 0, cause);
}
802 
/*
 * Decide whether an incoming frame should be accepted: apply the VLAN
 * filter table (VFTA) if enabled, then the unicast/multicast/broadcast
 * promiscuous bits in RCTL, and finally fall back to the perfect-match
 * receive-address group filter.  Returns nonzero to accept the frame.
 */
static int
receive_filter(E1000State *s, const uint8_t *buf, int size)
{
    uint32_t rctl = s->mac_reg[RCTL];
    int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);

    /* VLAN-tagged frames must pass the VFTA hash-bucket filter */
    if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
        e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
        uint16_t vid = lduw_be_p(buf + 14);
        uint32_t vfta = ldl_le_p((uint32_t *)(s->mac_reg + VFTA) +
                                 ((vid >> 5) & 0x7f));
        if ((vfta & (1 << (vid & 0x1f))) == 0) {
            return 0;
        }
    }

    if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
        return 1;
    }

    if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
        e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
        return 1;
    }

    if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
        e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
        return 1;
    }

    return e1000x_rx_group_filter(s->mac_reg, buf);
}
835 
836 static void
837 e1000_set_link_status(NetClientState *nc)
838 {
839     E1000State *s = qemu_get_nic_opaque(nc);
840     uint32_t old_status = s->mac_reg[STATUS];
841 
842     if (nc->link_down) {
843         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
844     } else {
845         if (have_autoneg(s) &&
846             !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
847             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
848         } else {
849             e1000_link_up(s);
850         }
851     }
852 
853     if (s->mac_reg[STATUS] != old_status)
854         set_ics(s, 0, E1000_ICR_LSC);
855 }
856 
857 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
858 {
859     int bufs;
860     /* Fast-path short packets */
861     if (total_size <= s->rxbuf_size) {
862         return s->mac_reg[RDH] != s->mac_reg[RDT];
863     }
864     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
865         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
866     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
867         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
868             s->mac_reg[RDT] - s->mac_reg[RDH];
869     } else {
870         return false;
871     }
872     return total_size <= bufs * s->rxbuf_size;
873 }
874 
875 static bool
876 e1000_can_receive(NetClientState *nc)
877 {
878     E1000State *s = qemu_get_nic_opaque(nc);
879 
880     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
881         e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
882 }
883 
884 static uint64_t rx_desc_base(E1000State *s)
885 {
886     uint64_t bah = s->mac_reg[RDBAH];
887     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
888 
889     return (bah << 32) + bal;
890 }
891 
/*
 * Account for a packet dropped because no RX descriptors were free:
 * bump the no-buffers and missed-packets counters and raise the
 * receiver-overrun interrupt.
 */
static void
e1000_receiver_overrun(E1000State *s, size_t size)
{
    trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
    e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
    e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
    set_ics(s, 0, E1000_ICS_RXO);
}
900 
/*
 * Main receive path (NetClientInfo.receive_iov callback).
 *
 * Filters and optionally VLAN-untags the incoming packet, then DMAs it
 * into the guest's RX descriptor ring, splitting it across descriptors
 * of rxbuf_size bytes each, and finally raises the RXT0 (and possibly
 * RXDMT0) interrupt causes.
 *
 * Returns the number of bytes consumed, 0 to have the packet requeued
 * (flush timer pending), or -1 to drop it (RX disabled or ring full).
 */
static ssize_t
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0;
    uint8_t min_buf[MIN_BUF_SIZE];
    struct iovec min_iov;
    uint8_t *filter_buf = iov->iov_base;   /* contiguous view of the headers */
    size_t size = iov_size(iov, iovcnt);
    size_t iov_ofs = 0;
    size_t desc_offset;
    size_t desc_size;
    size_t total_size;

    if (!e1000x_hw_rx_enabled(s->mac_reg)) {
        return -1;
    }

    /* While the flush timer runs, reception is paused; requeue the packet. */
    if (timer_pending(s->flush_queue_timer)) {
        return 0;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        iov_to_buf(iov, iovcnt, 0, min_buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        min_iov.iov_base = filter_buf = min_buf;
        min_iov.iov_len = size = sizeof(min_buf);
        iovcnt = 1;
        iov = &min_iov;
    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
        /* This is very unlikely, but may happen. */
        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
        filter_buf = min_buf;
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if (e1000x_is_oversized(s->mac_reg, size)) {
        return size;
    }

    /* Drop packets that fail the unicast/multicast/VLAN filters. */
    if (!receive_filter(s, filter_buf, size)) {
        return size;
    }

    /*
     * VLAN stripping: save the tag for the descriptor's "special" field,
     * then remove the 4-byte tag by shifting the MAC header forward.
     */
    if (e1000x_vlan_enabled(s->mac_reg) &&
        e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
        vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
        iov_ofs = 4;
        if (filter_buf == iov->iov_base) {
            memmove(filter_buf + 4, filter_buf, 12);
        } else {
            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
            /* Skip any iovec elements fully consumed by the 4-byte offset. */
            while (iov->iov_len <= iov_ofs) {
                iov_ofs -= iov->iov_len;
                iov++;
            }
        }
        vlan_status = E1000_RXD_STAT_VP;
        size -= 4;
    }

    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    /* total_size also accounts for the FCS bytes, if configured to keep them */
    total_size = size + e1000x_fcs_len(s->mac_reg);
    if (!e1000_has_rxbufs(s, total_size)) {
        e1000_receiver_overrun(s, total_size);
        return -1;
    }
    /* Fill one descriptor (up to rxbuf_size bytes) per iteration. */
    do {
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        /* DD must not be visible until the data write below completes. */
        desc.status &= ~E1000_RXD_STAT_DD;
        if (desc.buffer_addr) {
            if (desc_offset < size) {
                size_t iov_copy;
                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                /* Copy from as many iovec elements as needed. */
                do {
                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
                    copy_size -= iov_copy;
                    ba += iov_copy;
                    iov_ofs += iov_copy;
                    if (iov_ofs == iov->iov_len) {
                        iov++;
                        iov_ofs = 0;
                    }
                } while (copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { // as per intel docs; skip descriptors with null buf addr
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        /* Write the descriptor back first, then set DD in a separate write
           so the guest never observes DD before length/status are valid. */
        pci_dma_write(d, base, &desc, sizeof(desc));
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        pci_dma_write(d, base + offsetof(struct e1000_rx_desc, status),
                      &desc.status, sizeof(desc.status));

        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        if (s->mac_reg[RDH] == rdh_start ||
            rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            e1000_receiver_overrun(s, total_size);
            return -1;
        }
    } while (desc_offset < total_size);

    e1000x_update_rx_total_stats(s->mac_reg, size, total_size);

    /* Raise RXDMT0 in addition to RXT0 if free descriptors fell below
       the configured minimum threshold. */
    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}
1047 
1048 static ssize_t
1049 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1050 {
1051     const struct iovec iov = {
1052         .iov_base = (uint8_t *)buf,
1053         .iov_len = size
1054     };
1055 
1056     return e1000_receive_iov(nc, &iov, 1);
1057 }
1058 
/* Default register read handler: return the raw 32-bit register value. */
static uint32_t
mac_readreg(E1000State *s, int index)
{
    return s->mac_reg[index];
}
1064 
/* Read handler for registers where only the low 4 bits are implemented. */
static uint32_t
mac_low4_read(E1000State *s, int index)
{
    return s->mac_reg[index] & 0xf;
}
1070 
/* Read handler for registers where only the low 11 bits are implemented. */
static uint32_t
mac_low11_read(E1000State *s, int index)
{
    return s->mac_reg[index] & 0x7ff;
}
1076 
/* Read handler for registers where only the low 13 bits are implemented. */
static uint32_t
mac_low13_read(E1000State *s, int index)
{
    return s->mac_reg[index] & 0x1fff;
}
1082 
/* Read handler for registers where only the low 16 bits are implemented. */
static uint32_t
mac_low16_read(E1000State *s, int index)
{
    return s->mac_reg[index] & 0xffff;
}
1088 
/*
 * ICR read handler: the Interrupt Cause Read register is read-to-clear,
 * so return the current causes and then clear them (which also lowers
 * the interrupt line via set_interrupt_cause).
 */
static uint32_t
mac_icr_read(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[ICR];

    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
    set_interrupt_cause(s, 0, 0);
    return ret;
}
1098 
/* Read handler for 32-bit read-to-clear statistics registers. */
static uint32_t
mac_read_clr4(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[index];

    s->mac_reg[index] = 0;
    return ret;
}
1107 
/*
 * Read handler for the high half of a 64-bit read-to-clear statistics
 * pair: reading the high register clears both it and the low register
 * immediately preceding it (e.g. TOTH clears TOTH and TOTL).
 */
static uint32_t
mac_read_clr8(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[index];

    s->mac_reg[index] = 0;
    s->mac_reg[index-1] = 0;
    return ret;
}
1117 
1118 static void
1119 mac_writereg(E1000State *s, int index, uint32_t val)
1120 {
1121     uint32_t macaddr[2];
1122 
1123     s->mac_reg[index] = val;
1124 
1125     if (index == RA + 1) {
1126         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1127         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1128         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1129     }
1130 }
1131 
/*
 * RDT write handler: update the RX descriptor tail (16 bits) and, if
 * that freed up buffer space, flush any packets the net layer queued
 * while we could not receive.
 */
static void
set_rdt(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xffff;
    if (e1000_has_rxbufs(s, 1)) {
        qemu_flush_queued_packets(qemu_get_queue(s->nic));
    }
}
1140 
/* Write handler for registers where only the low 16 bits are writable. */
static void
set_16bit(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xffff;
}
1146 
/*
 * TDLEN/RDLEN write handler: descriptor ring lengths are constrained to
 * 128-byte multiples; the mask keeps bits 7..19 and clears the rest.
 */
static void
set_dlen(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xfff80;
}
1152 
/*
 * Write handler shared by TCTL and TDT (see macreg_writeops): store the
 * value, mask TDT to its 16 valid bits, and kick the transmit path.
 */
static void
set_tctl(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val;
    s->mac_reg[TDT] &= 0xffff;
    start_xmit(s);
}
1160 
/* ICR write handler: write-1-to-clear of the set interrupt cause bits. */
static void
set_icr(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_icr %x\n", val);
    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
}
1167 
/*
 * IMC write handler: clear the given bits in the interrupt mask, then
 * re-evaluate the interrupt line state (set_ics with no new causes).
 */
static void
set_imc(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] &= ~val;
    set_ics(s, 0, 0);
}
1174 
/*
 * IMS write handler: set the given bits in the interrupt mask, then
 * re-evaluate the interrupt line state (set_ics with no new causes).
 */
static void
set_ims(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] |= val;
    set_ics(s, 0, 0);
}
1181 
#define getreg(x)    [x] = mac_readreg
typedef uint32_t (*readops)(E1000State *, int);
/*
 * Per-register read dispatch table, indexed by dword register offset.
 * NULL entries are unimplemented registers; e1000_mmio_read reports
 * reads of those as unknown.
 */
static const readops macreg_readops[] = {
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),

    /* 64-bit statistics pairs: reading the high half clears both. */
    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    /* 32-bit read-to-clear statistics counters. */
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,
    /* FIFO head/tail/count registers: partially implemented, masked. */
    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
    [RDFPC]   = mac_low13_read,
    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
    [TDFPC]   = mac_low13_read,
    [AIT]     = mac_low16_read,

    [CRCERRS ... MPC]     = &mac_readreg,
    [IP6AT ... IP6AT + 3] = &mac_readreg,    [IP4AT ... IP4AT + 6] = &mac_readreg,
    [FFLT ... FFLT + 6]   = &mac_low11_read,
    [RA ... RA + 31]      = &mac_readreg,
    [WUPM ... WUPM + 31]  = &mac_readreg,
    [MTA ... MTA + 127]   = &mac_readreg,
    [VFTA ... VFTA + 127] = &mac_readreg,
    [FFMT ... FFMT + 254] = &mac_low4_read,
    [FFVT ... FFVT + 254] = &mac_readreg,
    [PBM ... PBM + 16383] = &mac_readreg,
};
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1237 
#define putreg(x)    [x] = mac_writereg
typedef void (*writeops)(E1000State *, int, uint32_t);
/*
 * Per-register write dispatch table, indexed by dword register offset.
 * NULL entries are either read-only registers (if present in
 * macreg_readops) or unimplemented ones.
 */
static const writeops macreg_writeops[] = {
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
    putreg(WUS),      putreg(AIT),

    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
    [ITR]    = set_16bit,

    [IP6AT ... IP6AT + 3] = &mac_writereg, [IP4AT ... IP4AT + 6] = &mac_writereg,
    [FFLT ... FFLT + 6]   = &mac_writereg,
    [RA ... RA + 31]      = &mac_writereg,
    [WUPM ... WUPM + 31]  = &mac_writereg,
    [MTA ... MTA + 127]   = &mac_writereg,
    [VFTA ... VFTA + 127] = &mac_writereg,
    [FFMT ... FFMT + 254] = &mac_writereg, [FFVT ... FFVT + 254] = &mac_writereg,
    [PBM ... PBM + 16383] = &mac_writereg,
};

enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1268 
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implemented */
static const uint8_t mac_reg_access[0x8000] = {
    /* Interrupt-mitigation registers: gated on the MIT compat flag. */
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    /* Extended MAC registers: gated on the MAC compat flag. */
    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    /* FIFO registers: accessible but only partially implemented. */
    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1321 
/*
 * MMIO write dispatch: convert the byte offset into a dword register
 * index and invoke the per-register write handler, honouring the
 * compat-flag gating encoded in mac_reg_access[].
 */
static void
e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
                 unsigned size)
{
    E1000State *s = opaque;
    unsigned int index = (addr & 0x1ffff) >> 2;   /* dword register index */

    if (index < NWRITEOPS && macreg_writeops[index]) {
        /* Registers marked "flag needed" are live only when the matching
         * compat flag (stored in bits 2+ of mac_reg_access[]) is set. */
        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
                DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
                       "It is not fully implemented.\n", index<<2);
            }
            macreg_writeops[index](s, index, val);
        } else {    /* "flag needed" bit is set, but the flag is not active */
            DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
                   index<<2);
        }
    } else if (index < NREADOPS && macreg_readops[index]) {
        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
               index<<2, val);
    } else {
        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
               index<<2, val);
    }
}
1349 
/*
 * MMIO read dispatch: convert the byte offset into a dword register
 * index and invoke the per-register read handler, honouring the
 * compat-flag gating encoded in mac_reg_access[].  Unknown or disabled
 * registers read as 0.
 */
static uint64_t
e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
{
    E1000State *s = opaque;
    unsigned int index = (addr & 0x1ffff) >> 2;   /* dword register index */

    if (index < NREADOPS && macreg_readops[index]) {
        /* See e1000_mmio_write for the compat-flag gating scheme. */
        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
                DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
                       "It is not fully implemented.\n", index<<2);
            }
            return macreg_readops[index](s, index);
        } else {    /* "flag needed" bit is set, but the flag is not active */
            DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
                   index<<2);
        }
    } else {
        DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
    }
    return 0;
}
1373 
/* MMIO region ops: all accesses are implemented as 32-bit accesses. */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1383 
1384 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1385                               unsigned size)
1386 {
1387     E1000State *s = opaque;
1388 
1389     (void)s;
1390     return 0;
1391 }
1392 
1393 static void e1000_io_write(void *opaque, hwaddr addr,
1394                            uint64_t val, unsigned size)
1395 {
1396     E1000State *s = opaque;
1397 
1398     (void)s;
1399 }
1400 
/* I/O BAR region ops (stubbed: reads return 0, writes are ignored). */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1406 
/*
 * VMSTATE_UNUSED_TEST predicate: true only when loading migration
 * stream format version 1 (where the skipped field was present).
 */
static bool is_version_1(void *opaque, int version_id)
{
    (void)opaque;
    return version_id == 1;
}
1411 
/*
 * VMState pre_save hook: normalise PHY autonegotiation state and pick
 * which TX offload property set goes into the main migration structure.
 * Always returns 0 (success).
 */
static int e1000_pre_save(void *opaque)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /*
     * If link is down and auto-negotiation is supported and ongoing,
     * complete auto-negotiation immediately. This allows us to look
     * at MII_BMSR_AN_COMP to infer link status on load.
     */
    if (nc->link_down && have_autoneg(s)) {
        s->phy_reg[MII_BMSR] |= MII_BMSR_AN_COMP;
    }

    /* Decide which set of props to migrate in the main structure */
    if (chkflag(TSO) || !s->use_tso_for_migration) {
        /* Either we're migrating with the extra subsection, in which
         * case the mig_props is always 'props' OR
         * we've not got the subsection, but 'props' was the last
         * updated.
         */
        s->mig_props = s->tx.props;
    } else {
        /* We're not using the subsection, and 'tso_props' was
         * the last updated.
         */
        s->mig_props = s->tx.tso_props;
    }
    return 0;
}
1442 
/*
 * VMState post_load hook: restore state that is not (or cannot be)
 * migrated directly - interrupt-mitigation timers, inferred link
 * status, and the TX offload property sets.  Always returns 0.
 */
static int e1000_post_load(void *opaque, int version_id)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /* Without the MIT compat flag the mitigation registers must be zero. */
    if (!chkflag(MIT)) {
        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
            s->mac_reg[TADV] = 0;
        s->mit_irq_level = false;
    }
    s->mit_ide = 0;
    /* Block further interrupts until the mitigation timer fires once. */
    s->mit_timer_on = true;
    timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in mac_reg[STATUS].
     * Alternatively, restart link negotiation if it was in progress. */
    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;

    if (have_autoneg(s) && !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
        nc->link_down = false;
        timer_mod(s->autoneg_timer,
                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
    }

    s->tx.props = s->mig_props;
    if (!s->received_tx_tso) {
        /* We received only one set of offload data (tx.props)
         * and haven't got tx.tso_props.  The best we can do
         * is dupe the data.
         */
        s->tx.tso_props = s->mig_props;
    }
    return 0;
}
1478 
1479 static int e1000_tx_tso_post_load(void *opaque, int version_id)
1480 {
1481     E1000State *s = opaque;
1482     s->received_tx_tso = true;
1483     return 0;
1484 }
1485 
/* Subsection predicate: migrate mitigation state only with the MIT flag. */
static bool e1000_mit_state_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(MIT);
}
1492 
/* Subsection predicate: migrate the full register file only with MAC flag. */
static bool e1000_full_mac_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(MAC);
}
1499 
/* Subsection predicate: migrate separate TSO context only with TSO flag. */
static bool e1000_tso_state_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(TSO);
}
1506 
/* Optional subsection: interrupt-mitigation registers and IRQ level. */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1521 
/* Optional subsection: the complete 0x8000-entry MAC register file. */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1532 
/* Optional subsection: the separate TSO offload context (tx.tso_props). */
static const VMStateDescription vmstate_e1000_tx_tso_state = {
    .name = "e1000/tx_tso_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_tso_state_needed,
    .post_load = e1000_tx_tso_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
        VMSTATE_INT8(tx.tso_props.ip, E1000State),
        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1554 
/* Main migration description: core device, EEPROM/PHY and a fixed
 * subset of MAC registers; extra state travels in the subsections. */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        VMSTATE_UINT8(mig_props.ipcss, E1000State),
        VMSTATE_UINT8(mig_props.ipcso, E1000State),
        VMSTATE_UINT16(mig_props.ipcse, E1000State),
        VMSTATE_UINT8(mig_props.tucss, E1000State),
        VMSTATE_UINT8(mig_props.tucso, E1000State),
        VMSTATE_UINT16(mig_props.tucse, E1000State),
        VMSTATE_UINT32(mig_props.paylen, E1000State),
        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
        VMSTATE_UINT16(mig_props.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(mig_props.ip, E1000State),
        VMSTATE_INT8(mig_props.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        &vmstate_e1000_tx_tso_state,
        NULL
    }
};
1639 
1640 /*
1641  * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1642  * Note: A valid DevId will be inserted during pci_e1000_realize().
1643  */
1644 static const uint16_t e1000_eeprom_template[64] = {
1645     0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1646     0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1647     0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1648     0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1649     0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1650     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1651     0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1652     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1653 };
1654 
1655 /* PCI interface */
1656 
/*
 * Create the MMIO and I/O port memory regions.  MMIO is write-coalesced
 * everywhere except around the registers listed in excluded_regs, whose
 * accesses have immediate side effects (interrupt causes, MDIC, TCTL,
 * TDT) and therefore must trap right away.  PNPMMIO_SIZE acts as the
 * end-of-list sentinel and upper bound for the last coalesced range.
 */
static void
e1000_mmio_setup(E1000State *d)
{
    int i;
    const uint32_t excluded_regs[] = {
        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
    };

    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
                          "e1000-mmio", PNPMMIO_SIZE);
    /* Coalesce from offset 0 up to the first excluded register... */
    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
    /* ...then the gaps between consecutive excluded registers. */
    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
                                     excluded_regs[i+1] - excluded_regs[i] - 4);
    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
}
1674 
/* PCIDevice unrealize hook: release the timers and detach the NIC. */
static void
pci_e1000_uninit(PCIDevice *dev)
{
    E1000State *d = E1000(dev);

    timer_free(d->autoneg_timer);
    timer_free(d->mit_timer);
    timer_free(d->flush_queue_timer);
    qemu_del_nic(d->nic);
}
1685 
/* Callbacks connecting this device to QEMU's network backend layer. */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .link_status_changed = e1000_set_link_status,
};
1694 
/*
 * PCI config-space write hook: after the default handling, flush any
 * queued packets when a write to the COMMAND register enables bus
 * mastering, since RX DMA could not proceed without it.
 */
static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
                                uint32_t val, int len)
{
    E1000State *s = E1000(pci_dev);

    pci_default_write_config(pci_dev, address, val, len);

    if (range_covers_byte(address, len, PCI_COMMAND) &&
        (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
        qemu_flush_queued_packets(qemu_get_queue(s->nic));
    }
}
1707 
1708 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1709 {
1710     DeviceState *dev = DEVICE(pci_dev);
1711     E1000State *d = E1000(pci_dev);
1712     uint8_t *pci_conf;
1713     uint8_t *macaddr;
1714 
1715     pci_dev->config_write = e1000_write_config;
1716 
1717     pci_conf = pci_dev->config;
1718 
1719     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1720     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1721 
1722     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1723 
1724     e1000_mmio_setup(d);
1725 
1726     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1727 
1728     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1729 
1730     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1731     macaddr = d->conf.macaddr.a;
1732 
1733     e1000x_core_prepare_eeprom(d->eeprom_data,
1734                                e1000_eeprom_template,
1735                                sizeof(e1000_eeprom_template),
1736                                PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1737                                macaddr);
1738 
1739     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1740                           object_get_typename(OBJECT(d)), dev->id, d);
1741 
1742     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1743 
1744     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1745     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1746     d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1747                                         e1000_flush_queue_timer, d);
1748 }
1749 
1750 static void qdev_e1000_reset(DeviceState *dev)
1751 {
1752     E1000State *d = E1000(dev);
1753     e1000_reset(d);
1754 }
1755 
/*
 * User-settable device properties.  The compat_flags bits default to on;
 * machine-compat code can clear them to reproduce the behaviour of older
 * QEMU versions (e.g. for cross-version migration).
 */
static Property e1000_properties[] = {
    DEFINE_NIC_PROPERTIES(E1000State, conf),
    DEFINE_PROP_BIT("autonegotiation", E1000State,
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
    DEFINE_PROP_BIT("mitigation", E1000State,
                    compat_flags, E1000_FLAG_MIT_BIT, true),
    DEFINE_PROP_BIT("extra_mac_registers", E1000State,
                    compat_flags, E1000_FLAG_MAC_BIT, true),
    DEFINE_PROP_BIT("migrate_tso_props", E1000State,
                    compat_flags, E1000_FLAG_TSO_BIT, true),
    DEFINE_PROP_BIT("init-vet", E1000State,
                    compat_flags, E1000_FLAG_VET_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};
1770 
/* Per-variant identification data passed to e1000_class_init() as
 * class_data; one entry per concrete QOM type (see e1000_devices[]). */
typedef struct E1000Info {
    const char *name;       /* QOM type name */
    uint16_t   device_id;   /* PCI device id */
    uint8_t    revision;    /* PCI revision id */
    uint16_t   phy_id2;     /* value reported in PHY identifier register 2 */
} E1000Info;
1777 
1778 static void e1000_class_init(ObjectClass *klass, void *data)
1779 {
1780     DeviceClass *dc = DEVICE_CLASS(klass);
1781     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1782     E1000BaseClass *e = E1000_CLASS(klass);
1783     const E1000Info *info = data;
1784 
1785     k->realize = pci_e1000_realize;
1786     k->exit = pci_e1000_uninit;
1787     k->romfile = "efi-e1000.rom";
1788     k->vendor_id = PCI_VENDOR_ID_INTEL;
1789     k->device_id = info->device_id;
1790     k->revision = info->revision;
1791     e->phy_id2 = info->phy_id2;
1792     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1793     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1794     dc->desc = "Intel Gigabit Ethernet";
1795     dc->reset = qdev_e1000_reset;
1796     dc->vmsd = &vmstate_e1000;
1797     device_class_set_props(dc, e1000_properties);
1798 }
1799 
1800 static void e1000_instance_init(Object *obj)
1801 {
1802     E1000State *n = E1000(obj);
1803     device_add_bootindex_property(obj, &n->conf.bootindex,
1804                                   "bootindex", "/ethernet-phy@0",
1805                                   DEVICE(n));
1806 }
1807 
/*
 * Abstract base QOM type; the concrete variants in e1000_devices[] are
 * registered as subtypes of this in e1000_register_types().
 */
static const TypeInfo e1000_base_info = {
    .name          = TYPE_E1000_BASE,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(E1000State),
    .instance_init = e1000_instance_init,
    .class_size    = sizeof(E1000BaseClass),
    .abstract      = true,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};
1820 
/* The concrete 8254x variants this file emulates; each becomes its own
 * QOM type with the listed PCI/PHY identity. */
static const E1000Info e1000_devices[] = {
    {
        .name      = "e1000",
        .device_id = E1000_DEV_ID_82540EM,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
    {
        .name      = "e1000-82544gc",
        .device_id = E1000_DEV_ID_82544GC_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_82544x,
    },
    {
        .name      = "e1000-82545em",
        .device_id = E1000_DEV_ID_82545EM_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
};
1841 
1842 static void e1000_register_types(void)
1843 {
1844     int i;
1845 
1846     type_register_static(&e1000_base_info);
1847     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1848         const E1000Info *info = &e1000_devices[i];
1849         TypeInfo type_info = {};
1850 
1851         type_info.name = info->name;
1852         type_info.parent = TYPE_E1000_BASE;
1853         type_info.class_data = (void *)info;
1854         type_info.class_init = e1000_class_init;
1855 
1856         type_register(&type_info);
1857     }
1858 }
1859 
1860 type_init(e1000_register_types)
1861