xref: /openbmc/qemu/hw/net/e1000.c (revision 89854803)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/hw.h"
30 #include "hw/pci/pci.h"
31 #include "net/net.h"
32 #include "net/checksum.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 #include "qemu/iov.h"
36 #include "qemu/range.h"
37 
38 #include "e1000x_common.h"
39 
/*
 * Ethernet broadcast destination address (all ones).  Compared against the
 * first six bytes of a frame by the TX statistics and RX filter code.
 */
static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
41 
/* #define E1000_DEBUG */

#ifdef E1000_DEBUG
/*
 * Debug categories.  Each enumerator is a bit position inside debugflags;
 * DBGBIT(x) turns the category suffix into the corresponding mask.
 */
enum {
    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
};
#define DBGBIT(x)    (1<<DEBUG_##x)
/* Categories that are printed when E1000_DEBUG is defined. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/*
 * DBGOUT(category, fmt, ...): print a message prefixed with "e1000: " to
 * stderr if the given category is enabled in debugflags.
 */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
/* Debugging compiled out: DBGOUT expands to an empty statement. */
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif
61 
/*
 * Region sizes; presumably the sizes of the I/O-port and MMIO BARs
 * registered elsewhere in this file -- TODO confirm against the realize code.
 */
#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/*
 * Number of leading bytes the receive path needs contiguously for
 * filtering: 14 + 4 (the receive code reads a 16-bit VLAN field at
 * offset 14 and strips 4 bytes when a tag is present).
 */
#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
67 
68 /*
69  * HW models:
70  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
71  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
72  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
73  *  Others never tested
74  */
75 
/*
 * Per-device state of the emulated e1000 NIC.
 *
 * NOTE(review): field order is presumably relied upon by migration
 * descriptions outside this view; do not reorder without checking.
 */
typedef struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;
    MemoryRegion mmio;
    MemoryRegion io;

    /* MAC registers, indexed by register name (e.g. mac_reg[ICR]). */
    uint32_t mac_reg[0x8000];
    /* PHY registers, indexed by the 5-bit MDIC register address. */
    uint16_t phy_reg[0x20];
    /* EEPROM contents as 16-bit words, read via EECD bit-banging or EERD. */
    uint16_t eeprom_data[64];

    /* Receive buffer size, recomputed from RCTL in set_rx_control(). */
    uint32_t rxbuf_size;
    /* Derived from the RCTL RDMTS field in set_rx_control(); 1 after reset. */
    uint32_t rxbuf_min_shift;
    /* State of the frame currently being assembled from TX descriptors. */
    struct e1000_tx {
        /* Saved copy of the first hdr_len bytes, re-used for each TSO segment. */
        unsigned char header[256];
        /* 4-byte VLAN tag built from VET and the descriptor's special field. */
        unsigned char vlan_header[4];
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        /* Number of valid bytes accumulated in data[]. */
        uint16_t size;
        /* Non-zero if xmit_seg() must insert vlan_header into the frame. */
        unsigned char vlan_needed;
        /* POPTS bits (E1000_TXD_POPTS_*) taken from the first data descriptor. */
        unsigned char sum_needed;
        /* True while the current data descriptor requests TSO (TSE set). */
        bool cptse;
        /* Offload context from the last non-TSE context descriptor. */
        e1000x_txd_props props;
        /* Offload context from the last TSE context descriptor. */
        e1000x_txd_props tso_props;
        /* Number of TSO segments already emitted for the current packet. */
        uint16_t tso_frames;
    } tx;

    /* State of the bit-banged (Microwire) EEPROM interface behind EECD. */
    struct {
        uint32_t val_in;    /* shifted in from guest driver */
        uint16_t bitnum_in;     /* bits clocked in since CS was raised */
        uint16_t bitnum_out;    /* index of the EEPROM bit presented on DO */
        uint16_t reading;       /* non-zero once a READ opcode was decoded */
        uint32_t old_eecd;      /* last guest-written EECD control bits */
    } eecd_state;

    QEMUTimer *autoneg_timer;

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_MAC_BIT 2
#define E1000_FLAG_TSO_BIT 3
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
    uint32_t compat_flags;
    /* NOTE(review): migration helper, not used in the visible code. */
    bool received_tx_tso;
    /* Set to 1/0 when the last context descriptor was TSE/non-TSE. */
    bool use_tso_for_migration;
    /* NOTE(review): migration helper, not used in the visible code. */
    e1000x_txd_props mig_props;
} E1000State;
136 
/*
 * Test a compatibility flag by suffix, e.g. chkflag(MIT).  Expands to a
 * non-zero mask when set; requires an E1000State pointer named `s` in scope.
 */
#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
138 
/*
 * Class data shared by all e1000 device variants.  phy_id2 is the
 * per-model value copied into phy_reg[PHY_ID2] by e1000_reset().
 */
typedef struct E1000BaseClass {
    PCIDeviceClass parent_class;
    uint16_t phy_id2;
} E1000BaseClass;
143 
/* QOM type name of the abstract base type for e1000 variants. */
#define TYPE_E1000_BASE "e1000-base"

/* Checked cast from a QOM object to E1000State. */
#define E1000(obj) \
    OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)

/* Checked casts to E1000BaseClass from a class pointer / from an instance. */
#define E1000_DEVICE_CLASS(klass) \
     OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
#define E1000_DEVICE_GET_CLASS(obj) \
    OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
153 
/*
 * Mark the link as up: let the shared e1000x helper update the MAC and PHY
 * registers, then flush packets queued for this NIC's net client.
 */
static void
e1000_link_up(E1000State *s)
{
    e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);

    /* E1000_STATUS_LU is tested by e1000_can_receive() */
    qemu_flush_queued_packets(qemu_get_queue(s->nic));
}
162 
/*
 * Complete link auto-negotiation: let the shared e1000x helper update the
 * MAC and PHY registers, then flush packets queued for this NIC's net client.
 */
static void
e1000_autoneg_done(E1000State *s)
{
    e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);

    /* E1000_STATUS_LU is tested by e1000_can_receive() */
    qemu_flush_queued_packets(qemu_get_queue(s->nic));
}
171 
172 static bool
173 have_autoneg(E1000State *s)
174 {
175     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
176 }
177 
178 static void
179 set_phy_ctrl(E1000State *s, int index, uint16_t val)
180 {
181     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
182     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
183                                    MII_CR_RESET |
184                                    MII_CR_RESTART_AUTO_NEG);
185 
186     /*
187      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
188      * migrate during auto negotiation, after migration the link will be
189      * down.
190      */
191     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
192         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
193     }
194 }
195 
/*
 * Optional per-register write handlers for PHY registers, indexed by PHY
 * register address.  set_mdic() stores the value directly for any writable
 * register that has no entry here.
 */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [PHY_CTRL] = set_phy_ctrl,
};

/* Number of slots in phyreg_writeops; used as a bound before indexing it. */
enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
201 
/*
 * Access capabilities of each PHY register, indexed by register address.
 * Registers not listed have capability 0, so set_mdic() reports
 * E1000_MDIC_ERROR for any access to them.
 */
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
static const char phy_regcap[0x20] = {
    [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
    [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
    [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
    [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
    [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
    [PHY_AUTONEG_EXP] = PHY_R,
};
212 
/*
 * Reset values of the PHY registers; copied over the zeroed phy_reg[] array
 * by e1000_reset().
 */
/* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
static const uint16_t phy_reg_init[] = {
    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
                   MII_CR_FULL_DUPLEX |
                   MII_CR_AUTO_NEG_EN,

    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
                   MII_SR_LINK_STATUS |   /* link initially up */
                   MII_SR_AUTONEG_CAPS |
                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
                   MII_SR_PREAMBLE_SUPPRESS |
                   MII_SR_EXTENDED_STATUS |
                   MII_SR_10T_HD_CAPS |
                   MII_SR_10T_FD_CAPS |
                   MII_SR_100X_HD_CAPS |
                   MII_SR_100X_FD_CAPS,

    [PHY_ID1] = 0x141,
    /* [PHY_ID2] configured per DevId, from e1000_reset() */
    [PHY_AUTONEG_ADV] = 0xde1,
    [PHY_LP_ABILITY] = 0x1e0,
    [PHY_1000T_CTRL] = 0x0e00,
    [PHY_1000T_STATUS] = 0x3c00,
    [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
};
240 
/*
 * Reset values of the MAC registers; copied over the zeroed mac_reg[] array
 * by e1000_reset().  STATUS starts with E1000_STATUS_LU set, i.e. link up.
 */
static const uint32_t mac_reg_init[] = {
    [PBA]     = 0x00100030,
    [LEDCTL]  = 0x602,
    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};
254 
/*
 * Fold one candidate mitigation delay into *curr, keeping the smallest
 * non-zero value seen.  A zero @value is ignored; *curr == 0 means that no
 * delay has been recorded yet.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;                 /* nothing to contribute */
    }
    if (*curr != 0 && *curr <= value) {
        return;                 /* existing delay is already at least as short */
    }
    *curr = value;
}
263 
/*
 * Set the interrupt cause register to @val and update the PCI interrupt
 * line accordingly.
 *
 * ICR and ICS are both set to @val.  The interrupt pin follows
 * (IMS & ICR) != 0, except that a rising edge is suppressed while the
 * mitigation timer is running; e1000_mit_timer() calls back into this
 * function when the timer expires.  @index is unused.
 */
static void
set_interrupt_cause(E1000State *s, int index, uint32_t val)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t pending_ints;
    uint32_t mit_delay;

    s->mac_reg[ICR] = val;

    /*
     * Make sure ICR and ICS registers have the same value.
     * The spec says that the ICS register is write-only.  However in practice,
     * on real hardware ICS is readable, and for reads it has the same value as
     * ICR (except that ICS does not have the clear on read behaviour of ICR).
     *
     * The VxWorks PRO/1000 driver uses this behaviour.
     */
    s->mac_reg[ICS] = val;

    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
    if (!s->mit_irq_level && pending_ints) {
        /*
         * Here we detect a potential rising edge. We postpone raising the
         * interrupt line if we are inside the mitigation delay window
         * (s->mit_timer_on == 1).
         * We provide a partial implementation of interrupt mitigation,
         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
         * RADV; relative timers based on TIDV and RDTR are not implemented.
         */
        if (s->mit_timer_on) {
            return;
        }
        if (chkflag(MIT)) {
            /* Compute the next mitigation delay according to pending
             * interrupts and the current values of RADV (provided
             * RDTR!=0), TADV and ITR.
             * Then rearm the timer.
             */
            mit_delay = 0;
            if (s->mit_ide &&
                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
                /* "* 4" converts 1024ns units into the 256ns units of ITR */
                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
            }
            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
            }
            mit_update_delay(&mit_delay, s->mac_reg[ITR]);

            /*
             * According to e1000 SPEC, the Ethernet controller guarantees
             * a maximum observable interrupt rate of 7813 interrupts/sec.
             * Thus if mit_delay < 500 then the delay should be set to the
             * minimum delay possible which is 500.
             */
            mit_delay = (mit_delay < 500) ? 500 : mit_delay;

            s->mit_timer_on = 1;
            /* mit_delay is in 256ns units; the timer is programmed in ns */
            timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                      mit_delay * 256);
            s->mit_ide = 0;
        }
    }

    /* Remember the level we drive so the next call can detect an edge */
    s->mit_irq_level = (pending_ints != 0);
    pci_set_irq(d, s->mit_irq_level);
}
331 
/*
 * Mitigation timer callback: close the mitigation window and re-evaluate
 * the interrupt line using the current ICR value.  @opaque is the
 * E1000State.
 */
static void
e1000_mit_timer(void *opaque)
{
    E1000State *s = opaque;

    s->mit_timer_on = 0;
    /* Call set_interrupt_cause to update the irq level (if necessary). */
    set_interrupt_cause(s, 0, s->mac_reg[ICR]);
}
341 
/*
 * Raise the interrupt causes in @val: they are OR-ed into the causes already
 * pending in ICR and the interrupt state is updated.  @index is unused.
 */
static void
set_ics(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
        s->mac_reg[IMS]);
    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
}
349 
350 static void
351 e1000_autoneg_timer(void *opaque)
352 {
353     E1000State *s = opaque;
354     if (!qemu_get_queue(s->nic)->link_down) {
355         e1000_autoneg_done(s);
356         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
357     }
358 }
359 
360 static void e1000_reset(void *opaque)
361 {
362     E1000State *d = opaque;
363     E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
364     uint8_t *macaddr = d->conf.macaddr.a;
365 
366     timer_del(d->autoneg_timer);
367     timer_del(d->mit_timer);
368     d->mit_timer_on = 0;
369     d->mit_irq_level = 0;
370     d->mit_ide = 0;
371     memset(d->phy_reg, 0, sizeof d->phy_reg);
372     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
373     d->phy_reg[PHY_ID2] = edc->phy_id2;
374     memset(d->mac_reg, 0, sizeof d->mac_reg);
375     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
376     d->rxbuf_min_shift = 1;
377     memset(&d->tx, 0, sizeof d->tx);
378 
379     if (qemu_get_queue(d->nic)->link_down) {
380         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
381     }
382 
383     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
384 }
385 
/*
 * Write handler for the CTRL register: stores @val with the RST bit
 * masked out.  @index is unused.
 */
static void
set_ctrl(E1000State *s, int index, uint32_t val)
{
    /* RST is self clearing */
    s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
}
392 
/*
 * Write handler for the RCTL (receive control) register.
 *
 * Stores @val, recomputes the cached receive buffer size and
 * rxbuf_min_shift from it, and flushes queued packets since the new
 * setting may allow reception.  @index is unused.
 */
static void
set_rx_control(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[RCTL] = val;
    s->rxbuf_size = e1000x_rxbufsize(val);
    /* Two-bit field located at E1000_RCTL_RDMTS_QUAT, plus one */
    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
    /* NOTE(review): the first value printed is RDT although labelled RCTL */
    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
           s->mac_reg[RCTL]);
    qemu_flush_queued_packets(qemu_get_queue(s->nic));
}
403 
404 static void
405 set_mdic(E1000State *s, int index, uint32_t val)
406 {
407     uint32_t data = val & E1000_MDIC_DATA_MASK;
408     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
409 
410     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
411         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
412     else if (val & E1000_MDIC_OP_READ) {
413         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
414         if (!(phy_regcap[addr] & PHY_R)) {
415             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
416             val |= E1000_MDIC_ERROR;
417         } else
418             val = (val ^ data) | s->phy_reg[addr];
419     } else if (val & E1000_MDIC_OP_WRITE) {
420         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
421         if (!(phy_regcap[addr] & PHY_W)) {
422             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
423             val |= E1000_MDIC_ERROR;
424         } else {
425             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
426                 phyreg_writeops[addr](s, index, data);
427             } else {
428                 s->phy_reg[addr] = data;
429             }
430         }
431     }
432     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
433 
434     if (val & E1000_MDIC_INT_EN) {
435         set_ics(s, 0, E1000_ICR_MDAC);
436     }
437 }
438 
439 static uint32_t
440 get_eecd(E1000State *s, int index)
441 {
442     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
443 
444     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
445            s->eecd_state.bitnum_out, s->eecd_state.reading);
446     if (!s->eecd_state.reading ||
447         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
448           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
449         ret |= E1000_EECD_DO;
450     return ret;
451 }
452 
/*
 * Write handler for the EECD register: emulates the serial EEPROM that the
 * guest drives by toggling chip-select (CS), clock (SK) and data-in (DI).
 *
 * Input bits are sampled on rising SK edges; the output bit index advances
 * on falling SK edges.  After nine input bits the value is decoded as a
 * 3-bit opcode followed by a 6-bit word address.  @index is unused.
 */
static void
set_eecd(E1000State *s, int index, uint32_t val)
{
    uint32_t oldval = s->eecd_state.old_eecd;

    /* Remember only the guest-controlled bits for edge detection/readback */
    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
        return;
    }
    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
        s->eecd_state.val_in = 0;
        s->eecd_state.bitnum_in = 0;
        s->eecd_state.bitnum_out = 0;
        s->eecd_state.reading = 0;
    }
    if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
        return;
    }
    if (!(E1000_EECD_SK & val)) {               /* falling edge */
        s->eecd_state.bitnum_out++;
        return;
    }
    /* Rising clock edge: shift the DI bit into val_in */
    s->eecd_state.val_in <<= 1;
    if (val & E1000_EECD_DI)
        s->eecd_state.val_in |= 1;
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
        /*
         * Low 6 bits are the word address; start one bit before the word
         * because the next falling edge increments bitnum_out (the uint16_t
         * wraps for address 0).
         */
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
            EEPROM_READ_OPCODE_MICROWIRE);
    }
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
           s->eecd_state.reading);
}
488 
489 static uint32_t
490 flash_eerd_read(E1000State *s, int x)
491 {
492     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
493 
494     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
495         return (s->mac_reg[EERD]);
496 
497     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
498         return (E1000_EEPROM_RW_REG_DONE | r);
499 
500     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
501            E1000_EEPROM_RW_REG_DONE | r);
502 }
503 
/*
 * Compute an Internet checksum over part of a frame and store it in the
 * frame, big-endian.
 *
 * @data: start of the frame
 * @n:    number of valid bytes in @data
 * @sloc: offset at which the 16-bit checksum is stored
 * @css:  offset of the first byte covered by the checksum
 * @cse:  offset of the last byte covered; 0, or a value >= @n, means
 *        "up to the end of the frame"
 *
 * Nothing is written unless @sloc lies below the last covered byte.  All
 * arithmetic is done in uint32_t, so "end - 1" wraps when end is 0.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t end = n;
    uint32_t sum;

    if (cse != 0 && cse < n) {
        end = cse + 1;
    }
    if (sloc >= end - 1) {
        return;
    }

    sum = net_checksum_add(end - css, data + css);
    stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
}
516 
517 static inline void
518 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
519 {
520     if (!memcmp(arr, bcast, sizeof bcast)) {
521         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
522     } else if (arr[0] & 1) {
523         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
524     }
525 }
526 
527 static void
528 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
529 {
530     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
531                                     PTC1023, PTC1522 };
532 
533     NetClientState *nc = qemu_get_queue(s->nic);
534     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
535         nc->info->receive(nc, buf, size);
536     } else {
537         qemu_send_packet(nc, buf, size);
538     }
539     inc_tx_bcast_or_mcast_count(s, buf);
540     e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
541 }
542 
/*
 * Finalise and transmit the frame currently held in s->tx.data.
 *
 * For a TSO segment (tp->cptse) the IP and TCP/UDP headers are patched in
 * place for this segment using the offsets from the TSO context, and the
 * segment counter is advanced.  Requested TCP/UDP and IP checksums are then
 * inserted, a VLAN tag is added if needed, the frame is sent and the
 * transmit statistics registers are updated.
 */
static void
xmit_seg(E1000State *s)
{
    uint16_t len;
    unsigned int frames = s->tx.tso_frames, css, sofar;
    struct e1000_tx *tp = &s->tx;
    /* TSO frames use the TSO context, everything else the normal context */
    struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;

    if (tp->cptse) {
        css = props->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (props->ip) {    /* IPv4 */
            /* length field at +2; 16-bit field at +4 bumped per segment */
            stw_be_p(tp->data+css+2, tp->size - css);
            stw_be_p(tp->data+css+4,
                     lduw_be_p(tp->data + css + 4) + frames);
        } else {         /* IPv6 */
            stw_be_p(tp->data+css+4, tp->size - css);
        }
        css = props->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
        if (props->tcp) {
            /* bytes of payload already sent in earlier segments */
            sofar = frames * props->mss;
            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
            if (props->paylen - sofar > props->mss) {
                /* not the last segment */
                tp->data[css + 13] &= ~9;    /* PSH, FIN */
            } else if (frames) {
                e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
            }
        } else {    /* UDP */
            stw_be_p(tp->data+css+4, len);
        }
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            // add pseudo-header length before checksum calculation
            void *sp = tp->data + props->tucso;

            phsum = lduw_be_p(sp) + len;
            /* fold the carry back into the low 16 bits */
            phsum = (phsum >> 16) + (phsum & 0xffff);
            stw_be_p(sp, phsum);
        }
        tp->tso_frames++;
    }

    if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
        putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
    }
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
        putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
    }
    if (tp->vlan_needed) {
        /*
         * Move the first 12 bytes of the frame 4 bytes towards the front,
         * into tp->vlan (which directly precedes tp->data), and place the
         * tag in the gap; the frame then starts at tp->vlan.
         */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else {
        e1000_send_packet(s, tp->data, tp->size);
    }

    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
    e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
    /* The "good" packet/octet counters mirror the totals */
    s->mac_reg[GPTC] = s->mac_reg[TPT];
    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
}
609 
610 static void
611 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
612 {
613     PCIDevice *d = PCI_DEVICE(s);
614     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
615     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
616     unsigned int split_size = txd_lower & 0xffff, bytes, sz;
617     unsigned int msh = 0xfffff;
618     uint64_t addr;
619     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
620     struct e1000_tx *tp = &s->tx;
621 
622     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
623     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
624         if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
625             e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
626             s->use_tso_for_migration = 1;
627             tp->tso_frames = 0;
628         } else {
629             e1000x_read_tx_ctx_descr(xp, &tp->props);
630             s->use_tso_for_migration = 0;
631         }
632         return;
633     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
634         // data descriptor
635         if (tp->size == 0) {
636             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
637         }
638         tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
639     } else {
640         // legacy descriptor
641         tp->cptse = 0;
642     }
643 
644     if (e1000x_vlan_enabled(s->mac_reg) &&
645         e1000x_is_vlan_txd(txd_lower) &&
646         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
647         tp->vlan_needed = 1;
648         stw_be_p(tp->vlan_header,
649                       le16_to_cpu(s->mac_reg[VET]));
650         stw_be_p(tp->vlan_header + 2,
651                       le16_to_cpu(dp->upper.fields.special));
652     }
653 
654     addr = le64_to_cpu(dp->buffer_addr);
655     if (tp->cptse) {
656         msh = tp->tso_props.hdr_len + tp->tso_props.mss;
657         do {
658             bytes = split_size;
659             if (tp->size + bytes > msh)
660                 bytes = msh - tp->size;
661 
662             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
663             pci_dma_read(d, addr, tp->data + tp->size, bytes);
664             sz = tp->size + bytes;
665             if (sz >= tp->tso_props.hdr_len
666                 && tp->size < tp->tso_props.hdr_len) {
667                 memmove(tp->header, tp->data, tp->tso_props.hdr_len);
668             }
669             tp->size = sz;
670             addr += bytes;
671             if (sz == msh) {
672                 xmit_seg(s);
673                 memmove(tp->data, tp->header, tp->tso_props.hdr_len);
674                 tp->size = tp->tso_props.hdr_len;
675             }
676             split_size -= bytes;
677         } while (bytes && split_size);
678     } else {
679         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
680         pci_dma_read(d, addr, tp->data + tp->size, split_size);
681         tp->size += split_size;
682     }
683 
684     if (!(txd_lower & E1000_TXD_CMD_EOP))
685         return;
686     if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
687         xmit_seg(s);
688     }
689     tp->tso_frames = 0;
690     tp->sum_needed = 0;
691     tp->vlan_needed = 0;
692     tp->size = 0;
693     tp->cptse = 0;
694 }
695 
696 static uint32_t
697 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
698 {
699     PCIDevice *d = PCI_DEVICE(s);
700     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
701 
702     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
703         return 0;
704     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
705                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
706     dp->upper.data = cpu_to_le32(txd_upper);
707     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
708                   &dp->upper, sizeof(dp->upper));
709     return E1000_ICR_TXDW;
710 }
711 
712 static uint64_t tx_desc_base(E1000State *s)
713 {
714     uint64_t bah = s->mac_reg[TDBAH];
715     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
716 
717     return (bah << 32) + bal;
718 }
719 
/*
 * Process all pending transmit descriptors, i.e. walk the TX ring from the
 * head (TDH) until it reaches the tail (TDT).
 *
 * Does nothing if transmission is disabled in TCTL.  Each descriptor is
 * read from guest memory, processed and, if requested, written back.
 * Afterwards the collected interrupt causes (always TXQE, plus TXDW if any
 * descriptor was written back) are raised.
 */
static void
start_xmit(E1000State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    dma_addr_t base;
    struct e1000_tx_desc desc;
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;

    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
        DBGOUT(TX, "tx disabled\n");
        return;
    }

    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
        base = tx_desc_base(s) +
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
        pci_dma_read(d, base, &desc, sizeof(desc));

        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
               desc.upper.data);

        process_tx_desc(s, &desc);
        cause |= txdesc_writeback(s, base, &desc);

        /* Advance the head, wrapping at the end of the ring (TDLEN bytes) */
        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
            s->mac_reg[TDH] = 0;
        /*
         * the following could happen only if guest sw assigns
         * bogus values to TDT/TDLEN.
         * there's nothing too intelligent we could do about this.
         */
        if (s->mac_reg[TDH] == tdh_start ||
            tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
            break;
        }
    }
    set_ics(s, 0, cause);
}
761 
762 static int
763 receive_filter(E1000State *s, const uint8_t *buf, int size)
764 {
765     uint32_t rctl = s->mac_reg[RCTL];
766     int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
767 
768     if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
769         e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
770         uint16_t vid = lduw_be_p(buf + 14);
771         uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
772                                  ((vid >> 5) & 0x7f));
773         if ((vfta & (1 << (vid & 0x1f))) == 0)
774             return 0;
775     }
776 
777     if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
778         return 1;
779     }
780 
781     if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
782         e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
783         return 1;
784     }
785 
786     if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
787         e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
788         return 1;
789     }
790 
791     return e1000x_rx_group_filter(s->mac_reg, buf);
792 }
793 
794 static void
795 e1000_set_link_status(NetClientState *nc)
796 {
797     E1000State *s = qemu_get_nic_opaque(nc);
798     uint32_t old_status = s->mac_reg[STATUS];
799 
800     if (nc->link_down) {
801         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
802     } else {
803         if (have_autoneg(s) &&
804             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
805             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
806         } else {
807             e1000_link_up(s);
808         }
809     }
810 
811     if (s->mac_reg[STATUS] != old_status)
812         set_ics(s, 0, E1000_ICR_LSC);
813 }
814 
815 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
816 {
817     int bufs;
818     /* Fast-path short packets */
819     if (total_size <= s->rxbuf_size) {
820         return s->mac_reg[RDH] != s->mac_reg[RDT];
821     }
822     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
823         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
824     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
825         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
826             s->mac_reg[RDT] - s->mac_reg[RDH];
827     } else {
828         return false;
829     }
830     return total_size <= bufs * s->rxbuf_size;
831 }
832 
833 static int
834 e1000_can_receive(NetClientState *nc)
835 {
836     E1000State *s = qemu_get_nic_opaque(nc);
837 
838     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
839         e1000_has_rxbufs(s, 1);
840 }
841 
842 static uint64_t rx_desc_base(E1000State *s)
843 {
844     uint64_t bah = s->mac_reg[RDBAH];
845     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
846 
847     return (bah << 32) + bal;
848 }
849 
850 static ssize_t
851 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
852 {
853     E1000State *s = qemu_get_nic_opaque(nc);
854     PCIDevice *d = PCI_DEVICE(s);
855     struct e1000_rx_desc desc;
856     dma_addr_t base;
857     unsigned int n, rdt;
858     uint32_t rdh_start;
859     uint16_t vlan_special = 0;
860     uint8_t vlan_status = 0;
861     uint8_t min_buf[MIN_BUF_SIZE];
862     struct iovec min_iov;
863     uint8_t *filter_buf = iov->iov_base;
864     size_t size = iov_size(iov, iovcnt);
865     size_t iov_ofs = 0;
866     size_t desc_offset;
867     size_t desc_size;
868     size_t total_size;
869 
870     if (!e1000x_hw_rx_enabled(s->mac_reg)) {
871         return -1;
872     }
873 
874     /* Pad to minimum Ethernet frame length */
875     if (size < sizeof(min_buf)) {
876         iov_to_buf(iov, iovcnt, 0, min_buf, size);
877         memset(&min_buf[size], 0, sizeof(min_buf) - size);
878         e1000x_inc_reg_if_not_full(s->mac_reg, RUC);
879         min_iov.iov_base = filter_buf = min_buf;
880         min_iov.iov_len = size = sizeof(min_buf);
881         iovcnt = 1;
882         iov = &min_iov;
883     } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
884         /* This is very unlikely, but may happen. */
885         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
886         filter_buf = min_buf;
887     }
888 
889     /* Discard oversized packets if !LPE and !SBP. */
890     if (e1000x_is_oversized(s->mac_reg, size)) {
891         return size;
892     }
893 
894     if (!receive_filter(s, filter_buf, size)) {
895         return size;
896     }
897 
898     if (e1000x_vlan_enabled(s->mac_reg) &&
899         e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
900         vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
901         iov_ofs = 4;
902         if (filter_buf == iov->iov_base) {
903             memmove(filter_buf + 4, filter_buf, 12);
904         } else {
905             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
906             while (iov->iov_len <= iov_ofs) {
907                 iov_ofs -= iov->iov_len;
908                 iov++;
909             }
910         }
911         vlan_status = E1000_RXD_STAT_VP;
912         size -= 4;
913     }
914 
915     rdh_start = s->mac_reg[RDH];
916     desc_offset = 0;
917     total_size = size + e1000x_fcs_len(s->mac_reg);
918     if (!e1000_has_rxbufs(s, total_size)) {
919             set_ics(s, 0, E1000_ICS_RXO);
920             return -1;
921     }
922     do {
923         desc_size = total_size - desc_offset;
924         if (desc_size > s->rxbuf_size) {
925             desc_size = s->rxbuf_size;
926         }
927         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
928         pci_dma_read(d, base, &desc, sizeof(desc));
929         desc.special = vlan_special;
930         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
931         if (desc.buffer_addr) {
932             if (desc_offset < size) {
933                 size_t iov_copy;
934                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
935                 size_t copy_size = size - desc_offset;
936                 if (copy_size > s->rxbuf_size) {
937                     copy_size = s->rxbuf_size;
938                 }
939                 do {
940                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
941                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
942                     copy_size -= iov_copy;
943                     ba += iov_copy;
944                     iov_ofs += iov_copy;
945                     if (iov_ofs == iov->iov_len) {
946                         iov++;
947                         iov_ofs = 0;
948                     }
949                 } while (copy_size);
950             }
951             desc_offset += desc_size;
952             desc.length = cpu_to_le16(desc_size);
953             if (desc_offset >= total_size) {
954                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
955             } else {
956                 /* Guest zeroing out status is not a hardware requirement.
957                    Clear EOP in case guest didn't do it. */
958                 desc.status &= ~E1000_RXD_STAT_EOP;
959             }
960         } else { // as per intel docs; skip descriptors with null buf addr
961             DBGOUT(RX, "Null RX descriptor!!\n");
962         }
963         pci_dma_write(d, base, &desc, sizeof(desc));
964 
965         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
966             s->mac_reg[RDH] = 0;
967         /* see comment in start_xmit; same here */
968         if (s->mac_reg[RDH] == rdh_start ||
969             rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
970             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
971                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
972             set_ics(s, 0, E1000_ICS_RXO);
973             return -1;
974         }
975     } while (desc_offset < total_size);
976 
977     e1000x_update_rx_total_stats(s->mac_reg, size, total_size);
978 
979     n = E1000_ICS_RXT0;
980     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
981         rdt += s->mac_reg[RDLEN] / sizeof(desc);
982     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
983         s->rxbuf_min_shift)
984         n |= E1000_ICS_RXDMT0;
985 
986     set_ics(s, 0, n);
987 
988     return size;
989 }
990 
991 static ssize_t
992 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
993 {
994     const struct iovec iov = {
995         .iov_base = (uint8_t *)buf,
996         .iov_len = size
997     };
998 
999     return e1000_receive_iov(nc, &iov, 1);
1000 }
1001 
1002 static uint32_t
1003 mac_readreg(E1000State *s, int index)
1004 {
1005     return s->mac_reg[index];
1006 }
1007 
1008 static uint32_t
1009 mac_low4_read(E1000State *s, int index)
1010 {
1011     return s->mac_reg[index] & 0xf;
1012 }
1013 
1014 static uint32_t
1015 mac_low11_read(E1000State *s, int index)
1016 {
1017     return s->mac_reg[index] & 0x7ff;
1018 }
1019 
1020 static uint32_t
1021 mac_low13_read(E1000State *s, int index)
1022 {
1023     return s->mac_reg[index] & 0x1fff;
1024 }
1025 
1026 static uint32_t
1027 mac_low16_read(E1000State *s, int index)
1028 {
1029     return s->mac_reg[index] & 0xffff;
1030 }
1031 
1032 static uint32_t
1033 mac_icr_read(E1000State *s, int index)
1034 {
1035     uint32_t ret = s->mac_reg[ICR];
1036 
1037     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1038     set_interrupt_cause(s, 0, 0);
1039     return ret;
1040 }
1041 
1042 static uint32_t
1043 mac_read_clr4(E1000State *s, int index)
1044 {
1045     uint32_t ret = s->mac_reg[index];
1046 
1047     s->mac_reg[index] = 0;
1048     return ret;
1049 }
1050 
1051 static uint32_t
1052 mac_read_clr8(E1000State *s, int index)
1053 {
1054     uint32_t ret = s->mac_reg[index];
1055 
1056     s->mac_reg[index] = 0;
1057     s->mac_reg[index-1] = 0;
1058     return ret;
1059 }
1060 
1061 static void
1062 mac_writereg(E1000State *s, int index, uint32_t val)
1063 {
1064     uint32_t macaddr[2];
1065 
1066     s->mac_reg[index] = val;
1067 
1068     if (index == RA + 1) {
1069         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1070         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1071         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1072     }
1073 }
1074 
1075 static void
1076 set_rdt(E1000State *s, int index, uint32_t val)
1077 {
1078     s->mac_reg[index] = val & 0xffff;
1079     if (e1000_has_rxbufs(s, 1)) {
1080         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1081     }
1082 }
1083 
1084 static void
1085 set_16bit(E1000State *s, int index, uint32_t val)
1086 {
1087     s->mac_reg[index] = val & 0xffff;
1088 }
1089 
1090 static void
1091 set_dlen(E1000State *s, int index, uint32_t val)
1092 {
1093     s->mac_reg[index] = val & 0xfff80;
1094 }
1095 
1096 static void
1097 set_tctl(E1000State *s, int index, uint32_t val)
1098 {
1099     s->mac_reg[index] = val;
1100     s->mac_reg[TDT] &= 0xffff;
1101     start_xmit(s);
1102 }
1103 
1104 static void
1105 set_icr(E1000State *s, int index, uint32_t val)
1106 {
1107     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1108     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1109 }
1110 
1111 static void
1112 set_imc(E1000State *s, int index, uint32_t val)
1113 {
1114     s->mac_reg[IMS] &= ~val;
1115     set_ics(s, 0, 0);
1116 }
1117 
1118 static void
1119 set_ims(E1000State *s, int index, uint32_t val)
1120 {
1121     s->mac_reg[IMS] |= val;
1122     set_ics(s, 0, 0);
1123 }
1124 
/*
 * Read dispatch table, indexed by register word index (MMIO offset >> 2,
 * see e1000_mmio_read()).  A NULL slot means the register has no read
 * handler.  Later designators override earlier ones for the same index,
 * so the order of the entries matters.
 */
/* getreg(x): plain read of the stored value via mac_readreg. */
#define getreg(x)    [x] = mac_readreg
static uint32_t (*macreg_readops[])(E1000State *, int) = {
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),

    /* Read-to-clear registers; the clr8 variant also zeroes index - 1. */
    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    /* Registers with dedicated handlers. */
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,
    /* Registers that expose only their low N bits on read. */
    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
    [RDFPC]   = mac_low13_read,
    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
    [TDFPC]   = mac_low13_read,
    [AIT]     = mac_low16_read,

    /* Register arrays, described as inclusive index ranges. */
    [CRCERRS ... MPC]   = &mac_readreg,
    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
    [FFLT ... FFLT+6]   = &mac_low11_read,
    [RA ... RA+31]      = &mac_readreg,
    [WUPM ... WUPM+31]  = &mac_readreg,
    [MTA ... MTA+127]   = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
    [FFMT ... FFMT+254] = &mac_low4_read,
    [FFVT ... FFVT+254] = &mac_readreg,
    [PBM ... PBM+16383] = &mac_readreg,
};
/* Number of slots in macreg_readops; used to bounds-check the index. */
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1179 
/*
 * Write dispatch table, indexed by register word index (MMIO offset >> 2,
 * see e1000_mmio_write()).  A NULL slot means the register has no write
 * handler; e1000_mmio_write() then checks macreg_readops to tell a
 * read-only register from an unknown one.
 */
/* putreg(x): plain store of the written value via mac_writereg. */
#define putreg(x)    [x] = mac_writereg
static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
    putreg(WUS),      putreg(AIT),

    /* Registers whose writes are masked or trigger side effects. */
    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
    [ITR]    = set_16bit,

    /* Register arrays, described as inclusive index ranges. */
    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
    [FFLT ... FFLT+6]   = &mac_writereg,
    [RA ... RA+31]      = &mac_writereg,
    [WUPM ... WUPM+31]  = &mac_writereg,
    [MTA ... MTA+127]   = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
    [PBM ... PBM+16383] = &mac_writereg,
};

/* Number of slots in macreg_writeops; used to bounds-check the index. */
enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1209 
/* Low two bits of each mac_reg_access[] entry. */
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

/*
 * markflag(x): entry value for a register that is accessible only while
 * compat flag E1000_FLAG_x is set.  The flag bits sit above the two
 * low control bits, hence the shift by 2.
 */
#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implemented
 *
 * The table is indexed by register word index (MMIO offset >> 2).
 * Registers not listed have the value 0: always accessible and not
 * reported as partially implemented. */
static const uint8_t mac_reg_access[0x8000] = {
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    /* Flag-gated registers that are additionally only partly emulated. */
    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1262 
1263 static void
1264 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1265                  unsigned size)
1266 {
1267     E1000State *s = opaque;
1268     unsigned int index = (addr & 0x1ffff) >> 2;
1269 
1270     if (index < NWRITEOPS && macreg_writeops[index]) {
1271         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1272             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1273             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1274                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1275                        "It is not fully implemented.\n", index<<2);
1276             }
1277             macreg_writeops[index](s, index, val);
1278         } else {    /* "flag needed" bit is set, but the flag is not active */
1279             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1280                    index<<2);
1281         }
1282     } else if (index < NREADOPS && macreg_readops[index]) {
1283         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1284                index<<2, val);
1285     } else {
1286         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1287                index<<2, val);
1288     }
1289 }
1290 
1291 static uint64_t
1292 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1293 {
1294     E1000State *s = opaque;
1295     unsigned int index = (addr & 0x1ffff) >> 2;
1296 
1297     if (index < NREADOPS && macreg_readops[index]) {
1298         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1299             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1300             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1301                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1302                        "It is not fully implemented.\n", index<<2);
1303             }
1304             return macreg_readops[index](s, index);
1305         } else {    /* "flag needed" bit is set, but the flag is not active */
1306             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1307                    index<<2);
1308         }
1309     } else {
1310         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1311     }
1312     return 0;
1313 }
1314 
1315 static const MemoryRegionOps e1000_mmio_ops = {
1316     .read = e1000_mmio_read,
1317     .write = e1000_mmio_write,
1318     .endianness = DEVICE_LITTLE_ENDIAN,
1319     .impl = {
1320         .min_access_size = 4,
1321         .max_access_size = 4,
1322     },
1323 };
1324 
1325 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1326                               unsigned size)
1327 {
1328     E1000State *s = opaque;
1329 
1330     (void)s;
1331     return 0;
1332 }
1333 
1334 static void e1000_io_write(void *opaque, hwaddr addr,
1335                            uint64_t val, unsigned size)
1336 {
1337     E1000State *s = opaque;
1338 
1339     (void)s;
1340 }
1341 
1342 static const MemoryRegionOps e1000_io_ops = {
1343     .read = e1000_io_read,
1344     .write = e1000_io_write,
1345     .endianness = DEVICE_LITTLE_ENDIAN,
1346 };
1347 
/*
 * VMState field test: true only for version 1 of the stream.  It gates the
 * 4 unused bytes that used to hold the instance id in vmstate_e1000.
 */
static bool is_version_1(void *opaque, int version_id)
{
    const bool legacy_stream = (version_id == 1);

    return legacy_stream;
}
1352 
1353 static int e1000_pre_save(void *opaque)
1354 {
1355     E1000State *s = opaque;
1356     NetClientState *nc = qemu_get_queue(s->nic);
1357 
1358     /* If the mitigation timer is active, emulate a timeout now. */
1359     if (s->mit_timer_on) {
1360         e1000_mit_timer(s);
1361     }
1362 
1363     /*
1364      * If link is down and auto-negotiation is supported and ongoing,
1365      * complete auto-negotiation immediately. This allows us to look
1366      * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1367      */
1368     if (nc->link_down && have_autoneg(s)) {
1369         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1370     }
1371 
1372     /* Decide which set of props to migrate in the main structure */
1373     if (chkflag(TSO) || !s->use_tso_for_migration) {
1374         /* Either we're migrating with the extra subsection, in which
1375          * case the mig_props is always 'props' OR
1376          * we've not got the subsection, but 'props' was the last
1377          * updated.
1378          */
1379         s->mig_props = s->tx.props;
1380     } else {
1381         /* We're not using the subsection, and 'tso_props' was
1382          * the last updated.
1383          */
1384         s->mig_props = s->tx.tso_props;
1385     }
1386     return 0;
1387 }
1388 
1389 static int e1000_post_load(void *opaque, int version_id)
1390 {
1391     E1000State *s = opaque;
1392     NetClientState *nc = qemu_get_queue(s->nic);
1393 
1394     if (!chkflag(MIT)) {
1395         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1396             s->mac_reg[TADV] = 0;
1397         s->mit_irq_level = false;
1398     }
1399     s->mit_ide = 0;
1400     s->mit_timer_on = false;
1401 
1402     /* nc.link_down can't be migrated, so infer link_down according
1403      * to link status bit in mac_reg[STATUS].
1404      * Alternatively, restart link negotiation if it was in progress. */
1405     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1406 
1407     if (have_autoneg(s) &&
1408         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1409         nc->link_down = false;
1410         timer_mod(s->autoneg_timer,
1411                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1412     }
1413 
1414     s->tx.props = s->mig_props;
1415     if (!s->received_tx_tso) {
1416         /* We received only one set of offload data (tx.props)
1417          * and haven't got tx.tso_props.  The best we can do
1418          * is dupe the data.
1419          */
1420         s->tx.tso_props = s->mig_props;
1421     }
1422     return 0;
1423 }
1424 
1425 static int e1000_tx_tso_post_load(void *opaque, int version_id)
1426 {
1427     E1000State *s = opaque;
1428     s->received_tx_tso = true;
1429     return 0;
1430 }
1431 
1432 static bool e1000_mit_state_needed(void *opaque)
1433 {
1434     E1000State *s = opaque;
1435 
1436     return chkflag(MIT);
1437 }
1438 
1439 static bool e1000_full_mac_needed(void *opaque)
1440 {
1441     E1000State *s = opaque;
1442 
1443     return chkflag(MAC);
1444 }
1445 
1446 static bool e1000_tso_state_needed(void *opaque)
1447 {
1448     E1000State *s = opaque;
1449 
1450     return chkflag(TSO);
1451 }
1452 
/*
 * Migration subsection carrying the interrupt mitigation registers and
 * mit_irq_level.  It is gated by e1000_mit_state_needed(), i.e. by the
 * MIT compat flag.
 */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1467 
/*
 * Migration subsection carrying the whole mac_reg[] array (0x8000
 * entries).  It is gated by e1000_full_mac_needed(), i.e. by the MAC
 * compat flag.
 */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1478 
/*
 * Migration subsection carrying tx.tso_props.  It is gated by
 * e1000_tso_state_needed(), i.e. by the TSO compat flag; on load,
 * e1000_tx_tso_post_load() records that the subsection was received.
 */
static const VMStateDescription vmstate_e1000_tx_tso_state = {
    .name = "e1000/tx_tso_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_tso_state_needed,
    .post_load = e1000_tx_tso_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
        VMSTATE_INT8(tx.tso_props.ip, E1000State),
        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1500 
/*
 * Main migration description of the device (stream version 2, version 1
 * still accepted).  The order of the fields defines the stream layout and
 * must not be changed.
 *
 * The offload properties are transferred through mig_props, which
 * e1000_pre_save() fills in and e1000_post_load() copies back into tx.
 */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        VMSTATE_UINT8(mig_props.ipcss, E1000State),
        VMSTATE_UINT8(mig_props.ipcso, E1000State),
        VMSTATE_UINT16(mig_props.ipcse, E1000State),
        VMSTATE_UINT8(mig_props.tucss, E1000State),
        VMSTATE_UINT8(mig_props.tucso, E1000State),
        VMSTATE_UINT16(mig_props.tucse, E1000State),
        VMSTATE_UINT32(mig_props.paylen, E1000State),
        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
        VMSTATE_UINT16(mig_props.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(mig_props.ip, E1000State),
        VMSTATE_INT8(mig_props.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    /* Optional parts of the stream; each has its own .needed test. */
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        &vmstate_e1000_tx_tso_state,
        NULL
    }
};
1585 
/*
 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
 * Note: the template is handed to e1000x_core_prepare_eeprom() together
 * with the device id and MAC address in pci_e1000_realize(); the words
 * marked DevId below are placeholders (presumably filled in there).
 */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1600 
1601 /* PCI interface */
1602 
1603 static void
1604 e1000_mmio_setup(E1000State *d)
1605 {
1606     int i;
1607     const uint32_t excluded_regs[] = {
1608         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1609         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1610     };
1611 
1612     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1613                           "e1000-mmio", PNPMMIO_SIZE);
1614     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1615     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1616         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1617                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1618     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1619 }
1620 
1621 static void
1622 pci_e1000_uninit(PCIDevice *dev)
1623 {
1624     E1000State *d = E1000(dev);
1625 
1626     timer_del(d->autoneg_timer);
1627     timer_free(d->autoneg_timer);
1628     timer_del(d->mit_timer);
1629     timer_free(d->mit_timer);
1630     qemu_del_nic(d->nic);
1631 }
1632 
1633 static NetClientInfo net_e1000_info = {
1634     .type = NET_CLIENT_DRIVER_NIC,
1635     .size = sizeof(NICState),
1636     .can_receive = e1000_can_receive,
1637     .receive = e1000_receive,
1638     .receive_iov = e1000_receive_iov,
1639     .link_status_changed = e1000_set_link_status,
1640 };
1641 
1642 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1643                                 uint32_t val, int len)
1644 {
1645     E1000State *s = E1000(pci_dev);
1646 
1647     pci_default_write_config(pci_dev, address, val, len);
1648 
1649     if (range_covers_byte(address, len, PCI_COMMAND) &&
1650         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1651         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1652     }
1653 }
1654 
1655 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1656 {
1657     DeviceState *dev = DEVICE(pci_dev);
1658     E1000State *d = E1000(pci_dev);
1659     uint8_t *pci_conf;
1660     uint8_t *macaddr;
1661 
1662     pci_dev->config_write = e1000_write_config;
1663 
1664     pci_conf = pci_dev->config;
1665 
1666     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1667     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1668 
1669     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1670 
1671     e1000_mmio_setup(d);
1672 
1673     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1674 
1675     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1676 
1677     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1678     macaddr = d->conf.macaddr.a;
1679 
1680     e1000x_core_prepare_eeprom(d->eeprom_data,
1681                                e1000_eeprom_template,
1682                                sizeof(e1000_eeprom_template),
1683                                PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1684                                macaddr);
1685 
1686     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1687                           object_get_typename(OBJECT(d)), dev->id, d);
1688 
1689     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1690 
1691     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1692     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1693 }
1694 
1695 static void qdev_e1000_reset(DeviceState *dev)
1696 {
1697     E1000State *d = E1000(dev);
1698     e1000_reset(d);
1699 }
1700 
1701 static Property e1000_properties[] = {
1702     DEFINE_NIC_PROPERTIES(E1000State, conf),
1703     DEFINE_PROP_BIT("autonegotiation", E1000State,
1704                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1705     DEFINE_PROP_BIT("mitigation", E1000State,
1706                     compat_flags, E1000_FLAG_MIT_BIT, true),
1707     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1708                     compat_flags, E1000_FLAG_MAC_BIT, true),
1709     DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1710                     compat_flags, E1000_FLAG_TSO_BIT, true),
1711     DEFINE_PROP_END_OF_LIST(),
1712 };
1713 
typedef struct E1000Info E1000Info;

/*
 * Static description of one concrete e1000 model.  One entry per model is
 * passed to e1000_class_init() as class data.
 */
struct E1000Info {
    const char *name;       /* QOM type name of the model */
    uint16_t   device_id;   /* copied into PCIDeviceClass.device_id */
    uint8_t    revision;    /* copied into PCIDeviceClass.revision */
    uint16_t   phy_id2;     /* copied into E1000BaseClass.phy_id2 */
};
1720 
1721 static void e1000_class_init(ObjectClass *klass, void *data)
1722 {
1723     DeviceClass *dc = DEVICE_CLASS(klass);
1724     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1725     E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1726     const E1000Info *info = data;
1727 
1728     k->realize = pci_e1000_realize;
1729     k->exit = pci_e1000_uninit;
1730     k->romfile = "efi-e1000.rom";
1731     k->vendor_id = PCI_VENDOR_ID_INTEL;
1732     k->device_id = info->device_id;
1733     k->revision = info->revision;
1734     e->phy_id2 = info->phy_id2;
1735     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1736     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1737     dc->desc = "Intel Gigabit Ethernet";
1738     dc->reset = qdev_e1000_reset;
1739     dc->vmsd = &vmstate_e1000;
1740     dc->props = e1000_properties;
1741 }
1742 
1743 static void e1000_instance_init(Object *obj)
1744 {
1745     E1000State *n = E1000(obj);
1746     device_add_bootindex_property(obj, &n->conf.bootindex,
1747                                   "bootindex", "/ethernet-phy@0",
1748                                   DEVICE(n), NULL);
1749 }
1750 
1751 static const TypeInfo e1000_base_info = {
1752     .name          = TYPE_E1000_BASE,
1753     .parent        = TYPE_PCI_DEVICE,
1754     .instance_size = sizeof(E1000State),
1755     .instance_init = e1000_instance_init,
1756     .class_size    = sizeof(E1000BaseClass),
1757     .abstract      = true,
1758     .interfaces = (InterfaceInfo[]) {
1759         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1760         { },
1761     },
1762 };
1763 
1764 static const E1000Info e1000_devices[] = {
1765     {
1766         .name      = "e1000",
1767         .device_id = E1000_DEV_ID_82540EM,
1768         .revision  = 0x03,
1769         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1770     },
1771     {
1772         .name      = "e1000-82544gc",
1773         .device_id = E1000_DEV_ID_82544GC_COPPER,
1774         .revision  = 0x03,
1775         .phy_id2   = E1000_PHY_ID2_82544x,
1776     },
1777     {
1778         .name      = "e1000-82545em",
1779         .device_id = E1000_DEV_ID_82545EM_COPPER,
1780         .revision  = 0x03,
1781         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1782     },
1783 };
1784 
1785 static void e1000_register_types(void)
1786 {
1787     int i;
1788 
1789     type_register_static(&e1000_base_info);
1790     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1791         const E1000Info *info = &e1000_devices[i];
1792         TypeInfo type_info = {};
1793 
1794         type_info.name = info->name;
1795         type_info.parent = TYPE_E1000_BASE;
1796         type_info.class_data = (void *)info;
1797         type_info.class_init = e1000_class_init;
1798         type_info.instance_init = e1000_instance_init;
1799 
1800         type_register(&type_info);
1801     }
1802 }
1803 
1804 type_init(e1000_register_types)
1805