xref: /openbmc/qemu/hw/net/e1000.c (revision 2993683b)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "hw/hw.h"
29 #include "hw/pci/pci.h"
30 #include "net/net.h"
31 #include "net/checksum.h"
32 #include "hw/loader.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 
36 #include "e1000_regs.h"
37 
/* Compile-time switch: when defined, DBGOUT() can print to stderr. */
#define E1000_DEBUG

#ifdef E1000_DEBUG
/* Debug categories; each enumerator is a bit position within debugflags. */
enum {
    DEBUG_GENERAL,	DEBUG_IO,	DEBUG_MMIO,	DEBUG_INTERRUPT,
    DEBUG_RX,		DEBUG_TX,	DEBUG_MDIC,	DEBUG_EEPROM,
    DEBUG_UNKNOWN,	DEBUG_TXSUM,	DEBUG_TXERR,	DEBUG_RXERR,
    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
};
/* Bit mask for debug category x (x is the enumerator name without DEBUG_). */
#define DBGBIT(x)	(1<<DEBUG_##x)
/* Categories enabled by default: transmit errors and general messages. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/*
 * Print a printf-style message, prefixed with "e1000: ", to stderr when the
 * category `what` is enabled in debugflags.
 */
#define	DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
/* Debugging compiled out: DBGOUT() expands to an empty statement. */
#define	DBGOUT(what, fmt, ...) do {} while (0)
#endif
57 
/*
 * NOTE(review): presumably the sizes of the I/O-port and MMIO regions backing
 * the `io` and `mmio` MemoryRegions; their uses are outside this section.
 */
#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
/* Received frames shorter than this are zero-padded in e1000_receive(). */
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/*
 * Size past which received packets are dropped when RCTL.LPE=0
 * (unless RCTL.SBP is set; see e1000_receive()).
 */
#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
/*
 * Size past which received packets are dropped even when RCTL.LPE=1
 * (unless RCTL.SBP is set; see e1000_receive()).
 */
#define MAXIMUM_ETHERNET_LPE_SIZE 16384
66 
/*
 * HW models:
 *  E1000_DEV_ID_82540EM works with Windows and Linux
 *  E1000_DEV_ID_82573L OK with Windows and Linux 2.6.22,
 *	appears to perform better than 82540EM, but breaks with Linux 2.6.18
 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
 *  Others never tested
 */
/* Device model emulated by this build; selected at compile time. */
enum { E1000_DEVID = E1000_DEV_ID_82540EM };

/*
 * May need to specify additional MAC-to-PHY entries --
 * Intel's Windows driver refuses to initialize unless they match
 */
/* Reset value of the PHY_ID2 register, chosen to match E1000_DEVID. */
enum {
    PHY_ID2_INIT = E1000_DEVID == E1000_DEV_ID_82573L ?		0xcc2 :
                   E1000_DEVID == E1000_DEV_ID_82544GC_COPPER ?	0xc30 :
                   /* default to E1000_DEV_ID_82540EM */	0xc20
};
86 
/* Complete per-device state of one emulated e1000 NIC. */
typedef struct E1000State_st {
    PCIDevice dev;
    NICState *nic;
    NICConf conf;               /* conf.macaddr seeds RAL/RAH on reset */
    MemoryRegion mmio;
    MemoryRegion io;

    /* MAC registers, indexed by register byte offset >> 2 (see defreg()). */
    uint32_t mac_reg[0x8000];
    /* PHY registers, accessed by the guest through MDIC (see set_mdic()). */
    uint16_t phy_reg[0x20];
    /* EEPROM contents as 16-bit words, read through EECD or EERD. */
    uint16_t eeprom_data[64];

    /* Receive buffer size decoded from RCTL by set_rx_control(). */
    uint32_t rxbuf_size;
    /* RDLEN >> rxbuf_min_shift is the free-descriptor threshold for RXDMT0. */
    uint32_t rxbuf_min_shift;
    /* Transmit state accumulated across descriptors of one packet. */
    struct e1000_tx {
        /* Saved copy of the first hdr_len bytes, replayed for each TSO segment. */
        unsigned char header[256];
        /* 4-byte VLAN tag (VET + descriptor "special") built in process_tx_desc(). */
        unsigned char vlan_header[4];
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        uint16_t size;              /* bytes currently gathered in data[] */
        unsigned char sum_needed;   /* checksum-offload option bits (POPTS) */
        unsigned char vlan_needed;  /* non-zero: insert vlan_header on send */
        /* Checksum start / offset / end values taken from the context descriptor. */
        uint8_t ipcss;
        uint8_t ipcso;
        uint16_t ipcse;
        uint8_t tucss;
        uint8_t tucso;
        uint16_t tucse;
        /* Segmentation parameters taken from the context descriptor. */
        uint8_t hdr_len;
        uint16_t mss;
        uint32_t paylen;
        uint16_t tso_frames;        /* segments already sent for this packet */
        char tse;                   /* TSE bit of the last context descriptor */
        int8_t ip;                  /* 1 = IPv4, 0 = IPv6 (context descriptor) */
        int8_t tcp;                 /* 1 = TCP, 0 = UDP (context descriptor) */
        char cptse;     // current packet tse bit
    } tx;

    /* State of the bit-banged EEPROM interface driven through EECD. */
    struct {
        uint32_t val_in;	// shifted in from guest driver
        uint16_t bitnum_in;         /* bits clocked in since chip select rose */
        uint16_t bitnum_out;        /* index of the EEPROM bit presented on DO */
        uint16_t reading;           /* non-zero once a read opcode was decoded */
        uint32_t old_eecd;          /* last guest-written EECD control bits */
    } eecd_state;

    /* Fires when emulated link auto-negotiation completes. */
    QEMUTimer *autoneg_timer;

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
    uint32_t compat_flags;
} E1000State;
140 
/*
 * Define enumerator x as the index into mac_reg[] of register E1000_x:
 * the register's byte offset divided by 4.
 */
#define	defreg(x)	x = (E1000_##x>>2)
/* mac_reg[] indices for the MAC registers referenced by name in this file. */
enum {
    defreg(CTRL),	defreg(EECD),	defreg(EERD),	defreg(GPRC),
    defreg(GPTC),	defreg(ICR),	defreg(ICS),	defreg(IMC),
    defreg(IMS),	defreg(LEDCTL),	defreg(MANC),	defreg(MDIC),
    defreg(MPC),	defreg(PBA),	defreg(RCTL),	defreg(RDBAH),
    defreg(RDBAL),	defreg(RDH),	defreg(RDLEN),	defreg(RDT),
    defreg(STATUS),	defreg(SWSM),	defreg(TCTL),	defreg(TDBAH),
    defreg(TDBAL),	defreg(TDH),	defreg(TDLEN),	defreg(TDT),
    defreg(TORH),	defreg(TORL),	defreg(TOTH),	defreg(TOTL),
    defreg(TPR),	defreg(TPT),	defreg(TXDCTL),	defreg(WUFC),
    defreg(RA),		defreg(MTA),	defreg(CRCERRS),defreg(VFTA),
    defreg(VET),
};
155 
156 static void
157 e1000_link_down(E1000State *s)
158 {
159     s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
160     s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
161 }
162 
163 static void
164 e1000_link_up(E1000State *s)
165 {
166     s->mac_reg[STATUS] |= E1000_STATUS_LU;
167     s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
168 }
169 
170 static void
171 set_phy_ctrl(E1000State *s, int index, uint16_t val)
172 {
173     /*
174      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
175      * migrate during auto negotiation, after migration the link will be
176      * down.
177      */
178     if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
179         return;
180     }
181     if ((val & MII_CR_AUTO_NEG_EN) && (val & MII_CR_RESTART_AUTO_NEG)) {
182         e1000_link_down(s);
183         s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
184         DBGOUT(PHY, "Start link auto negotiation\n");
185         qemu_mod_timer(s->autoneg_timer, qemu_get_clock_ms(vm_clock) + 500);
186     }
187 }
188 
189 static void
190 e1000_autoneg_timer(void *opaque)
191 {
192     E1000State *s = opaque;
193     if (!qemu_get_queue(s->nic)->link_down) {
194         e1000_link_up(s);
195     }
196     s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
197     DBGOUT(PHY, "Auto negotiation is completed\n");
198 }
199 
/*
 * Per-register hooks called by set_mdic() on a guest write to a writable PHY
 * register, before the value is stored in phy_reg[].  Indexed by PHY register
 * number; only PHY_CTRL has a hook.
 */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [PHY_CTRL] = set_phy_ctrl,
};

/* Number of slots in phyreg_writeops; set_mdic() bounds-checks against it. */
enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
205 
/* Access-capability bits used in phy_regcap[]. */
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
/*
 * Access rights per PHY register number.  Registers not listed have
 * capability 0, so set_mdic() reports an MDIC error for any access to them.
 */
static const char phy_regcap[0x20] = {
    [PHY_STATUS] = PHY_R,	[M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_ID1] = PHY_R,		[M88E1000_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_CTRL] = PHY_RW,	[PHY_1000T_CTRL] = PHY_RW,
    [PHY_LP_ABILITY] = PHY_R,	[PHY_1000T_STATUS] = PHY_R,
    [PHY_AUTONEG_ADV] = PHY_RW,	[M88E1000_RX_ERR_CNTR] = PHY_R,
    [PHY_ID2] = PHY_R,		[M88E1000_PHY_SPEC_STATUS] = PHY_R
};
215 
/*
 * Reset values of the PHY registers; e1000_reset() copies this table over the
 * start of phy_reg[] after zeroing it.
 */
static const uint16_t phy_reg_init[] = {
    [PHY_CTRL] = 0x1140,
    [PHY_STATUS] = 0x794d, /* link initially up with not completed autoneg */
    [PHY_ID1] = 0x141,				[PHY_ID2] = PHY_ID2_INIT,
    [PHY_1000T_CTRL] = 0x0e00,			[M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,	[PHY_AUTONEG_ADV] = 0xde1,
    [PHY_LP_ABILITY] = 0x1e0,			[PHY_1000T_STATUS] = 0x3c00,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
};
225 
/*
 * Reset values of the MAC registers; e1000_reset() copies this table over the
 * start of mac_reg[] after zeroing it.  STATUS starts with the link-up bit set.
 */
static const uint32_t mac_reg_init[] = {
    [PBA] =     0x00100030,
    [LEDCTL] =  0x602,
    [CTRL] =    E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS] =  0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC] =    E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};
239 
240 static void
241 set_interrupt_cause(E1000State *s, int index, uint32_t val)
242 {
243     if (val && (E1000_DEVID >= E1000_DEV_ID_82547EI_MOBILE)) {
244         /* Only for 8257x */
245         val |= E1000_ICR_INT_ASSERTED;
246     }
247     s->mac_reg[ICR] = val;
248 
249     /*
250      * Make sure ICR and ICS registers have the same value.
251      * The spec says that the ICS register is write-only.  However in practice,
252      * on real hardware ICS is readable, and for reads it has the same value as
253      * ICR (except that ICS does not have the clear on read behaviour of ICR).
254      *
255      * The VxWorks PRO/1000 driver uses this behaviour.
256      */
257     s->mac_reg[ICS] = val;
258 
259     qemu_set_irq(s->dev.irq[0], (s->mac_reg[IMS] & s->mac_reg[ICR]) != 0);
260 }
261 
262 static void
263 set_ics(E1000State *s, int index, uint32_t val)
264 {
265     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
266         s->mac_reg[IMS]);
267     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
268 }
269 
270 static int
271 rxbufsize(uint32_t v)
272 {
273     v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
274          E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
275          E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
276     switch (v) {
277     case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
278         return 16384;
279     case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
280         return 8192;
281     case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
282         return 4096;
283     case E1000_RCTL_SZ_1024:
284         return 1024;
285     case E1000_RCTL_SZ_512:
286         return 512;
287     case E1000_RCTL_SZ_256:
288         return 256;
289     }
290     return 2048;
291 }
292 
293 static void e1000_reset(void *opaque)
294 {
295     E1000State *d = opaque;
296     uint8_t *macaddr = d->conf.macaddr.a;
297     int i;
298 
299     qemu_del_timer(d->autoneg_timer);
300     memset(d->phy_reg, 0, sizeof d->phy_reg);
301     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
302     memset(d->mac_reg, 0, sizeof d->mac_reg);
303     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
304     d->rxbuf_min_shift = 1;
305     memset(&d->tx, 0, sizeof d->tx);
306 
307     if (qemu_get_queue(d->nic)->link_down) {
308         e1000_link_down(d);
309     }
310 
311     /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
312     d->mac_reg[RA] = 0;
313     d->mac_reg[RA + 1] = E1000_RAH_AV;
314     for (i = 0; i < 4; i++) {
315         d->mac_reg[RA] |= macaddr[i] << (8 * i);
316         d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
317     }
318 }
319 
320 static void
321 set_ctrl(E1000State *s, int index, uint32_t val)
322 {
323     /* RST is self clearing */
324     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
325 }
326 
327 static void
328 set_rx_control(E1000State *s, int index, uint32_t val)
329 {
330     s->mac_reg[RCTL] = val;
331     s->rxbuf_size = rxbufsize(val);
332     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
333     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
334            s->mac_reg[RCTL]);
335     qemu_flush_queued_packets(qemu_get_queue(s->nic));
336 }
337 
338 static void
339 set_mdic(E1000State *s, int index, uint32_t val)
340 {
341     uint32_t data = val & E1000_MDIC_DATA_MASK;
342     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
343 
344     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
345         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
346     else if (val & E1000_MDIC_OP_READ) {
347         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
348         if (!(phy_regcap[addr] & PHY_R)) {
349             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
350             val |= E1000_MDIC_ERROR;
351         } else
352             val = (val ^ data) | s->phy_reg[addr];
353     } else if (val & E1000_MDIC_OP_WRITE) {
354         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
355         if (!(phy_regcap[addr] & PHY_W)) {
356             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
357             val |= E1000_MDIC_ERROR;
358         } else {
359             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
360                 phyreg_writeops[addr](s, index, data);
361             }
362             s->phy_reg[addr] = data;
363         }
364     }
365     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
366 
367     if (val & E1000_MDIC_INT_EN) {
368         set_ics(s, 0, E1000_ICR_MDAC);
369     }
370 }
371 
372 static uint32_t
373 get_eecd(E1000State *s, int index)
374 {
375     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
376 
377     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
378            s->eecd_state.bitnum_out, s->eecd_state.reading);
379     if (!s->eecd_state.reading ||
380         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
381           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
382         ret |= E1000_EECD_DO;
383     return ret;
384 }
385 
386 static void
387 set_eecd(E1000State *s, int index, uint32_t val)
388 {
389     uint32_t oldval = s->eecd_state.old_eecd;
390 
391     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
392             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
393     if (!(E1000_EECD_CS & val))			// CS inactive; nothing to do
394 	return;
395     if (E1000_EECD_CS & (val ^ oldval)) {	// CS rise edge; reset state
396 	s->eecd_state.val_in = 0;
397 	s->eecd_state.bitnum_in = 0;
398 	s->eecd_state.bitnum_out = 0;
399 	s->eecd_state.reading = 0;
400     }
401     if (!(E1000_EECD_SK & (val ^ oldval)))	// no clock edge
402         return;
403     if (!(E1000_EECD_SK & val)) {		// falling edge
404         s->eecd_state.bitnum_out++;
405         return;
406     }
407     s->eecd_state.val_in <<= 1;
408     if (val & E1000_EECD_DI)
409         s->eecd_state.val_in |= 1;
410     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
411         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
412         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
413             EEPROM_READ_OPCODE_MICROWIRE);
414     }
415     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
416            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
417            s->eecd_state.reading);
418 }
419 
420 static uint32_t
421 flash_eerd_read(E1000State *s, int x)
422 {
423     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
424 
425     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
426         return (s->mac_reg[EERD]);
427 
428     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
429         return (E1000_EEPROM_RW_REG_DONE | r);
430 
431     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
432            E1000_EEPROM_RW_REG_DONE | r);
433 }
434 
/*
 * Compute an Internet checksum over part of a packet and store it in place.
 *
 * data: packet bytes
 * n:    packet length
 * sloc: offset at which the 16-bit big-endian checksum is written
 * css:  offset of the first byte covered by the checksum
 * cse:  offset of the last byte covered; 0 (or a value beyond the packet)
 *       means "through the end of the packet"
 *
 * Nothing is written unless both checksum bytes lie inside the covered range.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t end = n;
    uint32_t sum;

    if (cse != 0 && cse < end) {
        end = cse + 1;
    }
    if (!(sloc < end - 1)) {
        return;
    }

    sum = net_checksum_add(end - css, data + css);
    cpu_to_be16wu((uint16_t *)(data + sloc), net_checksum_finish(sum));
}
448 
449 static inline int
450 vlan_enabled(E1000State *s)
451 {
452     return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
453 }
454 
455 static inline int
456 vlan_rx_filter_enabled(E1000State *s)
457 {
458     return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
459 }
460 
461 static inline int
462 is_vlan_packet(E1000State *s, const uint8_t *buf)
463 {
464     return (be16_to_cpup((uint16_t *)(buf + 12)) ==
465                 le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
466 }
467 
468 static inline int
469 is_vlan_txd(uint32_t txd_lower)
470 {
471     return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
472 }
473 
474 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
475  * fill it in, just pad descriptor length by 4 bytes unless guest
476  * told us to strip it off the packet. */
477 static inline int
478 fcs_len(E1000State *s)
479 {
480     return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
481 }
482 
483 static void
484 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
485 {
486     NetClientState *nc = qemu_get_queue(s->nic);
487     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
488         nc->info->receive(nc, buf, size);
489     } else {
490         qemu_send_packet(nc, buf, size);
491     }
492 }
493 
/*
 * Transmit the frame currently assembled in s->tx.data (s->tx.size bytes).
 *
 * For a TSO segment (both the context-descriptor TSE bit and the current
 * packet's TSE bit set) the protocol headers are first patched in place for
 * this segment.  Requested checksums are then inserted, an optional VLAN tag
 * is spliced in, the frame is sent and the transmit statistics registers
 * (TPT, GPTC, TOTL/TOTH) are updated.
 */
static void
xmit_seg(E1000State *s)
{
    uint16_t len, *sp;
    unsigned int frames = s->tx.tso_frames, css, sofar, n;
    struct e1000_tx *tp = &s->tx;

    if (tp->tse && tp->cptse) {
        /* Patch the IP header, which starts at offset ipcss. */
        css = tp->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (tp->ip) {		// IPv4
            /* 16-bit field at +2 gets the length from the IP header on. */
            cpu_to_be16wu((uint16_t *)(tp->data+css+2),
                          tp->size - css);
            /* 16-bit field at +4 is advanced by the number of frames sent. */
            cpu_to_be16wu((uint16_t *)(tp->data+css+4),
                          be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
        } else			// IPv6
            cpu_to_be16wu((uint16_t *)(tp->data+css+4),
                          tp->size - css);
        /* Patch the TCP/UDP header, which starts at offset tucss. */
        css = tp->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
        if (tp->tcp) {
            /* Advance the sequence number by the payload already sent. */
            sofar = frames * tp->mss;
            cpu_to_be32wu((uint32_t *)(tp->data+css+4),	// seq
                be32_to_cpupu((uint32_t *)(tp->data+css+4))+sofar);
            /* Not the last segment: clear the flags that belong on it only. */
            if (tp->paylen - sofar > tp->mss)
                tp->data[css + 13] &= ~9;		// PSH, FIN
        } else	// UDP
            cpu_to_be16wu((uint16_t *)(tp->data+css+4), len);
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            // add pseudo-header length before checksum calculation
            sp = (uint16_t *)(tp->data + tp->tucso);
            phsum = be16_to_cpup(sp) + len;
            /* Fold the carry back into the low 16 bits. */
            phsum = (phsum >> 16) + (phsum & 0xffff);
            cpu_to_be16wu(sp, phsum);
        }
        tp->tso_frames++;
    }

    /* Insert the TCP/UDP and IP checksums the guest asked for. */
    if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
        putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
        putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
    if (tp->vlan_needed) {
        /*
         * Insert the 4-byte tag after the 12 address bytes by shifting those
         * bytes 4 bytes towards the front, into tp->vlan, which sits directly
         * before tp->data in the struct; the frame then starts at tp->vlan.
         */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else
        e1000_send_packet(s, tp->data, tp->size);
    s->mac_reg[TPT]++;
    s->mac_reg[GPTC]++;
    /* 64-bit octet counter split over TOTL/TOTH: carry on wraparound. */
    n = s->mac_reg[TOTL];
    if ((s->mac_reg[TOTL] += s->tx.size) < n)
        s->mac_reg[TOTH]++;
}
552 
553 static void
554 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
555 {
556     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
557     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
558     unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
559     unsigned int msh = 0xfffff, hdr = 0;
560     uint64_t addr;
561     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
562     struct e1000_tx *tp = &s->tx;
563 
564     if (dtype == E1000_TXD_CMD_DEXT) {	// context descriptor
565         op = le32_to_cpu(xp->cmd_and_length);
566         tp->ipcss = xp->lower_setup.ip_fields.ipcss;
567         tp->ipcso = xp->lower_setup.ip_fields.ipcso;
568         tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
569         tp->tucss = xp->upper_setup.tcp_fields.tucss;
570         tp->tucso = xp->upper_setup.tcp_fields.tucso;
571         tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
572         tp->paylen = op & 0xfffff;
573         tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
574         tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
575         tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
576         tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
577         tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
578         tp->tso_frames = 0;
579         if (tp->tucso == 0) {	// this is probably wrong
580             DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
581             tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
582         }
583         return;
584     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
585         // data descriptor
586         if (tp->size == 0) {
587             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
588         }
589         tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
590     } else {
591         // legacy descriptor
592         tp->cptse = 0;
593     }
594 
595     if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
596         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
597         tp->vlan_needed = 1;
598         cpu_to_be16wu((uint16_t *)(tp->vlan_header),
599                       le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
600         cpu_to_be16wu((uint16_t *)(tp->vlan_header + 2),
601                       le16_to_cpu(dp->upper.fields.special));
602     }
603 
604     addr = le64_to_cpu(dp->buffer_addr);
605     if (tp->tse && tp->cptse) {
606         hdr = tp->hdr_len;
607         msh = hdr + tp->mss;
608         do {
609             bytes = split_size;
610             if (tp->size + bytes > msh)
611                 bytes = msh - tp->size;
612 
613             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
614             pci_dma_read(&s->dev, addr, tp->data + tp->size, bytes);
615             if ((sz = tp->size + bytes) >= hdr && tp->size < hdr)
616                 memmove(tp->header, tp->data, hdr);
617             tp->size = sz;
618             addr += bytes;
619             if (sz == msh) {
620                 xmit_seg(s);
621                 memmove(tp->data, tp->header, hdr);
622                 tp->size = hdr;
623             }
624         } while (split_size -= bytes);
625     } else if (!tp->tse && tp->cptse) {
626         // context descriptor TSE is not set, while data descriptor TSE is set
627         DBGOUT(TXERR, "TCP segmentation error\n");
628     } else {
629         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
630         pci_dma_read(&s->dev, addr, tp->data + tp->size, split_size);
631         tp->size += split_size;
632     }
633 
634     if (!(txd_lower & E1000_TXD_CMD_EOP))
635         return;
636     if (!(tp->tse && tp->cptse && tp->size < hdr))
637         xmit_seg(s);
638     tp->tso_frames = 0;
639     tp->sum_needed = 0;
640     tp->vlan_needed = 0;
641     tp->size = 0;
642     tp->cptse = 0;
643 }
644 
645 static uint32_t
646 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
647 {
648     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
649 
650     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
651         return 0;
652     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
653                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
654     dp->upper.data = cpu_to_le32(txd_upper);
655     pci_dma_write(&s->dev, base + ((char *)&dp->upper - (char *)dp),
656                   &dp->upper, sizeof(dp->upper));
657     return E1000_ICR_TXDW;
658 }
659 
660 static uint64_t tx_desc_base(E1000State *s)
661 {
662     uint64_t bah = s->mac_reg[TDBAH];
663     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
664 
665     return (bah << 32) + bal;
666 }
667 
668 static void
669 start_xmit(E1000State *s)
670 {
671     dma_addr_t base;
672     struct e1000_tx_desc desc;
673     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
674 
675     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
676         DBGOUT(TX, "tx disabled\n");
677         return;
678     }
679 
680     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
681         base = tx_desc_base(s) +
682                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
683         pci_dma_read(&s->dev, base, &desc, sizeof(desc));
684 
685         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
686                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
687                desc.upper.data);
688 
689         process_tx_desc(s, &desc);
690         cause |= txdesc_writeback(s, base, &desc);
691 
692         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
693             s->mac_reg[TDH] = 0;
694         /*
695          * the following could happen only if guest sw assigns
696          * bogus values to TDT/TDLEN.
697          * there's nothing too intelligent we could do about this.
698          */
699         if (s->mac_reg[TDH] == tdh_start) {
700             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
701                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
702             break;
703         }
704     }
705     set_ics(s, 0, cause);
706 }
707 
708 static int
709 receive_filter(E1000State *s, const uint8_t *buf, int size)
710 {
711     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
712     static const int mta_shift[] = {4, 3, 2, 0};
713     uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
714 
715     if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
716         uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
717         uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
718                                      ((vid >> 5) & 0x7f));
719         if ((vfta & (1 << (vid & 0x1f))) == 0)
720             return 0;
721     }
722 
723     if (rctl & E1000_RCTL_UPE)			// promiscuous
724         return 1;
725 
726     if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE))	// promiscuous mcast
727         return 1;
728 
729     if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
730         return 1;
731 
732     for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
733         if (!(rp[1] & E1000_RAH_AV))
734             continue;
735         ra[0] = cpu_to_le32(rp[0]);
736         ra[1] = cpu_to_le32(rp[1]);
737         if (!memcmp(buf, (uint8_t *)ra, 6)) {
738             DBGOUT(RXFILTER,
739                    "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
740                    (int)(rp - s->mac_reg - RA)/2,
741                    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
742             return 1;
743         }
744     }
745     DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
746            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
747 
748     f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
749     f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
750     if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
751         return 1;
752     DBGOUT(RXFILTER,
753            "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
754            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
755            (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
756            s->mac_reg[MTA + (f >> 5)]);
757 
758     return 0;
759 }
760 
761 static void
762 e1000_set_link_status(NetClientState *nc)
763 {
764     E1000State *s = qemu_get_nic_opaque(nc);
765     uint32_t old_status = s->mac_reg[STATUS];
766 
767     if (nc->link_down) {
768         e1000_link_down(s);
769     } else {
770         e1000_link_up(s);
771     }
772 
773     if (s->mac_reg[STATUS] != old_status)
774         set_ics(s, 0, E1000_ICR_LSC);
775 }
776 
777 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
778 {
779     int bufs;
780     /* Fast-path short packets */
781     if (total_size <= s->rxbuf_size) {
782         return s->mac_reg[RDH] != s->mac_reg[RDT];
783     }
784     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
785         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
786     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
787         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
788             s->mac_reg[RDT] - s->mac_reg[RDH];
789     } else {
790         return false;
791     }
792     return total_size <= bufs * s->rxbuf_size;
793 }
794 
795 static int
796 e1000_can_receive(NetClientState *nc)
797 {
798     E1000State *s = qemu_get_nic_opaque(nc);
799 
800     return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
801         (s->mac_reg[RCTL] & E1000_RCTL_EN) && e1000_has_rxbufs(s, 1);
802 }
803 
804 static uint64_t rx_desc_base(E1000State *s)
805 {
806     uint64_t bah = s->mac_reg[RDBAH];
807     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
808 
809     return (bah << 32) + bal;
810 }
811 
812 static ssize_t
813 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
814 {
815     E1000State *s = qemu_get_nic_opaque(nc);
816     struct e1000_rx_desc desc;
817     dma_addr_t base;
818     unsigned int n, rdt;
819     uint32_t rdh_start;
820     uint16_t vlan_special = 0;
821     uint8_t vlan_status = 0, vlan_offset = 0;
822     uint8_t min_buf[MIN_BUF_SIZE];
823     size_t desc_offset;
824     size_t desc_size;
825     size_t total_size;
826 
827     if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
828         return -1;
829     }
830 
831     if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
832         return -1;
833     }
834 
835     /* Pad to minimum Ethernet frame length */
836     if (size < sizeof(min_buf)) {
837         memcpy(min_buf, buf, size);
838         memset(&min_buf[size], 0, sizeof(min_buf) - size);
839         buf = min_buf;
840         size = sizeof(min_buf);
841     }
842 
843     /* Discard oversized packets if !LPE and !SBP. */
844     if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
845         (size > MAXIMUM_ETHERNET_VLAN_SIZE
846         && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
847         && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
848         return size;
849     }
850 
851     if (!receive_filter(s, buf, size))
852         return size;
853 
854     if (vlan_enabled(s) && is_vlan_packet(s, buf)) {
855         vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(buf + 14)));
856         memmove((uint8_t *)buf + 4, buf, 12);
857         vlan_status = E1000_RXD_STAT_VP;
858         vlan_offset = 4;
859         size -= 4;
860     }
861 
862     rdh_start = s->mac_reg[RDH];
863     desc_offset = 0;
864     total_size = size + fcs_len(s);
865     if (!e1000_has_rxbufs(s, total_size)) {
866             set_ics(s, 0, E1000_ICS_RXO);
867             return -1;
868     }
869     do {
870         desc_size = total_size - desc_offset;
871         if (desc_size > s->rxbuf_size) {
872             desc_size = s->rxbuf_size;
873         }
874         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
875         pci_dma_read(&s->dev, base, &desc, sizeof(desc));
876         desc.special = vlan_special;
877         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
878         if (desc.buffer_addr) {
879             if (desc_offset < size) {
880                 size_t copy_size = size - desc_offset;
881                 if (copy_size > s->rxbuf_size) {
882                     copy_size = s->rxbuf_size;
883                 }
884                 pci_dma_write(&s->dev, le64_to_cpu(desc.buffer_addr),
885                               buf + desc_offset + vlan_offset, copy_size);
886             }
887             desc_offset += desc_size;
888             desc.length = cpu_to_le16(desc_size);
889             if (desc_offset >= total_size) {
890                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
891             } else {
892                 /* Guest zeroing out status is not a hardware requirement.
893                    Clear EOP in case guest didn't do it. */
894                 desc.status &= ~E1000_RXD_STAT_EOP;
895             }
896         } else { // as per intel docs; skip descriptors with null buf addr
897             DBGOUT(RX, "Null RX descriptor!!\n");
898         }
899         pci_dma_write(&s->dev, base, &desc, sizeof(desc));
900 
901         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
902             s->mac_reg[RDH] = 0;
903         /* see comment in start_xmit; same here */
904         if (s->mac_reg[RDH] == rdh_start) {
905             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
906                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
907             set_ics(s, 0, E1000_ICS_RXO);
908             return -1;
909         }
910     } while (desc_offset < total_size);
911 
912     s->mac_reg[GPRC]++;
913     s->mac_reg[TPR]++;
914     /* TOR - Total Octets Received:
915      * This register includes bytes received in a packet from the <Destination
916      * Address> field through the <CRC> field, inclusively.
917      */
918     n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
919     if (n < s->mac_reg[TORL])
920         s->mac_reg[TORH]++;
921     s->mac_reg[TORL] = n;
922 
923     n = E1000_ICS_RXT0;
924     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
925         rdt += s->mac_reg[RDLEN] / sizeof(desc);
926     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
927         s->rxbuf_min_shift)
928         n |= E1000_ICS_RXDMT0;
929 
930     set_ics(s, 0, n);
931 
932     return size;
933 }
934 
935 static uint32_t
936 mac_readreg(E1000State *s, int index)
937 {
938     return s->mac_reg[index];
939 }
940 
941 static uint32_t
942 mac_icr_read(E1000State *s, int index)
943 {
944     uint32_t ret = s->mac_reg[ICR];
945 
946     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
947     set_interrupt_cause(s, 0, 0);
948     return ret;
949 }
950 
951 static uint32_t
952 mac_read_clr4(E1000State *s, int index)
953 {
954     uint32_t ret = s->mac_reg[index];
955 
956     s->mac_reg[index] = 0;
957     return ret;
958 }
959 
960 static uint32_t
961 mac_read_clr8(E1000State *s, int index)
962 {
963     uint32_t ret = s->mac_reg[index];
964 
965     s->mac_reg[index] = 0;
966     s->mac_reg[index-1] = 0;
967     return ret;
968 }
969 
970 static void
971 mac_writereg(E1000State *s, int index, uint32_t val)
972 {
973     s->mac_reg[index] = val;
974 }
975 
976 static void
977 set_rdt(E1000State *s, int index, uint32_t val)
978 {
979     s->mac_reg[index] = val & 0xffff;
980     if (e1000_has_rxbufs(s, 1)) {
981         qemu_flush_queued_packets(qemu_get_queue(s->nic));
982     }
983 }
984 
985 static void
986 set_16bit(E1000State *s, int index, uint32_t val)
987 {
988     s->mac_reg[index] = val & 0xffff;
989 }
990 
991 static void
992 set_dlen(E1000State *s, int index, uint32_t val)
993 {
994     s->mac_reg[index] = val & 0xfff80;
995 }
996 
997 static void
998 set_tctl(E1000State *s, int index, uint32_t val)
999 {
1000     s->mac_reg[index] = val;
1001     s->mac_reg[TDT] &= 0xffff;
1002     start_xmit(s);
1003 }
1004 
1005 static void
1006 set_icr(E1000State *s, int index, uint32_t val)
1007 {
1008     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1009     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1010 }
1011 
1012 static void
1013 set_imc(E1000State *s, int index, uint32_t val)
1014 {
1015     s->mac_reg[IMS] &= ~val;
1016     set_ics(s, 0, 0);
1017 }
1018 
1019 static void
1020 set_ims(E1000State *s, int index, uint32_t val)
1021 {
1022     s->mac_reg[IMS] |= val;
1023     set_ics(s, 0, 0);
1024 }
1025 
1026 #define getreg(x)	[x] = mac_readreg
1027 static uint32_t (*macreg_readops[])(E1000State *, int) = {
1028     getreg(PBA),	getreg(RCTL),	getreg(TDH),	getreg(TXDCTL),
1029     getreg(WUFC),	getreg(TDT),	getreg(CTRL),	getreg(LEDCTL),
1030     getreg(MANC),	getreg(MDIC),	getreg(SWSM),	getreg(STATUS),
1031     getreg(TORL),	getreg(TOTL),	getreg(IMS),	getreg(TCTL),
1032     getreg(RDH),	getreg(RDT),	getreg(VET),	getreg(ICS),
1033     getreg(TDBAL),	getreg(TDBAH),	getreg(RDBAH),	getreg(RDBAL),
1034     getreg(TDLEN),	getreg(RDLEN),
1035 
1036     [TOTH] = mac_read_clr8,	[TORH] = mac_read_clr8,	[GPRC] = mac_read_clr4,
1037     [GPTC] = mac_read_clr4,	[TPR] = mac_read_clr4,	[TPT] = mac_read_clr4,
1038     [ICR] = mac_icr_read,	[EECD] = get_eecd,	[EERD] = flash_eerd_read,
1039     [CRCERRS ... MPC] = &mac_readreg,
1040     [RA ... RA+31] = &mac_readreg,
1041     [MTA ... MTA+127] = &mac_readreg,
1042     [VFTA ... VFTA+127] = &mac_readreg,
1043 };
1044 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1045 
1046 #define putreg(x)	[x] = mac_writereg
1047 static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
1048     putreg(PBA),	putreg(EERD),	putreg(SWSM),	putreg(WUFC),
1049     putreg(TDBAL),	putreg(TDBAH),	putreg(TXDCTL),	putreg(RDBAH),
1050     putreg(RDBAL),	putreg(LEDCTL), putreg(VET),
1051     [TDLEN] = set_dlen,	[RDLEN] = set_dlen,	[TCTL] = set_tctl,
1052     [TDT] = set_tctl,	[MDIC] = set_mdic,	[ICS] = set_ics,
1053     [TDH] = set_16bit,	[RDH] = set_16bit,	[RDT] = set_rdt,
1054     [IMC] = set_imc,	[IMS] = set_ims,	[ICR] = set_icr,
1055     [EECD] = set_eecd,	[RCTL] = set_rx_control, [CTRL] = set_ctrl,
1056     [RA ... RA+31] = &mac_writereg,
1057     [MTA ... MTA+127] = &mac_writereg,
1058     [VFTA ... VFTA+127] = &mac_writereg,
1059 };
1060 
1061 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1062 
1063 static void
1064 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1065                  unsigned size)
1066 {
1067     E1000State *s = opaque;
1068     unsigned int index = (addr & 0x1ffff) >> 2;
1069 
1070     if (index < NWRITEOPS && macreg_writeops[index]) {
1071         macreg_writeops[index](s, index, val);
1072     } else if (index < NREADOPS && macreg_readops[index]) {
1073         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val);
1074     } else {
1075         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1076                index<<2, val);
1077     }
1078 }
1079 
1080 static uint64_t
1081 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1082 {
1083     E1000State *s = opaque;
1084     unsigned int index = (addr & 0x1ffff) >> 2;
1085 
1086     if (index < NREADOPS && macreg_readops[index])
1087     {
1088         return macreg_readops[index](s, index);
1089     }
1090     DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1091     return 0;
1092 }
1093 
1094 static const MemoryRegionOps e1000_mmio_ops = {
1095     .read = e1000_mmio_read,
1096     .write = e1000_mmio_write,
1097     .endianness = DEVICE_LITTLE_ENDIAN,
1098     .impl = {
1099         .min_access_size = 4,
1100         .max_access_size = 4,
1101     },
1102 };
1103 
1104 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1105                               unsigned size)
1106 {
1107     E1000State *s = opaque;
1108 
1109     (void)s;
1110     return 0;
1111 }
1112 
1113 static void e1000_io_write(void *opaque, hwaddr addr,
1114                            uint64_t val, unsigned size)
1115 {
1116     E1000State *s = opaque;
1117 
1118     (void)s;
1119 }
1120 
1121 static const MemoryRegionOps e1000_io_ops = {
1122     .read = e1000_io_read,
1123     .write = e1000_io_write,
1124     .endianness = DEVICE_LITTLE_ENDIAN,
1125 };
1126 
/*
 * Field test used by vmstate_e1000: true only when version_id is 1.
 * The opaque argument is not used.
 */
static bool is_version_1(void *opaque, int version_id)
{
    if (version_id != 1) {
        return false;
    }
    return true;
}
1131 
1132 static void e1000_pre_save(void *opaque)
1133 {
1134     E1000State *s = opaque;
1135     NetClientState *nc = qemu_get_queue(s->nic);
1136 
1137     if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
1138         return;
1139     }
1140 
1141     /*
1142      * If link is down and auto-negotiation is ongoing, complete
1143      * auto-negotiation immediately.  This allows is to look at
1144      * MII_SR_AUTONEG_COMPLETE to infer link status on load.
1145      */
1146     if (nc->link_down &&
1147         s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
1148         s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG) {
1149          s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1150     }
1151 }
1152 
1153 static int e1000_post_load(void *opaque, int version_id)
1154 {
1155     E1000State *s = opaque;
1156     NetClientState *nc = qemu_get_queue(s->nic);
1157 
1158     /* nc.link_down can't be migrated, so infer link_down according
1159      * to link status bit in mac_reg[STATUS].
1160      * Alternatively, restart link negotiation if it was in progress. */
1161     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1162 
1163     if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
1164         return 0;
1165     }
1166 
1167     if (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
1168         s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG &&
1169         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1170         nc->link_down = false;
1171         qemu_mod_timer(s->autoneg_timer, qemu_get_clock_ms(vm_clock) + 500);
1172     }
1173 
1174     return 0;
1175 }
1176 
/*
 * Migration description for the e1000 device.
 *
 * Declares version 2 with a minimum accepted version of 1, and hooks
 * e1000_pre_save / e1000_post_load around save and load.
 *
 * NOTE(review): the order of the entries in .fields presumably defines the
 * layout of the migration stream -- confirm before reordering or inserting
 * anything.
 */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields      = (VMStateField []) {
        VMSTATE_PCI_DEVICE(dev, E1000State),
        /* Placeholder gated on is_version_1(), i.e. on version_id == 1. */
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        /* Receive buffer parameters. */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* eecd_state members. */
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* tx members, including the header and data buffers. */
        VMSTATE_UINT8(tx.ipcss, E1000State),
        VMSTATE_UINT8(tx.ipcso, E1000State),
        VMSTATE_UINT16(tx.ipcse, E1000State),
        VMSTATE_UINT8(tx.tucss, E1000State),
        VMSTATE_UINT8(tx.tucso, E1000State),
        VMSTATE_UINT16(tx.tucse, E1000State),
        VMSTATE_UINT32(tx.paylen, E1000State),
        VMSTATE_UINT8(tx.hdr_len, E1000State),
        VMSTATE_UINT16(tx.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(tx.ip, E1000State),
        VMSTATE_INT8(tx.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        /* EEPROM contents (64 words) and PHY registers (0x20 words). */
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individual MAC registers. */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* MAC register ranges: 32 from RA, 128 from MTA, 128 from VFTA. */
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    }
};
1256 
/*
 * Initial EEPROM image (64 16-bit words).
 *
 * pci_e1000_init() copies this into eeprom_data, overwrites words 0-2 with
 * the MAC address and stores a computed checksum at EEPROM_CHECKSUM_REG, so
 * the values given here for those words are only placeholders.
 */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, E1000_DEVID, 0x8086, E1000_DEVID, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1267 
1268 /* PCI interface */
1269 
1270 static void
1271 e1000_mmio_setup(E1000State *d)
1272 {
1273     int i;
1274     const uint32_t excluded_regs[] = {
1275         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1276         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1277     };
1278 
1279     memory_region_init_io(&d->mmio, &e1000_mmio_ops, d, "e1000-mmio",
1280                           PNPMMIO_SIZE);
1281     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1282     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1283         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1284                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1285     memory_region_init_io(&d->io, &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1286 }
1287 
1288 static void
1289 e1000_cleanup(NetClientState *nc)
1290 {
1291     E1000State *s = qemu_get_nic_opaque(nc);
1292 
1293     s->nic = NULL;
1294 }
1295 
1296 static void
1297 pci_e1000_uninit(PCIDevice *dev)
1298 {
1299     E1000State *d = DO_UPCAST(E1000State, dev, dev);
1300 
1301     qemu_del_timer(d->autoneg_timer);
1302     qemu_free_timer(d->autoneg_timer);
1303     memory_region_destroy(&d->mmio);
1304     memory_region_destroy(&d->io);
1305     qemu_del_nic(d->nic);
1306 }
1307 
1308 static NetClientInfo net_e1000_info = {
1309     .type = NET_CLIENT_OPTIONS_KIND_NIC,
1310     .size = sizeof(NICState),
1311     .can_receive = e1000_can_receive,
1312     .receive = e1000_receive,
1313     .cleanup = e1000_cleanup,
1314     .link_status_changed = e1000_set_link_status,
1315 };
1316 
1317 static int pci_e1000_init(PCIDevice *pci_dev)
1318 {
1319     E1000State *d = DO_UPCAST(E1000State, dev, pci_dev);
1320     uint8_t *pci_conf;
1321     uint16_t checksum = 0;
1322     int i;
1323     uint8_t *macaddr;
1324 
1325     pci_conf = d->dev.config;
1326 
1327     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1328     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1329 
1330     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1331 
1332     e1000_mmio_setup(d);
1333 
1334     pci_register_bar(&d->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1335 
1336     pci_register_bar(&d->dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1337 
1338     memmove(d->eeprom_data, e1000_eeprom_template,
1339         sizeof e1000_eeprom_template);
1340     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1341     macaddr = d->conf.macaddr.a;
1342     for (i = 0; i < 3; i++)
1343         d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1344     for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1345         checksum += d->eeprom_data[i];
1346     checksum = (uint16_t) EEPROM_SUM - checksum;
1347     d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1348 
1349     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1350                           object_get_typename(OBJECT(d)), d->dev.qdev.id, d);
1351 
1352     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1353 
1354     add_boot_device_path(d->conf.bootindex, &pci_dev->qdev, "/ethernet-phy@0");
1355 
1356     d->autoneg_timer = qemu_new_timer_ms(vm_clock, e1000_autoneg_timer, d);
1357 
1358     return 0;
1359 }
1360 
1361 static void qdev_e1000_reset(DeviceState *dev)
1362 {
1363     E1000State *d = DO_UPCAST(E1000State, dev.qdev, dev);
1364     e1000_reset(d);
1365 }
1366 
1367 static Property e1000_properties[] = {
1368     DEFINE_NIC_PROPERTIES(E1000State, conf),
1369     DEFINE_PROP_BIT("autonegotiation", E1000State,
1370                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1371     DEFINE_PROP_END_OF_LIST(),
1372 };
1373 
1374 static void e1000_class_init(ObjectClass *klass, void *data)
1375 {
1376     DeviceClass *dc = DEVICE_CLASS(klass);
1377     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1378 
1379     k->init = pci_e1000_init;
1380     k->exit = pci_e1000_uninit;
1381     k->romfile = "efi-e1000.rom";
1382     k->vendor_id = PCI_VENDOR_ID_INTEL;
1383     k->device_id = E1000_DEVID;
1384     k->revision = 0x03;
1385     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1386     dc->desc = "Intel Gigabit Ethernet";
1387     dc->reset = qdev_e1000_reset;
1388     dc->vmsd = &vmstate_e1000;
1389     dc->props = e1000_properties;
1390 }
1391 
1392 static const TypeInfo e1000_info = {
1393     .name          = "e1000",
1394     .parent        = TYPE_PCI_DEVICE,
1395     .instance_size = sizeof(E1000State),
1396     .class_init    = e1000_class_init,
1397 };
1398 
1399 static void e1000_register_types(void)
1400 {
1401     type_register_static(&e1000_info);
1402 }
1403 
1404 type_init(e1000_register_types)
1405