xref: /openbmc/qemu/hw/net/rocker/rocker.c (revision dc688246)
1 /*
2  * QEMU rocker switch emulation - PCI device
3  *
4  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
5  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "hw/pci/pci_device.h"
20 #include "hw/qdev-properties.h"
21 #include "hw/qdev-properties-system.h"
22 #include "migration/vmstate.h"
23 #include "hw/pci/msix.h"
24 #include "net/net.h"
25 #include "net/eth.h"
26 #include "qapi/error.h"
27 #include "qapi/qapi-commands-rocker.h"
28 #include "qemu/iov.h"
29 #include "qemu/module.h"
30 #include "qemu/bitops.h"
31 #include "qemu/log.h"
32 
33 #include "rocker.h"
34 #include "rocker_hw.h"
35 #include "rocker_fp.h"
36 #include "rocker_desc.h"
37 #include "rocker_tlv.h"
38 #include "rocker_world.h"
39 #include "rocker_of_dpa.h"
40 
/*
 * State of one emulated rocker switch PCI device.
 */
struct rocker {
    /* private */
    PCIDevice parent_obj;
    /* public */

    MemoryRegion mmio;
    MemoryRegion msix_bar;

    /* switch configuration */
    char *name;                  /* switch name; key compared by rocker_find() */
    char *world_name;            /* world name */
    uint32_t fp_ports;           /* front-panel port count */
    NICPeers *fp_ports_peers;
    MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
    uint64_t switch_id;          /* switch id */

    /* front-panel ports; code in this file walks entries [0, fp_ports) */
    FpPort *fp_port[ROCKER_FP_PORTS_MAX];

    /* register backings */
    uint32_t test_reg;
    uint64_t test_reg64;
    dma_addr_t test_dma_addr;
    uint32_t test_dma_size;
    /*
     * Low half latched by a 32-bit write to a 64-bit register; it is merged
     * with the high half and reset to 0 when the "+ 4" half is written.
     */
    uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */

    /* desc rings: [0] cmd, [1] event, then a tx/rx pair per port */
    DescRing **rings;

    /* switch worlds, indexed by enum rocker_world_type */
    World *worlds[ROCKER_WORLD_TYPE_MAX];
    World *world_dflt;

    /* linkage in the file-scope 'rockers' list */
    QLIST_ENTRY(rocker) next;
};
76 
/* Head of the list of rocker devices (linked via 'next'); walked by rocker_find(). */
static QLIST_HEAD(, rocker) rockers;
78 
79 Rocker *rocker_find(const char *name)
80 {
81     Rocker *r;
82 
83     QLIST_FOREACH(r, &rockers, next)
84         if (strcmp(r->name, name) == 0) {
85             return r;
86         }
87 
88     return NULL;
89 }
90 
91 World *rocker_get_world(Rocker *r, enum rocker_world_type type)
92 {
93     if (type < ROCKER_WORLD_TYPE_MAX) {
94         return r->worlds[type];
95     }
96     return NULL;
97 }
98 
99 RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
100 {
101     RockerSwitch *rocker;
102     Rocker *r;
103 
104     r = rocker_find(name);
105     if (!r) {
106         error_setg(errp, "rocker %s not found", name);
107         return NULL;
108     }
109 
110     rocker = g_new0(RockerSwitch, 1);
111     rocker->name = g_strdup(r->name);
112     rocker->id = r->switch_id;
113     rocker->ports = r->fp_ports;
114 
115     return rocker;
116 }
117 
118 RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
119 {
120     RockerPortList *list = NULL;
121     Rocker *r;
122     int i;
123 
124     r = rocker_find(name);
125     if (!r) {
126         error_setg(errp, "rocker %s not found", name);
127         return NULL;
128     }
129 
130     for (i = r->fp_ports - 1; i >= 0; i--) {
131         QAPI_LIST_PREPEND(list, fp_port_get_info(r->fp_port[i]));
132     }
133 
134     return list;
135 }
136 
/* Return the number of front-panel ports configured on switch @r. */
uint32_t rocker_fp_ports(Rocker *r)
{
    return r->fp_ports;
}
141 
/*
 * Map a tx descriptor ring to its physical port number (pport).
 *
 * Ring indices 0 and 1 are the cmd and event rings; after that each port
 * owns a tx/rx pair, so tx ring index 2 maps to pport 1, index 4 to
 * pport 2, and so on.  @r is unused.
 */
static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
                                            DescRing *ring)
{
    return (desc_ring_index(ring) - 2) / 2 + 1;
}
147 
148 static int tx_consume(Rocker *r, DescInfo *info)
149 {
150     PCIDevice *dev = PCI_DEVICE(r);
151     char *buf = desc_get_buf(info, true);
152     RockerTlv *tlv_frag;
153     RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
154     struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
155     uint32_t pport;
156     uint32_t port;
157     uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
158     uint16_t tx_l3_csum_off = 0;
159     uint16_t tx_tso_mss = 0;
160     uint16_t tx_tso_hdr_len = 0;
161     int iovcnt = 0;
162     int err = ROCKER_OK;
163     int rem;
164     int i;
165 
166     if (!buf) {
167         return -ROCKER_ENXIO;
168     }
169 
170     rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
171 
172     if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
173         return -ROCKER_EINVAL;
174     }
175 
176     pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
177     if (!fp_port_from_pport(pport, &port)) {
178         return -ROCKER_EINVAL;
179     }
180 
181     if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
182         tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
183     }
184 
185     switch (tx_offload) {
186     case ROCKER_TX_OFFLOAD_L3_CSUM:
187         if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
188             return -ROCKER_EINVAL;
189         }
190         break;
191     case ROCKER_TX_OFFLOAD_TSO:
192         if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
193             !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
194             return -ROCKER_EINVAL;
195         }
196         break;
197     }
198 
199     if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
200         tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
201         qemu_log_mask(LOG_UNIMP, "rocker %s: L3 not implemented"
202                                  " (cksum off: %u)\n",
203                       __func__, tx_l3_csum_off);
204     }
205 
206     if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
207         tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
208         qemu_log_mask(LOG_UNIMP, "rocker %s: TSO not implemented (MSS: %u)\n",
209                       __func__, tx_tso_mss);
210     }
211 
212     if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
213         tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
214         qemu_log_mask(LOG_UNIMP, "rocker %s: TSO not implemented"
215                                  " (hdr length: %u)\n",
216                       __func__, tx_tso_hdr_len);
217     }
218 
219     rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
220         hwaddr frag_addr;
221         uint16_t frag_len;
222 
223         if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
224             err = -ROCKER_EINVAL;
225             goto err_bad_attr;
226         }
227 
228         rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
229 
230         if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
231             !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
232             err = -ROCKER_EINVAL;
233             goto err_bad_attr;
234         }
235 
236         frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
237         frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
238 
239         if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
240             goto err_too_many_frags;
241         }
242         iov[iovcnt].iov_len = frag_len;
243         iov[iovcnt].iov_base = g_malloc(frag_len);
244 
245         pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
246                      iov[iovcnt].iov_len);
247 
248         iovcnt++;
249     }
250 
251     err = fp_port_eg(r->fp_port[port], iov, iovcnt);
252 
253 err_too_many_frags:
254 err_bad_attr:
255     for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
256         g_free(iov[i].iov_base);
257     }
258 
259     return err;
260 }
261 
262 static int cmd_get_port_settings(Rocker *r,
263                                  DescInfo *info, char *buf,
264                                  RockerTlv *cmd_info_tlv)
265 {
266     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
267     RockerTlv *nest;
268     FpPort *fp_port;
269     uint32_t pport;
270     uint32_t port;
271     uint32_t speed;
272     uint8_t duplex;
273     uint8_t autoneg;
274     uint8_t learning;
275     char *phys_name;
276     MACAddr macaddr;
277     enum rocker_world_type mode;
278     size_t tlv_size;
279     int pos;
280     int err;
281 
282     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
283                             cmd_info_tlv);
284 
285     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
286         return -ROCKER_EINVAL;
287     }
288 
289     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
290     if (!fp_port_from_pport(pport, &port)) {
291         return -ROCKER_EINVAL;
292     }
293     fp_port = r->fp_port[port];
294 
295     err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
296     if (err) {
297         return err;
298     }
299 
300     fp_port_get_macaddr(fp_port, &macaddr);
301     mode = world_type(fp_port_get_world(fp_port));
302     learning = fp_port_get_learning(fp_port);
303     phys_name = fp_port_get_name(fp_port);
304 
305     tlv_size = rocker_tlv_total_size(0) +                 /* nest */
306                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
307                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
308                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
309                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
310                rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
311                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
312                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
313                rocker_tlv_total_size(strlen(phys_name));
314 
315     if (tlv_size > desc_buf_size(info)) {
316         return -ROCKER_EMSGSIZE;
317     }
318 
319     pos = 0;
320     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
321     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
322     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
323     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
324     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
325     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
326                    sizeof(macaddr.a), macaddr.a);
327     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
328     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
329                       learning);
330     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
331                    strlen(phys_name), phys_name);
332     rocker_tlv_nest_end(buf, &pos, nest);
333 
334     return desc_set_buf(info, tlv_size);
335 }
336 
337 static int cmd_set_port_settings(Rocker *r,
338                                  RockerTlv *cmd_info_tlv)
339 {
340     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
341     FpPort *fp_port;
342     uint32_t pport;
343     uint32_t port;
344     uint32_t speed;
345     uint8_t duplex;
346     uint8_t autoneg;
347     uint8_t learning;
348     MACAddr macaddr;
349     enum rocker_world_type mode;
350     int err;
351 
352     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
353                             cmd_info_tlv);
354 
355     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
356         return -ROCKER_EINVAL;
357     }
358 
359     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
360     if (!fp_port_from_pport(pport, &port)) {
361         return -ROCKER_EINVAL;
362     }
363     fp_port = r->fp_port[port];
364 
365     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
366         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
367         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
368 
369         speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
370         duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
371         autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
372 
373         err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
374         if (err) {
375             return err;
376         }
377     }
378 
379     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
380         if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
381             sizeof(macaddr.a)) {
382             return -ROCKER_EINVAL;
383         }
384         memcpy(macaddr.a,
385                rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
386                sizeof(macaddr.a));
387         fp_port_set_macaddr(fp_port, &macaddr);
388     }
389 
390     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
391         mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
392         if (mode >= ROCKER_WORLD_TYPE_MAX) {
393             return -ROCKER_EINVAL;
394         }
395         /* We don't support world change. */
396         if (!fp_port_check_world(fp_port, r->worlds[mode])) {
397             return -ROCKER_EINVAL;
398         }
399     }
400 
401     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
402         learning =
403             rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
404         fp_port_set_learning(fp_port, learning);
405     }
406 
407     return ROCKER_OK;
408 }
409 
410 static int cmd_consume(Rocker *r, DescInfo *info)
411 {
412     char *buf = desc_get_buf(info, false);
413     RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
414     RockerTlv *info_tlv;
415     World *world;
416     uint16_t cmd;
417     int err;
418 
419     if (!buf) {
420         return -ROCKER_ENXIO;
421     }
422 
423     rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
424 
425     if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
426         return -ROCKER_EINVAL;
427     }
428 
429     cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
430     info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
431 
432     /* This might be reworked to something like this:
433      * Every world will have an array of command handlers from
434      * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is
435      * up to each world to implement whatever command it want.
436      * It can reference "generic" commands as cmd_set_port_settings or
437      * cmd_get_port_settings
438      */
439 
440     switch (cmd) {
441     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
442     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
443     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
444     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
445     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
446     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
447     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
448     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
449         world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
450         err = world_do_cmd(world, info, buf, cmd, info_tlv);
451         break;
452     case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
453         err = cmd_get_port_settings(r, info, buf, info_tlv);
454         break;
455     case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
456         err = cmd_set_port_settings(r, info_tlv);
457         break;
458     default:
459         err = -ROCKER_EINVAL;
460         break;
461     }
462 
463     return err;
464 }
465 
466 static void rocker_msix_irq(Rocker *r, unsigned vector)
467 {
468     PCIDevice *dev = PCI_DEVICE(r);
469 
470     DPRINTF("MSI-X notify request for vector %d\n", vector);
471     if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
472         DPRINTF("incorrect vector %d\n", vector);
473         return;
474     }
475     msix_notify(dev, vector);
476 }
477 
478 int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
479 {
480     DescRing *ring = r->rings[ROCKER_RING_EVENT];
481     DescInfo *info = desc_ring_fetch_desc(ring);
482     RockerTlv *nest;
483     char *buf;
484     size_t tlv_size;
485     int pos;
486     int err;
487 
488     if (!info) {
489         return -ROCKER_ENOBUFS;
490     }
491 
492     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
493                rocker_tlv_total_size(0) +                 /* nest */
494                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
495                rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
496 
497     if (tlv_size > desc_buf_size(info)) {
498         err = -ROCKER_EMSGSIZE;
499         goto err_too_big;
500     }
501 
502     buf = desc_get_buf(info, false);
503     if (!buf) {
504         err = -ROCKER_ENOMEM;
505         goto err_no_mem;
506     }
507 
508     pos = 0;
509     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
510                         ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
511     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
512     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
513     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
514                       link_up ? 1 : 0);
515     rocker_tlv_nest_end(buf, &pos, nest);
516 
517     err = desc_set_buf(info, tlv_size);
518 
519 err_too_big:
520 err_no_mem:
521     if (desc_ring_post_desc(ring, err)) {
522         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
523     }
524 
525     return err;
526 }
527 
528 int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
529                                uint16_t vlan_id)
530 {
531     DescRing *ring = r->rings[ROCKER_RING_EVENT];
532     DescInfo *info;
533     FpPort *fp_port;
534     uint32_t port;
535     RockerTlv *nest;
536     char *buf;
537     size_t tlv_size;
538     int pos;
539     int err;
540 
541     if (!fp_port_from_pport(pport, &port)) {
542         return -ROCKER_EINVAL;
543     }
544     fp_port = r->fp_port[port];
545     if (!fp_port_get_learning(fp_port)) {
546         return ROCKER_OK;
547     }
548 
549     info = desc_ring_fetch_desc(ring);
550     if (!info) {
551         return -ROCKER_ENOBUFS;
552     }
553 
554     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
555                rocker_tlv_total_size(0) +                 /* nest */
556                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
557                rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
558                rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
559 
560     if (tlv_size > desc_buf_size(info)) {
561         err = -ROCKER_EMSGSIZE;
562         goto err_too_big;
563     }
564 
565     buf = desc_get_buf(info, false);
566     if (!buf) {
567         err = -ROCKER_ENOMEM;
568         goto err_no_mem;
569     }
570 
571     pos = 0;
572     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
573                         ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
574     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
575     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
576     rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
577     rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
578     rocker_tlv_nest_end(buf, &pos, nest);
579 
580     err = desc_set_buf(info, tlv_size);
581 
582 err_too_big:
583 err_no_mem:
584     if (desc_ring_post_desc(ring, err)) {
585         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
586     }
587 
588     return err;
589 }
590 
/*
 * Return the rx descriptor ring belonging to physical port @pport.
 *
 * Ring indices 0 and 1 are the cmd and event rings; after that each port
 * owns a tx/rx pair, so pport 1 uses rx ring 3, pport 2 uses rx ring 5,
 * and so on.  @pport is not range-checked here.
 */
static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
                                                     uint32_t pport)
{
    return r->rings[(pport - 1) * 2 + 3];
}
596 
597 int rx_produce(World *world, uint32_t pport,
598                const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
599 {
600     Rocker *r = world_rocker(world);
601     PCIDevice *dev = (PCIDevice *)r;
602     DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
603     DescInfo *info = desc_ring_fetch_desc(ring);
604     char *data;
605     size_t data_size = iov_size(iov, iovcnt);
606     char *buf;
607     uint16_t rx_flags = 0;
608     uint16_t rx_csum = 0;
609     size_t tlv_size;
610     RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
611     hwaddr frag_addr;
612     uint16_t frag_max_len;
613     int pos;
614     int err;
615 
616     if (!info) {
617         return -ROCKER_ENOBUFS;
618     }
619 
620     buf = desc_get_buf(info, false);
621     if (!buf) {
622         err = -ROCKER_ENXIO;
623         goto out;
624     }
625     rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
626 
627     if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
628         !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
629         err = -ROCKER_EINVAL;
630         goto out;
631     }
632 
633     frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
634     frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
635 
636     if (data_size > frag_max_len) {
637         err = -ROCKER_EMSGSIZE;
638         goto out;
639     }
640 
641     if (copy_to_cpu) {
642         rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
643     }
644 
645     /* XXX calc rx flags/csum */
646 
647     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
648                rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */
649                rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
650                rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
651                rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
652 
653     if (tlv_size > desc_buf_size(info)) {
654         err = -ROCKER_EMSGSIZE;
655         goto out;
656     }
657 
658     /* TODO:
659      * iov dma write can be optimized in similar way e1000 does it in
660      * e1000_receive_iov. But maybe if would make sense to introduce
661      * generic helper iov_dma_write.
662      */
663 
664     data = g_malloc(data_size);
665 
666     iov_to_buf(iov, iovcnt, 0, data, data_size);
667     pci_dma_write(dev, frag_addr, data, data_size);
668     g_free(data);
669 
670     pos = 0;
671     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
672     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
673     rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
674     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
675     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
676 
677     err = desc_set_buf(info, tlv_size);
678 
679 out:
680     if (desc_ring_post_desc(ring, err)) {
681         rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
682     }
683 
684     return err;
685 }
686 
687 int rocker_port_eg(Rocker *r, uint32_t pport,
688                    const struct iovec *iov, int iovcnt)
689 {
690     FpPort *fp_port;
691     uint32_t port;
692 
693     if (!fp_port_from_pport(pport, &port)) {
694         return -ROCKER_EINVAL;
695     }
696 
697     fp_port = r->fp_port[port];
698 
699     return fp_port_eg(fp_port, iov, iovcnt);
700 }
701 
702 static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
703 {
704     PCIDevice *dev = PCI_DEVICE(r);
705     char *buf;
706     int i;
707 
708     buf = g_malloc(r->test_dma_size);
709 
710     switch (val) {
711     case ROCKER_TEST_DMA_CTRL_CLEAR:
712         memset(buf, 0, r->test_dma_size);
713         break;
714     case ROCKER_TEST_DMA_CTRL_FILL:
715         memset(buf, 0x96, r->test_dma_size);
716         break;
717     case ROCKER_TEST_DMA_CTRL_INVERT:
718         pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
719         for (i = 0; i < r->test_dma_size; i++) {
720             buf[i] = ~buf[i];
721         }
722         break;
723     default:
724         DPRINTF("not test dma control val=0x%08x\n", val);
725         goto err_out;
726     }
727     pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
728 
729     rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
730 
731 err_out:
732     g_free(buf);
733 }
734 
/* Forward declaration: rocker_control() below calls it before its definition. */
static void rocker_reset(DeviceState *dev);
736 
737 static void rocker_control(Rocker *r, uint32_t val)
738 {
739     if (val & ROCKER_CONTROL_RESET) {
740         rocker_reset(DEVICE(r));
741     }
742 }
743 
744 static int rocker_pci_ring_count(Rocker *r)
745 {
746     /* There are:
747      * - command ring
748      * - event ring
749      * - tx and rx ring per each port
750      */
751     return 2 + (2 * r->fp_ports);
752 }
753 
754 static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
755 {
756     hwaddr start = ROCKER_DMA_DESC_BASE;
757     hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
758 
759     return addr >= start && addr < end;
760 }
761 
762 static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
763 {
764     int i;
765     bool old_enabled;
766     bool new_enabled;
767     FpPort *fp_port;
768 
769     for (i = 0; i < r->fp_ports; i++) {
770         fp_port = r->fp_port[i];
771         old_enabled = fp_port_enabled(fp_port);
772         new_enabled = (new >> (i + 1)) & 0x1;
773         if (new_enabled == old_enabled) {
774             continue;
775         }
776         if (new_enabled) {
777             fp_port_enable(r->fp_port[i]);
778         } else {
779             fp_port_disable(r->fp_port[i]);
780         }
781     }
782 }
783 
/*
 * Handle a 32-bit MMIO write.
 *
 * Addresses inside the descriptor-ring window are decoded into a ring
 * index and a per-ring register offset; everything else is matched
 * against the global registers.  64-bit registers are written in two
 * halves: the low half is latched in r->lower32, and the write to the
 * "+ 4" half combines both, applies the value and clears the latch.
 * Unhandled addresses are only reported through DPRINTF.
 */
static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
{
    Rocker *r = opaque;

    if (rocker_addr_is_desc_reg(r, addr)) {
        unsigned index = ROCKER_RING_INDEX(addr);
        unsigned offset = addr & ROCKER_DMA_DESC_MASK;

        switch (offset) {
        case ROCKER_DMA_DESC_ADDR_OFFSET:
            /* low half of the ring base address: latch only */
            r->lower32 = (uint64_t)val;
            break;
        case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
            /* high half: combine with the latched low half and apply */
            desc_ring_set_base_addr(r->rings[index],
                                    ((uint64_t)val) << 32 | r->lower32);
            r->lower32 = 0;
            break;
        case ROCKER_DMA_DESC_SIZE_OFFSET:
            desc_ring_set_size(r->rings[index], val);
            break;
        case ROCKER_DMA_DESC_HEAD_OFFSET:
            /* raise the ring's vector when desc_ring_set_head() asks for it */
            if (desc_ring_set_head(r->rings[index], val)) {
                rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
            }
            break;
        case ROCKER_DMA_DESC_CTRL_OFFSET:
            desc_ring_set_ctrl(r->rings[index], val);
            break;
        case ROCKER_DMA_DESC_CREDITS_OFFSET:
            /* raise the ring's vector when desc_ring_ret_credits() asks for it */
            if (desc_ring_ret_credits(r->rings[index], val)) {
                rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
            }
            break;
        default:
            DPRINTF("not implemented dma reg write(l) addr=0x" HWADDR_FMT_plx
                    " val=0x%08x (ring %d, addr=0x%02x)\n",
                    addr, val, index, offset);
            break;
        }
        return;
    }

    switch (addr) {
    case ROCKER_TEST_REG:
        r->test_reg = val;
        break;
    case ROCKER_TEST_REG64:
    case ROCKER_TEST_DMA_ADDR:
    case ROCKER_PORT_PHYS_ENABLE:
        /* low half of a 64-bit register: latch until the "+ 4" write */
        r->lower32 = (uint64_t)val;
        break;
    case ROCKER_TEST_REG64 + 4:
        r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
        r->lower32 = 0;
        break;
    case ROCKER_TEST_IRQ:
        /* the written value selects the MSI-X vector to raise */
        rocker_msix_irq(r, val);
        break;
    case ROCKER_TEST_DMA_SIZE:
        /* only the low 16 bits of the size are kept */
        r->test_dma_size = val & 0xFFFF;
        break;
    case ROCKER_TEST_DMA_ADDR + 4:
        r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
        r->lower32 = 0;
        break;
    case ROCKER_TEST_DMA_CTRL:
        rocker_test_dma_ctrl(r, val);
        break;
    case ROCKER_CONTROL:
        rocker_control(r, val);
        break;
    case ROCKER_PORT_PHYS_ENABLE + 4:
        rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
        r->lower32 = 0;
        break;
    default:
        DPRINTF("not implemented write(l) addr=0x" HWADDR_FMT_plx
                " val=0x%08x\n", addr, val);
        break;
    }
}
865 
866 static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
867 {
868     Rocker *r = opaque;
869 
870     if (rocker_addr_is_desc_reg(r, addr)) {
871         unsigned index = ROCKER_RING_INDEX(addr);
872         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
873 
874         switch (offset) {
875         case ROCKER_DMA_DESC_ADDR_OFFSET:
876             desc_ring_set_base_addr(r->rings[index], val);
877             break;
878         default:
879             DPRINTF("not implemented dma reg write(q) addr=0x" HWADDR_FMT_plx
880                     " val=0x" HWADDR_FMT_plx " (ring %d, offset=0x%02x)\n",
881                     addr, val, index, offset);
882             break;
883         }
884         return;
885     }
886 
887     switch (addr) {
888     case ROCKER_TEST_REG64:
889         r->test_reg64 = val;
890         break;
891     case ROCKER_TEST_DMA_ADDR:
892         r->test_dma_addr = val;
893         break;
894     case ROCKER_PORT_PHYS_ENABLE:
895         rocker_port_phys_enable_write(r, val);
896         break;
897     default:
898         DPRINTF("not implemented write(q) addr=0x" HWADDR_FMT_plx
899                 " val=0x" HWADDR_FMT_plx "\n", addr, val);
900         break;
901     }
902 }
903 
904 #ifdef DEBUG_ROCKER
905 #define regname(reg) case (reg): return #reg
906 static const char *rocker_reg_name(void *opaque, hwaddr addr)
907 {
908     Rocker *r = opaque;
909 
910     if (rocker_addr_is_desc_reg(r, addr)) {
911         unsigned index = ROCKER_RING_INDEX(addr);
912         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
913         static char buf[100];
914         char ring_name[10];
915 
916         switch (index) {
917         case 0:
918             sprintf(ring_name, "cmd");
919             break;
920         case 1:
921             sprintf(ring_name, "event");
922             break;
923         default:
924             sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
925                     (index - 2) / 2);
926         }
927 
928         switch (offset) {
929         case ROCKER_DMA_DESC_ADDR_OFFSET:
930             sprintf(buf, "Ring[%s] ADDR", ring_name);
931             return buf;
932         case ROCKER_DMA_DESC_ADDR_OFFSET+4:
933             sprintf(buf, "Ring[%s] ADDR+4", ring_name);
934             return buf;
935         case ROCKER_DMA_DESC_SIZE_OFFSET:
936             sprintf(buf, "Ring[%s] SIZE", ring_name);
937             return buf;
938         case ROCKER_DMA_DESC_HEAD_OFFSET:
939             sprintf(buf, "Ring[%s] HEAD", ring_name);
940             return buf;
941         case ROCKER_DMA_DESC_TAIL_OFFSET:
942             sprintf(buf, "Ring[%s] TAIL", ring_name);
943             return buf;
944         case ROCKER_DMA_DESC_CTRL_OFFSET:
945             sprintf(buf, "Ring[%s] CTRL", ring_name);
946             return buf;
947         case ROCKER_DMA_DESC_CREDITS_OFFSET:
948             sprintf(buf, "Ring[%s] CREDITS", ring_name);
949             return buf;
950         default:
951             sprintf(buf, "Ring[%s] ???", ring_name);
952             return buf;
953         }
954     } else {
955         switch (addr) {
956             regname(ROCKER_BOGUS_REG0);
957             regname(ROCKER_BOGUS_REG1);
958             regname(ROCKER_BOGUS_REG2);
959             regname(ROCKER_BOGUS_REG3);
960             regname(ROCKER_TEST_REG);
961             regname(ROCKER_TEST_REG64);
962             regname(ROCKER_TEST_REG64+4);
963             regname(ROCKER_TEST_IRQ);
964             regname(ROCKER_TEST_DMA_ADDR);
965             regname(ROCKER_TEST_DMA_ADDR+4);
966             regname(ROCKER_TEST_DMA_SIZE);
967             regname(ROCKER_TEST_DMA_CTRL);
968             regname(ROCKER_CONTROL);
969             regname(ROCKER_PORT_PHYS_COUNT);
970             regname(ROCKER_PORT_PHYS_LINK_STATUS);
971             regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
972             regname(ROCKER_PORT_PHYS_ENABLE);
973             regname(ROCKER_PORT_PHYS_ENABLE+4);
974             regname(ROCKER_SWITCH_ID);
975             regname(ROCKER_SWITCH_ID+4);
976         }
977     }
978     return "???";
979 }
980 #else
981 static const char *rocker_reg_name(void *opaque, hwaddr addr)
982 {
983     return NULL;
984 }
985 #endif
986 
987 static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
988                               unsigned size)
989 {
990     DPRINTF("Write %s addr " HWADDR_FMT_plx
991             ", size %u, val " HWADDR_FMT_plx "\n",
992             rocker_reg_name(opaque, addr), addr, size, val);
993 
994     switch (size) {
995     case 4:
996         rocker_io_writel(opaque, addr, val);
997         break;
998     case 8:
999         rocker_io_writeq(opaque, addr, val);
1000         break;
1001     }
1002 }
1003 
1004 static uint64_t rocker_port_phys_link_status(Rocker *r)
1005 {
1006     int i;
1007     uint64_t status = 0;
1008 
1009     for (i = 0; i < r->fp_ports; i++) {
1010         FpPort *port = r->fp_port[i];
1011 
1012         if (fp_port_get_link_up(port)) {
1013             status |= 1ULL << (i + 1);
1014         }
1015     }
1016     return status;
1017 }
1018 
1019 static uint64_t rocker_port_phys_enable_read(Rocker *r)
1020 {
1021     int i;
1022     uint64_t ret = 0;
1023 
1024     for (i = 0; i < r->fp_ports; i++) {
1025         FpPort *port = r->fp_port[i];
1026 
1027         if (fp_port_enabled(port)) {
1028             ret |= 1ULL << (i + 1);
1029         }
1030     }
1031     return ret;
1032 }
1033 
1034 static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1035 {
1036     Rocker *r = opaque;
1037     uint32_t ret;
1038 
1039     if (rocker_addr_is_desc_reg(r, addr)) {
1040         unsigned index = ROCKER_RING_INDEX(addr);
1041         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1042 
1043         switch (offset) {
1044         case ROCKER_DMA_DESC_ADDR_OFFSET:
1045             ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1046             break;
1047         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1048             ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1049             break;
1050         case ROCKER_DMA_DESC_SIZE_OFFSET:
1051             ret = desc_ring_get_size(r->rings[index]);
1052             break;
1053         case ROCKER_DMA_DESC_HEAD_OFFSET:
1054             ret = desc_ring_get_head(r->rings[index]);
1055             break;
1056         case ROCKER_DMA_DESC_TAIL_OFFSET:
1057             ret = desc_ring_get_tail(r->rings[index]);
1058             break;
1059         case ROCKER_DMA_DESC_CREDITS_OFFSET:
1060             ret = desc_ring_get_credits(r->rings[index]);
1061             break;
1062         default:
1063             DPRINTF("not implemented dma reg read(l) addr=0x" HWADDR_FMT_plx
1064                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1065             ret = 0;
1066             break;
1067         }
1068         return ret;
1069     }
1070 
1071     switch (addr) {
1072     case ROCKER_BOGUS_REG0:
1073     case ROCKER_BOGUS_REG1:
1074     case ROCKER_BOGUS_REG2:
1075     case ROCKER_BOGUS_REG3:
1076         ret = 0xDEADBABE;
1077         break;
1078     case ROCKER_TEST_REG:
1079         ret = r->test_reg * 2;
1080         break;
1081     case ROCKER_TEST_REG64:
1082         ret = (uint32_t)(r->test_reg64 * 2);
1083         break;
1084     case ROCKER_TEST_REG64 + 4:
1085         ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1086         break;
1087     case ROCKER_TEST_DMA_SIZE:
1088         ret = r->test_dma_size;
1089         break;
1090     case ROCKER_TEST_DMA_ADDR:
1091         ret = (uint32_t)r->test_dma_addr;
1092         break;
1093     case ROCKER_TEST_DMA_ADDR + 4:
1094         ret = (uint32_t)(r->test_dma_addr >> 32);
1095         break;
1096     case ROCKER_PORT_PHYS_COUNT:
1097         ret = r->fp_ports;
1098         break;
1099     case ROCKER_PORT_PHYS_LINK_STATUS:
1100         ret = (uint32_t)rocker_port_phys_link_status(r);
1101         break;
1102     case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1103         ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1104         break;
1105     case ROCKER_PORT_PHYS_ENABLE:
1106         ret = (uint32_t)rocker_port_phys_enable_read(r);
1107         break;
1108     case ROCKER_PORT_PHYS_ENABLE + 4:
1109         ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1110         break;
1111     case ROCKER_SWITCH_ID:
1112         ret = (uint32_t)r->switch_id;
1113         break;
1114     case ROCKER_SWITCH_ID + 4:
1115         ret = (uint32_t)(r->switch_id >> 32);
1116         break;
1117     default:
1118         DPRINTF("not implemented read(l) addr=0x" HWADDR_FMT_plx "\n", addr);
1119         ret = 0;
1120         break;
1121     }
1122     return ret;
1123 }
1124 
1125 static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1126 {
1127     Rocker *r = opaque;
1128     uint64_t ret;
1129 
1130     if (rocker_addr_is_desc_reg(r, addr)) {
1131         unsigned index = ROCKER_RING_INDEX(addr);
1132         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1133 
1134         switch (addr & ROCKER_DMA_DESC_MASK) {
1135         case ROCKER_DMA_DESC_ADDR_OFFSET:
1136             ret = desc_ring_get_base_addr(r->rings[index]);
1137             break;
1138         default:
1139             DPRINTF("not implemented dma reg read(q) addr=0x" HWADDR_FMT_plx
1140                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1141             ret = 0;
1142             break;
1143         }
1144         return ret;
1145     }
1146 
1147     switch (addr) {
1148     case ROCKER_BOGUS_REG0:
1149     case ROCKER_BOGUS_REG2:
1150         ret = 0xDEADBABEDEADBABEULL;
1151         break;
1152     case ROCKER_TEST_REG64:
1153         ret = r->test_reg64 * 2;
1154         break;
1155     case ROCKER_TEST_DMA_ADDR:
1156         ret = r->test_dma_addr;
1157         break;
1158     case ROCKER_PORT_PHYS_LINK_STATUS:
1159         ret = rocker_port_phys_link_status(r);
1160         break;
1161     case ROCKER_PORT_PHYS_ENABLE:
1162         ret = rocker_port_phys_enable_read(r);
1163         break;
1164     case ROCKER_SWITCH_ID:
1165         ret = r->switch_id;
1166         break;
1167     default:
1168         DPRINTF("not implemented read(q) addr=0x" HWADDR_FMT_plx "\n", addr);
1169         ret = 0;
1170         break;
1171     }
1172     return ret;
1173 }
1174 
1175 static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1176 {
1177     DPRINTF("Read %s addr " HWADDR_FMT_plx ", size %u\n",
1178             rocker_reg_name(opaque, addr), addr, size);
1179 
1180     switch (size) {
1181     case 4:
1182         return rocker_io_readl(opaque, addr);
1183     case 8:
1184         return rocker_io_readq(opaque, addr);
1185     }
1186 
1187     return -1;
1188 }
1189 
1190 static const MemoryRegionOps rocker_mmio_ops = {
1191     .read = rocker_mmio_read,
1192     .write = rocker_mmio_write,
1193     .endianness = DEVICE_LITTLE_ENDIAN,
1194     .valid = {
1195         .min_access_size = 4,
1196         .max_access_size = 8,
1197     },
1198     .impl = {
1199         .min_access_size = 4,
1200         .max_access_size = 8,
1201     },
1202 };
1203 
1204 static void rocker_msix_vectors_unuse(Rocker *r,
1205                                       unsigned int num_vectors)
1206 {
1207     PCIDevice *dev = PCI_DEVICE(r);
1208     int i;
1209 
1210     for (i = 0; i < num_vectors; i++) {
1211         msix_vector_unuse(dev, i);
1212     }
1213 }
1214 
1215 static void rocker_msix_vectors_use(Rocker *r, unsigned int num_vectors)
1216 {
1217     PCIDevice *dev = PCI_DEVICE(r);
1218     int i;
1219 
1220     for (i = 0; i < num_vectors; i++) {
1221         msix_vector_use(dev, i);
1222     }
1223 }
1224 
1225 static int rocker_msix_init(Rocker *r, Error **errp)
1226 {
1227     PCIDevice *dev = PCI_DEVICE(r);
1228     int err;
1229 
1230     err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1231                     &r->msix_bar,
1232                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1233                     &r->msix_bar,
1234                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1235                     0, errp);
1236     if (err) {
1237         return err;
1238     }
1239 
1240     rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1241 
1242     return 0;
1243 }
1244 
1245 static void rocker_msix_uninit(Rocker *r)
1246 {
1247     PCIDevice *dev = PCI_DEVICE(r);
1248 
1249     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1250     rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1251 }
1252 
1253 static World *rocker_world_type_by_name(Rocker *r, const char *name)
1254 {
1255     int i;
1256 
1257     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1258         if (strcmp(name, world_name(r->worlds[i])) == 0) {
1259             return r->worlds[i];
1260         }
1261     }
1262     return NULL;
1263 }
1264 
1265 static void pci_rocker_realize(PCIDevice *dev, Error **errp)
1266 {
1267     Rocker *r = ROCKER(dev);
1268     const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1269     const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1270     static int sw_index;
1271     int i, err = 0;
1272 
1273     /* allocate worlds */
1274 
1275     r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1276 
1277     if (!r->world_name) {
1278         r->world_name = g_strdup(world_name(r->worlds[ROCKER_WORLD_TYPE_OF_DPA]));
1279     }
1280 
1281     r->world_dflt = rocker_world_type_by_name(r, r->world_name);
1282     if (!r->world_dflt) {
1283         error_setg(errp,
1284                 "invalid argument requested world %s does not exist",
1285                 r->world_name);
1286         goto err_world_type_by_name;
1287     }
1288 
1289     /* set up memory-mapped region at BAR0 */
1290 
1291     memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1292                           "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1293     pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1294                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1295 
1296     /* set up memory-mapped region for MSI-X */
1297 
1298     memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1299                        ROCKER_PCI_MSIX_BAR_SIZE);
1300     pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1301                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1302 
1303     /* MSI-X init */
1304 
1305     err = rocker_msix_init(r, errp);
1306     if (err) {
1307         goto err_msix_init;
1308     }
1309 
1310     /* validate switch properties */
1311 
1312     if (!r->name) {
1313         r->name = g_strdup(TYPE_ROCKER);
1314     }
1315 
1316     if (rocker_find(r->name)) {
1317         error_setg(errp, "%s already exists", r->name);
1318         goto err_duplicate;
1319     }
1320 
1321     /* Rocker name is passed in port name requests to OS with the intention
1322      * that the name is used in interface names. Limit the length of the
1323      * rocker name to avoid naming problems in the OS. Also, adding the
1324      * port number as p# and unganged breakout b#, where # is at most 2
1325      * digits, so leave room for it too (-1 for string terminator, -3 for
1326      * p# and -3 for b#)
1327      */
1328 #define ROCKER_IFNAMSIZ 16
1329 #define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
1330     if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
1331         error_setg(errp,
1332                 "name too long; please shorten to at most %d chars",
1333                 MAX_ROCKER_NAME_LEN);
1334         goto err_name_too_long;
1335     }
1336 
1337     if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1338         memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1339         r->fp_start_macaddr.a[4] += (sw_index++);
1340     }
1341 
1342     if (!r->switch_id) {
1343         memcpy(&r->switch_id, &r->fp_start_macaddr,
1344                sizeof(r->fp_start_macaddr));
1345     }
1346 
1347     if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1348         r->fp_ports = ROCKER_FP_PORTS_MAX;
1349     }
1350 
1351     r->rings = g_new(DescRing *, rocker_pci_ring_count(r));
1352 
1353     /* Rings are ordered like this:
1354      * - command ring
1355      * - event ring
1356      * - port0 tx ring
1357      * - port0 rx ring
1358      * - port1 tx ring
1359      * - port1 rx ring
1360      * .....
1361      */
1362 
1363     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1364         DescRing *ring = desc_ring_alloc(r, i);
1365 
1366         if (i == ROCKER_RING_CMD) {
1367             desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1368         } else if (i == ROCKER_RING_EVENT) {
1369             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1370         } else if (i % 2 == 0) {
1371             desc_ring_set_consume(ring, tx_consume,
1372                                   ROCKER_MSIX_VEC_TX((i - 2) / 2));
1373         } else if (i % 2 == 1) {
1374             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1375         }
1376 
1377         r->rings[i] = ring;
1378     }
1379 
1380     for (i = 0; i < r->fp_ports; i++) {
1381         FpPort *port =
1382             fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1383                           i, &r->fp_ports_peers[i]);
1384 
1385         r->fp_port[i] = port;
1386         fp_port_set_world(port, r->world_dflt);
1387     }
1388 
1389     QLIST_INSERT_HEAD(&rockers, r, next);
1390 
1391     return;
1392 
1393 err_name_too_long:
1394 err_duplicate:
1395     rocker_msix_uninit(r);
1396 err_msix_init:
1397     object_unparent(OBJECT(&r->msix_bar));
1398     object_unparent(OBJECT(&r->mmio));
1399 err_world_type_by_name:
1400     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1401         if (r->worlds[i]) {
1402             world_free(r->worlds[i]);
1403         }
1404     }
1405 }
1406 
1407 static void pci_rocker_uninit(PCIDevice *dev)
1408 {
1409     Rocker *r = ROCKER(dev);
1410     int i;
1411 
1412     QLIST_REMOVE(r, next);
1413 
1414     for (i = 0; i < r->fp_ports; i++) {
1415         FpPort *port = r->fp_port[i];
1416 
1417         fp_port_free(port);
1418         r->fp_port[i] = NULL;
1419     }
1420 
1421     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1422         if (r->rings[i]) {
1423             desc_ring_free(r->rings[i]);
1424         }
1425     }
1426     g_free(r->rings);
1427 
1428     rocker_msix_uninit(r);
1429     object_unparent(OBJECT(&r->msix_bar));
1430     object_unparent(OBJECT(&r->mmio));
1431 
1432     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1433         if (r->worlds[i]) {
1434             world_free(r->worlds[i]);
1435         }
1436     }
1437     g_free(r->fp_ports_peers);
1438 }
1439 
1440 static void rocker_reset(DeviceState *dev)
1441 {
1442     Rocker *r = ROCKER(dev);
1443     int i;
1444 
1445     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1446         if (r->worlds[i]) {
1447             world_reset(r->worlds[i]);
1448         }
1449     }
1450     for (i = 0; i < r->fp_ports; i++) {
1451         fp_port_reset(r->fp_port[i]);
1452         fp_port_set_world(r->fp_port[i], r->world_dflt);
1453     }
1454 
1455     r->test_reg = 0;
1456     r->test_reg64 = 0;
1457     r->test_dma_addr = 0;
1458     r->test_dma_size = 0;
1459 
1460     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1461         desc_ring_reset(r->rings[i]);
1462     }
1463 
1464     DPRINTF("Reset done\n");
1465 }
1466 
1467 static Property rocker_properties[] = {
1468     DEFINE_PROP_STRING("name", Rocker, name),
1469     DEFINE_PROP_STRING("world", Rocker, world_name),
1470     DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
1471                         fp_start_macaddr),
1472     DEFINE_PROP_UINT64("switch_id", Rocker,
1473                        switch_id, 0),
1474     DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
1475                       fp_ports_peers, qdev_prop_netdev, NICPeers),
1476     DEFINE_PROP_END_OF_LIST(),
1477 };
1478 
1479 static const VMStateDescription rocker_vmsd = {
1480     .name = TYPE_ROCKER,
1481     .unmigratable = 1,
1482 };
1483 
1484 static void rocker_class_init(ObjectClass *klass, void *data)
1485 {
1486     DeviceClass *dc = DEVICE_CLASS(klass);
1487     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1488 
1489     k->realize = pci_rocker_realize;
1490     k->exit = pci_rocker_uninit;
1491     k->vendor_id = PCI_VENDOR_ID_REDHAT;
1492     k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1493     k->revision = ROCKER_PCI_REVISION;
1494     k->class_id = PCI_CLASS_NETWORK_OTHER;
1495     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1496     dc->desc = "Rocker Switch";
1497     dc->reset = rocker_reset;
1498     device_class_set_props(dc, rocker_properties);
1499     dc->vmsd = &rocker_vmsd;
1500 }
1501 
1502 static const TypeInfo rocker_info = {
1503     .name          = TYPE_ROCKER,
1504     .parent        = TYPE_PCI_DEVICE,
1505     .instance_size = sizeof(Rocker),
1506     .class_init    = rocker_class_init,
1507     .interfaces = (InterfaceInfo[]) {
1508         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1509         { },
1510     },
1511 };
1512 
1513 static void rocker_register_types(void)
1514 {
1515     type_register_static(&rocker_info);
1516 }
1517 
1518 type_init(rocker_register_types)
1519